Command that produces this log: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 Initialized slot model with checkpoint at logs/slot/slot-model.mdl.lang-chinese ---------------------------------------------------------------------------------------------------- > trainable params: >>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024]) >>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024]) >>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024]) >>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024]) >>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.1.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.1.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.1.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.2.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.2.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.2.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.3.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.3.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.3.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.4.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.4.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.4.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.5.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.5.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.5.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.6.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.6.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.6.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.7.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.7.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.7.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.8.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.8.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.8.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.9.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.9.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.9.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.10.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.10.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.10.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.11.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.11.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.11.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.12.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.12.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.12.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.13.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.13.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.13.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.14.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.14.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.14.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.15.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.15.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.15.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.16.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.16.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.16.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.17.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.17.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.17.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.18.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.18.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.18.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.19.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.19.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.19.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.20.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.20.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.20.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.21.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.21.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.21.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.22.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.22.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.22.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.23.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.23.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.23.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.pooler.dense.weight: torch.Size([1024, 1024]) >>> xlmr.pooler.dense.bias: torch.Size([1024]) >>> basic_gcn.T_T.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.0.bias: torch.Size([1024]) >>> basic_gcn.T_T.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.1.bias: torch.Size([1024]) >>> basic_gcn.T_T.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.2.bias: torch.Size([1024]) >>> basic_gcn.T_E.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.0.bias: torch.Size([1024]) >>> basic_gcn.T_E.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.1.bias: torch.Size([1024]) >>> basic_gcn.T_E.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.2.bias: torch.Size([1024]) >>> basic_gcn.E_T.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.0.bias: torch.Size([1024]) >>> basic_gcn.E_T.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.1.bias: torch.Size([1024]) >>> basic_gcn.E_T.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.2.bias: torch.Size([1024]) >>> basic_gcn.E_E.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.0.bias: torch.Size([1024]) >>> basic_gcn.E_E.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.1.bias: torch.Size([1024]) >>> basic_gcn.E_E.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.2.bias: torch.Size([1024]) >>> basic_gcn.f_t.0.weight: torch.Size([1024, 2048]) >>> basic_gcn.f_t.0.bias: torch.Size([1024]) >>> basic_gcn.f_e.0.weight: torch.Size([1024, 2048]) >>> basic_gcn.f_e.0.bias: torch.Size([1024]) >>> name2classifier.occupy-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.occupy-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.occupy-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.occupy-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.outcome-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.outcome-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.outcome-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.outcome-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.when-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.when-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.when-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.when-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.where-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.where-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.where-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.where-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.who-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.who-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.who-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.who-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-against-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-against-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-against-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-against-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.organizer-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.organizer-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.organizer-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.organizer-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-for-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-for-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-for-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-for-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.wounded-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.wounded-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.wounded-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.wounded-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.arrested-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.arrested-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.arrested-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.arrested-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.imprisoned-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.imprisoned-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.imprisoned-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.imprisoned-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.corrupt-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.corrupt-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.corrupt-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.corrupt-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.judicial-actions-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.judicial-actions-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.judicial-actions-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.judicial-actions-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.charged-with-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.charged-with-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.charged-with-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.charged-with-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.prison-term-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.prison-term-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.prison-term-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.prison-term-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.fine-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.fine-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.fine-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.fine-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.npi-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.npi-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.npi-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.npi-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.disease-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.disease-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.disease-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.disease-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.outbreak-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.outbreak-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.outbreak-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.outbreak-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.blamed-by-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.blamed-by-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.blamed-by-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.blamed-by-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.claimed-by-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.claimed-by-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.claimed-by-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.claimed-by-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.terror-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.terror-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.terror-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.terror-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.kidnapped-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.kidnapped-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.kidnapped-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.kidnapped-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-perp-org-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-perp-org-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-perp-org-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-perp-org-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.target-physical-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.target-physical-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.target-physical-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.target-physical-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-perp-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-perp-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-perp-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-perp-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-killed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-killed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-killed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-killed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.target-human-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.target-human-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.target-human-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.target-human-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.weapon-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.weapon-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.weapon-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.weapon-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-objective-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-objective-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-objective-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-objective-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-organizer-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-organizer-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-organizer-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-organizer-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-captured-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-captured-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-captured-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-captured-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.assistance-provided-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.assistance-provided-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.assistance-provided-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.assistance-provided-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.current-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.current-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.current-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.current-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.group-identity-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.group-identity-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.group-identity-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.group-identity-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.human-displacement-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.human-displacement-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.human-displacement-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.human-displacement-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.origin-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.origin-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.origin-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.origin-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.total-displaced-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.total-displaced-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.total-displaced-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.total-displaced-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.transitory-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.transitory-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.transitory-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.transitory-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.destination-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.destination-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.destination-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.destination-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.transiting-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.transiting-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.transiting-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.transiting-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.assistance-needed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.assistance-needed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.assistance-needed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.assistance-needed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.detained-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.detained-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.detained-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.detained-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.missing-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.missing-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.missing-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.missing-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.blocked-migration-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.blocked-migration-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.blocked-migration-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.blocked-migration-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.injured-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.injured-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.injured-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.injured-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.major-disaster-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.major-disaster-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.major-disaster-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.major-disaster-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.related-natural-phenomena-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.related-natural-phenomena-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.related-natural-phenomena-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.related-natural-phenomena-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.rescue-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.rescue-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.rescue-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.rescue-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.responders-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.responders-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.responders-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.responders-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.human-displacement-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.human-displacement-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.human-displacement-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.human-displacement-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.damage-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.damage-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.damage-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.damage-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.individuals-affected-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.individuals-affected-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.individuals-affected-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.individuals-affected-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.affected-cumulative-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.affected-cumulative-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.affected-cumulative-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.affected-cumulative-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.rescued-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.rescued-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.rescued-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.rescued-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.repair-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.repair-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.repair-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.repair-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.declare-emergency-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.declare-emergency-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.declare-emergency-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.declare-emergency-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.announce-disaster-warnings-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.announce-disaster-warnings-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.announce-disaster-warnings-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.announce-disaster-warnings-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.disease-outbreak-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.disease-outbreak-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.disease-outbreak-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.disease-outbreak-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.cybercrime-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.cybercrime-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.cybercrime-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.cybercrime-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perpetrator-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perpetrator-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perpetrator-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perpetrator-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.victim-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.victim-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.victim-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.victim-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.response-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.response-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.response-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.response-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.information-stolen-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.information-stolen-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.information-stolen-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.information-stolen-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.related-crimes-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.related-crimes-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.related-crimes-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.related-crimes-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.victim-impact-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.victim-impact-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.victim-impact-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.victim-impact-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-amount-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-amount-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-amount-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-amount-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.etip-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.etip-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.etip-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.etip-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.project-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.project-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.project-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.project-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.project-name-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.project-name-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.project-name-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.project-name-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.signatories-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.signatories-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.signatories-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.signatories-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-awardee-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-awardee-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-awardee-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-awardee-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.overall-project-value-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.overall-project-value-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.overall-project-value-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.overall-project-value-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-amount-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-amount-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-amount-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-amount-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-recipient-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-recipient-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-recipient-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-recipient-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-source-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-source-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-source-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-source-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-awarder-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-awarder-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-awarder-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-awarder-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.agreement-length-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.agreement-length-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.agreement-length-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.agreement-length-ffn.layers.1.bias: torch.Size([2]) >>> irrealis_classifier.layers.0.weight: torch.Size([350, 1128]) >>> irrealis_classifier.layers.0.bias: torch.Size([350]) >>> irrealis_classifier.layers.1.weight: torch.Size([7, 350]) >>> irrealis_classifier.layers.1.bias: torch.Size([7]) n_trainable_params: 614103147, n_nontrainable_params: 0 ---------------------------------------------------------------------------------------------------- ****************************** Epoch: 0 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 14:26:58.660144: step: 2/459, loss: 0.6609009504318237 2023-01-22 14:26:59.275812: step: 4/459, loss: 0.44676199555397034 2023-01-22 14:26:59.956614: step: 6/459, loss: 1.3175387382507324 2023-01-22 14:27:00.625550: step: 8/459, loss: 0.4006584882736206 2023-01-22 14:27:01.249053: step: 10/459, loss: 0.974093496799469 2023-01-22 14:27:01.956214: step: 12/459, loss: 2.123244285583496 2023-01-22 14:27:02.575884: step: 14/459, loss: 6.271106243133545 2023-01-22 14:27:03.289682: step: 16/459, loss: 0.4103129506111145 2023-01-22 14:27:03.904737: step: 18/459, loss: 1.2765804529190063 2023-01-22 14:27:04.552222: step: 20/459, loss: 1.4342560768127441 2023-01-22 14:27:05.290107: step: 22/459, loss: 0.3509953022003174 2023-01-22 14:27:05.886130: step: 24/459, loss: 0.527281641960144 2023-01-22 14:27:06.560645: step: 26/459, loss: 6.649859428405762 2023-01-22 14:27:07.179138: step: 28/459, loss: 0.7118364572525024 2023-01-22 14:27:07.818346: step: 30/459, loss: 0.8199900984764099 2023-01-22 14:27:08.489382: step: 32/459, loss: 1.5155360698699951 2023-01-22 14:27:09.088423: step: 34/459, loss: 1.9907934665679932 2023-01-22 14:27:09.702777: step: 36/459, loss: 2.3536367416381836 2023-01-22 14:27:10.402213: step: 38/459, loss: 1.7567286491394043 2023-01-22 14:27:11.069844: step: 40/459, loss: 1.87650728225708 2023-01-22 14:27:11.684114: step: 42/459, loss: 0.4061623811721802 2023-01-22 14:27:12.265644: step: 44/459, loss: 1.119411587715149 2023-01-22 14:27:12.868261: step: 46/459, loss: 1.51370108127594 2023-01-22 14:27:13.483271: step: 48/459, loss: 2.8381829261779785 2023-01-22 14:27:14.235787: step: 50/459, loss: 8.911797523498535 2023-01-22 14:27:14.864815: step: 52/459, loss: 0.9423637390136719 2023-01-22 14:27:15.517009: step: 54/459, loss: 0.33485618233680725 2023-01-22 14:27:16.170023: step: 56/459, loss: 0.6903952956199646 2023-01-22 14:27:16.783965: step: 58/459, loss: 1.0161726474761963 2023-01-22 14:27:17.385496: step: 60/459, loss: 0.43791651725769043 2023-01-22 14:27:17.978075: step: 62/459, loss: 0.911318838596344 2023-01-22 14:27:18.698060: step: 64/459, loss: 1.6800897121429443 2023-01-22 14:27:19.293990: step: 66/459, loss: 1.8837060928344727 2023-01-22 14:27:19.946308: step: 68/459, loss: 2.0487561225891113 2023-01-22 14:27:20.588895: step: 70/459, loss: 1.4771852493286133 2023-01-22 14:27:21.149259: step: 72/459, loss: 1.1530661582946777 2023-01-22 14:27:21.721256: step: 74/459, loss: 2.878239870071411 2023-01-22 14:27:22.305149: step: 76/459, loss: 0.983544111251831 2023-01-22 14:27:22.911312: step: 78/459, loss: 0.24089117348194122 2023-01-22 14:27:23.538391: step: 80/459, loss: 1.1248552799224854 2023-01-22 14:27:24.180815: step: 82/459, loss: 1.1366848945617676 2023-01-22 14:27:24.802489: step: 84/459, loss: 0.8414322733879089 2023-01-22 14:27:25.446949: step: 86/459, loss: 0.5279879570007324 2023-01-22 14:27:26.123563: step: 88/459, loss: 1.4702757596969604 2023-01-22 14:27:26.754992: step: 90/459, loss: 0.9600844979286194 2023-01-22 14:27:27.363927: step: 92/459, loss: 2.229374885559082 2023-01-22 14:27:28.022143: step: 94/459, loss: 1.24513840675354 2023-01-22 14:27:28.669484: step: 96/459, loss: 0.347251832485199 2023-01-22 14:27:29.317958: step: 98/459, loss: 1.9219521284103394 2023-01-22 14:27:29.888538: step: 100/459, loss: 3.7574567794799805 2023-01-22 14:27:30.499614: step: 102/459, loss: 0.37003934383392334 2023-01-22 14:27:31.114108: step: 104/459, loss: 1.6983604431152344 2023-01-22 14:27:31.770086: step: 106/459, loss: 0.33641964197158813 2023-01-22 14:27:32.357183: step: 108/459, loss: 6.566459655761719 2023-01-22 14:27:33.024713: step: 110/459, loss: 5.861473083496094 2023-01-22 14:27:33.622779: step: 112/459, loss: 1.3614609241485596 2023-01-22 14:27:34.207571: step: 114/459, loss: 0.31852948665618896 2023-01-22 14:27:34.820926: step: 116/459, loss: 2.2822659015655518 2023-01-22 14:27:35.444133: step: 118/459, loss: 3.243795156478882 2023-01-22 14:27:36.066413: step: 120/459, loss: 1.9705829620361328 2023-01-22 14:27:36.620675: step: 122/459, loss: 0.35402077436447144 2023-01-22 14:27:37.300908: step: 124/459, loss: 1.3675024509429932 2023-01-22 14:27:38.026224: step: 126/459, loss: 0.7963196039199829 2023-01-22 14:27:38.681027: step: 128/459, loss: 0.7056236863136292 2023-01-22 14:27:39.283965: step: 130/459, loss: 0.43536970019340515 2023-01-22 14:27:39.995201: step: 132/459, loss: 0.9245129227638245 2023-01-22 14:27:40.623419: step: 134/459, loss: 1.5262551307678223 2023-01-22 14:27:41.390181: step: 136/459, loss: 5.9070281982421875 2023-01-22 14:27:42.047019: step: 138/459, loss: 1.5463321208953857 2023-01-22 14:27:42.613738: step: 140/459, loss: 1.27412748336792 2023-01-22 14:27:43.212147: step: 142/459, loss: 1.0227552652359009 2023-01-22 14:27:43.862414: step: 144/459, loss: 0.33387431502342224 2023-01-22 14:27:44.447600: step: 146/459, loss: 0.5659223198890686 2023-01-22 14:27:45.064600: step: 148/459, loss: 1.1912673711776733 2023-01-22 14:27:45.718490: step: 150/459, loss: 4.006309986114502 2023-01-22 14:27:46.435358: step: 152/459, loss: 1.3545511960983276 2023-01-22 14:27:47.066350: step: 154/459, loss: 2.4051551818847656 2023-01-22 14:27:47.689731: step: 156/459, loss: 1.0870273113250732 2023-01-22 14:27:48.321227: step: 158/459, loss: 1.8615362644195557 2023-01-22 14:27:48.956384: step: 160/459, loss: 1.2419357299804688 2023-01-22 14:27:49.656015: step: 162/459, loss: 0.8373663425445557 2023-01-22 14:27:50.275982: step: 164/459, loss: 0.5043231248855591 2023-01-22 14:27:50.937489: step: 166/459, loss: 0.6730886101722717 2023-01-22 14:27:51.582501: step: 168/459, loss: 0.5327233076095581 2023-01-22 14:27:52.172134: step: 170/459, loss: 5.680742263793945 2023-01-22 14:27:52.807201: step: 172/459, loss: 4.416818618774414 2023-01-22 14:27:53.399158: step: 174/459, loss: 1.3968111276626587 2023-01-22 14:27:54.045758: step: 176/459, loss: 0.8655428290367126 2023-01-22 14:27:54.669594: step: 178/459, loss: 7.629850387573242 2023-01-22 14:27:55.265840: step: 180/459, loss: 0.6262179613113403 2023-01-22 14:27:55.862661: step: 182/459, loss: 1.037073016166687 2023-01-22 14:27:56.523686: step: 184/459, loss: 0.6067306995391846 2023-01-22 14:27:57.138400: step: 186/459, loss: 0.7328194975852966 2023-01-22 14:27:57.740739: step: 188/459, loss: 1.4120668172836304 2023-01-22 14:27:58.426968: step: 190/459, loss: 0.8596044778823853 2023-01-22 14:27:59.017047: step: 192/459, loss: 0.4623202383518219 2023-01-22 14:27:59.614921: step: 194/459, loss: 1.094504952430725 2023-01-22 14:28:00.275012: step: 196/459, loss: 1.3292064666748047 2023-01-22 14:28:00.882385: step: 198/459, loss: 0.800514817237854 2023-01-22 14:28:01.500033: step: 200/459, loss: 1.3763377666473389 2023-01-22 14:28:02.161264: step: 202/459, loss: 1.0348625183105469 2023-01-22 14:28:02.770840: step: 204/459, loss: 2.9850966930389404 2023-01-22 14:28:03.500221: step: 206/459, loss: 0.9652550220489502 2023-01-22 14:28:04.112289: step: 208/459, loss: 1.893532633781433 2023-01-22 14:28:04.711412: step: 210/459, loss: 0.2777131497859955 2023-01-22 14:28:05.342363: step: 212/459, loss: 1.1944459676742554 2023-01-22 14:28:06.001752: step: 214/459, loss: 2.395505666732788 2023-01-22 14:28:06.660352: step: 216/459, loss: 1.441265344619751 2023-01-22 14:28:07.280752: step: 218/459, loss: 4.313056945800781 2023-01-22 14:28:07.961841: step: 220/459, loss: 0.5647115111351013 2023-01-22 14:28:08.551257: step: 222/459, loss: 0.3284572958946228 2023-01-22 14:28:09.142137: step: 224/459, loss: 1.3437472581863403 2023-01-22 14:28:09.738389: step: 226/459, loss: 1.2635694742202759 2023-01-22 14:28:10.367160: step: 228/459, loss: 0.3586079180240631 2023-01-22 14:28:11.052498: step: 230/459, loss: 0.7293155193328857 2023-01-22 14:28:11.651213: step: 232/459, loss: 0.6329115629196167 2023-01-22 14:28:12.261800: step: 234/459, loss: 0.719170868396759 2023-01-22 14:28:12.955378: step: 236/459, loss: 1.7368648052215576 2023-01-22 14:28:13.560368: step: 238/459, loss: 0.7447088956832886 2023-01-22 14:28:14.289822: step: 240/459, loss: 3.3329415321350098 2023-01-22 14:28:14.914507: step: 242/459, loss: 0.8786925077438354 2023-01-22 14:28:15.549392: step: 244/459, loss: 5.82568359375 2023-01-22 14:28:16.218462: step: 246/459, loss: 0.3923717141151428 2023-01-22 14:28:16.887277: step: 248/459, loss: 1.4683759212493896 2023-01-22 14:28:17.477509: step: 250/459, loss: 5.679880142211914 2023-01-22 14:28:18.070398: step: 252/459, loss: 2.288546323776245 2023-01-22 14:28:18.668936: step: 254/459, loss: 0.38428691029548645 2023-01-22 14:28:19.266782: step: 256/459, loss: 3.761946201324463 2023-01-22 14:28:19.844920: step: 258/459, loss: 1.5715994834899902 2023-01-22 14:28:20.471141: step: 260/459, loss: 4.195012092590332 2023-01-22 14:28:21.067890: step: 262/459, loss: 0.5665162801742554 2023-01-22 14:28:21.750305: step: 264/459, loss: 0.7148298621177673 2023-01-22 14:28:22.377474: step: 266/459, loss: 0.2920573949813843 2023-01-22 14:28:23.018685: step: 268/459, loss: 0.44026273488998413 2023-01-22 14:28:23.679078: step: 270/459, loss: 0.48039689660072327 2023-01-22 14:28:24.245514: step: 272/459, loss: 0.8831166625022888 2023-01-22 14:28:24.924698: step: 274/459, loss: 7.605053901672363 2023-01-22 14:28:25.588536: step: 276/459, loss: 1.3313579559326172 2023-01-22 14:28:26.231029: step: 278/459, loss: 2.693357467651367 2023-01-22 14:28:26.800064: step: 280/459, loss: 2.160871744155884 2023-01-22 14:28:27.575298: step: 282/459, loss: 1.244189977645874 2023-01-22 14:28:28.203506: step: 284/459, loss: 1.1164497137069702 2023-01-22 14:28:28.835809: step: 286/459, loss: 1.6493041515350342 2023-01-22 14:28:29.430263: step: 288/459, loss: 1.0958843231201172 2023-01-22 14:28:30.045203: step: 290/459, loss: 3.1014037132263184 2023-01-22 14:28:30.774407: step: 292/459, loss: 1.880725383758545 2023-01-22 14:28:31.357855: step: 294/459, loss: 0.7658699154853821 2023-01-22 14:28:31.951885: step: 296/459, loss: 0.4788148105144501 2023-01-22 14:28:32.646987: step: 298/459, loss: 0.9003066420555115 2023-01-22 14:28:33.296922: step: 300/459, loss: 1.384387493133545 2023-01-22 14:28:33.965690: step: 302/459, loss: 0.6441896557807922 2023-01-22 14:28:34.594774: step: 304/459, loss: 0.9566419720649719 2023-01-22 14:28:35.183561: step: 306/459, loss: 1.390059232711792 2023-01-22 14:28:35.851078: step: 308/459, loss: 0.3508758544921875 2023-01-22 14:28:36.574728: step: 310/459, loss: 2.898324489593506 2023-01-22 14:28:37.132076: step: 312/459, loss: 2.6446533203125 2023-01-22 14:28:37.710088: step: 314/459, loss: 0.2085002213716507 2023-01-22 14:28:38.306333: step: 316/459, loss: 0.35167887806892395 2023-01-22 14:28:38.930408: step: 318/459, loss: 4.732967853546143 2023-01-22 14:28:39.556142: step: 320/459, loss: 0.837172269821167 2023-01-22 14:28:40.236436: step: 322/459, loss: 0.44810789823532104 2023-01-22 14:28:40.854870: step: 324/459, loss: 1.863267183303833 2023-01-22 14:28:41.491967: step: 326/459, loss: 1.1240952014923096 2023-01-22 14:28:42.069440: step: 328/459, loss: 0.3678165078163147 2023-01-22 14:28:42.669194: step: 330/459, loss: 1.3943079710006714 2023-01-22 14:28:43.372075: step: 332/459, loss: 0.9765610694885254 2023-01-22 14:28:44.013802: step: 334/459, loss: 0.9625980257987976 2023-01-22 14:28:44.641998: step: 336/459, loss: 1.1791753768920898 2023-01-22 14:28:45.375614: step: 338/459, loss: 0.569951057434082 2023-01-22 14:28:46.006979: step: 340/459, loss: 0.34668269753456116 2023-01-22 14:28:46.637598: step: 342/459, loss: 1.860259771347046 2023-01-22 14:28:47.261864: step: 344/459, loss: 0.8273605704307556 2023-01-22 14:28:47.861004: step: 346/459, loss: 1.3773082494735718 2023-01-22 14:28:48.458267: step: 348/459, loss: 0.2818903625011444 2023-01-22 14:28:49.098488: step: 350/459, loss: 0.7859560251235962 2023-01-22 14:28:49.721520: step: 352/459, loss: 1.3652961254119873 2023-01-22 14:28:50.412786: step: 354/459, loss: 0.4961138963699341 2023-01-22 14:28:51.096492: step: 356/459, loss: 2.2906723022460938 2023-01-22 14:28:51.711387: step: 358/459, loss: 4.032597541809082 2023-01-22 14:28:52.318555: step: 360/459, loss: 0.3702103793621063 2023-01-22 14:28:52.951839: step: 362/459, loss: 3.1597847938537598 2023-01-22 14:28:53.548996: step: 364/459, loss: 1.459557294845581 2023-01-22 14:28:54.144259: step: 366/459, loss: 1.1557308435440063 2023-01-22 14:28:54.756416: step: 368/459, loss: 0.31309738755226135 2023-01-22 14:28:55.489914: step: 370/459, loss: 1.769155502319336 2023-01-22 14:28:56.104152: step: 372/459, loss: 1.3502014875411987 2023-01-22 14:28:56.712392: step: 374/459, loss: 0.863433837890625 2023-01-22 14:28:57.323169: step: 376/459, loss: 2.715714931488037 2023-01-22 14:28:57.933068: step: 378/459, loss: 0.5673260688781738 2023-01-22 14:28:58.552239: step: 380/459, loss: 1.1222858428955078 2023-01-22 14:28:59.197848: step: 382/459, loss: 2.9269652366638184 2023-01-22 14:28:59.865198: step: 384/459, loss: 1.987292766571045 2023-01-22 14:29:00.470586: step: 386/459, loss: 3.1562612056732178 2023-01-22 14:29:01.149809: step: 388/459, loss: 0.729246973991394 2023-01-22 14:29:01.801721: step: 390/459, loss: 0.6939705610275269 2023-01-22 14:29:02.406482: step: 392/459, loss: 1.8591463565826416 2023-01-22 14:29:02.999251: step: 394/459, loss: 2.0006914138793945 2023-01-22 14:29:03.640204: step: 396/459, loss: 2.178534507751465 2023-01-22 14:29:04.318575: step: 398/459, loss: 1.4751259088516235 2023-01-22 14:29:04.877999: step: 400/459, loss: 1.4749085903167725 2023-01-22 14:29:05.421141: step: 402/459, loss: 0.7423455119132996 2023-01-22 14:29:06.070480: step: 404/459, loss: 5.546173095703125 2023-01-22 14:29:06.665345: step: 406/459, loss: 2.9464893341064453 2023-01-22 14:29:07.291115: step: 408/459, loss: 1.3915984630584717 2023-01-22 14:29:07.925848: step: 410/459, loss: 2.0007333755493164 2023-01-22 14:29:08.549947: step: 412/459, loss: 2.1030008792877197 2023-01-22 14:29:09.170815: step: 414/459, loss: 0.33122098445892334 2023-01-22 14:29:09.791282: step: 416/459, loss: 1.0713053941726685 2023-01-22 14:29:10.380193: step: 418/459, loss: 2.76092529296875 2023-01-22 14:29:11.020583: step: 420/459, loss: 2.8861141204833984 2023-01-22 14:29:11.639785: step: 422/459, loss: 2.2925820350646973 2023-01-22 14:29:12.308762: step: 424/459, loss: 0.9439370632171631 2023-01-22 14:29:12.911011: step: 426/459, loss: 0.6497427225112915 2023-01-22 14:29:13.604032: step: 428/459, loss: 0.8317243456840515 2023-01-22 14:29:14.235460: step: 430/459, loss: 0.8799197673797607 2023-01-22 14:29:14.977327: step: 432/459, loss: 0.6714870929718018 2023-01-22 14:29:15.595903: step: 434/459, loss: 0.6122024059295654 2023-01-22 14:29:16.274679: step: 436/459, loss: 0.4190187454223633 2023-01-22 14:29:16.917207: step: 438/459, loss: 0.6707789301872253 2023-01-22 14:29:17.531193: step: 440/459, loss: 1.1117956638336182 2023-01-22 14:29:18.116515: step: 442/459, loss: 0.5173147916793823 2023-01-22 14:29:18.741887: step: 444/459, loss: 0.21573525667190552 2023-01-22 14:29:19.344950: step: 446/459, loss: 1.2909857034683228 2023-01-22 14:29:19.969859: step: 448/459, loss: 0.4435303509235382 2023-01-22 14:29:20.728728: step: 450/459, loss: 1.2235996723175049 2023-01-22 14:29:21.412035: step: 452/459, loss: 0.4172327518463135 2023-01-22 14:29:21.999407: step: 454/459, loss: 0.2887446880340576 2023-01-22 14:29:22.655741: step: 456/459, loss: 0.827552080154419 2023-01-22 14:29:23.325744: step: 458/459, loss: 0.4326735734939575 2023-01-22 14:29:23.963192: step: 460/459, loss: 0.79527747631073 2023-01-22 14:29:24.581562: step: 462/459, loss: 0.38880953192710876 2023-01-22 14:29:25.212511: step: 464/459, loss: 0.9219633340835571 2023-01-22 14:29:25.829755: step: 466/459, loss: 1.100590467453003 2023-01-22 14:29:26.385568: step: 468/459, loss: 0.5012067556381226 2023-01-22 14:29:26.983380: step: 470/459, loss: 0.8936017751693726 2023-01-22 14:29:27.583995: step: 472/459, loss: 1.2698901891708374 2023-01-22 14:29:28.235802: step: 474/459, loss: 3.7319135665893555 2023-01-22 14:29:28.853404: step: 476/459, loss: 0.9244711399078369 2023-01-22 14:29:29.451624: step: 478/459, loss: 1.2516469955444336 2023-01-22 14:29:30.125564: step: 480/459, loss: 1.4046916961669922 2023-01-22 14:29:30.741708: step: 482/459, loss: 1.014866828918457 2023-01-22 14:29:31.384058: step: 484/459, loss: 1.392783522605896 2023-01-22 14:29:31.953557: step: 486/459, loss: 0.48208779096603394 2023-01-22 14:29:32.613294: step: 488/459, loss: 0.5803625583648682 2023-01-22 14:29:33.157867: step: 490/459, loss: 0.32152363657951355 2023-01-22 14:29:33.836362: step: 492/459, loss: 3.2945051193237305 2023-01-22 14:29:34.431868: step: 494/459, loss: 0.6090602874755859 2023-01-22 14:29:35.074289: step: 496/459, loss: 0.32732954621315 2023-01-22 14:29:35.739670: step: 498/459, loss: 0.5089225172996521 2023-01-22 14:29:36.381723: step: 500/459, loss: 0.42297905683517456 2023-01-22 14:29:37.047781: step: 502/459, loss: 5.101817607879639 2023-01-22 14:29:37.664257: step: 504/459, loss: 0.44977083802223206 2023-01-22 14:29:38.301086: step: 506/459, loss: 1.677537202835083 2023-01-22 14:29:38.877056: step: 508/459, loss: 1.5656334161758423 2023-01-22 14:29:39.503255: step: 510/459, loss: 0.7007424235343933 2023-01-22 14:29:40.154454: step: 512/459, loss: 1.1630393266677856 2023-01-22 14:29:40.774173: step: 514/459, loss: 0.5237444639205933 2023-01-22 14:29:41.385892: step: 516/459, loss: 0.5379632711410522 2023-01-22 14:29:42.039353: step: 518/459, loss: 1.1174237728118896 2023-01-22 14:29:42.582679: step: 520/459, loss: 1.2595041990280151 2023-01-22 14:29:43.194005: step: 522/459, loss: 0.8427435755729675 2023-01-22 14:29:43.963250: step: 524/459, loss: 0.7434089183807373 2023-01-22 14:29:44.611423: step: 526/459, loss: 1.0243136882781982 2023-01-22 14:29:45.230444: step: 528/459, loss: 1.1501965522766113 2023-01-22 14:29:45.838137: step: 530/459, loss: 0.5120384097099304 2023-01-22 14:29:46.371355: step: 532/459, loss: 0.4203619658946991 2023-01-22 14:29:47.026461: step: 534/459, loss: 0.345784455537796 2023-01-22 14:29:47.701642: step: 536/459, loss: 11.193289756774902 2023-01-22 14:29:48.353351: step: 538/459, loss: 0.3349110782146454 2023-01-22 14:29:48.898547: step: 540/459, loss: 0.6423145532608032 2023-01-22 14:29:49.546751: step: 542/459, loss: 2.0063259601593018 2023-01-22 14:29:50.210527: step: 544/459, loss: 1.064115047454834 2023-01-22 14:29:50.876307: step: 546/459, loss: 0.89347904920578 2023-01-22 14:29:51.581879: step: 548/459, loss: 1.6640443801879883 2023-01-22 14:29:52.204170: step: 550/459, loss: 0.7500909566879272 2023-01-22 14:29:52.852904: step: 552/459, loss: 4.220468044281006 2023-01-22 14:29:53.461708: step: 554/459, loss: 1.1537108421325684 2023-01-22 14:29:54.114036: step: 556/459, loss: 0.8371274471282959 2023-01-22 14:29:54.743925: step: 558/459, loss: 0.5862605571746826 2023-01-22 14:29:55.422577: step: 560/459, loss: 0.8800095319747925 2023-01-22 14:29:56.039344: step: 562/459, loss: 0.8730455636978149 2023-01-22 14:29:56.621240: step: 564/459, loss: 0.4777841567993164 2023-01-22 14:29:57.291985: step: 566/459, loss: 0.8858281970024109 2023-01-22 14:29:57.879385: step: 568/459, loss: 0.5045402646064758 2023-01-22 14:29:58.419682: step: 570/459, loss: 0.6838365197181702 2023-01-22 14:29:58.999807: step: 572/459, loss: 0.7800625562667847 2023-01-22 14:29:59.574811: step: 574/459, loss: 0.8030989170074463 2023-01-22 14:30:00.227684: step: 576/459, loss: 0.5203374028205872 2023-01-22 14:30:01.018688: step: 578/459, loss: 0.5483607649803162 2023-01-22 14:30:01.671253: step: 580/459, loss: 1.17616868019104 2023-01-22 14:30:02.341384: step: 582/459, loss: 0.7123319506645203 2023-01-22 14:30:02.994858: step: 584/459, loss: 1.290265440940857 2023-01-22 14:30:03.649107: step: 586/459, loss: 1.2675426006317139 2023-01-22 14:30:04.262718: step: 588/459, loss: 0.8798182010650635 2023-01-22 14:30:04.925364: step: 590/459, loss: 0.16587430238723755 2023-01-22 14:30:05.531229: step: 592/459, loss: 0.605862021446228 2023-01-22 14:30:06.132423: step: 594/459, loss: 1.2142670154571533 2023-01-22 14:30:06.724331: step: 596/459, loss: 0.40700477361679077 2023-01-22 14:30:07.314445: step: 598/459, loss: 0.693231463432312 2023-01-22 14:30:08.009523: step: 600/459, loss: 1.136051893234253 2023-01-22 14:30:08.627620: step: 602/459, loss: 1.6396827697753906 2023-01-22 14:30:09.297855: step: 604/459, loss: 0.4998682737350464 2023-01-22 14:30:09.904833: step: 606/459, loss: 0.18946358561515808 2023-01-22 14:30:10.558422: step: 608/459, loss: 1.1771668195724487 2023-01-22 14:30:11.219299: step: 610/459, loss: 0.7430922389030457 2023-01-22 14:30:11.868387: step: 612/459, loss: 0.7828319072723389 2023-01-22 14:30:12.474624: step: 614/459, loss: 0.7191724181175232 2023-01-22 14:30:13.063288: step: 616/459, loss: 0.2628850042819977 2023-01-22 14:30:13.754044: step: 618/459, loss: 0.7330833077430725 2023-01-22 14:30:14.286019: step: 620/459, loss: 0.5027115345001221 2023-01-22 14:30:14.837105: step: 622/459, loss: 3.7073373794555664 2023-01-22 14:30:15.464757: step: 624/459, loss: 1.1311981678009033 2023-01-22 14:30:16.090441: step: 626/459, loss: 0.9985918998718262 2023-01-22 14:30:16.670301: step: 628/459, loss: 1.645505428314209 2023-01-22 14:30:17.268942: step: 630/459, loss: 0.5768250823020935 2023-01-22 14:30:17.960276: step: 632/459, loss: 2.751464605331421 2023-01-22 14:30:18.599494: step: 634/459, loss: 0.27402958273887634 2023-01-22 14:30:19.293327: step: 636/459, loss: 0.8997859954833984 2023-01-22 14:30:19.989121: step: 638/459, loss: 1.087684988975525 2023-01-22 14:30:20.679255: step: 640/459, loss: 1.561093807220459 2023-01-22 14:30:21.327282: step: 642/459, loss: 0.39576613903045654 2023-01-22 14:30:21.834631: step: 644/459, loss: 0.29172930121421814 2023-01-22 14:30:22.412667: step: 646/459, loss: 1.1688823699951172 2023-01-22 14:30:23.046917: step: 648/459, loss: 0.39291155338287354 2023-01-22 14:30:23.721114: step: 650/459, loss: 0.5683266520500183 2023-01-22 14:30:24.300765: step: 652/459, loss: 0.7318969368934631 2023-01-22 14:30:24.965496: step: 654/459, loss: 0.46837353706359863 2023-01-22 14:30:25.589716: step: 656/459, loss: 0.6668569445610046 2023-01-22 14:30:26.197939: step: 658/459, loss: 0.8007469177246094 2023-01-22 14:30:26.792260: step: 660/459, loss: 0.38554900884628296 2023-01-22 14:30:27.395244: step: 662/459, loss: 4.97885799407959 2023-01-22 14:30:28.016652: step: 664/459, loss: 1.474827527999878 2023-01-22 14:30:28.593661: step: 666/459, loss: 0.34090614318847656 2023-01-22 14:30:29.237760: step: 668/459, loss: 2.0148725509643555 2023-01-22 14:30:29.907950: step: 670/459, loss: 0.8113162517547607 2023-01-22 14:30:30.581037: step: 672/459, loss: 5.088605880737305 2023-01-22 14:30:31.173731: step: 674/459, loss: 3.8674798011779785 2023-01-22 14:30:31.825549: step: 676/459, loss: 0.74178147315979 2023-01-22 14:30:32.513629: step: 678/459, loss: 1.153260588645935 2023-01-22 14:30:33.173716: step: 680/459, loss: 0.28147363662719727 2023-01-22 14:30:33.785752: step: 682/459, loss: 0.6873048543930054 2023-01-22 14:30:34.426274: step: 684/459, loss: 1.6336290836334229 2023-01-22 14:30:35.056355: step: 686/459, loss: 1.5862452983856201 2023-01-22 14:30:35.802293: step: 688/459, loss: 2.2473535537719727 2023-01-22 14:30:36.375186: step: 690/459, loss: 2.154212236404419 2023-01-22 14:30:36.976498: step: 692/459, loss: 1.7607557773590088 2023-01-22 14:30:37.709589: step: 694/459, loss: 0.9487708210945129 2023-01-22 14:30:38.313986: step: 696/459, loss: 3.084261894226074 2023-01-22 14:30:38.965668: step: 698/459, loss: 4.230199337005615 2023-01-22 14:30:39.553743: step: 700/459, loss: 5.857696056365967 2023-01-22 14:30:40.128422: step: 702/459, loss: 1.9567030668258667 2023-01-22 14:30:40.715441: step: 704/459, loss: 2.3528270721435547 2023-01-22 14:30:41.387493: step: 706/459, loss: 0.8562732338905334 2023-01-22 14:30:41.930018: step: 708/459, loss: 0.859169065952301 2023-01-22 14:30:42.510498: step: 710/459, loss: 0.7013393640518188 2023-01-22 14:30:43.187807: step: 712/459, loss: 0.4600679874420166 2023-01-22 14:30:43.799793: step: 714/459, loss: 2.1165287494659424 2023-01-22 14:30:44.410543: step: 716/459, loss: 1.634990930557251 2023-01-22 14:30:45.039896: step: 718/459, loss: 0.412847638130188 2023-01-22 14:30:45.626418: step: 720/459, loss: 0.5293720364570618 2023-01-22 14:30:46.334968: step: 722/459, loss: 1.2182053327560425 2023-01-22 14:30:46.916334: step: 724/459, loss: 2.5339059829711914 2023-01-22 14:30:47.555943: step: 726/459, loss: 0.45814329385757446 2023-01-22 14:30:48.091773: step: 728/459, loss: 0.4320974349975586 2023-01-22 14:30:48.691254: step: 730/459, loss: 0.6993181109428406 2023-01-22 14:30:49.343563: step: 732/459, loss: 0.4416637420654297 2023-01-22 14:30:49.986370: step: 734/459, loss: 0.2580925524234772 2023-01-22 14:30:50.638287: step: 736/459, loss: 0.6715008020401001 2023-01-22 14:30:51.334676: step: 738/459, loss: 0.5585587620735168 2023-01-22 14:30:51.941511: step: 740/459, loss: 0.22251538932323456 2023-01-22 14:30:52.516240: step: 742/459, loss: 1.1320841312408447 2023-01-22 14:30:53.141862: step: 744/459, loss: 0.6453964114189148 2023-01-22 14:30:53.720459: step: 746/459, loss: 0.25955086946487427 2023-01-22 14:30:54.438452: step: 748/459, loss: 0.5357388854026794 2023-01-22 14:30:55.020000: step: 750/459, loss: 5.490289688110352 2023-01-22 14:30:55.647430: step: 752/459, loss: 1.7644929885864258 2023-01-22 14:30:56.291218: step: 754/459, loss: 1.112346887588501 2023-01-22 14:30:56.930743: step: 756/459, loss: 0.4189261794090271 2023-01-22 14:30:57.582999: step: 758/459, loss: 2.9105019569396973 2023-01-22 14:30:58.228354: step: 760/459, loss: 0.6759624481201172 2023-01-22 14:30:58.832462: step: 762/459, loss: 0.502894401550293 2023-01-22 14:30:59.433530: step: 764/459, loss: 1.525104284286499 2023-01-22 14:31:00.077808: step: 766/459, loss: 0.20574504137039185 2023-01-22 14:31:00.687211: step: 768/459, loss: 0.23970338702201843 2023-01-22 14:31:01.247182: step: 770/459, loss: 0.7958066463470459 2023-01-22 14:31:01.861424: step: 772/459, loss: 1.5479021072387695 2023-01-22 14:31:02.530023: step: 774/459, loss: 0.3686779737472534 2023-01-22 14:31:03.125062: step: 776/459, loss: 1.0622365474700928 2023-01-22 14:31:03.829336: step: 778/459, loss: 2.521888017654419 2023-01-22 14:31:04.420011: step: 780/459, loss: 0.6727300882339478 2023-01-22 14:31:05.021566: step: 782/459, loss: 0.7123700380325317 2023-01-22 14:31:05.576231: step: 784/459, loss: 1.3215794563293457 2023-01-22 14:31:06.212815: step: 786/459, loss: 1.4925076961517334 2023-01-22 14:31:06.822170: step: 788/459, loss: 0.47037839889526367 2023-01-22 14:31:07.390677: step: 790/459, loss: 0.2651430666446686 2023-01-22 14:31:08.063395: step: 792/459, loss: 0.9964885711669922 2023-01-22 14:31:08.661418: step: 794/459, loss: 1.1707725524902344 2023-01-22 14:31:09.272060: step: 796/459, loss: 1.8745872974395752 2023-01-22 14:31:09.898495: step: 798/459, loss: 0.5276437997817993 2023-01-22 14:31:10.532508: step: 800/459, loss: 0.6929957270622253 2023-01-22 14:31:11.126874: step: 802/459, loss: 0.5584697723388672 2023-01-22 14:31:11.734591: step: 804/459, loss: 0.7662319540977478 2023-01-22 14:31:12.342257: step: 806/459, loss: 0.8934573531150818 2023-01-22 14:31:12.957669: step: 808/459, loss: 0.4222351014614105 2023-01-22 14:31:13.626744: step: 810/459, loss: 0.750187873840332 2023-01-22 14:31:14.254278: step: 812/459, loss: 0.5048958659172058 2023-01-22 14:31:14.884062: step: 814/459, loss: 1.282601237297058 2023-01-22 14:31:15.518164: step: 816/459, loss: 0.925735592842102 2023-01-22 14:31:16.214450: step: 818/459, loss: 0.8852437138557434 2023-01-22 14:31:16.852110: step: 820/459, loss: 0.39841949939727783 2023-01-22 14:31:17.483027: step: 822/459, loss: 1.1984930038452148 2023-01-22 14:31:18.022101: step: 824/459, loss: 1.1208444833755493 2023-01-22 14:31:18.626474: step: 826/459, loss: 0.5696044564247131 2023-01-22 14:31:19.264601: step: 828/459, loss: 0.4647400975227356 2023-01-22 14:31:19.908979: step: 830/459, loss: 0.810476541519165 2023-01-22 14:31:20.518623: step: 832/459, loss: 0.7775711417198181 2023-01-22 14:31:21.109775: step: 834/459, loss: 0.4361999034881592 2023-01-22 14:31:21.853787: step: 836/459, loss: 2.596975326538086 2023-01-22 14:31:22.442398: step: 838/459, loss: 0.6960049867630005 2023-01-22 14:31:23.076668: step: 840/459, loss: 1.208038330078125 2023-01-22 14:31:23.701298: step: 842/459, loss: 0.3478604555130005 2023-01-22 14:31:24.392376: step: 844/459, loss: 0.3569980263710022 2023-01-22 14:31:24.976605: step: 846/459, loss: 0.38117900490760803 2023-01-22 14:31:25.622665: step: 848/459, loss: 1.3601258993148804 2023-01-22 14:31:26.196112: step: 850/459, loss: 0.7515277862548828 2023-01-22 14:31:26.868405: step: 852/459, loss: 0.39792558550834656 2023-01-22 14:31:27.510714: step: 854/459, loss: 1.0336499214172363 2023-01-22 14:31:28.067598: step: 856/459, loss: 0.36521682143211365 2023-01-22 14:31:28.661565: step: 858/459, loss: 0.4468002915382385 2023-01-22 14:31:29.269864: step: 860/459, loss: 0.8416862487792969 2023-01-22 14:31:29.853030: step: 862/459, loss: 1.8639121055603027 2023-01-22 14:31:30.474389: step: 864/459, loss: 0.8860180974006653 2023-01-22 14:31:31.102819: step: 866/459, loss: 0.5348467230796814 2023-01-22 14:31:31.768340: step: 868/459, loss: 0.9545867443084717 2023-01-22 14:31:32.374147: step: 870/459, loss: 1.3659945726394653 2023-01-22 14:31:33.000561: step: 872/459, loss: 0.28335490822792053 2023-01-22 14:31:33.623393: step: 874/459, loss: 1.6344313621520996 2023-01-22 14:31:34.275453: step: 876/459, loss: 0.23180833458900452 2023-01-22 14:31:34.816141: step: 878/459, loss: 0.4921947717666626 2023-01-22 14:31:35.405957: step: 880/459, loss: 0.6251797676086426 2023-01-22 14:31:36.045902: step: 882/459, loss: 0.7172222137451172 2023-01-22 14:31:36.681481: step: 884/459, loss: 0.5685659646987915 2023-01-22 14:31:37.309582: step: 886/459, loss: 0.9820916652679443 2023-01-22 14:31:37.958328: step: 888/459, loss: 1.535813808441162 2023-01-22 14:31:38.561611: step: 890/459, loss: 0.7126172184944153 2023-01-22 14:31:39.219655: step: 892/459, loss: 0.9117324948310852 2023-01-22 14:31:39.804547: step: 894/459, loss: 1.4162225723266602 2023-01-22 14:31:40.372473: step: 896/459, loss: 0.40261000394821167 2023-01-22 14:31:41.012200: step: 898/459, loss: 2.2458577156066895 2023-01-22 14:31:41.624515: step: 900/459, loss: 1.7899366617202759 2023-01-22 14:31:42.190212: step: 902/459, loss: 2.3875515460968018 2023-01-22 14:31:42.823067: step: 904/459, loss: 1.4735095500946045 2023-01-22 14:31:43.425248: step: 906/459, loss: 0.9546256065368652 2023-01-22 14:31:44.079929: step: 908/459, loss: 0.21665766835212708 2023-01-22 14:31:44.735535: step: 910/459, loss: 0.3044429123401642 2023-01-22 14:31:45.362145: step: 912/459, loss: 0.9181021451950073 2023-01-22 14:31:45.973590: step: 914/459, loss: 0.9925346374511719 2023-01-22 14:31:46.596889: step: 916/459, loss: 0.7003886699676514 2023-01-22 14:31:47.229444: step: 918/459, loss: 2.278181314468384 2023-01-22 14:31:47.691857: step: 920/459, loss: 0.07973415404558182 ================================================== Loss: 1.356 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.24750702247191012, 'r': 0.2082053402646503, 'f1': 0.2261614476386037}, 'combined': 0.1666452772073922, 'epoch': 0} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33516084479364583, 'r': 0.23601162936467257, 'f1': 0.276980634249062}, 'combined': 0.17726760591939966, 'epoch': 0} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.24679951690821256, 'r': 0.20994287827730748, 'f1': 0.2268841319891637}, 'combined': 0.16717778146569956, 'epoch': 0} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3450369955294604, 'r': 0.24233995874755201, 'f1': 0.2847106978036272}, 'combined': 0.18221484659432136, 'epoch': 0} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.24254956684040008, 'r': 0.20999565522287944, 'f1': 0.2251017256593784}, 'combined': 0.16586442943322618, 'epoch': 0} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3422416899860493, 'r': 0.24099777806640132, 'f1': 0.2828323231407607}, 'combined': 0.20278543923299824, 'epoch': 0} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.19135802469135801, 'r': 0.2952380952380952, 'f1': 0.23220973782771537}, 'combined': 0.15480649188514356, 'epoch': 0} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.21739130434782608, 'f1': 0.2564102564102564}, 'combined': 0.1282051282051282, 'epoch': 0} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.125, 'r': 0.034482758620689655, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 0} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.24750702247191012, 'r': 0.2082053402646503, 'f1': 0.2261614476386037}, 'combined': 0.1666452772073922, 'epoch': 0} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33516084479364583, 'r': 0.23601162936467257, 'f1': 0.276980634249062}, 'combined': 0.17726760591939966, 'epoch': 0} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.19135802469135801, 'r': 0.2952380952380952, 'f1': 0.23220973782771537}, 'combined': 0.15480649188514356, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.24679951690821256, 'r': 0.20994287827730748, 'f1': 0.2268841319891637}, 'combined': 0.16717778146569956, 'epoch': 0} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3450369955294604, 'r': 0.24233995874755201, 'f1': 0.2847106978036272}, 'combined': 0.18221484659432136, 'epoch': 0} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.21739130434782608, 'f1': 0.2564102564102564}, 'combined': 0.1282051282051282, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.24254956684040008, 'r': 0.20999565522287944, 'f1': 0.2251017256593784}, 'combined': 0.16586442943322618, 'epoch': 0} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3422416899860493, 'r': 0.24099777806640132, 'f1': 0.2828323231407607}, 'combined': 0.20278543923299824, 'epoch': 0} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.125, 'r': 0.034482758620689655, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 0} ****************************** Epoch: 1 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 14:34:46.234232: step: 2/459, loss: 0.6210440397262573 2023-01-22 14:34:46.834644: step: 4/459, loss: 0.5699330568313599 2023-01-22 14:34:47.471245: step: 6/459, loss: 0.5475696325302124 2023-01-22 14:34:48.069260: step: 8/459, loss: 1.1470813751220703 2023-01-22 14:34:48.721981: step: 10/459, loss: 0.23819734156131744 2023-01-22 14:34:49.407579: step: 12/459, loss: 0.5859457850456238 2023-01-22 14:34:50.013364: step: 14/459, loss: 0.5053662657737732 2023-01-22 14:34:50.652905: step: 16/459, loss: 1.4114676713943481 2023-01-22 14:34:51.259303: step: 18/459, loss: 0.4065549969673157 2023-01-22 14:34:51.858485: step: 20/459, loss: 0.8497780561447144 2023-01-22 14:34:52.508624: step: 22/459, loss: 0.17562460899353027 2023-01-22 14:34:53.175330: step: 24/459, loss: 1.8728657960891724 2023-01-22 14:34:53.771826: step: 26/459, loss: 0.38020166754722595 2023-01-22 14:34:54.445137: step: 28/459, loss: 2.0714521408081055 2023-01-22 14:34:55.036070: step: 30/459, loss: 0.616027295589447 2023-01-22 14:34:55.657385: step: 32/459, loss: 0.44751089811325073 2023-01-22 14:34:56.286772: step: 34/459, loss: 0.2712917625904083 2023-01-22 14:34:56.931303: step: 36/459, loss: 1.1258692741394043 2023-01-22 14:34:57.498971: step: 38/459, loss: 0.7491227388381958 2023-01-22 14:34:58.106201: step: 40/459, loss: 2.055781126022339 2023-01-22 14:34:58.746855: step: 42/459, loss: 1.0261337757110596 2023-01-22 14:34:59.329012: step: 44/459, loss: 0.7928591370582581 2023-01-22 14:35:00.012112: step: 46/459, loss: 2.394397258758545 2023-01-22 14:35:00.618123: step: 48/459, loss: 2.756887912750244 2023-01-22 14:35:01.281808: step: 50/459, loss: 3.0915608406066895 2023-01-22 14:35:01.989837: step: 52/459, loss: 2.875868797302246 2023-01-22 14:35:02.593814: step: 54/459, loss: 0.7422020435333252 2023-01-22 14:35:03.207519: step: 56/459, loss: 0.27434292435646057 2023-01-22 14:35:03.789335: step: 58/459, loss: 0.8765119314193726 2023-01-22 14:35:04.394236: step: 60/459, loss: 1.1655511856079102 2023-01-22 14:35:05.007334: step: 62/459, loss: 1.3832309246063232 2023-01-22 14:35:05.609084: step: 64/459, loss: 0.5682365298271179 2023-01-22 14:35:06.204213: step: 66/459, loss: 0.4649835228919983 2023-01-22 14:35:06.814278: step: 68/459, loss: 0.4321804642677307 2023-01-22 14:35:07.494035: step: 70/459, loss: 0.8209225535392761 2023-01-22 14:35:08.045808: step: 72/459, loss: 0.15063685178756714 2023-01-22 14:35:08.717414: step: 74/459, loss: 1.5287243127822876 2023-01-22 14:35:09.296564: step: 76/459, loss: 0.37549519538879395 2023-01-22 14:35:09.928907: step: 78/459, loss: 0.6200800538063049 2023-01-22 14:35:10.646065: step: 80/459, loss: 0.641595721244812 2023-01-22 14:35:11.312142: step: 82/459, loss: 2.2695717811584473 2023-01-22 14:35:11.946386: step: 84/459, loss: 1.4880621433258057 2023-01-22 14:35:12.608054: step: 86/459, loss: 0.3161192238330841 2023-01-22 14:35:13.188531: step: 88/459, loss: 0.368236243724823 2023-01-22 14:35:13.780198: step: 90/459, loss: 1.7576806545257568 2023-01-22 14:35:14.417980: step: 92/459, loss: 1.466010570526123 2023-01-22 14:35:15.028009: step: 94/459, loss: 2.0973353385925293 2023-01-22 14:35:15.635245: step: 96/459, loss: 0.9277809262275696 2023-01-22 14:35:16.202456: step: 98/459, loss: 0.2942020297050476 2023-01-22 14:35:16.814338: step: 100/459, loss: 0.2256079912185669 2023-01-22 14:35:17.451119: step: 102/459, loss: 0.4162184000015259 2023-01-22 14:35:17.985673: step: 104/459, loss: 1.3743622303009033 2023-01-22 14:35:18.579005: step: 106/459, loss: 1.2959697246551514 2023-01-22 14:35:19.225921: step: 108/459, loss: 1.2903311252593994 2023-01-22 14:35:19.916815: step: 110/459, loss: 0.5832201242446899 2023-01-22 14:35:20.506803: step: 112/459, loss: 1.1088731288909912 2023-01-22 14:35:21.190405: step: 114/459, loss: 2.407776355743408 2023-01-22 14:35:21.822006: step: 116/459, loss: 2.4941775798797607 2023-01-22 14:35:22.448396: step: 118/459, loss: 0.48635199666023254 2023-01-22 14:35:23.001604: step: 120/459, loss: 0.49088653922080994 2023-01-22 14:35:23.655914: step: 122/459, loss: 0.1768179088830948 2023-01-22 14:35:24.271747: step: 124/459, loss: 10.91745376586914 2023-01-22 14:35:24.927430: step: 126/459, loss: 0.5103351473808289 2023-01-22 14:35:25.576528: step: 128/459, loss: 0.5546374320983887 2023-01-22 14:35:26.154351: step: 130/459, loss: 0.49660712480545044 2023-01-22 14:35:26.772390: step: 132/459, loss: 0.5985330939292908 2023-01-22 14:35:27.416487: step: 134/459, loss: 0.9546290040016174 2023-01-22 14:35:28.009526: step: 136/459, loss: 2.711028814315796 2023-01-22 14:35:28.632891: step: 138/459, loss: 0.6540079712867737 2023-01-22 14:35:29.280259: step: 140/459, loss: 3.849931240081787 2023-01-22 14:35:29.918325: step: 142/459, loss: 0.37978672981262207 2023-01-22 14:35:30.534684: step: 144/459, loss: 0.8701390027999878 2023-01-22 14:35:31.157079: step: 146/459, loss: 0.8039233088493347 2023-01-22 14:35:31.774506: step: 148/459, loss: 0.7028909921646118 2023-01-22 14:35:32.377211: step: 150/459, loss: 2.9558632373809814 2023-01-22 14:35:32.993371: step: 152/459, loss: 1.238844871520996 2023-01-22 14:35:33.634617: step: 154/459, loss: 0.5561333894729614 2023-01-22 14:35:34.244454: step: 156/459, loss: 1.7532535791397095 2023-01-22 14:35:34.898276: step: 158/459, loss: 0.5932532548904419 2023-01-22 14:35:35.531291: step: 160/459, loss: 0.3572555184364319 2023-01-22 14:35:36.205303: step: 162/459, loss: 1.4459691047668457 2023-01-22 14:35:36.830752: step: 164/459, loss: 2.2246108055114746 2023-01-22 14:35:37.591907: step: 166/459, loss: 0.3890683352947235 2023-01-22 14:35:38.264981: step: 168/459, loss: 0.8203005790710449 2023-01-22 14:35:38.861065: step: 170/459, loss: 0.32339948415756226 2023-01-22 14:35:39.489269: step: 172/459, loss: 0.9114584922790527 2023-01-22 14:35:40.109716: step: 174/459, loss: 0.3161958158016205 2023-01-22 14:35:40.806330: step: 176/459, loss: 2.490220069885254 2023-01-22 14:35:41.452460: step: 178/459, loss: 0.301972895860672 2023-01-22 14:35:42.014977: step: 180/459, loss: 0.2602306604385376 2023-01-22 14:35:42.668936: step: 182/459, loss: 0.6332975029945374 2023-01-22 14:35:43.319943: step: 184/459, loss: 1.2572662830352783 2023-01-22 14:35:43.970544: step: 186/459, loss: 0.8329378962516785 2023-01-22 14:35:44.598480: step: 188/459, loss: 0.42068934440612793 2023-01-22 14:35:45.210723: step: 190/459, loss: 1.7799770832061768 2023-01-22 14:35:45.893318: step: 192/459, loss: 0.45970237255096436 2023-01-22 14:35:46.512754: step: 194/459, loss: 0.2408919334411621 2023-01-22 14:35:47.142638: step: 196/459, loss: 0.3187008798122406 2023-01-22 14:35:47.726118: step: 198/459, loss: 0.31403812766075134 2023-01-22 14:35:48.348844: step: 200/459, loss: 1.022056221961975 2023-01-22 14:35:48.949627: step: 202/459, loss: 0.4534759223461151 2023-01-22 14:35:49.620458: step: 204/459, loss: 0.910302996635437 2023-01-22 14:35:50.190099: step: 206/459, loss: 1.0879690647125244 2023-01-22 14:35:50.799821: step: 208/459, loss: 2.8005881309509277 2023-01-22 14:35:51.453436: step: 210/459, loss: 0.6242811679840088 2023-01-22 14:35:52.074352: step: 212/459, loss: 0.821319043636322 2023-01-22 14:35:52.661475: step: 214/459, loss: 1.5232930183410645 2023-01-22 14:35:53.215955: step: 216/459, loss: 0.20606335997581482 2023-01-22 14:35:53.869084: step: 218/459, loss: 0.8023688197135925 2023-01-22 14:35:54.445995: step: 220/459, loss: 0.43096861243247986 2023-01-22 14:35:54.967658: step: 222/459, loss: 0.7024382948875427 2023-01-22 14:35:55.624687: step: 224/459, loss: 0.4340493083000183 2023-01-22 14:35:56.222264: step: 226/459, loss: 0.4297870099544525 2023-01-22 14:35:56.841606: step: 228/459, loss: 0.9672773480415344 2023-01-22 14:35:57.393144: step: 230/459, loss: 0.5675604343414307 2023-01-22 14:35:58.036639: step: 232/459, loss: 0.9714511036872864 2023-01-22 14:35:58.674122: step: 234/459, loss: 0.5284965634346008 2023-01-22 14:35:59.278800: step: 236/459, loss: 3.211935520172119 2023-01-22 14:35:59.914593: step: 238/459, loss: 1.2681729793548584 2023-01-22 14:36:00.589563: step: 240/459, loss: 0.7016074061393738 2023-01-22 14:36:01.168384: step: 242/459, loss: 0.3551260530948639 2023-01-22 14:36:01.783825: step: 244/459, loss: 0.940794825553894 2023-01-22 14:36:02.373113: step: 246/459, loss: 2.101869583129883 2023-01-22 14:36:03.024258: step: 248/459, loss: 1.7561874389648438 2023-01-22 14:36:03.603718: step: 250/459, loss: 1.311143398284912 2023-01-22 14:36:04.195647: step: 252/459, loss: 4.729093551635742 2023-01-22 14:36:04.880563: step: 254/459, loss: 0.5276802778244019 2023-01-22 14:36:05.525989: step: 256/459, loss: 0.5056878328323364 2023-01-22 14:36:06.128992: step: 258/459, loss: 0.2159598022699356 2023-01-22 14:36:06.823600: step: 260/459, loss: 0.7619063258171082 2023-01-22 14:36:07.470480: step: 262/459, loss: 2.5425822734832764 2023-01-22 14:36:08.102867: step: 264/459, loss: 0.3100149929523468 2023-01-22 14:36:08.736236: step: 266/459, loss: 4.838332176208496 2023-01-22 14:36:09.330071: step: 268/459, loss: 0.7117918133735657 2023-01-22 14:36:09.923476: step: 270/459, loss: 0.6007698178291321 2023-01-22 14:36:10.541558: step: 272/459, loss: 1.6744433641433716 2023-01-22 14:36:11.167264: step: 274/459, loss: 0.39961859583854675 2023-01-22 14:36:11.724399: step: 276/459, loss: 0.2476656585931778 2023-01-22 14:36:12.333080: step: 278/459, loss: 0.4690190255641937 2023-01-22 14:36:12.940069: step: 280/459, loss: 0.2232533097267151 2023-01-22 14:36:13.492117: step: 282/459, loss: 0.7218128442764282 2023-01-22 14:36:14.072490: step: 284/459, loss: 0.6944237947463989 2023-01-22 14:36:14.713795: step: 286/459, loss: 2.8529415130615234 2023-01-22 14:36:15.282724: step: 288/459, loss: 0.369789183139801 2023-01-22 14:36:15.828661: step: 290/459, loss: 1.6471076011657715 2023-01-22 14:36:16.503930: step: 292/459, loss: 4.787013530731201 2023-01-22 14:36:17.102532: step: 294/459, loss: 0.4733874201774597 2023-01-22 14:36:17.696515: step: 296/459, loss: 0.19529786705970764 2023-01-22 14:36:18.344497: step: 298/459, loss: 1.5220551490783691 2023-01-22 14:36:18.960318: step: 300/459, loss: 1.1952213048934937 2023-01-22 14:36:19.614327: step: 302/459, loss: 1.228694200515747 2023-01-22 14:36:20.257629: step: 304/459, loss: 0.7453956604003906 2023-01-22 14:36:20.868834: step: 306/459, loss: 0.8339964151382446 2023-01-22 14:36:21.571807: step: 308/459, loss: 1.1177968978881836 2023-01-22 14:36:22.139462: step: 310/459, loss: 1.5100589990615845 2023-01-22 14:36:22.750759: step: 312/459, loss: 2.016418695449829 2023-01-22 14:36:23.352951: step: 314/459, loss: 0.6957283616065979 2023-01-22 14:36:23.993253: step: 316/459, loss: 0.9033395648002625 2023-01-22 14:36:24.528154: step: 318/459, loss: 0.3943186402320862 2023-01-22 14:36:25.162795: step: 320/459, loss: 0.3649212121963501 2023-01-22 14:36:25.723813: step: 322/459, loss: 0.2846723794937134 2023-01-22 14:36:26.342571: step: 324/459, loss: 0.6572908759117126 2023-01-22 14:36:26.928816: step: 326/459, loss: 0.8092230558395386 2023-01-22 14:36:27.490781: step: 328/459, loss: 0.5767310261726379 2023-01-22 14:36:28.103630: step: 330/459, loss: 1.5972974300384521 2023-01-22 14:36:28.791659: step: 332/459, loss: 0.5593241453170776 2023-01-22 14:36:29.535041: step: 334/459, loss: 1.5114631652832031 2023-01-22 14:36:30.134255: step: 336/459, loss: 0.7734324932098389 2023-01-22 14:36:30.769662: step: 338/459, loss: 0.8851126432418823 2023-01-22 14:36:31.377652: step: 340/459, loss: 1.2570627927780151 2023-01-22 14:36:31.988453: step: 342/459, loss: 0.6122789978981018 2023-01-22 14:36:32.671537: step: 344/459, loss: 0.6748073101043701 2023-01-22 14:36:33.320050: step: 346/459, loss: 0.4652940630912781 2023-01-22 14:36:33.930939: step: 348/459, loss: 1.223505973815918 2023-01-22 14:36:34.559822: step: 350/459, loss: 0.37728211283683777 2023-01-22 14:36:35.197576: step: 352/459, loss: 0.7408112287521362 2023-01-22 14:36:35.894490: step: 354/459, loss: 1.0943304300308228 2023-01-22 14:36:36.541411: step: 356/459, loss: 0.835176408290863 2023-01-22 14:36:37.156700: step: 358/459, loss: 1.1295841932296753 2023-01-22 14:36:37.800388: step: 360/459, loss: 0.6471317410469055 2023-01-22 14:36:38.461922: step: 362/459, loss: 0.3221111595630646 2023-01-22 14:36:39.011276: step: 364/459, loss: 1.4091538190841675 2023-01-22 14:36:39.654602: step: 366/459, loss: 0.25252920389175415 2023-01-22 14:36:40.251082: step: 368/459, loss: 0.6204012632369995 2023-01-22 14:36:41.005666: step: 370/459, loss: 1.168938159942627 2023-01-22 14:36:41.614240: step: 372/459, loss: 0.15782688558101654 2023-01-22 14:36:42.244844: step: 374/459, loss: 0.4780386686325073 2023-01-22 14:36:42.930184: step: 376/459, loss: 0.2001846730709076 2023-01-22 14:36:43.518498: step: 378/459, loss: 5.280773639678955 2023-01-22 14:36:44.206944: step: 380/459, loss: 0.7607866525650024 2023-01-22 14:36:44.795266: step: 382/459, loss: 0.12779000401496887 2023-01-22 14:36:45.449005: step: 384/459, loss: 0.6203588843345642 2023-01-22 14:36:46.055241: step: 386/459, loss: 2.4696547985076904 2023-01-22 14:36:46.732653: step: 388/459, loss: 0.2555775046348572 2023-01-22 14:36:47.397766: step: 390/459, loss: 1.5313866138458252 2023-01-22 14:36:47.998417: step: 392/459, loss: 0.18499764800071716 2023-01-22 14:36:48.657300: step: 394/459, loss: 0.29981282353401184 2023-01-22 14:36:49.295109: step: 396/459, loss: 0.5054959058761597 2023-01-22 14:36:49.892924: step: 398/459, loss: 0.5036651492118835 2023-01-22 14:36:50.577922: step: 400/459, loss: 1.0170577764511108 2023-01-22 14:36:51.205496: step: 402/459, loss: 0.27279967069625854 2023-01-22 14:36:51.808025: step: 404/459, loss: 1.9614510536193848 2023-01-22 14:36:52.411470: step: 406/459, loss: 0.28013700246810913 2023-01-22 14:36:53.032975: step: 408/459, loss: 0.9013902544975281 2023-01-22 14:36:53.635970: step: 410/459, loss: 0.9473934173583984 2023-01-22 14:36:54.306510: step: 412/459, loss: 0.19332432746887207 2023-01-22 14:36:54.947493: step: 414/459, loss: 2.519922971725464 2023-01-22 14:36:55.590437: step: 416/459, loss: 1.4409193992614746 2023-01-22 14:36:56.294881: step: 418/459, loss: 0.966354489326477 2023-01-22 14:36:56.897301: step: 420/459, loss: 0.50836181640625 2023-01-22 14:36:57.487828: step: 422/459, loss: 0.5332148671150208 2023-01-22 14:36:58.160279: step: 424/459, loss: 0.39478635787963867 2023-01-22 14:36:58.886394: step: 426/459, loss: 0.7689067721366882 2023-01-22 14:36:59.434387: step: 428/459, loss: 1.0690267086029053 2023-01-22 14:37:00.013515: step: 430/459, loss: 0.9418800473213196 2023-01-22 14:37:00.597018: step: 432/459, loss: 0.17155398428440094 2023-01-22 14:37:01.231444: step: 434/459, loss: 1.2410879135131836 2023-01-22 14:37:01.867258: step: 436/459, loss: 0.23153522610664368 2023-01-22 14:37:02.476692: step: 438/459, loss: 1.1248855590820312 2023-01-22 14:37:03.087350: step: 440/459, loss: 0.11789719760417938 2023-01-22 14:37:03.708649: step: 442/459, loss: 0.604404091835022 2023-01-22 14:37:04.298650: step: 444/459, loss: 0.6948612928390503 2023-01-22 14:37:04.917967: step: 446/459, loss: 0.4282061755657196 2023-01-22 14:37:05.546127: step: 448/459, loss: 0.484658807516098 2023-01-22 14:37:06.266752: step: 450/459, loss: 0.407156378030777 2023-01-22 14:37:06.917281: step: 452/459, loss: 5.258767604827881 2023-01-22 14:37:07.558326: step: 454/459, loss: 2.0388429164886475 2023-01-22 14:37:08.162470: step: 456/459, loss: 1.5582484006881714 2023-01-22 14:37:08.767160: step: 458/459, loss: 1.2634551525115967 2023-01-22 14:37:09.428316: step: 460/459, loss: 0.48627564311027527 2023-01-22 14:37:09.993906: step: 462/459, loss: 0.4543045163154602 2023-01-22 14:37:10.657855: step: 464/459, loss: 0.28704574704170227 2023-01-22 14:37:11.280226: step: 466/459, loss: 0.9774456024169922 2023-01-22 14:37:11.856257: step: 468/459, loss: 0.4134821891784668 2023-01-22 14:37:12.478477: step: 470/459, loss: 0.7662481069564819 2023-01-22 14:37:13.057209: step: 472/459, loss: 0.512565016746521 2023-01-22 14:37:13.721942: step: 474/459, loss: 0.5604459047317505 2023-01-22 14:37:14.283737: step: 476/459, loss: 1.6972349882125854 2023-01-22 14:37:14.933339: step: 478/459, loss: 0.8478610515594482 2023-01-22 14:37:15.518122: step: 480/459, loss: 1.1393707990646362 2023-01-22 14:37:16.158513: step: 482/459, loss: 0.23778745532035828 2023-01-22 14:37:16.812842: step: 484/459, loss: 1.5353100299835205 2023-01-22 14:37:17.415629: step: 486/459, loss: 0.9259750247001648 2023-01-22 14:37:18.126551: step: 488/459, loss: 1.1104886531829834 2023-01-22 14:37:18.795859: step: 490/459, loss: 1.8982880115509033 2023-01-22 14:37:19.471070: step: 492/459, loss: 2.4728775024414062 2023-01-22 14:37:20.093246: step: 494/459, loss: 0.8539971709251404 2023-01-22 14:37:20.692562: step: 496/459, loss: 1.374588966369629 2023-01-22 14:37:21.312699: step: 498/459, loss: 0.30086207389831543 2023-01-22 14:37:21.898198: step: 500/459, loss: 0.4882652461528778 2023-01-22 14:37:22.515122: step: 502/459, loss: 0.6550005078315735 2023-01-22 14:37:23.098462: step: 504/459, loss: 0.19611908495426178 2023-01-22 14:37:23.715800: step: 506/459, loss: 0.27398452162742615 2023-01-22 14:37:24.313491: step: 508/459, loss: 0.31015661358833313 2023-01-22 14:37:24.913575: step: 510/459, loss: 0.9322705268859863 2023-01-22 14:37:25.549993: step: 512/459, loss: 2.527038097381592 2023-01-22 14:37:26.149065: step: 514/459, loss: 0.6325510144233704 2023-01-22 14:37:26.745166: step: 516/459, loss: 0.3072889447212219 2023-01-22 14:37:27.401968: step: 518/459, loss: 2.0679235458374023 2023-01-22 14:37:28.101113: step: 520/459, loss: 1.391613483428955 2023-01-22 14:37:28.760553: step: 522/459, loss: 0.6896799802780151 2023-01-22 14:37:29.372668: step: 524/459, loss: 1.1132698059082031 2023-01-22 14:37:30.043217: step: 526/459, loss: 0.3325039744377136 2023-01-22 14:37:30.655878: step: 528/459, loss: 0.5781549215316772 2023-01-22 14:37:31.281677: step: 530/459, loss: 0.08789904415607452 2023-01-22 14:37:31.891588: step: 532/459, loss: 0.3319101333618164 2023-01-22 14:37:32.558768: step: 534/459, loss: 6.152883052825928 2023-01-22 14:37:33.155187: step: 536/459, loss: 0.38570255041122437 2023-01-22 14:37:33.763931: step: 538/459, loss: 0.6183006763458252 2023-01-22 14:37:34.389357: step: 540/459, loss: 1.2355018854141235 2023-01-22 14:37:34.998590: step: 542/459, loss: 0.23648105561733246 2023-01-22 14:37:35.582438: step: 544/459, loss: 1.1125671863555908 2023-01-22 14:37:36.137241: step: 546/459, loss: 0.7291033267974854 2023-01-22 14:37:36.765565: step: 548/459, loss: 2.110819101333618 2023-01-22 14:37:37.413826: step: 550/459, loss: 1.403279423713684 2023-01-22 14:37:38.034697: step: 552/459, loss: 0.6242890954017639 2023-01-22 14:37:38.642981: step: 554/459, loss: 0.21706034243106842 2023-01-22 14:37:39.245613: step: 556/459, loss: 0.398914098739624 2023-01-22 14:37:39.880991: step: 558/459, loss: 0.765787661075592 2023-01-22 14:37:40.448895: step: 560/459, loss: 0.5877220034599304 2023-01-22 14:37:41.062876: step: 562/459, loss: 0.7062118053436279 2023-01-22 14:37:41.637005: step: 564/459, loss: 0.29065990447998047 2023-01-22 14:37:42.376056: step: 566/459, loss: 2.2967729568481445 2023-01-22 14:37:42.953710: step: 568/459, loss: 0.8255789875984192 2023-01-22 14:37:43.683489: step: 570/459, loss: 0.9075847268104553 2023-01-22 14:37:44.336598: step: 572/459, loss: 1.9055426120758057 2023-01-22 14:37:44.996485: step: 574/459, loss: 1.5380029678344727 2023-01-22 14:37:45.575886: step: 576/459, loss: 1.2974427938461304 2023-01-22 14:37:46.181643: step: 578/459, loss: 0.9113611578941345 2023-01-22 14:37:46.749846: step: 580/459, loss: 0.47640761733055115 2023-01-22 14:37:47.402977: step: 582/459, loss: 0.43438708782196045 2023-01-22 14:37:47.990344: step: 584/459, loss: 0.9832230806350708 2023-01-22 14:37:48.632822: step: 586/459, loss: 0.4256443381309509 2023-01-22 14:37:49.293112: step: 588/459, loss: 1.55489182472229 2023-01-22 14:37:49.969058: step: 590/459, loss: 0.3960545063018799 2023-01-22 14:37:50.575789: step: 592/459, loss: 0.6987643837928772 2023-01-22 14:37:51.138679: step: 594/459, loss: 1.0956460237503052 2023-01-22 14:37:51.778804: step: 596/459, loss: 0.5314270257949829 2023-01-22 14:37:52.392461: step: 598/459, loss: 0.7239264249801636 2023-01-22 14:37:53.047877: step: 600/459, loss: 0.7205620408058167 2023-01-22 14:37:53.786881: step: 602/459, loss: 0.817240834236145 2023-01-22 14:37:54.356936: step: 604/459, loss: 0.7852263450622559 2023-01-22 14:37:54.926818: step: 606/459, loss: 0.9987796545028687 2023-01-22 14:37:55.534598: step: 608/459, loss: 0.47130027413368225 2023-01-22 14:37:56.133471: step: 610/459, loss: 1.6240251064300537 2023-01-22 14:37:56.740084: step: 612/459, loss: 0.4933268427848816 2023-01-22 14:37:57.359637: step: 614/459, loss: 0.2915036976337433 2023-01-22 14:37:58.091691: step: 616/459, loss: 0.2946451008319855 2023-01-22 14:37:58.776512: step: 618/459, loss: 1.429090142250061 2023-01-22 14:37:59.548972: step: 620/459, loss: 0.6409589052200317 2023-01-22 14:38:00.145979: step: 622/459, loss: 0.11915981769561768 2023-01-22 14:38:00.766023: step: 624/459, loss: 0.19610407948493958 2023-01-22 14:38:01.328204: step: 626/459, loss: 0.494052916765213 2023-01-22 14:38:01.919193: step: 628/459, loss: 0.44706031680107117 2023-01-22 14:38:02.625835: step: 630/459, loss: 0.3022806644439697 2023-01-22 14:38:03.190664: step: 632/459, loss: 0.8886095285415649 2023-01-22 14:38:03.851255: step: 634/459, loss: 0.5123456716537476 2023-01-22 14:38:04.532601: step: 636/459, loss: 0.9785904884338379 2023-01-22 14:38:05.233433: step: 638/459, loss: 0.29442596435546875 2023-01-22 14:38:05.837536: step: 640/459, loss: 0.698955774307251 2023-01-22 14:38:06.510389: step: 642/459, loss: 0.22333982586860657 2023-01-22 14:38:07.147751: step: 644/459, loss: 0.5680491328239441 2023-01-22 14:38:07.773067: step: 646/459, loss: 0.3026081323623657 2023-01-22 14:38:08.473213: step: 648/459, loss: 1.3583433628082275 2023-01-22 14:38:09.077818: step: 650/459, loss: 2.8593764305114746 2023-01-22 14:38:09.738301: step: 652/459, loss: 2.7872843742370605 2023-01-22 14:38:10.318161: step: 654/459, loss: 1.4481720924377441 2023-01-22 14:38:10.978465: step: 656/459, loss: 0.17651470005512238 2023-01-22 14:38:11.619863: step: 658/459, loss: 0.6473169326782227 2023-01-22 14:38:12.194027: step: 660/459, loss: 1.14304780960083 2023-01-22 14:38:12.783655: step: 662/459, loss: 1.0768033266067505 2023-01-22 14:38:13.330439: step: 664/459, loss: 0.28102314472198486 2023-01-22 14:38:13.973482: step: 666/459, loss: 0.6743432283401489 2023-01-22 14:38:14.619011: step: 668/459, loss: 0.6291317343711853 2023-01-22 14:38:15.231821: step: 670/459, loss: 0.7912682890892029 2023-01-22 14:38:15.869325: step: 672/459, loss: 0.4848170876502991 2023-01-22 14:38:16.501086: step: 674/459, loss: 0.5729967355728149 2023-01-22 14:38:17.086781: step: 676/459, loss: 0.6401682496070862 2023-01-22 14:38:17.748121: step: 678/459, loss: 2.663412094116211 2023-01-22 14:38:18.364145: step: 680/459, loss: 1.1573940515518188 2023-01-22 14:38:18.948488: step: 682/459, loss: 0.32169482111930847 2023-01-22 14:38:19.616279: step: 684/459, loss: 0.20277458429336548 2023-01-22 14:38:20.210863: step: 686/459, loss: 0.30212199687957764 2023-01-22 14:38:20.815317: step: 688/459, loss: 0.4320056438446045 2023-01-22 14:38:21.445901: step: 690/459, loss: 0.46936795115470886 2023-01-22 14:38:22.046524: step: 692/459, loss: 1.011640191078186 2023-01-22 14:38:22.676712: step: 694/459, loss: 0.398764967918396 2023-01-22 14:38:23.273426: step: 696/459, loss: 0.6439483761787415 2023-01-22 14:38:23.866729: step: 698/459, loss: 1.095115065574646 2023-01-22 14:38:24.502487: step: 700/459, loss: 0.5240609645843506 2023-01-22 14:38:25.168266: step: 702/459, loss: 1.058170199394226 2023-01-22 14:38:25.838693: step: 704/459, loss: 0.4978712797164917 2023-01-22 14:38:26.504581: step: 706/459, loss: 0.9820329546928406 2023-01-22 14:38:27.119073: step: 708/459, loss: 4.144487380981445 2023-01-22 14:38:27.732278: step: 710/459, loss: 0.635043203830719 2023-01-22 14:38:28.364900: step: 712/459, loss: 1.4403595924377441 2023-01-22 14:38:28.981188: step: 714/459, loss: 0.2030550241470337 2023-01-22 14:38:29.589643: step: 716/459, loss: 0.6542195677757263 2023-01-22 14:38:30.235560: step: 718/459, loss: 0.5519685745239258 2023-01-22 14:38:30.816103: step: 720/459, loss: 2.1218206882476807 2023-01-22 14:38:31.471273: step: 722/459, loss: 0.4172571897506714 2023-01-22 14:38:32.089650: step: 724/459, loss: 0.29777657985687256 2023-01-22 14:38:32.679553: step: 726/459, loss: 0.26734405755996704 2023-01-22 14:38:33.370591: step: 728/459, loss: 0.575775682926178 2023-01-22 14:38:33.981821: step: 730/459, loss: 0.5393244028091431 2023-01-22 14:38:34.554477: step: 732/459, loss: 1.4563909769058228 2023-01-22 14:38:35.169527: step: 734/459, loss: 0.32367876172065735 2023-01-22 14:38:35.739630: step: 736/459, loss: 0.3127518594264984 2023-01-22 14:38:36.327840: step: 738/459, loss: 2.376704454421997 2023-01-22 14:38:36.979076: step: 740/459, loss: 0.534239649772644 2023-01-22 14:38:37.640527: step: 742/459, loss: 0.29965952038764954 2023-01-22 14:38:38.264441: step: 744/459, loss: 1.7303075790405273 2023-01-22 14:38:38.878552: step: 746/459, loss: 0.2621397376060486 2023-01-22 14:38:39.573520: step: 748/459, loss: 2.360227584838867 2023-01-22 14:38:40.241994: step: 750/459, loss: 0.33315375447273254 2023-01-22 14:38:40.842731: step: 752/459, loss: 1.0340306758880615 2023-01-22 14:38:41.438819: step: 754/459, loss: 0.440502792596817 2023-01-22 14:38:42.144553: step: 756/459, loss: 0.6162654161453247 2023-01-22 14:38:42.821014: step: 758/459, loss: 0.8736534714698792 2023-01-22 14:38:43.432560: step: 760/459, loss: 1.2085046768188477 2023-01-22 14:38:44.064985: step: 762/459, loss: 0.426537424325943 2023-01-22 14:38:44.663273: step: 764/459, loss: 1.0352745056152344 2023-01-22 14:38:45.254422: step: 766/459, loss: 1.1495018005371094 2023-01-22 14:38:45.861498: step: 768/459, loss: 0.14274823665618896 2023-01-22 14:38:46.474530: step: 770/459, loss: 2.8490383625030518 2023-01-22 14:38:47.070927: step: 772/459, loss: 1.4160187244415283 2023-01-22 14:38:47.679933: step: 774/459, loss: 0.15831832587718964 2023-01-22 14:38:48.270605: step: 776/459, loss: 0.6441932320594788 2023-01-22 14:38:48.932231: step: 778/459, loss: 0.31991279125213623 2023-01-22 14:38:49.556883: step: 780/459, loss: 0.30827796459198 2023-01-22 14:38:50.159768: step: 782/459, loss: 0.2749210000038147 2023-01-22 14:38:50.791661: step: 784/459, loss: 0.4471341371536255 2023-01-22 14:38:51.410340: step: 786/459, loss: 0.15816733241081238 2023-01-22 14:38:52.008259: step: 788/459, loss: 0.09810060262680054 2023-01-22 14:38:52.604863: step: 790/459, loss: 0.4469147026538849 2023-01-22 14:38:53.280755: step: 792/459, loss: 0.25904208421707153 2023-01-22 14:38:53.964615: step: 794/459, loss: 2.374877452850342 2023-01-22 14:38:54.615411: step: 796/459, loss: 0.1886911392211914 2023-01-22 14:38:55.251866: step: 798/459, loss: 4.061589241027832 2023-01-22 14:38:55.833282: step: 800/459, loss: 0.19243483245372772 2023-01-22 14:38:56.432360: step: 802/459, loss: 0.15780627727508545 2023-01-22 14:38:57.054395: step: 804/459, loss: 0.3482784330844879 2023-01-22 14:38:57.612004: step: 806/459, loss: 0.10060393065214157 2023-01-22 14:38:58.247976: step: 808/459, loss: 1.959104299545288 2023-01-22 14:38:58.855494: step: 810/459, loss: 0.6958537101745605 2023-01-22 14:38:59.500674: step: 812/459, loss: 0.7465230822563171 2023-01-22 14:39:00.075813: step: 814/459, loss: 0.34102410078048706 2023-01-22 14:39:00.716750: step: 816/459, loss: 0.40629899501800537 2023-01-22 14:39:01.253830: step: 818/459, loss: 0.3373373746871948 2023-01-22 14:39:01.923186: step: 820/459, loss: 1.1590826511383057 2023-01-22 14:39:02.578705: step: 822/459, loss: 0.5692976713180542 2023-01-22 14:39:03.269860: step: 824/459, loss: 1.0992755889892578 2023-01-22 14:39:03.896212: step: 826/459, loss: 0.24121619760990143 2023-01-22 14:39:04.510289: step: 828/459, loss: 0.3528984785079956 2023-01-22 14:39:05.251655: step: 830/459, loss: 1.0699913501739502 2023-01-22 14:39:05.869539: step: 832/459, loss: 1.462984323501587 2023-01-22 14:39:06.652008: step: 834/459, loss: 0.38147807121276855 2023-01-22 14:39:07.285601: step: 836/459, loss: 1.1932157278060913 2023-01-22 14:39:07.898752: step: 838/459, loss: 0.7379177808761597 2023-01-22 14:39:08.596160: step: 840/459, loss: 0.3303000032901764 2023-01-22 14:39:09.162648: step: 842/459, loss: 1.2433104515075684 2023-01-22 14:39:09.804357: step: 844/459, loss: 3.0587830543518066 2023-01-22 14:39:10.363353: step: 846/459, loss: 0.7132796049118042 2023-01-22 14:39:10.991316: step: 848/459, loss: 0.47394439578056335 2023-01-22 14:39:11.563085: step: 850/459, loss: 0.37347209453582764 2023-01-22 14:39:12.278519: step: 852/459, loss: 0.36471015214920044 2023-01-22 14:39:12.920491: step: 854/459, loss: 0.18189819157123566 2023-01-22 14:39:13.602994: step: 856/459, loss: 0.6408281922340393 2023-01-22 14:39:14.258146: step: 858/459, loss: 0.9891318082809448 2023-01-22 14:39:14.936585: step: 860/459, loss: 2.113581895828247 2023-01-22 14:39:15.520251: step: 862/459, loss: 0.8801969289779663 2023-01-22 14:39:16.133136: step: 864/459, loss: 0.5598085522651672 2023-01-22 14:39:16.787686: step: 866/459, loss: 0.6872086524963379 2023-01-22 14:39:17.366578: step: 868/459, loss: 1.4497435092926025 2023-01-22 14:39:18.038991: step: 870/459, loss: 1.2836002111434937 2023-01-22 14:39:18.666355: step: 872/459, loss: 1.4075968265533447 2023-01-22 14:39:19.393999: step: 874/459, loss: 0.42038699984550476 2023-01-22 14:39:20.026122: step: 876/459, loss: 0.7118603587150574 2023-01-22 14:39:20.613124: step: 878/459, loss: 0.43947815895080566 2023-01-22 14:39:21.170555: step: 880/459, loss: 1.492477297782898 2023-01-22 14:39:21.832569: step: 882/459, loss: 0.7897783517837524 2023-01-22 14:39:22.417998: step: 884/459, loss: 1.9262809753417969 2023-01-22 14:39:23.073634: step: 886/459, loss: 0.17120294272899628 2023-01-22 14:39:23.680146: step: 888/459, loss: 0.5159050822257996 2023-01-22 14:39:24.308868: step: 890/459, loss: 0.5752629041671753 2023-01-22 14:39:24.931407: step: 892/459, loss: 0.2196063995361328 2023-01-22 14:39:25.521373: step: 894/459, loss: 2.1913113594055176 2023-01-22 14:39:26.166943: step: 896/459, loss: 1.0330579280853271 2023-01-22 14:39:26.768969: step: 898/459, loss: 0.4718877375125885 2023-01-22 14:39:27.481568: step: 900/459, loss: 0.48795387148857117 2023-01-22 14:39:28.102262: step: 902/459, loss: 1.605465292930603 2023-01-22 14:39:28.817421: step: 904/459, loss: 1.2142069339752197 2023-01-22 14:39:29.433332: step: 906/459, loss: 1.2058660984039307 2023-01-22 14:39:30.140561: step: 908/459, loss: 1.8539512157440186 2023-01-22 14:39:30.803072: step: 910/459, loss: 0.5666487216949463 2023-01-22 14:39:31.438682: step: 912/459, loss: 1.232564091682434 2023-01-22 14:39:32.012223: step: 914/459, loss: 0.7929008603096008 2023-01-22 14:39:32.655661: step: 916/459, loss: 1.9765093326568604 2023-01-22 14:39:33.286599: step: 918/459, loss: 2.310915470123291 2023-01-22 14:39:33.735246: step: 920/459, loss: 0.024986347183585167 ================================================== Loss: 0.978 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2793854002375373, 'r': 0.328158563087354, 'f1': 0.3018142456318247}, 'combined': 0.22238944414976555, 'epoch': 1} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33830276531343184, 'r': 0.2474935607934479, 'f1': 0.28585961462112375}, 'combined': 0.18295015335751916, 'epoch': 1} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2771977511895471, 'r': 0.3303229369013958, 'f1': 0.30143755454032134}, 'combined': 0.22211188229286835, 'epoch': 1} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33558826373119, 'r': 0.25161485833894504, 'f1': 0.2875972223759756}, 'combined': 0.18406222232062439, 'epoch': 1} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29402583508521984, 'r': 0.3269433384439067, 'f1': 0.30961211026044716}, 'combined': 0.22813523913927683, 'epoch': 1} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3423541418131789, 'r': 0.25465905170424247, 'f1': 0.29206584393053214}, 'combined': 0.209405699421891, 'epoch': 1} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2017195767195767, 'r': 0.3630952380952381, 'f1': 0.2593537414965986}, 'combined': 0.17290249433106575, 'epoch': 1} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.23214285714285715, 'r': 0.2826086956521739, 'f1': 0.2549019607843137}, 'combined': 0.12745098039215685, 'epoch': 1} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2777777777777778, 'r': 0.1724137931034483, 'f1': 0.21276595744680854}, 'combined': 0.14184397163120568, 'epoch': 1} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2793854002375373, 'r': 0.328158563087354, 'f1': 0.3018142456318247}, 'combined': 0.22238944414976555, 'epoch': 1} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33830276531343184, 'r': 0.2474935607934479, 'f1': 0.28585961462112375}, 'combined': 0.18295015335751916, 'epoch': 1} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2017195767195767, 'r': 0.3630952380952381, 'f1': 0.2593537414965986}, 'combined': 0.17290249433106575, 'epoch': 1} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2771977511895471, 'r': 0.3303229369013958, 'f1': 0.30143755454032134}, 'combined': 0.22211188229286835, 'epoch': 1} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33558826373119, 'r': 0.25161485833894504, 'f1': 0.2875972223759756}, 'combined': 0.18406222232062439, 'epoch': 1} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.23214285714285715, 'r': 0.2826086956521739, 'f1': 0.2549019607843137}, 'combined': 0.12745098039215685, 'epoch': 1} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29402583508521984, 'r': 0.3269433384439067, 'f1': 0.30961211026044716}, 'combined': 0.22813523913927683, 'epoch': 1} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3423541418131789, 'r': 0.25465905170424247, 'f1': 0.29206584393053214}, 'combined': 0.209405699421891, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2777777777777778, 'r': 0.1724137931034483, 'f1': 0.21276595744680854}, 'combined': 0.14184397163120568, 'epoch': 1} ****************************** Epoch: 2 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 14:42:31.726429: step: 2/459, loss: 0.2945861518383026 2023-01-22 14:42:32.335774: step: 4/459, loss: 1.3375523090362549 2023-01-22 14:42:32.928598: step: 6/459, loss: 2.2398338317871094 2023-01-22 14:42:33.635122: step: 8/459, loss: 2.717142105102539 2023-01-22 14:42:34.287891: step: 10/459, loss: 0.7525579929351807 2023-01-22 14:42:34.999129: step: 12/459, loss: 0.3362821638584137 2023-01-22 14:42:35.702353: step: 14/459, loss: 1.010425090789795 2023-01-22 14:42:36.284080: step: 16/459, loss: 0.35140377283096313 2023-01-22 14:42:36.985014: step: 18/459, loss: 0.8306406140327454 2023-01-22 14:42:37.585211: step: 20/459, loss: 2.931713819503784 2023-01-22 14:42:38.157421: step: 22/459, loss: 0.682776689529419 2023-01-22 14:42:38.879794: step: 24/459, loss: 1.089444637298584 2023-01-22 14:42:39.524433: step: 26/459, loss: 0.2960153818130493 2023-01-22 14:42:40.149475: step: 28/459, loss: 0.5430965423583984 2023-01-22 14:42:40.813568: step: 30/459, loss: 2.609980821609497 2023-01-22 14:42:41.475120: step: 32/459, loss: 4.542341232299805 2023-01-22 14:42:42.081004: step: 34/459, loss: 0.7427352666854858 2023-01-22 14:42:42.731434: step: 36/459, loss: 0.5200637578964233 2023-01-22 14:42:43.405268: step: 38/459, loss: 4.290955543518066 2023-01-22 14:42:44.049845: step: 40/459, loss: 0.304524302482605 2023-01-22 14:42:44.691143: step: 42/459, loss: 0.855749785900116 2023-01-22 14:42:45.313170: step: 44/459, loss: 0.3874163329601288 2023-01-22 14:42:45.917310: step: 46/459, loss: 1.220400094985962 2023-01-22 14:42:46.499362: step: 48/459, loss: 2.6074695587158203 2023-01-22 14:42:47.223972: step: 50/459, loss: 0.4530969262123108 2023-01-22 14:42:47.798480: step: 52/459, loss: 0.28544291853904724 2023-01-22 14:42:48.438665: step: 54/459, loss: 1.9367573261260986 2023-01-22 14:42:49.079376: step: 56/459, loss: 5.476502418518066 2023-01-22 14:42:49.705470: step: 58/459, loss: 0.6286271810531616 2023-01-22 14:42:50.331124: step: 60/459, loss: 0.5020625591278076 2023-01-22 14:42:51.026477: step: 62/459, loss: 0.5355586409568787 2023-01-22 14:42:51.648063: step: 64/459, loss: 0.46263641119003296 2023-01-22 14:42:52.262352: step: 66/459, loss: 1.5383695363998413 2023-01-22 14:42:52.908350: step: 68/459, loss: 0.6582084894180298 2023-01-22 14:42:53.574539: step: 70/459, loss: 2.111907482147217 2023-01-22 14:42:54.167147: step: 72/459, loss: 1.3613487482070923 2023-01-22 14:42:54.788054: step: 74/459, loss: 0.5476066470146179 2023-01-22 14:42:55.389542: step: 76/459, loss: 1.6449123620986938 2023-01-22 14:42:56.019823: step: 78/459, loss: 0.8710778951644897 2023-01-22 14:42:56.692616: step: 80/459, loss: 0.9931475520133972 2023-01-22 14:42:57.328266: step: 82/459, loss: 0.6653872728347778 2023-01-22 14:42:57.900905: step: 84/459, loss: 0.18329362571239471 2023-01-22 14:42:58.511249: step: 86/459, loss: 0.39404168725013733 2023-01-22 14:42:59.120338: step: 88/459, loss: 0.8995416164398193 2023-01-22 14:42:59.722809: step: 90/459, loss: 0.3157386779785156 2023-01-22 14:43:00.329721: step: 92/459, loss: 0.5106749534606934 2023-01-22 14:43:00.952593: step: 94/459, loss: 0.962185263633728 2023-01-22 14:43:01.603707: step: 96/459, loss: 0.9266047477722168 2023-01-22 14:43:02.256895: step: 98/459, loss: 0.9735736846923828 2023-01-22 14:43:02.936283: step: 100/459, loss: 1.4234263896942139 2023-01-22 14:43:03.534102: step: 102/459, loss: 1.663977026939392 2023-01-22 14:43:04.145561: step: 104/459, loss: 1.0873807668685913 2023-01-22 14:43:04.826589: step: 106/459, loss: 0.4646606147289276 2023-01-22 14:43:05.489363: step: 108/459, loss: 0.8479255437850952 2023-01-22 14:43:06.081428: step: 110/459, loss: 1.0843884944915771 2023-01-22 14:43:06.701612: step: 112/459, loss: 1.7648119926452637 2023-01-22 14:43:07.350188: step: 114/459, loss: 0.27909907698631287 2023-01-22 14:43:07.929787: step: 116/459, loss: 0.488800585269928 2023-01-22 14:43:08.561447: step: 118/459, loss: 0.28771552443504333 2023-01-22 14:43:09.248431: step: 120/459, loss: 0.8838553428649902 2023-01-22 14:43:09.885801: step: 122/459, loss: 1.3634735345840454 2023-01-22 14:43:10.476121: step: 124/459, loss: 1.1970973014831543 2023-01-22 14:43:11.107151: step: 126/459, loss: 1.0312391519546509 2023-01-22 14:43:11.806037: step: 128/459, loss: 0.5673823356628418 2023-01-22 14:43:12.445838: step: 130/459, loss: 1.3545950651168823 2023-01-22 14:43:13.051471: step: 132/459, loss: 1.5156985521316528 2023-01-22 14:43:13.686475: step: 134/459, loss: 1.3642785549163818 2023-01-22 14:43:14.324817: step: 136/459, loss: 0.6265555024147034 2023-01-22 14:43:14.941634: step: 138/459, loss: 0.926995575428009 2023-01-22 14:43:15.510314: step: 140/459, loss: 0.5315567851066589 2023-01-22 14:43:16.197256: step: 142/459, loss: 0.7069587707519531 2023-01-22 14:43:16.787442: step: 144/459, loss: 0.8253792524337769 2023-01-22 14:43:17.430340: step: 146/459, loss: 1.3610002994537354 2023-01-22 14:43:18.063760: step: 148/459, loss: 1.1462457180023193 2023-01-22 14:43:18.679002: step: 150/459, loss: 0.4693123698234558 2023-01-22 14:43:19.289093: step: 152/459, loss: 1.4392645359039307 2023-01-22 14:43:19.902747: step: 154/459, loss: 2.0333337783813477 2023-01-22 14:43:20.555463: step: 156/459, loss: 0.48083117604255676 2023-01-22 14:43:21.167486: step: 158/459, loss: 0.8982654213905334 2023-01-22 14:43:21.803793: step: 160/459, loss: 0.5174713730812073 2023-01-22 14:43:22.420792: step: 162/459, loss: 3.224520683288574 2023-01-22 14:43:23.062034: step: 164/459, loss: 0.7184847593307495 2023-01-22 14:43:23.685139: step: 166/459, loss: 0.9982325434684753 2023-01-22 14:43:24.256666: step: 168/459, loss: 0.9111635088920593 2023-01-22 14:43:24.861100: step: 170/459, loss: 0.5698059797286987 2023-01-22 14:43:25.488390: step: 172/459, loss: 0.5856573581695557 2023-01-22 14:43:26.107989: step: 174/459, loss: 0.2289925515651703 2023-01-22 14:43:26.809391: step: 176/459, loss: 0.8606970310211182 2023-01-22 14:43:27.412215: step: 178/459, loss: 0.3114602863788605 2023-01-22 14:43:28.001872: step: 180/459, loss: 0.8423815965652466 2023-01-22 14:43:28.604224: step: 182/459, loss: 5.661559104919434 2023-01-22 14:43:29.224723: step: 184/459, loss: 0.8914599418640137 2023-01-22 14:43:29.908735: step: 186/459, loss: 3.1625208854675293 2023-01-22 14:43:30.610068: step: 188/459, loss: 2.3475332260131836 2023-01-22 14:43:31.330784: step: 190/459, loss: 0.7106536030769348 2023-01-22 14:43:31.928751: step: 192/459, loss: 1.2375035285949707 2023-01-22 14:43:32.545819: step: 194/459, loss: 0.5342201590538025 2023-01-22 14:43:33.176402: step: 196/459, loss: 0.44157877564430237 2023-01-22 14:43:33.785856: step: 198/459, loss: 0.34626826643943787 2023-01-22 14:43:34.381413: step: 200/459, loss: 0.5654081702232361 2023-01-22 14:43:34.954414: step: 202/459, loss: 2.011528253555298 2023-01-22 14:43:35.568038: step: 204/459, loss: 1.58829927444458 2023-01-22 14:43:36.203530: step: 206/459, loss: 0.9661394953727722 2023-01-22 14:43:36.842558: step: 208/459, loss: 0.6158255934715271 2023-01-22 14:43:37.488635: step: 210/459, loss: 0.7662718296051025 2023-01-22 14:43:38.126919: step: 212/459, loss: 0.9300849437713623 2023-01-22 14:43:38.863971: step: 214/459, loss: 0.8057277798652649 2023-01-22 14:43:39.469425: step: 216/459, loss: 0.9276961088180542 2023-01-22 14:43:40.031875: step: 218/459, loss: 0.7318058609962463 2023-01-22 14:43:40.672259: step: 220/459, loss: 0.39631274342536926 2023-01-22 14:43:41.323086: step: 222/459, loss: 1.351833462715149 2023-01-22 14:43:41.949303: step: 224/459, loss: 1.7773594856262207 2023-01-22 14:43:42.527055: step: 226/459, loss: 2.0873863697052 2023-01-22 14:43:43.108509: step: 228/459, loss: 0.19115173816680908 2023-01-22 14:43:43.724471: step: 230/459, loss: 0.9088853597640991 2023-01-22 14:43:44.387110: step: 232/459, loss: 0.19725021719932556 2023-01-22 14:43:44.992597: step: 234/459, loss: 0.3124952018260956 2023-01-22 14:43:45.608492: step: 236/459, loss: 0.8658949136734009 2023-01-22 14:43:46.244489: step: 238/459, loss: 1.1110478639602661 2023-01-22 14:43:46.837345: step: 240/459, loss: 0.3039073944091797 2023-01-22 14:43:47.460733: step: 242/459, loss: 0.4855636656284332 2023-01-22 14:43:48.030610: step: 244/459, loss: 0.4193297028541565 2023-01-22 14:43:48.731509: step: 246/459, loss: 3.260974645614624 2023-01-22 14:43:49.332146: step: 248/459, loss: 1.8786005973815918 2023-01-22 14:43:49.957881: step: 250/459, loss: 0.31720373034477234 2023-01-22 14:43:50.690142: step: 252/459, loss: 0.7534573078155518 2023-01-22 14:43:51.236547: step: 254/459, loss: 0.383730411529541 2023-01-22 14:43:51.898646: step: 256/459, loss: 1.6482603549957275 2023-01-22 14:43:52.608960: step: 258/459, loss: 0.868137538433075 2023-01-22 14:43:53.184369: step: 260/459, loss: 0.3023183345794678 2023-01-22 14:43:53.812344: step: 262/459, loss: 1.1448016166687012 2023-01-22 14:43:54.347987: step: 264/459, loss: 0.6590020060539246 2023-01-22 14:43:54.971064: step: 266/459, loss: 0.9159161448478699 2023-01-22 14:43:55.558141: step: 268/459, loss: 0.5441351532936096 2023-01-22 14:43:56.195175: step: 270/459, loss: 0.7074903845787048 2023-01-22 14:43:56.823267: step: 272/459, loss: 0.24128632247447968 2023-01-22 14:43:57.426791: step: 274/459, loss: 0.23215201497077942 2023-01-22 14:43:58.012905: step: 276/459, loss: 0.39580726623535156 2023-01-22 14:43:58.591696: step: 278/459, loss: 0.44448122382164 2023-01-22 14:43:59.237795: step: 280/459, loss: 0.7065459489822388 2023-01-22 14:43:59.877714: step: 282/459, loss: 0.16368210315704346 2023-01-22 14:44:00.441737: step: 284/459, loss: 1.1632940769195557 2023-01-22 14:44:01.081754: step: 286/459, loss: 2.152431011199951 2023-01-22 14:44:01.715066: step: 288/459, loss: 0.3132167458534241 2023-01-22 14:44:02.361816: step: 290/459, loss: 0.2712653577327728 2023-01-22 14:44:02.991587: step: 292/459, loss: 2.1586222648620605 2023-01-22 14:44:03.660518: step: 294/459, loss: 0.49609485268592834 2023-01-22 14:44:04.302389: step: 296/459, loss: 0.6659852266311646 2023-01-22 14:44:04.935859: step: 298/459, loss: 0.8360181450843811 2023-01-22 14:44:05.584443: step: 300/459, loss: 0.8529871106147766 2023-01-22 14:44:06.178731: step: 302/459, loss: 3.0604193210601807 2023-01-22 14:44:06.819497: step: 304/459, loss: 0.957890510559082 2023-01-22 14:44:07.507430: step: 306/459, loss: 0.5572599768638611 2023-01-22 14:44:08.101887: step: 308/459, loss: 0.42565420269966125 2023-01-22 14:44:08.746349: step: 310/459, loss: 0.41236239671707153 2023-01-22 14:44:09.497941: step: 312/459, loss: 0.9511789083480835 2023-01-22 14:44:10.139972: step: 314/459, loss: 0.9979352951049805 2023-01-22 14:44:10.705903: step: 316/459, loss: 0.20076508820056915 2023-01-22 14:44:11.317095: step: 318/459, loss: 1.1632930040359497 2023-01-22 14:44:12.043971: step: 320/459, loss: 0.636467456817627 2023-01-22 14:44:12.631870: step: 322/459, loss: 0.883121907711029 2023-01-22 14:44:13.271676: step: 324/459, loss: 0.5508447289466858 2023-01-22 14:44:13.880408: step: 326/459, loss: 1.6290867328643799 2023-01-22 14:44:14.480475: step: 328/459, loss: 2.5316481590270996 2023-01-22 14:44:15.152176: step: 330/459, loss: 0.9257813096046448 2023-01-22 14:44:15.839662: step: 332/459, loss: 1.1571818590164185 2023-01-22 14:44:16.495307: step: 334/459, loss: 2.425467014312744 2023-01-22 14:44:17.118775: step: 336/459, loss: 0.2648104131221771 2023-01-22 14:44:17.755026: step: 338/459, loss: 1.324173927307129 2023-01-22 14:44:18.463573: step: 340/459, loss: 1.3311035633087158 2023-01-22 14:44:19.114724: step: 342/459, loss: 0.8116718530654907 2023-01-22 14:44:19.756222: step: 344/459, loss: 0.8396702408790588 2023-01-22 14:44:20.336634: step: 346/459, loss: 0.8036899566650391 2023-01-22 14:44:20.965606: step: 348/459, loss: 0.22391687333583832 2023-01-22 14:44:21.595403: step: 350/459, loss: 0.5610983371734619 2023-01-22 14:44:22.168680: step: 352/459, loss: 0.8128755688667297 2023-01-22 14:44:22.789334: step: 354/459, loss: 0.44368553161621094 2023-01-22 14:44:23.338005: step: 356/459, loss: 1.5854908227920532 2023-01-22 14:44:23.972206: step: 358/459, loss: 0.3815900385379791 2023-01-22 14:44:24.602805: step: 360/459, loss: 1.0513497591018677 2023-01-22 14:44:25.188909: step: 362/459, loss: 0.7593823671340942 2023-01-22 14:44:25.831788: step: 364/459, loss: 0.5572647452354431 2023-01-22 14:44:26.523220: step: 366/459, loss: 0.4779700040817261 2023-01-22 14:44:27.170208: step: 368/459, loss: 1.2194868326187134 2023-01-22 14:44:27.795164: step: 370/459, loss: 1.1851884126663208 2023-01-22 14:44:28.366570: step: 372/459, loss: 1.0748274326324463 2023-01-22 14:44:28.992698: step: 374/459, loss: 0.8438742756843567 2023-01-22 14:44:29.614211: step: 376/459, loss: 1.466718316078186 2023-01-22 14:44:30.190700: step: 378/459, loss: 2.2708940505981445 2023-01-22 14:44:30.830236: step: 380/459, loss: 4.304389476776123 2023-01-22 14:44:31.456114: step: 382/459, loss: 1.284483551979065 2023-01-22 14:44:32.130026: step: 384/459, loss: 0.9475678205490112 2023-01-22 14:44:32.751848: step: 386/459, loss: 1.106303334236145 2023-01-22 14:44:33.362335: step: 388/459, loss: 0.88243567943573 2023-01-22 14:44:33.975326: step: 390/459, loss: 1.133114218711853 2023-01-22 14:44:34.615512: step: 392/459, loss: 1.2853572368621826 2023-01-22 14:44:35.289307: step: 394/459, loss: 0.5076777338981628 2023-01-22 14:44:35.858412: step: 396/459, loss: 1.591703176498413 2023-01-22 14:44:36.561966: step: 398/459, loss: 0.6906570196151733 2023-01-22 14:44:37.223459: step: 400/459, loss: 0.7602169513702393 2023-01-22 14:44:37.807681: step: 402/459, loss: 1.6517244577407837 2023-01-22 14:44:38.466505: step: 404/459, loss: 0.9013317823410034 2023-01-22 14:44:39.128145: step: 406/459, loss: 1.3557554483413696 2023-01-22 14:44:39.815601: step: 408/459, loss: 0.6205309629440308 2023-01-22 14:44:40.412894: step: 410/459, loss: 2.635955810546875 2023-01-22 14:44:40.979754: step: 412/459, loss: 1.317895770072937 2023-01-22 14:44:41.553337: step: 414/459, loss: 1.0024558305740356 2023-01-22 14:44:42.155802: step: 416/459, loss: 0.4247799217700958 2023-01-22 14:44:42.746280: step: 418/459, loss: 0.612002432346344 2023-01-22 14:44:43.384439: step: 420/459, loss: 0.5970445275306702 2023-01-22 14:44:44.038420: step: 422/459, loss: 0.9105852842330933 2023-01-22 14:44:44.684571: step: 424/459, loss: 1.0322164297103882 2023-01-22 14:44:45.305739: step: 426/459, loss: 1.3127753734588623 2023-01-22 14:44:45.918071: step: 428/459, loss: 0.1906375288963318 2023-01-22 14:44:46.535856: step: 430/459, loss: 1.3553297519683838 2023-01-22 14:44:47.172236: step: 432/459, loss: 1.5220725536346436 2023-01-22 14:44:47.803391: step: 434/459, loss: 0.6956392526626587 2023-01-22 14:44:48.449660: step: 436/459, loss: 1.0739949941635132 2023-01-22 14:44:49.132406: step: 438/459, loss: 7.243132591247559 2023-01-22 14:44:49.784596: step: 440/459, loss: 0.8301319479942322 2023-01-22 14:44:50.390813: step: 442/459, loss: 1.9501830339431763 2023-01-22 14:44:50.979795: step: 444/459, loss: 0.6457964181900024 2023-01-22 14:44:51.627031: step: 446/459, loss: 0.1738618165254593 2023-01-22 14:44:52.323856: step: 448/459, loss: 0.5039651393890381 2023-01-22 14:44:52.849039: step: 450/459, loss: 0.3077044188976288 2023-01-22 14:44:53.479449: step: 452/459, loss: 1.343899130821228 2023-01-22 14:44:54.084688: step: 454/459, loss: 0.19788731634616852 2023-01-22 14:44:54.700075: step: 456/459, loss: 1.0821161270141602 2023-01-22 14:44:55.282572: step: 458/459, loss: 1.8612968921661377 2023-01-22 14:44:55.891706: step: 460/459, loss: 0.46068528294563293 2023-01-22 14:44:56.526589: step: 462/459, loss: 1.0941803455352783 2023-01-22 14:44:57.139938: step: 464/459, loss: 0.9519140720367432 2023-01-22 14:44:57.803989: step: 466/459, loss: 5.233526229858398 2023-01-22 14:44:58.480672: step: 468/459, loss: 0.44095131754875183 2023-01-22 14:44:59.107969: step: 470/459, loss: 1.2272306680679321 2023-01-22 14:44:59.729332: step: 472/459, loss: 0.5409550666809082 2023-01-22 14:45:00.442909: step: 474/459, loss: 0.5939500331878662 2023-01-22 14:45:01.085655: step: 476/459, loss: 0.6509438157081604 2023-01-22 14:45:01.713110: step: 478/459, loss: 0.8594257235527039 2023-01-22 14:45:02.360683: step: 480/459, loss: 5.214653015136719 2023-01-22 14:45:02.950581: step: 482/459, loss: 1.7746129035949707 2023-01-22 14:45:03.644166: step: 484/459, loss: 1.871708631515503 2023-01-22 14:45:04.320645: step: 486/459, loss: 0.21471542119979858 2023-01-22 14:45:04.909102: step: 488/459, loss: 1.061169147491455 2023-01-22 14:45:05.536831: step: 490/459, loss: 0.6290455460548401 2023-01-22 14:45:06.172684: step: 492/459, loss: 1.2726651430130005 2023-01-22 14:45:06.873202: step: 494/459, loss: 0.3054748475551605 2023-01-22 14:45:07.440222: step: 496/459, loss: 1.4598276615142822 2023-01-22 14:45:08.090807: step: 498/459, loss: 1.2801862955093384 2023-01-22 14:45:08.694529: step: 500/459, loss: 3.0444109439849854 2023-01-22 14:45:09.321249: step: 502/459, loss: 0.2435675710439682 2023-01-22 14:45:09.969503: step: 504/459, loss: 2.0929062366485596 2023-01-22 14:45:10.549869: step: 506/459, loss: 1.3586487770080566 2023-01-22 14:45:11.198405: step: 508/459, loss: 0.4165704846382141 2023-01-22 14:45:11.854321: step: 510/459, loss: 0.7794864177703857 2023-01-22 14:45:12.478339: step: 512/459, loss: 0.8274415731430054 2023-01-22 14:45:13.107868: step: 514/459, loss: 1.4741102457046509 2023-01-22 14:45:13.675322: step: 516/459, loss: 0.8742100596427917 2023-01-22 14:45:14.291812: step: 518/459, loss: 0.5478226542472839 2023-01-22 14:45:14.990302: step: 520/459, loss: 0.45281410217285156 2023-01-22 14:45:15.763450: step: 522/459, loss: 0.4505572021007538 2023-01-22 14:45:16.392859: step: 524/459, loss: 1.3758777379989624 2023-01-22 14:45:16.980246: step: 526/459, loss: 2.934774875640869 2023-01-22 14:45:17.586587: step: 528/459, loss: 0.8695944547653198 2023-01-22 14:45:18.173382: step: 530/459, loss: 1.9559979438781738 2023-01-22 14:45:18.773409: step: 532/459, loss: 2.804025173187256 2023-01-22 14:45:19.378254: step: 534/459, loss: 3.9824328422546387 2023-01-22 14:45:19.990937: step: 536/459, loss: 1.0133695602416992 2023-01-22 14:45:20.573487: step: 538/459, loss: 0.6775952577590942 2023-01-22 14:45:21.265750: step: 540/459, loss: 1.5616990327835083 2023-01-22 14:45:21.941726: step: 542/459, loss: 0.7113668918609619 2023-01-22 14:45:22.565734: step: 544/459, loss: 0.663385272026062 2023-01-22 14:45:23.182657: step: 546/459, loss: 1.5054446458816528 2023-01-22 14:45:23.808561: step: 548/459, loss: 1.9495230913162231 2023-01-22 14:45:24.454524: step: 550/459, loss: 0.8803411722183228 2023-01-22 14:45:25.132538: step: 552/459, loss: 0.603722870349884 2023-01-22 14:45:25.748449: step: 554/459, loss: 1.683943510055542 2023-01-22 14:45:26.447599: step: 556/459, loss: 0.3651862144470215 2023-01-22 14:45:27.073839: step: 558/459, loss: 0.3695099949836731 2023-01-22 14:45:27.718266: step: 560/459, loss: 0.8973431587219238 2023-01-22 14:45:28.340635: step: 562/459, loss: 4.150864124298096 2023-01-22 14:45:29.003693: step: 564/459, loss: 0.16876909136772156 2023-01-22 14:45:29.659666: step: 566/459, loss: 0.34000054001808167 2023-01-22 14:45:30.274247: step: 568/459, loss: 0.43445485830307007 2023-01-22 14:45:30.860173: step: 570/459, loss: 0.8858822584152222 2023-01-22 14:45:31.403589: step: 572/459, loss: 0.641492486000061 2023-01-22 14:45:32.039915: step: 574/459, loss: 0.6816587448120117 2023-01-22 14:45:32.655020: step: 576/459, loss: 0.49363410472869873 2023-01-22 14:45:33.236883: step: 578/459, loss: 0.23172840476036072 2023-01-22 14:45:33.865210: step: 580/459, loss: 0.8822307586669922 2023-01-22 14:45:34.471528: step: 582/459, loss: 0.1723383069038391 2023-01-22 14:45:35.088469: step: 584/459, loss: 0.6046379804611206 2023-01-22 14:45:35.727720: step: 586/459, loss: 1.2801240682601929 2023-01-22 14:45:36.373645: step: 588/459, loss: 1.4471607208251953 2023-01-22 14:45:37.022151: step: 590/459, loss: 0.393567830324173 2023-01-22 14:45:37.649615: step: 592/459, loss: 0.3101704716682434 2023-01-22 14:45:38.253636: step: 594/459, loss: 0.36256900429725647 2023-01-22 14:45:38.941126: step: 596/459, loss: 0.4109722971916199 2023-01-22 14:45:39.649317: step: 598/459, loss: 1.341117024421692 2023-01-22 14:45:40.273496: step: 600/459, loss: 1.1868726015090942 2023-01-22 14:45:40.878238: step: 602/459, loss: 0.4696422219276428 2023-01-22 14:45:41.482004: step: 604/459, loss: 1.9967137575149536 2023-01-22 14:45:42.140058: step: 606/459, loss: 0.41010066866874695 2023-01-22 14:45:42.717202: step: 608/459, loss: 0.357271283864975 2023-01-22 14:45:43.323475: step: 610/459, loss: 0.5059231519699097 2023-01-22 14:45:43.975525: step: 612/459, loss: 0.4772096872329712 2023-01-22 14:45:44.627990: step: 614/459, loss: 0.3015650808811188 2023-01-22 14:45:45.262043: step: 616/459, loss: 2.2744226455688477 2023-01-22 14:45:45.860568: step: 618/459, loss: 0.8733551502227783 2023-01-22 14:45:46.544441: step: 620/459, loss: 1.7140902280807495 2023-01-22 14:45:47.129425: step: 622/459, loss: 0.5770049691200256 2023-01-22 14:45:47.750826: step: 624/459, loss: 0.37053775787353516 2023-01-22 14:45:48.380729: step: 626/459, loss: 0.30354249477386475 2023-01-22 14:45:48.999105: step: 628/459, loss: 0.8405967354774475 2023-01-22 14:45:49.684934: step: 630/459, loss: 1.8091312646865845 2023-01-22 14:45:50.293225: step: 632/459, loss: 1.586220622062683 2023-01-22 14:45:50.925630: step: 634/459, loss: 0.4784316122531891 2023-01-22 14:45:51.520587: step: 636/459, loss: 1.0049660205841064 2023-01-22 14:45:52.111783: step: 638/459, loss: 0.7297889590263367 2023-01-22 14:45:52.783836: step: 640/459, loss: 1.6560137271881104 2023-01-22 14:45:53.402726: step: 642/459, loss: 0.9224131107330322 2023-01-22 14:45:54.051468: step: 644/459, loss: 0.869172215461731 2023-01-22 14:45:54.681318: step: 646/459, loss: 0.39343971014022827 2023-01-22 14:45:55.319024: step: 648/459, loss: 0.42257097363471985 2023-01-22 14:45:55.909744: step: 650/459, loss: 0.331013947725296 2023-01-22 14:45:56.505412: step: 652/459, loss: 0.5863726139068604 2023-01-22 14:45:57.136757: step: 654/459, loss: 0.4945682883262634 2023-01-22 14:45:57.752365: step: 656/459, loss: 5.351576328277588 2023-01-22 14:45:58.456544: step: 658/459, loss: 0.5504857897758484 2023-01-22 14:45:59.155020: step: 660/459, loss: 1.358358383178711 2023-01-22 14:45:59.874192: step: 662/459, loss: 0.4992581009864807 2023-01-22 14:46:00.466140: step: 664/459, loss: 1.4993596076965332 2023-01-22 14:46:01.095437: step: 666/459, loss: 0.9323006868362427 2023-01-22 14:46:01.745522: step: 668/459, loss: 1.1850236654281616 2023-01-22 14:46:02.429387: step: 670/459, loss: 1.286252498626709 2023-01-22 14:46:03.068479: step: 672/459, loss: 0.673964262008667 2023-01-22 14:46:03.699967: step: 674/459, loss: 0.7960541844367981 2023-01-22 14:46:04.389209: step: 676/459, loss: 1.3361719846725464 2023-01-22 14:46:04.976480: step: 678/459, loss: 1.4908019304275513 2023-01-22 14:46:05.592626: step: 680/459, loss: 0.39792436361312866 2023-01-22 14:46:06.203294: step: 682/459, loss: 0.4477353096008301 2023-01-22 14:46:06.767454: step: 684/459, loss: 0.3945517838001251 2023-01-22 14:46:07.432711: step: 686/459, loss: 0.6043502688407898 2023-01-22 14:46:08.023719: step: 688/459, loss: 1.4235973358154297 2023-01-22 14:46:08.627096: step: 690/459, loss: 1.7479157447814941 2023-01-22 14:46:09.259095: step: 692/459, loss: 0.1273966282606125 2023-01-22 14:46:09.858808: step: 694/459, loss: 3.2354135513305664 2023-01-22 14:46:10.522625: step: 696/459, loss: 0.4065965712070465 2023-01-22 14:46:11.168265: step: 698/459, loss: 0.9491575956344604 2023-01-22 14:46:11.805506: step: 700/459, loss: 0.9658905267715454 2023-01-22 14:46:12.458215: step: 702/459, loss: 0.28532683849334717 2023-01-22 14:46:13.017736: step: 704/459, loss: 0.7463811039924622 2023-01-22 14:46:13.646524: step: 706/459, loss: 0.726513683795929 2023-01-22 14:46:14.208651: step: 708/459, loss: 0.2577226758003235 2023-01-22 14:46:14.840942: step: 710/459, loss: 1.0210587978363037 2023-01-22 14:46:15.434055: step: 712/459, loss: 2.259723424911499 2023-01-22 14:46:16.087065: step: 714/459, loss: 0.6045011281967163 2023-01-22 14:46:16.806700: step: 716/459, loss: 0.9461110830307007 2023-01-22 14:46:17.379514: step: 718/459, loss: 0.500162661075592 2023-01-22 14:46:18.029635: step: 720/459, loss: 2.6313116550445557 2023-01-22 14:46:18.619299: step: 722/459, loss: 0.19364877045154572 2023-01-22 14:46:19.344715: step: 724/459, loss: 0.39817917346954346 2023-01-22 14:46:19.990131: step: 726/459, loss: 0.8207187652587891 2023-01-22 14:46:20.590796: step: 728/459, loss: 0.461626261472702 2023-01-22 14:46:21.175107: step: 730/459, loss: 0.8068612217903137 2023-01-22 14:46:21.794687: step: 732/459, loss: 5.240665435791016 2023-01-22 14:46:22.425757: step: 734/459, loss: 1.323441743850708 2023-01-22 14:46:23.057162: step: 736/459, loss: 0.7564518451690674 2023-01-22 14:46:23.776637: step: 738/459, loss: 1.479673147201538 2023-01-22 14:46:24.384289: step: 740/459, loss: 0.40440815687179565 2023-01-22 14:46:24.992256: step: 742/459, loss: 0.9749516844749451 2023-01-22 14:46:25.613464: step: 744/459, loss: 0.29867419600486755 2023-01-22 14:46:26.217271: step: 746/459, loss: 0.787574291229248 2023-01-22 14:46:26.846293: step: 748/459, loss: 0.10143180191516876 2023-01-22 14:46:27.514959: step: 750/459, loss: 1.5983433723449707 2023-01-22 14:46:28.124559: step: 752/459, loss: 1.3800702095031738 2023-01-22 14:46:28.741469: step: 754/459, loss: 1.1733800172805786 2023-01-22 14:46:29.337418: step: 756/459, loss: 1.3213828802108765 2023-01-22 14:46:29.947528: step: 758/459, loss: 0.2607763409614563 2023-01-22 14:46:30.534955: step: 760/459, loss: 0.22029836475849152 2023-01-22 14:46:31.182924: step: 762/459, loss: 0.4614824056625366 2023-01-22 14:46:31.833779: step: 764/459, loss: 0.9310044050216675 2023-01-22 14:46:32.494190: step: 766/459, loss: 0.46977943181991577 2023-01-22 14:46:33.112028: step: 768/459, loss: 0.35748469829559326 2023-01-22 14:46:33.738474: step: 770/459, loss: 0.29582706093788147 2023-01-22 14:46:34.307122: step: 772/459, loss: 2.518548011779785 2023-01-22 14:46:34.895063: step: 774/459, loss: 0.2868914306163788 2023-01-22 14:46:35.542360: step: 776/459, loss: 1.4803481101989746 2023-01-22 14:46:36.207679: step: 778/459, loss: 6.864080429077148 2023-01-22 14:46:36.772696: step: 780/459, loss: 0.254459023475647 2023-01-22 14:46:37.349791: step: 782/459, loss: 0.5618382692337036 2023-01-22 14:46:37.971445: step: 784/459, loss: 1.6749303340911865 2023-01-22 14:46:38.697249: step: 786/459, loss: 1.2920362949371338 2023-01-22 14:46:39.363054: step: 788/459, loss: 0.35017311573028564 2023-01-22 14:46:40.013428: step: 790/459, loss: 0.6409149169921875 2023-01-22 14:46:40.631935: step: 792/459, loss: 0.8659201860427856 2023-01-22 14:46:41.231289: step: 794/459, loss: 0.6793332695960999 2023-01-22 14:46:41.844010: step: 796/459, loss: 2.180924892425537 2023-01-22 14:46:42.420006: step: 798/459, loss: 0.22129029035568237 2023-01-22 14:46:43.067061: step: 800/459, loss: 0.41813889145851135 2023-01-22 14:46:43.663985: step: 802/459, loss: 1.450250506401062 2023-01-22 14:46:44.308396: step: 804/459, loss: 0.3551454246044159 2023-01-22 14:46:44.928753: step: 806/459, loss: 1.6290874481201172 2023-01-22 14:46:45.525993: step: 808/459, loss: 0.96565842628479 2023-01-22 14:46:46.153492: step: 810/459, loss: 1.7306187152862549 2023-01-22 14:46:46.796185: step: 812/459, loss: 0.6341475248336792 2023-01-22 14:46:47.428021: step: 814/459, loss: 0.7687172889709473 2023-01-22 14:46:48.022866: step: 816/459, loss: 0.9398401975631714 2023-01-22 14:46:48.643933: step: 818/459, loss: 1.0506632328033447 2023-01-22 14:46:49.293366: step: 820/459, loss: 2.177058219909668 2023-01-22 14:46:49.854504: step: 822/459, loss: 0.771179735660553 2023-01-22 14:46:50.446236: step: 824/459, loss: 0.4507567286491394 2023-01-22 14:46:51.022749: step: 826/459, loss: 0.27571001648902893 2023-01-22 14:46:51.642012: step: 828/459, loss: 1.2443981170654297 2023-01-22 14:46:52.262998: step: 830/459, loss: 0.7017070651054382 2023-01-22 14:46:52.879564: step: 832/459, loss: 1.494908094406128 2023-01-22 14:46:53.578317: step: 834/459, loss: 1.1968483924865723 2023-01-22 14:46:54.256662: step: 836/459, loss: 0.5102422833442688 2023-01-22 14:46:54.875563: step: 838/459, loss: 0.6499947309494019 2023-01-22 14:46:55.503353: step: 840/459, loss: 0.9436559081077576 2023-01-22 14:46:56.159820: step: 842/459, loss: 1.2985159158706665 2023-01-22 14:46:56.728547: step: 844/459, loss: 4.945404052734375 2023-01-22 14:46:57.363122: step: 846/459, loss: 0.40743839740753174 2023-01-22 14:46:58.035743: step: 848/459, loss: 1.0397007465362549 2023-01-22 14:46:58.653464: step: 850/459, loss: 0.2673805058002472 2023-01-22 14:46:59.296261: step: 852/459, loss: 1.0713376998901367 2023-01-22 14:46:59.929458: step: 854/459, loss: 1.3568059206008911 2023-01-22 14:47:00.555454: step: 856/459, loss: 4.94245719909668 2023-01-22 14:47:01.220245: step: 858/459, loss: 0.8139264583587646 2023-01-22 14:47:01.811446: step: 860/459, loss: 0.7922298908233643 2023-01-22 14:47:02.434864: step: 862/459, loss: 0.586086094379425 2023-01-22 14:47:03.024993: step: 864/459, loss: 0.4096706211566925 2023-01-22 14:47:03.647843: step: 866/459, loss: 1.1181334257125854 2023-01-22 14:47:04.292784: step: 868/459, loss: 0.29054513573646545 2023-01-22 14:47:04.891812: step: 870/459, loss: 0.42512285709381104 2023-01-22 14:47:05.553690: step: 872/459, loss: 0.47541508078575134 2023-01-22 14:47:06.193996: step: 874/459, loss: 1.5170016288757324 2023-01-22 14:47:06.873438: step: 876/459, loss: 1.6472043991088867 2023-01-22 14:47:07.492097: step: 878/459, loss: 1.0153489112854004 2023-01-22 14:47:08.183499: step: 880/459, loss: 3.45654034614563 2023-01-22 14:47:08.770793: step: 882/459, loss: 0.39468756318092346 2023-01-22 14:47:09.437075: step: 884/459, loss: 2.7002193927764893 2023-01-22 14:47:10.077583: step: 886/459, loss: 1.0050517320632935 2023-01-22 14:47:10.655599: step: 888/459, loss: 0.25843745470046997 2023-01-22 14:47:11.304310: step: 890/459, loss: 0.2893606126308441 2023-01-22 14:47:11.901308: step: 892/459, loss: 0.963717520236969 2023-01-22 14:47:12.510864: step: 894/459, loss: 0.6077747344970703 2023-01-22 14:47:13.098830: step: 896/459, loss: 0.8349976539611816 2023-01-22 14:47:13.736225: step: 898/459, loss: 1.6649893522262573 2023-01-22 14:47:14.378326: step: 900/459, loss: 0.7088192701339722 2023-01-22 14:47:14.991651: step: 902/459, loss: 1.74690580368042 2023-01-22 14:47:15.620624: step: 904/459, loss: 0.9719572067260742 2023-01-22 14:47:16.263197: step: 906/459, loss: 0.7845654487609863 2023-01-22 14:47:16.830396: step: 908/459, loss: 0.4008420705795288 2023-01-22 14:47:17.417002: step: 910/459, loss: 0.34005144238471985 2023-01-22 14:47:18.043631: step: 912/459, loss: 2.7981059551239014 2023-01-22 14:47:18.588518: step: 914/459, loss: 1.552026629447937 2023-01-22 14:47:19.234010: step: 916/459, loss: 3.2264034748077393 2023-01-22 14:47:19.841807: step: 918/459, loss: 0.4548453688621521 2023-01-22 14:47:20.295526: step: 920/459, loss: 0.4484649896621704 ================================================== Loss: 1.098 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.26853316257161114, 'r': 0.2817563864861223, 'f1': 0.2749859003043855}, 'combined': 0.20262118969796827, 'epoch': 2} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.28900387649465215, 'r': 0.2572711459539508, 'f1': 0.27221584526284565}, 'combined': 0.1742181409682212, 'epoch': 2} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.26288895695846276, 'r': 0.28678795304559573, 'f1': 0.2743189116088307}, 'combined': 0.20212972434334894, 'epoch': 2} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.29955084110195046, 'r': 0.2631263286630563, 'f1': 0.28015962723351495}, 'combined': 0.17930216142944955, 'epoch': 2} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27638717045961064, 'r': 0.2889502236623202, 'f1': 0.2825291075809353}, 'combined': 0.20817934242805758, 'epoch': 2} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.29681130646254605, 'r': 0.26038446430577905, 'f1': 0.27740717746862664}, 'combined': 0.19889571214731724, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2237654320987654, 'r': 0.3452380952380952, 'f1': 0.27153558052434457}, 'combined': 0.18102372034956304, 'epoch': 2} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.20535714285714285, 'r': 0.25, 'f1': 0.22549019607843138}, 'combined': 0.11274509803921569, 'epoch': 2} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23076923076923078, 'r': 0.10344827586206896, 'f1': 0.14285714285714288}, 'combined': 0.09523809523809525, 'epoch': 2} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2793854002375373, 'r': 0.328158563087354, 'f1': 0.3018142456318247}, 'combined': 0.22238944414976555, 'epoch': 1} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33830276531343184, 'r': 0.2474935607934479, 'f1': 0.28585961462112375}, 'combined': 0.18295015335751916, 'epoch': 1} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2017195767195767, 'r': 0.3630952380952381, 'f1': 0.2593537414965986}, 'combined': 0.17290249433106575, 'epoch': 1} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2771977511895471, 'r': 0.3303229369013958, 'f1': 0.30143755454032134}, 'combined': 0.22211188229286835, 'epoch': 1} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33558826373119, 'r': 0.25161485833894504, 'f1': 0.2875972223759756}, 'combined': 0.18406222232062439, 'epoch': 1} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.23214285714285715, 'r': 0.2826086956521739, 'f1': 0.2549019607843137}, 'combined': 0.12745098039215685, 'epoch': 1} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29402583508521984, 'r': 0.3269433384439067, 'f1': 0.30961211026044716}, 'combined': 0.22813523913927683, 'epoch': 1} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3423541418131789, 'r': 0.25465905170424247, 'f1': 0.29206584393053214}, 'combined': 0.209405699421891, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2777777777777778, 'r': 0.1724137931034483, 'f1': 0.21276595744680854}, 'combined': 0.14184397163120568, 'epoch': 1} ****************************** Epoch: 3 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 14:49:59.758340: step: 2/459, loss: 0.8398518562316895 2023-01-22 14:50:00.378916: step: 4/459, loss: 0.4643823504447937 2023-01-22 14:50:01.037563: step: 6/459, loss: 1.0509742498397827 2023-01-22 14:50:01.662257: step: 8/459, loss: 0.24180439114570618 2023-01-22 14:50:02.290931: step: 10/459, loss: 1.1789408922195435 2023-01-22 14:50:02.921775: step: 12/459, loss: 0.5192345976829529 2023-01-22 14:50:03.549301: step: 14/459, loss: 2.068976402282715 2023-01-22 14:50:04.228782: step: 16/459, loss: 0.3742629587650299 2023-01-22 14:50:04.838796: step: 18/459, loss: 0.2601674795150757 2023-01-22 14:50:05.488550: step: 20/459, loss: 0.25158241391181946 2023-01-22 14:50:06.106757: step: 22/459, loss: 1.9159363508224487 2023-01-22 14:50:06.743148: step: 24/459, loss: 0.4232289791107178 2023-01-22 14:50:07.388345: step: 26/459, loss: 0.5582979917526245 2023-01-22 14:50:08.053939: step: 28/459, loss: 0.23837345838546753 2023-01-22 14:50:08.646176: step: 30/459, loss: 2.0122532844543457 2023-01-22 14:50:09.250638: step: 32/459, loss: 0.43666356801986694 2023-01-22 14:50:09.869910: step: 34/459, loss: 0.4129379391670227 2023-01-22 14:50:10.462052: step: 36/459, loss: 1.2447326183319092 2023-01-22 14:50:11.066046: step: 38/459, loss: 0.48522746562957764 2023-01-22 14:50:11.654363: step: 40/459, loss: 0.5354541540145874 2023-01-22 14:50:12.306362: step: 42/459, loss: 0.43180879950523376 2023-01-22 14:50:12.896639: step: 44/459, loss: 0.45705723762512207 2023-01-22 14:50:13.501820: step: 46/459, loss: 0.19811034202575684 2023-01-22 14:50:14.146487: step: 48/459, loss: 0.29498982429504395 2023-01-22 14:50:14.772066: step: 50/459, loss: 0.3008671998977661 2023-01-22 14:50:15.348650: step: 52/459, loss: 0.39366379380226135 2023-01-22 14:50:15.931527: step: 54/459, loss: 0.3632752597332001 2023-01-22 14:50:16.555124: step: 56/459, loss: 0.3858112096786499 2023-01-22 14:50:17.180872: step: 58/459, loss: 0.4434092044830322 2023-01-22 14:50:17.797757: step: 60/459, loss: 0.437463641166687 2023-01-22 14:50:18.374324: step: 62/459, loss: 0.687658965587616 2023-01-22 14:50:18.964254: step: 64/459, loss: 0.798130452632904 2023-01-22 14:50:19.573367: step: 66/459, loss: 1.2362902164459229 2023-01-22 14:50:20.206654: step: 68/459, loss: 0.8526632785797119 2023-01-22 14:50:20.788247: step: 70/459, loss: 2.05098032951355 2023-01-22 14:50:21.387937: step: 72/459, loss: 0.5306727290153503 2023-01-22 14:50:21.973599: step: 74/459, loss: 1.288901448249817 2023-01-22 14:50:22.645811: step: 76/459, loss: 0.8959571719169617 2023-01-22 14:50:23.256145: step: 78/459, loss: 0.9425198435783386 2023-01-22 14:50:23.861220: step: 80/459, loss: 1.7352957725524902 2023-01-22 14:50:24.443633: step: 82/459, loss: 2.9687552452087402 2023-01-22 14:50:25.150517: step: 84/459, loss: 1.7499959468841553 2023-01-22 14:50:25.785167: step: 86/459, loss: 0.8051714301109314 2023-01-22 14:50:26.377186: step: 88/459, loss: 0.8566228151321411 2023-01-22 14:50:26.999154: step: 90/459, loss: 0.5954667925834656 2023-01-22 14:50:27.585474: step: 92/459, loss: 1.5713154077529907 2023-01-22 14:50:28.127604: step: 94/459, loss: 0.5238369703292847 2023-01-22 14:50:28.797993: step: 96/459, loss: 0.8181029558181763 2023-01-22 14:50:29.379715: step: 98/459, loss: 0.21895208954811096 2023-01-22 14:50:30.012722: step: 100/459, loss: 1.460027813911438 2023-01-22 14:50:30.588186: step: 102/459, loss: 0.646634578704834 2023-01-22 14:50:31.230313: step: 104/459, loss: 0.6030356287956238 2023-01-22 14:50:31.855040: step: 106/459, loss: 0.41200122237205505 2023-01-22 14:50:32.453094: step: 108/459, loss: 0.20166930556297302 2023-01-22 14:50:33.105749: step: 110/459, loss: 0.8606836795806885 2023-01-22 14:50:33.727862: step: 112/459, loss: 1.0438076257705688 2023-01-22 14:50:34.398521: step: 114/459, loss: 1.2169158458709717 2023-01-22 14:50:35.013106: step: 116/459, loss: 2.853444814682007 2023-01-22 14:50:35.664493: step: 118/459, loss: 0.607858419418335 2023-01-22 14:50:36.306026: step: 120/459, loss: 0.325610488653183 2023-01-22 14:50:36.930531: step: 122/459, loss: 0.6578608751296997 2023-01-22 14:50:37.565857: step: 124/459, loss: 3.3967418670654297 2023-01-22 14:50:38.254561: step: 126/459, loss: 1.8983210325241089 2023-01-22 14:50:38.864589: step: 128/459, loss: 1.1950592994689941 2023-01-22 14:50:39.494160: step: 130/459, loss: 0.7629843354225159 2023-01-22 14:50:40.166519: step: 132/459, loss: 1.2296628952026367 2023-01-22 14:50:40.768053: step: 134/459, loss: 2.584042549133301 2023-01-22 14:50:41.367828: step: 136/459, loss: 1.9797656536102295 2023-01-22 14:50:41.959501: step: 138/459, loss: 0.7087061405181885 2023-01-22 14:50:42.559565: step: 140/459, loss: 0.21673773229122162 2023-01-22 14:50:43.219341: step: 142/459, loss: 0.6439197659492493 2023-01-22 14:50:43.793994: step: 144/459, loss: 1.4619593620300293 2023-01-22 14:50:44.384671: step: 146/459, loss: 1.3808728456497192 2023-01-22 14:50:45.096301: step: 148/459, loss: 0.5764639377593994 2023-01-22 14:50:45.707143: step: 150/459, loss: 0.925938606262207 2023-01-22 14:50:46.349070: step: 152/459, loss: 0.5003314018249512 2023-01-22 14:50:46.971056: step: 154/459, loss: 0.13819794356822968 2023-01-22 14:50:47.544278: step: 156/459, loss: 0.5583202838897705 2023-01-22 14:50:48.127572: step: 158/459, loss: 0.4731675386428833 2023-01-22 14:50:48.723328: step: 160/459, loss: 0.17071029543876648 2023-01-22 14:50:49.463220: step: 162/459, loss: 1.0577044486999512 2023-01-22 14:50:50.064764: step: 164/459, loss: 0.4055927097797394 2023-01-22 14:50:50.686511: step: 166/459, loss: 0.33484184741973877 2023-01-22 14:50:51.313980: step: 168/459, loss: 0.22579815983772278 2023-01-22 14:50:51.975851: step: 170/459, loss: 1.5454840660095215 2023-01-22 14:50:52.554631: step: 172/459, loss: 1.522342324256897 2023-01-22 14:50:53.189659: step: 174/459, loss: 0.33964452147483826 2023-01-22 14:50:53.815785: step: 176/459, loss: 0.2081921547651291 2023-01-22 14:50:54.497873: step: 178/459, loss: 0.7003662586212158 2023-01-22 14:50:55.132122: step: 180/459, loss: 1.582457184791565 2023-01-22 14:50:55.780067: step: 182/459, loss: 0.46888262033462524 2023-01-22 14:50:56.426515: step: 184/459, loss: 1.6301935911178589 2023-01-22 14:50:57.038377: step: 186/459, loss: 0.6399088501930237 2023-01-22 14:50:57.698210: step: 188/459, loss: 0.7389960289001465 2023-01-22 14:50:58.343095: step: 190/459, loss: 0.9320856928825378 2023-01-22 14:50:58.999425: step: 192/459, loss: 0.6803966164588928 2023-01-22 14:50:59.573922: step: 194/459, loss: 0.6688287258148193 2023-01-22 14:51:00.201033: step: 196/459, loss: 0.32185012102127075 2023-01-22 14:51:00.884353: step: 198/459, loss: 0.6615158915519714 2023-01-22 14:51:01.457913: step: 200/459, loss: 0.34428727626800537 2023-01-22 14:51:02.146730: step: 202/459, loss: 0.7662001252174377 2023-01-22 14:51:02.729607: step: 204/459, loss: 0.6323988437652588 2023-01-22 14:51:03.365501: step: 206/459, loss: 1.0463508367538452 2023-01-22 14:51:04.062565: step: 208/459, loss: 0.29220202565193176 2023-01-22 14:51:04.681249: step: 210/459, loss: 1.0019216537475586 2023-01-22 14:51:05.290399: step: 212/459, loss: 1.724448561668396 2023-01-22 14:51:05.913928: step: 214/459, loss: 0.49727004766464233 2023-01-22 14:51:06.505551: step: 216/459, loss: 0.15741997957229614 2023-01-22 14:51:07.155132: step: 218/459, loss: 0.8448032736778259 2023-01-22 14:51:07.739684: step: 220/459, loss: 0.1118348091840744 2023-01-22 14:51:08.412361: step: 222/459, loss: 0.13483506441116333 2023-01-22 14:51:09.026710: step: 224/459, loss: 2.1009771823883057 2023-01-22 14:51:09.619659: step: 226/459, loss: 0.917556643486023 2023-01-22 14:51:10.257140: step: 228/459, loss: 0.44820526242256165 2023-01-22 14:51:10.843005: step: 230/459, loss: 0.8944314122200012 2023-01-22 14:51:11.481850: step: 232/459, loss: 2.6133575439453125 2023-01-22 14:51:12.085384: step: 234/459, loss: 0.21422994136810303 2023-01-22 14:51:12.729762: step: 236/459, loss: 0.43063387274742126 2023-01-22 14:51:13.387826: step: 238/459, loss: 1.2705694437026978 2023-01-22 14:51:13.995236: step: 240/459, loss: 2.8331995010375977 2023-01-22 14:51:14.633281: step: 242/459, loss: 1.2545119524002075 2023-01-22 14:51:15.249659: step: 244/459, loss: 0.3239467442035675 2023-01-22 14:51:15.894275: step: 246/459, loss: 1.8936917781829834 2023-01-22 14:51:16.567652: step: 248/459, loss: 0.8507328629493713 2023-01-22 14:51:17.272589: step: 250/459, loss: 0.46914219856262207 2023-01-22 14:51:17.937869: step: 252/459, loss: 1.6144390106201172 2023-01-22 14:51:18.617891: step: 254/459, loss: 0.7190415859222412 2023-01-22 14:51:19.258699: step: 256/459, loss: 0.2810624837875366 2023-01-22 14:51:19.886136: step: 258/459, loss: 0.47287625074386597 2023-01-22 14:51:20.511924: step: 260/459, loss: 0.4314813017845154 2023-01-22 14:51:21.073982: step: 262/459, loss: 0.7485593557357788 2023-01-22 14:51:21.750527: step: 264/459, loss: 1.643078327178955 2023-01-22 14:51:22.377224: step: 266/459, loss: 1.153994083404541 2023-01-22 14:51:23.040230: step: 268/459, loss: 0.8230201601982117 2023-01-22 14:51:23.662050: step: 270/459, loss: 0.6315735578536987 2023-01-22 14:51:24.293994: step: 272/459, loss: 2.0737760066986084 2023-01-22 14:51:24.915730: step: 274/459, loss: 0.7791745066642761 2023-01-22 14:51:25.565950: step: 276/459, loss: 1.0031168460845947 2023-01-22 14:51:26.156052: step: 278/459, loss: 0.25088217854499817 2023-01-22 14:51:26.756154: step: 280/459, loss: 0.4648754894733429 2023-01-22 14:51:27.395220: step: 282/459, loss: 0.639931857585907 2023-01-22 14:51:28.097939: step: 284/459, loss: 1.2609652280807495 2023-01-22 14:51:28.781689: step: 286/459, loss: 2.4682717323303223 2023-01-22 14:51:29.432920: step: 288/459, loss: 1.4612616300582886 2023-01-22 14:51:30.101519: step: 290/459, loss: 0.8148635029792786 2023-01-22 14:51:30.714271: step: 292/459, loss: 0.32706883549690247 2023-01-22 14:51:31.334256: step: 294/459, loss: 0.3320845067501068 2023-01-22 14:51:31.921940: step: 296/459, loss: 0.4271482825279236 2023-01-22 14:51:32.546411: step: 298/459, loss: 0.3624137341976166 2023-01-22 14:51:33.126779: step: 300/459, loss: 0.4353237450122833 2023-01-22 14:51:33.782135: step: 302/459, loss: 1.8668828010559082 2023-01-22 14:51:34.380136: step: 304/459, loss: 0.7225024700164795 2023-01-22 14:51:34.985209: step: 306/459, loss: 1.2784525156021118 2023-01-22 14:51:35.583791: step: 308/459, loss: 0.4639173746109009 2023-01-22 14:51:36.200239: step: 310/459, loss: 0.9570068120956421 2023-01-22 14:51:36.856262: step: 312/459, loss: 0.6577621102333069 2023-01-22 14:51:37.454191: step: 314/459, loss: 1.0670807361602783 2023-01-22 14:51:38.078385: step: 316/459, loss: 0.688703179359436 2023-01-22 14:51:38.694748: step: 318/459, loss: 0.5971472263336182 2023-01-22 14:51:39.343005: step: 320/459, loss: 0.9152560830116272 2023-01-22 14:51:39.967477: step: 322/459, loss: 0.8218833208084106 2023-01-22 14:51:40.489415: step: 324/459, loss: 0.6854164600372314 2023-01-22 14:51:41.080539: step: 326/459, loss: 2.0866236686706543 2023-01-22 14:51:41.679985: step: 328/459, loss: 0.41258859634399414 2023-01-22 14:51:42.273023: step: 330/459, loss: 1.8683630228042603 2023-01-22 14:51:42.959488: step: 332/459, loss: 1.2984533309936523 2023-01-22 14:51:43.533612: step: 334/459, loss: 0.8640950322151184 2023-01-22 14:51:44.147257: step: 336/459, loss: 0.3041331470012665 2023-01-22 14:51:44.747432: step: 338/459, loss: 0.7117694616317749 2023-01-22 14:51:45.345188: step: 340/459, loss: 0.3328053057193756 2023-01-22 14:51:46.036506: step: 342/459, loss: 0.3472191393375397 2023-01-22 14:51:46.639540: step: 344/459, loss: 1.4023175239562988 2023-01-22 14:51:47.318319: step: 346/459, loss: 0.688301146030426 2023-01-22 14:51:47.933604: step: 348/459, loss: 0.8969360589981079 2023-01-22 14:51:48.567160: step: 350/459, loss: 0.15304307639598846 2023-01-22 14:51:49.170578: step: 352/459, loss: 3.1760106086730957 2023-01-22 14:51:49.781534: step: 354/459, loss: 0.42276424169540405 2023-01-22 14:51:50.441631: step: 356/459, loss: 1.4280107021331787 2023-01-22 14:51:51.107232: step: 358/459, loss: 1.489329218864441 2023-01-22 14:51:51.740224: step: 360/459, loss: 3.3480448722839355 2023-01-22 14:51:52.384995: step: 362/459, loss: 1.651407241821289 2023-01-22 14:51:53.071942: step: 364/459, loss: 0.29789143800735474 2023-01-22 14:51:53.608666: step: 366/459, loss: 0.3557799756526947 2023-01-22 14:51:54.236210: step: 368/459, loss: 0.6576789021492004 2023-01-22 14:51:54.882381: step: 370/459, loss: 0.43738219141960144 2023-01-22 14:51:55.514878: step: 372/459, loss: 0.7646472454071045 2023-01-22 14:51:56.098381: step: 374/459, loss: 0.6220682859420776 2023-01-22 14:51:56.735544: step: 376/459, loss: 1.923992395401001 2023-01-22 14:51:57.431473: step: 378/459, loss: 0.5052127838134766 2023-01-22 14:51:58.101778: step: 380/459, loss: 2.516754150390625 2023-01-22 14:51:58.714414: step: 382/459, loss: 0.22576162219047546 2023-01-22 14:51:59.316993: step: 384/459, loss: 0.85765141248703 2023-01-22 14:51:59.911898: step: 386/459, loss: 0.6250236630439758 2023-01-22 14:52:00.537392: step: 388/459, loss: 0.9515461921691895 2023-01-22 14:52:01.188005: step: 390/459, loss: 0.5371105074882507 2023-01-22 14:52:01.836052: step: 392/459, loss: 1.9260034561157227 2023-01-22 14:52:02.435805: step: 394/459, loss: 2.5023562908172607 2023-01-22 14:52:03.061832: step: 396/459, loss: 0.9022126793861389 2023-01-22 14:52:03.631948: step: 398/459, loss: 0.927818775177002 2023-01-22 14:52:04.294967: step: 400/459, loss: 0.7610685229301453 2023-01-22 14:52:04.982043: step: 402/459, loss: 1.9528543949127197 2023-01-22 14:52:05.614855: step: 404/459, loss: 0.6716485619544983 2023-01-22 14:52:06.278618: step: 406/459, loss: 0.8794859647750854 2023-01-22 14:52:06.935037: step: 408/459, loss: 0.39211589097976685 2023-01-22 14:52:07.592370: step: 410/459, loss: 0.6255475282669067 2023-01-22 14:52:08.203521: step: 412/459, loss: 0.40752479434013367 2023-01-22 14:52:08.854030: step: 414/459, loss: 1.5614726543426514 2023-01-22 14:52:09.452960: step: 416/459, loss: 0.8619905114173889 2023-01-22 14:52:10.113357: step: 418/459, loss: 1.7619341611862183 2023-01-22 14:52:10.754876: step: 420/459, loss: 0.5494446754455566 2023-01-22 14:52:11.341820: step: 422/459, loss: 0.9468473792076111 2023-01-22 14:52:11.947602: step: 424/459, loss: 0.498108834028244 2023-01-22 14:52:12.557858: step: 426/459, loss: 0.9887772798538208 2023-01-22 14:52:13.149420: step: 428/459, loss: 0.5967298150062561 2023-01-22 14:52:13.906939: step: 430/459, loss: 0.3676604926586151 2023-01-22 14:52:14.541221: step: 432/459, loss: 1.0705273151397705 2023-01-22 14:52:15.224127: step: 434/459, loss: 0.1907239407300949 2023-01-22 14:52:15.822392: step: 436/459, loss: 0.8449384570121765 2023-01-22 14:52:16.457163: step: 438/459, loss: 0.7839425206184387 2023-01-22 14:52:17.097353: step: 440/459, loss: 1.6597201824188232 2023-01-22 14:52:17.686475: step: 442/459, loss: 0.47356486320495605 2023-01-22 14:52:18.293601: step: 444/459, loss: 0.34125280380249023 2023-01-22 14:52:18.798336: step: 446/459, loss: 0.3708876967430115 2023-01-22 14:52:19.477114: step: 448/459, loss: 2.50014066696167 2023-01-22 14:52:20.066883: step: 450/459, loss: 0.6246120929718018 2023-01-22 14:52:20.701032: step: 452/459, loss: 0.49481338262557983 2023-01-22 14:52:21.318387: step: 454/459, loss: 3.880502939224243 2023-01-22 14:52:21.920970: step: 456/459, loss: 0.3493911325931549 2023-01-22 14:52:22.545539: step: 458/459, loss: 1.0803173780441284 2023-01-22 14:52:23.172777: step: 460/459, loss: 0.2683171331882477 2023-01-22 14:52:23.821749: step: 462/459, loss: 0.6364011764526367 2023-01-22 14:52:24.436057: step: 464/459, loss: 0.456399530172348 2023-01-22 14:52:25.106365: step: 466/459, loss: 0.586546778678894 2023-01-22 14:52:25.775969: step: 468/459, loss: 1.655277967453003 2023-01-22 14:52:26.359431: step: 470/459, loss: 0.8837939500808716 2023-01-22 14:52:27.043590: step: 472/459, loss: 0.5657742023468018 2023-01-22 14:52:27.713686: step: 474/459, loss: 0.911923885345459 2023-01-22 14:52:28.311929: step: 476/459, loss: 1.8218770027160645 2023-01-22 14:52:28.941385: step: 478/459, loss: 0.4360191524028778 2023-01-22 14:52:29.619705: step: 480/459, loss: 1.9349634647369385 2023-01-22 14:52:30.288102: step: 482/459, loss: 0.4149055480957031 2023-01-22 14:52:30.931044: step: 484/459, loss: 0.33818066120147705 2023-01-22 14:52:31.537354: step: 486/459, loss: 0.2746763229370117 2023-01-22 14:52:32.278383: step: 488/459, loss: 3.161151885986328 2023-01-22 14:52:32.882381: step: 490/459, loss: 0.513812243938446 2023-01-22 14:52:33.534933: step: 492/459, loss: 2.2441842555999756 2023-01-22 14:52:34.189766: step: 494/459, loss: 1.2531307935714722 2023-01-22 14:52:34.844134: step: 496/459, loss: 0.2675786018371582 2023-01-22 14:52:35.495656: step: 498/459, loss: 11.093106269836426 2023-01-22 14:52:36.100645: step: 500/459, loss: 0.4394725263118744 2023-01-22 14:52:36.714436: step: 502/459, loss: 0.8046543598175049 2023-01-22 14:52:37.321413: step: 504/459, loss: 1.0858978033065796 2023-01-22 14:52:37.951922: step: 506/459, loss: 2.2075858116149902 2023-01-22 14:52:38.540479: step: 508/459, loss: 0.19660529494285583 2023-01-22 14:52:39.265917: step: 510/459, loss: 0.7700721621513367 2023-01-22 14:52:39.891098: step: 512/459, loss: 1.401474952697754 2023-01-22 14:52:40.580874: step: 514/459, loss: 0.530198872089386 2023-01-22 14:52:41.237101: step: 516/459, loss: 6.241462707519531 2023-01-22 14:52:41.913444: step: 518/459, loss: 8.67455005645752 2023-01-22 14:52:42.518918: step: 520/459, loss: 0.9396746754646301 2023-01-22 14:52:43.189929: step: 522/459, loss: 0.8450738191604614 2023-01-22 14:52:43.787458: step: 524/459, loss: 0.8661453127861023 2023-01-22 14:52:44.465609: step: 526/459, loss: 0.5209345817565918 2023-01-22 14:52:45.084918: step: 528/459, loss: 0.29330021142959595 2023-01-22 14:52:45.674472: step: 530/459, loss: 0.8710822463035583 2023-01-22 14:52:46.306280: step: 532/459, loss: 0.261177122592926 2023-01-22 14:52:46.937487: step: 534/459, loss: 0.5598915219306946 2023-01-22 14:52:47.584205: step: 536/459, loss: 0.7292505502700806 2023-01-22 14:52:48.256177: step: 538/459, loss: 1.2415754795074463 2023-01-22 14:52:48.906932: step: 540/459, loss: 0.6016242504119873 2023-01-22 14:52:49.566119: step: 542/459, loss: 1.1622949838638306 2023-01-22 14:52:50.169068: step: 544/459, loss: 0.8525893092155457 2023-01-22 14:52:50.812095: step: 546/459, loss: 1.3187843561172485 2023-01-22 14:52:51.420316: step: 548/459, loss: 0.7773754596710205 2023-01-22 14:52:52.051047: step: 550/459, loss: 0.9668177366256714 2023-01-22 14:52:52.660701: step: 552/459, loss: 0.2572127878665924 2023-01-22 14:52:53.355261: step: 554/459, loss: 1.1011465787887573 2023-01-22 14:52:53.958973: step: 556/459, loss: 1.6079418659210205 2023-01-22 14:52:54.685903: step: 558/459, loss: 0.34781786799430847 2023-01-22 14:52:55.287526: step: 560/459, loss: 0.3472540080547333 2023-01-22 14:52:55.913022: step: 562/459, loss: 0.8786882162094116 2023-01-22 14:52:56.543054: step: 564/459, loss: 0.4167827367782593 2023-01-22 14:52:57.214808: step: 566/459, loss: 0.41220492124557495 2023-01-22 14:52:57.814859: step: 568/459, loss: 1.6876705884933472 2023-01-22 14:52:58.410288: step: 570/459, loss: 0.19969187676906586 2023-01-22 14:52:58.996853: step: 572/459, loss: 0.5721907615661621 2023-01-22 14:52:59.730695: step: 574/459, loss: 1.158447265625 2023-01-22 14:53:00.394819: step: 576/459, loss: 0.7297154068946838 2023-01-22 14:53:00.998448: step: 578/459, loss: 0.20299437642097473 2023-01-22 14:53:01.639111: step: 580/459, loss: 0.23064874112606049 2023-01-22 14:53:02.240228: step: 582/459, loss: 0.4549032747745514 2023-01-22 14:53:02.877991: step: 584/459, loss: 2.683457374572754 2023-01-22 14:53:03.469598: step: 586/459, loss: 0.7868238687515259 2023-01-22 14:53:04.027122: step: 588/459, loss: 0.6786232590675354 2023-01-22 14:53:04.664558: step: 590/459, loss: 1.439798355102539 2023-01-22 14:53:05.275240: step: 592/459, loss: 1.150038480758667 2023-01-22 14:53:05.891589: step: 594/459, loss: 1.1049665212631226 2023-01-22 14:53:06.495801: step: 596/459, loss: 0.577436089515686 2023-01-22 14:53:07.142076: step: 598/459, loss: 0.5256879329681396 2023-01-22 14:53:07.785429: step: 600/459, loss: 1.2705572843551636 2023-01-22 14:53:08.382784: step: 602/459, loss: 0.3305928409099579 2023-01-22 14:53:09.008862: step: 604/459, loss: 1.7984533309936523 2023-01-22 14:53:09.659595: step: 606/459, loss: 4.386632442474365 2023-01-22 14:53:10.264988: step: 608/459, loss: 0.674734890460968 2023-01-22 14:53:10.895661: step: 610/459, loss: 2.577136516571045 2023-01-22 14:53:11.532814: step: 612/459, loss: 0.18706120550632477 2023-01-22 14:53:12.177284: step: 614/459, loss: 0.9143052697181702 2023-01-22 14:53:12.794490: step: 616/459, loss: 1.1272063255310059 2023-01-22 14:53:13.434209: step: 618/459, loss: 0.42831864953041077 2023-01-22 14:53:14.073700: step: 620/459, loss: 1.5272623300552368 2023-01-22 14:53:14.708097: step: 622/459, loss: 0.4063927233219147 2023-01-22 14:53:15.335436: step: 624/459, loss: 1.5815749168395996 2023-01-22 14:53:15.949492: step: 626/459, loss: 0.21056343615055084 2023-01-22 14:53:16.535374: step: 628/459, loss: 0.6560976505279541 2023-01-22 14:53:17.133031: step: 630/459, loss: 4.816953659057617 2023-01-22 14:53:17.729810: step: 632/459, loss: 0.29058128595352173 2023-01-22 14:53:18.333117: step: 634/459, loss: 0.8279641270637512 2023-01-22 14:53:18.925761: step: 636/459, loss: 6.1486005783081055 2023-01-22 14:53:19.608440: step: 638/459, loss: 1.2810375690460205 2023-01-22 14:53:20.278687: step: 640/459, loss: 1.0371769666671753 2023-01-22 14:53:20.920857: step: 642/459, loss: 0.2369418740272522 2023-01-22 14:53:21.605519: step: 644/459, loss: 0.8725597262382507 2023-01-22 14:53:22.192036: step: 646/459, loss: 0.3197387158870697 2023-01-22 14:53:22.846790: step: 648/459, loss: 2.501336097717285 2023-01-22 14:53:23.548228: step: 650/459, loss: 2.279496192932129 2023-01-22 14:53:24.154872: step: 652/459, loss: 0.8108385801315308 2023-01-22 14:53:24.765687: step: 654/459, loss: 1.0682557821273804 2023-01-22 14:53:25.346436: step: 656/459, loss: 0.32625263929367065 2023-01-22 14:53:25.948554: step: 658/459, loss: 0.205687016248703 2023-01-22 14:53:26.591731: step: 660/459, loss: 0.35915082693099976 2023-01-22 14:53:27.299760: step: 662/459, loss: 1.9290274381637573 2023-01-22 14:53:27.934105: step: 664/459, loss: 0.419192373752594 2023-01-22 14:53:28.537076: step: 666/459, loss: 3.363100051879883 2023-01-22 14:53:29.142470: step: 668/459, loss: 0.8175494074821472 2023-01-22 14:53:29.802328: step: 670/459, loss: 0.5954905152320862 2023-01-22 14:53:30.485780: step: 672/459, loss: 1.46538245677948 2023-01-22 14:53:31.129018: step: 674/459, loss: 0.3085485100746155 2023-01-22 14:53:31.809623: step: 676/459, loss: 0.9936469197273254 2023-01-22 14:53:32.395863: step: 678/459, loss: 0.7950632572174072 2023-01-22 14:53:33.033792: step: 680/459, loss: 0.35935381054878235 2023-01-22 14:53:33.692198: step: 682/459, loss: 0.3885619044303894 2023-01-22 14:53:34.286407: step: 684/459, loss: 0.2708093523979187 2023-01-22 14:53:34.963631: step: 686/459, loss: 0.44381847977638245 2023-01-22 14:53:35.529589: step: 688/459, loss: 0.47104188799858093 2023-01-22 14:53:36.176376: step: 690/459, loss: 0.9214016795158386 2023-01-22 14:53:36.803376: step: 692/459, loss: 0.26192334294319153 2023-01-22 14:53:37.478597: step: 694/459, loss: 0.24338804185390472 2023-01-22 14:53:38.047576: step: 696/459, loss: 1.7899621725082397 2023-01-22 14:53:38.690397: step: 698/459, loss: 0.1931503862142563 2023-01-22 14:53:39.328736: step: 700/459, loss: 3.321481704711914 2023-01-22 14:53:39.931508: step: 702/459, loss: 0.27917036414146423 2023-01-22 14:53:40.572553: step: 704/459, loss: 0.2701716721057892 2023-01-22 14:53:41.171859: step: 706/459, loss: 0.20399795472621918 2023-01-22 14:53:41.731282: step: 708/459, loss: 0.21222837269306183 2023-01-22 14:53:42.348310: step: 710/459, loss: 2.312427282333374 2023-01-22 14:53:42.979758: step: 712/459, loss: 0.5684782862663269 2023-01-22 14:53:43.687347: step: 714/459, loss: 1.2425131797790527 2023-01-22 14:53:44.352359: step: 716/459, loss: 0.2305610477924347 2023-01-22 14:53:44.917074: step: 718/459, loss: 0.3743547797203064 2023-01-22 14:53:45.494861: step: 720/459, loss: 0.5601423978805542 2023-01-22 14:53:46.152115: step: 722/459, loss: 1.1852672100067139 2023-01-22 14:53:46.821187: step: 724/459, loss: 0.8934194445610046 2023-01-22 14:53:47.404911: step: 726/459, loss: 1.349035382270813 2023-01-22 14:53:48.126433: step: 728/459, loss: 3.105424404144287 2023-01-22 14:53:48.796835: step: 730/459, loss: 0.716583251953125 2023-01-22 14:53:49.441176: step: 732/459, loss: 1.3789149522781372 2023-01-22 14:53:50.036084: step: 734/459, loss: 0.3377475440502167 2023-01-22 14:53:50.636738: step: 736/459, loss: 0.33901146054267883 2023-01-22 14:53:51.245253: step: 738/459, loss: 0.3760363459587097 2023-01-22 14:53:51.881999: step: 740/459, loss: 1.1459914445877075 2023-01-22 14:53:52.539292: step: 742/459, loss: 0.4196164906024933 2023-01-22 14:53:53.137732: step: 744/459, loss: 0.6080082058906555 2023-01-22 14:53:53.790423: step: 746/459, loss: 1.344560146331787 2023-01-22 14:53:54.423960: step: 748/459, loss: 1.2764101028442383 2023-01-22 14:53:55.029058: step: 750/459, loss: 2.1771578788757324 2023-01-22 14:53:55.687361: step: 752/459, loss: 2.293926239013672 2023-01-22 14:53:56.322504: step: 754/459, loss: 2.5195374488830566 2023-01-22 14:53:56.923377: step: 756/459, loss: 0.7379446029663086 2023-01-22 14:53:57.585112: step: 758/459, loss: 1.0174202919006348 2023-01-22 14:53:58.184230: step: 760/459, loss: 0.5963167548179626 2023-01-22 14:53:58.890572: step: 762/459, loss: 0.455695241689682 2023-01-22 14:53:59.541369: step: 764/459, loss: 0.6346972584724426 2023-01-22 14:54:00.098518: step: 766/459, loss: 0.40697699785232544 2023-01-22 14:54:00.723653: step: 768/459, loss: 0.9977796673774719 2023-01-22 14:54:01.301778: step: 770/459, loss: 0.7930999398231506 2023-01-22 14:54:01.913082: step: 772/459, loss: 1.517263412475586 2023-01-22 14:54:02.541500: step: 774/459, loss: 1.5792834758758545 2023-01-22 14:54:03.159018: step: 776/459, loss: 1.1263720989227295 2023-01-22 14:54:03.746927: step: 778/459, loss: 0.4506717920303345 2023-01-22 14:54:04.373778: step: 780/459, loss: 3.1227402687072754 2023-01-22 14:54:05.018202: step: 782/459, loss: 1.2141224145889282 2023-01-22 14:54:05.628036: step: 784/459, loss: 0.6507594585418701 2023-01-22 14:54:06.238599: step: 786/459, loss: 7.223819255828857 2023-01-22 14:54:06.892743: step: 788/459, loss: 0.38250139355659485 2023-01-22 14:54:07.545424: step: 790/459, loss: 1.4810631275177002 2023-01-22 14:54:08.157289: step: 792/459, loss: 0.5404181480407715 2023-01-22 14:54:08.780968: step: 794/459, loss: 3.827531576156616 2023-01-22 14:54:09.470129: step: 796/459, loss: 0.3615395426750183 2023-01-22 14:54:10.112127: step: 798/459, loss: 1.1874518394470215 2023-01-22 14:54:10.797197: step: 800/459, loss: 0.823689877986908 2023-01-22 14:54:11.441218: step: 802/459, loss: 0.3731580972671509 2023-01-22 14:54:12.091620: step: 804/459, loss: 0.6335652470588684 2023-01-22 14:54:12.660944: step: 806/459, loss: 0.8372552990913391 2023-01-22 14:54:13.283169: step: 808/459, loss: 1.0268703699111938 2023-01-22 14:54:13.936581: step: 810/459, loss: 0.4857747256755829 2023-01-22 14:54:14.540635: step: 812/459, loss: 3.458653450012207 2023-01-22 14:54:15.171281: step: 814/459, loss: 0.32796117663383484 2023-01-22 14:54:15.908221: step: 816/459, loss: 0.588982105255127 2023-01-22 14:54:16.553822: step: 818/459, loss: 0.971589207649231 2023-01-22 14:54:17.239120: step: 820/459, loss: 0.49299490451812744 2023-01-22 14:54:17.869964: step: 822/459, loss: 2.511847734451294 2023-01-22 14:54:18.496563: step: 824/459, loss: 0.8866496086120605 2023-01-22 14:54:19.143513: step: 826/459, loss: 1.369660496711731 2023-01-22 14:54:19.712816: step: 828/459, loss: 0.20811061561107635 2023-01-22 14:54:20.365454: step: 830/459, loss: 0.5714547634124756 2023-01-22 14:54:21.052440: step: 832/459, loss: 0.5202130675315857 2023-01-22 14:54:21.784232: step: 834/459, loss: 0.5009088516235352 2023-01-22 14:54:22.397606: step: 836/459, loss: 1.149706244468689 2023-01-22 14:54:23.064981: step: 838/459, loss: 1.1185855865478516 2023-01-22 14:54:23.699986: step: 840/459, loss: 3.0560057163238525 2023-01-22 14:54:24.300683: step: 842/459, loss: 0.9818127751350403 2023-01-22 14:54:24.927582: step: 844/459, loss: 1.045640230178833 2023-01-22 14:54:25.491632: step: 846/459, loss: 1.7719815969467163 2023-01-22 14:54:26.189586: step: 848/459, loss: 6.287725448608398 2023-01-22 14:54:26.795984: step: 850/459, loss: 0.3048804998397827 2023-01-22 14:54:27.454216: step: 852/459, loss: 1.3366230726242065 2023-01-22 14:54:28.101772: step: 854/459, loss: 0.4401209354400635 2023-01-22 14:54:28.809881: step: 856/459, loss: 0.40600597858428955 2023-01-22 14:54:29.400940: step: 858/459, loss: 0.3782948851585388 2023-01-22 14:54:29.996207: step: 860/459, loss: 1.2490930557250977 2023-01-22 14:54:30.640247: step: 862/459, loss: 2.19287371635437 2023-01-22 14:54:31.246436: step: 864/459, loss: 1.1306854486465454 2023-01-22 14:54:31.879952: step: 866/459, loss: 0.8736238479614258 2023-01-22 14:54:32.499417: step: 868/459, loss: 1.0416250228881836 2023-01-22 14:54:33.151032: step: 870/459, loss: 1.725113868713379 2023-01-22 14:54:33.723013: step: 872/459, loss: 1.2922784090042114 2023-01-22 14:54:34.375915: step: 874/459, loss: 0.5124685168266296 2023-01-22 14:54:34.959063: step: 876/459, loss: 0.33843880891799927 2023-01-22 14:54:35.652166: step: 878/459, loss: 0.6355993747711182 2023-01-22 14:54:36.295080: step: 880/459, loss: 0.6824013590812683 2023-01-22 14:54:36.919918: step: 882/459, loss: 0.7047916650772095 2023-01-22 14:54:37.514883: step: 884/459, loss: 0.4498177766799927 2023-01-22 14:54:38.130011: step: 886/459, loss: 0.9215335249900818 2023-01-22 14:54:38.749060: step: 888/459, loss: 0.28214532136917114 2023-01-22 14:54:39.388328: step: 890/459, loss: 1.2813720703125 2023-01-22 14:54:40.032666: step: 892/459, loss: 0.5520343780517578 2023-01-22 14:54:40.652864: step: 894/459, loss: 0.3534702658653259 2023-01-22 14:54:41.293894: step: 896/459, loss: 0.8006798028945923 2023-01-22 14:54:42.016456: step: 898/459, loss: 0.7070424556732178 2023-01-22 14:54:42.604373: step: 900/459, loss: 1.8407232761383057 2023-01-22 14:54:43.173798: step: 902/459, loss: 0.6151673793792725 2023-01-22 14:54:43.726281: step: 904/459, loss: 0.2929648458957672 2023-01-22 14:54:44.433219: step: 906/459, loss: 2.337923526763916 2023-01-22 14:54:45.102042: step: 908/459, loss: 0.3041127920150757 2023-01-22 14:54:45.763348: step: 910/459, loss: 0.6827573776245117 2023-01-22 14:54:46.419474: step: 912/459, loss: 2.975863456726074 2023-01-22 14:54:47.050683: step: 914/459, loss: 0.9744635820388794 2023-01-22 14:54:47.697814: step: 916/459, loss: 0.9121844172477722 2023-01-22 14:54:48.302814: step: 918/459, loss: 0.42135244607925415 2023-01-22 14:54:48.773584: step: 920/459, loss: 0.02587936632335186 ================================================== Loss: 1.042 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35135498911409524, 'r': 0.22648780961797063, 'f1': 0.2754300029607045}, 'combined': 0.2029484232342033, 'epoch': 3} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.37507504666228264, 'r': 0.2043699936935919, 'f1': 0.2645775857324318}, 'combined': 0.1693296548687563, 'epoch': 3} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3487643930704721, 'r': 0.2366850985109631, 'f1': 0.2819964349376114}, 'combined': 0.2077868467961347, 'epoch': 3} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.37191133451689895, 'r': 0.20399488429984033, 'f1': 0.2634734863966322}, 'combined': 0.1686230312938446, 'epoch': 3} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35498946219003036, 'r': 0.21473842703366675, 'f1': 0.2676010080113303}, 'combined': 0.19717969011361178, 'epoch': 3} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3738950796201493, 'r': 0.21156824214655132, 'f1': 0.27022811438904065}, 'combined': 0.19374845937327445, 'epoch': 3} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2633333333333333, 'r': 0.1880952380952381, 'f1': 0.21944444444444441}, 'combined': 0.14629629629629626, 'epoch': 3} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.34375, 'r': 0.11956521739130435, 'f1': 0.1774193548387097}, 'combined': 0.08870967741935484, 'epoch': 3} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2857142857142857, 'r': 0.06896551724137931, 'f1': 0.1111111111111111}, 'combined': 0.07407407407407407, 'epoch': 3} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2793854002375373, 'r': 0.328158563087354, 'f1': 0.3018142456318247}, 'combined': 0.22238944414976555, 'epoch': 1} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33830276531343184, 'r': 0.2474935607934479, 'f1': 0.28585961462112375}, 'combined': 0.18295015335751916, 'epoch': 1} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2017195767195767, 'r': 0.3630952380952381, 'f1': 0.2593537414965986}, 'combined': 0.17290249433106575, 'epoch': 1} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2771977511895471, 'r': 0.3303229369013958, 'f1': 0.30143755454032134}, 'combined': 0.22211188229286835, 'epoch': 1} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33558826373119, 'r': 0.25161485833894504, 'f1': 0.2875972223759756}, 'combined': 0.18406222232062439, 'epoch': 1} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.23214285714285715, 'r': 0.2826086956521739, 'f1': 0.2549019607843137}, 'combined': 0.12745098039215685, 'epoch': 1} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29402583508521984, 'r': 0.3269433384439067, 'f1': 0.30961211026044716}, 'combined': 0.22813523913927683, 'epoch': 1} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3423541418131789, 'r': 0.25465905170424247, 'f1': 0.29206584393053214}, 'combined': 0.209405699421891, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2777777777777778, 'r': 0.1724137931034483, 'f1': 0.21276595744680854}, 'combined': 0.14184397163120568, 'epoch': 1} ****************************** Epoch: 4 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 14:57:23.991880: step: 2/459, loss: 0.35351505875587463 2023-01-22 14:57:24.612896: step: 4/459, loss: 0.3737386465072632 2023-01-22 14:57:25.301975: step: 6/459, loss: 0.4218239486217499 2023-01-22 14:57:25.911808: step: 8/459, loss: 0.6641525030136108 2023-01-22 14:57:26.550428: step: 10/459, loss: 0.7250014543533325 2023-01-22 14:57:27.113306: step: 12/459, loss: 3.3092613220214844 2023-01-22 14:57:27.719965: step: 14/459, loss: 1.27743661403656 2023-01-22 14:57:28.346501: step: 16/459, loss: 0.4615519046783447 2023-01-22 14:57:28.936290: step: 18/459, loss: 2.28450608253479 2023-01-22 14:57:29.559771: step: 20/459, loss: 0.1996404230594635 2023-01-22 14:57:30.208087: step: 22/459, loss: 0.4552402198314667 2023-01-22 14:57:30.872723: step: 24/459, loss: 0.7762945294380188 2023-01-22 14:57:31.560383: step: 26/459, loss: 0.5254566669464111 2023-01-22 14:57:32.182481: step: 28/459, loss: 0.8074111938476562 2023-01-22 14:57:32.842357: step: 30/459, loss: 1.009743332862854 2023-01-22 14:57:33.437437: step: 32/459, loss: 2.08298921585083 2023-01-22 14:57:34.087172: step: 34/459, loss: 0.48979851603507996 2023-01-22 14:57:34.738313: step: 36/459, loss: 1.3773154020309448 2023-01-22 14:57:35.335485: step: 38/459, loss: 0.9837642312049866 2023-01-22 14:57:35.945369: step: 40/459, loss: 0.6969042420387268 2023-01-22 14:57:36.533372: step: 42/459, loss: 0.5275053977966309 2023-01-22 14:57:37.145604: step: 44/459, loss: 0.29203909635543823 2023-01-22 14:57:37.803828: step: 46/459, loss: 0.8849007487297058 2023-01-22 14:57:38.366510: step: 48/459, loss: 1.5914435386657715 2023-01-22 14:57:38.975533: step: 50/459, loss: 0.22303012013435364 2023-01-22 14:57:39.582280: step: 52/459, loss: 0.7617901563644409 2023-01-22 14:57:40.232428: step: 54/459, loss: 0.7315495610237122 2023-01-22 14:57:40.818029: step: 56/459, loss: 0.36292943358421326 2023-01-22 14:57:41.404468: step: 58/459, loss: 0.4322194755077362 2023-01-22 14:57:42.044324: step: 60/459, loss: 1.2922251224517822 2023-01-22 14:57:42.650340: step: 62/459, loss: 0.8647807240486145 2023-01-22 14:57:43.238996: step: 64/459, loss: 0.24725548923015594 2023-01-22 14:57:43.797734: step: 66/459, loss: 0.24291421473026276 2023-01-22 14:57:44.484907: step: 68/459, loss: 0.14437070488929749 2023-01-22 14:57:45.077180: step: 70/459, loss: 0.7456201314926147 2023-01-22 14:57:45.666556: step: 72/459, loss: 0.9970555305480957 2023-01-22 14:57:46.237573: step: 74/459, loss: 0.8037850260734558 2023-01-22 14:57:46.805230: step: 76/459, loss: 0.9315568208694458 2023-01-22 14:57:47.445326: step: 78/459, loss: 0.24841855466365814 2023-01-22 14:57:48.103625: step: 80/459, loss: 0.4086030423641205 2023-01-22 14:57:48.636443: step: 82/459, loss: 0.4764077663421631 2023-01-22 14:57:49.181982: step: 84/459, loss: 0.8078222274780273 2023-01-22 14:57:49.866697: step: 86/459, loss: 0.6514307856559753 2023-01-22 14:57:50.457860: step: 88/459, loss: 3.8930294513702393 2023-01-22 14:57:51.044045: step: 90/459, loss: 0.6260170936584473 2023-01-22 14:57:51.665049: step: 92/459, loss: 0.9406738877296448 2023-01-22 14:57:52.295585: step: 94/459, loss: 0.5999481678009033 2023-01-22 14:57:52.874240: step: 96/459, loss: 1.1487587690353394 2023-01-22 14:57:53.467007: step: 98/459, loss: 0.8285297155380249 2023-01-22 14:57:54.120707: step: 100/459, loss: 0.43348923325538635 2023-01-22 14:57:54.717513: step: 102/459, loss: 5.499032974243164 2023-01-22 14:57:55.305233: step: 104/459, loss: 0.5653872489929199 2023-01-22 14:57:55.854547: step: 106/459, loss: 1.1715176105499268 2023-01-22 14:57:56.399933: step: 108/459, loss: 0.9635467529296875 2023-01-22 14:57:57.010686: step: 110/459, loss: 0.4682919681072235 2023-01-22 14:57:57.616138: step: 112/459, loss: 0.33701518177986145 2023-01-22 14:57:58.310354: step: 114/459, loss: 0.9865213632583618 2023-01-22 14:57:58.932289: step: 116/459, loss: 0.4635048806667328 2023-01-22 14:57:59.531701: step: 118/459, loss: 5.3993024826049805 2023-01-22 14:58:00.128161: step: 120/459, loss: 1.0891554355621338 2023-01-22 14:58:00.776676: step: 122/459, loss: 0.8033370971679688 2023-01-22 14:58:01.348507: step: 124/459, loss: 0.8709210753440857 2023-01-22 14:58:01.996570: step: 126/459, loss: 0.5815564393997192 2023-01-22 14:58:02.617225: step: 128/459, loss: 0.7133592367172241 2023-01-22 14:58:03.200266: step: 130/459, loss: 0.26937901973724365 2023-01-22 14:58:03.819203: step: 132/459, loss: 0.8264710903167725 2023-01-22 14:58:04.550143: step: 134/459, loss: 0.5526084899902344 2023-01-22 14:58:05.159639: step: 136/459, loss: 0.2942453622817993 2023-01-22 14:58:05.781784: step: 138/459, loss: 0.4787570834159851 2023-01-22 14:58:06.385666: step: 140/459, loss: 1.1113612651824951 2023-01-22 14:58:07.053520: step: 142/459, loss: 0.8727877140045166 2023-01-22 14:58:07.690630: step: 144/459, loss: 0.17630845308303833 2023-01-22 14:58:08.251324: step: 146/459, loss: 0.4942283034324646 2023-01-22 14:58:08.830530: step: 148/459, loss: 0.18253293633460999 2023-01-22 14:58:09.447907: step: 150/459, loss: 0.23842355608940125 2023-01-22 14:58:10.199541: step: 152/459, loss: 0.8554129600524902 2023-01-22 14:58:10.775751: step: 154/459, loss: 0.6076799631118774 2023-01-22 14:58:11.522812: step: 156/459, loss: 6.954806327819824 2023-01-22 14:58:12.149262: step: 158/459, loss: 0.22649644315242767 2023-01-22 14:58:12.844999: step: 160/459, loss: 0.39264345169067383 2023-01-22 14:58:13.434654: step: 162/459, loss: 0.9838932156562805 2023-01-22 14:58:14.185704: step: 164/459, loss: 0.46493595838546753 2023-01-22 14:58:14.861010: step: 166/459, loss: 0.9453555941581726 2023-01-22 14:58:15.491472: step: 168/459, loss: 0.5664196610450745 2023-01-22 14:58:16.040506: step: 170/459, loss: 1.8552570343017578 2023-01-22 14:58:16.672455: step: 172/459, loss: 0.399594783782959 2023-01-22 14:58:17.308066: step: 174/459, loss: 0.5173706412315369 2023-01-22 14:58:17.942114: step: 176/459, loss: 4.834889888763428 2023-01-22 14:58:18.623159: step: 178/459, loss: 0.29288387298583984 2023-01-22 14:58:19.225709: step: 180/459, loss: 0.5159820318222046 2023-01-22 14:58:19.852510: step: 182/459, loss: 1.1898598670959473 2023-01-22 14:58:20.482954: step: 184/459, loss: 0.3413047194480896 2023-01-22 14:58:21.169287: step: 186/459, loss: 0.2697296142578125 2023-01-22 14:58:21.731408: step: 188/459, loss: 0.33146917819976807 2023-01-22 14:58:22.430548: step: 190/459, loss: 0.7143064737319946 2023-01-22 14:58:23.014550: step: 192/459, loss: 0.391271710395813 2023-01-22 14:58:23.646696: step: 194/459, loss: 1.6792771816253662 2023-01-22 14:58:24.243257: step: 196/459, loss: 1.6560204029083252 2023-01-22 14:58:24.853524: step: 198/459, loss: 0.37271350622177124 2023-01-22 14:58:25.468590: step: 200/459, loss: 1.1553771495819092 2023-01-22 14:58:26.121946: step: 202/459, loss: 1.4438241720199585 2023-01-22 14:58:26.732240: step: 204/459, loss: 2.4271206855773926 2023-01-22 14:58:27.323885: step: 206/459, loss: 0.5765254497528076 2023-01-22 14:58:27.921717: step: 208/459, loss: 0.4130575656890869 2023-01-22 14:58:28.557112: step: 210/459, loss: 0.8640986680984497 2023-01-22 14:58:29.263629: step: 212/459, loss: 0.5179693698883057 2023-01-22 14:58:29.886562: step: 214/459, loss: 2.3819947242736816 2023-01-22 14:58:30.516839: step: 216/459, loss: 0.5583990812301636 2023-01-22 14:58:31.105371: step: 218/459, loss: 0.4479759633541107 2023-01-22 14:58:31.703114: step: 220/459, loss: 0.3309788107872009 2023-01-22 14:58:32.286612: step: 222/459, loss: 0.7112441658973694 2023-01-22 14:58:32.885034: step: 224/459, loss: 0.6166911125183105 2023-01-22 14:58:33.525387: step: 226/459, loss: 0.2518443465232849 2023-01-22 14:58:34.099902: step: 228/459, loss: 1.2648816108703613 2023-01-22 14:58:34.736368: step: 230/459, loss: 0.2924942672252655 2023-01-22 14:58:35.399849: step: 232/459, loss: 0.12281548976898193 2023-01-22 14:58:36.042421: step: 234/459, loss: 0.3394499719142914 2023-01-22 14:58:36.666913: step: 236/459, loss: 0.22777578234672546 2023-01-22 14:58:37.380383: step: 238/459, loss: 1.2662527561187744 2023-01-22 14:58:38.027169: step: 240/459, loss: 1.4509148597717285 2023-01-22 14:58:38.673355: step: 242/459, loss: 1.6090832948684692 2023-01-22 14:58:39.334564: step: 244/459, loss: 1.8509559631347656 2023-01-22 14:58:39.941957: step: 246/459, loss: 0.3525547385215759 2023-01-22 14:58:40.561299: step: 248/459, loss: 1.0995585918426514 2023-01-22 14:58:41.204804: step: 250/459, loss: 5.158425331115723 2023-01-22 14:58:41.878886: step: 252/459, loss: 0.5495701432228088 2023-01-22 14:58:42.476479: step: 254/459, loss: 2.466674566268921 2023-01-22 14:58:43.091446: step: 256/459, loss: 1.4334381818771362 2023-01-22 14:58:43.754523: step: 258/459, loss: 1.5086551904678345 2023-01-22 14:58:44.377595: step: 260/459, loss: 0.5385316610336304 2023-01-22 14:58:45.021079: step: 262/459, loss: 0.627558171749115 2023-01-22 14:58:45.692786: step: 264/459, loss: 3.8231728076934814 2023-01-22 14:58:46.389372: step: 266/459, loss: 2.095003843307495 2023-01-22 14:58:46.969844: step: 268/459, loss: 0.4236992299556732 2023-01-22 14:58:47.587472: step: 270/459, loss: 1.0951642990112305 2023-01-22 14:58:48.230738: step: 272/459, loss: 1.089877963066101 2023-01-22 14:58:48.908689: step: 274/459, loss: 0.6851375102996826 2023-01-22 14:58:49.535668: step: 276/459, loss: 1.9033409357070923 2023-01-22 14:58:50.131322: step: 278/459, loss: 0.6282751560211182 2023-01-22 14:58:50.764997: step: 280/459, loss: 0.603122889995575 2023-01-22 14:58:51.340392: step: 282/459, loss: 1.4734318256378174 2023-01-22 14:58:51.973587: step: 284/459, loss: 0.2000703066587448 2023-01-22 14:58:52.587640: step: 286/459, loss: 0.36500924825668335 2023-01-22 14:58:53.209441: step: 288/459, loss: 2.080204486846924 2023-01-22 14:58:53.860379: step: 290/459, loss: 0.560064971446991 2023-01-22 14:58:54.572947: step: 292/459, loss: 0.6970157027244568 2023-01-22 14:58:55.214588: step: 294/459, loss: 0.16472433507442474 2023-01-22 14:58:55.863706: step: 296/459, loss: 0.5106488466262817 2023-01-22 14:58:56.427352: step: 298/459, loss: 0.4635222554206848 2023-01-22 14:58:57.089416: step: 300/459, loss: 0.4714438021183014 2023-01-22 14:58:57.752440: step: 302/459, loss: 1.2357920408248901 2023-01-22 14:58:58.398054: step: 304/459, loss: 1.6775257587432861 2023-01-22 14:58:59.040684: step: 306/459, loss: 0.19140349328517914 2023-01-22 14:58:59.693282: step: 308/459, loss: 0.6259984374046326 2023-01-22 14:59:00.318714: step: 310/459, loss: 0.3037130832672119 2023-01-22 14:59:00.974272: step: 312/459, loss: 1.1598334312438965 2023-01-22 14:59:01.693337: step: 314/459, loss: 0.2912275493144989 2023-01-22 14:59:02.298660: step: 316/459, loss: 1.7167121171951294 2023-01-22 14:59:02.919564: step: 318/459, loss: 0.31190580129623413 2023-01-22 14:59:03.464698: step: 320/459, loss: 1.6132316589355469 2023-01-22 14:59:04.114070: step: 322/459, loss: 2.378019094467163 2023-01-22 14:59:04.849367: step: 324/459, loss: 0.1610361635684967 2023-01-22 14:59:05.476585: step: 326/459, loss: 0.36185479164123535 2023-01-22 14:59:06.097232: step: 328/459, loss: 0.2745131850242615 2023-01-22 14:59:06.688485: step: 330/459, loss: 0.8475165963172913 2023-01-22 14:59:07.303323: step: 332/459, loss: 0.39679452776908875 2023-01-22 14:59:08.008724: step: 334/459, loss: 0.7777525186538696 2023-01-22 14:59:08.642857: step: 336/459, loss: 0.8795884251594543 2023-01-22 14:59:09.225355: step: 338/459, loss: 0.6839327812194824 2023-01-22 14:59:09.865227: step: 340/459, loss: 7.170321464538574 2023-01-22 14:59:10.467768: step: 342/459, loss: 1.0697476863861084 2023-01-22 14:59:11.016718: step: 344/459, loss: 0.19755275547504425 2023-01-22 14:59:11.605865: step: 346/459, loss: 0.2249654233455658 2023-01-22 14:59:12.216917: step: 348/459, loss: 0.3696065843105316 2023-01-22 14:59:12.813827: step: 350/459, loss: 0.3938450813293457 2023-01-22 14:59:13.421056: step: 352/459, loss: 0.3306106626987457 2023-01-22 14:59:14.022060: step: 354/459, loss: 0.2166425585746765 2023-01-22 14:59:14.649450: step: 356/459, loss: 1.3610038757324219 2023-01-22 14:59:15.231250: step: 358/459, loss: 1.1140087842941284 2023-01-22 14:59:15.917313: step: 360/459, loss: 0.4373718202114105 2023-01-22 14:59:16.662477: step: 362/459, loss: 1.871800422668457 2023-01-22 14:59:17.343365: step: 364/459, loss: 0.296303927898407 2023-01-22 14:59:17.972551: step: 366/459, loss: 0.39145511388778687 2023-01-22 14:59:18.565105: step: 368/459, loss: 0.5774985551834106 2023-01-22 14:59:19.182282: step: 370/459, loss: 0.3297582268714905 2023-01-22 14:59:19.888063: step: 372/459, loss: 0.4229975640773773 2023-01-22 14:59:20.461320: step: 374/459, loss: 0.3545452952384949 2023-01-22 14:59:21.089198: step: 376/459, loss: 0.8005603551864624 2023-01-22 14:59:21.760593: step: 378/459, loss: 0.3399691879749298 2023-01-22 14:59:22.438821: step: 380/459, loss: 0.5770674347877502 2023-01-22 14:59:23.159464: step: 382/459, loss: 7.1543426513671875 2023-01-22 14:59:23.753636: step: 384/459, loss: 0.6102972030639648 2023-01-22 14:59:24.334822: step: 386/459, loss: 1.0909634828567505 2023-01-22 14:59:24.936590: step: 388/459, loss: 0.6971231698989868 2023-01-22 14:59:25.619967: step: 390/459, loss: 1.4983582496643066 2023-01-22 14:59:26.223136: step: 392/459, loss: 1.0330841541290283 2023-01-22 14:59:26.929464: step: 394/459, loss: 0.21969929337501526 2023-01-22 14:59:27.622104: step: 396/459, loss: 0.23954816162586212 2023-01-22 14:59:28.230102: step: 398/459, loss: 1.5411853790283203 2023-01-22 14:59:28.861239: step: 400/459, loss: 0.44886651635169983 2023-01-22 14:59:29.453334: step: 402/459, loss: 0.5215051770210266 2023-01-22 14:59:30.073960: step: 404/459, loss: 0.3777138292789459 2023-01-22 14:59:30.632159: step: 406/459, loss: 0.26830899715423584 2023-01-22 14:59:31.296941: step: 408/459, loss: 2.5388641357421875 2023-01-22 14:59:31.924554: step: 410/459, loss: 0.5322813391685486 2023-01-22 14:59:32.559111: step: 412/459, loss: 2.1427488327026367 2023-01-22 14:59:33.132881: step: 414/459, loss: 0.3834417760372162 2023-01-22 14:59:33.733260: step: 416/459, loss: 1.8682465553283691 2023-01-22 14:59:34.335022: step: 418/459, loss: 0.4625534415245056 2023-01-22 14:59:34.959056: step: 420/459, loss: 1.2184056043624878 2023-01-22 14:59:35.604635: step: 422/459, loss: 4.828584671020508 2023-01-22 14:59:36.217196: step: 424/459, loss: 0.7413101196289062 2023-01-22 14:59:37.016796: step: 426/459, loss: 0.36927616596221924 2023-01-22 14:59:37.676560: step: 428/459, loss: 0.5644208192825317 2023-01-22 14:59:38.328669: step: 430/459, loss: 0.4294578731060028 2023-01-22 14:59:38.976670: step: 432/459, loss: 0.5862581133842468 2023-01-22 14:59:39.692434: step: 434/459, loss: 0.4169001281261444 2023-01-22 14:59:40.338883: step: 436/459, loss: 0.8635799884796143 2023-01-22 14:59:40.939347: step: 438/459, loss: 0.38874924182891846 2023-01-22 14:59:41.618952: step: 440/459, loss: 0.35236069560050964 2023-01-22 14:59:42.240182: step: 442/459, loss: 0.5622062683105469 2023-01-22 14:59:42.844500: step: 444/459, loss: 0.2912589907646179 2023-01-22 14:59:43.556530: step: 446/459, loss: 0.6686769127845764 2023-01-22 14:59:44.132266: step: 448/459, loss: 1.482969045639038 2023-01-22 14:59:44.764971: step: 450/459, loss: 2.537806749343872 2023-01-22 14:59:45.366998: step: 452/459, loss: 0.9907529950141907 2023-01-22 14:59:45.999760: step: 454/459, loss: 0.747535765171051 2023-01-22 14:59:46.607882: step: 456/459, loss: 0.3112391233444214 2023-01-22 14:59:47.174642: step: 458/459, loss: 0.16425691545009613 2023-01-22 14:59:47.774275: step: 460/459, loss: 0.9404773712158203 2023-01-22 14:59:48.417904: step: 462/459, loss: 0.5583269596099854 2023-01-22 14:59:49.028500: step: 464/459, loss: 0.23644962906837463 2023-01-22 14:59:49.655248: step: 466/459, loss: 0.9262332916259766 2023-01-22 14:59:50.248656: step: 468/459, loss: 2.090968608856201 2023-01-22 14:59:50.960652: step: 470/459, loss: 0.4244643449783325 2023-01-22 14:59:51.673158: step: 472/459, loss: 0.9806670546531677 2023-01-22 14:59:52.348847: step: 474/459, loss: 0.7742682695388794 2023-01-22 14:59:52.894596: step: 476/459, loss: 0.8604333996772766 2023-01-22 14:59:53.571746: step: 478/459, loss: 0.23837833106517792 2023-01-22 14:59:54.143293: step: 480/459, loss: 0.4033704102039337 2023-01-22 14:59:54.807870: step: 482/459, loss: 0.8875759243965149 2023-01-22 14:59:55.411334: step: 484/459, loss: 0.5874180793762207 2023-01-22 14:59:56.029968: step: 486/459, loss: 0.6893266439437866 2023-01-22 14:59:56.701304: step: 488/459, loss: 0.3044484257698059 2023-01-22 14:59:57.354008: step: 490/459, loss: 0.5761837363243103 2023-01-22 14:59:57.986622: step: 492/459, loss: 0.702411413192749 2023-01-22 14:59:58.589206: step: 494/459, loss: 0.6657178401947021 2023-01-22 14:59:59.193915: step: 496/459, loss: 4.004636287689209 2023-01-22 14:59:59.855494: step: 498/459, loss: 0.6994023323059082 2023-01-22 15:00:00.487636: step: 500/459, loss: 5.048579692840576 2023-01-22 15:00:01.091374: step: 502/459, loss: 0.4351760149002075 2023-01-22 15:00:01.692366: step: 504/459, loss: 1.290708065032959 2023-01-22 15:00:02.314937: step: 506/459, loss: 1.073487401008606 2023-01-22 15:00:02.892094: step: 508/459, loss: 1.109041690826416 2023-01-22 15:00:03.522713: step: 510/459, loss: 0.14333273470401764 2023-01-22 15:00:04.090938: step: 512/459, loss: 0.37456217408180237 2023-01-22 15:00:04.726426: step: 514/459, loss: 0.17754125595092773 2023-01-22 15:00:05.301510: step: 516/459, loss: 2.3992412090301514 2023-01-22 15:00:05.910685: step: 518/459, loss: 1.3298102617263794 2023-01-22 15:00:06.450558: step: 520/459, loss: 0.538236141204834 2023-01-22 15:00:07.112799: step: 522/459, loss: 0.24642974138259888 2023-01-22 15:00:07.835645: step: 524/459, loss: 0.655828058719635 2023-01-22 15:00:08.460723: step: 526/459, loss: 0.4264677166938782 2023-01-22 15:00:09.098372: step: 528/459, loss: 1.026757001876831 2023-01-22 15:00:09.681385: step: 530/459, loss: 0.5578076839447021 2023-01-22 15:00:10.361725: step: 532/459, loss: 1.3688119649887085 2023-01-22 15:00:11.048454: step: 534/459, loss: 0.31749606132507324 2023-01-22 15:00:11.641873: step: 536/459, loss: 1.587491750717163 2023-01-22 15:00:12.278303: step: 538/459, loss: 0.35688894987106323 2023-01-22 15:00:12.894913: step: 540/459, loss: 0.5425432324409485 2023-01-22 15:00:13.528134: step: 542/459, loss: 0.5712619423866272 2023-01-22 15:00:14.162638: step: 544/459, loss: 0.21082976460456848 2023-01-22 15:00:14.747949: step: 546/459, loss: 1.4559690952301025 2023-01-22 15:00:15.338885: step: 548/459, loss: 0.8858966827392578 2023-01-22 15:00:15.992176: step: 550/459, loss: 0.8254902362823486 2023-01-22 15:00:16.618687: step: 552/459, loss: 0.31102392077445984 2023-01-22 15:00:17.203669: step: 554/459, loss: 1.4849827289581299 2023-01-22 15:00:17.817858: step: 556/459, loss: 0.35040199756622314 2023-01-22 15:00:18.367675: step: 558/459, loss: 0.934795081615448 2023-01-22 15:00:18.980604: step: 560/459, loss: 0.6593267917633057 2023-01-22 15:00:19.590948: step: 562/459, loss: 0.1280091404914856 2023-01-22 15:00:20.214672: step: 564/459, loss: 1.4100955724716187 2023-01-22 15:00:20.873057: step: 566/459, loss: 0.26679667830467224 2023-01-22 15:00:21.492550: step: 568/459, loss: 0.29464778304100037 2023-01-22 15:00:22.080684: step: 570/459, loss: 0.7837017178535461 2023-01-22 15:00:22.700412: step: 572/459, loss: 1.77711820602417 2023-01-22 15:00:23.316757: step: 574/459, loss: 1.1063870191574097 2023-01-22 15:00:23.893784: step: 576/459, loss: 0.9454250335693359 2023-01-22 15:00:24.507264: step: 578/459, loss: 0.15608854591846466 2023-01-22 15:00:25.138750: step: 580/459, loss: 0.975094735622406 2023-01-22 15:00:25.793327: step: 582/459, loss: 1.981694221496582 2023-01-22 15:00:26.406412: step: 584/459, loss: 3.0524981021881104 2023-01-22 15:00:27.127220: step: 586/459, loss: 1.1475942134857178 2023-01-22 15:00:27.763519: step: 588/459, loss: 0.3981750011444092 2023-01-22 15:00:28.362703: step: 590/459, loss: 0.813323438167572 2023-01-22 15:00:29.057060: step: 592/459, loss: 1.2731671333312988 2023-01-22 15:00:29.714037: step: 594/459, loss: 0.49309971928596497 2023-01-22 15:00:30.351207: step: 596/459, loss: 0.9403312802314758 2023-01-22 15:00:31.021633: step: 598/459, loss: 2.112837076187134 2023-01-22 15:00:31.714630: step: 600/459, loss: 0.41827961802482605 2023-01-22 15:00:32.312298: step: 602/459, loss: 0.5128335952758789 2023-01-22 15:00:32.910874: step: 604/459, loss: 1.3165467977523804 2023-01-22 15:00:33.547627: step: 606/459, loss: 0.34454143047332764 2023-01-22 15:00:34.126025: step: 608/459, loss: 0.7101902365684509 2023-01-22 15:00:34.722939: step: 610/459, loss: 9.194612503051758 2023-01-22 15:00:35.332066: step: 612/459, loss: 1.027464747428894 2023-01-22 15:00:35.924665: step: 614/459, loss: 0.9529290795326233 2023-01-22 15:00:36.536577: step: 616/459, loss: 1.6441230773925781 2023-01-22 15:00:37.160066: step: 618/459, loss: 0.8459868431091309 2023-01-22 15:00:37.738022: step: 620/459, loss: 0.2956472933292389 2023-01-22 15:00:38.396957: step: 622/459, loss: 0.29496756196022034 2023-01-22 15:00:38.995565: step: 624/459, loss: 0.22708159685134888 2023-01-22 15:00:39.624106: step: 626/459, loss: 4.165173530578613 2023-01-22 15:00:40.268164: step: 628/459, loss: 1.263288974761963 2023-01-22 15:00:40.928209: step: 630/459, loss: 0.6833214163780212 2023-01-22 15:00:41.523250: step: 632/459, loss: 1.3527271747589111 2023-01-22 15:00:42.137302: step: 634/459, loss: 1.3046780824661255 2023-01-22 15:00:42.806022: step: 636/459, loss: 3.979536533355713 2023-01-22 15:00:43.444775: step: 638/459, loss: 0.7318082451820374 2023-01-22 15:00:44.034694: step: 640/459, loss: 0.5147238969802856 2023-01-22 15:00:44.686809: step: 642/459, loss: 0.6640335917472839 2023-01-22 15:00:45.312439: step: 644/459, loss: 0.6623867154121399 2023-01-22 15:00:45.931737: step: 646/459, loss: 0.24724017083644867 2023-01-22 15:00:46.532933: step: 648/459, loss: 0.7442870736122131 2023-01-22 15:00:47.198486: step: 650/459, loss: 1.2145416736602783 2023-01-22 15:00:47.783588: step: 652/459, loss: 0.16415342688560486 2023-01-22 15:00:48.394660: step: 654/459, loss: 0.9790352582931519 2023-01-22 15:00:49.013708: step: 656/459, loss: 0.9547768831253052 2023-01-22 15:00:49.740213: step: 658/459, loss: 0.3548496961593628 2023-01-22 15:00:50.355638: step: 660/459, loss: 0.34024447202682495 2023-01-22 15:00:50.951584: step: 662/459, loss: 0.623997151851654 2023-01-22 15:00:51.569427: step: 664/459, loss: 0.9678491353988647 2023-01-22 15:00:52.191185: step: 666/459, loss: 0.6363632678985596 2023-01-22 15:00:52.854141: step: 668/459, loss: 0.6052090525627136 2023-01-22 15:00:53.470544: step: 670/459, loss: 0.4268539845943451 2023-01-22 15:00:54.128966: step: 672/459, loss: 1.306275725364685 2023-01-22 15:00:54.757753: step: 674/459, loss: 0.9007842540740967 2023-01-22 15:00:55.366814: step: 676/459, loss: 0.469879150390625 2023-01-22 15:00:56.001575: step: 678/459, loss: 0.6750226020812988 2023-01-22 15:00:56.632941: step: 680/459, loss: 0.3350133001804352 2023-01-22 15:00:57.267241: step: 682/459, loss: 0.25871163606643677 2023-01-22 15:00:57.895126: step: 684/459, loss: 1.545377254486084 2023-01-22 15:00:58.579935: step: 686/459, loss: 1.1244299411773682 2023-01-22 15:00:59.183035: step: 688/459, loss: 0.38496655225753784 2023-01-22 15:00:59.864184: step: 690/459, loss: 0.7930543422698975 2023-01-22 15:01:00.522232: step: 692/459, loss: 0.9532507658004761 2023-01-22 15:01:01.095943: step: 694/459, loss: 0.23917226493358612 2023-01-22 15:01:01.744924: step: 696/459, loss: 1.0812896490097046 2023-01-22 15:01:02.382277: step: 698/459, loss: 0.7308365702629089 2023-01-22 15:01:03.007303: step: 700/459, loss: 0.5093178153038025 2023-01-22 15:01:03.620181: step: 702/459, loss: 0.2825549840927124 2023-01-22 15:01:04.205324: step: 704/459, loss: 0.3172868490219116 2023-01-22 15:01:04.832885: step: 706/459, loss: 0.7376245856285095 2023-01-22 15:01:05.417075: step: 708/459, loss: 0.1878138780593872 2023-01-22 15:01:06.052887: step: 710/459, loss: 0.42094627022743225 2023-01-22 15:01:06.674476: step: 712/459, loss: 0.3443391025066376 2023-01-22 15:01:07.316586: step: 714/459, loss: 0.4145815372467041 2023-01-22 15:01:07.954896: step: 716/459, loss: 1.3632001876831055 2023-01-22 15:01:08.596604: step: 718/459, loss: 1.5363328456878662 2023-01-22 15:01:09.219427: step: 720/459, loss: 0.5830351710319519 2023-01-22 15:01:09.868300: step: 722/459, loss: 0.3073706328868866 2023-01-22 15:01:10.486788: step: 724/459, loss: 0.6764310002326965 2023-01-22 15:01:11.060806: step: 726/459, loss: 0.6156159043312073 2023-01-22 15:01:11.680937: step: 728/459, loss: 0.6074985265731812 2023-01-22 15:01:12.325811: step: 730/459, loss: 0.6022450923919678 2023-01-22 15:01:12.928916: step: 732/459, loss: 0.1931115984916687 2023-01-22 15:01:13.575923: step: 734/459, loss: 0.36060911417007446 2023-01-22 15:01:14.241589: step: 736/459, loss: 0.6752879619598389 2023-01-22 15:01:14.834568: step: 738/459, loss: 1.3036683797836304 2023-01-22 15:01:15.482433: step: 740/459, loss: 2.7109341621398926 2023-01-22 15:01:16.114135: step: 742/459, loss: 0.8737717270851135 2023-01-22 15:01:16.784062: step: 744/459, loss: 0.5564104318618774 2023-01-22 15:01:17.369782: step: 746/459, loss: 0.48212867975234985 2023-01-22 15:01:17.972990: step: 748/459, loss: 0.5045826435089111 2023-01-22 15:01:18.636977: step: 750/459, loss: 1.5448074340820312 2023-01-22 15:01:19.265925: step: 752/459, loss: 0.7337875962257385 2023-01-22 15:01:19.928094: step: 754/459, loss: 3.2183310985565186 2023-01-22 15:01:20.565335: step: 756/459, loss: 0.2025032639503479 2023-01-22 15:01:21.136932: step: 758/459, loss: 0.2901787757873535 2023-01-22 15:01:21.752721: step: 760/459, loss: 0.37995773553848267 2023-01-22 15:01:22.424605: step: 762/459, loss: 0.13011756539344788 2023-01-22 15:01:23.035403: step: 764/459, loss: 0.3708142936229706 2023-01-22 15:01:23.605519: step: 766/459, loss: 0.5477674007415771 2023-01-22 15:01:24.210944: step: 768/459, loss: 1.8287227153778076 2023-01-22 15:01:24.761056: step: 770/459, loss: 1.4200137853622437 2023-01-22 15:01:25.372801: step: 772/459, loss: 0.9841118454933167 2023-01-22 15:01:25.950460: step: 774/459, loss: 0.6927581429481506 2023-01-22 15:01:26.543430: step: 776/459, loss: 1.628683090209961 2023-01-22 15:01:27.181722: step: 778/459, loss: 0.48526015877723694 2023-01-22 15:01:27.830566: step: 780/459, loss: 0.518096923828125 2023-01-22 15:01:28.461077: step: 782/459, loss: 0.5652273893356323 2023-01-22 15:01:29.071723: step: 784/459, loss: 1.1530791521072388 2023-01-22 15:01:29.699031: step: 786/459, loss: 1.4519498348236084 2023-01-22 15:01:30.357629: step: 788/459, loss: 0.697517991065979 2023-01-22 15:01:30.926578: step: 790/459, loss: 0.8584719896316528 2023-01-22 15:01:31.516380: step: 792/459, loss: 0.37355929613113403 2023-01-22 15:01:32.120642: step: 794/459, loss: 1.0803978443145752 2023-01-22 15:01:32.819862: step: 796/459, loss: 0.5410293936729431 2023-01-22 15:01:33.418219: step: 798/459, loss: 6.554257869720459 2023-01-22 15:01:34.031160: step: 800/459, loss: 0.6581606864929199 2023-01-22 15:01:34.759149: step: 802/459, loss: 1.2966583967208862 2023-01-22 15:01:35.362207: step: 804/459, loss: 0.8709930181503296 2023-01-22 15:01:36.008027: step: 806/459, loss: 0.7116865515708923 2023-01-22 15:01:36.751848: step: 808/459, loss: 0.5635862946510315 2023-01-22 15:01:37.353914: step: 810/459, loss: 0.5405212044715881 2023-01-22 15:01:37.926641: step: 812/459, loss: 1.5194367170333862 2023-01-22 15:01:38.550625: step: 814/459, loss: 0.2725529074668884 2023-01-22 15:01:39.199646: step: 816/459, loss: 0.4577837288379669 2023-01-22 15:01:39.804088: step: 818/459, loss: 0.8662641048431396 2023-01-22 15:01:40.446548: step: 820/459, loss: 5.047370433807373 2023-01-22 15:01:41.108699: step: 822/459, loss: 0.6026519536972046 2023-01-22 15:01:41.779830: step: 824/459, loss: 0.3277706205844879 2023-01-22 15:01:42.407889: step: 826/459, loss: 0.9363315105438232 2023-01-22 15:01:43.035214: step: 828/459, loss: 0.472231924533844 2023-01-22 15:01:43.621400: step: 830/459, loss: 0.4954955279827118 2023-01-22 15:01:44.224066: step: 832/459, loss: 3.876652956008911 2023-01-22 15:01:44.851631: step: 834/459, loss: 0.7085077166557312 2023-01-22 15:01:45.428397: step: 836/459, loss: 0.09033335745334625 2023-01-22 15:01:45.990279: step: 838/459, loss: 0.77341228723526 2023-01-22 15:01:46.642670: step: 840/459, loss: 0.6768239140510559 2023-01-22 15:01:47.260294: step: 842/459, loss: 1.4462571144104004 2023-01-22 15:01:47.898604: step: 844/459, loss: 0.34028807282447815 2023-01-22 15:01:48.535163: step: 846/459, loss: 0.9880974292755127 2023-01-22 15:01:49.143151: step: 848/459, loss: 0.3849209249019623 2023-01-22 15:01:49.722073: step: 850/459, loss: 0.5790651440620422 2023-01-22 15:01:50.359971: step: 852/459, loss: 0.5081334114074707 2023-01-22 15:01:50.981497: step: 854/459, loss: 0.33887261152267456 2023-01-22 15:01:51.649565: step: 856/459, loss: 0.9162741899490356 2023-01-22 15:01:52.224605: step: 858/459, loss: 0.5560190677642822 2023-01-22 15:01:52.900920: step: 860/459, loss: 0.2886817753314972 2023-01-22 15:01:53.575438: step: 862/459, loss: 0.9629916548728943 2023-01-22 15:01:54.261914: step: 864/459, loss: 0.4945828914642334 2023-01-22 15:01:54.804361: step: 866/459, loss: 0.5839922428131104 2023-01-22 15:01:55.433758: step: 868/459, loss: 0.5373674631118774 2023-01-22 15:01:56.108384: step: 870/459, loss: 1.7175034284591675 2023-01-22 15:01:56.749205: step: 872/459, loss: 1.850757360458374 2023-01-22 15:01:57.339149: step: 874/459, loss: 0.344366192817688 2023-01-22 15:01:57.923606: step: 876/459, loss: 0.34696000814437866 2023-01-22 15:01:58.538963: step: 878/459, loss: 0.4614221453666687 2023-01-22 15:01:59.222725: step: 880/459, loss: 0.3412914574146271 2023-01-22 15:01:59.781834: step: 882/459, loss: 0.5585355758666992 2023-01-22 15:02:00.360977: step: 884/459, loss: 0.3479032516479492 2023-01-22 15:02:00.988783: step: 886/459, loss: 1.054995059967041 2023-01-22 15:02:01.541503: step: 888/459, loss: 0.3836417496204376 2023-01-22 15:02:02.134629: step: 890/459, loss: 2.5373034477233887 2023-01-22 15:02:02.843672: step: 892/459, loss: 0.4757588803768158 2023-01-22 15:02:03.507973: step: 894/459, loss: 0.36857426166534424 2023-01-22 15:02:04.093533: step: 896/459, loss: 2.00103759765625 2023-01-22 15:02:04.738674: step: 898/459, loss: 2.831582546234131 2023-01-22 15:02:05.359739: step: 900/459, loss: 0.17843124270439148 2023-01-22 15:02:06.027176: step: 902/459, loss: 0.45073071122169495 2023-01-22 15:02:06.572848: step: 904/459, loss: 0.2447817176580429 2023-01-22 15:02:07.230703: step: 906/459, loss: 0.41594845056533813 2023-01-22 15:02:07.837866: step: 908/459, loss: 0.6090520024299622 2023-01-22 15:02:08.542826: step: 910/459, loss: 0.4230607748031616 2023-01-22 15:02:09.246146: step: 912/459, loss: 2.29099702835083 2023-01-22 15:02:09.849982: step: 914/459, loss: 0.4011280834674835 2023-01-22 15:02:10.383727: step: 916/459, loss: 0.2609415054321289 2023-01-22 15:02:11.007482: step: 918/459, loss: 0.7769078016281128 2023-01-22 15:02:11.455596: step: 920/459, loss: 0.03702351078391075 ================================================== Loss: 0.962 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27463650173611115, 'r': 0.29960345643939396, 'f1': 0.2865772192028986}, 'combined': 0.21116216151792525, 'epoch': 4} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3275304334361018, 'r': 0.25382122518550904, 'f1': 0.28600305741761856}, 'combined': 0.18304195674727586, 'epoch': 4} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2603525270568457, 'r': 0.302758431085044, 'f1': 0.2799587593921248}, 'combined': 0.2062854016573551, 'epoch': 4} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.32590197792868997, 'r': 0.25906545613932164, 'f1': 0.28866545264462334}, 'combined': 0.18474588969255892, 'epoch': 4} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2751519958629172, 'r': 0.3006869348729228, 'f1': 0.28735330608670273}, 'combined': 0.21173401501125463, 'epoch': 4} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3399433936139728, 'r': 0.2725727937886582, 'f1': 0.30255305062313215}, 'combined': 0.21692482874866081, 'epoch': 4} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2387005649717514, 'r': 0.40238095238095234, 'f1': 0.29964539007092195}, 'combined': 0.1997635933806146, 'epoch': 4} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.22321428571428573, 'r': 0.2717391304347826, 'f1': 0.2450980392156863}, 'combined': 0.12254901960784315, 'epoch': 4} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2638888888888889, 'r': 0.16379310344827586, 'f1': 0.20212765957446807}, 'combined': 0.13475177304964536, 'epoch': 4} New best chinese model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27463650173611115, 'r': 0.29960345643939396, 'f1': 0.2865772192028986}, 'combined': 0.21116216151792525, 'epoch': 4} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3275304334361018, 'r': 0.25382122518550904, 'f1': 0.28600305741761856}, 'combined': 0.18304195674727586, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2387005649717514, 'r': 0.40238095238095234, 'f1': 0.29964539007092195}, 'combined': 0.1997635933806146, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2771977511895471, 'r': 0.3303229369013958, 'f1': 0.30143755454032134}, 'combined': 0.22211188229286835, 'epoch': 1} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33558826373119, 'r': 0.25161485833894504, 'f1': 0.2875972223759756}, 'combined': 0.18406222232062439, 'epoch': 1} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.23214285714285715, 'r': 0.2826086956521739, 'f1': 0.2549019607843137}, 'combined': 0.12745098039215685, 'epoch': 1} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29402583508521984, 'r': 0.3269433384439067, 'f1': 0.30961211026044716}, 'combined': 0.22813523913927683, 'epoch': 1} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3423541418131789, 'r': 0.25465905170424247, 'f1': 0.29206584393053214}, 'combined': 0.209405699421891, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2777777777777778, 'r': 0.1724137931034483, 'f1': 0.21276595744680854}, 'combined': 0.14184397163120568, 'epoch': 1} ****************************** Epoch: 5 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 15:04:55.333651: step: 2/459, loss: 1.1250957250595093 2023-01-22 15:04:55.947165: step: 4/459, loss: 0.5849816203117371 2023-01-22 15:04:56.574968: step: 6/459, loss: 0.3061555325984955 2023-01-22 15:04:57.235639: step: 8/459, loss: 1.4607326984405518 2023-01-22 15:04:57.850209: step: 10/459, loss: 1.1955264806747437 2023-01-22 15:04:58.456584: step: 12/459, loss: 0.738239049911499 2023-01-22 15:04:59.069019: step: 14/459, loss: 0.24805527925491333 2023-01-22 15:04:59.832385: step: 16/459, loss: 1.1191184520721436 2023-01-22 15:05:00.413448: step: 18/459, loss: 0.6409338712692261 2023-01-22 15:05:00.962462: step: 20/459, loss: 0.9961411356925964 2023-01-22 15:05:01.568412: step: 22/459, loss: 0.486610472202301 2023-01-22 15:05:02.194367: step: 24/459, loss: 0.8025717735290527 2023-01-22 15:05:02.802357: step: 26/459, loss: 2.1908762454986572 2023-01-22 15:05:03.398492: step: 28/459, loss: 0.4036599099636078 2023-01-22 15:05:04.042273: step: 30/459, loss: 0.2366650402545929 2023-01-22 15:05:04.659429: step: 32/459, loss: 0.4997628331184387 2023-01-22 15:05:05.264074: step: 34/459, loss: 0.5818465352058411 2023-01-22 15:05:05.876381: step: 36/459, loss: 0.7786909341812134 2023-01-22 15:05:06.511486: step: 38/459, loss: 1.353695034980774 2023-01-22 15:05:07.129809: step: 40/459, loss: 0.25941649079322815 2023-01-22 15:05:07.772471: step: 42/459, loss: 0.20465007424354553 2023-01-22 15:05:08.371924: step: 44/459, loss: 0.566429615020752 2023-01-22 15:05:09.057425: step: 46/459, loss: 0.13367672264575958 2023-01-22 15:05:09.669189: step: 48/459, loss: 0.2941857576370239 2023-01-22 15:05:10.271415: step: 50/459, loss: 0.6332839131355286 2023-01-22 15:05:10.939313: step: 52/459, loss: 0.3266799747943878 2023-01-22 15:05:11.488721: step: 54/459, loss: 0.65900719165802 2023-01-22 15:05:12.153227: step: 56/459, loss: 0.3880600333213806 2023-01-22 15:05:12.777303: step: 58/459, loss: 0.5900053381919861 2023-01-22 15:05:13.379728: step: 60/459, loss: 0.16163985431194305 2023-01-22 15:05:13.982639: step: 62/459, loss: 0.5139826536178589 2023-01-22 15:05:14.610809: step: 64/459, loss: 0.6075003147125244 2023-01-22 15:05:15.331084: step: 66/459, loss: 6.058202266693115 2023-01-22 15:05:15.960677: step: 68/459, loss: 2.2191426753997803 2023-01-22 15:05:16.546058: step: 70/459, loss: 0.9786855578422546 2023-01-22 15:05:17.225779: step: 72/459, loss: 0.8317492604255676 2023-01-22 15:05:17.813233: step: 74/459, loss: 0.46353065967559814 2023-01-22 15:05:18.422258: step: 76/459, loss: 0.9819772839546204 2023-01-22 15:05:19.072071: step: 78/459, loss: 0.3264426589012146 2023-01-22 15:05:19.750410: step: 80/459, loss: 1.1906678676605225 2023-01-22 15:05:20.278566: step: 82/459, loss: 0.5917677879333496 2023-01-22 15:05:20.891925: step: 84/459, loss: 0.5492069125175476 2023-01-22 15:05:21.673374: step: 86/459, loss: 0.2593958377838135 2023-01-22 15:05:22.310920: step: 88/459, loss: 1.9528496265411377 2023-01-22 15:05:22.903256: step: 90/459, loss: 0.2518801987171173 2023-01-22 15:05:23.470579: step: 92/459, loss: 0.7394717931747437 2023-01-22 15:05:24.045471: step: 94/459, loss: 0.30697351694107056 2023-01-22 15:05:24.672902: step: 96/459, loss: 0.5116348266601562 2023-01-22 15:05:25.291792: step: 98/459, loss: 0.5890006422996521 2023-01-22 15:05:25.880458: step: 100/459, loss: 0.47932764887809753 2023-01-22 15:05:26.530555: step: 102/459, loss: 1.0527708530426025 2023-01-22 15:05:27.289969: step: 104/459, loss: 1.0107903480529785 2023-01-22 15:05:27.903452: step: 106/459, loss: 0.24110901355743408 2023-01-22 15:05:28.548563: step: 108/459, loss: 0.35080650448799133 2023-01-22 15:05:29.204449: step: 110/459, loss: 0.1798781007528305 2023-01-22 15:05:29.808014: step: 112/459, loss: 1.2978017330169678 2023-01-22 15:05:30.488394: step: 114/459, loss: 0.8062027096748352 2023-01-22 15:05:31.124376: step: 116/459, loss: 0.6498515009880066 2023-01-22 15:05:31.796290: step: 118/459, loss: 0.8612428903579712 2023-01-22 15:05:32.365457: step: 120/459, loss: 0.35194361209869385 2023-01-22 15:05:32.965538: step: 122/459, loss: 0.3722173869609833 2023-01-22 15:05:33.553325: step: 124/459, loss: 1.109161376953125 2023-01-22 15:05:34.120333: step: 126/459, loss: 0.3650253117084503 2023-01-22 15:05:34.724389: step: 128/459, loss: 0.41189441084861755 2023-01-22 15:05:35.341429: step: 130/459, loss: 0.521376371383667 2023-01-22 15:05:35.976078: step: 132/459, loss: 0.957996666431427 2023-01-22 15:05:36.637733: step: 134/459, loss: 0.34582778811454773 2023-01-22 15:05:37.293553: step: 136/459, loss: 0.4513161778450012 2023-01-22 15:05:38.001471: step: 138/459, loss: 0.383499413728714 2023-01-22 15:05:38.634845: step: 140/459, loss: 0.3807213306427002 2023-01-22 15:05:39.324523: step: 142/459, loss: 0.770026683807373 2023-01-22 15:05:39.924905: step: 144/459, loss: 0.4898189604282379 2023-01-22 15:05:40.495476: step: 146/459, loss: 0.32060733437538147 2023-01-22 15:05:41.134920: step: 148/459, loss: 2.4350221157073975 2023-01-22 15:05:41.761108: step: 150/459, loss: 0.3748868703842163 2023-01-22 15:05:42.451707: step: 152/459, loss: 1.7465571165084839 2023-01-22 15:05:43.064577: step: 154/459, loss: 0.13935813307762146 2023-01-22 15:05:43.752516: step: 156/459, loss: 0.19593283534049988 2023-01-22 15:05:44.360304: step: 158/459, loss: 0.9951680302619934 2023-01-22 15:05:44.965486: step: 160/459, loss: 0.9329002499580383 2023-01-22 15:05:45.603000: step: 162/459, loss: 0.2291296273469925 2023-01-22 15:05:46.284941: step: 164/459, loss: 0.17546409368515015 2023-01-22 15:05:46.959762: step: 166/459, loss: 2.176149845123291 2023-01-22 15:05:47.537378: step: 168/459, loss: 0.32461491227149963 2023-01-22 15:05:48.153644: step: 170/459, loss: 0.1569189429283142 2023-01-22 15:05:48.803266: step: 172/459, loss: 0.4803396761417389 2023-01-22 15:05:49.528432: step: 174/459, loss: 0.39426153898239136 2023-01-22 15:05:50.181244: step: 176/459, loss: 2.004765748977661 2023-01-22 15:05:50.869317: step: 178/459, loss: 0.8986284732818604 2023-01-22 15:05:51.479449: step: 180/459, loss: 0.4441792368888855 2023-01-22 15:05:52.023405: step: 182/459, loss: 0.3993532657623291 2023-01-22 15:05:52.605355: step: 184/459, loss: 0.1313290148973465 2023-01-22 15:05:53.311987: step: 186/459, loss: 0.6774348020553589 2023-01-22 15:05:53.945832: step: 188/459, loss: 0.9368045926094055 2023-01-22 15:05:54.480886: step: 190/459, loss: 0.5447070598602295 2023-01-22 15:05:55.096503: step: 192/459, loss: 1.1950680017471313 2023-01-22 15:05:55.747322: step: 194/459, loss: 0.3566013276576996 2023-01-22 15:05:56.305850: step: 196/459, loss: 1.4393688440322876 2023-01-22 15:05:56.862953: step: 198/459, loss: 0.40468499064445496 2023-01-22 15:05:57.522813: step: 200/459, loss: 0.23558788001537323 2023-01-22 15:05:58.119271: step: 202/459, loss: 0.4718135595321655 2023-01-22 15:05:58.700157: step: 204/459, loss: 0.22728925943374634 2023-01-22 15:05:59.346531: step: 206/459, loss: 0.38432031869888306 2023-01-22 15:05:59.985458: step: 208/459, loss: 1.354995846748352 2023-01-22 15:06:00.617537: step: 210/459, loss: 0.2671155035495758 2023-01-22 15:06:01.184107: step: 212/459, loss: 0.45875775814056396 2023-01-22 15:06:01.803323: step: 214/459, loss: 0.9111521244049072 2023-01-22 15:06:02.334957: step: 216/459, loss: 0.7565405368804932 2023-01-22 15:06:02.955827: step: 218/459, loss: 0.3840561509132385 2023-01-22 15:06:03.641332: step: 220/459, loss: 1.0055395364761353 2023-01-22 15:06:04.222846: step: 222/459, loss: 1.6339489221572876 2023-01-22 15:06:04.868828: step: 224/459, loss: 0.4888855814933777 2023-01-22 15:06:05.481151: step: 226/459, loss: 0.5576695799827576 2023-01-22 15:06:06.059807: step: 228/459, loss: 1.1307101249694824 2023-01-22 15:06:06.671143: step: 230/459, loss: 1.8767163753509521 2023-01-22 15:06:07.296962: step: 232/459, loss: 0.46976134181022644 2023-01-22 15:06:07.964024: step: 234/459, loss: 3.9134721755981445 2023-01-22 15:06:08.590644: step: 236/459, loss: 0.7685517072677612 2023-01-22 15:06:09.215208: step: 238/459, loss: 0.22677868604660034 2023-01-22 15:06:09.803848: step: 240/459, loss: 0.7064693570137024 2023-01-22 15:06:10.326783: step: 242/459, loss: 0.26305630803108215 2023-01-22 15:06:10.960357: step: 244/459, loss: 0.6114598512649536 2023-01-22 15:06:11.544978: step: 246/459, loss: 0.9623990058898926 2023-01-22 15:06:12.174441: step: 248/459, loss: 0.27438220381736755 2023-01-22 15:06:12.810899: step: 250/459, loss: 0.8586673736572266 2023-01-22 15:06:13.477064: step: 252/459, loss: 0.36987584829330444 2023-01-22 15:06:14.172384: step: 254/459, loss: 5.455345630645752 2023-01-22 15:06:14.829804: step: 256/459, loss: 2.3300869464874268 2023-01-22 15:06:15.414810: step: 258/459, loss: 0.3853767216205597 2023-01-22 15:06:16.066749: step: 260/459, loss: 0.7167751789093018 2023-01-22 15:06:16.671610: step: 262/459, loss: 0.6666237115859985 2023-01-22 15:06:17.264875: step: 264/459, loss: 1.3618899583816528 2023-01-22 15:06:17.883059: step: 266/459, loss: 0.3742560148239136 2023-01-22 15:06:18.509061: step: 268/459, loss: 0.3000938892364502 2023-01-22 15:06:19.159482: step: 270/459, loss: 0.6537333130836487 2023-01-22 15:06:19.769898: step: 272/459, loss: 0.7675333619117737 2023-01-22 15:06:20.431263: step: 274/459, loss: 0.695469319820404 2023-01-22 15:06:21.035976: step: 276/459, loss: 0.25850632786750793 2023-01-22 15:06:21.579992: step: 278/459, loss: 0.7744042277336121 2023-01-22 15:06:22.204323: step: 280/459, loss: 1.3816345930099487 2023-01-22 15:06:22.802746: step: 282/459, loss: 0.14591647684574127 2023-01-22 15:06:23.432886: step: 284/459, loss: 0.41483718156814575 2023-01-22 15:06:24.038699: step: 286/459, loss: 0.38305991888046265 2023-01-22 15:06:24.654459: step: 288/459, loss: 0.3262856602668762 2023-01-22 15:06:25.321075: step: 290/459, loss: 1.336330771446228 2023-01-22 15:06:25.919833: step: 292/459, loss: 0.5204101800918579 2023-01-22 15:06:26.609489: step: 294/459, loss: 0.39390844106674194 2023-01-22 15:06:27.171252: step: 296/459, loss: 0.49119114875793457 2023-01-22 15:06:27.872426: step: 298/459, loss: 1.5409685373306274 2023-01-22 15:06:28.473712: step: 300/459, loss: 0.20837414264678955 2023-01-22 15:06:29.063745: step: 302/459, loss: 0.17542971670627594 2023-01-22 15:06:29.776197: step: 304/459, loss: 0.4962100386619568 2023-01-22 15:06:30.381505: step: 306/459, loss: 0.24285200238227844 2023-01-22 15:06:31.021604: step: 308/459, loss: 1.7460172176361084 2023-01-22 15:06:31.634738: step: 310/459, loss: 0.9370901584625244 2023-01-22 15:06:32.261188: step: 312/459, loss: 0.42303457856178284 2023-01-22 15:06:32.899732: step: 314/459, loss: 0.19337718188762665 2023-01-22 15:06:33.505300: step: 316/459, loss: 0.15657950937747955 2023-01-22 15:06:34.136097: step: 318/459, loss: 0.5365594029426575 2023-01-22 15:06:34.800540: step: 320/459, loss: 0.8350354433059692 2023-01-22 15:06:35.413330: step: 322/459, loss: 0.9210812449455261 2023-01-22 15:06:36.023252: step: 324/459, loss: 1.9735816717147827 2023-01-22 15:06:36.673847: step: 326/459, loss: 0.9943981170654297 2023-01-22 15:06:37.216212: step: 328/459, loss: 0.6291860938072205 2023-01-22 15:06:37.813803: step: 330/459, loss: 2.2642784118652344 2023-01-22 15:06:38.392527: step: 332/459, loss: 0.3269355595111847 2023-01-22 15:06:39.014587: step: 334/459, loss: 0.4039030075073242 2023-01-22 15:06:39.646415: step: 336/459, loss: 1.1470744609832764 2023-01-22 15:06:40.257502: step: 338/459, loss: 0.40219026803970337 2023-01-22 15:06:40.830891: step: 340/459, loss: 1.1833754777908325 2023-01-22 15:06:41.436988: step: 342/459, loss: 1.2051258087158203 2023-01-22 15:06:42.048370: step: 344/459, loss: 1.3399385213851929 2023-01-22 15:06:42.721734: step: 346/459, loss: 0.6854642033576965 2023-01-22 15:06:43.296545: step: 348/459, loss: 0.9592252373695374 2023-01-22 15:06:43.892021: step: 350/459, loss: 0.7546688318252563 2023-01-22 15:06:44.512756: step: 352/459, loss: 1.800621509552002 2023-01-22 15:06:45.181979: step: 354/459, loss: 2.4855105876922607 2023-01-22 15:06:45.873390: step: 356/459, loss: 0.7821961641311646 2023-01-22 15:06:46.511681: step: 358/459, loss: 0.4410243630409241 2023-01-22 15:06:47.098908: step: 360/459, loss: 1.9141044616699219 2023-01-22 15:06:47.792497: step: 362/459, loss: 0.31983524560928345 2023-01-22 15:06:48.388303: step: 364/459, loss: 1.8422753810882568 2023-01-22 15:06:48.988681: step: 366/459, loss: 0.7850809693336487 2023-01-22 15:06:49.682498: step: 368/459, loss: 0.3681296408176422 2023-01-22 15:06:50.302663: step: 370/459, loss: 0.3304518163204193 2023-01-22 15:06:50.910019: step: 372/459, loss: 0.6121298670768738 2023-01-22 15:06:51.585774: step: 374/459, loss: 0.40334340929985046 2023-01-22 15:06:52.227274: step: 376/459, loss: 0.6266382336616516 2023-01-22 15:06:52.871271: step: 378/459, loss: 2.181429862976074 2023-01-22 15:06:53.531175: step: 380/459, loss: 0.28243786096572876 2023-01-22 15:06:54.185059: step: 382/459, loss: 0.7137157320976257 2023-01-22 15:06:54.837304: step: 384/459, loss: 3.400825262069702 2023-01-22 15:06:55.497725: step: 386/459, loss: 0.8162968158721924 2023-01-22 15:06:56.117967: step: 388/459, loss: 0.43362051248550415 2023-01-22 15:06:56.756901: step: 390/459, loss: 0.34723618626594543 2023-01-22 15:06:57.335415: step: 392/459, loss: 0.35118991136550903 2023-01-22 15:06:57.918741: step: 394/459, loss: 0.8710368275642395 2023-01-22 15:06:58.542327: step: 396/459, loss: 1.2673113346099854 2023-01-22 15:06:59.105061: step: 398/459, loss: 0.3631570041179657 2023-01-22 15:06:59.664988: step: 400/459, loss: 0.298519492149353 2023-01-22 15:07:00.320559: step: 402/459, loss: 0.43955183029174805 2023-01-22 15:07:00.924522: step: 404/459, loss: 0.56607985496521 2023-01-22 15:07:01.520727: step: 406/459, loss: 0.32772862911224365 2023-01-22 15:07:02.126959: step: 408/459, loss: 0.3634019196033478 2023-01-22 15:07:02.812589: step: 410/459, loss: 0.6387931704521179 2023-01-22 15:07:03.455271: step: 412/459, loss: 0.3418464660644531 2023-01-22 15:07:04.039740: step: 414/459, loss: 0.46940064430236816 2023-01-22 15:07:04.639651: step: 416/459, loss: 3.140993595123291 2023-01-22 15:07:05.238737: step: 418/459, loss: 0.9005918502807617 2023-01-22 15:07:05.857674: step: 420/459, loss: 0.4856315553188324 2023-01-22 15:07:06.474525: step: 422/459, loss: 0.5036431550979614 2023-01-22 15:07:07.092608: step: 424/459, loss: 0.48600465059280396 2023-01-22 15:07:07.677182: step: 426/459, loss: 0.5432004332542419 2023-01-22 15:07:08.395159: step: 428/459, loss: 0.5272541642189026 2023-01-22 15:07:08.998860: step: 430/459, loss: 0.2463691681623459 2023-01-22 15:07:09.578534: step: 432/459, loss: 1.1149524450302124 2023-01-22 15:07:10.161012: step: 434/459, loss: 0.37950220704078674 2023-01-22 15:07:10.785111: step: 436/459, loss: 0.9572031497955322 2023-01-22 15:07:11.586674: step: 438/459, loss: 1.9614462852478027 2023-01-22 15:07:12.201476: step: 440/459, loss: 0.6300488710403442 2023-01-22 15:07:12.829661: step: 442/459, loss: 0.37274619936943054 2023-01-22 15:07:13.427788: step: 444/459, loss: 0.511939525604248 2023-01-22 15:07:14.151296: step: 446/459, loss: 0.8238839507102966 2023-01-22 15:07:14.769242: step: 448/459, loss: 0.26054009795188904 2023-01-22 15:07:15.390654: step: 450/459, loss: 0.6724486351013184 2023-01-22 15:07:16.015922: step: 452/459, loss: 0.2485314905643463 2023-01-22 15:07:16.623737: step: 454/459, loss: 0.2898577153682709 2023-01-22 15:07:17.262613: step: 456/459, loss: 3.938953161239624 2023-01-22 15:07:17.895524: step: 458/459, loss: 2.2837331295013428 2023-01-22 15:07:18.515537: step: 460/459, loss: 0.40152671933174133 2023-01-22 15:07:19.253426: step: 462/459, loss: 1.4363292455673218 2023-01-22 15:07:19.905675: step: 464/459, loss: 0.46348118782043457 2023-01-22 15:07:20.530080: step: 466/459, loss: 1.2896990776062012 2023-01-22 15:07:21.133646: step: 468/459, loss: 0.7119784355163574 2023-01-22 15:07:21.724729: step: 470/459, loss: 3.134246349334717 2023-01-22 15:07:22.395881: step: 472/459, loss: 1.1176090240478516 2023-01-22 15:07:22.990905: step: 474/459, loss: 1.322977900505066 2023-01-22 15:07:23.702476: step: 476/459, loss: 0.6672422289848328 2023-01-22 15:07:24.319702: step: 478/459, loss: 0.9962826371192932 2023-01-22 15:07:24.890744: step: 480/459, loss: 2.8155853748321533 2023-01-22 15:07:25.526117: step: 482/459, loss: 0.14899109303951263 2023-01-22 15:07:26.221271: step: 484/459, loss: 0.24216555058956146 2023-01-22 15:07:26.800811: step: 486/459, loss: 0.5326796174049377 2023-01-22 15:07:27.415093: step: 488/459, loss: 5.881984233856201 2023-01-22 15:07:28.009151: step: 490/459, loss: 0.13877612352371216 2023-01-22 15:07:28.696508: step: 492/459, loss: 0.812782883644104 2023-01-22 15:07:29.332382: step: 494/459, loss: 0.9800285696983337 2023-01-22 15:07:29.958034: step: 496/459, loss: 0.3011130690574646 2023-01-22 15:07:30.579080: step: 498/459, loss: 1.225419282913208 2023-01-22 15:07:31.176939: step: 500/459, loss: 0.8235507607460022 2023-01-22 15:07:31.817645: step: 502/459, loss: 0.5383784174919128 2023-01-22 15:07:32.487468: step: 504/459, loss: 0.4879634976387024 2023-01-22 15:07:33.102264: step: 506/459, loss: 0.23081764578819275 2023-01-22 15:07:33.668170: step: 508/459, loss: 0.4666285812854767 2023-01-22 15:07:34.287375: step: 510/459, loss: 0.3398003578186035 2023-01-22 15:07:34.908278: step: 512/459, loss: 0.6405749320983887 2023-01-22 15:07:35.522819: step: 514/459, loss: 0.4594379663467407 2023-01-22 15:07:36.227394: step: 516/459, loss: 1.7195886373519897 2023-01-22 15:07:36.879230: step: 518/459, loss: 0.7384719252586365 2023-01-22 15:07:37.519846: step: 520/459, loss: 0.12196625769138336 2023-01-22 15:07:38.129804: step: 522/459, loss: 0.5152952671051025 2023-01-22 15:07:38.760489: step: 524/459, loss: 0.7651795148849487 2023-01-22 15:07:39.396294: step: 526/459, loss: 0.4149414896965027 2023-01-22 15:07:40.014466: step: 528/459, loss: 0.9945996999740601 2023-01-22 15:07:40.633578: step: 530/459, loss: 1.2530547380447388 2023-01-22 15:07:41.320637: step: 532/459, loss: 1.7386212348937988 2023-01-22 15:07:41.991365: step: 534/459, loss: 0.6403185129165649 2023-01-22 15:07:42.576624: step: 536/459, loss: 0.5174029469490051 2023-01-22 15:07:43.203613: step: 538/459, loss: 1.2801268100738525 2023-01-22 15:07:43.845997: step: 540/459, loss: 0.9696680307388306 2023-01-22 15:07:44.439995: step: 542/459, loss: 0.20496287941932678 2023-01-22 15:07:45.091499: step: 544/459, loss: 0.5966973304748535 2023-01-22 15:07:45.748804: step: 546/459, loss: 0.7209455370903015 2023-01-22 15:07:46.370247: step: 548/459, loss: 0.25124916434288025 2023-01-22 15:07:47.081199: step: 550/459, loss: 0.25177350640296936 2023-01-22 15:07:47.683631: step: 552/459, loss: 0.614197313785553 2023-01-22 15:07:48.273412: step: 554/459, loss: 2.0573761463165283 2023-01-22 15:07:48.927844: step: 556/459, loss: 1.0167183876037598 2023-01-22 15:07:49.521724: step: 558/459, loss: 2.0652530193328857 2023-01-22 15:07:50.161140: step: 560/459, loss: 1.2404944896697998 2023-01-22 15:07:50.775035: step: 562/459, loss: 0.7271109819412231 2023-01-22 15:07:51.427733: step: 564/459, loss: 0.27765652537345886 2023-01-22 15:07:52.053076: step: 566/459, loss: 1.4150278568267822 2023-01-22 15:07:52.683042: step: 568/459, loss: 0.791638970375061 2023-01-22 15:07:53.296896: step: 570/459, loss: 0.359813392162323 2023-01-22 15:07:53.947822: step: 572/459, loss: 1.1951818466186523 2023-01-22 15:07:54.529025: step: 574/459, loss: 0.6610995531082153 2023-01-22 15:07:55.163448: step: 576/459, loss: 0.24860604107379913 2023-01-22 15:07:55.762448: step: 578/459, loss: 0.3388851284980774 2023-01-22 15:07:56.325401: step: 580/459, loss: 0.36801934242248535 2023-01-22 15:07:56.942959: step: 582/459, loss: 0.3666348159313202 2023-01-22 15:07:57.557768: step: 584/459, loss: 0.18387389183044434 2023-01-22 15:07:58.168678: step: 586/459, loss: 0.748452365398407 2023-01-22 15:07:58.721656: step: 588/459, loss: 0.8213256597518921 2023-01-22 15:07:59.395445: step: 590/459, loss: 0.3537514805793762 2023-01-22 15:07:59.956359: step: 592/459, loss: 0.4075838625431061 2023-01-22 15:08:00.543559: step: 594/459, loss: 0.6387861967086792 2023-01-22 15:08:01.178123: step: 596/459, loss: 0.5668677091598511 2023-01-22 15:08:01.862210: step: 598/459, loss: 0.931701123714447 2023-01-22 15:08:02.496473: step: 600/459, loss: 0.2926390767097473 2023-01-22 15:08:03.153327: step: 602/459, loss: 3.0197625160217285 2023-01-22 15:08:03.737115: step: 604/459, loss: 0.1713789403438568 2023-01-22 15:08:04.426479: step: 606/459, loss: 0.8392295241355896 2023-01-22 15:08:05.078778: step: 608/459, loss: 0.8915883302688599 2023-01-22 15:08:05.699036: step: 610/459, loss: 0.9676210880279541 2023-01-22 15:08:06.280904: step: 612/459, loss: 0.3248918652534485 2023-01-22 15:08:06.862280: step: 614/459, loss: 1.374133825302124 2023-01-22 15:08:07.448239: step: 616/459, loss: 1.3091236352920532 2023-01-22 15:08:08.113073: step: 618/459, loss: 1.1576485633850098 2023-01-22 15:08:08.731816: step: 620/459, loss: 1.0357855558395386 2023-01-22 15:08:09.354740: step: 622/459, loss: 0.9551465511322021 2023-01-22 15:08:09.971663: step: 624/459, loss: 0.6550151109695435 2023-01-22 15:08:10.563142: step: 626/459, loss: 0.3031185269355774 2023-01-22 15:08:11.207623: step: 628/459, loss: 0.3133928179740906 2023-01-22 15:08:11.881159: step: 630/459, loss: 0.9696245193481445 2023-01-22 15:08:12.516990: step: 632/459, loss: 1.2033727169036865 2023-01-22 15:08:13.094707: step: 634/459, loss: 0.571358323097229 2023-01-22 15:08:13.735973: step: 636/459, loss: 1.1624913215637207 2023-01-22 15:08:14.391306: step: 638/459, loss: 0.5092325210571289 2023-01-22 15:08:15.008680: step: 640/459, loss: 12.160849571228027 2023-01-22 15:08:15.662940: step: 642/459, loss: 0.3148859739303589 2023-01-22 15:08:16.293896: step: 644/459, loss: 0.8614201545715332 2023-01-22 15:08:16.936047: step: 646/459, loss: 0.2541893422603607 2023-01-22 15:08:17.555933: step: 648/459, loss: 0.6744579672813416 2023-01-22 15:08:18.216902: step: 650/459, loss: 0.3104857802391052 2023-01-22 15:08:18.872952: step: 652/459, loss: 0.7794855833053589 2023-01-22 15:08:19.471864: step: 654/459, loss: 0.6687971949577332 2023-01-22 15:08:20.135852: step: 656/459, loss: 0.7563040256500244 2023-01-22 15:08:20.778854: step: 658/459, loss: 0.681649386882782 2023-01-22 15:08:21.362537: step: 660/459, loss: 0.3918609023094177 2023-01-22 15:08:22.059029: step: 662/459, loss: 0.4740074872970581 2023-01-22 15:08:22.649703: step: 664/459, loss: 0.4640263020992279 2023-01-22 15:08:23.306818: step: 666/459, loss: 0.9613162279129028 2023-01-22 15:08:23.953103: step: 668/459, loss: 0.27040329575538635 2023-01-22 15:08:24.552915: step: 670/459, loss: 0.5713587403297424 2023-01-22 15:08:25.201159: step: 672/459, loss: 1.4149391651153564 2023-01-22 15:08:25.790434: step: 674/459, loss: 0.3320057988166809 2023-01-22 15:08:26.427415: step: 676/459, loss: 0.9992173910140991 2023-01-22 15:08:27.035471: step: 678/459, loss: 4.4014081954956055 2023-01-22 15:08:27.670797: step: 680/459, loss: 0.464174747467041 2023-01-22 15:08:28.323382: step: 682/459, loss: 0.5857687592506409 2023-01-22 15:08:28.949299: step: 684/459, loss: 0.5991096496582031 2023-01-22 15:08:29.599094: step: 686/459, loss: 0.5145123600959778 2023-01-22 15:08:30.266141: step: 688/459, loss: 2.1923186779022217 2023-01-22 15:08:30.865511: step: 690/459, loss: 0.7714333534240723 2023-01-22 15:08:31.548593: step: 692/459, loss: 1.532431721687317 2023-01-22 15:08:32.156761: step: 694/459, loss: 1.1597645282745361 2023-01-22 15:08:32.760859: step: 696/459, loss: 0.5730098485946655 2023-01-22 15:08:33.296309: step: 698/459, loss: 1.6965497732162476 2023-01-22 15:08:33.923635: step: 700/459, loss: 0.22442388534545898 2023-01-22 15:08:34.541065: step: 702/459, loss: 0.8804800510406494 2023-01-22 15:08:35.131201: step: 704/459, loss: 4.368315696716309 2023-01-22 15:08:35.755679: step: 706/459, loss: 0.5034193396568298 2023-01-22 15:08:36.412887: step: 708/459, loss: 0.38979005813598633 2023-01-22 15:08:36.992019: step: 710/459, loss: 0.25143322348594666 2023-01-22 15:08:37.616195: step: 712/459, loss: 0.3560500741004944 2023-01-22 15:08:38.280148: step: 714/459, loss: 0.8433790802955627 2023-01-22 15:08:38.886133: step: 716/459, loss: 0.2729099988937378 2023-01-22 15:08:39.461201: step: 718/459, loss: 0.36927205324172974 2023-01-22 15:08:40.171097: step: 720/459, loss: 3.716538906097412 2023-01-22 15:08:40.731336: step: 722/459, loss: 0.4914548397064209 2023-01-22 15:08:41.360023: step: 724/459, loss: 2.123474359512329 2023-01-22 15:08:41.959168: step: 726/459, loss: 1.0742183923721313 2023-01-22 15:08:42.590670: step: 728/459, loss: 0.8802111148834229 2023-01-22 15:08:43.249748: step: 730/459, loss: 0.4920675456523895 2023-01-22 15:08:43.976488: step: 732/459, loss: 0.6010702252388 2023-01-22 15:08:44.639974: step: 734/459, loss: 0.425454705953598 2023-01-22 15:08:45.259727: step: 736/459, loss: 0.3106186091899872 2023-01-22 15:08:45.898970: step: 738/459, loss: 0.8190872073173523 2023-01-22 15:08:46.524819: step: 740/459, loss: 0.8275226354598999 2023-01-22 15:08:47.152017: step: 742/459, loss: 1.1606758832931519 2023-01-22 15:08:47.718269: step: 744/459, loss: 0.6159114837646484 2023-01-22 15:08:48.330414: step: 746/459, loss: 0.8750706315040588 2023-01-22 15:08:48.956555: step: 748/459, loss: 0.7647720575332642 2023-01-22 15:08:49.593629: step: 750/459, loss: 1.9950623512268066 2023-01-22 15:08:50.229360: step: 752/459, loss: 0.26233309507369995 2023-01-22 15:08:50.946929: step: 754/459, loss: 1.1412550210952759 2023-01-22 15:08:51.596003: step: 756/459, loss: 0.8010326623916626 2023-01-22 15:08:52.288953: step: 758/459, loss: 0.7362023591995239 2023-01-22 15:08:52.951796: step: 760/459, loss: 1.8129559755325317 2023-01-22 15:08:53.539824: step: 762/459, loss: 0.3201133608818054 2023-01-22 15:08:54.200574: step: 764/459, loss: 1.1928919553756714 2023-01-22 15:08:54.828524: step: 766/459, loss: 0.21606124937534332 2023-01-22 15:08:55.435316: step: 768/459, loss: 0.23774008452892303 2023-01-22 15:08:56.117535: step: 770/459, loss: 1.1091246604919434 2023-01-22 15:08:56.736638: step: 772/459, loss: 1.77199387550354 2023-01-22 15:08:57.389998: step: 774/459, loss: 1.4626317024230957 2023-01-22 15:08:58.007026: step: 776/459, loss: 1.5008907318115234 2023-01-22 15:08:58.583510: step: 778/459, loss: 0.5384165644645691 2023-01-22 15:08:59.265938: step: 780/459, loss: 1.4235645532608032 2023-01-22 15:08:59.948142: step: 782/459, loss: 0.5806401968002319 2023-01-22 15:09:00.508152: step: 784/459, loss: 3.7197210788726807 2023-01-22 15:09:01.123588: step: 786/459, loss: 0.7730777859687805 2023-01-22 15:09:01.794836: step: 788/459, loss: 0.5992450714111328 2023-01-22 15:09:02.403410: step: 790/459, loss: 0.44467321038246155 2023-01-22 15:09:02.982537: step: 792/459, loss: 0.7247563004493713 2023-01-22 15:09:03.540844: step: 794/459, loss: 1.4569634199142456 2023-01-22 15:09:04.131687: step: 796/459, loss: 0.21024134755134583 2023-01-22 15:09:04.738645: step: 798/459, loss: 0.1670311838388443 2023-01-22 15:09:05.322686: step: 800/459, loss: 0.319731742143631 2023-01-22 15:09:05.952278: step: 802/459, loss: 1.7152023315429688 2023-01-22 15:09:06.647187: step: 804/459, loss: 1.6759612560272217 2023-01-22 15:09:07.237921: step: 806/459, loss: 1.9673770666122437 2023-01-22 15:09:07.821588: step: 808/459, loss: 0.5166868567466736 2023-01-22 15:09:08.448799: step: 810/459, loss: 0.8115724325180054 2023-01-22 15:09:09.046768: step: 812/459, loss: 0.9990595579147339 2023-01-22 15:09:09.671770: step: 814/459, loss: 0.556398868560791 2023-01-22 15:09:10.358637: step: 816/459, loss: 0.4815308749675751 2023-01-22 15:09:11.097237: step: 818/459, loss: 0.5884028077125549 2023-01-22 15:09:11.745554: step: 820/459, loss: 0.5000594854354858 2023-01-22 15:09:12.382121: step: 822/459, loss: 0.6838361620903015 2023-01-22 15:09:12.972231: step: 824/459, loss: 0.5536155104637146 2023-01-22 15:09:13.522626: step: 826/459, loss: 2.115755081176758 2023-01-22 15:09:14.111349: step: 828/459, loss: 0.27825793623924255 2023-01-22 15:09:14.728029: step: 830/459, loss: 0.8797720670700073 2023-01-22 15:09:15.318023: step: 832/459, loss: 0.9331833124160767 2023-01-22 15:09:15.975093: step: 834/459, loss: 1.731446385383606 2023-01-22 15:09:16.588636: step: 836/459, loss: 0.47772476077079773 2023-01-22 15:09:17.158555: step: 838/459, loss: 0.2740611135959625 2023-01-22 15:09:17.745064: step: 840/459, loss: 1.187747597694397 2023-01-22 15:09:18.360543: step: 842/459, loss: 0.2285318821668625 2023-01-22 15:09:19.003609: step: 844/459, loss: 0.23283833265304565 2023-01-22 15:09:19.588416: step: 846/459, loss: 12.704627990722656 2023-01-22 15:09:20.227175: step: 848/459, loss: 0.5069969892501831 2023-01-22 15:09:20.960133: step: 850/459, loss: 1.0400396585464478 2023-01-22 15:09:21.527595: step: 852/459, loss: 0.6475605368614197 2023-01-22 15:09:22.183589: step: 854/459, loss: 0.7911975383758545 2023-01-22 15:09:22.812634: step: 856/459, loss: 0.5539909601211548 2023-01-22 15:09:23.434968: step: 858/459, loss: 0.5772046446800232 2023-01-22 15:09:24.028412: step: 860/459, loss: 0.8840752243995667 2023-01-22 15:09:24.695044: step: 862/459, loss: 0.9499339461326599 2023-01-22 15:09:25.333702: step: 864/459, loss: 0.8115397095680237 2023-01-22 15:09:25.941974: step: 866/459, loss: 0.8461280465126038 2023-01-22 15:09:26.557724: step: 868/459, loss: 1.3312066793441772 2023-01-22 15:09:27.154520: step: 870/459, loss: 0.43299204111099243 2023-01-22 15:09:27.824973: step: 872/459, loss: 0.5879578590393066 2023-01-22 15:09:28.428158: step: 874/459, loss: 0.8139932155609131 2023-01-22 15:09:29.044175: step: 876/459, loss: 0.9599544405937195 2023-01-22 15:09:29.659117: step: 878/459, loss: 1.4573544263839722 2023-01-22 15:09:30.247698: step: 880/459, loss: 0.8966002464294434 2023-01-22 15:09:30.886900: step: 882/459, loss: 1.3563281297683716 2023-01-22 15:09:31.561335: step: 884/459, loss: 3.5108797550201416 2023-01-22 15:09:32.265893: step: 886/459, loss: 1.3896827697753906 2023-01-22 15:09:32.957125: step: 888/459, loss: 0.5859240293502808 2023-01-22 15:09:33.546558: step: 890/459, loss: 0.4472804665565491 2023-01-22 15:09:34.178769: step: 892/459, loss: 3.691192150115967 2023-01-22 15:09:34.806703: step: 894/459, loss: 0.7774364948272705 2023-01-22 15:09:35.501525: step: 896/459, loss: 0.3433125913143158 2023-01-22 15:09:36.235724: step: 898/459, loss: 0.25245559215545654 2023-01-22 15:09:36.910172: step: 900/459, loss: 0.29540860652923584 2023-01-22 15:09:37.551928: step: 902/459, loss: 0.6061186194419861 2023-01-22 15:09:38.193632: step: 904/459, loss: 1.1684499979019165 2023-01-22 15:09:38.755337: step: 906/459, loss: 0.5196197032928467 2023-01-22 15:09:39.346607: step: 908/459, loss: 1.115676760673523 2023-01-22 15:09:40.047053: step: 910/459, loss: 1.377803087234497 2023-01-22 15:09:40.735566: step: 912/459, loss: 0.45894014835357666 2023-01-22 15:09:41.353180: step: 914/459, loss: 0.3965403139591217 2023-01-22 15:09:41.955315: step: 916/459, loss: 0.48682108521461487 2023-01-22 15:09:42.582203: step: 918/459, loss: 1.0501278638839722 2023-01-22 15:09:43.036490: step: 920/459, loss: 0.0015311443712562323 ================================================== Loss: 0.910 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27645185664939553, 'r': 0.3031545928030303, 'f1': 0.28918812104787717}, 'combined': 0.21308598393001474, 'epoch': 5} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.2808055272556637, 'r': 0.24032676131400413, 'f1': 0.2589940573808955}, 'combined': 0.1657561967237731, 'epoch': 5} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.279556856187291, 'r': 0.3166193181818182, 'f1': 0.2969360568383659}, 'combined': 0.21879498924932222, 'epoch': 5} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.287737269204934, 'r': 0.24834621965829298, 'f1': 0.2665945306891456}, 'combined': 0.17062049964105316, 'epoch': 5} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28352328496392193, 'r': 0.3092981290515512, 'f1': 0.2958503843101794}, 'combined': 0.21799502001802692, 'epoch': 5} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.28991209084126907, 'r': 0.2643703353357258, 'f1': 0.27655272133456926}, 'combined': 0.19828308322101193, 'epoch': 5} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2353395061728395, 'r': 0.3630952380952381, 'f1': 0.2855805243445693}, 'combined': 0.19038701622971288, 'epoch': 5} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.3804347826086957, 'f1': 0.3017241379310345}, 'combined': 0.15086206896551724, 'epoch': 5} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3125, 'r': 0.1724137931034483, 'f1': 0.22222222222222224}, 'combined': 0.14814814814814814, 'epoch': 5} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27463650173611115, 'r': 0.29960345643939396, 'f1': 0.2865772192028986}, 'combined': 0.21116216151792525, 'epoch': 4} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3275304334361018, 'r': 0.25382122518550904, 'f1': 0.28600305741761856}, 'combined': 0.18304195674727586, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2387005649717514, 'r': 0.40238095238095234, 'f1': 0.29964539007092195}, 'combined': 0.1997635933806146, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.279556856187291, 'r': 0.3166193181818182, 'f1': 0.2969360568383659}, 'combined': 0.21879498924932222, 'epoch': 5} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.287737269204934, 'r': 0.24834621965829298, 'f1': 0.2665945306891456}, 'combined': 0.17062049964105316, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.3804347826086957, 'f1': 0.3017241379310345}, 'combined': 0.15086206896551724, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29402583508521984, 'r': 0.3269433384439067, 'f1': 0.30961211026044716}, 'combined': 0.22813523913927683, 'epoch': 1} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3423541418131789, 'r': 0.25465905170424247, 'f1': 0.29206584393053214}, 'combined': 0.209405699421891, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2777777777777778, 'r': 0.1724137931034483, 'f1': 0.21276595744680854}, 'combined': 0.14184397163120568, 'epoch': 1} ****************************** Epoch: 6 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 15:12:31.881815: step: 2/459, loss: 0.21774934232234955 2023-01-22 15:12:32.489519: step: 4/459, loss: 0.6623240113258362 2023-01-22 15:12:33.098346: step: 6/459, loss: 1.194414734840393 2023-01-22 15:12:33.789462: step: 8/459, loss: 1.1236461400985718 2023-01-22 15:12:34.390060: step: 10/459, loss: 0.7657080888748169 2023-01-22 15:12:34.999223: step: 12/459, loss: 1.4271870851516724 2023-01-22 15:12:35.595814: step: 14/459, loss: 1.3152210712432861 2023-01-22 15:12:36.308507: step: 16/459, loss: 0.19761709868907928 2023-01-22 15:12:36.931748: step: 18/459, loss: 0.36811065673828125 2023-01-22 15:12:37.528085: step: 20/459, loss: 3.226193428039551 2023-01-22 15:12:38.183293: step: 22/459, loss: 0.6982096433639526 2023-01-22 15:12:38.814259: step: 24/459, loss: 0.23039168119430542 2023-01-22 15:12:39.425830: step: 26/459, loss: 1.2359626293182373 2023-01-22 15:12:40.036937: step: 28/459, loss: 0.3010197579860687 2023-01-22 15:12:40.640630: step: 30/459, loss: 0.5330190062522888 2023-01-22 15:12:41.285024: step: 32/459, loss: 0.9670319557189941 2023-01-22 15:12:41.825622: step: 34/459, loss: 0.3160071074962616 2023-01-22 15:12:42.467458: step: 36/459, loss: 0.42735275626182556 2023-01-22 15:12:43.072747: step: 38/459, loss: 0.28910747170448303 2023-01-22 15:12:43.703117: step: 40/459, loss: 0.5440165996551514 2023-01-22 15:12:44.281380: step: 42/459, loss: 0.25041159987449646 2023-01-22 15:12:44.912604: step: 44/459, loss: 0.539626955986023 2023-01-22 15:12:45.521504: step: 46/459, loss: 0.9609349966049194 2023-01-22 15:12:46.223924: step: 48/459, loss: 0.3780251443386078 2023-01-22 15:12:46.861629: step: 50/459, loss: 0.1430547684431076 2023-01-22 15:12:47.443430: step: 52/459, loss: 0.4764854311943054 2023-01-22 15:12:48.064299: step: 54/459, loss: 0.6043117642402649 2023-01-22 15:12:48.630506: step: 56/459, loss: 0.30158573389053345 2023-01-22 15:12:49.275654: step: 58/459, loss: 0.41918638348579407 2023-01-22 15:12:49.921207: step: 60/459, loss: 4.082719326019287 2023-01-22 15:12:50.683747: step: 62/459, loss: 0.4487907290458679 2023-01-22 15:12:51.290529: step: 64/459, loss: 0.5230746865272522 2023-01-22 15:12:51.887826: step: 66/459, loss: 1.7801910638809204 2023-01-22 15:12:52.486217: step: 68/459, loss: 0.25323957204818726 2023-01-22 15:12:53.127432: step: 70/459, loss: 0.5828599333763123 2023-01-22 15:12:53.716702: step: 72/459, loss: 0.8588560819625854 2023-01-22 15:12:54.405776: step: 74/459, loss: 0.48388993740081787 2023-01-22 15:12:55.019064: step: 76/459, loss: 0.4122595191001892 2023-01-22 15:12:55.730548: step: 78/459, loss: 0.3775588572025299 2023-01-22 15:12:56.369445: step: 80/459, loss: 0.6077477335929871 2023-01-22 15:12:56.946167: step: 82/459, loss: 0.52215576171875 2023-01-22 15:12:57.540754: step: 84/459, loss: 0.4339229464530945 2023-01-22 15:12:58.136862: step: 86/459, loss: 0.3180791735649109 2023-01-22 15:12:58.780997: step: 88/459, loss: 0.6867761611938477 2023-01-22 15:12:59.367701: step: 90/459, loss: 0.9468379020690918 2023-01-22 15:13:00.006391: step: 92/459, loss: 0.6237719655036926 2023-01-22 15:13:00.568241: step: 94/459, loss: 2.1833887100219727 2023-01-22 15:13:01.125299: step: 96/459, loss: 0.5726689696311951 2023-01-22 15:13:01.730128: step: 98/459, loss: 0.9518427848815918 2023-01-22 15:13:02.318485: step: 100/459, loss: 7.421259880065918 2023-01-22 15:13:02.906351: step: 102/459, loss: 0.2849976122379303 2023-01-22 15:13:03.564541: step: 104/459, loss: 0.7553761601448059 2023-01-22 15:13:04.110982: step: 106/459, loss: 0.468957781791687 2023-01-22 15:13:04.784213: step: 108/459, loss: 0.22243748605251312 2023-01-22 15:13:05.341680: step: 110/459, loss: 0.3334888815879822 2023-01-22 15:13:06.033815: step: 112/459, loss: 0.48760488629341125 2023-01-22 15:13:06.612208: step: 114/459, loss: 0.26574987173080444 2023-01-22 15:13:07.230997: step: 116/459, loss: 0.48726823925971985 2023-01-22 15:13:07.826075: step: 118/459, loss: 0.5145311951637268 2023-01-22 15:13:08.456811: step: 120/459, loss: 1.1123756170272827 2023-01-22 15:13:09.083087: step: 122/459, loss: 0.6692723631858826 2023-01-22 15:13:09.680935: step: 124/459, loss: 0.8614280223846436 2023-01-22 15:13:10.252627: step: 126/459, loss: 1.4138237237930298 2023-01-22 15:13:10.877226: step: 128/459, loss: 0.5448827147483826 2023-01-22 15:13:11.586153: step: 130/459, loss: 0.1679219752550125 2023-01-22 15:13:12.218554: step: 132/459, loss: 0.37689492106437683 2023-01-22 15:13:12.946501: step: 134/459, loss: 0.28099313378334045 2023-01-22 15:13:13.532487: step: 136/459, loss: 1.0191391706466675 2023-01-22 15:13:14.121082: step: 138/459, loss: 0.17633919417858124 2023-01-22 15:13:14.726783: step: 140/459, loss: 0.26479899883270264 2023-01-22 15:13:15.462798: step: 142/459, loss: 0.40367740392684937 2023-01-22 15:13:16.061780: step: 144/459, loss: 0.1904570460319519 2023-01-22 15:13:16.688658: step: 146/459, loss: 0.14634719491004944 2023-01-22 15:13:17.308176: step: 148/459, loss: 0.5796341896057129 2023-01-22 15:13:17.895408: step: 150/459, loss: 0.26979225873947144 2023-01-22 15:13:18.517614: step: 152/459, loss: 1.118283748626709 2023-01-22 15:13:19.231365: step: 154/459, loss: 0.6232640743255615 2023-01-22 15:13:19.904677: step: 156/459, loss: 0.1936156004667282 2023-01-22 15:13:20.551922: step: 158/459, loss: 0.28632718324661255 2023-01-22 15:13:21.161024: step: 160/459, loss: 0.5896260142326355 2023-01-22 15:13:21.794574: step: 162/459, loss: 1.3083043098449707 2023-01-22 15:13:22.448859: step: 164/459, loss: 0.291223406791687 2023-01-22 15:13:23.004781: step: 166/459, loss: 0.16435889899730682 2023-01-22 15:13:23.623433: step: 168/459, loss: 0.37989357113838196 2023-01-22 15:13:24.241895: step: 170/459, loss: 0.20420920848846436 2023-01-22 15:13:24.831122: step: 172/459, loss: 0.508120059967041 2023-01-22 15:13:25.462535: step: 174/459, loss: 0.4608747363090515 2023-01-22 15:13:26.137818: step: 176/459, loss: 0.8012973666191101 2023-01-22 15:13:26.846809: step: 178/459, loss: 0.39450812339782715 2023-01-22 15:13:27.499370: step: 180/459, loss: 0.7672123312950134 2023-01-22 15:13:28.144086: step: 182/459, loss: 0.4793742001056671 2023-01-22 15:13:28.745333: step: 184/459, loss: 0.14457589387893677 2023-01-22 15:13:29.482841: step: 186/459, loss: 0.82129967212677 2023-01-22 15:13:30.091738: step: 188/459, loss: 0.23498037457466125 2023-01-22 15:13:30.721453: step: 190/459, loss: 0.49061477184295654 2023-01-22 15:13:31.352434: step: 192/459, loss: 0.2049599438905716 2023-01-22 15:13:31.963518: step: 194/459, loss: 6.198484420776367 2023-01-22 15:13:32.630573: step: 196/459, loss: 1.176997423171997 2023-01-22 15:13:33.202353: step: 198/459, loss: 0.10949189215898514 2023-01-22 15:13:33.898105: step: 200/459, loss: 0.6994654536247253 2023-01-22 15:13:34.517248: step: 202/459, loss: 0.503574013710022 2023-01-22 15:13:35.123811: step: 204/459, loss: 0.4216698706150055 2023-01-22 15:13:35.739640: step: 206/459, loss: 0.5273122787475586 2023-01-22 15:13:36.426011: step: 208/459, loss: 0.6937316060066223 2023-01-22 15:13:37.025877: step: 210/459, loss: 0.24739867448806763 2023-01-22 15:13:37.671488: step: 212/459, loss: 0.7539664506912231 2023-01-22 15:13:38.331646: step: 214/459, loss: 0.13857311010360718 2023-01-22 15:13:39.084555: step: 216/459, loss: 0.7347659468650818 2023-01-22 15:13:39.707810: step: 218/459, loss: 0.89742112159729 2023-01-22 15:13:40.345855: step: 220/459, loss: 1.1176096200942993 2023-01-22 15:13:40.977268: step: 222/459, loss: 0.5755427479743958 2023-01-22 15:13:41.537888: step: 224/459, loss: 0.1456456035375595 2023-01-22 15:13:42.137954: step: 226/459, loss: 0.2164612114429474 2023-01-22 15:13:42.801354: step: 228/459, loss: 0.4611245393753052 2023-01-22 15:13:43.388964: step: 230/459, loss: 0.19389308989048004 2023-01-22 15:13:44.038565: step: 232/459, loss: 0.5255462527275085 2023-01-22 15:13:44.620690: step: 234/459, loss: 2.3939239978790283 2023-01-22 15:13:45.226043: step: 236/459, loss: 0.37020644545555115 2023-01-22 15:13:45.870266: step: 238/459, loss: 0.5099071264266968 2023-01-22 15:13:46.514424: step: 240/459, loss: 0.25525736808776855 2023-01-22 15:13:47.132612: step: 242/459, loss: 0.7130287885665894 2023-01-22 15:13:47.751554: step: 244/459, loss: 0.3557690382003784 2023-01-22 15:13:48.394438: step: 246/459, loss: 1.063022494316101 2023-01-22 15:13:48.981420: step: 248/459, loss: 0.6023043990135193 2023-01-22 15:13:49.571297: step: 250/459, loss: 0.7240192890167236 2023-01-22 15:13:50.207032: step: 252/459, loss: 0.4566067159175873 2023-01-22 15:13:50.883701: step: 254/459, loss: 0.9013195037841797 2023-01-22 15:13:51.520423: step: 256/459, loss: 0.2662453353404999 2023-01-22 15:13:52.131543: step: 258/459, loss: 0.918789267539978 2023-01-22 15:13:52.761590: step: 260/459, loss: 0.7642461061477661 2023-01-22 15:13:53.414497: step: 262/459, loss: 0.9302868247032166 2023-01-22 15:13:54.007810: step: 264/459, loss: 0.36266443133354187 2023-01-22 15:13:54.578225: step: 266/459, loss: 1.127708077430725 2023-01-22 15:13:55.151710: step: 268/459, loss: 0.329738050699234 2023-01-22 15:13:55.741344: step: 270/459, loss: 0.27264830470085144 2023-01-22 15:13:56.341475: step: 272/459, loss: 2.277843952178955 2023-01-22 15:13:56.972975: step: 274/459, loss: 0.28730008006095886 2023-01-22 15:13:57.600661: step: 276/459, loss: 0.380757600069046 2023-01-22 15:13:58.252879: step: 278/459, loss: 0.4134669303894043 2023-01-22 15:13:58.890848: step: 280/459, loss: 0.6485140919685364 2023-01-22 15:13:59.470017: step: 282/459, loss: 2.152944326400757 2023-01-22 15:14:00.157270: step: 284/459, loss: 1.1930330991744995 2023-01-22 15:14:00.729335: step: 286/459, loss: 0.6020668745040894 2023-01-22 15:14:01.356954: step: 288/459, loss: 0.7588706612586975 2023-01-22 15:14:02.048837: step: 290/459, loss: 0.38773223757743835 2023-01-22 15:14:02.689003: step: 292/459, loss: 0.1688104122877121 2023-01-22 15:14:03.294973: step: 294/459, loss: 0.41655805706977844 2023-01-22 15:14:03.893243: step: 296/459, loss: 1.5393545627593994 2023-01-22 15:14:04.538084: step: 298/459, loss: 0.22753743827342987 2023-01-22 15:14:05.126529: step: 300/459, loss: 0.4951467514038086 2023-01-22 15:14:05.711499: step: 302/459, loss: 0.35558855533599854 2023-01-22 15:14:06.348856: step: 304/459, loss: 1.0130497217178345 2023-01-22 15:14:06.987514: step: 306/459, loss: 0.1444246470928192 2023-01-22 15:14:07.606798: step: 308/459, loss: 0.48798227310180664 2023-01-22 15:14:08.202949: step: 310/459, loss: 0.3777657151222229 2023-01-22 15:14:08.796592: step: 312/459, loss: 0.2059086114168167 2023-01-22 15:14:09.404674: step: 314/459, loss: 1.4141992330551147 2023-01-22 15:14:10.007454: step: 316/459, loss: 0.15523843467235565 2023-01-22 15:14:10.731051: step: 318/459, loss: 1.0765267610549927 2023-01-22 15:14:11.308904: step: 320/459, loss: 0.7016246318817139 2023-01-22 15:14:11.976985: step: 322/459, loss: 0.221882626414299 2023-01-22 15:14:12.582707: step: 324/459, loss: 0.35049426555633545 2023-01-22 15:14:13.202279: step: 326/459, loss: 0.5724924206733704 2023-01-22 15:14:13.800616: step: 328/459, loss: 0.3476944863796234 2023-01-22 15:14:14.417760: step: 330/459, loss: 0.4103849530220032 2023-01-22 15:14:15.067809: step: 332/459, loss: 0.27614420652389526 2023-01-22 15:14:15.605901: step: 334/459, loss: 0.24649682641029358 2023-01-22 15:14:16.217660: step: 336/459, loss: 0.4612618088722229 2023-01-22 15:14:16.901511: step: 338/459, loss: 13.81995677947998 2023-01-22 15:14:17.521843: step: 340/459, loss: 0.3063504695892334 2023-01-22 15:14:18.099879: step: 342/459, loss: 0.33773383498191833 2023-01-22 15:14:18.746493: step: 344/459, loss: 0.8442946672439575 2023-01-22 15:14:19.397753: step: 346/459, loss: 0.6921852231025696 2023-01-22 15:14:19.988784: step: 348/459, loss: 1.0039012432098389 2023-01-22 15:14:20.646575: step: 350/459, loss: 0.6988862156867981 2023-01-22 15:14:21.279809: step: 352/459, loss: 0.8278802037239075 2023-01-22 15:14:21.853477: step: 354/459, loss: 0.2956060767173767 2023-01-22 15:14:22.442418: step: 356/459, loss: 0.42355719208717346 2023-01-22 15:14:23.072323: step: 358/459, loss: 0.1828337013721466 2023-01-22 15:14:23.685126: step: 360/459, loss: 0.15949861705303192 2023-01-22 15:14:24.328288: step: 362/459, loss: 1.0549304485321045 2023-01-22 15:14:24.934097: step: 364/459, loss: 0.37684839963912964 2023-01-22 15:14:25.566098: step: 366/459, loss: 1.967369556427002 2023-01-22 15:14:26.164993: step: 368/459, loss: 0.3520438075065613 2023-01-22 15:14:26.795510: step: 370/459, loss: 0.3517197370529175 2023-01-22 15:14:27.419934: step: 372/459, loss: 0.26992934942245483 2023-01-22 15:14:27.980540: step: 374/459, loss: 0.6493769288063049 2023-01-22 15:14:28.596490: step: 376/459, loss: 0.52801513671875 2023-01-22 15:14:29.231779: step: 378/459, loss: 0.3326655924320221 2023-01-22 15:14:29.879143: step: 380/459, loss: 0.8430228233337402 2023-01-22 15:14:30.513781: step: 382/459, loss: 0.5593937635421753 2023-01-22 15:14:31.144081: step: 384/459, loss: 0.982745885848999 2023-01-22 15:14:31.792881: step: 386/459, loss: 0.5197815299034119 2023-01-22 15:14:32.410476: step: 388/459, loss: 0.25402674078941345 2023-01-22 15:14:33.004948: step: 390/459, loss: 0.21921828389167786 2023-01-22 15:14:33.589492: step: 392/459, loss: 0.29072216153144836 2023-01-22 15:14:34.191311: step: 394/459, loss: 0.500728964805603 2023-01-22 15:14:34.788940: step: 396/459, loss: 0.6266765594482422 2023-01-22 15:14:35.416419: step: 398/459, loss: 0.33362990617752075 2023-01-22 15:14:36.004953: step: 400/459, loss: 0.1941831111907959 2023-01-22 15:14:36.607890: step: 402/459, loss: 0.1874822974205017 2023-01-22 15:14:37.176615: step: 404/459, loss: 0.5704163312911987 2023-01-22 15:14:37.812393: step: 406/459, loss: 0.4012613594532013 2023-01-22 15:14:38.419045: step: 408/459, loss: 1.6897799968719482 2023-01-22 15:14:38.993674: step: 410/459, loss: 0.4995502233505249 2023-01-22 15:14:39.637792: step: 412/459, loss: 2.4794912338256836 2023-01-22 15:14:40.243298: step: 414/459, loss: 1.016413688659668 2023-01-22 15:14:40.965889: step: 416/459, loss: 0.22857680916786194 2023-01-22 15:14:41.565544: step: 418/459, loss: 1.0964105129241943 2023-01-22 15:14:42.218793: step: 420/459, loss: 0.2909694015979767 2023-01-22 15:14:42.830150: step: 422/459, loss: 4.287620544433594 2023-01-22 15:14:43.416285: step: 424/459, loss: 0.26084572076797485 2023-01-22 15:14:44.078468: step: 426/459, loss: 0.4237214922904968 2023-01-22 15:14:44.739293: step: 428/459, loss: 0.7785623669624329 2023-01-22 15:14:45.440289: step: 430/459, loss: 0.3513590097427368 2023-01-22 15:14:46.040703: step: 432/459, loss: 1.5635051727294922 2023-01-22 15:14:46.690394: step: 434/459, loss: 0.6280377507209778 2023-01-22 15:14:47.356766: step: 436/459, loss: 1.1249462366104126 2023-01-22 15:14:48.011229: step: 438/459, loss: 0.6810205578804016 2023-01-22 15:14:48.651965: step: 440/459, loss: 0.32943955063819885 2023-01-22 15:14:49.286198: step: 442/459, loss: 0.32933905720710754 2023-01-22 15:14:49.901071: step: 444/459, loss: 0.921031653881073 2023-01-22 15:14:50.481706: step: 446/459, loss: 0.47107312083244324 2023-01-22 15:14:51.134000: step: 448/459, loss: 0.798490583896637 2023-01-22 15:14:51.728208: step: 450/459, loss: 0.7212517261505127 2023-01-22 15:14:52.379628: step: 452/459, loss: 1.133374810218811 2023-01-22 15:14:53.041556: step: 454/459, loss: 0.3721953332424164 2023-01-22 15:14:53.665821: step: 456/459, loss: 0.5438445806503296 2023-01-22 15:14:54.303176: step: 458/459, loss: 0.673523485660553 2023-01-22 15:14:54.957591: step: 460/459, loss: 0.21303172409534454 2023-01-22 15:14:55.716525: step: 462/459, loss: 0.42315155267715454 2023-01-22 15:14:56.272891: step: 464/459, loss: 0.588072657585144 2023-01-22 15:14:56.893247: step: 466/459, loss: 0.49986398220062256 2023-01-22 15:14:57.471517: step: 468/459, loss: 0.4457983076572418 2023-01-22 15:14:58.079991: step: 470/459, loss: 0.38703447580337524 2023-01-22 15:14:58.723781: step: 472/459, loss: 0.24975287914276123 2023-01-22 15:14:59.448521: step: 474/459, loss: 0.5355886220932007 2023-01-22 15:15:00.120988: step: 476/459, loss: 0.3484116494655609 2023-01-22 15:15:00.700913: step: 478/459, loss: 0.3393586575984955 2023-01-22 15:15:01.329192: step: 480/459, loss: 0.23786583542823792 2023-01-22 15:15:01.941069: step: 482/459, loss: 0.1973905712366104 2023-01-22 15:15:02.559236: step: 484/459, loss: 0.6194872856140137 2023-01-22 15:15:03.167364: step: 486/459, loss: 0.13542544841766357 2023-01-22 15:15:03.810119: step: 488/459, loss: 0.2077353298664093 2023-01-22 15:15:04.442474: step: 490/459, loss: 0.5035763382911682 2023-01-22 15:15:05.011975: step: 492/459, loss: 1.7735174894332886 2023-01-22 15:15:05.623793: step: 494/459, loss: 0.3028446137905121 2023-01-22 15:15:06.297952: step: 496/459, loss: 0.740693211555481 2023-01-22 15:15:06.974461: step: 498/459, loss: 0.29170554876327515 2023-01-22 15:15:07.540793: step: 500/459, loss: 1.7395408153533936 2023-01-22 15:15:08.105873: step: 502/459, loss: 0.4676857888698578 2023-01-22 15:15:08.736500: step: 504/459, loss: 1.1041088104248047 2023-01-22 15:15:09.359764: step: 506/459, loss: 0.48986583948135376 2023-01-22 15:15:09.983161: step: 508/459, loss: 1.4547796249389648 2023-01-22 15:15:10.580517: step: 510/459, loss: 0.9871118068695068 2023-01-22 15:15:11.198513: step: 512/459, loss: 0.49205300211906433 2023-01-22 15:15:11.807491: step: 514/459, loss: 0.5757095217704773 2023-01-22 15:15:12.531705: step: 516/459, loss: 0.4805539846420288 2023-01-22 15:15:13.137526: step: 518/459, loss: 0.21390597522258759 2023-01-22 15:15:13.775703: step: 520/459, loss: 0.5874000787734985 2023-01-22 15:15:14.439275: step: 522/459, loss: 0.17357364296913147 2023-01-22 15:15:15.081915: step: 524/459, loss: 0.3872908353805542 2023-01-22 15:15:15.666996: step: 526/459, loss: 0.7186589241027832 2023-01-22 15:15:16.296789: step: 528/459, loss: 0.41899311542510986 2023-01-22 15:15:16.913050: step: 530/459, loss: 1.0885847806930542 2023-01-22 15:15:17.569749: step: 532/459, loss: 0.7550298571586609 2023-01-22 15:15:18.273176: step: 534/459, loss: 0.4915105402469635 2023-01-22 15:15:19.014923: step: 536/459, loss: 0.3839082717895508 2023-01-22 15:15:19.623749: step: 538/459, loss: 1.0293729305267334 2023-01-22 15:15:20.280698: step: 540/459, loss: 0.4918147921562195 2023-01-22 15:15:20.914470: step: 542/459, loss: 0.2367931455373764 2023-01-22 15:15:21.523502: step: 544/459, loss: 0.2382286936044693 2023-01-22 15:15:22.152973: step: 546/459, loss: 0.27998167276382446 2023-01-22 15:15:22.797643: step: 548/459, loss: 0.16786202788352966 2023-01-22 15:15:23.364817: step: 550/459, loss: 0.7673588991165161 2023-01-22 15:15:23.960536: step: 552/459, loss: 0.8693388104438782 2023-01-22 15:15:24.624487: step: 554/459, loss: 0.9504124522209167 2023-01-22 15:15:25.347017: step: 556/459, loss: 0.4416505694389343 2023-01-22 15:15:25.985166: step: 558/459, loss: 0.3432334363460541 2023-01-22 15:15:26.631162: step: 560/459, loss: 0.20504355430603027 2023-01-22 15:15:27.261997: step: 562/459, loss: 0.5671997666358948 2023-01-22 15:15:27.817088: step: 564/459, loss: 0.38842323422431946 2023-01-22 15:15:28.412223: step: 566/459, loss: 1.1160006523132324 2023-01-22 15:15:29.019578: step: 568/459, loss: 0.7236613035202026 2023-01-22 15:15:29.653825: step: 570/459, loss: 0.19492149353027344 2023-01-22 15:15:30.356499: step: 572/459, loss: 0.4776560962200165 2023-01-22 15:15:31.034974: step: 574/459, loss: 0.3472641408443451 2023-01-22 15:15:31.694552: step: 576/459, loss: 0.42813754081726074 2023-01-22 15:15:32.298340: step: 578/459, loss: 0.20674124360084534 2023-01-22 15:15:32.895204: step: 580/459, loss: 0.27848461270332336 2023-01-22 15:15:33.557795: step: 582/459, loss: 2.370424747467041 2023-01-22 15:15:34.180285: step: 584/459, loss: 0.7403349876403809 2023-01-22 15:15:34.827623: step: 586/459, loss: 1.5915939807891846 2023-01-22 15:15:35.392098: step: 588/459, loss: 0.775283694267273 2023-01-22 15:15:35.977133: step: 590/459, loss: 0.12964053452014923 2023-01-22 15:15:36.618993: step: 592/459, loss: 0.3902817368507385 2023-01-22 15:15:37.207402: step: 594/459, loss: 0.5314715504646301 2023-01-22 15:15:37.862804: step: 596/459, loss: 0.21055008471012115 2023-01-22 15:15:38.445988: step: 598/459, loss: 0.73885577917099 2023-01-22 15:15:39.071659: step: 600/459, loss: 1.0904853343963623 2023-01-22 15:15:39.695268: step: 602/459, loss: 0.244905486702919 2023-01-22 15:15:40.280282: step: 604/459, loss: 0.5875725150108337 2023-01-22 15:15:40.902527: step: 606/459, loss: 0.7330551743507385 2023-01-22 15:15:41.531336: step: 608/459, loss: 0.27580946683883667 2023-01-22 15:15:42.182224: step: 610/459, loss: 0.35060933232307434 2023-01-22 15:15:42.813727: step: 612/459, loss: 0.23027996718883514 2023-01-22 15:15:43.414806: step: 614/459, loss: 0.47166675329208374 2023-01-22 15:15:44.064156: step: 616/459, loss: 0.27754896879196167 2023-01-22 15:15:44.673326: step: 618/459, loss: 1.3493735790252686 2023-01-22 15:15:45.339446: step: 620/459, loss: 0.3191864788532257 2023-01-22 15:15:46.019426: step: 622/459, loss: 0.15932688117027283 2023-01-22 15:15:46.747768: step: 624/459, loss: 0.6722443103790283 2023-01-22 15:15:47.362811: step: 626/459, loss: 0.6581690907478333 2023-01-22 15:15:47.956880: step: 628/459, loss: 0.18302373588085175 2023-01-22 15:15:48.576033: step: 630/459, loss: 0.4704693853855133 2023-01-22 15:15:49.174855: step: 632/459, loss: 1.3946224451065063 2023-01-22 15:15:49.837657: step: 634/459, loss: 0.2950683534145355 2023-01-22 15:15:50.432366: step: 636/459, loss: 0.4382808208465576 2023-01-22 15:15:51.041254: step: 638/459, loss: 0.41739940643310547 2023-01-22 15:15:51.632388: step: 640/459, loss: 0.1837598830461502 2023-01-22 15:15:52.251007: step: 642/459, loss: 1.1744537353515625 2023-01-22 15:15:52.904987: step: 644/459, loss: 0.7022131681442261 2023-01-22 15:15:53.526449: step: 646/459, loss: 0.35941460728645325 2023-01-22 15:15:54.117762: step: 648/459, loss: 0.23533588647842407 2023-01-22 15:15:54.828612: step: 650/459, loss: 0.33994200825691223 2023-01-22 15:15:55.451003: step: 652/459, loss: 0.5047181844711304 2023-01-22 15:15:56.152849: step: 654/459, loss: 0.3333030045032501 2023-01-22 15:15:56.768021: step: 656/459, loss: 0.5263914465904236 2023-01-22 15:15:57.470775: step: 658/459, loss: 0.6358025670051575 2023-01-22 15:15:58.109351: step: 660/459, loss: 0.6422153115272522 2023-01-22 15:15:58.729748: step: 662/459, loss: 0.7888187170028687 2023-01-22 15:15:59.374420: step: 664/459, loss: 0.7185153961181641 2023-01-22 15:16:00.016592: step: 666/459, loss: 0.8778061866760254 2023-01-22 15:16:00.615219: step: 668/459, loss: 0.9403241872787476 2023-01-22 15:16:01.230206: step: 670/459, loss: 0.7305970788002014 2023-01-22 15:16:01.847902: step: 672/459, loss: 0.10090101510286331 2023-01-22 15:16:02.477613: step: 674/459, loss: 1.0654308795928955 2023-01-22 15:16:03.199928: step: 676/459, loss: 0.924439549446106 2023-01-22 15:16:03.840857: step: 678/459, loss: 0.8704256415367126 2023-01-22 15:16:04.477516: step: 680/459, loss: 0.09926293045282364 2023-01-22 15:16:05.087531: step: 682/459, loss: 0.29192444682121277 2023-01-22 15:16:05.702347: step: 684/459, loss: 0.3442986011505127 2023-01-22 15:16:06.340865: step: 686/459, loss: 0.1492638885974884 2023-01-22 15:16:07.027982: step: 688/459, loss: 0.7655090093612671 2023-01-22 15:16:07.652368: step: 690/459, loss: 0.5981879830360413 2023-01-22 15:16:08.274593: step: 692/459, loss: 0.2919316291809082 2023-01-22 15:16:08.959579: step: 694/459, loss: 0.5074561238288879 2023-01-22 15:16:09.521877: step: 696/459, loss: 0.41159820556640625 2023-01-22 15:16:10.088000: step: 698/459, loss: 0.40812423825263977 2023-01-22 15:16:10.834738: step: 700/459, loss: 1.0641930103302002 2023-01-22 15:16:11.457753: step: 702/459, loss: 0.5214575529098511 2023-01-22 15:16:12.058307: step: 704/459, loss: 0.18851865828037262 2023-01-22 15:16:12.767797: step: 706/459, loss: 0.19534628093242645 2023-01-22 15:16:13.376732: step: 708/459, loss: 0.5272305011749268 2023-01-22 15:16:13.960322: step: 710/459, loss: 0.8048396110534668 2023-01-22 15:16:14.687456: step: 712/459, loss: 0.620564341545105 2023-01-22 15:16:15.306961: step: 714/459, loss: 0.8489289283752441 2023-01-22 15:16:15.899811: step: 716/459, loss: 0.46932366490364075 2023-01-22 15:16:16.542922: step: 718/459, loss: 0.32428818941116333 2023-01-22 15:16:17.215006: step: 720/459, loss: 0.8640026450157166 2023-01-22 15:16:17.843906: step: 722/459, loss: 0.8971908688545227 2023-01-22 15:16:18.449930: step: 724/459, loss: 2.4583210945129395 2023-01-22 15:16:19.102788: step: 726/459, loss: 0.7555744647979736 2023-01-22 15:16:19.830130: step: 728/459, loss: 1.0887441635131836 2023-01-22 15:16:20.459442: step: 730/459, loss: 1.1567542552947998 2023-01-22 15:16:21.075887: step: 732/459, loss: 3.5957775115966797 2023-01-22 15:16:21.690977: step: 734/459, loss: 0.20200419425964355 2023-01-22 15:16:22.259440: step: 736/459, loss: 0.15236401557922363 2023-01-22 15:16:22.963425: step: 738/459, loss: 0.4841930866241455 2023-01-22 15:16:23.589913: step: 740/459, loss: 0.32511967420578003 2023-01-22 15:16:24.272797: step: 742/459, loss: 0.8290405869483948 2023-01-22 15:16:24.984052: step: 744/459, loss: 0.2166324257850647 2023-01-22 15:16:25.618132: step: 746/459, loss: 0.3043784201145172 2023-01-22 15:16:26.412322: step: 748/459, loss: 0.9277738332748413 2023-01-22 15:16:27.018977: step: 750/459, loss: 0.3391251564025879 2023-01-22 15:16:27.683602: step: 752/459, loss: 0.08430712670087814 2023-01-22 15:16:28.271436: step: 754/459, loss: 0.20281299948692322 2023-01-22 15:16:28.917974: step: 756/459, loss: 0.8467041254043579 2023-01-22 15:16:29.518535: step: 758/459, loss: 0.23566095530986786 2023-01-22 15:16:30.139944: step: 760/459, loss: 0.3327324092388153 2023-01-22 15:16:30.813180: step: 762/459, loss: 0.38707679510116577 2023-01-22 15:16:31.466604: step: 764/459, loss: 0.6168732643127441 2023-01-22 15:16:32.101958: step: 766/459, loss: 0.39685800671577454 2023-01-22 15:16:32.662010: step: 768/459, loss: 0.32312676310539246 2023-01-22 15:16:33.255295: step: 770/459, loss: 0.2338918298482895 2023-01-22 15:16:33.904854: step: 772/459, loss: 0.23932448029518127 2023-01-22 15:16:34.592750: step: 774/459, loss: 0.5376511216163635 2023-01-22 15:16:35.306710: step: 776/459, loss: 2.014958381652832 2023-01-22 15:16:35.892532: step: 778/459, loss: 1.1833876371383667 2023-01-22 15:16:36.550845: step: 780/459, loss: 0.9824889898300171 2023-01-22 15:16:37.085882: step: 782/459, loss: 0.2828008532524109 2023-01-22 15:16:37.749443: step: 784/459, loss: 0.4181790351867676 2023-01-22 15:16:38.349442: step: 786/459, loss: 1.1543245315551758 2023-01-22 15:16:38.956137: step: 788/459, loss: 0.7924105525016785 2023-01-22 15:16:39.583356: step: 790/459, loss: 0.30871087312698364 2023-01-22 15:16:40.232018: step: 792/459, loss: 0.7439732551574707 2023-01-22 15:16:40.833793: step: 794/459, loss: 0.6447631120681763 2023-01-22 15:16:41.436723: step: 796/459, loss: 0.3906788229942322 2023-01-22 15:16:42.065173: step: 798/459, loss: 0.6008749604225159 2023-01-22 15:16:42.778559: step: 800/459, loss: 0.3297148644924164 2023-01-22 15:16:43.422693: step: 802/459, loss: 0.1839151680469513 2023-01-22 15:16:44.104256: step: 804/459, loss: 0.4292660355567932 2023-01-22 15:16:44.704817: step: 806/459, loss: 0.5120895504951477 2023-01-22 15:16:45.308520: step: 808/459, loss: 0.4609067440032959 2023-01-22 15:16:45.909520: step: 810/459, loss: 0.37746697664260864 2023-01-22 15:16:46.530079: step: 812/459, loss: 0.8792716264724731 2023-01-22 15:16:47.175581: step: 814/459, loss: 0.28186315298080444 2023-01-22 15:16:47.787262: step: 816/459, loss: 0.2925615906715393 2023-01-22 15:16:48.407988: step: 818/459, loss: 0.8502675294876099 2023-01-22 15:16:48.988877: step: 820/459, loss: 0.6943069100379944 2023-01-22 15:16:49.682200: step: 822/459, loss: 1.2863948345184326 2023-01-22 15:16:50.266266: step: 824/459, loss: 5.67760705947876 2023-01-22 15:16:50.911828: step: 826/459, loss: 1.860021948814392 2023-01-22 15:16:51.514362: step: 828/459, loss: 1.012995719909668 2023-01-22 15:16:52.134910: step: 830/459, loss: 0.3222934305667877 2023-01-22 15:16:52.778761: step: 832/459, loss: 0.583283007144928 2023-01-22 15:16:53.361615: step: 834/459, loss: 0.4701175093650818 2023-01-22 15:16:53.993355: step: 836/459, loss: 0.2971593141555786 2023-01-22 15:16:54.636748: step: 838/459, loss: 1.2665756940841675 2023-01-22 15:16:55.236956: step: 840/459, loss: 0.2374562919139862 2023-01-22 15:16:55.819452: step: 842/459, loss: 0.48797860741615295 2023-01-22 15:16:56.391270: step: 844/459, loss: 0.5722166895866394 2023-01-22 15:16:57.017886: step: 846/459, loss: 0.40460702776908875 2023-01-22 15:16:57.665271: step: 848/459, loss: 1.6116199493408203 2023-01-22 15:16:58.278523: step: 850/459, loss: 0.447061151266098 2023-01-22 15:16:58.923438: step: 852/459, loss: 0.6551575064659119 2023-01-22 15:16:59.613295: step: 854/459, loss: 0.29243576526641846 2023-01-22 15:17:00.260670: step: 856/459, loss: 0.23578418791294098 2023-01-22 15:17:00.863606: step: 858/459, loss: 0.8092825412750244 2023-01-22 15:17:01.450054: step: 860/459, loss: 0.964705765247345 2023-01-22 15:17:02.098067: step: 862/459, loss: 1.407042384147644 2023-01-22 15:17:02.763215: step: 864/459, loss: 0.23323114216327667 2023-01-22 15:17:03.347917: step: 866/459, loss: 0.23732173442840576 2023-01-22 15:17:04.021266: step: 868/459, loss: 0.3982031047344208 2023-01-22 15:17:04.640980: step: 870/459, loss: 0.796453058719635 2023-01-22 15:17:05.273896: step: 872/459, loss: 0.6261579394340515 2023-01-22 15:17:05.878409: step: 874/459, loss: 0.2732127606868744 2023-01-22 15:17:06.515575: step: 876/459, loss: 0.3770942687988281 2023-01-22 15:17:07.108675: step: 878/459, loss: 1.4512251615524292 2023-01-22 15:17:07.702526: step: 880/459, loss: 0.45559269189834595 2023-01-22 15:17:08.359545: step: 882/459, loss: 0.4985981583595276 2023-01-22 15:17:09.001933: step: 884/459, loss: 0.2242615818977356 2023-01-22 15:17:09.610587: step: 886/459, loss: 0.7975412607192993 2023-01-22 15:17:10.268308: step: 888/459, loss: 3.677731990814209 2023-01-22 15:17:10.904357: step: 890/459, loss: 0.34561026096343994 2023-01-22 15:17:11.532928: step: 892/459, loss: 0.17092707753181458 2023-01-22 15:17:12.140935: step: 894/459, loss: 0.7366908192634583 2023-01-22 15:17:12.741250: step: 896/459, loss: 0.3241656422615051 2023-01-22 15:17:13.358147: step: 898/459, loss: 0.8274350166320801 2023-01-22 15:17:14.018285: step: 900/459, loss: 0.8481771945953369 2023-01-22 15:17:14.688294: step: 902/459, loss: 1.752841830253601 2023-01-22 15:17:15.275507: step: 904/459, loss: 0.25777027010917664 2023-01-22 15:17:15.893899: step: 906/459, loss: 1.3667256832122803 2023-01-22 15:17:16.515763: step: 908/459, loss: 1.9152380228042603 2023-01-22 15:17:17.123947: step: 910/459, loss: 1.1674526929855347 2023-01-22 15:17:17.739140: step: 912/459, loss: 0.2704983949661255 2023-01-22 15:17:18.356753: step: 914/459, loss: 0.9990341067314148 2023-01-22 15:17:18.949502: step: 916/459, loss: 1.1280548572540283 2023-01-22 15:17:19.568316: step: 918/459, loss: 0.911435067653656 2023-01-22 15:17:20.051560: step: 920/459, loss: 0.010146044194698334 ================================================== Loss: 0.706 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2703384076615209, 'r': 0.2990650695762176, 'f1': 0.2839771021021021}, 'combined': 0.20924628575944362, 'epoch': 6} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3113289222750901, 'r': 0.27064710303043227, 'f1': 0.2895661238247926}, 'combined': 0.1853223192478672, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2682042833607908, 'r': 0.3089184060721063, 'f1': 0.2871252204585538}, 'combined': 0.21156595191682911, 'epoch': 6} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3146317163011242, 'r': 0.2746603548835585, 'f1': 0.29329041771480757}, 'combined': 0.18770586733747682, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2733614993459821, 'r': 0.2992480049658668, 'f1': 0.28571961414462504}, 'combined': 0.21053024200130266, 'epoch': 6} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.31922480995095, 'r': 0.2890435551919511, 'f1': 0.3033854109839181}, 'combined': 0.21752161542243184, 'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29398148148148145, 'r': 0.30238095238095236, 'f1': 0.2981220657276995}, 'combined': 0.19874804381846634, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.19285714285714287, 'r': 0.29347826086956524, 'f1': 0.23275862068965517}, 'combined': 0.11637931034482758, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2608695652173913, 'r': 0.20689655172413793, 'f1': 0.23076923076923075}, 'combined': 0.15384615384615383, 'epoch': 6} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27463650173611115, 'r': 0.29960345643939396, 'f1': 0.2865772192028986}, 'combined': 0.21116216151792525, 'epoch': 4} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3275304334361018, 'r': 0.25382122518550904, 'f1': 0.28600305741761856}, 'combined': 0.18304195674727586, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2387005649717514, 'r': 0.40238095238095234, 'f1': 0.29964539007092195}, 'combined': 0.1997635933806146, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.279556856187291, 'r': 0.3166193181818182, 'f1': 0.2969360568383659}, 'combined': 0.21879498924932222, 'epoch': 5} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.287737269204934, 'r': 0.24834621965829298, 'f1': 0.2665945306891456}, 'combined': 0.17062049964105316, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.3804347826086957, 'f1': 0.3017241379310345}, 'combined': 0.15086206896551724, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29402583508521984, 'r': 0.3269433384439067, 'f1': 0.30961211026044716}, 'combined': 0.22813523913927683, 'epoch': 1} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3423541418131789, 'r': 0.25465905170424247, 'f1': 0.29206584393053214}, 'combined': 0.209405699421891, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2777777777777778, 'r': 0.1724137931034483, 'f1': 0.21276595744680854}, 'combined': 0.14184397163120568, 'epoch': 1} ****************************** Epoch: 7 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 15:20:00.153196: step: 2/459, loss: 0.5803914070129395 2023-01-22 15:20:00.794394: step: 4/459, loss: 0.2968847453594208 2023-01-22 15:20:01.383483: step: 6/459, loss: 0.6723167896270752 2023-01-22 15:20:02.008068: step: 8/459, loss: 0.24166879057884216 2023-01-22 15:20:02.619637: step: 10/459, loss: 0.2500041723251343 2023-01-22 15:20:03.252478: step: 12/459, loss: 0.6877835392951965 2023-01-22 15:20:03.875365: step: 14/459, loss: 0.43752166628837585 2023-01-22 15:20:04.561490: step: 16/459, loss: 0.17243948578834534 2023-01-22 15:20:05.188849: step: 18/459, loss: 0.1584683060646057 2023-01-22 15:20:05.741110: step: 20/459, loss: 0.19631831347942352 2023-01-22 15:20:06.323838: step: 22/459, loss: 0.22093386948108673 2023-01-22 15:20:06.982292: step: 24/459, loss: 0.5024628639221191 2023-01-22 15:20:07.649356: step: 26/459, loss: 0.18575426936149597 2023-01-22 15:20:08.341062: step: 28/459, loss: 0.6050416827201843 2023-01-22 15:20:08.943210: step: 30/459, loss: 0.8475002646446228 2023-01-22 15:20:09.627610: step: 32/459, loss: 0.357475221157074 2023-01-22 15:20:10.297381: step: 34/459, loss: 0.21129116415977478 2023-01-22 15:20:10.913448: step: 36/459, loss: 0.7257735729217529 2023-01-22 15:20:11.698523: step: 38/459, loss: 0.778672993183136 2023-01-22 15:20:12.382795: step: 40/459, loss: 0.3289131224155426 2023-01-22 15:20:12.968749: step: 42/459, loss: 0.2985653281211853 2023-01-22 15:20:13.613564: step: 44/459, loss: 0.384563148021698 2023-01-22 15:20:14.242419: step: 46/459, loss: 0.6725402474403381 2023-01-22 15:20:14.838003: step: 48/459, loss: 0.19663524627685547 2023-01-22 15:20:15.525298: step: 50/459, loss: 0.16798369586467743 2023-01-22 15:20:16.156267: step: 52/459, loss: 0.2505118250846863 2023-01-22 15:20:16.833909: step: 54/459, loss: 0.4385372996330261 2023-01-22 15:20:17.461449: step: 56/459, loss: 0.36636993288993835 2023-01-22 15:20:18.072803: step: 58/459, loss: 2.4980266094207764 2023-01-22 15:20:18.658454: step: 60/459, loss: 0.3459934890270233 2023-01-22 15:20:19.323525: step: 62/459, loss: 0.15317969024181366 2023-01-22 15:20:19.934740: step: 64/459, loss: 0.21445029973983765 2023-01-22 15:20:20.494598: step: 66/459, loss: 1.8141539096832275 2023-01-22 15:20:21.156786: step: 68/459, loss: 0.32611724734306335 2023-01-22 15:20:21.796916: step: 70/459, loss: 0.4028526842594147 2023-01-22 15:20:22.419299: step: 72/459, loss: 0.22177502512931824 2023-01-22 15:20:23.148362: step: 74/459, loss: 0.5436217188835144 2023-01-22 15:20:23.764357: step: 76/459, loss: 0.10742014646530151 2023-01-22 15:20:24.345569: step: 78/459, loss: 0.11026818305253983 2023-01-22 15:20:24.925631: step: 80/459, loss: 0.2177916169166565 2023-01-22 15:20:25.546473: step: 82/459, loss: 0.37785786390304565 2023-01-22 15:20:26.236897: step: 84/459, loss: 1.3760075569152832 2023-01-22 15:20:26.840979: step: 86/459, loss: 0.1671517938375473 2023-01-22 15:20:27.440061: step: 88/459, loss: 0.3947390913963318 2023-01-22 15:20:28.105608: step: 90/459, loss: 0.3471064567565918 2023-01-22 15:20:28.702939: step: 92/459, loss: 0.28763821721076965 2023-01-22 15:20:29.362065: step: 94/459, loss: 2.4137320518493652 2023-01-22 15:20:29.979126: step: 96/459, loss: 1.3618450164794922 2023-01-22 15:20:30.549763: step: 98/459, loss: 0.1391868144273758 2023-01-22 15:20:31.212838: step: 100/459, loss: 0.2805194854736328 2023-01-22 15:20:31.832531: step: 102/459, loss: 0.19579127430915833 2023-01-22 15:20:32.447586: step: 104/459, loss: 0.1174357458949089 2023-01-22 15:20:33.127529: step: 106/459, loss: 1.030760645866394 2023-01-22 15:20:33.772619: step: 108/459, loss: 0.8416414856910706 2023-01-22 15:20:34.345878: step: 110/459, loss: 0.27731019258499146 2023-01-22 15:20:34.941784: step: 112/459, loss: 0.18773381412029266 2023-01-22 15:20:35.587301: step: 114/459, loss: 0.3697500228881836 2023-01-22 15:20:36.202825: step: 116/459, loss: 0.23280364274978638 2023-01-22 15:20:36.827914: step: 118/459, loss: 0.6442134976387024 2023-01-22 15:20:37.433225: step: 120/459, loss: 0.42518988251686096 2023-01-22 15:20:38.031525: step: 122/459, loss: 0.632259726524353 2023-01-22 15:20:38.639163: step: 124/459, loss: 0.8739961981773376 2023-01-22 15:20:39.215424: step: 126/459, loss: 0.6445445418357849 2023-01-22 15:20:39.813831: step: 128/459, loss: 0.21152307093143463 2023-01-22 15:20:40.448171: step: 130/459, loss: 0.6267078518867493 2023-01-22 15:20:41.191163: step: 132/459, loss: 0.40014755725860596 2023-01-22 15:20:41.885462: step: 134/459, loss: 1.2398409843444824 2023-01-22 15:20:42.529721: step: 136/459, loss: 0.7656758427619934 2023-01-22 15:20:43.214358: step: 138/459, loss: 0.1527511030435562 2023-01-22 15:20:43.877601: step: 140/459, loss: 0.611782431602478 2023-01-22 15:20:44.496012: step: 142/459, loss: 0.5201629996299744 2023-01-22 15:20:45.116704: step: 144/459, loss: 0.9564363956451416 2023-01-22 15:20:45.725704: step: 146/459, loss: 0.23481103777885437 2023-01-22 15:20:46.362273: step: 148/459, loss: 1.4260646104812622 2023-01-22 15:20:47.045491: step: 150/459, loss: 0.44150510430336 2023-01-22 15:20:47.629768: step: 152/459, loss: 0.22387470304965973 2023-01-22 15:20:48.354580: step: 154/459, loss: 0.519149661064148 2023-01-22 15:20:48.940924: step: 156/459, loss: 0.1792115569114685 2023-01-22 15:20:49.638547: step: 158/459, loss: 0.5101233720779419 2023-01-22 15:20:50.242956: step: 160/459, loss: 0.8086355328559875 2023-01-22 15:20:50.878171: step: 162/459, loss: 0.3418756127357483 2023-01-22 15:20:51.533809: step: 164/459, loss: 4.260010719299316 2023-01-22 15:20:52.160337: step: 166/459, loss: 0.2871413230895996 2023-01-22 15:20:52.820298: step: 168/459, loss: 0.9588357210159302 2023-01-22 15:20:53.394419: step: 170/459, loss: 0.569258451461792 2023-01-22 15:20:53.951438: step: 172/459, loss: 0.242995947599411 2023-01-22 15:20:54.612566: step: 174/459, loss: 0.2630918323993683 2023-01-22 15:20:55.255852: step: 176/459, loss: 0.1559864580631256 2023-01-22 15:20:55.866613: step: 178/459, loss: 2.2881062030792236 2023-01-22 15:20:56.462635: step: 180/459, loss: 0.2055037021636963 2023-01-22 15:20:57.101094: step: 182/459, loss: 0.12067535519599915 2023-01-22 15:20:57.722196: step: 184/459, loss: 0.3513210117816925 2023-01-22 15:20:58.405916: step: 186/459, loss: 0.24378371238708496 2023-01-22 15:20:58.984939: step: 188/459, loss: 0.5171254277229309 2023-01-22 15:20:59.598790: step: 190/459, loss: 0.557132363319397 2023-01-22 15:21:00.183602: step: 192/459, loss: 0.8259037137031555 2023-01-22 15:21:00.797751: step: 194/459, loss: 1.1231485605239868 2023-01-22 15:21:01.410671: step: 196/459, loss: 0.27564841508865356 2023-01-22 15:21:02.075326: step: 198/459, loss: 0.3037627637386322 2023-01-22 15:21:02.704412: step: 200/459, loss: 0.42455026507377625 2023-01-22 15:21:03.370893: step: 202/459, loss: 1.3016772270202637 2023-01-22 15:21:03.996272: step: 204/459, loss: 0.26516300439834595 2023-01-22 15:21:04.644509: step: 206/459, loss: 0.7221193313598633 2023-01-22 15:21:05.275676: step: 208/459, loss: 0.25853538513183594 2023-01-22 15:21:05.921262: step: 210/459, loss: 0.21808022260665894 2023-01-22 15:21:06.586870: step: 212/459, loss: 0.8349469304084778 2023-01-22 15:21:07.198246: step: 214/459, loss: 0.3244260549545288 2023-01-22 15:21:07.797155: step: 216/459, loss: 0.15803343057632446 2023-01-22 15:21:08.448435: step: 218/459, loss: 0.8678179979324341 2023-01-22 15:21:09.031162: step: 220/459, loss: 0.09152106195688248 2023-01-22 15:21:09.616051: step: 222/459, loss: 0.2625151574611664 2023-01-22 15:21:10.253805: step: 224/459, loss: 0.15601393580436707 2023-01-22 15:21:10.873734: step: 226/459, loss: 0.377032995223999 2023-01-22 15:21:11.443863: step: 228/459, loss: 0.4707416892051697 2023-01-22 15:21:12.203792: step: 230/459, loss: 0.9052315950393677 2023-01-22 15:21:12.862218: step: 232/459, loss: 0.21287992596626282 2023-01-22 15:21:13.549926: step: 234/459, loss: 1.144433856010437 2023-01-22 15:21:14.167488: step: 236/459, loss: 0.3278124928474426 2023-01-22 15:21:14.745471: step: 238/459, loss: 0.13387930393218994 2023-01-22 15:21:15.378728: step: 240/459, loss: 0.5622267723083496 2023-01-22 15:21:15.975156: step: 242/459, loss: 0.315301775932312 2023-01-22 15:21:16.572487: step: 244/459, loss: 0.5328688025474548 2023-01-22 15:21:17.177339: step: 246/459, loss: 0.18658001720905304 2023-01-22 15:21:17.742899: step: 248/459, loss: 1.3642607927322388 2023-01-22 15:21:18.333552: step: 250/459, loss: 0.14237135648727417 2023-01-22 15:21:18.928196: step: 252/459, loss: 0.41465920209884644 2023-01-22 15:21:19.610608: step: 254/459, loss: 0.3072522282600403 2023-01-22 15:21:20.261036: step: 256/459, loss: 0.7759531140327454 2023-01-22 15:21:20.925236: step: 258/459, loss: 0.2167145162820816 2023-01-22 15:21:21.533922: step: 260/459, loss: 0.3942192494869232 2023-01-22 15:21:22.126793: step: 262/459, loss: 0.3173633813858032 2023-01-22 15:21:22.750104: step: 264/459, loss: 1.1850749254226685 2023-01-22 15:21:23.362371: step: 266/459, loss: 0.4052099585533142 2023-01-22 15:21:24.024054: step: 268/459, loss: 0.710238516330719 2023-01-22 15:21:24.613461: step: 270/459, loss: 0.3306393623352051 2023-01-22 15:21:25.193990: step: 272/459, loss: 0.29363784193992615 2023-01-22 15:21:25.823797: step: 274/459, loss: 0.41291365027427673 2023-01-22 15:21:26.442958: step: 276/459, loss: 0.40876996517181396 2023-01-22 15:21:27.160111: step: 278/459, loss: 1.0362060070037842 2023-01-22 15:21:27.854611: step: 280/459, loss: 0.6046481132507324 2023-01-22 15:21:28.487062: step: 282/459, loss: 0.20918318629264832 2023-01-22 15:21:29.115135: step: 284/459, loss: 0.26487496495246887 2023-01-22 15:21:29.715761: step: 286/459, loss: 0.9730357527732849 2023-01-22 15:21:30.345780: step: 288/459, loss: 0.4531010687351227 2023-01-22 15:21:30.921494: step: 290/459, loss: 0.14813072979450226 2023-01-22 15:21:31.581636: step: 292/459, loss: 0.6251538991928101 2023-01-22 15:21:32.226409: step: 294/459, loss: 0.21136942505836487 2023-01-22 15:21:32.891596: step: 296/459, loss: 0.4623223841190338 2023-01-22 15:21:33.528153: step: 298/459, loss: 0.20480145514011383 2023-01-22 15:21:34.205992: step: 300/459, loss: 0.4767853319644928 2023-01-22 15:21:34.834422: step: 302/459, loss: 0.6174289584159851 2023-01-22 15:21:35.374128: step: 304/459, loss: 0.22456151247024536 2023-01-22 15:21:36.018901: step: 306/459, loss: 0.34704747796058655 2023-01-22 15:21:36.694571: step: 308/459, loss: 0.2736630439758301 2023-01-22 15:21:37.297604: step: 310/459, loss: 0.21736152470111847 2023-01-22 15:21:37.931328: step: 312/459, loss: 0.29812371730804443 2023-01-22 15:21:38.511036: step: 314/459, loss: 0.37591636180877686 2023-01-22 15:21:39.123378: step: 316/459, loss: 0.6314796805381775 2023-01-22 15:21:39.869680: step: 318/459, loss: 0.7250523567199707 2023-01-22 15:21:40.504833: step: 320/459, loss: 0.6138522624969482 2023-01-22 15:21:41.126583: step: 322/459, loss: 0.47587934136390686 2023-01-22 15:21:41.712809: step: 324/459, loss: 0.12672238051891327 2023-01-22 15:21:42.299589: step: 326/459, loss: 0.8178750276565552 2023-01-22 15:21:42.923123: step: 328/459, loss: 0.31823402643203735 2023-01-22 15:21:43.484476: step: 330/459, loss: 0.1595243215560913 2023-01-22 15:21:44.116965: step: 332/459, loss: 0.1909574717283249 2023-01-22 15:21:44.790160: step: 334/459, loss: 0.396312952041626 2023-01-22 15:21:45.421438: step: 336/459, loss: 0.23568128049373627 2023-01-22 15:21:46.019448: step: 338/459, loss: 0.10804253816604614 2023-01-22 15:21:46.710798: step: 340/459, loss: 0.9498347640037537 2023-01-22 15:21:47.340173: step: 342/459, loss: 0.4744448661804199 2023-01-22 15:21:47.932362: step: 344/459, loss: 4.76877498626709 2023-01-22 15:21:48.562554: step: 346/459, loss: 0.6311253905296326 2023-01-22 15:21:49.181106: step: 348/459, loss: 0.40692809224128723 2023-01-22 15:21:49.830553: step: 350/459, loss: 0.7663507461547852 2023-01-22 15:21:50.467328: step: 352/459, loss: 1.2454643249511719 2023-01-22 15:21:51.196982: step: 354/459, loss: 0.28465700149536133 2023-01-22 15:21:51.825338: step: 356/459, loss: 0.4695777893066406 2023-01-22 15:21:52.421002: step: 358/459, loss: 0.2994840443134308 2023-01-22 15:21:53.019035: step: 360/459, loss: 0.20981571078300476 2023-01-22 15:21:53.698843: step: 362/459, loss: 0.40415894985198975 2023-01-22 15:21:54.357031: step: 364/459, loss: 0.6177805662155151 2023-01-22 15:21:54.911774: step: 366/459, loss: 0.3565528988838196 2023-01-22 15:21:55.534511: step: 368/459, loss: 0.7801136374473572 2023-01-22 15:21:56.146870: step: 370/459, loss: 1.097099781036377 2023-01-22 15:21:56.710963: step: 372/459, loss: 0.1995658427476883 2023-01-22 15:21:57.337881: step: 374/459, loss: 0.3666130006313324 2023-01-22 15:21:58.017745: step: 376/459, loss: 0.7471248507499695 2023-01-22 15:21:58.688368: step: 378/459, loss: 1.134049892425537 2023-01-22 15:21:59.312444: step: 380/459, loss: 0.3375816345214844 2023-01-22 15:21:59.906988: step: 382/459, loss: 0.6204594373703003 2023-01-22 15:22:00.601054: step: 384/459, loss: 1.4727789163589478 2023-01-22 15:22:01.227814: step: 386/459, loss: 0.5268188118934631 2023-01-22 15:22:01.851357: step: 388/459, loss: 0.46797117590904236 2023-01-22 15:22:02.453678: step: 390/459, loss: 1.1953842639923096 2023-01-22 15:22:03.059088: step: 392/459, loss: 0.5633912682533264 2023-01-22 15:22:03.687343: step: 394/459, loss: 0.4633677303791046 2023-01-22 15:22:04.307434: step: 396/459, loss: 0.2843838036060333 2023-01-22 15:22:04.971177: step: 398/459, loss: 0.20608484745025635 2023-01-22 15:22:05.638320: step: 400/459, loss: 1.306155800819397 2023-01-22 15:22:06.275065: step: 402/459, loss: 0.35887545347213745 2023-01-22 15:22:06.899413: step: 404/459, loss: 0.2585238814353943 2023-01-22 15:22:07.539193: step: 406/459, loss: 0.5347831845283508 2023-01-22 15:22:08.162440: step: 408/459, loss: 0.17913833260536194 2023-01-22 15:22:08.737857: step: 410/459, loss: 0.4711592495441437 2023-01-22 15:22:09.318232: step: 412/459, loss: 0.14528276026248932 2023-01-22 15:22:09.995454: step: 414/459, loss: 1.5739394426345825 2023-01-22 15:22:10.656144: step: 416/459, loss: 0.46608489751815796 2023-01-22 15:22:11.244797: step: 418/459, loss: 0.2707403898239136 2023-01-22 15:22:11.880134: step: 420/459, loss: 0.164920836687088 2023-01-22 15:22:12.482331: step: 422/459, loss: 1.2766045331954956 2023-01-22 15:22:13.114709: step: 424/459, loss: 0.3452325761318207 2023-01-22 15:22:13.758646: step: 426/459, loss: 0.29729557037353516 2023-01-22 15:22:14.398842: step: 428/459, loss: 0.20685622096061707 2023-01-22 15:22:14.987143: step: 430/459, loss: 0.16847126185894012 2023-01-22 15:22:15.601631: step: 432/459, loss: 0.2073439061641693 2023-01-22 15:22:16.236693: step: 434/459, loss: 2.612943172454834 2023-01-22 15:22:16.828298: step: 436/459, loss: 0.5618066787719727 2023-01-22 15:22:17.437025: step: 438/459, loss: 3.0394015312194824 2023-01-22 15:22:18.052238: step: 440/459, loss: 0.1573449671268463 2023-01-22 15:22:18.716094: step: 442/459, loss: 0.4453533887863159 2023-01-22 15:22:19.353493: step: 444/459, loss: 2.1031861305236816 2023-01-22 15:22:19.939152: step: 446/459, loss: 0.2101619988679886 2023-01-22 15:22:20.488373: step: 448/459, loss: 2.070025682449341 2023-01-22 15:22:21.148892: step: 450/459, loss: 0.5068386793136597 2023-01-22 15:22:21.724421: step: 452/459, loss: 0.5298334956169128 2023-01-22 15:22:22.315574: step: 454/459, loss: 1.4727836847305298 2023-01-22 15:22:22.921396: step: 456/459, loss: 0.22266370058059692 2023-01-22 15:22:23.544985: step: 458/459, loss: 0.21911731362342834 2023-01-22 15:22:24.121601: step: 460/459, loss: 0.4738686680793762 2023-01-22 15:22:24.742319: step: 462/459, loss: 0.2166491150856018 2023-01-22 15:22:25.341018: step: 464/459, loss: 0.6831449270248413 2023-01-22 15:22:25.889505: step: 466/459, loss: 0.2104986011981964 2023-01-22 15:22:26.512987: step: 468/459, loss: 0.8132599592208862 2023-01-22 15:22:27.122248: step: 470/459, loss: 1.1451870203018188 2023-01-22 15:22:27.774709: step: 472/459, loss: 0.2082943469285965 2023-01-22 15:22:28.379454: step: 474/459, loss: 0.31853607296943665 2023-01-22 15:22:28.984462: step: 476/459, loss: 0.33776330947875977 2023-01-22 15:22:29.563722: step: 478/459, loss: 0.22694028913974762 2023-01-22 15:22:30.231011: step: 480/459, loss: 0.3475223481655121 2023-01-22 15:22:30.793373: step: 482/459, loss: 0.733771562576294 2023-01-22 15:22:31.426133: step: 484/459, loss: 0.9573681950569153 2023-01-22 15:22:32.022628: step: 486/459, loss: 0.12514257431030273 2023-01-22 15:22:32.679902: step: 488/459, loss: 0.2650180459022522 2023-01-22 15:22:33.267066: step: 490/459, loss: 0.4428163170814514 2023-01-22 15:22:33.890497: step: 492/459, loss: 1.1470547914505005 2023-01-22 15:22:34.503770: step: 494/459, loss: 0.28551360964775085 2023-01-22 15:22:35.104254: step: 496/459, loss: 0.03787682205438614 2023-01-22 15:22:35.658561: step: 498/459, loss: 0.5881651043891907 2023-01-22 15:22:36.205200: step: 500/459, loss: 1.6384412050247192 2023-01-22 15:22:36.800311: step: 502/459, loss: 0.9127843379974365 2023-01-22 15:22:37.448552: step: 504/459, loss: 0.3910522162914276 2023-01-22 15:22:38.062103: step: 506/459, loss: 0.19152477383613586 2023-01-22 15:22:38.685226: step: 508/459, loss: 0.1406506448984146 2023-01-22 15:22:39.225202: step: 510/459, loss: 0.15281754732131958 2023-01-22 15:22:39.800815: step: 512/459, loss: 0.8463693857192993 2023-01-22 15:22:40.438498: step: 514/459, loss: 0.23142293095588684 2023-01-22 15:22:41.113173: step: 516/459, loss: 0.5709249973297119 2023-01-22 15:22:41.689866: step: 518/459, loss: 0.11177532374858856 2023-01-22 15:22:42.342250: step: 520/459, loss: 0.15162557363510132 2023-01-22 15:22:42.964489: step: 522/459, loss: 0.23878151178359985 2023-01-22 15:22:43.595850: step: 524/459, loss: 1.0252002477645874 2023-01-22 15:22:44.284020: step: 526/459, loss: 0.1315208226442337 2023-01-22 15:22:44.916114: step: 528/459, loss: 0.4753657579421997 2023-01-22 15:22:45.608054: step: 530/459, loss: 1.3615155220031738 2023-01-22 15:22:46.202378: step: 532/459, loss: 0.757366955280304 2023-01-22 15:22:46.876517: step: 534/459, loss: 0.6775677800178528 2023-01-22 15:22:47.523033: step: 536/459, loss: 0.22803322970867157 2023-01-22 15:22:48.068857: step: 538/459, loss: 0.11239159852266312 2023-01-22 15:22:48.716149: step: 540/459, loss: 0.8075510263442993 2023-01-22 15:22:49.466665: step: 542/459, loss: 0.5871496796607971 2023-01-22 15:22:50.151424: step: 544/459, loss: 0.37484773993492126 2023-01-22 15:22:50.741535: step: 546/459, loss: 0.20009730756282806 2023-01-22 15:22:51.358179: step: 548/459, loss: 0.30995574593544006 2023-01-22 15:22:52.051549: step: 550/459, loss: 0.4559810161590576 2023-01-22 15:22:52.697609: step: 552/459, loss: 2.1565117835998535 2023-01-22 15:22:53.334764: step: 554/459, loss: 0.42791807651519775 2023-01-22 15:22:54.027912: step: 556/459, loss: 0.37778401374816895 2023-01-22 15:22:54.654732: step: 558/459, loss: 0.8193542957305908 2023-01-22 15:22:55.250495: step: 560/459, loss: 0.6679403185844421 2023-01-22 15:22:55.961013: step: 562/459, loss: 0.29980748891830444 2023-01-22 15:22:56.639178: step: 564/459, loss: 0.1963479369878769 2023-01-22 15:22:57.192179: step: 566/459, loss: 0.3296442925930023 2023-01-22 15:22:57.842323: step: 568/459, loss: 0.43836942315101624 2023-01-22 15:22:58.486383: step: 570/459, loss: 1.1257067918777466 2023-01-22 15:22:59.124075: step: 572/459, loss: 0.24765148758888245 2023-01-22 15:22:59.782938: step: 574/459, loss: 0.36402779817581177 2023-01-22 15:23:00.449000: step: 576/459, loss: 0.2518664002418518 2023-01-22 15:23:01.119602: step: 578/459, loss: 0.48960429430007935 2023-01-22 15:23:01.723004: step: 580/459, loss: 0.6344400644302368 2023-01-22 15:23:02.330761: step: 582/459, loss: 2.6753625869750977 2023-01-22 15:23:02.977808: step: 584/459, loss: 0.27188727259635925 2023-01-22 15:23:03.622568: step: 586/459, loss: 0.3934114873409271 2023-01-22 15:23:04.257667: step: 588/459, loss: 0.17045211791992188 2023-01-22 15:23:04.891500: step: 590/459, loss: 0.5294079184532166 2023-01-22 15:23:05.518929: step: 592/459, loss: 0.5383955836296082 2023-01-22 15:23:06.151034: step: 594/459, loss: 0.27244889736175537 2023-01-22 15:23:06.763942: step: 596/459, loss: 0.6372602581977844 2023-01-22 15:23:07.467025: step: 598/459, loss: 0.4068149924278259 2023-01-22 15:23:08.113785: step: 600/459, loss: 0.2604975998401642 2023-01-22 15:23:08.722109: step: 602/459, loss: 0.965455949306488 2023-01-22 15:23:09.354290: step: 604/459, loss: 0.5294778347015381 2023-01-22 15:23:09.940056: step: 606/459, loss: 0.16985148191452026 2023-01-22 15:23:10.554951: step: 608/459, loss: 1.6558798551559448 2023-01-22 15:23:11.194163: step: 610/459, loss: 0.7324720621109009 2023-01-22 15:23:11.792714: step: 612/459, loss: 0.4809347689151764 2023-01-22 15:23:12.371101: step: 614/459, loss: 0.2047688215970993 2023-01-22 15:23:13.059393: step: 616/459, loss: 0.3570200502872467 2023-01-22 15:23:13.750610: step: 618/459, loss: 0.4054410457611084 2023-01-22 15:23:14.427185: step: 620/459, loss: 0.9139653444290161 2023-01-22 15:23:15.030276: step: 622/459, loss: 0.792643129825592 2023-01-22 15:23:15.619801: step: 624/459, loss: 1.1399147510528564 2023-01-22 15:23:16.240233: step: 626/459, loss: 1.1627644300460815 2023-01-22 15:23:16.910757: step: 628/459, loss: 12.559342384338379 2023-01-22 15:23:17.475738: step: 630/459, loss: 0.1342494785785675 2023-01-22 15:23:18.073541: step: 632/459, loss: 0.46846678853034973 2023-01-22 15:23:18.664533: step: 634/459, loss: 0.4093381464481354 2023-01-22 15:23:19.248453: step: 636/459, loss: 0.8124847412109375 2023-01-22 15:23:19.892013: step: 638/459, loss: 0.22500362992286682 2023-01-22 15:23:20.470435: step: 640/459, loss: 0.33325597643852234 2023-01-22 15:23:21.074338: step: 642/459, loss: 0.4989381730556488 2023-01-22 15:23:21.744932: step: 644/459, loss: 0.5311490297317505 2023-01-22 15:23:22.401865: step: 646/459, loss: 0.38586705923080444 2023-01-22 15:23:23.025695: step: 648/459, loss: 1.1546471118927002 2023-01-22 15:23:23.668422: step: 650/459, loss: 0.3500148057937622 2023-01-22 15:23:24.281877: step: 652/459, loss: 0.1526736468076706 2023-01-22 15:23:24.969078: step: 654/459, loss: 0.40192320942878723 2023-01-22 15:23:25.558752: step: 656/459, loss: 0.16635189950466156 2023-01-22 15:23:26.150224: step: 658/459, loss: 0.4386045038700104 2023-01-22 15:23:26.752404: step: 660/459, loss: 0.45597073435783386 2023-01-22 15:23:27.418624: step: 662/459, loss: 0.4244166612625122 2023-01-22 15:23:28.013717: step: 664/459, loss: 0.9576419591903687 2023-01-22 15:23:28.595725: step: 666/459, loss: 0.24198831617832184 2023-01-22 15:23:29.211816: step: 668/459, loss: 0.7303219437599182 2023-01-22 15:23:29.887493: step: 670/459, loss: 1.0919196605682373 2023-01-22 15:23:30.531599: step: 672/459, loss: 0.3131244480609894 2023-01-22 15:23:31.153196: step: 674/459, loss: 0.4546944499015808 2023-01-22 15:23:31.775235: step: 676/459, loss: 0.23884332180023193 2023-01-22 15:23:32.363740: step: 678/459, loss: 0.3161064684391022 2023-01-22 15:23:33.054207: step: 680/459, loss: 0.8141167759895325 2023-01-22 15:23:33.675737: step: 682/459, loss: 0.3058764338493347 2023-01-22 15:23:34.274983: step: 684/459, loss: 0.5554999113082886 2023-01-22 15:23:34.945700: step: 686/459, loss: 0.3193308413028717 2023-01-22 15:23:35.607669: step: 688/459, loss: 0.6244921088218689 2023-01-22 15:23:36.209114: step: 690/459, loss: 0.291228324174881 2023-01-22 15:23:36.790390: step: 692/459, loss: 0.5045892000198364 2023-01-22 15:23:37.402342: step: 694/459, loss: 1.011904001235962 2023-01-22 15:23:38.077730: step: 696/459, loss: 0.34571516513824463 2023-01-22 15:23:38.638187: step: 698/459, loss: 0.18966272473335266 2023-01-22 15:23:39.277265: step: 700/459, loss: 0.5362945199012756 2023-01-22 15:23:39.894180: step: 702/459, loss: 0.14980879426002502 2023-01-22 15:23:40.577280: step: 704/459, loss: 0.7801526784896851 2023-01-22 15:23:41.165919: step: 706/459, loss: 0.1204671561717987 2023-01-22 15:23:41.812236: step: 708/459, loss: 0.6770131587982178 2023-01-22 15:23:42.439677: step: 710/459, loss: 0.1625037044286728 2023-01-22 15:23:43.049338: step: 712/459, loss: 0.48841261863708496 2023-01-22 15:23:43.577701: step: 714/459, loss: 0.17331723868846893 2023-01-22 15:23:44.231992: step: 716/459, loss: 0.19478115439414978 2023-01-22 15:23:44.852120: step: 718/459, loss: 0.48953792452812195 2023-01-22 15:23:45.489349: step: 720/459, loss: 0.6060608625411987 2023-01-22 15:23:46.099219: step: 722/459, loss: 0.35050490498542786 2023-01-22 15:23:46.726849: step: 724/459, loss: 0.20511215925216675 2023-01-22 15:23:47.336900: step: 726/459, loss: 0.7968136072158813 2023-01-22 15:23:48.013030: step: 728/459, loss: 0.1437624990940094 2023-01-22 15:23:48.604016: step: 730/459, loss: 0.3404155671596527 2023-01-22 15:23:49.190814: step: 732/459, loss: 6.439411163330078 2023-01-22 15:23:49.845445: step: 734/459, loss: 0.4630647301673889 2023-01-22 15:23:50.525113: step: 736/459, loss: 0.17782387137413025 2023-01-22 15:23:51.206029: step: 738/459, loss: 0.27658629417419434 2023-01-22 15:23:51.878732: step: 740/459, loss: 1.1935728788375854 2023-01-22 15:23:52.507459: step: 742/459, loss: 0.618191659450531 2023-01-22 15:23:53.098689: step: 744/459, loss: 0.6035469770431519 2023-01-22 15:23:53.713048: step: 746/459, loss: 0.26997748017311096 2023-01-22 15:23:54.464840: step: 748/459, loss: 0.3550719618797302 2023-01-22 15:23:55.106008: step: 750/459, loss: 0.13607481122016907 2023-01-22 15:23:55.755732: step: 752/459, loss: 0.29278433322906494 2023-01-22 15:23:56.364858: step: 754/459, loss: 0.46324247121810913 2023-01-22 15:23:56.975363: step: 756/459, loss: 0.7986186742782593 2023-01-22 15:23:57.693854: step: 758/459, loss: 0.12370672821998596 2023-01-22 15:23:58.303255: step: 760/459, loss: 0.42831987142562866 2023-01-22 15:23:58.932357: step: 762/459, loss: 0.2456105500459671 2023-01-22 15:23:59.598657: step: 764/459, loss: 0.1939760446548462 2023-01-22 15:24:00.205985: step: 766/459, loss: 0.42399564385414124 2023-01-22 15:24:00.809325: step: 768/459, loss: 0.7310754656791687 2023-01-22 15:24:01.446150: step: 770/459, loss: 0.5203260183334351 2023-01-22 15:24:02.154401: step: 772/459, loss: 0.8540104031562805 2023-01-22 15:24:02.804608: step: 774/459, loss: 0.4690095782279968 2023-01-22 15:24:03.446895: step: 776/459, loss: 1.6022462844848633 2023-01-22 15:24:04.136274: step: 778/459, loss: 0.3179781436920166 2023-01-22 15:24:04.773471: step: 780/459, loss: 0.3922058045864105 2023-01-22 15:24:05.389558: step: 782/459, loss: 0.23432619869709015 2023-01-22 15:24:05.988348: step: 784/459, loss: 0.26452234387397766 2023-01-22 15:24:06.568925: step: 786/459, loss: 0.9372262358665466 2023-01-22 15:24:07.174450: step: 788/459, loss: 0.3618421256542206 2023-01-22 15:24:07.951960: step: 790/459, loss: 0.19453710317611694 2023-01-22 15:24:08.569678: step: 792/459, loss: 0.41731882095336914 2023-01-22 15:24:09.121056: step: 794/459, loss: 0.15472769737243652 2023-01-22 15:24:09.687800: step: 796/459, loss: 0.5213311314582825 2023-01-22 15:24:10.323360: step: 798/459, loss: 1.3410916328430176 2023-01-22 15:24:10.946996: step: 800/459, loss: 0.34266647696495056 2023-01-22 15:24:11.642014: step: 802/459, loss: 0.3003466725349426 2023-01-22 15:24:12.219370: step: 804/459, loss: 0.36678752303123474 2023-01-22 15:24:12.776094: step: 806/459, loss: 0.7016041874885559 2023-01-22 15:24:13.400198: step: 808/459, loss: 0.31062570214271545 2023-01-22 15:24:14.040227: step: 810/459, loss: 1.006265640258789 2023-01-22 15:24:14.605265: step: 812/459, loss: 0.24781201779842377 2023-01-22 15:24:15.216141: step: 814/459, loss: 0.4160636067390442 2023-01-22 15:24:15.779604: step: 816/459, loss: 0.3602263927459717 2023-01-22 15:24:16.402506: step: 818/459, loss: 0.5913470983505249 2023-01-22 15:24:16.986710: step: 820/459, loss: 0.25373539328575134 2023-01-22 15:24:17.543034: step: 822/459, loss: 0.09038509428501129 2023-01-22 15:24:18.199599: step: 824/459, loss: 1.010368824005127 2023-01-22 15:24:18.786114: step: 826/459, loss: 0.1231628879904747 2023-01-22 15:24:19.440546: step: 828/459, loss: 0.183567613363266 2023-01-22 15:24:20.049861: step: 830/459, loss: 0.9728459119796753 2023-01-22 15:24:20.692130: step: 832/459, loss: 0.1319602131843567 2023-01-22 15:24:21.361403: step: 834/459, loss: 0.37020841240882874 2023-01-22 15:24:22.029411: step: 836/459, loss: 0.7530136108398438 2023-01-22 15:24:22.621602: step: 838/459, loss: 0.34448984265327454 2023-01-22 15:24:23.315410: step: 840/459, loss: 0.9521815180778503 2023-01-22 15:24:23.908993: step: 842/459, loss: 0.3878316283226013 2023-01-22 15:24:24.484077: step: 844/459, loss: 0.3897787630558014 2023-01-22 15:24:25.084059: step: 846/459, loss: 0.45259132981300354 2023-01-22 15:24:25.742890: step: 848/459, loss: 0.6108866930007935 2023-01-22 15:24:26.386036: step: 850/459, loss: 6.880231857299805 2023-01-22 15:24:26.983558: step: 852/459, loss: 0.2584288716316223 2023-01-22 15:24:27.624862: step: 854/459, loss: 0.46530765295028687 2023-01-22 15:24:28.384683: step: 856/459, loss: 0.6598843336105347 2023-01-22 15:24:29.029364: step: 858/459, loss: 0.7568182945251465 2023-01-22 15:24:29.654476: step: 860/459, loss: 0.4232601523399353 2023-01-22 15:24:30.286253: step: 862/459, loss: 0.636298656463623 2023-01-22 15:24:30.927590: step: 864/459, loss: 0.29865819215774536 2023-01-22 15:24:31.634266: step: 866/459, loss: 0.6559057235717773 2023-01-22 15:24:32.286923: step: 868/459, loss: 2.241851329803467 2023-01-22 15:24:32.887986: step: 870/459, loss: 1.258194088935852 2023-01-22 15:24:33.540969: step: 872/459, loss: 0.1537170708179474 2023-01-22 15:24:34.101265: step: 874/459, loss: 0.5536041259765625 2023-01-22 15:24:34.709910: step: 876/459, loss: 0.327373206615448 2023-01-22 15:24:35.320737: step: 878/459, loss: 0.19122497737407684 2023-01-22 15:24:35.927269: step: 880/459, loss: 0.5117901563644409 2023-01-22 15:24:36.547877: step: 882/459, loss: 0.42642468214035034 2023-01-22 15:24:37.177836: step: 884/459, loss: 0.36761629581451416 2023-01-22 15:24:37.762914: step: 886/459, loss: 0.07566090673208237 2023-01-22 15:24:38.356076: step: 888/459, loss: 0.47665077447891235 2023-01-22 15:24:38.995446: step: 890/459, loss: 0.5525952577590942 2023-01-22 15:24:39.576455: step: 892/459, loss: 0.3930504620075226 2023-01-22 15:24:40.185339: step: 894/459, loss: 0.26422473788261414 2023-01-22 15:24:40.882366: step: 896/459, loss: 1.182375192642212 2023-01-22 15:24:41.525973: step: 898/459, loss: 0.2573138177394867 2023-01-22 15:24:42.171849: step: 900/459, loss: 0.26646214723587036 2023-01-22 15:24:42.777042: step: 902/459, loss: 0.249113067984581 2023-01-22 15:24:43.445740: step: 904/459, loss: 0.5319468379020691 2023-01-22 15:24:44.086000: step: 906/459, loss: 0.1835823953151703 2023-01-22 15:24:44.723117: step: 908/459, loss: 0.5573098063468933 2023-01-22 15:24:45.379720: step: 910/459, loss: 0.23924750089645386 2023-01-22 15:24:46.056392: step: 912/459, loss: 0.2827318608760834 2023-01-22 15:24:46.705890: step: 914/459, loss: 0.6728211045265198 2023-01-22 15:24:47.398295: step: 916/459, loss: 0.6610950231552124 2023-01-22 15:24:48.078791: step: 918/459, loss: 0.431226521730423 2023-01-22 15:24:48.596892: step: 920/459, loss: 0.023213671520352364 ================================================== Loss: 0.594 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28385255427447026, 'r': 0.3237104081953637, 'f1': 0.3024740870903486}, 'combined': 0.22287564311920424, 'epoch': 7} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3241998926349282, 'r': 0.29657088999876935, 'f1': 0.30977054135288123}, 'combined': 0.19825314646584397, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2797278904926072, 'r': 0.3338687725234344, 'f1': 0.30440976318313134}, 'combined': 0.22430193076651783, 'epoch': 7} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3271284749925289, 'r': 0.29714664561481074, 'f1': 0.31141759721114737}, 'combined': 0.19930726221513428, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2887313338741276, 'r': 0.320508216919098, 'f1': 0.30379106172008025}, 'combined': 0.22384604547795386, 'epoch': 7} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.33070809808446827, 'r': 0.30633379786168213, 'f1': 0.3180546470004617}, 'combined': 0.2280391808682556, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.19270833333333331, 'r': 0.35238095238095235, 'f1': 0.24915824915824908}, 'combined': 0.1661054994388327, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.22727272727272727, 'r': 0.43478260869565216, 'f1': 0.2985074626865672}, 'combined': 0.1492537313432836, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24, 'r': 0.20689655172413793, 'f1': 0.22222222222222224}, 'combined': 0.14814814814814814, 'epoch': 7} New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27463650173611115, 'r': 0.29960345643939396, 'f1': 0.2865772192028986}, 'combined': 0.21116216151792525, 'epoch': 4} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3275304334361018, 'r': 0.25382122518550904, 'f1': 0.28600305741761856}, 'combined': 0.18304195674727586, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2387005649717514, 'r': 0.40238095238095234, 'f1': 0.29964539007092195}, 'combined': 0.1997635933806146, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2797278904926072, 'r': 0.3338687725234344, 'f1': 0.30440976318313134}, 'combined': 0.22430193076651783, 'epoch': 7} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3271284749925289, 'r': 0.29714664561481074, 'f1': 0.31141759721114737}, 'combined': 0.19930726221513428, 'epoch': 7} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.22727272727272727, 'r': 0.43478260869565216, 'f1': 0.2985074626865672}, 'combined': 0.1492537313432836, 'epoch': 7} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2887313338741276, 'r': 0.320508216919098, 'f1': 0.30379106172008025}, 'combined': 0.22384604547795386, 'epoch': 7} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.33070809808446827, 'r': 0.30633379786168213, 'f1': 0.3180546470004617}, 'combined': 0.2280391808682556, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24, 'r': 0.20689655172413793, 'f1': 0.22222222222222224}, 'combined': 0.14814814814814814, 'epoch': 7} ****************************** Epoch: 8 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 15:27:48.205324: step: 2/459, loss: 0.188322514295578 2023-01-22 15:27:48.910102: step: 4/459, loss: 0.566713273525238 2023-01-22 15:27:49.583624: step: 6/459, loss: 0.2897389531135559 2023-01-22 15:27:50.238081: step: 8/459, loss: 0.2144949585199356 2023-01-22 15:27:50.830643: step: 10/459, loss: 0.891121506690979 2023-01-22 15:27:51.429341: step: 12/459, loss: 0.3861578404903412 2023-01-22 15:27:52.081330: step: 14/459, loss: 0.8423375487327576 2023-01-22 15:27:52.684040: step: 16/459, loss: 0.15320971608161926 2023-01-22 15:27:53.263414: step: 18/459, loss: 0.10784182697534561 2023-01-22 15:27:53.875561: step: 20/459, loss: 0.3913766145706177 2023-01-22 15:27:54.466403: step: 22/459, loss: 0.6600277423858643 2023-01-22 15:27:55.070626: step: 24/459, loss: 0.24278207123279572 2023-01-22 15:27:55.723875: step: 26/459, loss: 0.31654420495033264 2023-01-22 15:27:56.328179: step: 28/459, loss: 0.2159385234117508 2023-01-22 15:27:56.893310: step: 30/459, loss: 0.5570846796035767 2023-01-22 15:27:57.558054: step: 32/459, loss: 0.33487701416015625 2023-01-22 15:27:58.212508: step: 34/459, loss: 0.7813060283660889 2023-01-22 15:27:58.864399: step: 36/459, loss: 0.3023883104324341 2023-01-22 15:27:59.477560: step: 38/459, loss: 0.8444631099700928 2023-01-22 15:28:00.164005: step: 40/459, loss: 0.6825090050697327 2023-01-22 15:28:00.836402: step: 42/459, loss: 0.14725475013256073 2023-01-22 15:28:01.469491: step: 44/459, loss: 0.4373602271080017 2023-01-22 15:28:02.070276: step: 46/459, loss: 0.17005856335163116 2023-01-22 15:28:02.653921: step: 48/459, loss: 0.1739121973514557 2023-01-22 15:28:03.310134: step: 50/459, loss: 0.6942759156227112 2023-01-22 15:28:03.938788: step: 52/459, loss: 0.44170913100242615 2023-01-22 15:28:04.593204: step: 54/459, loss: 0.7348113059997559 2023-01-22 15:28:05.269462: step: 56/459, loss: 0.632363498210907 2023-01-22 15:28:05.941727: step: 58/459, loss: 0.20922087132930756 2023-01-22 15:28:06.626368: step: 60/459, loss: 0.1338041126728058 2023-01-22 15:28:07.264774: step: 62/459, loss: 0.44583550095558167 2023-01-22 15:28:07.909724: step: 64/459, loss: 0.19036439061164856 2023-01-22 15:28:08.566513: step: 66/459, loss: 0.1880076676607132 2023-01-22 15:28:09.319795: step: 68/459, loss: 0.3869456946849823 2023-01-22 15:28:09.934616: step: 70/459, loss: 0.28696808218955994 2023-01-22 15:28:10.584605: step: 72/459, loss: 0.12828639149665833 2023-01-22 15:28:11.204592: step: 74/459, loss: 2.220947027206421 2023-01-22 15:28:11.824134: step: 76/459, loss: 0.2254684418439865 2023-01-22 15:28:12.464949: step: 78/459, loss: 0.12975424528121948 2023-01-22 15:28:13.117607: step: 80/459, loss: 0.20650270581245422 2023-01-22 15:28:13.737689: step: 82/459, loss: 0.17772608995437622 2023-01-22 15:28:14.345764: step: 84/459, loss: 0.14210060238838196 2023-01-22 15:28:14.978230: step: 86/459, loss: 0.23148703575134277 2023-01-22 15:28:15.572712: step: 88/459, loss: 0.3287460207939148 2023-01-22 15:28:16.202189: step: 90/459, loss: 0.8860474824905396 2023-01-22 15:28:16.820001: step: 92/459, loss: 0.12391741573810577 2023-01-22 15:28:17.506491: step: 94/459, loss: 0.397208034992218 2023-01-22 15:28:18.123502: step: 96/459, loss: 0.11715436726808548 2023-01-22 15:28:18.769005: step: 98/459, loss: 0.249114528298378 2023-01-22 15:28:19.382926: step: 100/459, loss: 0.11831999570131302 2023-01-22 15:28:20.070032: step: 102/459, loss: 10.548068046569824 2023-01-22 15:28:20.650430: step: 104/459, loss: 0.3585111200809479 2023-01-22 15:28:21.272995: step: 106/459, loss: 0.210575670003891 2023-01-22 15:28:21.880192: step: 108/459, loss: 0.221779003739357 2023-01-22 15:28:22.589308: step: 110/459, loss: 0.1655876189470291 2023-01-22 15:28:23.217409: step: 112/459, loss: 0.4341915547847748 2023-01-22 15:28:23.857987: step: 114/459, loss: 1.2490744590759277 2023-01-22 15:28:24.487920: step: 116/459, loss: 0.17085444927215576 2023-01-22 15:28:25.132684: step: 118/459, loss: 0.3724310100078583 2023-01-22 15:28:25.762304: step: 120/459, loss: 0.7414011359214783 2023-01-22 15:28:26.381317: step: 122/459, loss: 0.3654281497001648 2023-01-22 15:28:26.956877: step: 124/459, loss: 0.12356110662221909 2023-01-22 15:28:27.560329: step: 126/459, loss: 0.117648646235466 2023-01-22 15:28:28.238414: step: 128/459, loss: 0.23111122846603394 2023-01-22 15:28:28.912554: step: 130/459, loss: 0.6038681864738464 2023-01-22 15:28:29.566055: step: 132/459, loss: 0.12098684906959534 2023-01-22 15:28:30.177075: step: 134/459, loss: 2.458033323287964 2023-01-22 15:28:30.778437: step: 136/459, loss: 0.29528871178627014 2023-01-22 15:28:31.386257: step: 138/459, loss: 0.37970635294914246 2023-01-22 15:28:32.003057: step: 140/459, loss: 0.1894586980342865 2023-01-22 15:28:32.580936: step: 142/459, loss: 0.2866278290748596 2023-01-22 15:28:33.150287: step: 144/459, loss: 0.4587806463241577 2023-01-22 15:28:33.775679: step: 146/459, loss: 0.10783271491527557 2023-01-22 15:28:34.387174: step: 148/459, loss: 0.2950993478298187 2023-01-22 15:28:35.002601: step: 150/459, loss: 0.14805185794830322 2023-01-22 15:28:35.664633: step: 152/459, loss: 0.573534369468689 2023-01-22 15:28:36.203692: step: 154/459, loss: 0.29137519001960754 2023-01-22 15:28:36.837212: step: 156/459, loss: 0.3016453683376312 2023-01-22 15:28:37.383753: step: 158/459, loss: 0.8812946081161499 2023-01-22 15:28:37.979167: step: 160/459, loss: 0.4252793788909912 2023-01-22 15:28:38.592713: step: 162/459, loss: 0.49666717648506165 2023-01-22 15:28:39.222984: step: 164/459, loss: 0.10668210685253143 2023-01-22 15:28:39.825672: step: 166/459, loss: 0.41282254457473755 2023-01-22 15:28:40.443267: step: 168/459, loss: 0.1148475706577301 2023-01-22 15:28:41.106588: step: 170/459, loss: 0.371509850025177 2023-01-22 15:28:41.731256: step: 172/459, loss: 0.25096115469932556 2023-01-22 15:28:42.355907: step: 174/459, loss: 0.16569428145885468 2023-01-22 15:28:42.920474: step: 176/459, loss: 0.8059505224227905 2023-01-22 15:28:43.560848: step: 178/459, loss: 0.48943233489990234 2023-01-22 15:28:44.262683: step: 180/459, loss: 0.5882872343063354 2023-01-22 15:28:44.819622: step: 182/459, loss: 0.3343072831630707 2023-01-22 15:28:45.446698: step: 184/459, loss: 0.0713796317577362 2023-01-22 15:28:46.087383: step: 186/459, loss: 0.1790579855442047 2023-01-22 15:28:46.707987: step: 188/459, loss: 0.7927854657173157 2023-01-22 15:28:47.353036: step: 190/459, loss: 0.26817235350608826 2023-01-22 15:28:47.999114: step: 192/459, loss: 0.8170387744903564 2023-01-22 15:28:48.606170: step: 194/459, loss: 0.19418227672576904 2023-01-22 15:28:49.237662: step: 196/459, loss: 2.096613883972168 2023-01-22 15:28:49.862474: step: 198/459, loss: 0.6270467042922974 2023-01-22 15:28:50.484810: step: 200/459, loss: 0.13937848806381226 2023-01-22 15:28:51.092540: step: 202/459, loss: 0.18653851747512817 2023-01-22 15:28:51.680544: step: 204/459, loss: 0.4074476659297943 2023-01-22 15:28:52.296426: step: 206/459, loss: 0.1848572939634323 2023-01-22 15:28:52.884481: step: 208/459, loss: 0.40996894240379333 2023-01-22 15:28:53.489744: step: 210/459, loss: 0.3814491033554077 2023-01-22 15:28:54.007756: step: 212/459, loss: 0.30085206031799316 2023-01-22 15:28:54.625348: step: 214/459, loss: 0.4539579749107361 2023-01-22 15:28:55.275168: step: 216/459, loss: 0.5747705698013306 2023-01-22 15:28:55.830252: step: 218/459, loss: 0.7887055277824402 2023-01-22 15:28:56.389349: step: 220/459, loss: 0.7843065857887268 2023-01-22 15:28:57.018092: step: 222/459, loss: 0.8246763348579407 2023-01-22 15:28:57.659260: step: 224/459, loss: 0.1510680466890335 2023-01-22 15:28:58.251729: step: 226/459, loss: 0.23453053832054138 2023-01-22 15:28:58.889247: step: 228/459, loss: 0.2435365617275238 2023-01-22 15:28:59.649107: step: 230/459, loss: 0.2899942696094513 2023-01-22 15:29:00.324414: step: 232/459, loss: 0.1543651670217514 2023-01-22 15:29:00.905799: step: 234/459, loss: 0.25322261452674866 2023-01-22 15:29:01.480059: step: 236/459, loss: 1.070400595664978 2023-01-22 15:29:02.131700: step: 238/459, loss: 0.4839406907558441 2023-01-22 15:29:02.709209: step: 240/459, loss: 0.19946300983428955 2023-01-22 15:29:03.288440: step: 242/459, loss: 0.12659472227096558 2023-01-22 15:29:03.895255: step: 244/459, loss: 0.16162505745887756 2023-01-22 15:29:04.575343: step: 246/459, loss: 0.1222245916724205 2023-01-22 15:29:05.221990: step: 248/459, loss: 0.12973488867282867 2023-01-22 15:29:05.887363: step: 250/459, loss: 0.5556093454360962 2023-01-22 15:29:06.487106: step: 252/459, loss: 0.647042453289032 2023-01-22 15:29:07.110202: step: 254/459, loss: 0.28638756275177 2023-01-22 15:29:07.730850: step: 256/459, loss: 0.6684116125106812 2023-01-22 15:29:08.352500: step: 258/459, loss: 0.2972085177898407 2023-01-22 15:29:09.008850: step: 260/459, loss: 0.41457226872444153 2023-01-22 15:29:09.625645: step: 262/459, loss: 0.2239067256450653 2023-01-22 15:29:10.212312: step: 264/459, loss: 0.20483948290348053 2023-01-22 15:29:10.871255: step: 266/459, loss: 0.44608715176582336 2023-01-22 15:29:11.490141: step: 268/459, loss: 0.30109190940856934 2023-01-22 15:29:12.142125: step: 270/459, loss: 0.7713238596916199 2023-01-22 15:29:12.792663: step: 272/459, loss: 0.17977546155452728 2023-01-22 15:29:13.464999: step: 274/459, loss: 0.6260220408439636 2023-01-22 15:29:14.099402: step: 276/459, loss: 0.27167999744415283 2023-01-22 15:29:14.692105: step: 278/459, loss: 0.7559990882873535 2023-01-22 15:29:15.320551: step: 280/459, loss: 0.2989843785762787 2023-01-22 15:29:15.882738: step: 282/459, loss: 0.5895629525184631 2023-01-22 15:29:16.446479: step: 284/459, loss: 0.1766647845506668 2023-01-22 15:29:17.170879: step: 286/459, loss: 0.28436997532844543 2023-01-22 15:29:17.814989: step: 288/459, loss: 0.19074276089668274 2023-01-22 15:29:18.403884: step: 290/459, loss: 0.3094840347766876 2023-01-22 15:29:19.033113: step: 292/459, loss: 0.22406524419784546 2023-01-22 15:29:19.669804: step: 294/459, loss: 0.732819676399231 2023-01-22 15:29:20.317163: step: 296/459, loss: 0.4129226505756378 2023-01-22 15:29:20.921135: step: 298/459, loss: 0.12427052855491638 2023-01-22 15:29:21.512162: step: 300/459, loss: 0.17609819769859314 2023-01-22 15:29:22.160855: step: 302/459, loss: 0.6431460380554199 2023-01-22 15:29:22.773366: step: 304/459, loss: 0.4383181631565094 2023-01-22 15:29:23.399196: step: 306/459, loss: 0.3433317244052887 2023-01-22 15:29:24.009134: step: 308/459, loss: 0.1935356706380844 2023-01-22 15:29:24.605423: step: 310/459, loss: 0.20719783008098602 2023-01-22 15:29:25.188042: step: 312/459, loss: 0.6190075278282166 2023-01-22 15:29:25.792630: step: 314/459, loss: 0.1497437059879303 2023-01-22 15:29:26.480084: step: 316/459, loss: 0.6091998815536499 2023-01-22 15:29:27.080595: step: 318/459, loss: 0.6010189056396484 2023-01-22 15:29:27.732051: step: 320/459, loss: 0.4953651428222656 2023-01-22 15:29:28.326491: step: 322/459, loss: 0.18967023491859436 2023-01-22 15:29:28.955426: step: 324/459, loss: 0.21491819620132446 2023-01-22 15:29:29.598951: step: 326/459, loss: 0.20807892084121704 2023-01-22 15:29:30.241314: step: 328/459, loss: 0.14792375266551971 2023-01-22 15:29:30.875320: step: 330/459, loss: 0.5430980920791626 2023-01-22 15:29:31.569955: step: 332/459, loss: 0.3386732339859009 2023-01-22 15:29:32.229336: step: 334/459, loss: 5.339176654815674 2023-01-22 15:29:32.887558: step: 336/459, loss: 0.40461301803588867 2023-01-22 15:29:33.529752: step: 338/459, loss: 0.11788647621870041 2023-01-22 15:29:34.081990: step: 340/459, loss: 0.14100386202335358 2023-01-22 15:29:34.767601: step: 342/459, loss: 0.2513488829135895 2023-01-22 15:29:35.355966: step: 344/459, loss: 0.07227914780378342 2023-01-22 15:29:35.917669: step: 346/459, loss: 0.25305667519569397 2023-01-22 15:29:36.502550: step: 348/459, loss: 0.16180197894573212 2023-01-22 15:29:37.148052: step: 350/459, loss: 0.19300569593906403 2023-01-22 15:29:37.659852: step: 352/459, loss: 0.13236241042613983 2023-01-22 15:29:38.236524: step: 354/459, loss: 0.9848792552947998 2023-01-22 15:29:38.847222: step: 356/459, loss: 0.11455225944519043 2023-01-22 15:29:39.466209: step: 358/459, loss: 0.2022639513015747 2023-01-22 15:29:40.123157: step: 360/459, loss: 0.4504850506782532 2023-01-22 15:29:40.766465: step: 362/459, loss: 0.30888721346855164 2023-01-22 15:29:41.436544: step: 364/459, loss: 0.04684288054704666 2023-01-22 15:29:42.028334: step: 366/459, loss: 0.5234158039093018 2023-01-22 15:29:42.704991: step: 368/459, loss: 0.19181719422340393 2023-01-22 15:29:43.342846: step: 370/459, loss: 0.29077428579330444 2023-01-22 15:29:43.971445: step: 372/459, loss: 0.3036850690841675 2023-01-22 15:29:44.581253: step: 374/459, loss: 0.08471488207578659 2023-01-22 15:29:45.221467: step: 376/459, loss: 0.2949294447898865 2023-01-22 15:29:45.852713: step: 378/459, loss: 0.32727470993995667 2023-01-22 15:29:46.500396: step: 380/459, loss: 1.294270634651184 2023-01-22 15:29:47.089744: step: 382/459, loss: 0.1927587240934372 2023-01-22 15:29:47.759257: step: 384/459, loss: 0.11558783799409866 2023-01-22 15:29:48.346346: step: 386/459, loss: 0.10492229461669922 2023-01-22 15:29:48.975043: step: 388/459, loss: 0.3004617691040039 2023-01-22 15:29:49.581623: step: 390/459, loss: 0.1418018937110901 2023-01-22 15:29:50.238246: step: 392/459, loss: 0.09187407791614532 2023-01-22 15:29:50.853804: step: 394/459, loss: 0.18068373203277588 2023-01-22 15:29:51.452777: step: 396/459, loss: 0.4411693215370178 2023-01-22 15:29:52.084004: step: 398/459, loss: 0.12040300667285919 2023-01-22 15:29:52.692137: step: 400/459, loss: 0.5597167611122131 2023-01-22 15:29:53.361893: step: 402/459, loss: 0.6759153008460999 2023-01-22 15:29:53.996821: step: 404/459, loss: 1.4973396062850952 2023-01-22 15:29:54.569785: step: 406/459, loss: 0.1453390121459961 2023-01-22 15:29:55.128535: step: 408/459, loss: 0.6925283074378967 2023-01-22 15:29:55.747403: step: 410/459, loss: 0.35022222995758057 2023-01-22 15:29:56.336500: step: 412/459, loss: 0.28106924891471863 2023-01-22 15:29:57.012481: step: 414/459, loss: 0.19977648556232452 2023-01-22 15:29:57.661313: step: 416/459, loss: 0.5973584651947021 2023-01-22 15:29:58.269061: step: 418/459, loss: 0.08671547472476959 2023-01-22 15:29:58.880243: step: 420/459, loss: 0.428000807762146 2023-01-22 15:29:59.527702: step: 422/459, loss: 0.9043363928794861 2023-01-22 15:30:00.254842: step: 424/459, loss: 0.18938048183918 2023-01-22 15:30:00.880879: step: 426/459, loss: 1.528273582458496 2023-01-22 15:30:01.475627: step: 428/459, loss: 0.7608939409255981 2023-01-22 15:30:02.170663: step: 430/459, loss: 0.3680419623851776 2023-01-22 15:30:02.783942: step: 432/459, loss: 0.14479117095470428 2023-01-22 15:30:03.383673: step: 434/459, loss: 0.5600517392158508 2023-01-22 15:30:04.086860: step: 436/459, loss: 0.1682647466659546 2023-01-22 15:30:04.725831: step: 438/459, loss: 0.6074236035346985 2023-01-22 15:30:05.296803: step: 440/459, loss: 0.22780029475688934 2023-01-22 15:30:05.972973: step: 442/459, loss: 0.17561082541942596 2023-01-22 15:30:06.640577: step: 444/459, loss: 0.34118831157684326 2023-01-22 15:30:07.243168: step: 446/459, loss: 0.16300679743289948 2023-01-22 15:30:07.871476: step: 448/459, loss: 0.11173156648874283 2023-01-22 15:30:08.518413: step: 450/459, loss: 2.114173412322998 2023-01-22 15:30:09.238824: step: 452/459, loss: 0.09089374542236328 2023-01-22 15:30:09.897983: step: 454/459, loss: 0.282769113779068 2023-01-22 15:30:10.506470: step: 456/459, loss: 0.22326168417930603 2023-01-22 15:30:11.205510: step: 458/459, loss: 0.4886505901813507 2023-01-22 15:30:11.788566: step: 460/459, loss: 0.47390255331993103 2023-01-22 15:30:12.412546: step: 462/459, loss: 0.1374041587114334 2023-01-22 15:30:13.046720: step: 464/459, loss: 0.3460673689842224 2023-01-22 15:30:13.674812: step: 466/459, loss: 0.5288684368133545 2023-01-22 15:30:14.346749: step: 468/459, loss: 0.3589972257614136 2023-01-22 15:30:14.967079: step: 470/459, loss: 0.5424367785453796 2023-01-22 15:30:15.597328: step: 472/459, loss: 0.6550979614257812 2023-01-22 15:30:16.212660: step: 474/459, loss: 0.3685828447341919 2023-01-22 15:30:16.823296: step: 476/459, loss: 0.08813030272722244 2023-01-22 15:30:17.450414: step: 478/459, loss: 0.2122722715139389 2023-01-22 15:30:18.066546: step: 480/459, loss: 0.5576691031455994 2023-01-22 15:30:18.709556: step: 482/459, loss: 0.37822961807250977 2023-01-22 15:30:19.331168: step: 484/459, loss: 0.2723803222179413 2023-01-22 15:30:20.021920: step: 486/459, loss: 0.3422851264476776 2023-01-22 15:30:20.648726: step: 488/459, loss: 0.2891812026500702 2023-01-22 15:30:21.264937: step: 490/459, loss: 0.1411810666322708 2023-01-22 15:30:21.903712: step: 492/459, loss: 0.2989750802516937 2023-01-22 15:30:22.551660: step: 494/459, loss: 0.42983540892601013 2023-01-22 15:30:23.161179: step: 496/459, loss: 0.3347719609737396 2023-01-22 15:30:23.779909: step: 498/459, loss: 0.17387105524539948 2023-01-22 15:30:24.414335: step: 500/459, loss: 0.14204372465610504 2023-01-22 15:30:25.025561: step: 502/459, loss: 0.38448867201805115 2023-01-22 15:30:25.655198: step: 504/459, loss: 0.905399739742279 2023-01-22 15:30:26.293388: step: 506/459, loss: 0.6367484331130981 2023-01-22 15:30:26.904847: step: 508/459, loss: 0.1882667988538742 2023-01-22 15:30:27.564247: step: 510/459, loss: 0.23280960321426392 2023-01-22 15:30:28.244953: step: 512/459, loss: 0.0781833827495575 2023-01-22 15:30:28.885592: step: 514/459, loss: 0.825528621673584 2023-01-22 15:30:29.550965: step: 516/459, loss: 0.19667130708694458 2023-01-22 15:30:30.205220: step: 518/459, loss: 1.0906049013137817 2023-01-22 15:30:30.835575: step: 520/459, loss: 0.18537911772727966 2023-01-22 15:30:31.431949: step: 522/459, loss: 0.31064483523368835 2023-01-22 15:30:32.078634: step: 524/459, loss: 0.9156175255775452 2023-01-22 15:30:32.697500: step: 526/459, loss: 0.23545849323272705 2023-01-22 15:30:33.370845: step: 528/459, loss: 0.36821630597114563 2023-01-22 15:30:33.987721: step: 530/459, loss: 0.11235227435827255 2023-01-22 15:30:34.618791: step: 532/459, loss: 0.479583740234375 2023-01-22 15:30:35.228253: step: 534/459, loss: 0.20146627724170685 2023-01-22 15:30:35.899670: step: 536/459, loss: 1.0095129013061523 2023-01-22 15:30:36.510721: step: 538/459, loss: 0.3831072747707367 2023-01-22 15:30:37.142318: step: 540/459, loss: 0.26299014687538147 2023-01-22 15:30:37.761466: step: 542/459, loss: 0.3719468414783478 2023-01-22 15:30:38.478539: step: 544/459, loss: 0.1776200532913208 2023-01-22 15:30:39.104382: step: 546/459, loss: 0.17972421646118164 2023-01-22 15:30:39.689633: step: 548/459, loss: 0.4248196482658386 2023-01-22 15:30:40.387149: step: 550/459, loss: 14.665335655212402 2023-01-22 15:30:41.011050: step: 552/459, loss: 0.20689326524734497 2023-01-22 15:30:41.627495: step: 554/459, loss: 0.15360090136528015 2023-01-22 15:30:42.272074: step: 556/459, loss: 0.12485209107398987 2023-01-22 15:30:42.877167: step: 558/459, loss: 0.28416433930397034 2023-01-22 15:30:43.502612: step: 560/459, loss: 0.4188927412033081 2023-01-22 15:30:44.171880: step: 562/459, loss: 0.7018170356750488 2023-01-22 15:30:44.759398: step: 564/459, loss: 0.37581712007522583 2023-01-22 15:30:45.370440: step: 566/459, loss: 0.3070726990699768 2023-01-22 15:30:46.087453: step: 568/459, loss: 0.493694931268692 2023-01-22 15:30:46.736712: step: 570/459, loss: 1.03826105594635 2023-01-22 15:30:47.367137: step: 572/459, loss: 0.257646769285202 2023-01-22 15:30:47.979801: step: 574/459, loss: 0.6388002634048462 2023-01-22 15:30:48.577374: step: 576/459, loss: 0.529765784740448 2023-01-22 15:30:49.162530: step: 578/459, loss: 0.26222091913223267 2023-01-22 15:30:49.815855: step: 580/459, loss: 0.2105180323123932 2023-01-22 15:30:50.451937: step: 582/459, loss: 0.3731021583080292 2023-01-22 15:30:51.087752: step: 584/459, loss: 0.11950968205928802 2023-01-22 15:30:51.663376: step: 586/459, loss: 0.5904467105865479 2023-01-22 15:30:52.318782: step: 588/459, loss: 0.7918018102645874 2023-01-22 15:30:52.874521: step: 590/459, loss: 0.4619719684123993 2023-01-22 15:30:53.520784: step: 592/459, loss: 0.8965120315551758 2023-01-22 15:30:54.180681: step: 594/459, loss: 2.431922435760498 2023-01-22 15:30:54.791853: step: 596/459, loss: 0.3734939694404602 2023-01-22 15:30:55.438072: step: 598/459, loss: 0.49112755060195923 2023-01-22 15:30:56.079859: step: 600/459, loss: 0.5999159812927246 2023-01-22 15:30:56.675174: step: 602/459, loss: 0.1420172154903412 2023-01-22 15:30:57.219571: step: 604/459, loss: 0.3335857689380646 2023-01-22 15:30:57.835455: step: 606/459, loss: 0.3729221820831299 2023-01-22 15:30:58.499585: step: 608/459, loss: 0.24586352705955505 2023-01-22 15:30:59.097788: step: 610/459, loss: 0.48684290051460266 2023-01-22 15:30:59.677647: step: 612/459, loss: 0.6771071553230286 2023-01-22 15:31:00.335216: step: 614/459, loss: 0.13480645418167114 2023-01-22 15:31:01.008575: step: 616/459, loss: 2.0089423656463623 2023-01-22 15:31:01.635823: step: 618/459, loss: 0.28868526220321655 2023-01-22 15:31:02.205878: step: 620/459, loss: 0.28561830520629883 2023-01-22 15:31:02.838883: step: 622/459, loss: 0.5032442808151245 2023-01-22 15:31:03.423121: step: 624/459, loss: 0.38965362310409546 2023-01-22 15:31:04.037732: step: 626/459, loss: 1.6013612747192383 2023-01-22 15:31:04.696112: step: 628/459, loss: 0.3183538019657135 2023-01-22 15:31:05.349126: step: 630/459, loss: 0.22380925714969635 2023-01-22 15:31:05.968487: step: 632/459, loss: 0.30871355533599854 2023-01-22 15:31:06.643701: step: 634/459, loss: 0.5093263983726501 2023-01-22 15:31:07.253007: step: 636/459, loss: 0.6628724336624146 2023-01-22 15:31:07.858972: step: 638/459, loss: 0.26897987723350525 2023-01-22 15:31:08.444264: step: 640/459, loss: 0.0851622000336647 2023-01-22 15:31:09.133113: step: 642/459, loss: 0.3306724727153778 2023-01-22 15:31:09.814287: step: 644/459, loss: 0.18673957884311676 2023-01-22 15:31:10.396235: step: 646/459, loss: 0.42140719294548035 2023-01-22 15:31:10.995135: step: 648/459, loss: 0.9977365136146545 2023-01-22 15:31:11.576464: step: 650/459, loss: 0.06416915357112885 2023-01-22 15:31:12.180325: step: 652/459, loss: 0.1789780557155609 2023-01-22 15:31:12.850531: step: 654/459, loss: 0.17052052915096283 2023-01-22 15:31:13.482314: step: 656/459, loss: 0.796863853931427 2023-01-22 15:31:14.087177: step: 658/459, loss: 0.711889922618866 2023-01-22 15:31:14.683097: step: 660/459, loss: 0.44478315114974976 2023-01-22 15:31:15.303279: step: 662/459, loss: 0.16583354771137238 2023-01-22 15:31:15.906284: step: 664/459, loss: 1.1746985912322998 2023-01-22 15:31:16.471993: step: 666/459, loss: 0.1611568033695221 2023-01-22 15:31:17.100875: step: 668/459, loss: 0.15111055970191956 2023-01-22 15:31:17.822675: step: 670/459, loss: 0.865103006362915 2023-01-22 15:31:18.542960: step: 672/459, loss: 0.20695500075817108 2023-01-22 15:31:19.151806: step: 674/459, loss: 0.4758073687553406 2023-01-22 15:31:19.782202: step: 676/459, loss: 0.21829137206077576 2023-01-22 15:31:20.397554: step: 678/459, loss: 0.3364788293838501 2023-01-22 15:31:21.057397: step: 680/459, loss: 0.2361581176519394 2023-01-22 15:31:21.686224: step: 682/459, loss: 0.22467179596424103 2023-01-22 15:31:22.397072: step: 684/459, loss: 0.21301540732383728 2023-01-22 15:31:23.035395: step: 686/459, loss: 0.657206654548645 2023-01-22 15:31:23.718393: step: 688/459, loss: 0.24894435703754425 2023-01-22 15:31:24.403149: step: 690/459, loss: 0.42386046051979065 2023-01-22 15:31:25.053751: step: 692/459, loss: 0.2483527660369873 2023-01-22 15:31:25.718772: step: 694/459, loss: 0.22542084753513336 2023-01-22 15:31:26.364553: step: 696/459, loss: 0.700757622718811 2023-01-22 15:31:26.907678: step: 698/459, loss: 0.2899307608604431 2023-01-22 15:31:27.603280: step: 700/459, loss: 0.30620256066322327 2023-01-22 15:31:28.248486: step: 702/459, loss: 0.2355465143918991 2023-01-22 15:31:28.844787: step: 704/459, loss: 1.0513681173324585 2023-01-22 15:31:29.472398: step: 706/459, loss: 0.9188523888587952 2023-01-22 15:31:30.090316: step: 708/459, loss: 0.45874524116516113 2023-01-22 15:31:30.681817: step: 710/459, loss: 0.33727818727493286 2023-01-22 15:31:31.273987: step: 712/459, loss: 0.2524116039276123 2023-01-22 15:31:31.954737: step: 714/459, loss: 0.18425889313220978 2023-01-22 15:31:32.612965: step: 716/459, loss: 0.20798148214817047 2023-01-22 15:31:33.209905: step: 718/459, loss: 0.45368579030036926 2023-01-22 15:31:33.754806: step: 720/459, loss: 0.27645403146743774 2023-01-22 15:31:34.503510: step: 722/459, loss: 1.6022977828979492 2023-01-22 15:31:35.136019: step: 724/459, loss: 1.613969326019287 2023-01-22 15:31:35.741930: step: 726/459, loss: 12.246381759643555 2023-01-22 15:31:36.408937: step: 728/459, loss: 0.5909008979797363 2023-01-22 15:31:37.027013: step: 730/459, loss: 0.4556034505367279 2023-01-22 15:31:37.652992: step: 732/459, loss: 0.7282405495643616 2023-01-22 15:31:38.237569: step: 734/459, loss: 0.1966792345046997 2023-01-22 15:31:38.885195: step: 736/459, loss: 0.26952633261680603 2023-01-22 15:31:39.485832: step: 738/459, loss: 0.5278282165527344 2023-01-22 15:31:40.112272: step: 740/459, loss: 0.19499891996383667 2023-01-22 15:31:40.707031: step: 742/459, loss: 0.402125746011734 2023-01-22 15:31:41.301686: step: 744/459, loss: 0.1943647712469101 2023-01-22 15:31:41.906053: step: 746/459, loss: 0.08575937896966934 2023-01-22 15:31:42.590716: step: 748/459, loss: 0.09664107114076614 2023-01-22 15:31:43.155544: step: 750/459, loss: 0.04265977069735527 2023-01-22 15:31:43.787672: step: 752/459, loss: 0.7664592266082764 2023-01-22 15:31:44.416123: step: 754/459, loss: 0.3756682872772217 2023-01-22 15:31:44.983915: step: 756/459, loss: 0.5506476163864136 2023-01-22 15:31:45.600544: step: 758/459, loss: 0.40378862619400024 2023-01-22 15:31:46.231773: step: 760/459, loss: 0.1255844533443451 2023-01-22 15:31:46.936104: step: 762/459, loss: 0.29168087244033813 2023-01-22 15:31:47.506188: step: 764/459, loss: 0.22652316093444824 2023-01-22 15:31:48.098747: step: 766/459, loss: 0.31535038352012634 2023-01-22 15:31:48.692290: step: 768/459, loss: 0.11398965865373611 2023-01-22 15:31:49.293218: step: 770/459, loss: 0.7630260586738586 2023-01-22 15:31:49.929718: step: 772/459, loss: 0.24662503600120544 2023-01-22 15:31:50.548864: step: 774/459, loss: 0.33580678701400757 2023-01-22 15:31:51.156463: step: 776/459, loss: 0.8018659949302673 2023-01-22 15:31:51.847064: step: 778/459, loss: 0.1609964668750763 2023-01-22 15:31:52.466848: step: 780/459, loss: 0.1514301300048828 2023-01-22 15:31:53.000733: step: 782/459, loss: 0.13548912107944489 2023-01-22 15:31:53.626255: step: 784/459, loss: 17.336647033691406 2023-01-22 15:31:54.248990: step: 786/459, loss: 0.3454987704753876 2023-01-22 15:31:54.877981: step: 788/459, loss: 0.8185910582542419 2023-01-22 15:31:55.586779: step: 790/459, loss: 0.4088590145111084 2023-01-22 15:31:56.274676: step: 792/459, loss: 0.14908866584300995 2023-01-22 15:31:56.851888: step: 794/459, loss: 0.4637329578399658 2023-01-22 15:31:57.549880: step: 796/459, loss: 9.705608367919922 2023-01-22 15:31:58.199268: step: 798/459, loss: 0.6753536462783813 2023-01-22 15:31:58.768468: step: 800/459, loss: 0.5368838906288147 2023-01-22 15:31:59.420190: step: 802/459, loss: 1.2083460092544556 2023-01-22 15:32:00.141142: step: 804/459, loss: 0.7152119874954224 2023-01-22 15:32:00.775536: step: 806/459, loss: 0.2486729621887207 2023-01-22 15:32:01.467441: step: 808/459, loss: 0.3784220814704895 2023-01-22 15:32:02.107805: step: 810/459, loss: 0.13724064826965332 2023-01-22 15:32:02.745111: step: 812/459, loss: 0.13112258911132812 2023-01-22 15:32:03.333564: step: 814/459, loss: 0.37705284357070923 2023-01-22 15:32:04.049450: step: 816/459, loss: 0.5430654883384705 2023-01-22 15:32:04.634414: step: 818/459, loss: 0.4274962842464447 2023-01-22 15:32:05.252598: step: 820/459, loss: 0.9376887679100037 2023-01-22 15:32:05.927789: step: 822/459, loss: 0.31696847081184387 2023-01-22 15:32:06.564923: step: 824/459, loss: 13.491560935974121 2023-01-22 15:32:07.229652: step: 826/459, loss: 1.0893733501434326 2023-01-22 15:32:07.852931: step: 828/459, loss: 0.20282265543937683 2023-01-22 15:32:08.433266: step: 830/459, loss: 0.5917674899101257 2023-01-22 15:32:09.115654: step: 832/459, loss: 0.5308868885040283 2023-01-22 15:32:09.684623: step: 834/459, loss: 1.049363374710083 2023-01-22 15:32:10.274230: step: 836/459, loss: 0.22336165606975555 2023-01-22 15:32:10.911006: step: 838/459, loss: 0.429612934589386 2023-01-22 15:32:11.550100: step: 840/459, loss: 0.2442082017660141 2023-01-22 15:32:12.147704: step: 842/459, loss: 0.31660187244415283 2023-01-22 15:32:12.778491: step: 844/459, loss: 0.3831627368927002 2023-01-22 15:32:13.393963: step: 846/459, loss: 0.1586369425058365 2023-01-22 15:32:14.078860: step: 848/459, loss: 0.21557731926441193 2023-01-22 15:32:14.693322: step: 850/459, loss: 0.3040861487388611 2023-01-22 15:32:15.279350: step: 852/459, loss: 0.2901424467563629 2023-01-22 15:32:15.876750: step: 854/459, loss: 1.3082011938095093 2023-01-22 15:32:16.492529: step: 856/459, loss: 0.9950810670852661 2023-01-22 15:32:17.087012: step: 858/459, loss: 0.302564412355423 2023-01-22 15:32:17.738279: step: 860/459, loss: 0.46231940388679504 2023-01-22 15:32:18.331424: step: 862/459, loss: 0.3916146755218506 2023-01-22 15:32:19.029637: step: 864/459, loss: 0.6221484541893005 2023-01-22 15:32:19.710915: step: 866/459, loss: 0.23385001718997955 2023-01-22 15:32:20.399452: step: 868/459, loss: 0.2670295834541321 2023-01-22 15:32:20.992240: step: 870/459, loss: 0.3328472375869751 2023-01-22 15:32:21.623717: step: 872/459, loss: 0.8232846260070801 2023-01-22 15:32:22.233571: step: 874/459, loss: 0.4054291546344757 2023-01-22 15:32:22.886465: step: 876/459, loss: 0.34289857745170593 2023-01-22 15:32:23.443673: step: 878/459, loss: 0.23925262689590454 2023-01-22 15:32:24.080051: step: 880/459, loss: 0.13359026610851288 2023-01-22 15:32:24.684236: step: 882/459, loss: 0.25245392322540283 2023-01-22 15:32:25.322607: step: 884/459, loss: 0.11559193581342697 2023-01-22 15:32:25.875066: step: 886/459, loss: 0.7003084421157837 2023-01-22 15:32:26.608301: step: 888/459, loss: 0.3059837520122528 2023-01-22 15:32:27.254038: step: 890/459, loss: 0.4554714560508728 2023-01-22 15:32:27.884692: step: 892/459, loss: 0.5279324054718018 2023-01-22 15:32:28.486948: step: 894/459, loss: 0.16511769592761993 2023-01-22 15:32:29.185454: step: 896/459, loss: 0.5453627705574036 2023-01-22 15:32:29.897671: step: 898/459, loss: 0.4549400806427002 2023-01-22 15:32:30.485462: step: 900/459, loss: 0.13465186953544617 2023-01-22 15:32:31.106882: step: 902/459, loss: 0.41034024953842163 2023-01-22 15:32:31.804737: step: 904/459, loss: 0.7103533148765564 2023-01-22 15:32:32.390977: step: 906/459, loss: 0.2775198221206665 2023-01-22 15:32:33.020744: step: 908/459, loss: 0.38809195160865784 2023-01-22 15:32:33.658641: step: 910/459, loss: 0.7419427037239075 2023-01-22 15:32:34.358931: step: 912/459, loss: 0.2685409188270569 2023-01-22 15:32:35.002466: step: 914/459, loss: 0.7283276319503784 2023-01-22 15:32:35.582661: step: 916/459, loss: 0.15454062819480896 2023-01-22 15:32:36.187256: step: 918/459, loss: 0.2485641986131668 2023-01-22 15:32:36.648961: step: 920/459, loss: 0.053542058914899826 ================================================== Loss: 0.592 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2888125741173431, 'r': 0.2899086370172192, 'f1': 0.289359567628929}, 'combined': 0.21321231298973714, 'epoch': 8} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3340692170787828, 'r': 0.300327922178196, 'f1': 0.316301280760745}, 'combined': 0.20243281968687676, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29491878705533603, 'r': 0.29547840524709185, 'f1': 0.2951983309293222}, 'combined': 0.21751455963213212, 'epoch': 8} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33014149246324115, 'r': 0.2914248992561883, 'f1': 0.309577391773836}, 'combined': 0.198129530735255, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30635204297908203, 'r': 0.29300716231900836, 'f1': 0.2995310391179795}, 'combined': 0.2207070814553533, 'epoch': 8} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34071983672307743, 'r': 0.31367364530436603, 'f1': 0.3266378292195583}, 'combined': 0.23419316057251352, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2904040404040404, 'r': 0.2738095238095238, 'f1': 0.28186274509803916}, 'combined': 0.1879084967320261, 'epoch': 8} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.3695652173913043, 'f1': 0.2982456140350877}, 'combined': 0.14912280701754385, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29411764705882354, 'r': 0.1724137931034483, 'f1': 0.2173913043478261}, 'combined': 0.14492753623188406, 'epoch': 8} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27463650173611115, 'r': 0.29960345643939396, 'f1': 0.2865772192028986}, 'combined': 0.21116216151792525, 'epoch': 4} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3275304334361018, 'r': 0.25382122518550904, 'f1': 0.28600305741761856}, 'combined': 0.18304195674727586, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2387005649717514, 'r': 0.40238095238095234, 'f1': 0.29964539007092195}, 'combined': 0.1997635933806146, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2797278904926072, 'r': 0.3338687725234344, 'f1': 0.30440976318313134}, 'combined': 0.22430193076651783, 'epoch': 7} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3271284749925289, 'r': 0.29714664561481074, 'f1': 0.31141759721114737}, 'combined': 0.19930726221513428, 'epoch': 7} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.22727272727272727, 'r': 0.43478260869565216, 'f1': 0.2985074626865672}, 'combined': 0.1492537313432836, 'epoch': 7} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2887313338741276, 'r': 0.320508216919098, 'f1': 0.30379106172008025}, 'combined': 0.22384604547795386, 'epoch': 7} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.33070809808446827, 'r': 0.30633379786168213, 'f1': 0.3180546470004617}, 'combined': 0.2280391808682556, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24, 'r': 0.20689655172413793, 'f1': 0.22222222222222224}, 'combined': 0.14814814814814814, 'epoch': 7} ****************************** Epoch: 9 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 15:35:15.001160: step: 2/459, loss: 0.18439321219921112 2023-01-22 15:35:15.747040: step: 4/459, loss: 0.8986378908157349 2023-01-22 15:35:16.348920: step: 6/459, loss: 0.8407979607582092 2023-01-22 15:35:16.950410: step: 8/459, loss: 0.17893101274967194 2023-01-22 15:35:17.565151: step: 10/459, loss: 0.13888488709926605 2023-01-22 15:35:18.145044: step: 12/459, loss: 1.4197001457214355 2023-01-22 15:35:18.812636: step: 14/459, loss: 0.09886149317026138 2023-01-22 15:35:19.418879: step: 16/459, loss: 0.12001192569732666 2023-01-22 15:35:20.080678: step: 18/459, loss: 0.19063037633895874 2023-01-22 15:35:20.721380: step: 20/459, loss: 0.06039010360836983 2023-01-22 15:35:21.340098: step: 22/459, loss: 0.1095406711101532 2023-01-22 15:35:22.048180: step: 24/459, loss: 0.16318657994270325 2023-01-22 15:35:22.626766: step: 26/459, loss: 0.12247822433710098 2023-01-22 15:35:23.276591: step: 28/459, loss: 0.2911108732223511 2023-01-22 15:35:23.885723: step: 30/459, loss: 0.17264080047607422 2023-01-22 15:35:24.541698: step: 32/459, loss: 0.16069787740707397 2023-01-22 15:35:25.159513: step: 34/459, loss: 1.1810535192489624 2023-01-22 15:35:25.868895: step: 36/459, loss: 0.14775002002716064 2023-01-22 15:35:26.530606: step: 38/459, loss: 0.34599757194519043 2023-01-22 15:35:27.166007: step: 40/459, loss: 0.2523183226585388 2023-01-22 15:35:27.821556: step: 42/459, loss: 0.6118503212928772 2023-01-22 15:35:28.397299: step: 44/459, loss: 0.24570801854133606 2023-01-22 15:35:28.967277: step: 46/459, loss: 0.5893775820732117 2023-01-22 15:35:29.606437: step: 48/459, loss: 0.42613843083381653 2023-01-22 15:35:30.206273: step: 50/459, loss: 0.6821727752685547 2023-01-22 15:35:30.813590: step: 52/459, loss: 0.09626701474189758 2023-01-22 15:35:31.453846: step: 54/459, loss: 0.39154428243637085 2023-01-22 15:35:32.097840: step: 56/459, loss: 0.3103923201560974 2023-01-22 15:35:32.798439: step: 58/459, loss: 0.2187756597995758 2023-01-22 15:35:33.510234: step: 60/459, loss: 0.2137937843799591 2023-01-22 15:35:34.182981: step: 62/459, loss: 0.44266054034233093 2023-01-22 15:35:34.801105: step: 64/459, loss: 0.18643751740455627 2023-01-22 15:35:35.417671: step: 66/459, loss: 0.7481662034988403 2023-01-22 15:35:36.125192: step: 68/459, loss: 0.6304805278778076 2023-01-22 15:35:36.696845: step: 70/459, loss: 0.46103212237358093 2023-01-22 15:35:37.305946: step: 72/459, loss: 0.1344056874513626 2023-01-22 15:35:37.906444: step: 74/459, loss: 0.1584649682044983 2023-01-22 15:35:38.548043: step: 76/459, loss: 0.3946804404258728 2023-01-22 15:35:39.157462: step: 78/459, loss: 0.1737135350704193 2023-01-22 15:35:39.826992: step: 80/459, loss: 0.19965402781963348 2023-01-22 15:35:40.434417: step: 82/459, loss: 1.1748374700546265 2023-01-22 15:35:41.193004: step: 84/459, loss: 0.424256831407547 2023-01-22 15:35:41.798705: step: 86/459, loss: 3.055642604827881 2023-01-22 15:35:42.403098: step: 88/459, loss: 0.10489419847726822 2023-01-22 15:35:43.033430: step: 90/459, loss: 0.40554338693618774 2023-01-22 15:35:43.714428: step: 92/459, loss: 0.14238794147968292 2023-01-22 15:35:44.324801: step: 94/459, loss: 0.6449218392372131 2023-01-22 15:35:45.000889: step: 96/459, loss: 0.12023614346981049 2023-01-22 15:35:45.666143: step: 98/459, loss: 0.32278645038604736 2023-01-22 15:35:46.256623: step: 100/459, loss: 0.26236921548843384 2023-01-22 15:35:46.888342: step: 102/459, loss: 0.12313416600227356 2023-01-22 15:35:47.519014: step: 104/459, loss: 0.07188842445611954 2023-01-22 15:35:48.078440: step: 106/459, loss: 0.4006103277206421 2023-01-22 15:35:48.668070: step: 108/459, loss: 0.4506216049194336 2023-01-22 15:35:49.341959: step: 110/459, loss: 0.1802813857793808 2023-01-22 15:35:50.003923: step: 112/459, loss: 0.46876704692840576 2023-01-22 15:35:50.767682: step: 114/459, loss: 0.47070324420928955 2023-01-22 15:35:51.541852: step: 116/459, loss: 0.21796147525310516 2023-01-22 15:35:52.139806: step: 118/459, loss: 0.16616225242614746 2023-01-22 15:35:52.776949: step: 120/459, loss: 0.5017578601837158 2023-01-22 15:35:53.403853: step: 122/459, loss: 0.44728022813796997 2023-01-22 15:35:54.004316: step: 124/459, loss: 0.13026976585388184 2023-01-22 15:35:54.568328: step: 126/459, loss: 0.08323169499635696 2023-01-22 15:35:55.173044: step: 128/459, loss: 0.037357527762651443 2023-01-22 15:35:55.787956: step: 130/459, loss: 0.5293829441070557 2023-01-22 15:35:56.399402: step: 132/459, loss: 0.06573091447353363 2023-01-22 15:35:57.093356: step: 134/459, loss: 0.06899810582399368 2023-01-22 15:35:57.652620: step: 136/459, loss: 0.3001314699649811 2023-01-22 15:35:58.266301: step: 138/459, loss: 0.2802428603172302 2023-01-22 15:35:58.924114: step: 140/459, loss: 0.46363741159439087 2023-01-22 15:35:59.548629: step: 142/459, loss: 0.5584475994110107 2023-01-22 15:36:00.145731: step: 144/459, loss: 1.3088366985321045 2023-01-22 15:36:00.730411: step: 146/459, loss: 0.7164338827133179 2023-01-22 15:36:01.335225: step: 148/459, loss: 0.1281273066997528 2023-01-22 15:36:01.941011: step: 150/459, loss: 0.7415100932121277 2023-01-22 15:36:02.569358: step: 152/459, loss: 0.44268354773521423 2023-01-22 15:36:03.191077: step: 154/459, loss: 0.44128501415252686 2023-01-22 15:36:03.841251: step: 156/459, loss: 0.23141776025295258 2023-01-22 15:36:04.568803: step: 158/459, loss: 0.13984400033950806 2023-01-22 15:36:05.186303: step: 160/459, loss: 0.25037631392478943 2023-01-22 15:36:05.857222: step: 162/459, loss: 0.20631468296051025 2023-01-22 15:36:06.487463: step: 164/459, loss: 0.20600515604019165 2023-01-22 15:36:07.089223: step: 166/459, loss: 0.10454583168029785 2023-01-22 15:36:07.717744: step: 168/459, loss: 0.6229168176651001 2023-01-22 15:36:08.327071: step: 170/459, loss: 0.14938987791538239 2023-01-22 15:36:08.966587: step: 172/459, loss: 0.5210754871368408 2023-01-22 15:36:09.668870: step: 174/459, loss: 0.20729337632656097 2023-01-22 15:36:10.232050: step: 176/459, loss: 0.13880464434623718 2023-01-22 15:36:10.803180: step: 178/459, loss: 0.17645342648029327 2023-01-22 15:36:11.444265: step: 180/459, loss: 0.21857430040836334 2023-01-22 15:36:12.078731: step: 182/459, loss: 0.7617982625961304 2023-01-22 15:36:12.734837: step: 184/459, loss: 0.39541953802108765 2023-01-22 15:36:13.315688: step: 186/459, loss: 0.08707297593355179 2023-01-22 15:36:14.018358: step: 188/459, loss: 0.42042118310928345 2023-01-22 15:36:14.608831: step: 190/459, loss: 0.11075430363416672 2023-01-22 15:36:15.215672: step: 192/459, loss: 0.1615828424692154 2023-01-22 15:36:15.793411: step: 194/459, loss: 0.19777964055538177 2023-01-22 15:36:16.446329: step: 196/459, loss: 1.0041266679763794 2023-01-22 15:36:17.050653: step: 198/459, loss: 0.8463156819343567 2023-01-22 15:36:17.618267: step: 200/459, loss: 0.06610386073589325 2023-01-22 15:36:18.237274: step: 202/459, loss: 0.4792713522911072 2023-01-22 15:36:18.916616: step: 204/459, loss: 0.21623903512954712 2023-01-22 15:36:19.562970: step: 206/459, loss: 0.17938849329948425 2023-01-22 15:36:20.216844: step: 208/459, loss: 0.39717918634414673 2023-01-22 15:36:20.807613: step: 210/459, loss: 0.10324682295322418 2023-01-22 15:36:21.417700: step: 212/459, loss: 0.29441243410110474 2023-01-22 15:36:22.039083: step: 214/459, loss: 0.07395590096712112 2023-01-22 15:36:22.645219: step: 216/459, loss: 0.16921505331993103 2023-01-22 15:36:23.271743: step: 218/459, loss: 0.09391270577907562 2023-01-22 15:36:23.935167: step: 220/459, loss: 0.1863817423582077 2023-01-22 15:36:24.541404: step: 222/459, loss: 0.5489475727081299 2023-01-22 15:36:25.135036: step: 224/459, loss: 0.18508563935756683 2023-01-22 15:36:25.864265: step: 226/459, loss: 0.30985578894615173 2023-01-22 15:36:26.455055: step: 228/459, loss: 0.1078592836856842 2023-01-22 15:36:27.022340: step: 230/459, loss: 0.060014449059963226 2023-01-22 15:36:27.703319: step: 232/459, loss: 0.44057947397232056 2023-01-22 15:36:28.345629: step: 234/459, loss: 0.7772139310836792 2023-01-22 15:36:28.967039: step: 236/459, loss: 0.42620277404785156 2023-01-22 15:36:29.613982: step: 238/459, loss: 0.7748092412948608 2023-01-22 15:36:30.238139: step: 240/459, loss: 0.131893590092659 2023-01-22 15:36:30.894282: step: 242/459, loss: 0.47346705198287964 2023-01-22 15:36:31.520190: step: 244/459, loss: 0.11961937695741653 2023-01-22 15:36:32.138359: step: 246/459, loss: 0.08558046072721481 2023-01-22 15:36:32.737948: step: 248/459, loss: 0.3139632046222687 2023-01-22 15:36:33.388884: step: 250/459, loss: 0.7056500315666199 2023-01-22 15:36:34.032633: step: 252/459, loss: 0.6867575645446777 2023-01-22 15:36:34.688080: step: 254/459, loss: 0.3518187701702118 2023-01-22 15:36:35.301394: step: 256/459, loss: 0.4132686257362366 2023-01-22 15:36:35.913548: step: 258/459, loss: 0.34573784470558167 2023-01-22 15:36:36.572948: step: 260/459, loss: 0.8501167893409729 2023-01-22 15:36:37.205594: step: 262/459, loss: 0.17938324809074402 2023-01-22 15:36:37.833717: step: 264/459, loss: 0.27315837144851685 2023-01-22 15:36:38.431672: step: 266/459, loss: 1.717817783355713 2023-01-22 15:36:39.016364: step: 268/459, loss: 0.22012801468372345 2023-01-22 15:36:39.620301: step: 270/459, loss: 0.361280620098114 2023-01-22 15:36:40.223989: step: 272/459, loss: 0.29369616508483887 2023-01-22 15:36:40.841726: step: 274/459, loss: 0.47330671548843384 2023-01-22 15:36:41.448822: step: 276/459, loss: 0.1960812509059906 2023-01-22 15:36:42.038405: step: 278/459, loss: 0.7916755676269531 2023-01-22 15:36:42.673308: step: 280/459, loss: 0.37093427777290344 2023-01-22 15:36:43.322802: step: 282/459, loss: 0.163028284907341 2023-01-22 15:36:43.928933: step: 284/459, loss: 0.2731055021286011 2023-01-22 15:36:44.605995: step: 286/459, loss: 0.16971495747566223 2023-01-22 15:36:45.249166: step: 288/459, loss: 0.40978753566741943 2023-01-22 15:36:45.863114: step: 290/459, loss: 0.18284781277179718 2023-01-22 15:36:46.463400: step: 292/459, loss: 0.3923346698284149 2023-01-22 15:36:47.086450: step: 294/459, loss: 0.15832941234111786 2023-01-22 15:36:47.685339: step: 296/459, loss: 0.5028421878814697 2023-01-22 15:36:48.325010: step: 298/459, loss: 0.08952820301055908 2023-01-22 15:36:48.954008: step: 300/459, loss: 0.7199866771697998 2023-01-22 15:36:49.525388: step: 302/459, loss: 0.3754819333553314 2023-01-22 15:36:50.179643: step: 304/459, loss: 0.1556716412305832 2023-01-22 15:36:50.893521: step: 306/459, loss: 0.213858500123024 2023-01-22 15:36:51.530691: step: 308/459, loss: 1.053368091583252 2023-01-22 15:36:52.148835: step: 310/459, loss: 0.19873455166816711 2023-01-22 15:36:52.770691: step: 312/459, loss: 0.3938579261302948 2023-01-22 15:36:53.365738: step: 314/459, loss: 0.865601658821106 2023-01-22 15:36:53.972234: step: 316/459, loss: 0.34802815318107605 2023-01-22 15:36:54.553321: step: 318/459, loss: 0.30020594596862793 2023-01-22 15:36:55.259689: step: 320/459, loss: 0.12245538830757141 2023-01-22 15:36:55.868223: step: 322/459, loss: 0.32039275765419006 2023-01-22 15:36:56.495920: step: 324/459, loss: 0.34237611293792725 2023-01-22 15:36:57.060429: step: 326/459, loss: 0.7837325930595398 2023-01-22 15:36:57.721239: step: 328/459, loss: 0.06596418470144272 2023-01-22 15:36:58.364923: step: 330/459, loss: 0.2149493396282196 2023-01-22 15:36:58.945892: step: 332/459, loss: 0.18699859082698822 2023-01-22 15:36:59.597005: step: 334/459, loss: 0.570656955242157 2023-01-22 15:37:00.229408: step: 336/459, loss: 0.2099992036819458 2023-01-22 15:37:00.807729: step: 338/459, loss: 0.3715413212776184 2023-01-22 15:37:01.410473: step: 340/459, loss: 0.21672268211841583 2023-01-22 15:37:02.035020: step: 342/459, loss: 0.09950505197048187 2023-01-22 15:37:02.645906: step: 344/459, loss: 0.4766213893890381 2023-01-22 15:37:03.305578: step: 346/459, loss: 0.18004047870635986 2023-01-22 15:37:03.860133: step: 348/459, loss: 0.23743072152137756 2023-01-22 15:37:04.475902: step: 350/459, loss: 0.16311174631118774 2023-01-22 15:37:05.090896: step: 352/459, loss: 0.12065590173006058 2023-01-22 15:37:05.739710: step: 354/459, loss: 0.23487871885299683 2023-01-22 15:37:06.328322: step: 356/459, loss: 0.11987750977277756 2023-01-22 15:37:06.932681: step: 358/459, loss: 0.18686789274215698 2023-01-22 15:37:07.510773: step: 360/459, loss: 0.5150120258331299 2023-01-22 15:37:08.165115: step: 362/459, loss: 0.2824525833129883 2023-01-22 15:37:08.782467: step: 364/459, loss: 0.27566733956336975 2023-01-22 15:37:09.391205: step: 366/459, loss: 0.3833446204662323 2023-01-22 15:37:09.949229: step: 368/459, loss: 0.10307053476572037 2023-01-22 15:37:10.570022: step: 370/459, loss: 1.027220606803894 2023-01-22 15:37:11.203593: step: 372/459, loss: 0.07560738921165466 2023-01-22 15:37:11.800645: step: 374/459, loss: 0.15305685997009277 2023-01-22 15:37:12.465402: step: 376/459, loss: 0.11928015202283859 2023-01-22 15:37:13.074586: step: 378/459, loss: 0.18275076150894165 2023-01-22 15:37:13.672421: step: 380/459, loss: 0.12547485530376434 2023-01-22 15:37:14.331021: step: 382/459, loss: 0.23664912581443787 2023-01-22 15:37:14.966269: step: 384/459, loss: 0.6064171195030212 2023-01-22 15:37:15.547732: step: 386/459, loss: 0.045988522469997406 2023-01-22 15:37:16.200568: step: 388/459, loss: 0.15828804671764374 2023-01-22 15:37:16.782486: step: 390/459, loss: 0.2432098686695099 2023-01-22 15:37:17.347553: step: 392/459, loss: 0.42282724380493164 2023-01-22 15:37:17.967854: step: 394/459, loss: 0.06064790487289429 2023-01-22 15:37:18.674456: step: 396/459, loss: 0.18200626969337463 2023-01-22 15:37:19.302680: step: 398/459, loss: 0.23114250600337982 2023-01-22 15:37:19.977606: step: 400/459, loss: 0.2651146948337555 2023-01-22 15:37:20.654705: step: 402/459, loss: 2.082402229309082 2023-01-22 15:37:21.259744: step: 404/459, loss: 0.31448668241500854 2023-01-22 15:37:21.800422: step: 406/459, loss: 0.4454498291015625 2023-01-22 15:37:22.454937: step: 408/459, loss: 1.0057098865509033 2023-01-22 15:37:23.078169: step: 410/459, loss: 0.3360697031021118 2023-01-22 15:37:23.750292: step: 412/459, loss: 0.1495560258626938 2023-01-22 15:37:24.409406: step: 414/459, loss: 1.5416226387023926 2023-01-22 15:37:25.084385: step: 416/459, loss: 0.20728392899036407 2023-01-22 15:37:25.635027: step: 418/459, loss: 0.07899286597967148 2023-01-22 15:37:26.270599: step: 420/459, loss: 1.404572606086731 2023-01-22 15:37:26.879478: step: 422/459, loss: 0.993308961391449 2023-01-22 15:37:27.456268: step: 424/459, loss: 0.9785451889038086 2023-01-22 15:37:28.059362: step: 426/459, loss: 0.2673916220664978 2023-01-22 15:37:28.685672: step: 428/459, loss: 0.31633689999580383 2023-01-22 15:37:29.361670: step: 430/459, loss: 0.6267914175987244 2023-01-22 15:37:29.962525: step: 432/459, loss: 0.466937780380249 2023-01-22 15:37:30.608293: step: 434/459, loss: 0.17066074907779694 2023-01-22 15:37:31.247186: step: 436/459, loss: 0.1659730225801468 2023-01-22 15:37:31.827451: step: 438/459, loss: 0.17135414481163025 2023-01-22 15:37:32.439888: step: 440/459, loss: 0.2138075977563858 2023-01-22 15:37:33.131659: step: 442/459, loss: 0.8821046948432922 2023-01-22 15:37:33.739375: step: 444/459, loss: 0.1475393921136856 2023-01-22 15:37:34.331343: step: 446/459, loss: 0.042717497795820236 2023-01-22 15:37:34.916622: step: 448/459, loss: 0.199868306517601 2023-01-22 15:37:35.573559: step: 450/459, loss: 0.19300724565982819 2023-01-22 15:37:36.282032: step: 452/459, loss: 0.8738548755645752 2023-01-22 15:37:36.885820: step: 454/459, loss: 0.19554735720157623 2023-01-22 15:37:37.503882: step: 456/459, loss: 0.40214136242866516 2023-01-22 15:37:38.113294: step: 458/459, loss: 0.17833256721496582 2023-01-22 15:37:38.684127: step: 460/459, loss: 0.1267579197883606 2023-01-22 15:37:39.356855: step: 462/459, loss: 0.25947046279907227 2023-01-22 15:37:39.945536: step: 464/459, loss: 0.20731684565544128 2023-01-22 15:37:40.494153: step: 466/459, loss: 0.166166752576828 2023-01-22 15:37:41.121487: step: 468/459, loss: 0.292460560798645 2023-01-22 15:37:41.760834: step: 470/459, loss: 0.5697593688964844 2023-01-22 15:37:42.368654: step: 472/459, loss: 0.9000967741012573 2023-01-22 15:37:42.999269: step: 474/459, loss: 0.29239580035209656 2023-01-22 15:37:43.615399: step: 476/459, loss: 0.09455792605876923 2023-01-22 15:37:44.258150: step: 478/459, loss: 0.31909704208374023 2023-01-22 15:37:44.835339: step: 480/459, loss: 2.982056140899658 2023-01-22 15:37:45.454939: step: 482/459, loss: 0.145815908908844 2023-01-22 15:37:46.085740: step: 484/459, loss: 0.16498112678527832 2023-01-22 15:37:46.712207: step: 486/459, loss: 0.1399378627538681 2023-01-22 15:37:47.330542: step: 488/459, loss: 0.21162459254264832 2023-01-22 15:37:47.904805: step: 490/459, loss: 0.45241034030914307 2023-01-22 15:37:48.564671: step: 492/459, loss: 0.4401078224182129 2023-01-22 15:37:49.236204: step: 494/459, loss: 0.11138725280761719 2023-01-22 15:37:49.877111: step: 496/459, loss: 0.527977466583252 2023-01-22 15:37:50.598243: step: 498/459, loss: 0.12086356431245804 2023-01-22 15:37:51.240303: step: 500/459, loss: 0.28425246477127075 2023-01-22 15:37:51.874067: step: 502/459, loss: 0.4891122579574585 2023-01-22 15:37:52.588815: step: 504/459, loss: 0.32131654024124146 2023-01-22 15:37:53.192315: step: 506/459, loss: 0.36264580488204956 2023-01-22 15:37:53.811661: step: 508/459, loss: 0.6645919680595398 2023-01-22 15:37:54.436869: step: 510/459, loss: 0.2011360079050064 2023-01-22 15:37:55.065296: step: 512/459, loss: 0.09906935691833496 2023-01-22 15:37:55.649586: step: 514/459, loss: 0.4558353126049042 2023-01-22 15:37:56.261443: step: 516/459, loss: 0.17551398277282715 2023-01-22 15:37:56.911610: step: 518/459, loss: 0.40904438495635986 2023-01-22 15:37:57.543155: step: 520/459, loss: 0.1810484528541565 2023-01-22 15:37:58.101770: step: 522/459, loss: 0.15523682534694672 2023-01-22 15:37:58.753147: step: 524/459, loss: 0.3691319227218628 2023-01-22 15:37:59.370619: step: 526/459, loss: 0.48791107535362244 2023-01-22 15:37:59.973200: step: 528/459, loss: 0.354061096906662 2023-01-22 15:38:00.638340: step: 530/459, loss: 0.5648361444473267 2023-01-22 15:38:01.248282: step: 532/459, loss: 0.45139968395233154 2023-01-22 15:38:01.872712: step: 534/459, loss: 0.19726978242397308 2023-01-22 15:38:02.508176: step: 536/459, loss: 0.2520568370819092 2023-01-22 15:38:03.145423: step: 538/459, loss: 0.30226755142211914 2023-01-22 15:38:03.714114: step: 540/459, loss: 0.36519038677215576 2023-01-22 15:38:04.394902: step: 542/459, loss: 0.21075822412967682 2023-01-22 15:38:04.962823: step: 544/459, loss: 0.3860914707183838 2023-01-22 15:38:05.539271: step: 546/459, loss: 0.2340962141752243 2023-01-22 15:38:06.169975: step: 548/459, loss: 0.4595526158809662 2023-01-22 15:38:06.802505: step: 550/459, loss: 0.10705343633890152 2023-01-22 15:38:07.367083: step: 552/459, loss: 3.4361469745635986 2023-01-22 15:38:07.971354: step: 554/459, loss: 0.19068904221057892 2023-01-22 15:38:08.582666: step: 556/459, loss: 0.4188966155052185 2023-01-22 15:38:09.170163: step: 558/459, loss: 0.15403220057487488 2023-01-22 15:38:09.855952: step: 560/459, loss: 0.5323830246925354 2023-01-22 15:38:10.506496: step: 562/459, loss: 0.1517057567834854 2023-01-22 15:38:11.140940: step: 564/459, loss: 0.14885766804218292 2023-01-22 15:38:11.755663: step: 566/459, loss: 0.4852938652038574 2023-01-22 15:38:12.373028: step: 568/459, loss: 0.32408690452575684 2023-01-22 15:38:12.967435: step: 570/459, loss: 0.4284626543521881 2023-01-22 15:38:13.630616: step: 572/459, loss: 0.801398754119873 2023-01-22 15:38:14.221975: step: 574/459, loss: 0.4309310019016266 2023-01-22 15:38:14.844167: step: 576/459, loss: 0.11286843568086624 2023-01-22 15:38:15.395549: step: 578/459, loss: 0.14410194754600525 2023-01-22 15:38:16.029312: step: 580/459, loss: 0.18278172612190247 2023-01-22 15:38:16.629399: step: 582/459, loss: 0.3103119432926178 2023-01-22 15:38:17.223401: step: 584/459, loss: 0.2026996612548828 2023-01-22 15:38:17.912314: step: 586/459, loss: 0.28742700815200806 2023-01-22 15:38:18.496029: step: 588/459, loss: 0.25346776843070984 2023-01-22 15:38:19.124756: step: 590/459, loss: 0.2939833998680115 2023-01-22 15:38:19.790996: step: 592/459, loss: 0.454448401927948 2023-01-22 15:38:20.423633: step: 594/459, loss: 0.2869735062122345 2023-01-22 15:38:21.044354: step: 596/459, loss: 0.45523616671562195 2023-01-22 15:38:21.738006: step: 598/459, loss: 0.35957515239715576 2023-01-22 15:38:22.390372: step: 600/459, loss: 0.12421543151140213 2023-01-22 15:38:23.029015: step: 602/459, loss: 0.16286712884902954 2023-01-22 15:38:23.628846: step: 604/459, loss: 0.12312368303537369 2023-01-22 15:38:24.275545: step: 606/459, loss: 0.22691082954406738 2023-01-22 15:38:24.888711: step: 608/459, loss: 0.13836796581745148 2023-01-22 15:38:25.527085: step: 610/459, loss: 0.1614159643650055 2023-01-22 15:38:26.125343: step: 612/459, loss: 0.10212172567844391 2023-01-22 15:38:26.771346: step: 614/459, loss: 0.3948909640312195 2023-01-22 15:38:27.385485: step: 616/459, loss: 0.09240804612636566 2023-01-22 15:38:28.016077: step: 618/459, loss: 0.09940757602453232 2023-01-22 15:38:28.682741: step: 620/459, loss: 0.3025522530078888 2023-01-22 15:38:29.313159: step: 622/459, loss: 0.5962736010551453 2023-01-22 15:38:29.945532: step: 624/459, loss: 0.33666473627090454 2023-01-22 15:38:30.648074: step: 626/459, loss: 0.2345014214515686 2023-01-22 15:38:31.265120: step: 628/459, loss: 0.2600373327732086 2023-01-22 15:38:31.962052: step: 630/459, loss: 0.22698578238487244 2023-01-22 15:38:32.627511: step: 632/459, loss: 0.4723261296749115 2023-01-22 15:38:33.238224: step: 634/459, loss: 0.22383373975753784 2023-01-22 15:38:33.823677: step: 636/459, loss: 0.18489912152290344 2023-01-22 15:38:34.395904: step: 638/459, loss: 0.5044118762016296 2023-01-22 15:38:35.022551: step: 640/459, loss: 0.39955681562423706 2023-01-22 15:38:35.742691: step: 642/459, loss: 0.2955659031867981 2023-01-22 15:38:36.406322: step: 644/459, loss: 0.3004939556121826 2023-01-22 15:38:37.062531: step: 646/459, loss: 0.5252423286437988 2023-01-22 15:38:37.665826: step: 648/459, loss: 0.221584752202034 2023-01-22 15:38:38.303919: step: 650/459, loss: 0.13387778401374817 2023-01-22 15:38:38.909013: step: 652/459, loss: 0.3574215769767761 2023-01-22 15:38:39.575846: step: 654/459, loss: 0.9834014773368835 2023-01-22 15:38:40.218617: step: 656/459, loss: 0.7178690433502197 2023-01-22 15:38:40.878592: step: 658/459, loss: 0.692368745803833 2023-01-22 15:38:41.464972: step: 660/459, loss: 0.5273587703704834 2023-01-22 15:38:42.043809: step: 662/459, loss: 0.5832122564315796 2023-01-22 15:38:42.638623: step: 664/459, loss: 0.4947511553764343 2023-01-22 15:38:43.253223: step: 666/459, loss: 0.02446294203400612 2023-01-22 15:38:43.854551: step: 668/459, loss: 0.3528730869293213 2023-01-22 15:38:44.498881: step: 670/459, loss: 0.4028245210647583 2023-01-22 15:38:45.051217: step: 672/459, loss: 0.7383182644844055 2023-01-22 15:38:45.650134: step: 674/459, loss: 0.10176069289445877 2023-01-22 15:38:46.283557: step: 676/459, loss: 0.5754250288009644 2023-01-22 15:38:46.910646: step: 678/459, loss: 0.8677403926849365 2023-01-22 15:38:47.523562: step: 680/459, loss: 0.4589494466781616 2023-01-22 15:38:48.117095: step: 682/459, loss: 0.2502700090408325 2023-01-22 15:38:48.700313: step: 684/459, loss: 0.2495073676109314 2023-01-22 15:38:49.300875: step: 686/459, loss: 0.7986702919006348 2023-01-22 15:38:49.944435: step: 688/459, loss: 0.28607290983200073 2023-01-22 15:38:50.538393: step: 690/459, loss: 0.3685033321380615 2023-01-22 15:38:51.128299: step: 692/459, loss: 0.21847385168075562 2023-01-22 15:38:51.751313: step: 694/459, loss: 0.03283016011118889 2023-01-22 15:38:52.369549: step: 696/459, loss: 0.2973082661628723 2023-01-22 15:38:52.995198: step: 698/459, loss: 0.24851839244365692 2023-01-22 15:38:53.586227: step: 700/459, loss: 1.1131194829940796 2023-01-22 15:38:54.228907: step: 702/459, loss: 0.9541948437690735 2023-01-22 15:38:54.831495: step: 704/459, loss: 1.3309869766235352 2023-01-22 15:38:55.511771: step: 706/459, loss: 1.0142126083374023 2023-01-22 15:38:56.129032: step: 708/459, loss: 0.3355753719806671 2023-01-22 15:38:56.737388: step: 710/459, loss: 0.37209707498550415 2023-01-22 15:38:57.343665: step: 712/459, loss: 0.32686638832092285 2023-01-22 15:38:57.952798: step: 714/459, loss: 0.20362971723079681 2023-01-22 15:38:58.595143: step: 716/459, loss: 0.16876961290836334 2023-01-22 15:38:59.301427: step: 718/459, loss: 0.3659484386444092 2023-01-22 15:38:59.923196: step: 720/459, loss: 0.22536559402942657 2023-01-22 15:39:00.574643: step: 722/459, loss: 0.08051011711359024 2023-01-22 15:39:01.158657: step: 724/459, loss: 0.18452997505664825 2023-01-22 15:39:01.809925: step: 726/459, loss: 0.25369101762771606 2023-01-22 15:39:02.515251: step: 728/459, loss: 0.10096472501754761 2023-01-22 15:39:03.166964: step: 730/459, loss: 0.3824891746044159 2023-01-22 15:39:03.781484: step: 732/459, loss: 0.49917393922805786 2023-01-22 15:39:04.436277: step: 734/459, loss: 0.29444387555122375 2023-01-22 15:39:05.084464: step: 736/459, loss: 0.21011686325073242 2023-01-22 15:39:05.661933: step: 738/459, loss: 0.13684450089931488 2023-01-22 15:39:06.282633: step: 740/459, loss: 0.4364947974681854 2023-01-22 15:39:06.992918: step: 742/459, loss: 0.5213909149169922 2023-01-22 15:39:07.582039: step: 744/459, loss: 0.25392189621925354 2023-01-22 15:39:08.155639: step: 746/459, loss: 0.4470738470554352 2023-01-22 15:39:08.747928: step: 748/459, loss: 0.1712038516998291 2023-01-22 15:39:09.371594: step: 750/459, loss: 0.3320842981338501 2023-01-22 15:39:09.998349: step: 752/459, loss: 0.26711949706077576 2023-01-22 15:39:10.578272: step: 754/459, loss: 0.20321215689182281 2023-01-22 15:39:11.249765: step: 756/459, loss: 0.9307518601417542 2023-01-22 15:39:11.850262: step: 758/459, loss: 0.17064183950424194 2023-01-22 15:39:12.451959: step: 760/459, loss: 0.16415415704250336 2023-01-22 15:39:13.063039: step: 762/459, loss: 0.22619758546352386 2023-01-22 15:39:13.637799: step: 764/459, loss: 0.2863119840621948 2023-01-22 15:39:14.296813: step: 766/459, loss: 0.10006478428840637 2023-01-22 15:39:14.977251: step: 768/459, loss: 0.08829101175069809 2023-01-22 15:39:15.637646: step: 770/459, loss: 0.13667884469032288 2023-01-22 15:39:16.263487: step: 772/459, loss: 2.164541244506836 2023-01-22 15:39:17.060764: step: 774/459, loss: 0.2711189389228821 2023-01-22 15:39:17.757686: step: 776/459, loss: 0.4047464430332184 2023-01-22 15:39:18.401420: step: 778/459, loss: 4.638520240783691 2023-01-22 15:39:19.034874: step: 780/459, loss: 0.2706935703754425 2023-01-22 15:39:19.648096: step: 782/459, loss: 0.14475788176059723 2023-01-22 15:39:20.344486: step: 784/459, loss: 0.39738813042640686 2023-01-22 15:39:21.006516: step: 786/459, loss: 0.13893568515777588 2023-01-22 15:39:21.667444: step: 788/459, loss: 0.13139045238494873 2023-01-22 15:39:22.253941: step: 790/459, loss: 0.16454365849494934 2023-01-22 15:39:22.942365: step: 792/459, loss: 1.347833514213562 2023-01-22 15:39:23.596519: step: 794/459, loss: 0.17499710619449615 2023-01-22 15:39:24.203869: step: 796/459, loss: 0.38105157017707825 2023-01-22 15:39:24.878942: step: 798/459, loss: 0.15227513015270233 2023-01-22 15:39:25.446732: step: 800/459, loss: 0.28471437096595764 2023-01-22 15:39:26.121116: step: 802/459, loss: 0.2209750860929489 2023-01-22 15:39:26.736825: step: 804/459, loss: 0.7337496280670166 2023-01-22 15:39:27.336565: step: 806/459, loss: 0.24932976067066193 2023-01-22 15:39:27.998209: step: 808/459, loss: 0.43047162890434265 2023-01-22 15:39:28.539307: step: 810/459, loss: 0.34606149792671204 2023-01-22 15:39:29.171832: step: 812/459, loss: 0.36937662959098816 2023-01-22 15:39:29.723272: step: 814/459, loss: 0.18080925941467285 2023-01-22 15:39:30.289244: step: 816/459, loss: 0.6112193465232849 2023-01-22 15:39:30.930126: step: 818/459, loss: 0.7706747055053711 2023-01-22 15:39:31.518933: step: 820/459, loss: 0.26852646470069885 2023-01-22 15:39:32.247938: step: 822/459, loss: 0.16055721044540405 2023-01-22 15:39:32.887631: step: 824/459, loss: 0.36304378509521484 2023-01-22 15:39:33.534962: step: 826/459, loss: 0.14541125297546387 2023-01-22 15:39:34.159173: step: 828/459, loss: 0.31827524304389954 2023-01-22 15:39:34.767221: step: 830/459, loss: 0.11696010828018188 2023-01-22 15:39:35.416198: step: 832/459, loss: 0.22134330868721008 2023-01-22 15:39:36.031227: step: 834/459, loss: 0.5800367593765259 2023-01-22 15:39:36.584636: step: 836/459, loss: 0.8357799053192139 2023-01-22 15:39:37.227880: step: 838/459, loss: 0.1527685672044754 2023-01-22 15:39:37.939616: step: 840/459, loss: 0.2896485924720764 2023-01-22 15:39:38.605382: step: 842/459, loss: 0.18816415965557098 2023-01-22 15:39:39.237786: step: 844/459, loss: 0.5510760545730591 2023-01-22 15:39:39.925294: step: 846/459, loss: 0.35877925157546997 2023-01-22 15:39:40.608898: step: 848/459, loss: 0.3067723512649536 2023-01-22 15:39:41.218780: step: 850/459, loss: 0.2490990161895752 2023-01-22 15:39:41.879656: step: 852/459, loss: 0.22330492734909058 2023-01-22 15:39:42.441574: step: 854/459, loss: 0.6082636713981628 2023-01-22 15:39:43.061301: step: 856/459, loss: 0.38931572437286377 2023-01-22 15:39:43.726294: step: 858/459, loss: 0.29516786336898804 2023-01-22 15:39:44.406928: step: 860/459, loss: 0.3143901824951172 2023-01-22 15:39:45.044741: step: 862/459, loss: 0.4596213698387146 2023-01-22 15:39:45.782265: step: 864/459, loss: 1.784907579421997 2023-01-22 15:39:46.392371: step: 866/459, loss: 0.12098558247089386 2023-01-22 15:39:47.087879: step: 868/459, loss: 0.2939828932285309 2023-01-22 15:39:47.676751: step: 870/459, loss: 0.10397935658693314 2023-01-22 15:39:48.285895: step: 872/459, loss: 0.13100995123386383 2023-01-22 15:39:49.034239: step: 874/459, loss: 0.27525967359542847 2023-01-22 15:39:49.748124: step: 876/459, loss: 0.4288443326950073 2023-01-22 15:39:50.339156: step: 878/459, loss: 0.29519525170326233 2023-01-22 15:39:50.962065: step: 880/459, loss: 1.2497903108596802 2023-01-22 15:39:51.580558: step: 882/459, loss: 0.3904200494289398 2023-01-22 15:39:52.232230: step: 884/459, loss: 0.5412472486495972 2023-01-22 15:39:52.902442: step: 886/459, loss: 0.2416374683380127 2023-01-22 15:39:53.511730: step: 888/459, loss: 0.6026015877723694 2023-01-22 15:39:54.114774: step: 890/459, loss: 0.37533146142959595 2023-01-22 15:39:54.720084: step: 892/459, loss: 0.33202147483825684 2023-01-22 15:39:55.400377: step: 894/459, loss: 0.16925440728664398 2023-01-22 15:39:55.983662: step: 896/459, loss: 0.23180295526981354 2023-01-22 15:39:56.560508: step: 898/459, loss: 0.36559879779815674 2023-01-22 15:39:57.188246: step: 900/459, loss: 0.17105074226856232 2023-01-22 15:39:57.836070: step: 902/459, loss: 0.2343926876783371 2023-01-22 15:39:58.456749: step: 904/459, loss: 1.0488343238830566 2023-01-22 15:39:59.132820: step: 906/459, loss: 0.6590080857276917 2023-01-22 15:39:59.724180: step: 908/459, loss: 0.8902783393859863 2023-01-22 15:40:00.384990: step: 910/459, loss: 0.36069390177726746 2023-01-22 15:40:01.025652: step: 912/459, loss: 0.23030008375644684 2023-01-22 15:40:01.690845: step: 914/459, loss: 0.07013504207134247 2023-01-22 15:40:02.426304: step: 916/459, loss: 0.34023216366767883 2023-01-22 15:40:03.075059: step: 918/459, loss: 1.4804028272628784 2023-01-22 15:40:03.502545: step: 920/459, loss: 0.01815219409763813 ================================================== Loss: 0.393 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2712276263251813, 'r': 0.34173651590117726, 'f1': 0.3024267739377337}, 'combined': 0.22284078079622482, 'epoch': 9} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3006570227153809, 'r': 0.31541654928504503, 'f1': 0.3078599859925018}, 'combined': 0.19703039103520112, 'epoch': 9} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2713949227078891, 'r': 0.3450371882624017, 'f1': 0.30381720670724427}, 'combined': 0.22386531020533787, 'epoch': 9} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.29464516947422764, 'r': 0.3099131464378922, 'f1': 0.3020863633865143}, 'combined': 0.19333527256736913, 'epoch': 9} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28851710222821064, 'r': 0.33833694340993203, 'f1': 0.3114472824052999}, 'combined': 0.22948747124601043, 'epoch': 9} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.31717626063008053, 'r': 0.33336757384364896, 'f1': 0.32507042533909586}, 'combined': 0.23306936156388006, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2222222222222222, 'r': 0.38095238095238093, 'f1': 0.2807017543859649}, 'combined': 0.1871345029239766, 'epoch': 9} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.43478260869565216, 'f1': 0.3174603174603175}, 'combined': 0.15873015873015875, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2727272727272727, 'r': 0.20689655172413793, 'f1': 0.23529411764705882}, 'combined': 0.1568627450980392, 'epoch': 9} New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27463650173611115, 'r': 0.29960345643939396, 'f1': 0.2865772192028986}, 'combined': 0.21116216151792525, 'epoch': 4} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3275304334361018, 'r': 0.25382122518550904, 'f1': 0.28600305741761856}, 'combined': 0.18304195674727586, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2387005649717514, 'r': 0.40238095238095234, 'f1': 0.29964539007092195}, 'combined': 0.1997635933806146, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2713949227078891, 'r': 0.3450371882624017, 'f1': 0.30381720670724427}, 'combined': 0.22386531020533787, 'epoch': 9} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.29464516947422764, 'r': 0.3099131464378922, 'f1': 0.3020863633865143}, 'combined': 0.19333527256736913, 'epoch': 9} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.43478260869565216, 'f1': 0.3174603174603175}, 'combined': 0.15873015873015875, 'epoch': 9} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28851710222821064, 'r': 0.33833694340993203, 'f1': 0.3114472824052999}, 'combined': 0.22948747124601043, 'epoch': 9} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.31717626063008053, 'r': 0.33336757384364896, 'f1': 0.32507042533909586}, 'combined': 0.23306936156388006, 'epoch': 9} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2727272727272727, 'r': 0.20689655172413793, 'f1': 0.23529411764705882}, 'combined': 0.1568627450980392, 'epoch': 9} ****************************** Epoch: 10 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 15:43:02.579019: step: 2/459, loss: 0.22610728442668915 2023-01-22 15:43:03.224546: step: 4/459, loss: 0.2786632776260376 2023-01-22 15:43:03.886283: step: 6/459, loss: 0.13850268721580505 2023-01-22 15:43:04.509744: step: 8/459, loss: 0.16258689761161804 2023-01-22 15:43:05.204162: step: 10/459, loss: 0.1342201828956604 2023-01-22 15:43:05.811208: step: 12/459, loss: 0.24931097030639648 2023-01-22 15:43:06.510564: step: 14/459, loss: 0.5109156966209412 2023-01-22 15:43:07.076642: step: 16/459, loss: 0.12458965927362442 2023-01-22 15:43:07.726477: step: 18/459, loss: 0.27917394042015076 2023-01-22 15:43:08.377929: step: 20/459, loss: 0.15312691032886505 2023-01-22 15:43:08.964988: step: 22/459, loss: 0.5438960194587708 2023-01-22 15:43:09.517030: step: 24/459, loss: 0.0899239182472229 2023-01-22 15:43:10.201171: step: 26/459, loss: 0.4274297058582306 2023-01-22 15:43:10.828945: step: 28/459, loss: 0.2798287868499756 2023-01-22 15:43:11.451897: step: 30/459, loss: 0.13982917368412018 2023-01-22 15:43:12.029786: step: 32/459, loss: 0.28892606496810913 2023-01-22 15:43:12.679720: step: 34/459, loss: 0.11867410689592361 2023-01-22 15:43:13.358276: step: 36/459, loss: 0.3600650429725647 2023-01-22 15:43:14.001427: step: 38/459, loss: 0.6920903325080872 2023-01-22 15:43:14.694354: step: 40/459, loss: 0.17261913418769836 2023-01-22 15:43:15.330797: step: 42/459, loss: 0.22485020756721497 2023-01-22 15:43:15.957623: step: 44/459, loss: 0.2746923863887787 2023-01-22 15:43:16.513227: step: 46/459, loss: 0.240845188498497 2023-01-22 15:43:17.104034: step: 48/459, loss: 0.36299222707748413 2023-01-22 15:43:17.873838: step: 50/459, loss: 0.11169835180044174 2023-01-22 15:43:18.458103: step: 52/459, loss: 0.25187236070632935 2023-01-22 15:43:19.043558: step: 54/459, loss: 0.11081898957490921 2023-01-22 15:43:19.742192: step: 56/459, loss: 0.09167373925447464 2023-01-22 15:43:20.327019: step: 58/459, loss: 0.10812011361122131 2023-01-22 15:43:21.045356: step: 60/459, loss: 0.4828488528728485 2023-01-22 15:43:21.617966: step: 62/459, loss: 0.17310725152492523 2023-01-22 15:43:22.248946: step: 64/459, loss: 0.08307833969593048 2023-01-22 15:43:22.890599: step: 66/459, loss: 0.22032605111598969 2023-01-22 15:43:23.441158: step: 68/459, loss: 0.17941907048225403 2023-01-22 15:43:24.078758: step: 70/459, loss: 0.09711477160453796 2023-01-22 15:43:24.729362: step: 72/459, loss: 0.8967486619949341 2023-01-22 15:43:25.302902: step: 74/459, loss: 0.21190422773361206 2023-01-22 15:43:25.977743: step: 76/459, loss: 0.1329064518213272 2023-01-22 15:43:26.627419: step: 78/459, loss: 0.1633581668138504 2023-01-22 15:43:27.244543: step: 80/459, loss: 0.8763375282287598 2023-01-22 15:43:27.904685: step: 82/459, loss: 0.1736348569393158 2023-01-22 15:43:28.513848: step: 84/459, loss: 0.454317569732666 2023-01-22 15:43:29.109025: step: 86/459, loss: 0.31563708186149597 2023-01-22 15:43:29.824722: step: 88/459, loss: 0.1231343150138855 2023-01-22 15:43:30.409026: step: 90/459, loss: 0.260676771402359 2023-01-22 15:43:31.106681: step: 92/459, loss: 0.23280823230743408 2023-01-22 15:43:31.730824: step: 94/459, loss: 0.5272774696350098 2023-01-22 15:43:32.431616: step: 96/459, loss: 0.17118847370147705 2023-01-22 15:43:33.018496: step: 98/459, loss: 1.0567642450332642 2023-01-22 15:43:33.621295: step: 100/459, loss: 0.2124844789505005 2023-01-22 15:43:34.270081: step: 102/459, loss: 0.11850497126579285 2023-01-22 15:43:34.878282: step: 104/459, loss: 0.2916070818901062 2023-01-22 15:43:35.539859: step: 106/459, loss: 1.1517196893692017 2023-01-22 15:43:36.143208: step: 108/459, loss: 0.3815349340438843 2023-01-22 15:43:36.798302: step: 110/459, loss: 0.070505291223526 2023-01-22 15:43:37.501434: step: 112/459, loss: 0.17048414051532745 2023-01-22 15:43:38.152426: step: 114/459, loss: 0.4358203411102295 2023-01-22 15:43:38.796349: step: 116/459, loss: 0.12316520512104034 2023-01-22 15:43:39.417827: step: 118/459, loss: 0.2275605946779251 2023-01-22 15:43:40.031034: step: 120/459, loss: 0.3358086943626404 2023-01-22 15:43:40.695886: step: 122/459, loss: 0.1541461944580078 2023-01-22 15:43:41.318802: step: 124/459, loss: 0.24699753522872925 2023-01-22 15:43:41.978375: step: 126/459, loss: 0.3846650719642639 2023-01-22 15:43:42.556547: step: 128/459, loss: 0.18897800147533417 2023-01-22 15:43:43.125665: step: 130/459, loss: 0.4361589252948761 2023-01-22 15:43:43.755133: step: 132/459, loss: 0.46338921785354614 2023-01-22 15:43:44.356907: step: 134/459, loss: 0.28339147567749023 2023-01-22 15:43:44.984302: step: 136/459, loss: 0.15125402808189392 2023-01-22 15:43:45.591727: step: 138/459, loss: 0.3562297821044922 2023-01-22 15:43:46.316670: step: 140/459, loss: 0.8347601890563965 2023-01-22 15:43:46.955134: step: 142/459, loss: 0.3819274306297302 2023-01-22 15:43:47.536721: step: 144/459, loss: 1.9102306365966797 2023-01-22 15:43:48.169613: step: 146/459, loss: 0.14160224795341492 2023-01-22 15:43:48.751570: step: 148/459, loss: 0.34921392798423767 2023-01-22 15:43:49.375179: step: 150/459, loss: 0.26969531178474426 2023-01-22 15:43:49.999192: step: 152/459, loss: 0.2694658041000366 2023-01-22 15:43:50.649763: step: 154/459, loss: 0.0656208023428917 2023-01-22 15:43:51.235949: step: 156/459, loss: 0.3540137708187103 2023-01-22 15:43:51.818900: step: 158/459, loss: 0.1312653124332428 2023-01-22 15:43:52.459614: step: 160/459, loss: 0.49677711725234985 2023-01-22 15:43:53.042447: step: 162/459, loss: 0.25384196639060974 2023-01-22 15:43:53.668370: step: 164/459, loss: 0.2017701417207718 2023-01-22 15:43:54.348686: step: 166/459, loss: 1.2773247957229614 2023-01-22 15:43:55.005055: step: 168/459, loss: 0.1615687608718872 2023-01-22 15:43:55.644845: step: 170/459, loss: 0.17394331097602844 2023-01-22 15:43:56.231776: step: 172/459, loss: 0.22628384828567505 2023-01-22 15:43:56.910066: step: 174/459, loss: 0.1286797821521759 2023-01-22 15:43:57.517315: step: 176/459, loss: 0.3943638205528259 2023-01-22 15:43:58.122515: step: 178/459, loss: 0.11279895156621933 2023-01-22 15:43:58.765135: step: 180/459, loss: 0.27588412165641785 2023-01-22 15:43:59.303258: step: 182/459, loss: 0.09507818520069122 2023-01-22 15:43:59.900686: step: 184/459, loss: 1.2674676179885864 2023-01-22 15:44:00.569477: step: 186/459, loss: 0.15214769542217255 2023-01-22 15:44:01.171010: step: 188/459, loss: 0.17298823595046997 2023-01-22 15:44:01.848571: step: 190/459, loss: 0.19419118762016296 2023-01-22 15:44:02.450304: step: 192/459, loss: 0.6128352284431458 2023-01-22 15:44:03.035745: step: 194/459, loss: 0.3582507073879242 2023-01-22 15:44:03.689486: step: 196/459, loss: 0.25150272250175476 2023-01-22 15:44:04.301319: step: 198/459, loss: 0.020493997260928154 2023-01-22 15:44:04.910322: step: 200/459, loss: 0.19616995751857758 2023-01-22 15:44:05.582135: step: 202/459, loss: 0.5339926481246948 2023-01-22 15:44:06.229662: step: 204/459, loss: 0.18221579492092133 2023-01-22 15:44:06.856951: step: 206/459, loss: 0.20781877636909485 2023-01-22 15:44:07.433693: step: 208/459, loss: 0.13805760443210602 2023-01-22 15:44:08.041417: step: 210/459, loss: 0.09955912828445435 2023-01-22 15:44:08.692529: step: 212/459, loss: 0.2832687795162201 2023-01-22 15:44:09.372111: step: 214/459, loss: 0.8389816284179688 2023-01-22 15:44:09.987279: step: 216/459, loss: 0.8751591444015503 2023-01-22 15:44:10.705810: step: 218/459, loss: 1.5634924173355103 2023-01-22 15:44:11.320120: step: 220/459, loss: 0.05253215506672859 2023-01-22 15:44:12.016082: step: 222/459, loss: 0.4941399097442627 2023-01-22 15:44:12.605556: step: 224/459, loss: 0.17387481033802032 2023-01-22 15:44:13.173840: step: 226/459, loss: 0.23647494614124298 2023-01-22 15:44:13.837596: step: 228/459, loss: 0.13556183874607086 2023-01-22 15:44:14.464828: step: 230/459, loss: 0.07381556183099747 2023-01-22 15:44:15.110200: step: 232/459, loss: 0.1858452558517456 2023-01-22 15:44:15.777059: step: 234/459, loss: 0.27319031953811646 2023-01-22 15:44:16.439558: step: 236/459, loss: 0.10436579585075378 2023-01-22 15:44:17.116998: step: 238/459, loss: 0.22780919075012207 2023-01-22 15:44:17.763568: step: 240/459, loss: 0.17626972496509552 2023-01-22 15:44:18.398192: step: 242/459, loss: 1.041796326637268 2023-01-22 15:44:18.995335: step: 244/459, loss: 0.099839948117733 2023-01-22 15:44:19.594852: step: 246/459, loss: 0.43400734663009644 2023-01-22 15:44:20.226830: step: 248/459, loss: 0.18507032096385956 2023-01-22 15:44:20.846109: step: 250/459, loss: 0.36627262830734253 2023-01-22 15:44:21.461846: step: 252/459, loss: 0.32096239924430847 2023-01-22 15:44:22.158622: step: 254/459, loss: 0.7961674928665161 2023-01-22 15:44:22.795299: step: 256/459, loss: 0.15525484085083008 2023-01-22 15:44:23.416550: step: 258/459, loss: 0.2327055037021637 2023-01-22 15:44:23.981334: step: 260/459, loss: 0.10547396540641785 2023-01-22 15:44:24.590659: step: 262/459, loss: 0.14513374865055084 2023-01-22 15:44:25.216086: step: 264/459, loss: 0.09799749404191971 2023-01-22 15:44:25.911742: step: 266/459, loss: 0.20791542530059814 2023-01-22 15:44:26.532136: step: 268/459, loss: 0.21947310864925385 2023-01-22 15:44:27.122186: step: 270/459, loss: 0.05811810865998268 2023-01-22 15:44:27.715187: step: 272/459, loss: 0.07091820240020752 2023-01-22 15:44:28.326343: step: 274/459, loss: 0.08888401836156845 2023-01-22 15:44:29.022027: step: 276/459, loss: 0.6680675745010376 2023-01-22 15:44:29.661304: step: 278/459, loss: 0.22441859543323517 2023-01-22 15:44:30.260550: step: 280/459, loss: 0.20498254895210266 2023-01-22 15:44:30.865966: step: 282/459, loss: 0.09171580523252487 2023-01-22 15:44:31.476544: step: 284/459, loss: 0.5623090863227844 2023-01-22 15:44:32.079323: step: 286/459, loss: 0.9682990312576294 2023-01-22 15:44:32.715504: step: 288/459, loss: 0.301348477602005 2023-01-22 15:44:33.303435: step: 290/459, loss: 0.7686224579811096 2023-01-22 15:44:33.942102: step: 292/459, loss: 0.21794144809246063 2023-01-22 15:44:34.518079: step: 294/459, loss: 0.07105270773172379 2023-01-22 15:44:35.112441: step: 296/459, loss: 0.3893146216869354 2023-01-22 15:44:35.771580: step: 298/459, loss: 0.26330360770225525 2023-01-22 15:44:36.438145: step: 300/459, loss: 0.30579903721809387 2023-01-22 15:44:37.048631: step: 302/459, loss: 0.21083194017410278 2023-01-22 15:44:37.694919: step: 304/459, loss: 0.15920290350914001 2023-01-22 15:44:38.296771: step: 306/459, loss: 0.24914516508579254 2023-01-22 15:44:38.924793: step: 308/459, loss: 0.30458182096481323 2023-01-22 15:44:39.502590: step: 310/459, loss: 0.30000361800193787 2023-01-22 15:44:40.164173: step: 312/459, loss: 0.11337404698133469 2023-01-22 15:44:40.841775: step: 314/459, loss: 0.2891363203525543 2023-01-22 15:44:41.502446: step: 316/459, loss: 0.6706503033638 2023-01-22 15:44:42.171876: step: 318/459, loss: 0.806674599647522 2023-01-22 15:44:42.789278: step: 320/459, loss: 0.18026188015937805 2023-01-22 15:44:43.440149: step: 322/459, loss: 0.05113643407821655 2023-01-22 15:44:44.046418: step: 324/459, loss: 0.10776668041944504 2023-01-22 15:44:44.663850: step: 326/459, loss: 0.6870267987251282 2023-01-22 15:44:45.335153: step: 328/459, loss: 0.3527964651584625 2023-01-22 15:44:45.958948: step: 330/459, loss: 0.4324835538864136 2023-01-22 15:44:46.655374: step: 332/459, loss: 0.13043977320194244 2023-01-22 15:44:47.248414: step: 334/459, loss: 0.24314358830451965 2023-01-22 15:44:47.880051: step: 336/459, loss: 0.0705709233880043 2023-01-22 15:44:48.481108: step: 338/459, loss: 0.37765610218048096 2023-01-22 15:44:49.141885: step: 340/459, loss: 0.20556800067424774 2023-01-22 15:44:49.727722: step: 342/459, loss: 0.08478908985853195 2023-01-22 15:44:50.325887: step: 344/459, loss: 0.14171691238880157 2023-01-22 15:44:50.911605: step: 346/459, loss: 0.06233612447977066 2023-01-22 15:44:51.489630: step: 348/459, loss: 0.24337537586688995 2023-01-22 15:44:52.111919: step: 350/459, loss: 1.1024447679519653 2023-01-22 15:44:52.716213: step: 352/459, loss: 0.49163326621055603 2023-01-22 15:44:53.335070: step: 354/459, loss: 0.14195196330547333 2023-01-22 15:44:53.933828: step: 356/459, loss: 0.048860616981983185 2023-01-22 15:44:54.513824: step: 358/459, loss: 0.13728761672973633 2023-01-22 15:44:55.096500: step: 360/459, loss: 0.12688279151916504 2023-01-22 15:44:55.760496: step: 362/459, loss: 0.3149726986885071 2023-01-22 15:44:56.335114: step: 364/459, loss: 0.1953650265932083 2023-01-22 15:44:56.938491: step: 366/459, loss: 0.22961583733558655 2023-01-22 15:44:57.604865: step: 368/459, loss: 1.005109190940857 2023-01-22 15:44:58.322736: step: 370/459, loss: 0.20070284605026245 2023-01-22 15:44:58.846284: step: 372/459, loss: 0.03243998438119888 2023-01-22 15:44:59.431148: step: 374/459, loss: 0.14881271123886108 2023-01-22 15:45:00.020759: step: 376/459, loss: 0.5432679057121277 2023-01-22 15:45:00.644108: step: 378/459, loss: 0.25187671184539795 2023-01-22 15:45:01.266948: step: 380/459, loss: 0.12250269204378128 2023-01-22 15:45:01.872131: step: 382/459, loss: 0.2032904326915741 2023-01-22 15:45:02.558341: step: 384/459, loss: 0.22713610529899597 2023-01-22 15:45:03.226992: step: 386/459, loss: 0.23462994396686554 2023-01-22 15:45:03.840350: step: 388/459, loss: 0.08615240454673767 2023-01-22 15:45:04.519326: step: 390/459, loss: 0.2759768068790436 2023-01-22 15:45:05.217406: step: 392/459, loss: 0.12969771027565002 2023-01-22 15:45:05.903779: step: 394/459, loss: 0.08052876591682434 2023-01-22 15:45:06.553337: step: 396/459, loss: 0.28367406129837036 2023-01-22 15:45:07.159216: step: 398/459, loss: 0.1409715861082077 2023-01-22 15:45:07.835680: step: 400/459, loss: 0.22114722430706024 2023-01-22 15:45:08.433603: step: 402/459, loss: 0.25323525071144104 2023-01-22 15:45:09.064349: step: 404/459, loss: 0.1215936541557312 2023-01-22 15:45:09.727263: step: 406/459, loss: 0.4173739552497864 2023-01-22 15:45:10.339933: step: 408/459, loss: 0.12306585162878036 2023-01-22 15:45:10.956184: step: 410/459, loss: 0.22686484456062317 2023-01-22 15:45:11.547382: step: 412/459, loss: 0.8606250882148743 2023-01-22 15:45:12.154964: step: 414/459, loss: 0.13290874660015106 2023-01-22 15:45:12.758988: step: 416/459, loss: 0.2189546376466751 2023-01-22 15:45:13.417048: step: 418/459, loss: 0.9155065417289734 2023-01-22 15:45:14.017362: step: 420/459, loss: 0.10336514562368393 2023-01-22 15:45:14.593601: step: 422/459, loss: 0.42341870069503784 2023-01-22 15:45:15.208856: step: 424/459, loss: 0.44907307624816895 2023-01-22 15:45:15.820961: step: 426/459, loss: 0.4566139578819275 2023-01-22 15:45:16.434615: step: 428/459, loss: 0.16930824518203735 2023-01-22 15:45:17.027996: step: 430/459, loss: 1.2982723712921143 2023-01-22 15:45:17.626915: step: 432/459, loss: 0.6945377588272095 2023-01-22 15:45:18.185459: step: 434/459, loss: 0.11887839436531067 2023-01-22 15:45:18.798915: step: 436/459, loss: 0.21927504241466522 2023-01-22 15:45:19.395772: step: 438/459, loss: 0.5050202012062073 2023-01-22 15:45:20.001440: step: 440/459, loss: 0.28709137439727783 2023-01-22 15:45:20.599846: step: 442/459, loss: 0.13084959983825684 2023-01-22 15:45:21.243586: step: 444/459, loss: 0.08070079237222672 2023-01-22 15:45:21.832028: step: 446/459, loss: 0.23553963005542755 2023-01-22 15:45:22.466647: step: 448/459, loss: 0.4116637408733368 2023-01-22 15:45:23.089401: step: 450/459, loss: 0.12199553102254868 2023-01-22 15:45:23.737240: step: 452/459, loss: 0.11205653101205826 2023-01-22 15:45:24.372501: step: 454/459, loss: 0.07517842948436737 2023-01-22 15:45:25.001850: step: 456/459, loss: 0.05059798061847687 2023-01-22 15:45:25.538085: step: 458/459, loss: 0.1569020301103592 2023-01-22 15:45:26.216717: step: 460/459, loss: 0.1272823065519333 2023-01-22 15:45:26.847485: step: 462/459, loss: 0.3246210515499115 2023-01-22 15:45:27.481064: step: 464/459, loss: 0.17874707281589508 2023-01-22 15:45:28.148332: step: 466/459, loss: 0.09617853164672852 2023-01-22 15:45:28.763351: step: 468/459, loss: 0.15156129002571106 2023-01-22 15:45:29.380623: step: 470/459, loss: 0.6470770835876465 2023-01-22 15:45:30.028389: step: 472/459, loss: 0.38542211055755615 2023-01-22 15:45:30.704177: step: 474/459, loss: 0.05527980253100395 2023-01-22 15:45:31.345370: step: 476/459, loss: 0.13445088267326355 2023-01-22 15:45:31.967333: step: 478/459, loss: 0.13957779109477997 2023-01-22 15:45:32.589996: step: 480/459, loss: 0.18070527911186218 2023-01-22 15:45:33.199617: step: 482/459, loss: 0.6736512184143066 2023-01-22 15:45:33.956505: step: 484/459, loss: 0.4167126715183258 2023-01-22 15:45:34.573266: step: 486/459, loss: 0.09351364523172379 2023-01-22 15:45:35.200566: step: 488/459, loss: 0.24610280990600586 2023-01-22 15:45:35.802424: step: 490/459, loss: 0.1371878832578659 2023-01-22 15:45:36.414887: step: 492/459, loss: 0.6399286389350891 2023-01-22 15:45:37.081469: step: 494/459, loss: 0.15108665823936462 2023-01-22 15:45:37.718046: step: 496/459, loss: 0.09447549283504486 2023-01-22 15:45:38.321101: step: 498/459, loss: 0.11479125916957855 2023-01-22 15:45:39.030169: step: 500/459, loss: 0.4317035973072052 2023-01-22 15:45:39.661028: step: 502/459, loss: 0.32051077485084534 2023-01-22 15:45:40.327996: step: 504/459, loss: 0.21787351369857788 2023-01-22 15:45:40.966601: step: 506/459, loss: 0.18699343502521515 2023-01-22 15:45:41.568159: step: 508/459, loss: 0.280455082654953 2023-01-22 15:45:42.208377: step: 510/459, loss: 0.12450306117534637 2023-01-22 15:45:42.781370: step: 512/459, loss: 0.2533493638038635 2023-01-22 15:45:43.498837: step: 514/459, loss: 0.2891717851161957 2023-01-22 15:45:44.080536: step: 516/459, loss: 0.3478267788887024 2023-01-22 15:45:44.697660: step: 518/459, loss: 0.10450218617916107 2023-01-22 15:45:45.291857: step: 520/459, loss: 0.11506922543048859 2023-01-22 15:45:45.887192: step: 522/459, loss: 0.2251458615064621 2023-01-22 15:45:46.486141: step: 524/459, loss: 0.1885782927274704 2023-01-22 15:45:47.108203: step: 526/459, loss: 0.2875674366950989 2023-01-22 15:45:47.756401: step: 528/459, loss: 0.21343481540679932 2023-01-22 15:45:48.432517: step: 530/459, loss: 0.4006529152393341 2023-01-22 15:45:49.010112: step: 532/459, loss: 0.19058696925640106 2023-01-22 15:45:49.691465: step: 534/459, loss: 0.5904630422592163 2023-01-22 15:45:50.212686: step: 536/459, loss: 0.20291927456855774 2023-01-22 15:45:50.806225: step: 538/459, loss: 0.5890711545944214 2023-01-22 15:45:51.385096: step: 540/459, loss: 0.8341128826141357 2023-01-22 15:45:52.003410: step: 542/459, loss: 1.6909565925598145 2023-01-22 15:45:52.648192: step: 544/459, loss: 0.5401917695999146 2023-01-22 15:45:53.318761: step: 546/459, loss: 0.4209423065185547 2023-01-22 15:45:53.924847: step: 548/459, loss: 0.13605163991451263 2023-01-22 15:45:54.525751: step: 550/459, loss: 0.4447360634803772 2023-01-22 15:45:55.135396: step: 552/459, loss: 0.11016832292079926 2023-01-22 15:45:55.750562: step: 554/459, loss: 0.3080994486808777 2023-01-22 15:45:56.348529: step: 556/459, loss: 0.24533674120903015 2023-01-22 15:45:56.984593: step: 558/459, loss: 0.1837795376777649 2023-01-22 15:45:57.653505: step: 560/459, loss: 0.24746741354465485 2023-01-22 15:45:58.232048: step: 562/459, loss: 0.24878458678722382 2023-01-22 15:45:58.896628: step: 564/459, loss: 0.8954951763153076 2023-01-22 15:45:59.456471: step: 566/459, loss: 0.9496004581451416 2023-01-22 15:46:00.135141: step: 568/459, loss: 0.16735492646694183 2023-01-22 15:46:00.748061: step: 570/459, loss: 0.5917568206787109 2023-01-22 15:46:01.383724: step: 572/459, loss: 0.6572994589805603 2023-01-22 15:46:01.989774: step: 574/459, loss: 0.13193270564079285 2023-01-22 15:46:02.703499: step: 576/459, loss: 4.367260932922363 2023-01-22 15:46:03.341335: step: 578/459, loss: 0.09531519562005997 2023-01-22 15:46:03.926801: step: 580/459, loss: 0.37798821926116943 2023-01-22 15:46:04.525863: step: 582/459, loss: 0.08541770279407501 2023-01-22 15:46:05.138218: step: 584/459, loss: 0.46849995851516724 2023-01-22 15:46:05.757697: step: 586/459, loss: 0.29982930421829224 2023-01-22 15:46:06.377667: step: 588/459, loss: 0.0547085665166378 2023-01-22 15:46:07.018299: step: 590/459, loss: 0.3504946529865265 2023-01-22 15:46:07.615405: step: 592/459, loss: 0.28253865242004395 2023-01-22 15:46:08.302799: step: 594/459, loss: 0.15009745955467224 2023-01-22 15:46:08.901235: step: 596/459, loss: 0.5228163003921509 2023-01-22 15:46:09.503564: step: 598/459, loss: 0.08003262430429459 2023-01-22 15:46:10.179413: step: 600/459, loss: 0.4407813549041748 2023-01-22 15:46:10.810162: step: 602/459, loss: 0.30357128381729126 2023-01-22 15:46:11.421720: step: 604/459, loss: 0.48860839009284973 2023-01-22 15:46:12.009700: step: 606/459, loss: 0.13381972908973694 2023-01-22 15:46:12.571578: step: 608/459, loss: 0.15999387204647064 2023-01-22 15:46:13.215247: step: 610/459, loss: 0.6542862057685852 2023-01-22 15:46:13.781773: step: 612/459, loss: 0.18356075882911682 2023-01-22 15:46:14.375996: step: 614/459, loss: 0.2202473282814026 2023-01-22 15:46:14.959893: step: 616/459, loss: 0.3633947968482971 2023-01-22 15:46:15.622535: step: 618/459, loss: 0.37253862619400024 2023-01-22 15:46:16.252319: step: 620/459, loss: 0.1657758206129074 2023-01-22 15:46:16.867434: step: 622/459, loss: 0.21371543407440186 2023-01-22 15:46:17.483716: step: 624/459, loss: 0.580619752407074 2023-01-22 15:46:18.135791: step: 626/459, loss: 0.1960444301366806 2023-01-22 15:46:18.768857: step: 628/459, loss: 0.2259388118982315 2023-01-22 15:46:19.353155: step: 630/459, loss: 0.18437325954437256 2023-01-22 15:46:20.018048: step: 632/459, loss: 0.11376965790987015 2023-01-22 15:46:20.702612: step: 634/459, loss: 0.13448379933834076 2023-01-22 15:46:21.260839: step: 636/459, loss: 0.1725846230983734 2023-01-22 15:46:21.890853: step: 638/459, loss: 0.20584018528461456 2023-01-22 15:46:22.597404: step: 640/459, loss: 0.19834820926189423 2023-01-22 15:46:23.180640: step: 642/459, loss: 0.2885088324546814 2023-01-22 15:46:23.884414: step: 644/459, loss: 1.220697283744812 2023-01-22 15:46:24.497638: step: 646/459, loss: 0.25243470072746277 2023-01-22 15:46:25.167818: step: 648/459, loss: 0.2285233587026596 2023-01-22 15:46:25.817008: step: 650/459, loss: 0.2333863079547882 2023-01-22 15:46:26.539165: step: 652/459, loss: 0.9205377101898193 2023-01-22 15:46:27.202748: step: 654/459, loss: 0.28071364760398865 2023-01-22 15:46:27.776829: step: 656/459, loss: 0.36163151264190674 2023-01-22 15:46:28.428171: step: 658/459, loss: 0.26893407106399536 2023-01-22 15:46:29.066990: step: 660/459, loss: 0.1716768592596054 2023-01-22 15:46:29.791594: step: 662/459, loss: 0.44605553150177 2023-01-22 15:46:30.406640: step: 664/459, loss: 0.3194606900215149 2023-01-22 15:46:31.059903: step: 666/459, loss: 0.16290341317653656 2023-01-22 15:46:31.747112: step: 668/459, loss: 0.48091229796409607 2023-01-22 15:46:32.336836: step: 670/459, loss: 0.24705524742603302 2023-01-22 15:46:32.958349: step: 672/459, loss: 0.11085258424282074 2023-01-22 15:46:33.534708: step: 674/459, loss: 0.30879199504852295 2023-01-22 15:46:34.202405: step: 676/459, loss: 0.18225333094596863 2023-01-22 15:46:34.781312: step: 678/459, loss: 0.2578544616699219 2023-01-22 15:46:35.406742: step: 680/459, loss: 0.2470221221446991 2023-01-22 15:46:35.984575: step: 682/459, loss: 0.40355977416038513 2023-01-22 15:46:36.579804: step: 684/459, loss: 0.2404222935438156 2023-01-22 15:46:37.241048: step: 686/459, loss: 1.0376453399658203 2023-01-22 15:46:37.839345: step: 688/459, loss: 0.09723003953695297 2023-01-22 15:46:38.536730: step: 690/459, loss: 0.38358867168426514 2023-01-22 15:46:39.122334: step: 692/459, loss: 0.33696287870407104 2023-01-22 15:46:39.755078: step: 694/459, loss: 0.24087536334991455 2023-01-22 15:46:40.355123: step: 696/459, loss: 0.038458820432424545 2023-01-22 15:46:41.041169: step: 698/459, loss: 0.3762061893939972 2023-01-22 15:46:41.732750: step: 700/459, loss: 2.308149814605713 2023-01-22 15:46:42.386652: step: 702/459, loss: 0.25773975253105164 2023-01-22 15:46:43.148004: step: 704/459, loss: 0.26947328448295593 2023-01-22 15:46:43.862124: step: 706/459, loss: 0.2284969985485077 2023-01-22 15:46:44.497676: step: 708/459, loss: 0.12621144950389862 2023-01-22 15:46:45.084764: step: 710/459, loss: 0.10571365058422089 2023-01-22 15:46:45.811183: step: 712/459, loss: 0.047105174511671066 2023-01-22 15:46:46.424851: step: 714/459, loss: 0.28328603506088257 2023-01-22 15:46:47.026133: step: 716/459, loss: 1.1578017473220825 2023-01-22 15:46:47.663344: step: 718/459, loss: 0.1891985833644867 2023-01-22 15:46:48.309725: step: 720/459, loss: 0.19495035707950592 2023-01-22 15:46:48.887195: step: 722/459, loss: 0.30241045355796814 2023-01-22 15:46:49.542688: step: 724/459, loss: 0.13838033378124237 2023-01-22 15:46:50.141160: step: 726/459, loss: 0.16477550566196442 2023-01-22 15:46:50.770726: step: 728/459, loss: 0.5142703652381897 2023-01-22 15:46:51.379005: step: 730/459, loss: 0.3539266586303711 2023-01-22 15:46:52.008395: step: 732/459, loss: 1.2098736763000488 2023-01-22 15:46:52.663271: step: 734/459, loss: 0.12248755246400833 2023-01-22 15:46:53.290126: step: 736/459, loss: 0.20339274406433105 2023-01-22 15:46:53.958249: step: 738/459, loss: 5.308073043823242 2023-01-22 15:46:54.599732: step: 740/459, loss: 0.778378963470459 2023-01-22 15:46:55.194892: step: 742/459, loss: 0.07592218369245529 2023-01-22 15:46:55.775582: step: 744/459, loss: 0.1824823021888733 2023-01-22 15:46:56.416178: step: 746/459, loss: 0.23120412230491638 2023-01-22 15:46:57.041606: step: 748/459, loss: 0.07668328285217285 2023-01-22 15:46:57.656277: step: 750/459, loss: 0.6681633591651917 2023-01-22 15:46:58.307846: step: 752/459, loss: 0.0919201523065567 2023-01-22 15:46:58.920008: step: 754/459, loss: 0.2720078229904175 2023-01-22 15:46:59.504566: step: 756/459, loss: 0.12350912392139435 2023-01-22 15:47:00.108952: step: 758/459, loss: 0.05517967417836189 2023-01-22 15:47:00.680009: step: 760/459, loss: 0.14157189428806305 2023-01-22 15:47:01.287559: step: 762/459, loss: 0.22299815714359283 2023-01-22 15:47:01.909217: step: 764/459, loss: 0.2534199357032776 2023-01-22 15:47:02.532410: step: 766/459, loss: 0.1251223236322403 2023-01-22 15:47:03.154045: step: 768/459, loss: 0.10407588630914688 2023-01-22 15:47:03.710049: step: 770/459, loss: 0.08893416076898575 2023-01-22 15:47:04.423555: step: 772/459, loss: 0.681821882724762 2023-01-22 15:47:05.138404: step: 774/459, loss: 1.1546670198440552 2023-01-22 15:47:05.742869: step: 776/459, loss: 0.2135608196258545 2023-01-22 15:47:06.302101: step: 778/459, loss: 0.4979424774646759 2023-01-22 15:47:07.037509: step: 780/459, loss: 0.6563610434532166 2023-01-22 15:47:07.602287: step: 782/459, loss: 0.11286108195781708 2023-01-22 15:47:08.242331: step: 784/459, loss: 0.11686240881681442 2023-01-22 15:47:08.865689: step: 786/459, loss: 0.14184249937534332 2023-01-22 15:47:09.497497: step: 788/459, loss: 0.3576371967792511 2023-01-22 15:47:10.129336: step: 790/459, loss: 4.707576274871826 2023-01-22 15:47:10.698872: step: 792/459, loss: 0.06455505639314651 2023-01-22 15:47:11.292955: step: 794/459, loss: 8.209043502807617 2023-01-22 15:47:11.919335: step: 796/459, loss: 0.3882555663585663 2023-01-22 15:47:12.526321: step: 798/459, loss: 0.30697402358055115 2023-01-22 15:47:13.167559: step: 800/459, loss: 0.12140218913555145 2023-01-22 15:47:13.798587: step: 802/459, loss: 0.12686428427696228 2023-01-22 15:47:14.427450: step: 804/459, loss: 0.3126966655254364 2023-01-22 15:47:15.123487: step: 806/459, loss: 0.20181067287921906 2023-01-22 15:47:15.744506: step: 808/459, loss: 1.691286325454712 2023-01-22 15:47:16.387224: step: 810/459, loss: 0.0719224363565445 2023-01-22 15:47:16.968197: step: 812/459, loss: 0.3225944936275482 2023-01-22 15:47:17.554294: step: 814/459, loss: 0.08254902064800262 2023-01-22 15:47:18.159264: step: 816/459, loss: 0.173804372549057 2023-01-22 15:47:18.793160: step: 818/459, loss: 0.19434726238250732 2023-01-22 15:47:19.417313: step: 820/459, loss: 0.17766015231609344 2023-01-22 15:47:20.035842: step: 822/459, loss: 0.21363583207130432 2023-01-22 15:47:20.712916: step: 824/459, loss: 0.11351973563432693 2023-01-22 15:47:21.286564: step: 826/459, loss: 0.18164993822574615 2023-01-22 15:47:21.897941: step: 828/459, loss: 0.09377112239599228 2023-01-22 15:47:22.620674: step: 830/459, loss: 0.18036767840385437 2023-01-22 15:47:23.328832: step: 832/459, loss: 0.3164392411708832 2023-01-22 15:47:23.978469: step: 834/459, loss: 0.45626163482666016 2023-01-22 15:47:24.630799: step: 836/459, loss: 0.22015509009361267 2023-01-22 15:47:25.298896: step: 838/459, loss: 0.11924298852682114 2023-01-22 15:47:25.891865: step: 840/459, loss: 0.3571799099445343 2023-01-22 15:47:26.477306: step: 842/459, loss: 0.22553518414497375 2023-01-22 15:47:27.054570: step: 844/459, loss: 0.19122420251369476 2023-01-22 15:47:27.662548: step: 846/459, loss: 0.15380078554153442 2023-01-22 15:47:28.321652: step: 848/459, loss: 0.33141016960144043 2023-01-22 15:47:28.965388: step: 850/459, loss: 0.07633019238710403 2023-01-22 15:47:29.559199: step: 852/459, loss: 0.6620818972587585 2023-01-22 15:47:30.193534: step: 854/459, loss: 0.48599886894226074 2023-01-22 15:47:30.858205: step: 856/459, loss: 0.1363385021686554 2023-01-22 15:47:31.448279: step: 858/459, loss: 0.1515771746635437 2023-01-22 15:47:32.066844: step: 860/459, loss: 0.24568217992782593 2023-01-22 15:47:32.666053: step: 862/459, loss: 0.27133357524871826 2023-01-22 15:47:33.234895: step: 864/459, loss: 0.21303074061870575 2023-01-22 15:47:33.807528: step: 866/459, loss: 0.2429625391960144 2023-01-22 15:47:34.482329: step: 868/459, loss: 0.4253731966018677 2023-01-22 15:47:35.135175: step: 870/459, loss: 0.09201164543628693 2023-01-22 15:47:35.753528: step: 872/459, loss: 0.21582569181919098 2023-01-22 15:47:36.406154: step: 874/459, loss: 0.14386600255966187 2023-01-22 15:47:37.049101: step: 876/459, loss: 0.465329110622406 2023-01-22 15:47:37.650638: step: 878/459, loss: 1.0891187191009521 2023-01-22 15:47:38.300022: step: 880/459, loss: 1.14003586769104 2023-01-22 15:47:38.925908: step: 882/459, loss: 0.2797315716743469 2023-01-22 15:47:39.556144: step: 884/459, loss: 0.2924247980117798 2023-01-22 15:47:40.253235: step: 886/459, loss: 0.4141462445259094 2023-01-22 15:47:40.866387: step: 888/459, loss: 0.044696517288684845 2023-01-22 15:47:41.493882: step: 890/459, loss: 0.1196417286992073 2023-01-22 15:47:42.128296: step: 892/459, loss: 2.774909257888794 2023-01-22 15:47:42.711160: step: 894/459, loss: 0.10457154363393784 2023-01-22 15:47:43.378282: step: 896/459, loss: 0.29368579387664795 2023-01-22 15:47:43.973789: step: 898/459, loss: 0.7593988180160522 2023-01-22 15:47:44.592889: step: 900/459, loss: 0.5390227437019348 2023-01-22 15:47:45.173520: step: 902/459, loss: 0.5205087661743164 2023-01-22 15:47:45.804054: step: 904/459, loss: 0.1304234117269516 2023-01-22 15:47:46.416134: step: 906/459, loss: 0.17874892055988312 2023-01-22 15:47:47.049597: step: 908/459, loss: 0.16213174164295197 2023-01-22 15:47:47.658903: step: 910/459, loss: 0.07502996176481247 2023-01-22 15:47:48.294278: step: 912/459, loss: 0.12158554792404175 2023-01-22 15:47:48.889670: step: 914/459, loss: 0.35873156785964966 2023-01-22 15:47:49.433291: step: 916/459, loss: 0.1835521012544632 2023-01-22 15:47:50.074409: step: 918/459, loss: 0.7669426798820496 2023-01-22 15:47:50.529528: step: 920/459, loss: 0.048492301255464554 ================================================== Loss: 0.367 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2846651939761412, 'r': 0.3403018447912124, 'f1': 0.31000703924800166}, 'combined': 0.22842623944589593, 'epoch': 10} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3150284520070142, 'r': 0.32475685106990104, 'f1': 0.31981868786043033}, 'combined': 0.20468396023067537, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2817090409590409, 'r': 0.3474589689248133, 'f1': 0.3111484734466892}, 'combined': 0.22926729622387626, 'epoch': 10} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3054170108602103, 'r': 0.31816581530628074, 'f1': 0.31166109197113007}, 'combined': 0.19946309886152322, 'epoch': 10} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2967510944449901, 'r': 0.3406725088030721, 'f1': 0.3171986080198215}, 'combined': 0.23372529011986848, 'epoch': 10} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.31940773298193875, 'r': 0.3403334443328938, 'f1': 0.3295387271558153}, 'combined': 0.23627304965888646, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.1987179487179487, 'r': 0.2952380952380952, 'f1': 0.2375478927203065}, 'combined': 0.15836526181353766, 'epoch': 10} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.26973684210526316, 'r': 0.44565217391304346, 'f1': 0.3360655737704918}, 'combined': 0.1680327868852459, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2727272727272727, 'r': 0.20689655172413793, 'f1': 0.23529411764705882}, 'combined': 0.1568627450980392, 'epoch': 10} New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27463650173611115, 'r': 0.29960345643939396, 'f1': 0.2865772192028986}, 'combined': 0.21116216151792525, 'epoch': 4} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3275304334361018, 'r': 0.25382122518550904, 'f1': 0.28600305741761856}, 'combined': 0.18304195674727586, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2387005649717514, 'r': 0.40238095238095234, 'f1': 0.29964539007092195}, 'combined': 0.1997635933806146, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2817090409590409, 'r': 0.3474589689248133, 'f1': 0.3111484734466892}, 'combined': 0.22926729622387626, 'epoch': 10} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3054170108602103, 'r': 0.31816581530628074, 'f1': 0.31166109197113007}, 'combined': 0.19946309886152322, 'epoch': 10} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.26973684210526316, 'r': 0.44565217391304346, 'f1': 0.3360655737704918}, 'combined': 0.1680327868852459, 'epoch': 10} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2967510944449901, 'r': 0.3406725088030721, 'f1': 0.3171986080198215}, 'combined': 0.23372529011986848, 'epoch': 10} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.31940773298193875, 'r': 0.3403334443328938, 'f1': 0.3295387271558153}, 'combined': 0.23627304965888646, 'epoch': 10} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2727272727272727, 'r': 0.20689655172413793, 'f1': 0.23529411764705882}, 'combined': 0.1568627450980392, 'epoch': 10} ****************************** Epoch: 11 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 15:50:42.967914: step: 2/459, loss: 0.11275211721658707 2023-01-22 15:50:43.567383: step: 4/459, loss: 0.10686974972486496 2023-01-22 15:50:44.216640: step: 6/459, loss: 0.2608473598957062 2023-01-22 15:50:44.845423: step: 8/459, loss: 0.12039929628372192 2023-01-22 15:50:45.503141: step: 10/459, loss: 0.2611970603466034 2023-01-22 15:50:46.148465: step: 12/459, loss: 0.48877161741256714 2023-01-22 15:50:46.797471: step: 14/459, loss: 0.11970476061105728 2023-01-22 15:50:47.451542: step: 16/459, loss: 3.661180257797241 2023-01-22 15:50:48.136445: step: 18/459, loss: 0.1180393397808075 2023-01-22 15:50:48.788459: step: 20/459, loss: 0.19204358756542206 2023-01-22 15:50:49.455149: step: 22/459, loss: 0.10955817252397537 2023-01-22 15:50:50.127350: step: 24/459, loss: 0.0919400304555893 2023-01-22 15:50:50.874661: step: 26/459, loss: 0.3240162432193756 2023-01-22 15:50:51.489443: step: 28/459, loss: 3.4630045890808105 2023-01-22 15:50:52.105809: step: 30/459, loss: 0.09668201208114624 2023-01-22 15:50:52.689272: step: 32/459, loss: 0.13687628507614136 2023-01-22 15:50:53.334743: step: 34/459, loss: 0.1192501038312912 2023-01-22 15:50:53.984533: step: 36/459, loss: 0.14495253562927246 2023-01-22 15:50:54.614064: step: 38/459, loss: 0.18833765387535095 2023-01-22 15:50:55.281973: step: 40/459, loss: 0.737675130367279 2023-01-22 15:50:55.917790: step: 42/459, loss: 0.21646812558174133 2023-01-22 15:50:56.541532: step: 44/459, loss: 0.14482398331165314 2023-01-22 15:50:57.149836: step: 46/459, loss: 0.4470270872116089 2023-01-22 15:50:57.778062: step: 48/459, loss: 0.1450187712907791 2023-01-22 15:50:58.340942: step: 50/459, loss: 0.6986967921257019 2023-01-22 15:50:59.001612: step: 52/459, loss: 0.6670300364494324 2023-01-22 15:50:59.588310: step: 54/459, loss: 0.45689329504966736 2023-01-22 15:51:00.316626: step: 56/459, loss: 0.22103868424892426 2023-01-22 15:51:00.972923: step: 58/459, loss: 0.19820426404476166 2023-01-22 15:51:01.650296: step: 60/459, loss: 0.07374050468206406 2023-01-22 15:51:02.258439: step: 62/459, loss: 0.15487250685691833 2023-01-22 15:51:02.831485: step: 64/459, loss: 0.21060527861118317 2023-01-22 15:51:03.487245: step: 66/459, loss: 0.3044937252998352 2023-01-22 15:51:04.063818: step: 68/459, loss: 0.18363617360591888 2023-01-22 15:51:04.703061: step: 70/459, loss: 0.22487196326255798 2023-01-22 15:51:05.445060: step: 72/459, loss: 0.2598344683647156 2023-01-22 15:51:06.072928: step: 74/459, loss: 0.19063130021095276 2023-01-22 15:51:06.696610: step: 76/459, loss: 0.15184497833251953 2023-01-22 15:51:07.297333: step: 78/459, loss: 0.30537423491477966 2023-01-22 15:51:07.927041: step: 80/459, loss: 1.8822036981582642 2023-01-22 15:51:08.601322: step: 82/459, loss: 0.132218599319458 2023-01-22 15:51:09.236626: step: 84/459, loss: 0.13589254021644592 2023-01-22 15:51:09.836387: step: 86/459, loss: 0.042592693120241165 2023-01-22 15:51:10.520302: step: 88/459, loss: 0.27406710386276245 2023-01-22 15:51:11.139407: step: 90/459, loss: 0.06579865515232086 2023-01-22 15:51:11.723906: step: 92/459, loss: 0.18547819554805756 2023-01-22 15:51:12.332498: step: 94/459, loss: 0.05599135532975197 2023-01-22 15:51:12.899729: step: 96/459, loss: 0.6439269781112671 2023-01-22 15:51:13.476694: step: 98/459, loss: 0.17558224499225616 2023-01-22 15:51:14.087567: step: 100/459, loss: 0.2225285768508911 2023-01-22 15:51:14.665724: step: 102/459, loss: 0.10589983314275742 2023-01-22 15:51:15.310657: step: 104/459, loss: 1.2812843322753906 2023-01-22 15:51:15.973091: step: 106/459, loss: 0.1243438869714737 2023-01-22 15:51:16.673618: step: 108/459, loss: 0.5930564403533936 2023-01-22 15:51:17.241471: step: 110/459, loss: 0.08268099278211594 2023-01-22 15:51:17.934486: step: 112/459, loss: 0.39749670028686523 2023-01-22 15:51:18.588297: step: 114/459, loss: 0.2702604830265045 2023-01-22 15:51:19.342673: step: 116/459, loss: 1.5482745170593262 2023-01-22 15:51:19.962467: step: 118/459, loss: 0.4411892592906952 2023-01-22 15:51:20.607687: step: 120/459, loss: 0.1506730616092682 2023-01-22 15:51:21.152118: step: 122/459, loss: 0.08141271024942398 2023-01-22 15:51:21.800854: step: 124/459, loss: 0.08077988773584366 2023-01-22 15:51:22.372478: step: 126/459, loss: 0.11863106489181519 2023-01-22 15:51:22.966212: step: 128/459, loss: 0.34144726395606995 2023-01-22 15:51:23.578885: step: 130/459, loss: 0.20811909437179565 2023-01-22 15:51:24.162604: step: 132/459, loss: 0.10531053692102432 2023-01-22 15:51:24.862946: step: 134/459, loss: 0.5329151153564453 2023-01-22 15:51:25.493731: step: 136/459, loss: 0.24750909209251404 2023-01-22 15:51:26.072970: step: 138/459, loss: 0.05885874107480049 2023-01-22 15:51:26.687365: step: 140/459, loss: 0.14912256598472595 2023-01-22 15:51:27.329598: step: 142/459, loss: 0.08372922241687775 2023-01-22 15:51:27.998442: step: 144/459, loss: 0.37663528323173523 2023-01-22 15:51:28.610775: step: 146/459, loss: 0.20844963192939758 2023-01-22 15:51:29.222745: step: 148/459, loss: 0.05868107080459595 2023-01-22 15:51:29.855266: step: 150/459, loss: 0.08950163424015045 2023-01-22 15:51:30.472611: step: 152/459, loss: 0.3027402460575104 2023-01-22 15:51:31.068550: step: 154/459, loss: 0.1666516810655594 2023-01-22 15:51:31.696469: step: 156/459, loss: 0.1754162460565567 2023-01-22 15:51:32.466790: step: 158/459, loss: 0.47508057951927185 2023-01-22 15:51:33.192498: step: 160/459, loss: 0.21410924196243286 2023-01-22 15:51:33.788647: step: 162/459, loss: 0.23182973265647888 2023-01-22 15:51:34.388928: step: 164/459, loss: 0.09399717301130295 2023-01-22 15:51:35.020385: step: 166/459, loss: 0.14002127945423126 2023-01-22 15:51:35.630254: step: 168/459, loss: 0.1147938147187233 2023-01-22 15:51:36.245469: step: 170/459, loss: 0.4339214563369751 2023-01-22 15:51:36.829554: step: 172/459, loss: 0.2309955209493637 2023-01-22 15:51:37.434381: step: 174/459, loss: 0.6805146932601929 2023-01-22 15:51:38.091788: step: 176/459, loss: 0.4299972951412201 2023-01-22 15:51:38.741055: step: 178/459, loss: 0.12374690175056458 2023-01-22 15:51:39.386357: step: 180/459, loss: 0.3235766887664795 2023-01-22 15:51:40.022084: step: 182/459, loss: 0.29639846086502075 2023-01-22 15:51:40.638576: step: 184/459, loss: 0.15322966873645782 2023-01-22 15:51:41.252442: step: 186/459, loss: 0.11118635535240173 2023-01-22 15:51:41.865554: step: 188/459, loss: 0.13752110302448273 2023-01-22 15:51:42.471772: step: 190/459, loss: 0.11309486627578735 2023-01-22 15:51:43.096318: step: 192/459, loss: 0.27820736169815063 2023-01-22 15:51:43.742253: step: 194/459, loss: 2.5947952270507812 2023-01-22 15:51:44.340883: step: 196/459, loss: 0.15291458368301392 2023-01-22 15:51:44.938918: step: 198/459, loss: 0.19791972637176514 2023-01-22 15:51:45.535601: step: 200/459, loss: 0.183834508061409 2023-01-22 15:51:46.180321: step: 202/459, loss: 0.12163852155208588 2023-01-22 15:51:46.820862: step: 204/459, loss: 0.12822559475898743 2023-01-22 15:51:47.539500: step: 206/459, loss: 0.5185658931732178 2023-01-22 15:51:48.187226: step: 208/459, loss: 0.1144445389509201 2023-01-22 15:51:48.812555: step: 210/459, loss: 0.25558578968048096 2023-01-22 15:51:49.513110: step: 212/459, loss: 0.2630971372127533 2023-01-22 15:51:50.198460: step: 214/459, loss: 0.20347851514816284 2023-01-22 15:51:50.786842: step: 216/459, loss: 0.12259017676115036 2023-01-22 15:51:51.431262: step: 218/459, loss: 0.17510986328125 2023-01-22 15:51:52.004575: step: 220/459, loss: 0.10558812320232391 2023-01-22 15:51:52.645877: step: 222/459, loss: 0.26093366742134094 2023-01-22 15:51:53.227790: step: 224/459, loss: 0.9643688201904297 2023-01-22 15:51:53.829965: step: 226/459, loss: 0.18450577557086945 2023-01-22 15:51:54.496258: step: 228/459, loss: 0.10664820671081543 2023-01-22 15:51:55.046918: step: 230/459, loss: 0.09880784898996353 2023-01-22 15:51:55.632684: step: 232/459, loss: 0.04549248889088631 2023-01-22 15:51:56.236768: step: 234/459, loss: 0.0522080734372139 2023-01-22 15:51:56.844189: step: 236/459, loss: 0.5205299258232117 2023-01-22 15:51:57.574851: step: 238/459, loss: 1.7315802574157715 2023-01-22 15:51:58.219006: step: 240/459, loss: 0.19929178059101105 2023-01-22 15:51:58.803838: step: 242/459, loss: 0.17629675567150116 2023-01-22 15:51:59.468855: step: 244/459, loss: 0.33019134402275085 2023-01-22 15:52:00.140320: step: 246/459, loss: 0.13378626108169556 2023-01-22 15:52:00.824130: step: 248/459, loss: 0.13654345273971558 2023-01-22 15:52:01.412756: step: 250/459, loss: 0.45899415016174316 2023-01-22 15:52:02.087642: step: 252/459, loss: 0.5909812450408936 2023-01-22 15:52:02.761050: step: 254/459, loss: 0.15070146322250366 2023-01-22 15:52:03.386351: step: 256/459, loss: 0.20334100723266602 2023-01-22 15:52:03.977329: step: 258/459, loss: 0.07193280011415482 2023-01-22 15:52:04.684788: step: 260/459, loss: 0.22297337651252747 2023-01-22 15:52:05.289573: step: 262/459, loss: 0.30269095301628113 2023-01-22 15:52:05.888861: step: 264/459, loss: 0.17046596109867096 2023-01-22 15:52:06.539341: step: 266/459, loss: 0.1067381203174591 2023-01-22 15:52:07.276496: step: 268/459, loss: 0.15277154743671417 2023-01-22 15:52:07.889771: step: 270/459, loss: 0.23303262889385223 2023-01-22 15:52:08.482207: step: 272/459, loss: 0.35318225622177124 2023-01-22 15:52:09.071625: step: 274/459, loss: 0.11376155912876129 2023-01-22 15:52:09.692719: step: 276/459, loss: 0.3717697262763977 2023-01-22 15:52:10.337706: step: 278/459, loss: 0.1709698885679245 2023-01-22 15:52:11.023272: step: 280/459, loss: 0.07665665447711945 2023-01-22 15:52:11.652235: step: 282/459, loss: 0.1703537106513977 2023-01-22 15:52:12.252935: step: 284/459, loss: 0.08076130598783493 2023-01-22 15:52:12.824031: step: 286/459, loss: 0.16002047061920166 2023-01-22 15:52:13.445449: step: 288/459, loss: 0.10531701892614365 2023-01-22 15:52:14.050603: step: 290/459, loss: 0.14562071859836578 2023-01-22 15:52:14.642350: step: 292/459, loss: 0.1993832141160965 2023-01-22 15:52:15.294142: step: 294/459, loss: 0.24833454191684723 2023-01-22 15:52:16.082680: step: 296/459, loss: 0.12150466442108154 2023-01-22 15:52:16.696209: step: 298/459, loss: 0.16694873571395874 2023-01-22 15:52:17.268839: step: 300/459, loss: 0.14250144362449646 2023-01-22 15:52:17.932936: step: 302/459, loss: 0.2993696331977844 2023-01-22 15:52:18.630871: step: 304/459, loss: 0.08323661983013153 2023-01-22 15:52:19.246180: step: 306/459, loss: 0.07945852726697922 2023-01-22 15:52:19.793851: step: 308/459, loss: 0.41731125116348267 2023-01-22 15:52:20.440792: step: 310/459, loss: 0.5517023801803589 2023-01-22 15:52:21.049314: step: 312/459, loss: 1.0190060138702393 2023-01-22 15:52:21.678435: step: 314/459, loss: 0.04851488396525383 2023-01-22 15:52:22.280690: step: 316/459, loss: 0.20628666877746582 2023-01-22 15:52:22.869268: step: 318/459, loss: 0.14415764808654785 2023-01-22 15:52:23.439323: step: 320/459, loss: 0.04026739299297333 2023-01-22 15:52:24.078672: step: 322/459, loss: 0.15062624216079712 2023-01-22 15:52:24.627970: step: 324/459, loss: 0.48845580220222473 2023-01-22 15:52:25.237033: step: 326/459, loss: 0.06819974631071091 2023-01-22 15:52:25.841864: step: 328/459, loss: 0.17124900221824646 2023-01-22 15:52:26.450468: step: 330/459, loss: 0.07246245443820953 2023-01-22 15:52:27.072310: step: 332/459, loss: 0.35247603058815 2023-01-22 15:52:27.687457: step: 334/459, loss: 0.09284552186727524 2023-01-22 15:52:28.344752: step: 336/459, loss: 0.5647307634353638 2023-01-22 15:52:28.989097: step: 338/459, loss: 0.4305064380168915 2023-01-22 15:52:29.690849: step: 340/459, loss: 0.0860200747847557 2023-01-22 15:52:30.331158: step: 342/459, loss: 0.08182386308908463 2023-01-22 15:52:30.949466: step: 344/459, loss: 0.07817761600017548 2023-01-22 15:52:31.583787: step: 346/459, loss: 0.4656638503074646 2023-01-22 15:52:32.228610: step: 348/459, loss: 0.10977884382009506 2023-01-22 15:52:32.892852: step: 350/459, loss: 0.06019672751426697 2023-01-22 15:52:33.561379: step: 352/459, loss: 0.13678035140037537 2023-01-22 15:52:34.194283: step: 354/459, loss: 0.1333872526884079 2023-01-22 15:52:34.800521: step: 356/459, loss: 0.08532661199569702 2023-01-22 15:52:35.391549: step: 358/459, loss: 0.2559453547000885 2023-01-22 15:52:36.016946: step: 360/459, loss: 0.041405659168958664 2023-01-22 15:52:36.641752: step: 362/459, loss: 0.06343285739421844 2023-01-22 15:52:37.232885: step: 364/459, loss: 0.1549064964056015 2023-01-22 15:52:37.886572: step: 366/459, loss: 0.14350424706935883 2023-01-22 15:52:38.510698: step: 368/459, loss: 0.13596512377262115 2023-01-22 15:52:39.126060: step: 370/459, loss: 0.5395746827125549 2023-01-22 15:52:39.826977: step: 372/459, loss: 0.13828378915786743 2023-01-22 15:52:40.478687: step: 374/459, loss: 0.2569045126438141 2023-01-22 15:52:41.170404: step: 376/459, loss: 0.13806010782718658 2023-01-22 15:52:41.799765: step: 378/459, loss: 0.15348322689533234 2023-01-22 15:52:42.422417: step: 380/459, loss: 0.12879550457000732 2023-01-22 15:52:43.073574: step: 382/459, loss: 0.08733861893415451 2023-01-22 15:52:43.717771: step: 384/459, loss: 0.1711239516735077 2023-01-22 15:52:44.337711: step: 386/459, loss: 0.17774704098701477 2023-01-22 15:52:44.943469: step: 388/459, loss: 0.12053477019071579 2023-01-22 15:52:45.611675: step: 390/459, loss: 0.16853272914886475 2023-01-22 15:52:46.232674: step: 392/459, loss: 0.28658413887023926 2023-01-22 15:52:46.882508: step: 394/459, loss: 0.25590378046035767 2023-01-22 15:52:47.525873: step: 396/459, loss: 0.8240681290626526 2023-01-22 15:52:48.168811: step: 398/459, loss: 0.06955432891845703 2023-01-22 15:52:48.758102: step: 400/459, loss: 0.10677331686019897 2023-01-22 15:52:49.445563: step: 402/459, loss: 0.5039047002792358 2023-01-22 15:52:50.017973: step: 404/459, loss: 0.27743417024612427 2023-01-22 15:52:50.655377: step: 406/459, loss: 0.1035865843296051 2023-01-22 15:52:51.241999: step: 408/459, loss: 0.20508931577205658 2023-01-22 15:52:51.870934: step: 410/459, loss: 0.13182923197746277 2023-01-22 15:52:52.503176: step: 412/459, loss: 0.4361801743507385 2023-01-22 15:52:53.203673: step: 414/459, loss: 0.34623053669929504 2023-01-22 15:52:53.824894: step: 416/459, loss: 0.14785853028297424 2023-01-22 15:52:54.512588: step: 418/459, loss: 0.21138396859169006 2023-01-22 15:52:55.142245: step: 420/459, loss: 0.3150207996368408 2023-01-22 15:52:55.720850: step: 422/459, loss: 0.17106015980243683 2023-01-22 15:52:56.347853: step: 424/459, loss: 0.4608490765094757 2023-01-22 15:52:56.956134: step: 426/459, loss: 0.18445433676242828 2023-01-22 15:52:57.553787: step: 428/459, loss: 0.048142872750759125 2023-01-22 15:52:58.226294: step: 430/459, loss: 0.15637724101543427 2023-01-22 15:52:58.816376: step: 432/459, loss: 0.10963443666696548 2023-01-22 15:52:59.408822: step: 434/459, loss: 0.10776787251234055 2023-01-22 15:53:00.015765: step: 436/459, loss: 0.1524261236190796 2023-01-22 15:53:00.659709: step: 438/459, loss: 0.4386144280433655 2023-01-22 15:53:01.259419: step: 440/459, loss: 0.1599751114845276 2023-01-22 15:53:01.900595: step: 442/459, loss: 0.12555722892284393 2023-01-22 15:53:02.495677: step: 444/459, loss: 0.16033007204532623 2023-01-22 15:53:03.056991: step: 446/459, loss: 0.1770080178976059 2023-01-22 15:53:03.616317: step: 448/459, loss: 0.38186684250831604 2023-01-22 15:53:04.215536: step: 450/459, loss: 0.45693448185920715 2023-01-22 15:53:04.905916: step: 452/459, loss: 0.13480570912361145 2023-01-22 15:53:05.558692: step: 454/459, loss: 0.19582627713680267 2023-01-22 15:53:06.196905: step: 456/459, loss: 0.06460209935903549 2023-01-22 15:53:06.892553: step: 458/459, loss: 0.33178749680519104 2023-01-22 15:53:07.519079: step: 460/459, loss: 0.18194104731082916 2023-01-22 15:53:08.144291: step: 462/459, loss: 0.4311094880104065 2023-01-22 15:53:08.752671: step: 464/459, loss: 0.7929636240005493 2023-01-22 15:53:09.313038: step: 466/459, loss: 0.04964881017804146 2023-01-22 15:53:10.038417: step: 468/459, loss: 0.23969992995262146 2023-01-22 15:53:10.668659: step: 470/459, loss: 0.17754453420639038 2023-01-22 15:53:11.286216: step: 472/459, loss: 0.264949768781662 2023-01-22 15:53:11.912180: step: 474/459, loss: 0.7883945107460022 2023-01-22 15:53:12.500481: step: 476/459, loss: 0.14367082715034485 2023-01-22 15:53:13.092705: step: 478/459, loss: 0.3594035804271698 2023-01-22 15:53:13.709198: step: 480/459, loss: 0.17428353428840637 2023-01-22 15:53:14.310968: step: 482/459, loss: 0.14235049486160278 2023-01-22 15:53:14.965176: step: 484/459, loss: 0.20328570902347565 2023-01-22 15:53:15.589582: step: 486/459, loss: 0.05348825827240944 2023-01-22 15:53:16.171388: step: 488/459, loss: 0.051066234707832336 2023-01-22 15:53:16.737007: step: 490/459, loss: 0.13572464883327484 2023-01-22 15:53:17.444289: step: 492/459, loss: 0.319203644990921 2023-01-22 15:53:18.064145: step: 494/459, loss: 0.36570972204208374 2023-01-22 15:53:18.669939: step: 496/459, loss: 0.23557209968566895 2023-01-22 15:53:19.292751: step: 498/459, loss: 0.15502367913722992 2023-01-22 15:53:19.852397: step: 500/459, loss: 0.11505042016506195 2023-01-22 15:53:20.424668: step: 502/459, loss: 0.219563290476799 2023-01-22 15:53:21.134997: step: 504/459, loss: 0.5308017730712891 2023-01-22 15:53:21.657999: step: 506/459, loss: 0.2894197106361389 2023-01-22 15:53:22.320155: step: 508/459, loss: 0.17754441499710083 2023-01-22 15:53:22.911786: step: 510/459, loss: 0.5197442173957825 2023-01-22 15:53:23.522662: step: 512/459, loss: 0.14772886037826538 2023-01-22 15:53:24.187553: step: 514/459, loss: 0.23810598254203796 2023-01-22 15:53:24.796009: step: 516/459, loss: 0.1954866200685501 2023-01-22 15:53:25.361587: step: 518/459, loss: 0.17934677004814148 2023-01-22 15:53:25.962367: step: 520/459, loss: 0.48074349761009216 2023-01-22 15:53:26.616073: step: 522/459, loss: 0.1368170529603958 2023-01-22 15:53:27.274753: step: 524/459, loss: 0.09609031677246094 2023-01-22 15:53:27.946638: step: 526/459, loss: 0.12910370528697968 2023-01-22 15:53:28.574663: step: 528/459, loss: 0.16838814318180084 2023-01-22 15:53:29.243988: step: 530/459, loss: 0.07675488293170929 2023-01-22 15:53:29.839497: step: 532/459, loss: 0.10392405092716217 2023-01-22 15:53:30.503418: step: 534/459, loss: 0.06403756141662598 2023-01-22 15:53:31.116350: step: 536/459, loss: 0.4173506498336792 2023-01-22 15:53:31.773991: step: 538/459, loss: 0.2156752645969391 2023-01-22 15:53:32.372004: step: 540/459, loss: 1.6007822751998901 2023-01-22 15:53:32.959349: step: 542/459, loss: 0.1353331357240677 2023-01-22 15:53:33.654647: step: 544/459, loss: 0.5418546199798584 2023-01-22 15:53:34.311681: step: 546/459, loss: 0.42604386806488037 2023-01-22 15:53:35.004527: step: 548/459, loss: 0.4722111225128174 2023-01-22 15:53:35.604924: step: 550/459, loss: 0.13126057386398315 2023-01-22 15:53:36.207250: step: 552/459, loss: 0.3270952105522156 2023-01-22 15:53:36.854997: step: 554/459, loss: 0.180037260055542 2023-01-22 15:53:37.496933: step: 556/459, loss: 0.09547489136457443 2023-01-22 15:53:38.117181: step: 558/459, loss: 0.2646651566028595 2023-01-22 15:53:38.762362: step: 560/459, loss: 0.14760933816432953 2023-01-22 15:53:39.435848: step: 562/459, loss: 0.1786375492811203 2023-01-22 15:53:40.039387: step: 564/459, loss: 0.06856317818164825 2023-01-22 15:53:40.646279: step: 566/459, loss: 0.5455055832862854 2023-01-22 15:53:41.277566: step: 568/459, loss: 0.7599986791610718 2023-01-22 15:53:41.933542: step: 570/459, loss: 0.27606073021888733 2023-01-22 15:53:42.526466: step: 572/459, loss: 0.060058340430259705 2023-01-22 15:53:43.154454: step: 574/459, loss: 0.09410014003515244 2023-01-22 15:53:43.826638: step: 576/459, loss: 0.1998775601387024 2023-01-22 15:53:44.471287: step: 578/459, loss: 0.14599131047725677 2023-01-22 15:53:45.057074: step: 580/459, loss: 0.313771516084671 2023-01-22 15:53:45.669020: step: 582/459, loss: 0.07697026431560516 2023-01-22 15:53:46.293076: step: 584/459, loss: 1.1827771663665771 2023-01-22 15:53:46.955700: step: 586/459, loss: 0.2901417911052704 2023-01-22 15:53:47.561070: step: 588/459, loss: 0.5810971856117249 2023-01-22 15:53:48.213195: step: 590/459, loss: 1.1214869022369385 2023-01-22 15:53:48.810354: step: 592/459, loss: 0.26532354950904846 2023-01-22 15:53:49.482160: step: 594/459, loss: 0.14446336030960083 2023-01-22 15:53:50.047581: step: 596/459, loss: 0.3501150906085968 2023-01-22 15:53:50.656383: step: 598/459, loss: 0.17113864421844482 2023-01-22 15:53:51.296590: step: 600/459, loss: 0.11684054136276245 2023-01-22 15:53:51.932935: step: 602/459, loss: 0.6350547075271606 2023-01-22 15:53:52.578798: step: 604/459, loss: 0.21436379849910736 2023-01-22 15:53:53.207194: step: 606/459, loss: 0.2101549208164215 2023-01-22 15:53:53.851330: step: 608/459, loss: 0.1957787424325943 2023-01-22 15:53:54.472410: step: 610/459, loss: 0.2176215797662735 2023-01-22 15:53:55.065487: step: 612/459, loss: 0.45250967144966125 2023-01-22 15:53:55.702496: step: 614/459, loss: 0.13282984495162964 2023-01-22 15:53:56.433669: step: 616/459, loss: 0.2939413785934448 2023-01-22 15:53:57.043294: step: 618/459, loss: 0.13534234464168549 2023-01-22 15:53:57.651788: step: 620/459, loss: 0.14778201282024384 2023-01-22 15:53:58.277118: step: 622/459, loss: 0.24862347543239594 2023-01-22 15:53:58.931185: step: 624/459, loss: 0.11247130483388901 2023-01-22 15:53:59.504236: step: 626/459, loss: 0.10049846768379211 2023-01-22 15:54:00.110808: step: 628/459, loss: 0.1283808946609497 2023-01-22 15:54:00.781594: step: 630/459, loss: 0.21355411410331726 2023-01-22 15:54:01.393363: step: 632/459, loss: 0.27969467639923096 2023-01-22 15:54:02.065683: step: 634/459, loss: 0.4423898458480835 2023-01-22 15:54:02.733488: step: 636/459, loss: 0.1435507833957672 2023-01-22 15:54:03.321985: step: 638/459, loss: 0.5217066407203674 2023-01-22 15:54:03.937438: step: 640/459, loss: 0.1669824719429016 2023-01-22 15:54:04.660306: step: 642/459, loss: 0.13218474388122559 2023-01-22 15:54:05.236130: step: 644/459, loss: 0.16818098723888397 2023-01-22 15:54:05.838497: step: 646/459, loss: 0.08161741495132446 2023-01-22 15:54:06.504668: step: 648/459, loss: 0.38574355840682983 2023-01-22 15:54:07.116826: step: 650/459, loss: 0.33588096499443054 2023-01-22 15:54:07.694650: step: 652/459, loss: 0.1796434074640274 2023-01-22 15:54:08.347752: step: 654/459, loss: 0.16703686118125916 2023-01-22 15:54:08.994233: step: 656/459, loss: 0.025381408631801605 2023-01-22 15:54:09.604309: step: 658/459, loss: 0.4069361388683319 2023-01-22 15:54:10.198083: step: 660/459, loss: 0.12664777040481567 2023-01-22 15:54:10.801914: step: 662/459, loss: 0.5495584011077881 2023-01-22 15:54:11.372005: step: 664/459, loss: 0.12278769910335541 2023-01-22 15:54:12.027123: step: 666/459, loss: 0.10981091111898422 2023-01-22 15:54:12.696837: step: 668/459, loss: 0.10065427422523499 2023-01-22 15:54:13.348698: step: 670/459, loss: 0.06395649164915085 2023-01-22 15:54:13.943631: step: 672/459, loss: 0.22129592299461365 2023-01-22 15:54:14.573892: step: 674/459, loss: 0.1735781878232956 2023-01-22 15:54:15.190467: step: 676/459, loss: 0.30232858657836914 2023-01-22 15:54:15.835635: step: 678/459, loss: 0.4083072245121002 2023-01-22 15:54:16.429585: step: 680/459, loss: 0.10548080503940582 2023-01-22 15:54:17.080319: step: 682/459, loss: 0.24000054597854614 2023-01-22 15:54:17.715254: step: 684/459, loss: 0.10132011026144028 2023-01-22 15:54:18.358596: step: 686/459, loss: 0.0900411605834961 2023-01-22 15:54:18.916555: step: 688/459, loss: 0.2739145755767822 2023-01-22 15:54:19.544472: step: 690/459, loss: 0.12556448578834534 2023-01-22 15:54:20.088837: step: 692/459, loss: 0.12898723781108856 2023-01-22 15:54:20.751843: step: 694/459, loss: 0.1207360103726387 2023-01-22 15:54:21.453271: step: 696/459, loss: 0.7688129544258118 2023-01-22 15:54:22.065426: step: 698/459, loss: 0.21229831874370575 2023-01-22 15:54:22.719093: step: 700/459, loss: 0.16592468321323395 2023-01-22 15:54:23.371310: step: 702/459, loss: 0.054987650364637375 2023-01-22 15:54:23.990174: step: 704/459, loss: 0.10764336585998535 2023-01-22 15:54:24.599658: step: 706/459, loss: 0.1585250198841095 2023-01-22 15:54:25.262821: step: 708/459, loss: 0.24117501080036163 2023-01-22 15:54:25.901419: step: 710/459, loss: 0.1401645690202713 2023-01-22 15:54:26.462637: step: 712/459, loss: 0.2002030909061432 2023-01-22 15:54:27.070333: step: 714/459, loss: 0.06596740335226059 2023-01-22 15:54:27.660167: step: 716/459, loss: 0.5025600790977478 2023-01-22 15:54:28.308676: step: 718/459, loss: 0.16824749112129211 2023-01-22 15:54:28.964463: step: 720/459, loss: 0.7413332462310791 2023-01-22 15:54:29.531345: step: 722/459, loss: 0.11705369502305984 2023-01-22 15:54:30.110008: step: 724/459, loss: 0.07845655828714371 2023-01-22 15:54:30.740781: step: 726/459, loss: 0.13053517043590546 2023-01-22 15:54:31.452451: step: 728/459, loss: 0.389509379863739 2023-01-22 15:54:32.030542: step: 730/459, loss: 0.07455238699913025 2023-01-22 15:54:32.628273: step: 732/459, loss: 0.3014214336872101 2023-01-22 15:54:33.236124: step: 734/459, loss: 1.0662025213241577 2023-01-22 15:54:33.888350: step: 736/459, loss: 0.14869052171707153 2023-01-22 15:54:34.505756: step: 738/459, loss: 0.12954486906528473 2023-01-22 15:54:35.139929: step: 740/459, loss: 0.10698312520980835 2023-01-22 15:54:35.741456: step: 742/459, loss: 0.18078705668449402 2023-01-22 15:54:36.378602: step: 744/459, loss: 0.18150001764297485 2023-01-22 15:54:36.976955: step: 746/459, loss: 0.07449041306972504 2023-01-22 15:54:37.609320: step: 748/459, loss: 0.09963898360729218 2023-01-22 15:54:38.212515: step: 750/459, loss: 1.5931727886199951 2023-01-22 15:54:38.866785: step: 752/459, loss: 0.5336211919784546 2023-01-22 15:54:39.469431: step: 754/459, loss: 0.6422545909881592 2023-01-22 15:54:40.106713: step: 756/459, loss: 0.25732332468032837 2023-01-22 15:54:40.784135: step: 758/459, loss: 0.28021663427352905 2023-01-22 15:54:41.515394: step: 760/459, loss: 0.08823411911725998 2023-01-22 15:54:42.101702: step: 762/459, loss: 0.2908158600330353 2023-01-22 15:54:42.683543: step: 764/459, loss: 0.0305323489010334 2023-01-22 15:54:43.344825: step: 766/459, loss: 0.9034067988395691 2023-01-22 15:54:43.973875: step: 768/459, loss: 0.2113620638847351 2023-01-22 15:54:44.616709: step: 770/459, loss: 0.19166304171085358 2023-01-22 15:54:45.229465: step: 772/459, loss: 0.24271754920482635 2023-01-22 15:54:45.786442: step: 774/459, loss: 0.2534889280796051 2023-01-22 15:54:46.417627: step: 776/459, loss: 0.17192646861076355 2023-01-22 15:54:47.058506: step: 778/459, loss: 0.22874964773654938 2023-01-22 15:54:47.696216: step: 780/459, loss: 0.1978732943534851 2023-01-22 15:54:48.297190: step: 782/459, loss: 0.8296011686325073 2023-01-22 15:54:48.914052: step: 784/459, loss: 0.10366343706846237 2023-01-22 15:54:49.508068: step: 786/459, loss: 0.23045563697814941 2023-01-22 15:54:50.123293: step: 788/459, loss: 0.03933629393577576 2023-01-22 15:54:50.681396: step: 790/459, loss: 0.17930127680301666 2023-01-22 15:54:51.324075: step: 792/459, loss: 0.3700902760028839 2023-01-22 15:54:51.904187: step: 794/459, loss: 0.2574950158596039 2023-01-22 15:54:52.563783: step: 796/459, loss: 0.09485986828804016 2023-01-22 15:54:53.165571: step: 798/459, loss: 0.08968497067689896 2023-01-22 15:54:53.832556: step: 800/459, loss: 0.06670928001403809 2023-01-22 15:54:54.452549: step: 802/459, loss: 0.0926079973578453 2023-01-22 15:54:55.046142: step: 804/459, loss: 0.17443044483661652 2023-01-22 15:54:55.750418: step: 806/459, loss: 0.43957558274269104 2023-01-22 15:54:56.337292: step: 808/459, loss: 0.1255202293395996 2023-01-22 15:54:56.854161: step: 810/459, loss: 0.20331838726997375 2023-01-22 15:54:57.548763: step: 812/459, loss: 0.1189742311835289 2023-01-22 15:54:58.104776: step: 814/459, loss: 0.5041230320930481 2023-01-22 15:54:58.705881: step: 816/459, loss: 0.22749951481819153 2023-01-22 15:54:59.371955: step: 818/459, loss: 0.10583936423063278 2023-01-22 15:55:00.050174: step: 820/459, loss: 0.511237621307373 2023-01-22 15:55:00.634838: step: 822/459, loss: 0.0470888689160347 2023-01-22 15:55:01.258354: step: 824/459, loss: 0.1663469672203064 2023-01-22 15:55:01.828217: step: 826/459, loss: 0.08329518139362335 2023-01-22 15:55:02.449986: step: 828/459, loss: 0.15003742277622223 2023-01-22 15:55:03.064835: step: 830/459, loss: 0.04200620576739311 2023-01-22 15:55:03.652393: step: 832/459, loss: 0.2559027373790741 2023-01-22 15:55:04.316597: step: 834/459, loss: 0.35139209032058716 2023-01-22 15:55:04.985967: step: 836/459, loss: 0.14547117054462433 2023-01-22 15:55:05.616169: step: 838/459, loss: 0.25380632281303406 2023-01-22 15:55:06.259048: step: 840/459, loss: 0.4497436583042145 2023-01-22 15:55:06.900906: step: 842/459, loss: 0.12410133332014084 2023-01-22 15:55:07.486932: step: 844/459, loss: 0.08688125014305115 2023-01-22 15:55:08.095706: step: 846/459, loss: 0.07641095668077469 2023-01-22 15:55:08.759227: step: 848/459, loss: 0.2274005115032196 2023-01-22 15:55:09.415733: step: 850/459, loss: 0.2081613391637802 2023-01-22 15:55:10.070086: step: 852/459, loss: 0.17242270708084106 2023-01-22 15:55:10.691943: step: 854/459, loss: 0.06085250899195671 2023-01-22 15:55:11.334003: step: 856/459, loss: 0.12899073958396912 2023-01-22 15:55:12.008407: step: 858/459, loss: 0.2401752769947052 2023-01-22 15:55:12.617981: step: 860/459, loss: 0.16907215118408203 2023-01-22 15:55:13.209151: step: 862/459, loss: 0.17783123254776 2023-01-22 15:55:13.815511: step: 864/459, loss: 0.16515912115573883 2023-01-22 15:55:14.486772: step: 866/459, loss: 0.38701048493385315 2023-01-22 15:55:15.147337: step: 868/459, loss: 0.2413817048072815 2023-01-22 15:55:15.719227: step: 870/459, loss: 0.13218413293361664 2023-01-22 15:55:16.351333: step: 872/459, loss: 1.261270523071289 2023-01-22 15:55:16.967246: step: 874/459, loss: 0.31421294808387756 2023-01-22 15:55:17.576408: step: 876/459, loss: 0.3380816578865051 2023-01-22 15:55:18.265395: step: 878/459, loss: 0.15147556364536285 2023-01-22 15:55:18.883096: step: 880/459, loss: 0.6471230983734131 2023-01-22 15:55:19.484093: step: 882/459, loss: 0.19884082674980164 2023-01-22 15:55:20.122845: step: 884/459, loss: 0.4035875201225281 2023-01-22 15:55:20.678824: step: 886/459, loss: 0.17160354554653168 2023-01-22 15:55:21.334421: step: 888/459, loss: 0.4612383246421814 2023-01-22 15:55:21.911263: step: 890/459, loss: 0.08733475208282471 2023-01-22 15:55:22.560231: step: 892/459, loss: 0.2642471194267273 2023-01-22 15:55:23.139785: step: 894/459, loss: 0.1142827719449997 2023-01-22 15:55:23.776038: step: 896/459, loss: 0.1403496116399765 2023-01-22 15:55:24.428918: step: 898/459, loss: 0.7209608554840088 2023-01-22 15:55:25.001690: step: 900/459, loss: 0.10197431594133377 2023-01-22 15:55:25.621013: step: 902/459, loss: 0.8303442597389221 2023-01-22 15:55:26.212888: step: 904/459, loss: 0.09774375706911087 2023-01-22 15:55:26.847290: step: 906/459, loss: 0.09700021892786026 2023-01-22 15:55:27.517517: step: 908/459, loss: 0.23879754543304443 2023-01-22 15:55:28.178105: step: 910/459, loss: 0.13785181939601898 2023-01-22 15:55:28.822684: step: 912/459, loss: 0.14719267189502716 2023-01-22 15:55:29.481394: step: 914/459, loss: 0.4238658845424652 2023-01-22 15:55:30.108315: step: 916/459, loss: 0.24939964711666107 2023-01-22 15:55:30.739608: step: 918/459, loss: 3.044767379760742 2023-01-22 15:55:31.169033: step: 920/459, loss: 0.00043083136552013457 ================================================== Loss: 0.282 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30476446417667413, 'r': 0.33425779941957806, 'f1': 0.3188305163694437}, 'combined': 0.2349277489038006, 'epoch': 11} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3389615843902013, 'r': 0.29119881568067296, 'f1': 0.3132701195586702}, 'combined': 0.20049287651754888, 'epoch': 11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29300418399683104, 'r': 0.33025519031141864, 'f1': 0.310516476885134}, 'combined': 0.22880161454694084, 'epoch': 11} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3350599028264153, 'r': 0.2936343148406039, 'f1': 0.31298231233010115}, 'combined': 0.20030867989126472, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32106382475177153, 'r': 0.33324841013134543, 'f1': 0.32704266692592}, 'combined': 0.24097880720857262, 'epoch': 11} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3453196093302738, 'r': 0.30313836261171706, 'f1': 0.3228570716359744}, 'combined': 0.23148242872013258, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.21383647798742136, 'r': 0.32380952380952377, 'f1': 0.25757575757575757}, 'combined': 0.1717171717171717, 'epoch': 11} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.32608695652173914, 'f1': 0.2830188679245283}, 'combined': 0.14150943396226415, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.27941176470588236, 'r': 0.16379310344827586, 'f1': 0.20652173913043476}, 'combined': 0.13768115942028983, 'epoch': 11} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27463650173611115, 'r': 0.29960345643939396, 'f1': 0.2865772192028986}, 'combined': 0.21116216151792525, 'epoch': 4} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3275304334361018, 'r': 0.25382122518550904, 'f1': 0.28600305741761856}, 'combined': 0.18304195674727586, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2387005649717514, 'r': 0.40238095238095234, 'f1': 0.29964539007092195}, 'combined': 0.1997635933806146, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2817090409590409, 'r': 0.3474589689248133, 'f1': 0.3111484734466892}, 'combined': 0.22926729622387626, 'epoch': 10} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3054170108602103, 'r': 0.31816581530628074, 'f1': 0.31166109197113007}, 'combined': 0.19946309886152322, 'epoch': 10} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.26973684210526316, 'r': 0.44565217391304346, 'f1': 0.3360655737704918}, 'combined': 0.1680327868852459, 'epoch': 10} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2967510944449901, 'r': 0.3406725088030721, 'f1': 0.3171986080198215}, 'combined': 0.23372529011986848, 'epoch': 10} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.31940773298193875, 'r': 0.3403334443328938, 'f1': 0.3295387271558153}, 'combined': 0.23627304965888646, 'epoch': 10} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2727272727272727, 'r': 0.20689655172413793, 'f1': 0.23529411764705882}, 'combined': 0.1568627450980392, 'epoch': 10} ****************************** Epoch: 12 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 15:58:08.695171: step: 2/459, loss: 0.14223426580429077 2023-01-22 15:58:09.334695: step: 4/459, loss: 0.270908921957016 2023-01-22 15:58:09.932949: step: 6/459, loss: 0.08260183036327362 2023-01-22 15:58:10.472785: step: 8/459, loss: 0.18523859977722168 2023-01-22 15:58:11.035395: step: 10/459, loss: 0.0840756893157959 2023-01-22 15:58:11.762176: step: 12/459, loss: 0.2783038318157196 2023-01-22 15:58:12.365945: step: 14/459, loss: 0.07453734427690506 2023-01-22 15:58:13.073201: step: 16/459, loss: 0.09953215718269348 2023-01-22 15:58:13.759131: step: 18/459, loss: 0.06391118466854095 2023-01-22 15:58:14.347061: step: 20/459, loss: 0.1172814592719078 2023-01-22 15:58:15.086431: step: 22/459, loss: 0.09420858323574066 2023-01-22 15:58:15.699466: step: 24/459, loss: 0.15031085908412933 2023-01-22 15:58:16.285813: step: 26/459, loss: 0.12557542324066162 2023-01-22 15:58:16.867607: step: 28/459, loss: 0.19741901755332947 2023-01-22 15:58:17.541644: step: 30/459, loss: 0.16309905052185059 2023-01-22 15:58:18.106308: step: 32/459, loss: 0.30593565106391907 2023-01-22 15:58:18.681096: step: 34/459, loss: 0.1707727611064911 2023-01-22 15:58:19.282646: step: 36/459, loss: 0.14114713668823242 2023-01-22 15:58:19.914372: step: 38/459, loss: 0.5244718790054321 2023-01-22 15:58:20.501265: step: 40/459, loss: 0.22422049939632416 2023-01-22 15:58:21.113604: step: 42/459, loss: 0.649178683757782 2023-01-22 15:58:21.725960: step: 44/459, loss: 0.06303593516349792 2023-01-22 15:58:22.429360: step: 46/459, loss: 0.35691219568252563 2023-01-22 15:58:23.015502: step: 48/459, loss: 0.28022271394729614 2023-01-22 15:58:23.699322: step: 50/459, loss: 0.23717927932739258 2023-01-22 15:58:24.311595: step: 52/459, loss: 0.22205838561058044 2023-01-22 15:58:24.887154: step: 54/459, loss: 0.205804243683815 2023-01-22 15:58:25.541557: step: 56/459, loss: 0.2868136763572693 2023-01-22 15:58:26.211393: step: 58/459, loss: 0.051780879497528076 2023-01-22 15:58:26.774213: step: 60/459, loss: 1.2085447311401367 2023-01-22 15:58:27.416176: step: 62/459, loss: 0.08565480262041092 2023-01-22 15:58:28.035496: step: 64/459, loss: 0.08914370089769363 2023-01-22 15:58:28.654312: step: 66/459, loss: 0.13522671163082123 2023-01-22 15:58:29.287458: step: 68/459, loss: 0.10990405082702637 2023-01-22 15:58:29.835584: step: 70/459, loss: 0.18940123915672302 2023-01-22 15:58:30.492935: step: 72/459, loss: 0.04765135049819946 2023-01-22 15:58:31.098368: step: 74/459, loss: 0.23311246931552887 2023-01-22 15:58:31.720822: step: 76/459, loss: 0.03906859830021858 2023-01-22 15:58:32.268604: step: 78/459, loss: 0.09104172885417938 2023-01-22 15:58:32.898366: step: 80/459, loss: 0.36352577805519104 2023-01-22 15:58:33.527856: step: 82/459, loss: 0.05810919776558876 2023-01-22 15:58:34.111539: step: 84/459, loss: 0.0937906950712204 2023-01-22 15:58:34.736674: step: 86/459, loss: 0.12080533802509308 2023-01-22 15:58:35.357819: step: 88/459, loss: 0.07309590280056 2023-01-22 15:58:35.968763: step: 90/459, loss: 0.14158637821674347 2023-01-22 15:58:36.605734: step: 92/459, loss: 0.1305581033229828 2023-01-22 15:58:37.223789: step: 94/459, loss: 0.19347621500492096 2023-01-22 15:58:37.872298: step: 96/459, loss: 0.0959404930472374 2023-01-22 15:58:38.521801: step: 98/459, loss: 0.26741325855255127 2023-01-22 15:58:39.121357: step: 100/459, loss: 0.1530745029449463 2023-01-22 15:58:39.701257: step: 102/459, loss: 0.07454866170883179 2023-01-22 15:58:40.393766: step: 104/459, loss: 0.17184120416641235 2023-01-22 15:58:40.994792: step: 106/459, loss: 0.06999144703149796 2023-01-22 15:58:41.543005: step: 108/459, loss: 0.12472149729728699 2023-01-22 15:58:42.186578: step: 110/459, loss: 0.2039591372013092 2023-01-22 15:58:42.774148: step: 112/459, loss: 0.068353570997715 2023-01-22 15:58:43.363728: step: 114/459, loss: 0.09603844583034515 2023-01-22 15:58:43.965483: step: 116/459, loss: 0.06809313595294952 2023-01-22 15:58:44.568500: step: 118/459, loss: 0.060473617166280746 2023-01-22 15:58:45.205635: step: 120/459, loss: 0.13570758700370789 2023-01-22 15:58:45.921930: step: 122/459, loss: 0.16705633699893951 2023-01-22 15:58:46.590372: step: 124/459, loss: 0.6329265236854553 2023-01-22 15:58:47.218942: step: 126/459, loss: 0.09717154502868652 2023-01-22 15:58:47.931956: step: 128/459, loss: 0.18974082171916962 2023-01-22 15:58:48.540480: step: 130/459, loss: 0.10938066244125366 2023-01-22 15:58:49.138549: step: 132/459, loss: 0.15865974128246307 2023-01-22 15:58:49.740247: step: 134/459, loss: 0.05840231850743294 2023-01-22 15:58:50.393918: step: 136/459, loss: 0.18897037208080292 2023-01-22 15:58:51.067117: step: 138/459, loss: 0.024923913180828094 2023-01-22 15:58:51.762182: step: 140/459, loss: 0.09939219802618027 2023-01-22 15:58:52.476433: step: 142/459, loss: 0.12840110063552856 2023-01-22 15:58:53.094023: step: 144/459, loss: 0.5763400197029114 2023-01-22 15:58:53.741207: step: 146/459, loss: 0.11723388731479645 2023-01-22 15:58:54.352174: step: 148/459, loss: 0.08128924667835236 2023-01-22 15:58:54.975146: step: 150/459, loss: 0.06407808512449265 2023-01-22 15:58:55.605537: step: 152/459, loss: 0.07529150694608688 2023-01-22 15:58:56.262162: step: 154/459, loss: 0.11638274788856506 2023-01-22 15:58:56.872039: step: 156/459, loss: 0.34685245156288147 2023-01-22 15:58:57.476898: step: 158/459, loss: 0.6432854533195496 2023-01-22 15:58:58.236983: step: 160/459, loss: 0.11428514122962952 2023-01-22 15:58:58.880504: step: 162/459, loss: 0.2552833557128906 2023-01-22 15:58:59.522635: step: 164/459, loss: 0.06634919345378876 2023-01-22 15:59:00.204766: step: 166/459, loss: 0.08504413813352585 2023-01-22 15:59:00.850858: step: 168/459, loss: 0.15368546545505524 2023-01-22 15:59:01.488153: step: 170/459, loss: 0.22941707074642181 2023-01-22 15:59:02.077997: step: 172/459, loss: 0.13053184747695923 2023-01-22 15:59:02.640664: step: 174/459, loss: 0.36494097113609314 2023-01-22 15:59:03.273569: step: 176/459, loss: 0.04572712257504463 2023-01-22 15:59:03.830539: step: 178/459, loss: 0.13857263326644897 2023-01-22 15:59:04.451184: step: 180/459, loss: 0.0983036607503891 2023-01-22 15:59:05.091525: step: 182/459, loss: 0.08460605144500732 2023-01-22 15:59:05.733333: step: 184/459, loss: 0.1671324074268341 2023-01-22 15:59:06.326536: step: 186/459, loss: 0.03538316860795021 2023-01-22 15:59:06.960285: step: 188/459, loss: 0.7831069827079773 2023-01-22 15:59:07.581359: step: 190/459, loss: 0.34961265325546265 2023-01-22 15:59:08.262960: step: 192/459, loss: 0.16157670319080353 2023-01-22 15:59:08.908886: step: 194/459, loss: 0.07082056254148483 2023-01-22 15:59:09.520310: step: 196/459, loss: 0.08635284751653671 2023-01-22 15:59:10.155111: step: 198/459, loss: 0.12029948830604553 2023-01-22 15:59:10.694668: step: 200/459, loss: 0.02770398184657097 2023-01-22 15:59:11.273328: step: 202/459, loss: 0.08351265639066696 2023-01-22 15:59:11.929278: step: 204/459, loss: 0.11186012625694275 2023-01-22 15:59:12.566502: step: 206/459, loss: 0.7799564599990845 2023-01-22 15:59:13.159271: step: 208/459, loss: 0.07227781414985657 2023-01-22 15:59:13.759046: step: 210/459, loss: 0.5705663561820984 2023-01-22 15:59:14.448019: step: 212/459, loss: 0.06934645771980286 2023-01-22 15:59:15.062766: step: 214/459, loss: 0.06881103664636612 2023-01-22 15:59:15.637176: step: 216/459, loss: 0.07526582479476929 2023-01-22 15:59:16.278215: step: 218/459, loss: 0.05707506090402603 2023-01-22 15:59:16.938439: step: 220/459, loss: 0.05839872360229492 2023-01-22 15:59:17.605586: step: 222/459, loss: 0.11976359784603119 2023-01-22 15:59:18.266166: step: 224/459, loss: 0.2528174817562103 2023-01-22 15:59:18.856886: step: 226/459, loss: 0.08267201483249664 2023-01-22 15:59:19.527646: step: 228/459, loss: 0.011666945181787014 2023-01-22 15:59:20.183375: step: 230/459, loss: 0.07341386377811432 2023-01-22 15:59:20.841956: step: 232/459, loss: 0.21118277311325073 2023-01-22 15:59:21.492249: step: 234/459, loss: 0.11342699825763702 2023-01-22 15:59:22.181200: step: 236/459, loss: 0.16992680728435516 2023-01-22 15:59:22.818322: step: 238/459, loss: 0.18595650792121887 2023-01-22 15:59:23.384609: step: 240/459, loss: 0.06288450956344604 2023-01-22 15:59:23.922030: step: 242/459, loss: 0.05802256613969803 2023-01-22 15:59:24.586984: step: 244/459, loss: 0.2237108200788498 2023-01-22 15:59:25.168290: step: 246/459, loss: 0.15725351870059967 2023-01-22 15:59:25.798424: step: 248/459, loss: 0.632198691368103 2023-01-22 15:59:26.409525: step: 250/459, loss: 0.11358405649662018 2023-01-22 15:59:26.972739: step: 252/459, loss: 0.06712155789136887 2023-01-22 15:59:27.591337: step: 254/459, loss: 0.10104983299970627 2023-01-22 15:59:28.246542: step: 256/459, loss: 0.5063568353652954 2023-01-22 15:59:28.887041: step: 258/459, loss: 0.1823718249797821 2023-01-22 15:59:29.494326: step: 260/459, loss: 0.29994523525238037 2023-01-22 15:59:30.165385: step: 262/459, loss: 0.1370822936296463 2023-01-22 15:59:30.787986: step: 264/459, loss: 0.4424288272857666 2023-01-22 15:59:31.367458: step: 266/459, loss: 0.1475774645805359 2023-01-22 15:59:32.010103: step: 268/459, loss: 0.21147103607654572 2023-01-22 15:59:32.640369: step: 270/459, loss: 0.1277928501367569 2023-01-22 15:59:33.225623: step: 272/459, loss: 0.24841620028018951 2023-01-22 15:59:33.923245: step: 274/459, loss: 0.35455870628356934 2023-01-22 15:59:34.545281: step: 276/459, loss: 0.121632419526577 2023-01-22 15:59:35.217053: step: 278/459, loss: 0.059834424406290054 2023-01-22 15:59:35.788497: step: 280/459, loss: 0.15062224864959717 2023-01-22 15:59:36.557602: step: 282/459, loss: 0.08693956583738327 2023-01-22 15:59:37.148251: step: 284/459, loss: 0.1493176519870758 2023-01-22 15:59:37.680565: step: 286/459, loss: 0.1808346062898636 2023-01-22 15:59:38.304120: step: 288/459, loss: 0.04127265885472298 2023-01-22 15:59:39.016892: step: 290/459, loss: 0.8051396012306213 2023-01-22 15:59:39.705450: step: 292/459, loss: 0.0693308562040329 2023-01-22 15:59:40.325702: step: 294/459, loss: 0.5503357648849487 2023-01-22 15:59:40.912703: step: 296/459, loss: 0.15695129334926605 2023-01-22 15:59:41.569189: step: 298/459, loss: 0.40464749932289124 2023-01-22 15:59:42.212306: step: 300/459, loss: 0.06560198217630386 2023-01-22 15:59:42.814108: step: 302/459, loss: 0.22340188920497894 2023-01-22 15:59:43.402095: step: 304/459, loss: 0.1389555037021637 2023-01-22 15:59:43.968060: step: 306/459, loss: 0.16974925994873047 2023-01-22 15:59:44.625615: step: 308/459, loss: 0.7016472816467285 2023-01-22 15:59:45.232185: step: 310/459, loss: 0.07725343853235245 2023-01-22 15:59:45.810045: step: 312/459, loss: 0.0308352243155241 2023-01-22 15:59:46.550876: step: 314/459, loss: 0.1599906086921692 2023-01-22 15:59:47.182509: step: 316/459, loss: 0.18803636729717255 2023-01-22 15:59:47.808131: step: 318/459, loss: 0.09949667751789093 2023-01-22 15:59:48.419512: step: 320/459, loss: 0.3175083100795746 2023-01-22 15:59:49.049472: step: 322/459, loss: 0.7586451172828674 2023-01-22 15:59:49.676645: step: 324/459, loss: 0.14085251092910767 2023-01-22 15:59:50.310959: step: 326/459, loss: 0.20600196719169617 2023-01-22 15:59:50.906568: step: 328/459, loss: 0.20339754223823547 2023-01-22 15:59:51.508726: step: 330/459, loss: 0.033106137067079544 2023-01-22 15:59:52.126826: step: 332/459, loss: 0.33018454909324646 2023-01-22 15:59:52.770300: step: 334/459, loss: 0.5055447816848755 2023-01-22 15:59:53.424637: step: 336/459, loss: 0.04364173114299774 2023-01-22 15:59:54.012749: step: 338/459, loss: 0.0601985938847065 2023-01-22 15:59:54.681417: step: 340/459, loss: 0.2320200502872467 2023-01-22 15:59:55.314500: step: 342/459, loss: 0.11477724462747574 2023-01-22 15:59:55.927934: step: 344/459, loss: 0.23263666033744812 2023-01-22 15:59:56.589547: step: 346/459, loss: 0.2039908468723297 2023-01-22 15:59:57.164436: step: 348/459, loss: 0.17536774277687073 2023-01-22 15:59:57.777041: step: 350/459, loss: 0.4116232097148895 2023-01-22 15:59:58.397654: step: 352/459, loss: 0.2127358317375183 2023-01-22 15:59:59.038317: step: 354/459, loss: 0.6859855055809021 2023-01-22 15:59:59.640114: step: 356/459, loss: 0.23051002621650696 2023-01-22 16:00:00.330193: step: 358/459, loss: 0.12079103291034698 2023-01-22 16:00:00.963863: step: 360/459, loss: 0.26082369685173035 2023-01-22 16:00:01.623963: step: 362/459, loss: 0.5757618546485901 2023-01-22 16:00:02.267775: step: 364/459, loss: 0.11780695617198944 2023-01-22 16:00:02.883264: step: 366/459, loss: 0.06308349221944809 2023-01-22 16:00:03.453685: step: 368/459, loss: 0.7417283654212952 2023-01-22 16:00:04.101463: step: 370/459, loss: 0.05816633999347687 2023-01-22 16:00:04.726617: step: 372/459, loss: 0.5813828706741333 2023-01-22 16:00:05.334234: step: 374/459, loss: 0.1376386135816574 2023-01-22 16:00:06.026601: step: 376/459, loss: 0.14621631801128387 2023-01-22 16:00:06.643296: step: 378/459, loss: 0.9869222044944763 2023-01-22 16:00:07.267749: step: 380/459, loss: 0.26671603322029114 2023-01-22 16:00:07.875928: step: 382/459, loss: 0.13415946066379547 2023-01-22 16:00:08.532194: step: 384/459, loss: 0.08915240317583084 2023-01-22 16:00:09.173496: step: 386/459, loss: 0.6027820110321045 2023-01-22 16:00:09.801542: step: 388/459, loss: 0.41400906443595886 2023-01-22 16:00:10.497142: step: 390/459, loss: 0.14715363085269928 2023-01-22 16:00:11.127229: step: 392/459, loss: 0.1915755718946457 2023-01-22 16:00:11.768975: step: 394/459, loss: 0.059386350214481354 2023-01-22 16:00:12.416601: step: 396/459, loss: 0.37014079093933105 2023-01-22 16:00:13.049888: step: 398/459, loss: 2.4643561840057373 2023-01-22 16:00:13.694202: step: 400/459, loss: 0.10413771122694016 2023-01-22 16:00:14.305746: step: 402/459, loss: 0.17231325805187225 2023-01-22 16:00:14.890809: step: 404/459, loss: 0.029710840433835983 2023-01-22 16:00:15.491155: step: 406/459, loss: 0.1310705840587616 2023-01-22 16:00:16.119433: step: 408/459, loss: 0.16214582324028015 2023-01-22 16:00:16.749209: step: 410/459, loss: 0.22812017798423767 2023-01-22 16:00:17.345943: step: 412/459, loss: 0.4149877429008484 2023-01-22 16:00:17.960892: step: 414/459, loss: 0.5251519680023193 2023-01-22 16:00:18.616064: step: 416/459, loss: 0.09875824302434921 2023-01-22 16:00:19.242234: step: 418/459, loss: 0.04687622934579849 2023-01-22 16:00:19.859086: step: 420/459, loss: 0.13321179151535034 2023-01-22 16:00:20.479922: step: 422/459, loss: 0.06321742385625839 2023-01-22 16:00:21.167503: step: 424/459, loss: 0.223535418510437 2023-01-22 16:00:21.795960: step: 426/459, loss: 0.30697059631347656 2023-01-22 16:00:22.401034: step: 428/459, loss: 0.08477500826120377 2023-01-22 16:00:22.966454: step: 430/459, loss: 0.03651107847690582 2023-01-22 16:00:23.529930: step: 432/459, loss: 0.10096246004104614 2023-01-22 16:00:24.173608: step: 434/459, loss: 0.08865472674369812 2023-01-22 16:00:24.860777: step: 436/459, loss: 1.0427042245864868 2023-01-22 16:00:25.532007: step: 438/459, loss: 0.23289452493190765 2023-01-22 16:00:26.197794: step: 440/459, loss: 0.23132991790771484 2023-01-22 16:00:26.877131: step: 442/459, loss: 0.16408443450927734 2023-01-22 16:00:27.432820: step: 444/459, loss: 0.9806494116783142 2023-01-22 16:00:28.049397: step: 446/459, loss: 0.09004873782396317 2023-01-22 16:00:28.655108: step: 448/459, loss: 0.5184901356697083 2023-01-22 16:00:29.287001: step: 450/459, loss: 0.4610937237739563 2023-01-22 16:00:29.901023: step: 452/459, loss: 0.24880895018577576 2023-01-22 16:00:30.509142: step: 454/459, loss: 0.22555533051490784 2023-01-22 16:00:31.081495: step: 456/459, loss: 0.36081093549728394 2023-01-22 16:00:31.708852: step: 458/459, loss: 0.05931709334254265 2023-01-22 16:00:32.292365: step: 460/459, loss: 0.15340213477611542 2023-01-22 16:00:32.902963: step: 462/459, loss: 0.08088135719299316 2023-01-22 16:00:33.555155: step: 464/459, loss: 0.05683029815554619 2023-01-22 16:00:34.199254: step: 466/459, loss: 0.1782017946243286 2023-01-22 16:00:34.806206: step: 468/459, loss: 0.1284886598587036 2023-01-22 16:00:35.503140: step: 470/459, loss: 0.05859282985329628 2023-01-22 16:00:36.141416: step: 472/459, loss: 0.22151000797748566 2023-01-22 16:00:36.720886: step: 474/459, loss: 0.1178138256072998 2023-01-22 16:00:37.276087: step: 476/459, loss: 0.11032843589782715 2023-01-22 16:00:37.939762: step: 478/459, loss: 0.31676438450813293 2023-01-22 16:00:38.575067: step: 480/459, loss: 0.10491131246089935 2023-01-22 16:00:39.207267: step: 482/459, loss: 0.38253435492515564 2023-01-22 16:00:39.803008: step: 484/459, loss: 0.06938231736421585 2023-01-22 16:00:40.472700: step: 486/459, loss: 0.8991177678108215 2023-01-22 16:00:41.075504: step: 488/459, loss: 0.22505667805671692 2023-01-22 16:00:41.653266: step: 490/459, loss: 0.12142986059188843 2023-01-22 16:00:42.282233: step: 492/459, loss: 0.06132381781935692 2023-01-22 16:00:42.930241: step: 494/459, loss: 0.17408332228660583 2023-01-22 16:00:43.509470: step: 496/459, loss: 0.16894745826721191 2023-01-22 16:00:44.102923: step: 498/459, loss: 0.23758535087108612 2023-01-22 16:00:44.711946: step: 500/459, loss: 0.13946300745010376 2023-01-22 16:00:45.321673: step: 502/459, loss: 0.31618574261665344 2023-01-22 16:00:45.962300: step: 504/459, loss: 0.32620295882225037 2023-01-22 16:00:46.584511: step: 506/459, loss: 0.3043925166130066 2023-01-22 16:00:47.238754: step: 508/459, loss: 0.2500089108943939 2023-01-22 16:00:47.866881: step: 510/459, loss: 0.15033884346485138 2023-01-22 16:00:48.489009: step: 512/459, loss: 0.14481858909130096 2023-01-22 16:00:49.225541: step: 514/459, loss: 0.45266589522361755 2023-01-22 16:00:49.832741: step: 516/459, loss: 0.06894615292549133 2023-01-22 16:00:50.425721: step: 518/459, loss: 0.12571215629577637 2023-01-22 16:00:51.024004: step: 520/459, loss: 0.05893244221806526 2023-01-22 16:00:51.609409: step: 522/459, loss: 0.24182534217834473 2023-01-22 16:00:52.283701: step: 524/459, loss: 0.06195223703980446 2023-01-22 16:00:52.894159: step: 526/459, loss: 0.13914625346660614 2023-01-22 16:00:53.549373: step: 528/459, loss: 0.27729982137680054 2023-01-22 16:00:54.253154: step: 530/459, loss: 0.1365760862827301 2023-01-22 16:00:54.897162: step: 532/459, loss: 0.06263881921768188 2023-01-22 16:00:55.572367: step: 534/459, loss: 0.1363162249326706 2023-01-22 16:00:56.236611: step: 536/459, loss: 0.1171068325638771 2023-01-22 16:00:56.869816: step: 538/459, loss: 0.13417339324951172 2023-01-22 16:00:57.455458: step: 540/459, loss: 0.0905187800526619 2023-01-22 16:00:58.074059: step: 542/459, loss: 0.10478904098272324 2023-01-22 16:00:58.707162: step: 544/459, loss: 0.2955794632434845 2023-01-22 16:00:59.321293: step: 546/459, loss: 0.10329007357358932 2023-01-22 16:00:59.981352: step: 548/459, loss: 0.8142912983894348 2023-01-22 16:01:00.698548: step: 550/459, loss: 0.13708274066448212 2023-01-22 16:01:01.300784: step: 552/459, loss: 0.38509422540664673 2023-01-22 16:01:01.920697: step: 554/459, loss: 0.15963302552700043 2023-01-22 16:01:02.549460: step: 556/459, loss: 0.1315971463918686 2023-01-22 16:01:03.150593: step: 558/459, loss: 0.10802867263555527 2023-01-22 16:01:03.771702: step: 560/459, loss: 0.21943698823451996 2023-01-22 16:01:04.434748: step: 562/459, loss: 0.13642412424087524 2023-01-22 16:01:05.033825: step: 564/459, loss: 0.14374232292175293 2023-01-22 16:01:05.655997: step: 566/459, loss: 0.06231065094470978 2023-01-22 16:01:06.216267: step: 568/459, loss: 0.044972311705350876 2023-01-22 16:01:06.870012: step: 570/459, loss: 0.1711844652891159 2023-01-22 16:01:07.487478: step: 572/459, loss: 0.13107053935527802 2023-01-22 16:01:08.072532: step: 574/459, loss: 0.07565026730298996 2023-01-22 16:01:08.616698: step: 576/459, loss: 0.3011215925216675 2023-01-22 16:01:09.229199: step: 578/459, loss: 0.6630562543869019 2023-01-22 16:01:09.824541: step: 580/459, loss: 0.26524174213409424 2023-01-22 16:01:10.537487: step: 582/459, loss: 0.38340485095977783 2023-01-22 16:01:11.320620: step: 584/459, loss: 0.341422975063324 2023-01-22 16:01:11.953447: step: 586/459, loss: 0.04739456623792648 2023-01-22 16:01:12.584502: step: 588/459, loss: 0.1325874626636505 2023-01-22 16:01:13.283749: step: 590/459, loss: 0.2070091813802719 2023-01-22 16:01:13.891138: step: 592/459, loss: 0.17248569428920746 2023-01-22 16:01:14.489886: step: 594/459, loss: 0.24943795800209045 2023-01-22 16:01:15.108954: step: 596/459, loss: 0.36774390935897827 2023-01-22 16:01:15.673880: step: 598/459, loss: 0.841009259223938 2023-01-22 16:01:16.313740: step: 600/459, loss: 0.1289174109697342 2023-01-22 16:01:17.022213: step: 602/459, loss: 0.3550705313682556 2023-01-22 16:01:17.676980: step: 604/459, loss: 0.132292240858078 2023-01-22 16:01:18.302812: step: 606/459, loss: 0.13247442245483398 2023-01-22 16:01:18.930950: step: 608/459, loss: 0.915798008441925 2023-01-22 16:01:19.591386: step: 610/459, loss: 0.13645200431346893 2023-01-22 16:01:20.231975: step: 612/459, loss: 0.5867289900779724 2023-01-22 16:01:20.856786: step: 614/459, loss: 0.19729021191596985 2023-01-22 16:01:21.464810: step: 616/459, loss: 0.09601975232362747 2023-01-22 16:01:22.080533: step: 618/459, loss: 0.27185580134391785 2023-01-22 16:01:22.711974: step: 620/459, loss: 0.544731080532074 2023-01-22 16:01:23.336148: step: 622/459, loss: 0.6117517948150635 2023-01-22 16:01:23.917481: step: 624/459, loss: 0.04339960962533951 2023-01-22 16:01:24.488237: step: 626/459, loss: 0.32206350564956665 2023-01-22 16:01:25.175873: step: 628/459, loss: 0.09933286160230637 2023-01-22 16:01:25.818710: step: 630/459, loss: 0.13168412446975708 2023-01-22 16:01:26.420792: step: 632/459, loss: 0.1934884488582611 2023-01-22 16:01:27.054351: step: 634/459, loss: 0.3924553394317627 2023-01-22 16:01:27.650487: step: 636/459, loss: 0.12430531531572342 2023-01-22 16:01:28.236160: step: 638/459, loss: 0.32800114154815674 2023-01-22 16:01:28.825156: step: 640/459, loss: 0.20850279927253723 2023-01-22 16:01:29.445410: step: 642/459, loss: 0.1902998387813568 2023-01-22 16:01:30.059427: step: 644/459, loss: 0.050154026597738266 2023-01-22 16:01:30.661745: step: 646/459, loss: 0.05865699052810669 2023-01-22 16:01:31.277940: step: 648/459, loss: 0.048122093081474304 2023-01-22 16:01:31.867974: step: 650/459, loss: 0.07979954779148102 2023-01-22 16:01:32.509915: step: 652/459, loss: 0.16897550225257874 2023-01-22 16:01:33.168049: step: 654/459, loss: 0.9880947470664978 2023-01-22 16:01:33.721703: step: 656/459, loss: 0.181030735373497 2023-01-22 16:01:34.296132: step: 658/459, loss: 0.11267170310020447 2023-01-22 16:01:34.979211: step: 660/459, loss: 0.6970316171646118 2023-01-22 16:01:35.649226: step: 662/459, loss: 0.22365222871303558 2023-01-22 16:01:36.308537: step: 664/459, loss: 0.06250275671482086 2023-01-22 16:01:36.937631: step: 666/459, loss: 0.15930739045143127 2023-01-22 16:01:37.520062: step: 668/459, loss: 0.08538995683193207 2023-01-22 16:01:38.112119: step: 670/459, loss: 1.2778420448303223 2023-01-22 16:01:38.804890: step: 672/459, loss: 0.4419507682323456 2023-01-22 16:01:39.433760: step: 674/459, loss: 0.14673766493797302 2023-01-22 16:01:40.079493: step: 676/459, loss: 0.11017453670501709 2023-01-22 16:01:40.727281: step: 678/459, loss: 0.19960109889507294 2023-01-22 16:01:41.292417: step: 680/459, loss: 0.19554555416107178 2023-01-22 16:01:41.930770: step: 682/459, loss: 0.07611095160245895 2023-01-22 16:01:42.557738: step: 684/459, loss: 0.1881568878889084 2023-01-22 16:01:43.217750: step: 686/459, loss: 0.08361875265836716 2023-01-22 16:01:43.828411: step: 688/459, loss: 0.7727715373039246 2023-01-22 16:01:44.413385: step: 690/459, loss: 0.10361066460609436 2023-01-22 16:01:45.000855: step: 692/459, loss: 0.299448698759079 2023-01-22 16:01:45.579608: step: 694/459, loss: 0.5473894476890564 2023-01-22 16:01:46.154564: step: 696/459, loss: 0.11000756919384003 2023-01-22 16:01:46.826891: step: 698/459, loss: 0.40637481212615967 2023-01-22 16:01:47.507411: step: 700/459, loss: 0.386801153421402 2023-01-22 16:01:48.151353: step: 702/459, loss: 0.4128059148788452 2023-01-22 16:01:48.735688: step: 704/459, loss: 0.2224578708410263 2023-01-22 16:01:49.298026: step: 706/459, loss: 0.11504364013671875 2023-01-22 16:01:49.933239: step: 708/459, loss: 0.16285470128059387 2023-01-22 16:01:50.569805: step: 710/459, loss: 0.09369632601737976 2023-01-22 16:01:51.254489: step: 712/459, loss: 0.13851508498191833 2023-01-22 16:01:51.948192: step: 714/459, loss: 0.06711982190608978 2023-01-22 16:01:52.560660: step: 716/459, loss: 0.08592095226049423 2023-01-22 16:01:53.241131: step: 718/459, loss: 0.13200931251049042 2023-01-22 16:01:53.919283: step: 720/459, loss: 0.09723826497793198 2023-01-22 16:01:54.483452: step: 722/459, loss: 0.07797159254550934 2023-01-22 16:01:55.076740: step: 724/459, loss: 0.13266296684741974 2023-01-22 16:01:55.818117: step: 726/459, loss: 0.04609883204102516 2023-01-22 16:01:56.429839: step: 728/459, loss: 0.12056046724319458 2023-01-22 16:01:57.087566: step: 730/459, loss: 0.32092565298080444 2023-01-22 16:01:57.668171: step: 732/459, loss: 0.19345490634441376 2023-01-22 16:01:58.264867: step: 734/459, loss: 0.13493883609771729 2023-01-22 16:01:58.868507: step: 736/459, loss: 0.12742187082767487 2023-01-22 16:01:59.515435: step: 738/459, loss: 0.08567100763320923 2023-01-22 16:02:00.211991: step: 740/459, loss: 0.15105967223644257 2023-01-22 16:02:00.782346: step: 742/459, loss: 0.11959337443113327 2023-01-22 16:02:01.412143: step: 744/459, loss: 0.22464105486869812 2023-01-22 16:02:02.065671: step: 746/459, loss: 0.1824856847524643 2023-01-22 16:02:02.746662: step: 748/459, loss: 0.20926640927791595 2023-01-22 16:02:03.366127: step: 750/459, loss: 0.13167282938957214 2023-01-22 16:02:04.009118: step: 752/459, loss: 0.16521230340003967 2023-01-22 16:02:04.596943: step: 754/459, loss: 0.1924225389957428 2023-01-22 16:02:05.224217: step: 756/459, loss: 0.09721677005290985 2023-01-22 16:02:05.835043: step: 758/459, loss: 0.06517835706472397 2023-01-22 16:02:06.444236: step: 760/459, loss: 0.0956009030342102 2023-01-22 16:02:07.114616: step: 762/459, loss: 0.14973747730255127 2023-01-22 16:02:07.749587: step: 764/459, loss: 0.0734502524137497 2023-01-22 16:02:08.403652: step: 766/459, loss: 0.1625695377588272 2023-01-22 16:02:09.014151: step: 768/459, loss: 0.19912298023700714 2023-01-22 16:02:09.646128: step: 770/459, loss: 0.34271976351737976 2023-01-22 16:02:10.227459: step: 772/459, loss: 0.19767214357852936 2023-01-22 16:02:10.885899: step: 774/459, loss: 0.18408703804016113 2023-01-22 16:02:11.478647: step: 776/459, loss: 0.3567752242088318 2023-01-22 16:02:12.052149: step: 778/459, loss: 0.29700350761413574 2023-01-22 16:02:12.644849: step: 780/459, loss: 0.3016476035118103 2023-01-22 16:02:13.288800: step: 782/459, loss: 0.18752045929431915 2023-01-22 16:02:13.897710: step: 784/459, loss: 0.3425899147987366 2023-01-22 16:02:14.525895: step: 786/459, loss: 0.1267523467540741 2023-01-22 16:02:15.174397: step: 788/459, loss: 0.18360242247581482 2023-01-22 16:02:15.771592: step: 790/459, loss: 0.2898668348789215 2023-01-22 16:02:16.415172: step: 792/459, loss: 0.13282090425491333 2023-01-22 16:02:17.049998: step: 794/459, loss: 0.09311199188232422 2023-01-22 16:02:17.696602: step: 796/459, loss: 0.09874223172664642 2023-01-22 16:02:18.315618: step: 798/459, loss: 0.10659027099609375 2023-01-22 16:02:18.935674: step: 800/459, loss: 0.06645316630601883 2023-01-22 16:02:19.493951: step: 802/459, loss: 0.12861771881580353 2023-01-22 16:02:20.150164: step: 804/459, loss: 0.13108237087726593 2023-01-22 16:02:20.765676: step: 806/459, loss: 0.26226937770843506 2023-01-22 16:02:21.405255: step: 808/459, loss: 0.144917294383049 2023-01-22 16:02:22.079631: step: 810/459, loss: 0.5324758291244507 2023-01-22 16:02:22.666835: step: 812/459, loss: 0.1317596137523651 2023-01-22 16:02:23.277238: step: 814/459, loss: 1.0882883071899414 2023-01-22 16:02:23.957922: step: 816/459, loss: 0.16661152243614197 2023-01-22 16:02:24.601189: step: 818/459, loss: 0.15103577077388763 2023-01-22 16:02:25.232541: step: 820/459, loss: 0.7590926885604858 2023-01-22 16:02:25.875079: step: 822/459, loss: 0.092158243060112 2023-01-22 16:02:26.552175: step: 824/459, loss: 0.2239702194929123 2023-01-22 16:02:27.152575: step: 826/459, loss: 0.20199565589427948 2023-01-22 16:02:27.813635: step: 828/459, loss: 0.20534652471542358 2023-01-22 16:02:28.468925: step: 830/459, loss: 0.1090080738067627 2023-01-22 16:02:29.130849: step: 832/459, loss: 0.2825623154640198 2023-01-22 16:02:29.743844: step: 834/459, loss: 0.1053212359547615 2023-01-22 16:02:30.378127: step: 836/459, loss: 0.7537582516670227 2023-01-22 16:02:31.019012: step: 838/459, loss: 0.18428227305412292 2023-01-22 16:02:31.665225: step: 840/459, loss: 0.1708417385816574 2023-01-22 16:02:32.354535: step: 842/459, loss: 0.12533918023109436 2023-01-22 16:02:32.998599: step: 844/459, loss: 0.06515805423259735 2023-01-22 16:02:33.641491: step: 846/459, loss: 0.2554217278957367 2023-01-22 16:02:34.285026: step: 848/459, loss: 0.14307613670825958 2023-01-22 16:02:34.904770: step: 850/459, loss: 0.42706453800201416 2023-01-22 16:02:35.558393: step: 852/459, loss: 0.2087622880935669 2023-01-22 16:02:36.153071: step: 854/459, loss: 0.1658012568950653 2023-01-22 16:02:36.814982: step: 856/459, loss: 0.16982011497020721 2023-01-22 16:02:37.436080: step: 858/459, loss: 0.0986863374710083 2023-01-22 16:02:38.031589: step: 860/459, loss: 0.038933176547288895 2023-01-22 16:02:38.635464: step: 862/459, loss: 0.17026250064373016 2023-01-22 16:02:39.241858: step: 864/459, loss: 0.0669618546962738 2023-01-22 16:02:39.914217: step: 866/459, loss: 0.5081891417503357 2023-01-22 16:02:40.548930: step: 868/459, loss: 0.08898888528347015 2023-01-22 16:02:41.168998: step: 870/459, loss: 0.7345309257507324 2023-01-22 16:02:41.775021: step: 872/459, loss: 0.11121787875890732 2023-01-22 16:02:42.368639: step: 874/459, loss: 0.1990262269973755 2023-01-22 16:02:42.970123: step: 876/459, loss: 0.4990063011646271 2023-01-22 16:02:43.622735: step: 878/459, loss: 0.14299578964710236 2023-01-22 16:02:44.278834: step: 880/459, loss: 0.12315787374973297 2023-01-22 16:02:44.894876: step: 882/459, loss: 0.11860886216163635 2023-01-22 16:02:45.504240: step: 884/459, loss: 0.18836206197738647 2023-01-22 16:02:46.142403: step: 886/459, loss: 0.12298082560300827 2023-01-22 16:02:46.806108: step: 888/459, loss: 0.21767204999923706 2023-01-22 16:02:47.441098: step: 890/459, loss: 0.045448921620845795 2023-01-22 16:02:48.051421: step: 892/459, loss: 0.5448034405708313 2023-01-22 16:02:48.715581: step: 894/459, loss: 0.1081387996673584 2023-01-22 16:02:49.353025: step: 896/459, loss: 0.46430879831314087 2023-01-22 16:02:50.008218: step: 898/459, loss: 0.054774556308984756 2023-01-22 16:02:50.635248: step: 900/459, loss: 0.17061758041381836 2023-01-22 16:02:51.262009: step: 902/459, loss: 0.7238332629203796 2023-01-22 16:02:51.884712: step: 904/459, loss: 0.337159663438797 2023-01-22 16:02:52.475289: step: 906/459, loss: 0.31884729862213135 2023-01-22 16:02:53.178024: step: 908/459, loss: 0.20098558068275452 2023-01-22 16:02:53.811952: step: 910/459, loss: 0.23588338494300842 2023-01-22 16:02:54.389387: step: 912/459, loss: 0.10488611459732056 2023-01-22 16:02:55.096938: step: 914/459, loss: 0.4298332631587982 2023-01-22 16:02:55.685087: step: 916/459, loss: 0.18534953892230988 2023-01-22 16:02:56.271751: step: 918/459, loss: 0.5912069082260132 2023-01-22 16:02:56.709065: step: 920/459, loss: 0.019416505470871925 ================================================== Loss: 0.227 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2965306761268781, 'r': 0.33704340607210626, 'f1': 0.315491785079929}, 'combined': 0.2324676311115266, 'epoch': 12} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31983745334792824, 'r': 0.32478489348160866, 'f1': 0.3222921877528559}, 'combined': 0.20626700016182775, 'epoch': 12} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2893706135386009, 'r': 0.3464760097587422, 'f1': 0.315358993338268}, 'combined': 0.23236978456503957, 'epoch': 12} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3199656683044682, 'r': 0.3269530896323183, 'f1': 0.32342164311963795}, 'combined': 0.20698985159656824, 'epoch': 12} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30053231389412993, 'r': 0.3381701368865636, 'f1': 0.318242253820034}, 'combined': 0.2344942922884461, 'epoch': 12} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3258027369804804, 'r': 0.33203393686808524, 'f1': 0.32888882515214934}, 'combined': 0.23580708218455992, 'epoch': 12} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23270440251572325, 'r': 0.35238095238095235, 'f1': 0.2803030303030303}, 'combined': 0.18686868686868685, 'epoch': 12} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2631578947368421, 'r': 0.43478260869565216, 'f1': 0.32786885245901637}, 'combined': 0.16393442622950818, 'epoch': 12} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25, 'r': 0.1724137931034483, 'f1': 0.20408163265306123}, 'combined': 0.13605442176870747, 'epoch': 12} New best chinese model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2965306761268781, 'r': 0.33704340607210626, 'f1': 0.315491785079929}, 'combined': 0.2324676311115266, 'epoch': 12} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31983745334792824, 'r': 0.32478489348160866, 'f1': 0.3222921877528559}, 'combined': 0.20626700016182775, 'epoch': 12} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23270440251572325, 'r': 0.35238095238095235, 'f1': 0.2803030303030303}, 'combined': 0.18686868686868685, 'epoch': 12} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2817090409590409, 'r': 0.3474589689248133, 'f1': 0.3111484734466892}, 'combined': 0.22926729622387626, 'epoch': 10} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3054170108602103, 'r': 0.31816581530628074, 'f1': 0.31166109197113007}, 'combined': 0.19946309886152322, 'epoch': 10} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.26973684210526316, 'r': 0.44565217391304346, 'f1': 0.3360655737704918}, 'combined': 0.1680327868852459, 'epoch': 10} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2967510944449901, 'r': 0.3406725088030721, 'f1': 0.3171986080198215}, 'combined': 0.23372529011986848, 'epoch': 10} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.31940773298193875, 'r': 0.3403334443328938, 'f1': 0.3295387271558153}, 'combined': 0.23627304965888646, 'epoch': 10} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2727272727272727, 'r': 0.20689655172413793, 'f1': 0.23529411764705882}, 'combined': 0.1568627450980392, 'epoch': 10} ****************************** Epoch: 13 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 16:05:43.145621: step: 2/459, loss: 0.2175314724445343 2023-01-22 16:05:43.924408: step: 4/459, loss: 0.22702665627002716 2023-01-22 16:05:44.559177: step: 6/459, loss: 0.11299006640911102 2023-01-22 16:05:45.244745: step: 8/459, loss: 0.1754152923822403 2023-01-22 16:05:45.929733: step: 10/459, loss: 0.7010007500648499 2023-01-22 16:05:46.527132: step: 12/459, loss: 0.06964869052171707 2023-01-22 16:05:47.126424: step: 14/459, loss: 0.23842069506645203 2023-01-22 16:05:47.800050: step: 16/459, loss: 0.3255208134651184 2023-01-22 16:05:48.352550: step: 18/459, loss: 0.16347819566726685 2023-01-22 16:05:48.986647: step: 20/459, loss: 0.15435002744197845 2023-01-22 16:05:49.604433: step: 22/459, loss: 0.09366387873888016 2023-01-22 16:05:50.195307: step: 24/459, loss: 0.05045172572135925 2023-01-22 16:05:50.926240: step: 26/459, loss: 0.1300341933965683 2023-01-22 16:05:51.571095: step: 28/459, loss: 0.1041533350944519 2023-01-22 16:05:52.166722: step: 30/459, loss: 0.1616532951593399 2023-01-22 16:05:52.794112: step: 32/459, loss: 1.5867905616760254 2023-01-22 16:05:53.409070: step: 34/459, loss: 0.09160703420639038 2023-01-22 16:05:54.108300: step: 36/459, loss: 0.07720400393009186 2023-01-22 16:05:54.779048: step: 38/459, loss: 0.20973435044288635 2023-01-22 16:05:55.347596: step: 40/459, loss: 0.22615252435207367 2023-01-22 16:05:56.016131: step: 42/459, loss: 0.07692065089941025 2023-01-22 16:05:56.650748: step: 44/459, loss: 0.2094975858926773 2023-01-22 16:05:57.293243: step: 46/459, loss: 0.40089744329452515 2023-01-22 16:05:57.857830: step: 48/459, loss: 0.06954915821552277 2023-01-22 16:05:58.504503: step: 50/459, loss: 0.07083893567323685 2023-01-22 16:05:59.081818: step: 52/459, loss: 1.1810297966003418 2023-01-22 16:05:59.667223: step: 54/459, loss: 0.04671978950500488 2023-01-22 16:06:00.286812: step: 56/459, loss: 0.08663367480039597 2023-01-22 16:06:00.940084: step: 58/459, loss: 0.0891827866435051 2023-01-22 16:06:01.576114: step: 60/459, loss: 0.028585119172930717 2023-01-22 16:06:02.232754: step: 62/459, loss: 0.4873834252357483 2023-01-22 16:06:02.870369: step: 64/459, loss: 0.2908472716808319 2023-01-22 16:06:03.538455: step: 66/459, loss: 0.04882846027612686 2023-01-22 16:06:04.162871: step: 68/459, loss: 0.04848664999008179 2023-01-22 16:06:04.819778: step: 70/459, loss: 0.11301092058420181 2023-01-22 16:06:05.492139: step: 72/459, loss: 1.0149459838867188 2023-01-22 16:06:06.076875: step: 74/459, loss: 0.09153713285923004 2023-01-22 16:06:06.672908: step: 76/459, loss: 0.20254400372505188 2023-01-22 16:06:07.346536: step: 78/459, loss: 0.2639021873474121 2023-01-22 16:06:07.980923: step: 80/459, loss: 0.067299485206604 2023-01-22 16:06:08.662173: step: 82/459, loss: 0.11729835718870163 2023-01-22 16:06:09.271262: step: 84/459, loss: 0.0734364241361618 2023-01-22 16:06:09.892240: step: 86/459, loss: 0.11769383400678635 2023-01-22 16:06:10.575653: step: 88/459, loss: 0.16214491426944733 2023-01-22 16:06:11.220192: step: 90/459, loss: 0.5203626751899719 2023-01-22 16:06:11.931964: step: 92/459, loss: 0.32338133454322815 2023-01-22 16:06:12.611498: step: 94/459, loss: 0.14961151778697968 2023-01-22 16:06:13.253992: step: 96/459, loss: 0.4085984230041504 2023-01-22 16:06:13.896295: step: 98/459, loss: 0.12819509208202362 2023-01-22 16:06:14.485878: step: 100/459, loss: 0.10779286921024323 2023-01-22 16:06:15.082564: step: 102/459, loss: 0.09224804490804672 2023-01-22 16:06:15.668677: step: 104/459, loss: 0.03356286138296127 2023-01-22 16:06:16.281761: step: 106/459, loss: 0.07983666658401489 2023-01-22 16:06:16.961561: step: 108/459, loss: 0.053692009299993515 2023-01-22 16:06:17.578086: step: 110/459, loss: 0.10316037386655807 2023-01-22 16:06:18.205300: step: 112/459, loss: 0.101192407310009 2023-01-22 16:06:18.748543: step: 114/459, loss: 0.022552579641342163 2023-01-22 16:06:19.418517: step: 116/459, loss: 0.2989416718482971 2023-01-22 16:06:20.069158: step: 118/459, loss: 0.04554332420229912 2023-01-22 16:06:20.693789: step: 120/459, loss: 0.06406331062316895 2023-01-22 16:06:21.258592: step: 122/459, loss: 0.06204906478524208 2023-01-22 16:06:21.866721: step: 124/459, loss: 0.15275682508945465 2023-01-22 16:06:22.490299: step: 126/459, loss: 0.07375963777303696 2023-01-22 16:06:23.080867: step: 128/459, loss: 0.2546471655368805 2023-01-22 16:06:23.749378: step: 130/459, loss: 0.13920637965202332 2023-01-22 16:06:24.320746: step: 132/459, loss: 0.10253171622753143 2023-01-22 16:06:24.925721: step: 134/459, loss: 0.1345568746328354 2023-01-22 16:06:25.615377: step: 136/459, loss: 0.3448968231678009 2023-01-22 16:06:26.268305: step: 138/459, loss: 0.18606877326965332 2023-01-22 16:06:26.875075: step: 140/459, loss: 0.03942585736513138 2023-01-22 16:06:27.545462: step: 142/459, loss: 0.05147302895784378 2023-01-22 16:06:28.208748: step: 144/459, loss: 0.891085684299469 2023-01-22 16:06:28.919253: step: 146/459, loss: 0.12006156891584396 2023-01-22 16:06:29.563603: step: 148/459, loss: 0.06055760011076927 2023-01-22 16:06:30.234046: step: 150/459, loss: 0.2210058867931366 2023-01-22 16:06:30.863128: step: 152/459, loss: 0.08609043806791306 2023-01-22 16:06:31.531531: step: 154/459, loss: 0.03575967252254486 2023-01-22 16:06:32.224936: step: 156/459, loss: 0.21274571120738983 2023-01-22 16:06:32.881710: step: 158/459, loss: 0.07201936095952988 2023-01-22 16:06:33.466104: step: 160/459, loss: 0.1181299239397049 2023-01-22 16:06:34.072621: step: 162/459, loss: 0.09649896621704102 2023-01-22 16:06:34.709291: step: 164/459, loss: 0.5096212029457092 2023-01-22 16:06:35.283419: step: 166/459, loss: 0.0558023601770401 2023-01-22 16:06:35.972719: step: 168/459, loss: 0.09520533680915833 2023-01-22 16:06:36.677433: step: 170/459, loss: 0.17890788614749908 2023-01-22 16:06:37.242770: step: 172/459, loss: 0.13756930828094482 2023-01-22 16:06:37.916520: step: 174/459, loss: 0.6056203246116638 2023-01-22 16:06:38.559202: step: 176/459, loss: 0.35848385095596313 2023-01-22 16:06:39.162702: step: 178/459, loss: 0.11956531554460526 2023-01-22 16:06:39.791587: step: 180/459, loss: 0.2216273844242096 2023-01-22 16:06:40.409484: step: 182/459, loss: 0.2050948441028595 2023-01-22 16:06:41.047755: step: 184/459, loss: 0.10356173664331436 2023-01-22 16:06:41.679549: step: 186/459, loss: 0.08099433779716492 2023-01-22 16:06:42.322957: step: 188/459, loss: 0.055222656577825546 2023-01-22 16:06:42.971621: step: 190/459, loss: 0.14947329461574554 2023-01-22 16:06:43.591905: step: 192/459, loss: 0.15494923293590546 2023-01-22 16:06:44.160291: step: 194/459, loss: 0.11869938671588898 2023-01-22 16:06:44.789400: step: 196/459, loss: 0.421548992395401 2023-01-22 16:06:45.446850: step: 198/459, loss: 0.13321194052696228 2023-01-22 16:06:46.068978: step: 200/459, loss: 0.7263316512107849 2023-01-22 16:06:46.690234: step: 202/459, loss: 0.06975799053907394 2023-01-22 16:06:47.286903: step: 204/459, loss: 0.16496850550174713 2023-01-22 16:06:47.984987: step: 206/459, loss: 0.23883144557476044 2023-01-22 16:06:48.628855: step: 208/459, loss: 0.14150236546993256 2023-01-22 16:06:49.255749: step: 210/459, loss: 0.3988848924636841 2023-01-22 16:06:49.859962: step: 212/459, loss: 2.2639918327331543 2023-01-22 16:06:50.492049: step: 214/459, loss: 0.14795882999897003 2023-01-22 16:06:51.060585: step: 216/459, loss: 0.05145477503538132 2023-01-22 16:06:51.778588: step: 218/459, loss: 0.0765538141131401 2023-01-22 16:06:52.378284: step: 220/459, loss: 0.05973486974835396 2023-01-22 16:06:53.003328: step: 222/459, loss: 0.05449271574616432 2023-01-22 16:06:53.666533: step: 224/459, loss: 0.12988387048244476 2023-01-22 16:06:54.289589: step: 226/459, loss: 0.08725002408027649 2023-01-22 16:06:54.978052: step: 228/459, loss: 0.02992982417345047 2023-01-22 16:06:55.617968: step: 230/459, loss: 0.07696876674890518 2023-01-22 16:06:56.184610: step: 232/459, loss: 0.1168847307562828 2023-01-22 16:06:56.846946: step: 234/459, loss: 0.3612947463989258 2023-01-22 16:06:57.428729: step: 236/459, loss: 0.18153999745845795 2023-01-22 16:06:58.049972: step: 238/459, loss: 0.18927615880966187 2023-01-22 16:06:58.652197: step: 240/459, loss: 0.15007330477237701 2023-01-22 16:06:59.328465: step: 242/459, loss: 0.07084771245718002 2023-01-22 16:06:59.891340: step: 244/459, loss: 0.2841370701789856 2023-01-22 16:07:00.527934: step: 246/459, loss: 0.03830922394990921 2023-01-22 16:07:01.218419: step: 248/459, loss: 0.07359690219163895 2023-01-22 16:07:01.919453: step: 250/459, loss: 0.18483209609985352 2023-01-22 16:07:02.527082: step: 252/459, loss: 0.02625676803290844 2023-01-22 16:07:03.143335: step: 254/459, loss: 0.20124326646327972 2023-01-22 16:07:03.820834: step: 256/459, loss: 0.44295358657836914 2023-01-22 16:07:04.459596: step: 258/459, loss: 0.06109107285737991 2023-01-22 16:07:05.099585: step: 260/459, loss: 0.0743495374917984 2023-01-22 16:07:05.717912: step: 262/459, loss: 0.03953646123409271 2023-01-22 16:07:06.375443: step: 264/459, loss: 0.21319469809532166 2023-01-22 16:07:07.078066: step: 266/459, loss: 0.15446889400482178 2023-01-22 16:07:07.747843: step: 268/459, loss: 0.13618919253349304 2023-01-22 16:07:08.406272: step: 270/459, loss: 0.13784804940223694 2023-01-22 16:07:09.021845: step: 272/459, loss: 0.34730014204978943 2023-01-22 16:07:09.597930: step: 274/459, loss: 0.04998093098402023 2023-01-22 16:07:10.276534: step: 276/459, loss: 0.06699305772781372 2023-01-22 16:07:10.892064: step: 278/459, loss: 0.35640400648117065 2023-01-22 16:07:11.418153: step: 280/459, loss: 0.08468704670667648 2023-01-22 16:07:12.046218: step: 282/459, loss: 0.19855272769927979 2023-01-22 16:07:12.739428: step: 284/459, loss: 1.9266496896743774 2023-01-22 16:07:13.362764: step: 286/459, loss: 0.15803176164627075 2023-01-22 16:07:13.962085: step: 288/459, loss: 0.32058072090148926 2023-01-22 16:07:14.553526: step: 290/459, loss: 0.0753961056470871 2023-01-22 16:07:15.207387: step: 292/459, loss: 0.8766348361968994 2023-01-22 16:07:15.800977: step: 294/459, loss: 0.8263092041015625 2023-01-22 16:07:16.401801: step: 296/459, loss: 0.07365443557500839 2023-01-22 16:07:17.017482: step: 298/459, loss: 0.12553438544273376 2023-01-22 16:07:17.639348: step: 300/459, loss: 0.21690310537815094 2023-01-22 16:07:18.281101: step: 302/459, loss: 0.32868266105651855 2023-01-22 16:07:18.956817: step: 304/459, loss: 0.3214021921157837 2023-01-22 16:07:19.608676: step: 306/459, loss: 0.08914636075496674 2023-01-22 16:07:20.306560: step: 308/459, loss: 0.09980230033397675 2023-01-22 16:07:20.973144: step: 310/459, loss: 0.31864047050476074 2023-01-22 16:07:21.594126: step: 312/459, loss: 0.15907683968544006 2023-01-22 16:07:22.210758: step: 314/459, loss: 0.08621561527252197 2023-01-22 16:07:22.845451: step: 316/459, loss: 0.05336346849799156 2023-01-22 16:07:23.521818: step: 318/459, loss: 0.07777329534292221 2023-01-22 16:07:24.108223: step: 320/459, loss: 0.23821412026882172 2023-01-22 16:07:24.771912: step: 322/459, loss: 0.22245821356773376 2023-01-22 16:07:25.334167: step: 324/459, loss: 0.08422134816646576 2023-01-22 16:07:25.936931: step: 326/459, loss: 0.11318973451852798 2023-01-22 16:07:26.586334: step: 328/459, loss: 0.10502725094556808 2023-01-22 16:07:27.252482: step: 330/459, loss: 0.06709364801645279 2023-01-22 16:07:27.948414: step: 332/459, loss: 0.6198889017105103 2023-01-22 16:07:28.631542: step: 334/459, loss: 0.27449676394462585 2023-01-22 16:07:29.208702: step: 336/459, loss: 0.10783402621746063 2023-01-22 16:07:29.819218: step: 338/459, loss: 0.015271150507032871 2023-01-22 16:07:30.427744: step: 340/459, loss: 0.10899872332811356 2023-01-22 16:07:31.053555: step: 342/459, loss: 0.13691815733909607 2023-01-22 16:07:31.672023: step: 344/459, loss: 0.1621280014514923 2023-01-22 16:07:32.344606: step: 346/459, loss: 0.0667685940861702 2023-01-22 16:07:32.980287: step: 348/459, loss: 0.08375894278287888 2023-01-22 16:07:33.562052: step: 350/459, loss: 0.2199813723564148 2023-01-22 16:07:34.147397: step: 352/459, loss: 0.1406746506690979 2023-01-22 16:07:34.778763: step: 354/459, loss: 0.019952336326241493 2023-01-22 16:07:35.346284: step: 356/459, loss: 0.1454218477010727 2023-01-22 16:07:35.909691: step: 358/459, loss: 0.10102307051420212 2023-01-22 16:07:36.518565: step: 360/459, loss: 0.1139460876584053 2023-01-22 16:07:37.229174: step: 362/459, loss: 0.5351136326789856 2023-01-22 16:07:37.885316: step: 364/459, loss: 0.2169981300830841 2023-01-22 16:07:38.458165: step: 366/459, loss: 0.1098385602235794 2023-01-22 16:07:39.108727: step: 368/459, loss: 0.08514164388179779 2023-01-22 16:07:39.746960: step: 370/459, loss: 0.06942510604858398 2023-01-22 16:07:40.389670: step: 372/459, loss: 0.16232141852378845 2023-01-22 16:07:41.017235: step: 374/459, loss: 0.12929050624370575 2023-01-22 16:07:41.642600: step: 376/459, loss: 0.07881692796945572 2023-01-22 16:07:42.261471: step: 378/459, loss: 0.09103835374116898 2023-01-22 16:07:42.822219: step: 380/459, loss: 0.02309042029082775 2023-01-22 16:07:43.463311: step: 382/459, loss: 0.05100786313414574 2023-01-22 16:07:44.051323: step: 384/459, loss: 1.0172722339630127 2023-01-22 16:07:44.678297: step: 386/459, loss: 0.4248782694339752 2023-01-22 16:07:45.337870: step: 388/459, loss: 0.346198707818985 2023-01-22 16:07:45.980185: step: 390/459, loss: 0.21730327606201172 2023-01-22 16:07:46.548421: step: 392/459, loss: 0.12571953237056732 2023-01-22 16:07:47.148570: step: 394/459, loss: 0.030417198315262794 2023-01-22 16:07:47.804696: step: 396/459, loss: 0.08848410099744797 2023-01-22 16:07:48.413738: step: 398/459, loss: 0.6672924160957336 2023-01-22 16:07:49.015102: step: 400/459, loss: 0.06494947522878647 2023-01-22 16:07:49.631840: step: 402/459, loss: 0.07734787464141846 2023-01-22 16:07:50.297824: step: 404/459, loss: 0.455759197473526 2023-01-22 16:07:50.905720: step: 406/459, loss: 0.2010497748851776 2023-01-22 16:07:51.508237: step: 408/459, loss: 0.08998303860425949 2023-01-22 16:07:52.088587: step: 410/459, loss: 0.0675843134522438 2023-01-22 16:07:52.753895: step: 412/459, loss: 0.17294134199619293 2023-01-22 16:07:53.343293: step: 414/459, loss: 0.18057341873645782 2023-01-22 16:07:53.940507: step: 416/459, loss: 0.37796059250831604 2023-01-22 16:07:54.634978: step: 418/459, loss: 0.2641032636165619 2023-01-22 16:07:55.202367: step: 420/459, loss: 0.4563921391963959 2023-01-22 16:07:55.849563: step: 422/459, loss: 0.40384724736213684 2023-01-22 16:07:56.454915: step: 424/459, loss: 0.09222158789634705 2023-01-22 16:07:57.052480: step: 426/459, loss: 0.16864734888076782 2023-01-22 16:07:57.607892: step: 428/459, loss: 0.11065934598445892 2023-01-22 16:07:58.236166: step: 430/459, loss: 0.21375197172164917 2023-01-22 16:07:58.836957: step: 432/459, loss: 0.10995235294103622 2023-01-22 16:07:59.469631: step: 434/459, loss: 0.4065725803375244 2023-01-22 16:08:00.082557: step: 436/459, loss: 0.9781618714332581 2023-01-22 16:08:00.737114: step: 438/459, loss: 0.16864009201526642 2023-01-22 16:08:01.377667: step: 440/459, loss: 0.05352596566081047 2023-01-22 16:08:02.023493: step: 442/459, loss: 0.09305402636528015 2023-01-22 16:08:02.687019: step: 444/459, loss: 0.1468774676322937 2023-01-22 16:08:03.289686: step: 446/459, loss: 0.11571153998374939 2023-01-22 16:08:03.833383: step: 448/459, loss: 0.17914676666259766 2023-01-22 16:08:04.474048: step: 450/459, loss: 0.16256371140480042 2023-01-22 16:08:05.080859: step: 452/459, loss: 0.14370578527450562 2023-01-22 16:08:05.725256: step: 454/459, loss: 0.053663402795791626 2023-01-22 16:08:06.330290: step: 456/459, loss: 0.18213282525539398 2023-01-22 16:08:06.950709: step: 458/459, loss: 0.12403430789709091 2023-01-22 16:08:07.578838: step: 460/459, loss: 0.04935413971543312 2023-01-22 16:08:08.185483: step: 462/459, loss: 0.2666420340538025 2023-01-22 16:08:08.778190: step: 464/459, loss: 0.07676253467798233 2023-01-22 16:08:09.344333: step: 466/459, loss: 0.9260852932929993 2023-01-22 16:08:09.956644: step: 468/459, loss: 0.6504155397415161 2023-01-22 16:08:10.583162: step: 470/459, loss: 0.12300096452236176 2023-01-22 16:08:11.159482: step: 472/459, loss: 0.3777703642845154 2023-01-22 16:08:11.786785: step: 474/459, loss: 0.35843661427497864 2023-01-22 16:08:12.446243: step: 476/459, loss: 0.12157877534627914 2023-01-22 16:08:13.139470: step: 478/459, loss: 0.19923800230026245 2023-01-22 16:08:13.779447: step: 480/459, loss: 0.05871972441673279 2023-01-22 16:08:14.373921: step: 482/459, loss: 0.06114637479186058 2023-01-22 16:08:14.924056: step: 484/459, loss: 0.059701528400182724 2023-01-22 16:08:15.496669: step: 486/459, loss: 0.16189223527908325 2023-01-22 16:08:16.109067: step: 488/459, loss: 0.21329636871814728 2023-01-22 16:08:16.730426: step: 490/459, loss: 0.45215699076652527 2023-01-22 16:08:17.364509: step: 492/459, loss: 0.27208423614501953 2023-01-22 16:08:17.960021: step: 494/459, loss: 0.09668344259262085 2023-01-22 16:08:18.555686: step: 496/459, loss: 0.22410909831523895 2023-01-22 16:08:19.174324: step: 498/459, loss: 0.07736319303512573 2023-01-22 16:08:19.818230: step: 500/459, loss: 0.2083858996629715 2023-01-22 16:08:20.406059: step: 502/459, loss: 0.09378604590892792 2023-01-22 16:08:21.020250: step: 504/459, loss: 0.12391608208417892 2023-01-22 16:08:21.749310: step: 506/459, loss: 0.3067978322505951 2023-01-22 16:08:22.396122: step: 508/459, loss: 0.2327512502670288 2023-01-22 16:08:23.082998: step: 510/459, loss: 0.15314443409442902 2023-01-22 16:08:23.688019: step: 512/459, loss: 0.09248086810112 2023-01-22 16:08:24.265691: step: 514/459, loss: 0.0773029625415802 2023-01-22 16:08:24.888321: step: 516/459, loss: 0.1048043891787529 2023-01-22 16:08:25.526077: step: 518/459, loss: 0.09622599929571152 2023-01-22 16:08:26.180133: step: 520/459, loss: 0.06065554916858673 2023-01-22 16:08:26.786399: step: 522/459, loss: 0.05678688362240791 2023-01-22 16:08:27.376133: step: 524/459, loss: 0.8680861592292786 2023-01-22 16:08:28.022398: step: 526/459, loss: 0.11019274592399597 2023-01-22 16:08:28.603721: step: 528/459, loss: 0.05484390631318092 2023-01-22 16:08:29.312211: step: 530/459, loss: 0.11086069792509079 2023-01-22 16:08:30.039340: step: 532/459, loss: 0.08550454676151276 2023-01-22 16:08:30.648767: step: 534/459, loss: 0.36040836572647095 2023-01-22 16:08:31.336718: step: 536/459, loss: 0.34048235416412354 2023-01-22 16:08:31.937491: step: 538/459, loss: 0.06449855118989944 2023-01-22 16:08:32.535873: step: 540/459, loss: 0.037475526332855225 2023-01-22 16:08:33.148428: step: 542/459, loss: 0.16057778894901276 2023-01-22 16:08:33.740986: step: 544/459, loss: 0.0328393317759037 2023-01-22 16:08:34.270816: step: 546/459, loss: 0.16086871922016144 2023-01-22 16:08:34.918880: step: 548/459, loss: 0.12040700763463974 2023-01-22 16:08:35.568492: step: 550/459, loss: 0.09095343202352524 2023-01-22 16:08:36.209881: step: 552/459, loss: 0.05664040520787239 2023-01-22 16:08:36.857002: step: 554/459, loss: 0.15150828659534454 2023-01-22 16:08:37.489000: step: 556/459, loss: 0.03381132706999779 2023-01-22 16:08:38.088123: step: 558/459, loss: 0.043697986751794815 2023-01-22 16:08:38.756382: step: 560/459, loss: 1.4246702194213867 2023-01-22 16:08:39.405288: step: 562/459, loss: 0.03209487348794937 2023-01-22 16:08:39.968458: step: 564/459, loss: 0.0892476737499237 2023-01-22 16:08:40.689866: step: 566/459, loss: 0.02017149142920971 2023-01-22 16:08:41.316958: step: 568/459, loss: 0.09451127797365189 2023-01-22 16:08:41.921901: step: 570/459, loss: 0.04851658642292023 2023-01-22 16:08:42.501852: step: 572/459, loss: 0.17216874659061432 2023-01-22 16:08:43.131108: step: 574/459, loss: 0.341172993183136 2023-01-22 16:08:43.660886: step: 576/459, loss: 0.038643646985292435 2023-01-22 16:08:44.255669: step: 578/459, loss: 0.23256172239780426 2023-01-22 16:08:44.905735: step: 580/459, loss: 0.11188121885061264 2023-01-22 16:08:45.518105: step: 582/459, loss: 0.10665664821863174 2023-01-22 16:08:46.157905: step: 584/459, loss: 0.862468957901001 2023-01-22 16:08:46.716004: step: 586/459, loss: 0.24569977819919586 2023-01-22 16:08:47.282340: step: 588/459, loss: 0.22495926916599274 2023-01-22 16:08:47.932998: step: 590/459, loss: 0.08538158237934113 2023-01-22 16:08:48.546483: step: 592/459, loss: 0.08553473651409149 2023-01-22 16:08:49.198547: step: 594/459, loss: 0.1746479868888855 2023-01-22 16:08:49.922985: step: 596/459, loss: 0.11437417566776276 2023-01-22 16:08:50.516883: step: 598/459, loss: 0.2838955223560333 2023-01-22 16:08:51.159519: step: 600/459, loss: 0.11558566242456436 2023-01-22 16:08:51.734608: step: 602/459, loss: 0.6847760081291199 2023-01-22 16:08:52.456262: step: 604/459, loss: 0.24646607041358948 2023-01-22 16:08:53.075196: step: 606/459, loss: 0.5493173003196716 2023-01-22 16:08:53.664605: step: 608/459, loss: 0.2482856661081314 2023-01-22 16:08:54.258172: step: 610/459, loss: 0.1039825826883316 2023-01-22 16:08:54.843273: step: 612/459, loss: 0.02138184942305088 2023-01-22 16:08:55.458931: step: 614/459, loss: 0.10652806609869003 2023-01-22 16:08:56.048549: step: 616/459, loss: 0.12183962762355804 2023-01-22 16:08:56.654188: step: 618/459, loss: 0.4056240916252136 2023-01-22 16:08:57.221710: step: 620/459, loss: 0.23278671503067017 2023-01-22 16:08:57.873735: step: 622/459, loss: 0.3607924282550812 2023-01-22 16:08:58.433372: step: 624/459, loss: 0.28010332584381104 2023-01-22 16:08:59.045284: step: 626/459, loss: 0.17377839982509613 2023-01-22 16:08:59.671269: step: 628/459, loss: 0.191105917096138 2023-01-22 16:09:00.261837: step: 630/459, loss: 0.12469834089279175 2023-01-22 16:09:00.873691: step: 632/459, loss: 0.10472163558006287 2023-01-22 16:09:01.483628: step: 634/459, loss: 0.1181766614317894 2023-01-22 16:09:02.089725: step: 636/459, loss: 0.14583741128444672 2023-01-22 16:09:02.668720: step: 638/459, loss: 0.048283327370882034 2023-01-22 16:09:03.254676: step: 640/459, loss: 0.10572639107704163 2023-01-22 16:09:03.849032: step: 642/459, loss: 0.4630007743835449 2023-01-22 16:09:04.494370: step: 644/459, loss: 0.07351832091808319 2023-01-22 16:09:05.117668: step: 646/459, loss: 0.5763841867446899 2023-01-22 16:09:05.734986: step: 648/459, loss: 1.007498860359192 2023-01-22 16:09:06.336377: step: 650/459, loss: 0.5948658585548401 2023-01-22 16:09:06.992804: step: 652/459, loss: 0.10380541533231735 2023-01-22 16:09:07.609353: step: 654/459, loss: 0.02848116122186184 2023-01-22 16:09:08.249344: step: 656/459, loss: 0.2746102511882782 2023-01-22 16:09:08.858755: step: 658/459, loss: 0.09283861517906189 2023-01-22 16:09:09.466430: step: 660/459, loss: 0.05952325835824013 2023-01-22 16:09:10.086990: step: 662/459, loss: 0.08959735184907913 2023-01-22 16:09:10.650632: step: 664/459, loss: 0.10747572034597397 2023-01-22 16:09:11.273672: step: 666/459, loss: 0.10155584663152695 2023-01-22 16:09:11.902286: step: 668/459, loss: 0.13425973057746887 2023-01-22 16:09:12.511885: step: 670/459, loss: 0.21596023440361023 2023-01-22 16:09:13.124713: step: 672/459, loss: 1.7999727725982666 2023-01-22 16:09:13.747203: step: 674/459, loss: 0.07163188606500626 2023-01-22 16:09:14.378794: step: 676/459, loss: 0.09605004638433456 2023-01-22 16:09:15.000464: step: 678/459, loss: 0.12615108489990234 2023-01-22 16:09:15.575243: step: 680/459, loss: 0.12297182530164719 2023-01-22 16:09:16.160702: step: 682/459, loss: 0.0131983682513237 2023-01-22 16:09:16.786363: step: 684/459, loss: 0.5180760622024536 2023-01-22 16:09:17.371983: step: 686/459, loss: 0.1144990399479866 2023-01-22 16:09:17.939747: step: 688/459, loss: 0.4884774088859558 2023-01-22 16:09:18.611575: step: 690/459, loss: 0.1337982416152954 2023-01-22 16:09:19.272777: step: 692/459, loss: 0.0738927498459816 2023-01-22 16:09:19.849322: step: 694/459, loss: 0.24636101722717285 2023-01-22 16:09:20.503399: step: 696/459, loss: 0.13661494851112366 2023-01-22 16:09:21.102273: step: 698/459, loss: 0.11227037757635117 2023-01-22 16:09:21.755409: step: 700/459, loss: 0.10916230082511902 2023-01-22 16:09:22.416569: step: 702/459, loss: 0.39691832661628723 2023-01-22 16:09:23.044750: step: 704/459, loss: 0.11710461229085922 2023-01-22 16:09:23.815316: step: 706/459, loss: 0.030457785353064537 2023-01-22 16:09:24.443039: step: 708/459, loss: 0.2445012778043747 2023-01-22 16:09:25.069371: step: 710/459, loss: 0.1260208934545517 2023-01-22 16:09:25.673588: step: 712/459, loss: 0.06301505863666534 2023-01-22 16:09:26.235744: step: 714/459, loss: 0.04300425574183464 2023-01-22 16:09:26.845676: step: 716/459, loss: 0.1224135234951973 2023-01-22 16:09:27.474937: step: 718/459, loss: 0.1047707200050354 2023-01-22 16:09:28.099700: step: 720/459, loss: 0.15377749502658844 2023-01-22 16:09:28.695792: step: 722/459, loss: 0.2211814671754837 2023-01-22 16:09:29.316943: step: 724/459, loss: 0.08811847865581512 2023-01-22 16:09:29.940360: step: 726/459, loss: 0.12086383253335953 2023-01-22 16:09:30.621238: step: 728/459, loss: 0.02711937576532364 2023-01-22 16:09:31.273284: step: 730/459, loss: 5.806827545166016 2023-01-22 16:09:31.899988: step: 732/459, loss: 0.22329480946063995 2023-01-22 16:09:32.499795: step: 734/459, loss: 0.08981562405824661 2023-01-22 16:09:33.159532: step: 736/459, loss: 0.18851202726364136 2023-01-22 16:09:33.827379: step: 738/459, loss: 1.096872091293335 2023-01-22 16:09:34.413212: step: 740/459, loss: 0.04108292981982231 2023-01-22 16:09:35.072242: step: 742/459, loss: 0.14824949204921722 2023-01-22 16:09:35.687242: step: 744/459, loss: 0.3962174654006958 2023-01-22 16:09:36.297087: step: 746/459, loss: 0.10996728390455246 2023-01-22 16:09:36.913180: step: 748/459, loss: 0.13620731234550476 2023-01-22 16:09:37.541008: step: 750/459, loss: 0.7770532965660095 2023-01-22 16:09:38.148311: step: 752/459, loss: 0.14721664786338806 2023-01-22 16:09:38.789360: step: 754/459, loss: 0.545897364616394 2023-01-22 16:09:39.387907: step: 756/459, loss: 0.10732260346412659 2023-01-22 16:09:39.974407: step: 758/459, loss: 0.1558440923690796 2023-01-22 16:09:40.586546: step: 760/459, loss: 0.26405543088912964 2023-01-22 16:09:41.182418: step: 762/459, loss: 9.847442626953125 2023-01-22 16:09:41.815825: step: 764/459, loss: 0.11948665231466293 2023-01-22 16:09:42.393386: step: 766/459, loss: 1.312936782836914 2023-01-22 16:09:42.978603: step: 768/459, loss: 0.10059797018766403 2023-01-22 16:09:43.656136: step: 770/459, loss: 0.03895750269293785 2023-01-22 16:09:44.293414: step: 772/459, loss: 0.64100182056427 2023-01-22 16:09:44.965170: step: 774/459, loss: 0.030735544860363007 2023-01-22 16:09:45.626700: step: 776/459, loss: 0.07669083029031754 2023-01-22 16:09:46.237249: step: 778/459, loss: 0.7161417007446289 2023-01-22 16:09:46.918524: step: 780/459, loss: 0.09086070209741592 2023-01-22 16:09:47.561517: step: 782/459, loss: 0.19896654784679413 2023-01-22 16:09:48.242908: step: 784/459, loss: 0.12394176423549652 2023-01-22 16:09:48.931496: step: 786/459, loss: 0.0658368319272995 2023-01-22 16:09:49.677249: step: 788/459, loss: 0.11021754890680313 2023-01-22 16:09:50.279464: step: 790/459, loss: 0.1210855171084404 2023-01-22 16:09:50.844673: step: 792/459, loss: 0.0641956701874733 2023-01-22 16:09:51.440017: step: 794/459, loss: 0.03851225972175598 2023-01-22 16:09:52.015652: step: 796/459, loss: 0.2052469700574875 2023-01-22 16:09:52.636013: step: 798/459, loss: 0.05359363928437233 2023-01-22 16:09:53.246596: step: 800/459, loss: 0.1810566484928131 2023-01-22 16:09:53.863862: step: 802/459, loss: 0.07648082077503204 2023-01-22 16:09:54.479409: step: 804/459, loss: 0.40898406505584717 2023-01-22 16:09:55.080671: step: 806/459, loss: 0.1126558929681778 2023-01-22 16:09:55.755300: step: 808/459, loss: 0.08524700999259949 2023-01-22 16:09:56.345105: step: 810/459, loss: 0.07549833506345749 2023-01-22 16:09:56.988837: step: 812/459, loss: 0.10212375968694687 2023-01-22 16:09:57.615517: step: 814/459, loss: 0.07848583906888962 2023-01-22 16:09:58.242368: step: 816/459, loss: 0.1210382953286171 2023-01-22 16:09:58.922111: step: 818/459, loss: 0.05031590536236763 2023-01-22 16:09:59.510430: step: 820/459, loss: 0.187314972281456 2023-01-22 16:10:00.209519: step: 822/459, loss: 0.3324595093727112 2023-01-22 16:10:00.792629: step: 824/459, loss: 0.25421464443206787 2023-01-22 16:10:01.418483: step: 826/459, loss: 0.06909172981977463 2023-01-22 16:10:02.029385: step: 828/459, loss: 0.13191623985767365 2023-01-22 16:10:02.649069: step: 830/459, loss: 0.20692312717437744 2023-01-22 16:10:03.248538: step: 832/459, loss: 0.05683526024222374 2023-01-22 16:10:03.837898: step: 834/459, loss: 0.42429032921791077 2023-01-22 16:10:04.470889: step: 836/459, loss: 0.6238166093826294 2023-01-22 16:10:05.123999: step: 838/459, loss: 0.0913553535938263 2023-01-22 16:10:05.698138: step: 840/459, loss: 0.093619205057621 2023-01-22 16:10:06.347863: step: 842/459, loss: 0.027838991954922676 2023-01-22 16:10:06.873120: step: 844/459, loss: 0.18243491649627686 2023-01-22 16:10:07.485426: step: 846/459, loss: 0.4289887547492981 2023-01-22 16:10:08.133102: step: 848/459, loss: 0.5001163482666016 2023-01-22 16:10:08.736149: step: 850/459, loss: 0.10345688462257385 2023-01-22 16:10:09.374749: step: 852/459, loss: 0.12397043406963348 2023-01-22 16:10:10.016742: step: 854/459, loss: 0.5527150630950928 2023-01-22 16:10:10.669938: step: 856/459, loss: 0.9902576804161072 2023-01-22 16:10:11.332333: step: 858/459, loss: 0.3322468101978302 2023-01-22 16:10:12.035951: step: 860/459, loss: 0.10809021443128586 2023-01-22 16:10:12.642610: step: 862/459, loss: 0.19807657599449158 2023-01-22 16:10:13.222479: step: 864/459, loss: 0.27233630418777466 2023-01-22 16:10:13.865340: step: 866/459, loss: 0.5007730722427368 2023-01-22 16:10:14.542206: step: 868/459, loss: 0.16286122798919678 2023-01-22 16:10:15.115374: step: 870/459, loss: 0.15085415542125702 2023-01-22 16:10:15.737117: step: 872/459, loss: 0.4381345510482788 2023-01-22 16:10:16.390709: step: 874/459, loss: 0.13509061932563782 2023-01-22 16:10:17.005439: step: 876/459, loss: 0.4053989350795746 2023-01-22 16:10:17.763600: step: 878/459, loss: 0.10049339383840561 2023-01-22 16:10:18.404180: step: 880/459, loss: 0.8968438506126404 2023-01-22 16:10:18.994249: step: 882/459, loss: 0.0841679722070694 2023-01-22 16:10:19.691061: step: 884/459, loss: 0.4988075792789459 2023-01-22 16:10:20.289160: step: 886/459, loss: 0.09775331616401672 2023-01-22 16:10:20.879878: step: 888/459, loss: 0.095893494784832 2023-01-22 16:10:21.525351: step: 890/459, loss: 0.07573654502630234 2023-01-22 16:10:22.197512: step: 892/459, loss: 0.13596943020820618 2023-01-22 16:10:22.795581: step: 894/459, loss: 0.231241375207901 2023-01-22 16:10:23.372167: step: 896/459, loss: 0.22770342230796814 2023-01-22 16:10:23.987845: step: 898/459, loss: 0.21445754170417786 2023-01-22 16:10:24.633285: step: 900/459, loss: 0.27565184235572815 2023-01-22 16:10:25.330328: step: 902/459, loss: 0.19809432327747345 2023-01-22 16:10:26.010755: step: 904/459, loss: 0.1127210259437561 2023-01-22 16:10:26.684930: step: 906/459, loss: 0.0936361774802208 2023-01-22 16:10:27.305415: step: 908/459, loss: 0.08105523139238358 2023-01-22 16:10:27.904763: step: 910/459, loss: 0.2606672942638397 2023-01-22 16:10:28.496289: step: 912/459, loss: 0.30182167887687683 2023-01-22 16:10:29.157582: step: 914/459, loss: 0.3913436532020569 2023-01-22 16:10:29.698388: step: 916/459, loss: 0.08434551954269409 2023-01-22 16:10:30.293603: step: 918/459, loss: 0.2666407823562622 2023-01-22 16:10:30.759597: step: 920/459, loss: 0.008216381072998047 ================================================== Loss: 0.252 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2915844507221785, 'r': 0.34525369497275027, 'f1': 0.3161575973078008}, 'combined': 0.23295822959522164, 'epoch': 13} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.32205148413763973, 'r': 0.31706979602996016, 'f1': 0.3195412249765486}, 'combined': 0.20450638398499105, 'epoch': 13} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29418702499903554, 'r': 0.3594998939267151, 'f1': 0.32358060478117656}, 'combined': 0.23842781404928798, 'epoch': 13} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3202762492367604, 'r': 0.31212376289255195, 'f1': 0.31614745781013553}, 'combined': 0.20233437299848672, 'epoch': 13} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31343370807204546, 'r': 0.345550255009219, 'f1': 0.3287093581044376}, 'combined': 0.24220689544537505, 'epoch': 13} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.32930395112068145, 'r': 0.3299048707395148, 'f1': 0.329604137037838}, 'combined': 0.236319947310148, 'epoch': 13} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2202380952380952, 'r': 0.35238095238095235, 'f1': 0.271062271062271}, 'combined': 0.18070818070818065, 'epoch': 13} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2878787878787879, 'r': 0.41304347826086957, 'f1': 0.3392857142857143}, 'combined': 0.16964285714285715, 'epoch': 13} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2857142857142857, 'r': 0.20689655172413793, 'f1': 0.24000000000000002}, 'combined': 0.16, 'epoch': 13} New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2965306761268781, 'r': 0.33704340607210626, 'f1': 0.315491785079929}, 'combined': 0.2324676311115266, 'epoch': 12} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31983745334792824, 'r': 0.32478489348160866, 'f1': 0.3222921877528559}, 'combined': 0.20626700016182775, 'epoch': 12} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23270440251572325, 'r': 0.35238095238095235, 'f1': 0.2803030303030303}, 'combined': 0.18686868686868685, 'epoch': 12} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29418702499903554, 'r': 0.3594998939267151, 'f1': 0.32358060478117656}, 'combined': 0.23842781404928798, 'epoch': 13} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3202762492367604, 'r': 0.31212376289255195, 'f1': 0.31614745781013553}, 'combined': 0.20233437299848672, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2878787878787879, 'r': 0.41304347826086957, 'f1': 0.3392857142857143}, 'combined': 0.16964285714285715, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31343370807204546, 'r': 0.345550255009219, 'f1': 0.3287093581044376}, 'combined': 0.24220689544537505, 'epoch': 13} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.32930395112068145, 'r': 0.3299048707395148, 'f1': 0.329604137037838}, 'combined': 0.236319947310148, 'epoch': 13} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2857142857142857, 'r': 0.20689655172413793, 'f1': 0.24000000000000002}, 'combined': 0.16, 'epoch': 13} ****************************** Epoch: 14 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 16:13:21.374677: step: 2/459, loss: 0.06441961973905563 2023-01-22 16:13:21.969385: step: 4/459, loss: 0.24168774485588074 2023-01-22 16:13:22.584579: step: 6/459, loss: 0.09007761627435684 2023-01-22 16:13:23.181483: step: 8/459, loss: 0.06352373212575912 2023-01-22 16:13:23.911476: step: 10/459, loss: 0.18836553394794464 2023-01-22 16:13:24.600160: step: 12/459, loss: 0.054919660091400146 2023-01-22 16:13:25.252937: step: 14/459, loss: 0.08336023986339569 2023-01-22 16:13:25.848049: step: 16/459, loss: 0.07377622276544571 2023-01-22 16:13:26.429233: step: 18/459, loss: 0.0911712646484375 2023-01-22 16:13:27.053153: step: 20/459, loss: 0.043421823531389236 2023-01-22 16:13:27.672655: step: 22/459, loss: 0.09621667116880417 2023-01-22 16:13:28.327204: step: 24/459, loss: 1.1882328987121582 2023-01-22 16:13:28.912170: step: 26/459, loss: 0.05446174740791321 2023-01-22 16:13:29.584763: step: 28/459, loss: 0.08758416771888733 2023-01-22 16:13:30.210457: step: 30/459, loss: 0.06806449592113495 2023-01-22 16:13:30.823541: step: 32/459, loss: 0.5626229643821716 2023-01-22 16:13:31.468157: step: 34/459, loss: 0.22133304178714752 2023-01-22 16:13:32.065948: step: 36/459, loss: 0.11496199667453766 2023-01-22 16:13:32.664021: step: 38/459, loss: 0.2548699676990509 2023-01-22 16:13:33.260859: step: 40/459, loss: 0.4416053891181946 2023-01-22 16:13:33.881447: step: 42/459, loss: 0.34797435998916626 2023-01-22 16:13:34.508458: step: 44/459, loss: 0.061756085604429245 2023-01-22 16:13:35.138127: step: 46/459, loss: 0.3163054883480072 2023-01-22 16:13:35.826041: step: 48/459, loss: 0.2247781753540039 2023-01-22 16:13:36.435605: step: 50/459, loss: 0.0657285526394844 2023-01-22 16:13:37.106397: step: 52/459, loss: 0.1251581460237503 2023-01-22 16:13:37.703580: step: 54/459, loss: 0.2249917984008789 2023-01-22 16:13:38.323619: step: 56/459, loss: 0.2070305049419403 2023-01-22 16:13:38.914717: step: 58/459, loss: 0.5986971855163574 2023-01-22 16:13:39.582248: step: 60/459, loss: 0.07258062064647675 2023-01-22 16:13:40.191058: step: 62/459, loss: 0.07770689576864243 2023-01-22 16:13:40.807753: step: 64/459, loss: 0.044736348092556 2023-01-22 16:13:41.341050: step: 66/459, loss: 0.13738499581813812 2023-01-22 16:13:42.113755: step: 68/459, loss: 0.20796900987625122 2023-01-22 16:13:42.741025: step: 70/459, loss: 0.09944851696491241 2023-01-22 16:13:43.309530: step: 72/459, loss: 0.07951584458351135 2023-01-22 16:13:43.963056: step: 74/459, loss: 0.08414649963378906 2023-01-22 16:13:44.615423: step: 76/459, loss: 0.24652612209320068 2023-01-22 16:13:45.252663: step: 78/459, loss: 0.08239132910966873 2023-01-22 16:13:45.874706: step: 80/459, loss: 0.011474859900772572 2023-01-22 16:13:46.486208: step: 82/459, loss: 0.12491506338119507 2023-01-22 16:13:47.065356: step: 84/459, loss: 0.18238994479179382 2023-01-22 16:13:47.713320: step: 86/459, loss: 0.13747794926166534 2023-01-22 16:13:48.352462: step: 88/459, loss: 0.04843825474381447 2023-01-22 16:13:48.979370: step: 90/459, loss: 0.3054289221763611 2023-01-22 16:13:49.579673: step: 92/459, loss: 0.07644008845090866 2023-01-22 16:13:50.217420: step: 94/459, loss: 0.21005994081497192 2023-01-22 16:13:50.848311: step: 96/459, loss: 0.0813373327255249 2023-01-22 16:13:51.487053: step: 98/459, loss: 0.08959370851516724 2023-01-22 16:13:52.098152: step: 100/459, loss: 0.06660779565572739 2023-01-22 16:13:52.714314: step: 102/459, loss: 0.3228623867034912 2023-01-22 16:13:53.401163: step: 104/459, loss: 0.1375080943107605 2023-01-22 16:13:53.999339: step: 106/459, loss: 0.08323484659194946 2023-01-22 16:13:54.622833: step: 108/459, loss: 0.15723548829555511 2023-01-22 16:13:55.264460: step: 110/459, loss: 0.22374866902828217 2023-01-22 16:13:55.890401: step: 112/459, loss: 0.17939354479312897 2023-01-22 16:13:56.519893: step: 114/459, loss: 0.08194570988416672 2023-01-22 16:13:57.130306: step: 116/459, loss: 0.10507726669311523 2023-01-22 16:13:57.736975: step: 118/459, loss: 0.16655859351158142 2023-01-22 16:13:58.288056: step: 120/459, loss: 0.08620044589042664 2023-01-22 16:13:58.916636: step: 122/459, loss: 0.06111699342727661 2023-01-22 16:13:59.557832: step: 124/459, loss: 0.04450435936450958 2023-01-22 16:14:00.156486: step: 126/459, loss: 0.1009693369269371 2023-01-22 16:14:00.882588: step: 128/459, loss: 0.06164734065532684 2023-01-22 16:14:01.532702: step: 130/459, loss: 0.08032125234603882 2023-01-22 16:14:02.192559: step: 132/459, loss: 0.2128690779209137 2023-01-22 16:14:02.863703: step: 134/459, loss: 0.1892033815383911 2023-01-22 16:14:03.506556: step: 136/459, loss: 0.282770574092865 2023-01-22 16:14:04.118430: step: 138/459, loss: 0.11994190514087677 2023-01-22 16:14:04.740806: step: 140/459, loss: 0.06139950081706047 2023-01-22 16:14:05.327265: step: 142/459, loss: 0.042752642184495926 2023-01-22 16:14:05.910047: step: 144/459, loss: 0.07353543490171432 2023-01-22 16:14:06.540821: step: 146/459, loss: 0.07029826939105988 2023-01-22 16:14:07.093595: step: 148/459, loss: 0.5246962308883667 2023-01-22 16:14:07.670520: step: 150/459, loss: 0.0563591830432415 2023-01-22 16:14:08.267133: step: 152/459, loss: 0.01124170795083046 2023-01-22 16:14:08.890004: step: 154/459, loss: 0.24180348217487335 2023-01-22 16:14:09.496012: step: 156/459, loss: 0.08453784137964249 2023-01-22 16:14:10.055537: step: 158/459, loss: 0.08879511058330536 2023-01-22 16:14:10.702672: step: 160/459, loss: 0.08013739436864853 2023-01-22 16:14:11.344931: step: 162/459, loss: 0.29491478204727173 2023-01-22 16:14:11.974060: step: 164/459, loss: 0.09980983287096024 2023-01-22 16:14:12.650981: step: 166/459, loss: 0.11042336374521255 2023-01-22 16:14:13.433938: step: 168/459, loss: 0.6062020659446716 2023-01-22 16:14:14.142549: step: 170/459, loss: 0.06307248026132584 2023-01-22 16:14:14.863621: step: 172/459, loss: 0.2507913410663605 2023-01-22 16:14:15.513198: step: 174/459, loss: 0.1086149513721466 2023-01-22 16:14:16.124301: step: 176/459, loss: 0.07093862444162369 2023-01-22 16:14:16.772253: step: 178/459, loss: 0.6770135164260864 2023-01-22 16:14:17.392047: step: 180/459, loss: 0.22331413626670837 2023-01-22 16:14:18.127462: step: 182/459, loss: 0.06860455125570297 2023-01-22 16:14:18.743895: step: 184/459, loss: 0.15400372445583344 2023-01-22 16:14:19.353370: step: 186/459, loss: 0.11092140525579453 2023-01-22 16:14:20.026869: step: 188/459, loss: 0.10555815696716309 2023-01-22 16:14:20.648416: step: 190/459, loss: 0.11116554588079453 2023-01-22 16:14:21.235590: step: 192/459, loss: 0.06893973797559738 2023-01-22 16:14:21.860529: step: 194/459, loss: 0.11285433173179626 2023-01-22 16:14:22.444368: step: 196/459, loss: 0.15193326771259308 2023-01-22 16:14:23.101803: step: 198/459, loss: 3.56915020942688 2023-01-22 16:14:23.751210: step: 200/459, loss: 0.03643273562192917 2023-01-22 16:14:24.398010: step: 202/459, loss: 0.2721875011920929 2023-01-22 16:14:25.026027: step: 204/459, loss: 0.08110247552394867 2023-01-22 16:14:25.712660: step: 206/459, loss: 0.218644917011261 2023-01-22 16:14:26.279726: step: 208/459, loss: 0.015305077657103539 2023-01-22 16:14:26.896336: step: 210/459, loss: 0.0752711370587349 2023-01-22 16:14:27.551485: step: 212/459, loss: 0.6018018126487732 2023-01-22 16:14:28.188428: step: 214/459, loss: 0.09494287520647049 2023-01-22 16:14:28.860772: step: 216/459, loss: 0.2864401936531067 2023-01-22 16:14:29.458633: step: 218/459, loss: 0.12717193365097046 2023-01-22 16:14:30.097950: step: 220/459, loss: 0.4627942442893982 2023-01-22 16:14:30.700653: step: 222/459, loss: 0.0603732205927372 2023-01-22 16:14:31.329278: step: 224/459, loss: 3.3956806659698486 2023-01-22 16:14:31.984782: step: 226/459, loss: 0.7265195250511169 2023-01-22 16:14:32.620070: step: 228/459, loss: 0.033690229058265686 2023-01-22 16:14:33.234691: step: 230/459, loss: 0.06018483266234398 2023-01-22 16:14:33.882058: step: 232/459, loss: 0.1150030791759491 2023-01-22 16:14:34.520713: step: 234/459, loss: 0.2931418716907501 2023-01-22 16:14:35.184476: step: 236/459, loss: 0.09669801592826843 2023-01-22 16:14:35.816652: step: 238/459, loss: 0.06385646760463715 2023-01-22 16:14:36.507803: step: 240/459, loss: 0.0499483197927475 2023-01-22 16:14:37.104087: step: 242/459, loss: 0.06243397295475006 2023-01-22 16:14:37.681803: step: 244/459, loss: 0.22294658422470093 2023-01-22 16:14:38.348334: step: 246/459, loss: 0.27316561341285706 2023-01-22 16:14:38.951393: step: 248/459, loss: 0.4500230550765991 2023-01-22 16:14:39.557695: step: 250/459, loss: 0.04577363654971123 2023-01-22 16:14:40.234385: step: 252/459, loss: 0.043319497257471085 2023-01-22 16:14:40.799549: step: 254/459, loss: 0.1364111751317978 2023-01-22 16:14:41.405269: step: 256/459, loss: 0.15574726462364197 2023-01-22 16:14:41.921455: step: 258/459, loss: 0.13959038257598877 2023-01-22 16:14:42.552516: step: 260/459, loss: 0.481312096118927 2023-01-22 16:14:43.150814: step: 262/459, loss: 0.06392300128936768 2023-01-22 16:14:43.778822: step: 264/459, loss: 0.05714191868901253 2023-01-22 16:14:44.473494: step: 266/459, loss: 0.059793081134557724 2023-01-22 16:14:45.080496: step: 268/459, loss: 0.10395845770835876 2023-01-22 16:14:45.660476: step: 270/459, loss: 1.2195897102355957 2023-01-22 16:14:46.392051: step: 272/459, loss: 0.21321937441825867 2023-01-22 16:14:47.016684: step: 274/459, loss: 0.15510378777980804 2023-01-22 16:14:47.604212: step: 276/459, loss: 0.14464609324932098 2023-01-22 16:14:48.205175: step: 278/459, loss: 1.555549144744873 2023-01-22 16:14:48.810437: step: 280/459, loss: 0.29548969864845276 2023-01-22 16:14:49.489707: step: 282/459, loss: 0.08344057947397232 2023-01-22 16:14:50.145247: step: 284/459, loss: 0.05372897535562515 2023-01-22 16:14:50.728716: step: 286/459, loss: 0.16035722196102142 2023-01-22 16:14:51.344202: step: 288/459, loss: 0.17348527908325195 2023-01-22 16:14:51.959356: step: 290/459, loss: 0.03860146179795265 2023-01-22 16:14:52.540571: step: 292/459, loss: 0.4313651919364929 2023-01-22 16:14:53.132718: step: 294/459, loss: 0.05461354926228523 2023-01-22 16:14:53.741232: step: 296/459, loss: 0.1210990771651268 2023-01-22 16:14:54.386264: step: 298/459, loss: 1.6519839763641357 2023-01-22 16:14:55.062531: step: 300/459, loss: 0.11350032687187195 2023-01-22 16:14:55.687017: step: 302/459, loss: 0.06681153178215027 2023-01-22 16:14:56.368545: step: 304/459, loss: 0.06232345104217529 2023-01-22 16:14:57.063051: step: 306/459, loss: 0.3056487441062927 2023-01-22 16:14:57.704629: step: 308/459, loss: 0.08775142580270767 2023-01-22 16:14:58.365031: step: 310/459, loss: 0.21422764658927917 2023-01-22 16:14:59.001523: step: 312/459, loss: 0.10430175811052322 2023-01-22 16:14:59.620680: step: 314/459, loss: 0.13841083645820618 2023-01-22 16:15:00.217023: step: 316/459, loss: 0.15572680532932281 2023-01-22 16:15:00.925690: step: 318/459, loss: 0.024756353348493576 2023-01-22 16:15:01.576215: step: 320/459, loss: 0.17997482419013977 2023-01-22 16:15:02.168020: step: 322/459, loss: 1.0330842733383179 2023-01-22 16:15:02.777388: step: 324/459, loss: 0.03922523558139801 2023-01-22 16:15:03.422956: step: 326/459, loss: 0.1440928876399994 2023-01-22 16:15:04.057960: step: 328/459, loss: 0.1018076166510582 2023-01-22 16:15:04.632694: step: 330/459, loss: 0.03137773647904396 2023-01-22 16:15:05.296170: step: 332/459, loss: 0.28440091013908386 2023-01-22 16:15:05.895959: step: 334/459, loss: 0.10121467709541321 2023-01-22 16:15:06.539825: step: 336/459, loss: 0.12572842836380005 2023-01-22 16:15:07.126313: step: 338/459, loss: 0.03452789783477783 2023-01-22 16:15:07.720951: step: 340/459, loss: 0.0731254518032074 2023-01-22 16:15:08.432017: step: 342/459, loss: 0.5611311793327332 2023-01-22 16:15:09.041877: step: 344/459, loss: 0.10208314657211304 2023-01-22 16:15:09.762882: step: 346/459, loss: 0.05241439864039421 2023-01-22 16:15:10.531412: step: 348/459, loss: 0.1152397096157074 2023-01-22 16:15:11.145922: step: 350/459, loss: 0.11780358850955963 2023-01-22 16:15:11.765028: step: 352/459, loss: 0.13754092156887054 2023-01-22 16:15:12.390569: step: 354/459, loss: 0.743360161781311 2023-01-22 16:15:13.006798: step: 356/459, loss: 0.12185235321521759 2023-01-22 16:15:13.669816: step: 358/459, loss: 0.07792577892541885 2023-01-22 16:15:14.277779: step: 360/459, loss: 0.10029002279043198 2023-01-22 16:15:14.897295: step: 362/459, loss: 0.055654555559158325 2023-01-22 16:15:15.495238: step: 364/459, loss: 0.08485683053731918 2023-01-22 16:15:16.082038: step: 366/459, loss: 0.05664955824613571 2023-01-22 16:15:16.689169: step: 368/459, loss: 0.2190289944410324 2023-01-22 16:15:17.287996: step: 370/459, loss: 0.0824706107378006 2023-01-22 16:15:17.888668: step: 372/459, loss: 0.14971604943275452 2023-01-22 16:15:18.566015: step: 374/459, loss: 0.24603766202926636 2023-01-22 16:15:19.224819: step: 376/459, loss: 0.04757764935493469 2023-01-22 16:15:19.806441: step: 378/459, loss: 1.9808703660964966 2023-01-22 16:15:20.407796: step: 380/459, loss: 0.11423568427562714 2023-01-22 16:15:20.997996: step: 382/459, loss: 0.09762033075094223 2023-01-22 16:15:21.561489: step: 384/459, loss: 0.03450421243906021 2023-01-22 16:15:22.139322: step: 386/459, loss: 0.08658834546804428 2023-01-22 16:15:22.734884: step: 388/459, loss: 0.07874707877635956 2023-01-22 16:15:23.312588: step: 390/459, loss: 0.15308307111263275 2023-01-22 16:15:23.915311: step: 392/459, loss: 0.003502724226564169 2023-01-22 16:15:24.566410: step: 394/459, loss: 0.21341486275196075 2023-01-22 16:15:25.144451: step: 396/459, loss: 0.163757786154747 2023-01-22 16:15:25.777326: step: 398/459, loss: 0.6673762798309326 2023-01-22 16:15:26.341390: step: 400/459, loss: 0.06104438751935959 2023-01-22 16:15:27.002465: step: 402/459, loss: 0.07785363495349884 2023-01-22 16:15:27.584151: step: 404/459, loss: 0.14936119318008423 2023-01-22 16:15:28.181084: step: 406/459, loss: 0.0882159173488617 2023-01-22 16:15:28.854350: step: 408/459, loss: 0.10267335176467896 2023-01-22 16:15:29.478359: step: 410/459, loss: 0.06043518707156181 2023-01-22 16:15:30.092802: step: 412/459, loss: 0.14828315377235413 2023-01-22 16:15:30.720749: step: 414/459, loss: 0.07952707260847092 2023-01-22 16:15:31.306718: step: 416/459, loss: 0.15380056202411652 2023-01-22 16:15:31.964066: step: 418/459, loss: 0.13499809801578522 2023-01-22 16:15:32.591831: step: 420/459, loss: 0.028338994830846786 2023-01-22 16:15:33.269951: step: 422/459, loss: 0.08296581357717514 2023-01-22 16:15:33.930204: step: 424/459, loss: 0.08197712153196335 2023-01-22 16:15:34.564103: step: 426/459, loss: 0.14740677177906036 2023-01-22 16:15:35.141616: step: 428/459, loss: 0.10418958216905594 2023-01-22 16:15:35.740679: step: 430/459, loss: 0.06694205105304718 2023-01-22 16:15:36.358874: step: 432/459, loss: 0.040093135088682175 2023-01-22 16:15:37.063040: step: 434/459, loss: 0.17116259038448334 2023-01-22 16:15:37.626422: step: 436/459, loss: 0.033285629004240036 2023-01-22 16:15:38.264224: step: 438/459, loss: 0.04399026185274124 2023-01-22 16:15:38.900781: step: 440/459, loss: 0.11168336123228073 2023-01-22 16:15:39.501554: step: 442/459, loss: 0.10938329994678497 2023-01-22 16:15:40.111756: step: 444/459, loss: 0.0911632850766182 2023-01-22 16:15:40.734943: step: 446/459, loss: 0.05152979493141174 2023-01-22 16:15:41.378527: step: 448/459, loss: 0.02795613929629326 2023-01-22 16:15:42.031895: step: 450/459, loss: 0.17604242265224457 2023-01-22 16:15:42.685510: step: 452/459, loss: 0.6229467988014221 2023-01-22 16:15:43.416485: step: 454/459, loss: 0.06544727832078934 2023-01-22 16:15:44.079376: step: 456/459, loss: 0.09000267088413239 2023-01-22 16:15:44.723369: step: 458/459, loss: 0.09480736404657364 2023-01-22 16:15:45.301957: step: 460/459, loss: 0.10590202361345291 2023-01-22 16:15:45.965560: step: 462/459, loss: 0.15232284367084503 2023-01-22 16:15:46.588767: step: 464/459, loss: 0.04087786749005318 2023-01-22 16:15:47.196339: step: 466/459, loss: 0.19096140563488007 2023-01-22 16:15:47.849196: step: 468/459, loss: 0.43249499797821045 2023-01-22 16:15:48.406146: step: 470/459, loss: 0.0535280816257 2023-01-22 16:15:49.030697: step: 472/459, loss: 0.002964629791676998 2023-01-22 16:15:49.703059: step: 474/459, loss: 0.15261296927928925 2023-01-22 16:15:50.276053: step: 476/459, loss: 0.14857709407806396 2023-01-22 16:15:50.923677: step: 478/459, loss: 0.20442980527877808 2023-01-22 16:15:51.487842: step: 480/459, loss: 0.043235257267951965 2023-01-22 16:15:52.120035: step: 482/459, loss: 0.06433428078889847 2023-01-22 16:15:52.751696: step: 484/459, loss: 0.34200647473335266 2023-01-22 16:15:53.395631: step: 486/459, loss: 0.1582440733909607 2023-01-22 16:15:54.136270: step: 488/459, loss: 0.11173295974731445 2023-01-22 16:15:54.759995: step: 490/459, loss: 0.07663121819496155 2023-01-22 16:15:55.391682: step: 492/459, loss: 0.021138040348887444 2023-01-22 16:15:56.048522: step: 494/459, loss: 0.22960183024406433 2023-01-22 16:15:56.691077: step: 496/459, loss: 0.13404521346092224 2023-01-22 16:15:57.293840: step: 498/459, loss: 1.8416216373443604 2023-01-22 16:15:57.935105: step: 500/459, loss: 0.07688159495592117 2023-01-22 16:15:58.537182: step: 502/459, loss: 0.076439268887043 2023-01-22 16:15:59.173297: step: 504/459, loss: 0.16020862758159637 2023-01-22 16:15:59.793415: step: 506/459, loss: 0.2024340182542801 2023-01-22 16:16:00.508774: step: 508/459, loss: 0.22210797667503357 2023-01-22 16:16:01.128823: step: 510/459, loss: 0.1276715248823166 2023-01-22 16:16:01.796231: step: 512/459, loss: 0.11908185482025146 2023-01-22 16:16:02.398714: step: 514/459, loss: 0.12347186356782913 2023-01-22 16:16:02.987477: step: 516/459, loss: 0.27316027879714966 2023-01-22 16:16:03.562652: step: 518/459, loss: 0.09478378295898438 2023-01-22 16:16:04.230712: step: 520/459, loss: 0.5936716198921204 2023-01-22 16:16:04.805874: step: 522/459, loss: 0.20154812932014465 2023-01-22 16:16:05.410607: step: 524/459, loss: 0.06496135890483856 2023-01-22 16:16:05.998316: step: 526/459, loss: 0.1685427874326706 2023-01-22 16:16:06.655372: step: 528/459, loss: 0.20569060742855072 2023-01-22 16:16:07.313222: step: 530/459, loss: 0.09706541150808334 2023-01-22 16:16:07.929893: step: 532/459, loss: 0.09817253798246384 2023-01-22 16:16:08.595460: step: 534/459, loss: 0.07050872594118118 2023-01-22 16:16:09.249630: step: 536/459, loss: 0.1510125994682312 2023-01-22 16:16:09.899796: step: 538/459, loss: 0.18187086284160614 2023-01-22 16:16:10.480001: step: 540/459, loss: 0.06031858175992966 2023-01-22 16:16:11.192871: step: 542/459, loss: 0.5724014639854431 2023-01-22 16:16:11.819257: step: 544/459, loss: 0.1680055856704712 2023-01-22 16:16:12.415896: step: 546/459, loss: 0.06514791399240494 2023-01-22 16:16:13.045837: step: 548/459, loss: 0.15522830188274384 2023-01-22 16:16:13.797689: step: 550/459, loss: 0.6444486379623413 2023-01-22 16:16:14.387637: step: 552/459, loss: 0.10339345037937164 2023-01-22 16:16:15.015726: step: 554/459, loss: 0.16707591712474823 2023-01-22 16:16:15.545597: step: 556/459, loss: 0.7283487915992737 2023-01-22 16:16:16.123021: step: 558/459, loss: 0.2545148730278015 2023-01-22 16:16:16.777462: step: 560/459, loss: 0.45186224579811096 2023-01-22 16:16:17.580372: step: 562/459, loss: 0.08149199187755585 2023-01-22 16:16:18.195424: step: 564/459, loss: 0.11276412755250931 2023-01-22 16:16:18.775624: step: 566/459, loss: 0.05356033146381378 2023-01-22 16:16:19.478926: step: 568/459, loss: 0.02735983021557331 2023-01-22 16:16:20.098236: step: 570/459, loss: 0.44685453176498413 2023-01-22 16:16:20.715442: step: 572/459, loss: 0.07278413325548172 2023-01-22 16:16:21.337387: step: 574/459, loss: 0.07996215671300888 2023-01-22 16:16:21.961019: step: 576/459, loss: 0.04798819497227669 2023-01-22 16:16:22.563485: step: 578/459, loss: 0.10141680389642715 2023-01-22 16:16:23.210646: step: 580/459, loss: 0.04212584346532822 2023-01-22 16:16:23.924801: step: 582/459, loss: 0.060311898589134216 2023-01-22 16:16:24.568922: step: 584/459, loss: 0.07880634069442749 2023-01-22 16:16:25.162581: step: 586/459, loss: 0.18628142774105072 2023-01-22 16:16:25.761749: step: 588/459, loss: 0.0820527970790863 2023-01-22 16:16:26.307659: step: 590/459, loss: 0.056586481630802155 2023-01-22 16:16:27.007238: step: 592/459, loss: 0.2896645963191986 2023-01-22 16:16:27.601023: step: 594/459, loss: 0.24446147680282593 2023-01-22 16:16:28.238762: step: 596/459, loss: 0.07182004302740097 2023-01-22 16:16:28.918750: step: 598/459, loss: 0.16820387542247772 2023-01-22 16:16:29.567377: step: 600/459, loss: 0.1183542013168335 2023-01-22 16:16:30.160806: step: 602/459, loss: 3.1726067066192627 2023-01-22 16:16:30.814106: step: 604/459, loss: 0.11451217532157898 2023-01-22 16:16:31.500968: step: 606/459, loss: 0.13916495442390442 2023-01-22 16:16:32.147015: step: 608/459, loss: 0.07187788188457489 2023-01-22 16:16:32.802020: step: 610/459, loss: 0.6635444760322571 2023-01-22 16:16:33.467447: step: 612/459, loss: 0.713933527469635 2023-01-22 16:16:34.037011: step: 614/459, loss: 1.0369352102279663 2023-01-22 16:16:34.639153: step: 616/459, loss: 0.09023985266685486 2023-01-22 16:16:35.352069: step: 618/459, loss: 0.10964377224445343 2023-01-22 16:16:35.911883: step: 620/459, loss: 0.06577864289283752 2023-01-22 16:16:36.630543: step: 622/459, loss: 0.057988908141851425 2023-01-22 16:16:37.258621: step: 624/459, loss: 0.04534882679581642 2023-01-22 16:16:37.842158: step: 626/459, loss: 0.06343047320842743 2023-01-22 16:16:38.483179: step: 628/459, loss: 0.08727064728736877 2023-01-22 16:16:39.169871: step: 630/459, loss: 0.9711803793907166 2023-01-22 16:16:39.770095: step: 632/459, loss: 0.041177455335855484 2023-01-22 16:16:40.394332: step: 634/459, loss: 0.16685906052589417 2023-01-22 16:16:41.042497: step: 636/459, loss: 0.1553511619567871 2023-01-22 16:16:41.728539: step: 638/459, loss: 0.13245470821857452 2023-01-22 16:16:42.366757: step: 640/459, loss: 0.1090826541185379 2023-01-22 16:16:42.944753: step: 642/459, loss: 0.1835288554430008 2023-01-22 16:16:43.548669: step: 644/459, loss: 0.18024401366710663 2023-01-22 16:16:44.172506: step: 646/459, loss: 0.09970470517873764 2023-01-22 16:16:44.766140: step: 648/459, loss: 0.1630219966173172 2023-01-22 16:16:45.438349: step: 650/459, loss: 0.07819847017526627 2023-01-22 16:16:46.008227: step: 652/459, loss: 0.1266017109155655 2023-01-22 16:16:46.636721: step: 654/459, loss: 0.15270422399044037 2023-01-22 16:16:47.300035: step: 656/459, loss: 0.17346946895122528 2023-01-22 16:16:47.923207: step: 658/459, loss: 0.13964225351810455 2023-01-22 16:16:48.523949: step: 660/459, loss: 0.061097558587789536 2023-01-22 16:16:49.232259: step: 662/459, loss: 0.15297260880470276 2023-01-22 16:16:49.941153: step: 664/459, loss: 0.058266300708055496 2023-01-22 16:16:50.571119: step: 666/459, loss: 0.07962194830179214 2023-01-22 16:16:51.192660: step: 668/459, loss: 0.1426120549440384 2023-01-22 16:16:51.807900: step: 670/459, loss: 0.06565774232149124 2023-01-22 16:16:52.444700: step: 672/459, loss: 0.10415282845497131 2023-01-22 16:16:53.117772: step: 674/459, loss: 0.16144612431526184 2023-01-22 16:16:53.758982: step: 676/459, loss: 0.06459441035985947 2023-01-22 16:16:54.411748: step: 678/459, loss: 0.2903662919998169 2023-01-22 16:16:55.083422: step: 680/459, loss: 0.06512487679719925 2023-01-22 16:16:55.768397: step: 682/459, loss: 0.18803299963474274 2023-01-22 16:16:56.457171: step: 684/459, loss: 0.0990016907453537 2023-01-22 16:16:57.071067: step: 686/459, loss: 0.07700912654399872 2023-01-22 16:16:57.717180: step: 688/459, loss: 0.15820688009262085 2023-01-22 16:16:58.347730: step: 690/459, loss: 0.11562884598970413 2023-01-22 16:16:58.969106: step: 692/459, loss: 0.02071470208466053 2023-01-22 16:16:59.581795: step: 694/459, loss: 0.1472952663898468 2023-01-22 16:17:00.226869: step: 696/459, loss: 0.6280515193939209 2023-01-22 16:17:00.822945: step: 698/459, loss: 0.056831423193216324 2023-01-22 16:17:01.476995: step: 700/459, loss: 0.1400810182094574 2023-01-22 16:17:02.069929: step: 702/459, loss: 0.1043766513466835 2023-01-22 16:17:02.691303: step: 704/459, loss: 1.1153489351272583 2023-01-22 16:17:03.281025: step: 706/459, loss: 0.5385883450508118 2023-01-22 16:17:03.940480: step: 708/459, loss: 0.08825590461492538 2023-01-22 16:17:04.620488: step: 710/459, loss: 0.041418902575969696 2023-01-22 16:17:05.222321: step: 712/459, loss: 0.2109055370092392 2023-01-22 16:17:05.856652: step: 714/459, loss: 0.18201333284378052 2023-01-22 16:17:06.534612: step: 716/459, loss: 0.18957893550395966 2023-01-22 16:17:07.289830: step: 718/459, loss: 0.27045783400535583 2023-01-22 16:17:08.031098: step: 720/459, loss: 0.09271743893623352 2023-01-22 16:17:08.689889: step: 722/459, loss: 1.0764331817626953 2023-01-22 16:17:09.290542: step: 724/459, loss: 0.06470192223787308 2023-01-22 16:17:09.998933: step: 726/459, loss: 0.08558265119791031 2023-01-22 16:17:10.629586: step: 728/459, loss: 0.0790066048502922 2023-01-22 16:17:11.317695: step: 730/459, loss: 0.11137638241052628 2023-01-22 16:17:11.913441: step: 732/459, loss: 0.10007143765687943 2023-01-22 16:17:12.485848: step: 734/459, loss: 0.016059981659054756 2023-01-22 16:17:13.133008: step: 736/459, loss: 0.19019532203674316 2023-01-22 16:17:13.768267: step: 738/459, loss: 0.23406066000461578 2023-01-22 16:17:14.382205: step: 740/459, loss: 0.29490312933921814 2023-01-22 16:17:15.016463: step: 742/459, loss: 0.1696939468383789 2023-01-22 16:17:15.639204: step: 744/459, loss: 0.31100764870643616 2023-01-22 16:17:16.246643: step: 746/459, loss: 0.09923236072063446 2023-01-22 16:17:16.881443: step: 748/459, loss: 0.03281458467245102 2023-01-22 16:17:17.507897: step: 750/459, loss: 0.0721248984336853 2023-01-22 16:17:18.159164: step: 752/459, loss: 0.5520293116569519 2023-01-22 16:17:18.884633: step: 754/459, loss: 0.3913125693798065 2023-01-22 16:17:19.472421: step: 756/459, loss: 0.08478737622499466 2023-01-22 16:17:20.107930: step: 758/459, loss: 0.08430156856775284 2023-01-22 16:17:20.753205: step: 760/459, loss: 0.4148305058479309 2023-01-22 16:17:21.382762: step: 762/459, loss: 0.06802643835544586 2023-01-22 16:17:22.047169: step: 764/459, loss: 0.43473055958747864 2023-01-22 16:17:22.617219: step: 766/459, loss: 0.09546316415071487 2023-01-22 16:17:23.266150: step: 768/459, loss: 0.14414992928504944 2023-01-22 16:17:23.851403: step: 770/459, loss: 0.07112233340740204 2023-01-22 16:17:24.508712: step: 772/459, loss: 0.023431621491909027 2023-01-22 16:17:25.197589: step: 774/459, loss: 0.14973464608192444 2023-01-22 16:17:25.870742: step: 776/459, loss: 0.097756028175354 2023-01-22 16:17:26.467481: step: 778/459, loss: 0.08939177542924881 2023-01-22 16:17:27.102330: step: 780/459, loss: 0.1910543143749237 2023-01-22 16:17:27.688087: step: 782/459, loss: 0.6618527173995972 2023-01-22 16:17:28.345462: step: 784/459, loss: 0.1712627410888672 2023-01-22 16:17:29.050568: step: 786/459, loss: 0.15704227983951569 2023-01-22 16:17:29.677023: step: 788/459, loss: 0.08776414394378662 2023-01-22 16:17:30.270142: step: 790/459, loss: 0.0939122885465622 2023-01-22 16:17:30.918189: step: 792/459, loss: 0.08852482587099075 2023-01-22 16:17:31.517670: step: 794/459, loss: 0.05152321234345436 2023-01-22 16:17:32.153940: step: 796/459, loss: 0.20047429203987122 2023-01-22 16:17:32.727446: step: 798/459, loss: 0.13595029711723328 2023-01-22 16:17:33.423736: step: 800/459, loss: 0.19447734951972961 2023-01-22 16:17:34.064561: step: 802/459, loss: 0.46443089842796326 2023-01-22 16:17:34.663706: step: 804/459, loss: 0.21205873787403107 2023-01-22 16:17:35.268677: step: 806/459, loss: 0.1320125013589859 2023-01-22 16:17:35.919375: step: 808/459, loss: 3.0995163917541504 2023-01-22 16:17:36.565542: step: 810/459, loss: 0.04587202146649361 2023-01-22 16:17:37.206509: step: 812/459, loss: 0.18947646021842957 2023-01-22 16:17:37.872975: step: 814/459, loss: 0.06886213272809982 2023-01-22 16:17:38.469607: step: 816/459, loss: 0.16461722552776337 2023-01-22 16:17:39.041363: step: 818/459, loss: 0.11392181366682053 2023-01-22 16:17:39.643799: step: 820/459, loss: 0.12290021777153015 2023-01-22 16:17:40.173362: step: 822/459, loss: 0.06942452490329742 2023-01-22 16:17:40.802805: step: 824/459, loss: 0.08687920868396759 2023-01-22 16:17:41.407394: step: 826/459, loss: 0.23973771929740906 2023-01-22 16:17:42.016159: step: 828/459, loss: 0.11288207024335861 2023-01-22 16:17:42.735710: step: 830/459, loss: 0.1564875692129135 2023-01-22 16:17:43.344014: step: 832/459, loss: 0.6924835443496704 2023-01-22 16:17:43.999012: step: 834/459, loss: 0.11713901162147522 2023-01-22 16:17:44.644756: step: 836/459, loss: 0.19068118929862976 2023-01-22 16:17:45.237294: step: 838/459, loss: 0.252514511346817 2023-01-22 16:17:45.799609: step: 840/459, loss: 0.08315566182136536 2023-01-22 16:17:46.438672: step: 842/459, loss: 0.10417691618204117 2023-01-22 16:17:47.083983: step: 844/459, loss: 0.15337271988391876 2023-01-22 16:17:47.735965: step: 846/459, loss: 0.12408004701137543 2023-01-22 16:17:48.335182: step: 848/459, loss: 0.20800061523914337 2023-01-22 16:17:48.953885: step: 850/459, loss: 0.2498859018087387 2023-01-22 16:17:49.572227: step: 852/459, loss: 0.2218315601348877 2023-01-22 16:17:50.172825: step: 854/459, loss: 0.1651669591665268 2023-01-22 16:17:50.751361: step: 856/459, loss: 0.08946865797042847 2023-01-22 16:17:51.355576: step: 858/459, loss: 0.054829102009534836 2023-01-22 16:17:51.965504: step: 860/459, loss: 0.28078263998031616 2023-01-22 16:17:52.582902: step: 862/459, loss: 0.03727496787905693 2023-01-22 16:17:53.167208: step: 864/459, loss: 0.04075774550437927 2023-01-22 16:17:53.794751: step: 866/459, loss: 0.21806031465530396 2023-01-22 16:17:54.455011: step: 868/459, loss: 0.05968962982296944 2023-01-22 16:17:55.079572: step: 870/459, loss: 0.05579304322600365 2023-01-22 16:17:55.711780: step: 872/459, loss: 0.13546620309352875 2023-01-22 16:17:56.313302: step: 874/459, loss: 0.18974390625953674 2023-01-22 16:17:56.919476: step: 876/459, loss: 0.03927962854504585 2023-01-22 16:17:57.479192: step: 878/459, loss: 0.19006875157356262 2023-01-22 16:17:58.116570: step: 880/459, loss: 0.07168001681566238 2023-01-22 16:17:58.712610: step: 882/459, loss: 0.07870712131261826 2023-01-22 16:17:59.304018: step: 884/459, loss: 0.1663609743118286 2023-01-22 16:17:59.868343: step: 886/459, loss: 0.07601575553417206 2023-01-22 16:18:00.496768: step: 888/459, loss: 0.1666986644268036 2023-01-22 16:18:01.132233: step: 890/459, loss: 0.04511537030339241 2023-01-22 16:18:01.794908: step: 892/459, loss: 0.24410556256771088 2023-01-22 16:18:02.429336: step: 894/459, loss: 0.06690314412117004 2023-01-22 16:18:02.978874: step: 896/459, loss: 0.3217136859893799 2023-01-22 16:18:03.487411: step: 898/459, loss: 0.03593762218952179 2023-01-22 16:18:04.053317: step: 900/459, loss: 0.0569002665579319 2023-01-22 16:18:04.624697: step: 902/459, loss: 0.15526026487350464 2023-01-22 16:18:05.220496: step: 904/459, loss: 0.08728202432394028 2023-01-22 16:18:05.844637: step: 906/459, loss: 0.03730348125100136 2023-01-22 16:18:06.457878: step: 908/459, loss: 0.46375882625579834 2023-01-22 16:18:07.094414: step: 910/459, loss: 0.08697223663330078 2023-01-22 16:18:07.698609: step: 912/459, loss: 0.016158483922481537 2023-01-22 16:18:08.293496: step: 914/459, loss: 0.1665673553943634 2023-01-22 16:18:08.948891: step: 916/459, loss: 0.05571366474032402 2023-01-22 16:18:09.651785: step: 918/459, loss: 3.768118143081665 2023-01-22 16:18:10.143433: step: 920/459, loss: 0.008426330052316189 ================================================== Loss: 0.219 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29696135578583766, 'r': 0.326263045540797, 'f1': 0.31092337251356245}, 'combined': 0.22910143237841443, 'epoch': 14} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33079188920157054, 'r': 0.32085910271963075, 'f1': 0.32574979574029955}, 'combined': 0.20847986927379167, 'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3022373949579832, 'r': 0.3412357685009488, 'f1': 0.32055481283422466}, 'combined': 0.23619828314100763, 'epoch': 14} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33241476380720225, 'r': 0.31700280657614105, 'f1': 0.32452590715100527}, 'combined': 0.20769658057664334, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31489238390625046, 'r': 0.33640305908770207, 'f1': 0.32529249933801646}, 'combined': 0.23968921003853844, 'epoch': 14} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3311601724333834, 'r': 0.3284407914553721, 'f1': 0.32979487625752435}, 'combined': 0.23645670373180994, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.18888888888888886, 'r': 0.32380952380952377, 'f1': 0.2385964912280701}, 'combined': 0.15906432748538007, 'epoch': 14} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.27941176470588236, 'r': 0.41304347826086957, 'f1': 0.33333333333333337}, 'combined': 0.16666666666666669, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.20689655172413793, 'f1': 0.2553191489361702}, 'combined': 0.17021276595744678, 'epoch': 14} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2965306761268781, 'r': 0.33704340607210626, 'f1': 0.315491785079929}, 'combined': 0.2324676311115266, 'epoch': 12} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31983745334792824, 'r': 0.32478489348160866, 'f1': 0.3222921877528559}, 'combined': 0.20626700016182775, 'epoch': 12} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23270440251572325, 'r': 0.35238095238095235, 'f1': 0.2803030303030303}, 'combined': 0.18686868686868685, 'epoch': 12} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29418702499903554, 'r': 0.3594998939267151, 'f1': 0.32358060478117656}, 'combined': 0.23842781404928798, 'epoch': 13} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3202762492367604, 'r': 0.31212376289255195, 'f1': 0.31614745781013553}, 'combined': 0.20233437299848672, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2878787878787879, 'r': 0.41304347826086957, 'f1': 0.3392857142857143}, 'combined': 0.16964285714285715, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31489238390625046, 'r': 0.33640305908770207, 'f1': 0.32529249933801646}, 'combined': 0.23968921003853844, 'epoch': 14} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3311601724333834, 'r': 0.3284407914553721, 'f1': 0.32979487625752435}, 'combined': 0.23645670373180994, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.20689655172413793, 'f1': 0.2553191489361702}, 'combined': 0.17021276595744678, 'epoch': 14} ****************************** Epoch: 15 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 16:20:53.516129: step: 2/459, loss: 0.5486410856246948 2023-01-22 16:20:54.109505: step: 4/459, loss: 0.289652943611145 2023-01-22 16:20:54.665246: step: 6/459, loss: 0.05959634482860565 2023-01-22 16:20:55.245458: step: 8/459, loss: 0.07039528340101242 2023-01-22 16:20:55.919355: step: 10/459, loss: 0.047656189650297165 2023-01-22 16:20:56.514962: step: 12/459, loss: 0.3625733554363251 2023-01-22 16:20:57.069304: step: 14/459, loss: 0.046565525233745575 2023-01-22 16:20:57.740207: step: 16/459, loss: 0.7882801294326782 2023-01-22 16:20:58.372013: step: 18/459, loss: 0.04174453020095825 2023-01-22 16:20:59.013394: step: 20/459, loss: 0.1813397854566574 2023-01-22 16:20:59.608859: step: 22/459, loss: 0.09234306216239929 2023-01-22 16:21:00.227259: step: 24/459, loss: 0.04229148104786873 2023-01-22 16:21:00.827727: step: 26/459, loss: 0.09445139020681381 2023-01-22 16:21:01.448446: step: 28/459, loss: 0.1375788003206253 2023-01-22 16:21:02.033739: step: 30/459, loss: 0.05185992270708084 2023-01-22 16:21:02.643931: step: 32/459, loss: 0.024089183658361435 2023-01-22 16:21:03.346187: step: 34/459, loss: 0.16061915457248688 2023-01-22 16:21:03.995479: step: 36/459, loss: 0.2136932611465454 2023-01-22 16:21:04.649524: step: 38/459, loss: 0.02996690943837166 2023-01-22 16:21:05.241893: step: 40/459, loss: 0.12510062754154205 2023-01-22 16:21:05.829232: step: 42/459, loss: 0.04629860073328018 2023-01-22 16:21:06.466332: step: 44/459, loss: 0.042977336794137955 2023-01-22 16:21:07.178225: step: 46/459, loss: 0.40165507793426514 2023-01-22 16:21:07.809935: step: 48/459, loss: 0.1573132425546646 2023-01-22 16:21:08.469579: step: 50/459, loss: 0.09597649425268173 2023-01-22 16:21:09.075532: step: 52/459, loss: 0.046925660222768784 2023-01-22 16:21:09.630082: step: 54/459, loss: 0.46771693229675293 2023-01-22 16:21:10.223677: step: 56/459, loss: 0.07231107354164124 2023-01-22 16:21:10.840393: step: 58/459, loss: 0.03494403138756752 2023-01-22 16:21:11.484944: step: 60/459, loss: 0.01978539116680622 2023-01-22 16:21:12.180063: step: 62/459, loss: 0.15288308262825012 2023-01-22 16:21:12.820390: step: 64/459, loss: 0.11064063012599945 2023-01-22 16:21:13.482679: step: 66/459, loss: 0.07883388549089432 2023-01-22 16:21:14.143382: step: 68/459, loss: 0.05859791114926338 2023-01-22 16:21:14.768412: step: 70/459, loss: 0.09513600915670395 2023-01-22 16:21:15.303333: step: 72/459, loss: 0.03309210017323494 2023-01-22 16:21:15.988131: step: 74/459, loss: 0.07499226927757263 2023-01-22 16:21:16.568634: step: 76/459, loss: 0.013237319886684418 2023-01-22 16:21:17.228592: step: 78/459, loss: 0.19404545426368713 2023-01-22 16:21:17.862779: step: 80/459, loss: 0.07593639194965363 2023-01-22 16:21:18.468567: step: 82/459, loss: 0.02960822917521 2023-01-22 16:21:19.052379: step: 84/459, loss: 0.10159771889448166 2023-01-22 16:21:19.754029: step: 86/459, loss: 0.0194355808198452 2023-01-22 16:21:20.340926: step: 88/459, loss: 0.09518440812826157 2023-01-22 16:21:21.035638: step: 90/459, loss: 0.1177462488412857 2023-01-22 16:21:21.647318: step: 92/459, loss: 0.07299747318029404 2023-01-22 16:21:22.310161: step: 94/459, loss: 0.5563223958015442 2023-01-22 16:21:22.973514: step: 96/459, loss: 0.07798074185848236 2023-01-22 16:21:23.678404: step: 98/459, loss: 0.038705021142959595 2023-01-22 16:21:24.275671: step: 100/459, loss: 0.0481891892850399 2023-01-22 16:21:24.853690: step: 102/459, loss: 0.04821006581187248 2023-01-22 16:21:25.507660: step: 104/459, loss: 0.04163314029574394 2023-01-22 16:21:26.120457: step: 106/459, loss: 0.09405163675546646 2023-01-22 16:21:26.773590: step: 108/459, loss: 0.11197163164615631 2023-01-22 16:21:27.368271: step: 110/459, loss: 0.040973905473947525 2023-01-22 16:21:27.982693: step: 112/459, loss: 0.08627942204475403 2023-01-22 16:21:28.552158: step: 114/459, loss: 0.1055140420794487 2023-01-22 16:21:29.194464: step: 116/459, loss: 0.093475341796875 2023-01-22 16:21:29.783106: step: 118/459, loss: 0.06007818505167961 2023-01-22 16:21:30.362039: step: 120/459, loss: 0.2576269209384918 2023-01-22 16:21:30.971581: step: 122/459, loss: 0.1734248846769333 2023-01-22 16:21:31.719053: step: 124/459, loss: 0.511833131313324 2023-01-22 16:21:32.362196: step: 126/459, loss: 0.039821017533540726 2023-01-22 16:21:33.016986: step: 128/459, loss: 0.07495009899139404 2023-01-22 16:21:33.609170: step: 130/459, loss: 0.03896324709057808 2023-01-22 16:21:34.221038: step: 132/459, loss: 0.12392791360616684 2023-01-22 16:21:34.857872: step: 134/459, loss: 0.20620664954185486 2023-01-22 16:21:35.508768: step: 136/459, loss: 0.0882606953382492 2023-01-22 16:21:36.140153: step: 138/459, loss: 0.0498078279197216 2023-01-22 16:21:36.793538: step: 140/459, loss: 0.06893911212682724 2023-01-22 16:21:37.397270: step: 142/459, loss: 0.07317329943180084 2023-01-22 16:21:37.999441: step: 144/459, loss: 0.021599983796477318 2023-01-22 16:21:38.625421: step: 146/459, loss: 0.0438101626932621 2023-01-22 16:21:39.265709: step: 148/459, loss: 0.024955661967396736 2023-01-22 16:21:39.883795: step: 150/459, loss: 0.14916422963142395 2023-01-22 16:21:40.532071: step: 152/459, loss: 0.0968284010887146 2023-01-22 16:21:41.114093: step: 154/459, loss: 0.12663382291793823 2023-01-22 16:21:41.856498: step: 156/459, loss: 0.06396165490150452 2023-01-22 16:21:42.505568: step: 158/459, loss: 0.05292266234755516 2023-01-22 16:21:43.129166: step: 160/459, loss: 0.040811069309711456 2023-01-22 16:21:43.729247: step: 162/459, loss: 0.346055805683136 2023-01-22 16:21:44.382166: step: 164/459, loss: 0.11508221179246902 2023-01-22 16:21:44.970672: step: 166/459, loss: 1.4586166143417358 2023-01-22 16:21:45.638391: step: 168/459, loss: 0.051573824137449265 2023-01-22 16:21:46.244376: step: 170/459, loss: 0.41678252816200256 2023-01-22 16:21:46.858307: step: 172/459, loss: 0.05154046788811684 2023-01-22 16:21:47.463696: step: 174/459, loss: 0.09927209466695786 2023-01-22 16:21:48.015454: step: 176/459, loss: 0.021224061027169228 2023-01-22 16:21:48.604693: step: 178/459, loss: 0.06455913186073303 2023-01-22 16:21:49.257705: step: 180/459, loss: 0.3284795880317688 2023-01-22 16:21:49.922683: step: 182/459, loss: 0.06135117635130882 2023-01-22 16:21:50.543451: step: 184/459, loss: 0.07916609942913055 2023-01-22 16:21:51.150865: step: 186/459, loss: 0.0522749088704586 2023-01-22 16:21:51.781646: step: 188/459, loss: 0.14456631243228912 2023-01-22 16:21:52.388805: step: 190/459, loss: 0.4673730134963989 2023-01-22 16:21:52.980107: step: 192/459, loss: 0.0965438261628151 2023-01-22 16:21:53.680828: step: 194/459, loss: 0.023068953305482864 2023-01-22 16:21:54.321123: step: 196/459, loss: 0.052555955946445465 2023-01-22 16:21:54.896195: step: 198/459, loss: 0.1828565150499344 2023-01-22 16:21:55.494618: step: 200/459, loss: 0.18767528235912323 2023-01-22 16:21:56.822103: step: 202/459, loss: 0.12679165601730347 2023-01-22 16:21:57.460809: step: 204/459, loss: 0.9313849806785583 2023-01-22 16:21:58.143305: step: 206/459, loss: 1.0794764757156372 2023-01-22 16:21:58.730493: step: 208/459, loss: 0.1592589020729065 2023-01-22 16:21:59.335096: step: 210/459, loss: 0.13033975660800934 2023-01-22 16:21:59.969817: step: 212/459, loss: 0.14950180053710938 2023-01-22 16:22:00.593793: step: 214/459, loss: 0.07056218385696411 2023-01-22 16:22:01.262189: step: 216/459, loss: 0.045484255999326706 2023-01-22 16:22:02.010269: step: 218/459, loss: 0.1092509925365448 2023-01-22 16:22:02.596107: step: 220/459, loss: 0.049923885613679886 2023-01-22 16:22:03.211334: step: 222/459, loss: 0.029810503125190735 2023-01-22 16:22:03.897957: step: 224/459, loss: 0.07330261915922165 2023-01-22 16:22:04.523447: step: 226/459, loss: 0.15135681629180908 2023-01-22 16:22:05.111365: step: 228/459, loss: 0.062113720923662186 2023-01-22 16:22:05.705184: step: 230/459, loss: 0.06076894700527191 2023-01-22 16:22:06.336666: step: 232/459, loss: 0.053891561925411224 2023-01-22 16:22:06.948605: step: 234/459, loss: 0.033323563635349274 2023-01-22 16:22:07.540231: step: 236/459, loss: 0.03629208728671074 2023-01-22 16:22:08.214999: step: 238/459, loss: 0.05915994569659233 2023-01-22 16:22:08.789733: step: 240/459, loss: 0.07891526818275452 2023-01-22 16:22:09.389600: step: 242/459, loss: 0.12058553844690323 2023-01-22 16:22:10.006585: step: 244/459, loss: 0.026138124987483025 2023-01-22 16:22:10.648376: step: 246/459, loss: 0.09974869340658188 2023-01-22 16:22:11.205555: step: 248/459, loss: 0.26063260436058044 2023-01-22 16:22:11.855591: step: 250/459, loss: 0.47929540276527405 2023-01-22 16:22:12.497476: step: 252/459, loss: 0.3003607988357544 2023-01-22 16:22:13.123798: step: 254/459, loss: 0.03765399008989334 2023-01-22 16:22:13.724959: step: 256/459, loss: 0.049710918217897415 2023-01-22 16:22:14.356602: step: 258/459, loss: 0.053628962486982346 2023-01-22 16:22:14.965117: step: 260/459, loss: 0.21738378703594208 2023-01-22 16:22:15.657643: step: 262/459, loss: 0.035200733691453934 2023-01-22 16:22:16.330723: step: 264/459, loss: 0.15992163121700287 2023-01-22 16:22:16.972351: step: 266/459, loss: 0.08184882253408432 2023-01-22 16:22:17.613481: step: 268/459, loss: 0.159784734249115 2023-01-22 16:22:18.266737: step: 270/459, loss: 8.433250427246094 2023-01-22 16:22:18.958852: step: 272/459, loss: 0.08354288339614868 2023-01-22 16:22:19.615020: step: 274/459, loss: 0.1503436267375946 2023-01-22 16:22:20.295189: step: 276/459, loss: 0.0878472775220871 2023-01-22 16:22:20.931367: step: 278/459, loss: 0.05583018437027931 2023-01-22 16:22:21.547074: step: 280/459, loss: 0.10404793918132782 2023-01-22 16:22:22.134659: step: 282/459, loss: 0.06407301127910614 2023-01-22 16:22:22.737582: step: 284/459, loss: 0.06415210664272308 2023-01-22 16:22:23.315580: step: 286/459, loss: 0.2422693967819214 2023-01-22 16:22:23.941680: step: 288/459, loss: 1.1582915782928467 2023-01-22 16:22:24.577447: step: 290/459, loss: 0.2877998948097229 2023-01-22 16:22:25.187112: step: 292/459, loss: 0.008561448194086552 2023-01-22 16:22:25.832839: step: 294/459, loss: 0.09687379747629166 2023-01-22 16:22:26.389377: step: 296/459, loss: 0.05026068910956383 2023-01-22 16:22:27.058058: step: 298/459, loss: 0.1625170260667801 2023-01-22 16:22:27.666078: step: 300/459, loss: 0.10092519223690033 2023-01-22 16:22:28.266876: step: 302/459, loss: 0.13453955948352814 2023-01-22 16:22:28.948973: step: 304/459, loss: 0.026802584528923035 2023-01-22 16:22:29.535486: step: 306/459, loss: 0.1018746942281723 2023-01-22 16:22:30.103503: step: 308/459, loss: 0.3878232538700104 2023-01-22 16:22:30.741462: step: 310/459, loss: 0.0527753047645092 2023-01-22 16:22:31.385580: step: 312/459, loss: 0.027200596407055855 2023-01-22 16:22:31.930533: step: 314/459, loss: 0.0486212782561779 2023-01-22 16:22:32.654937: step: 316/459, loss: 0.1892220824956894 2023-01-22 16:22:33.309879: step: 318/459, loss: 0.06702631711959839 2023-01-22 16:22:33.923323: step: 320/459, loss: 0.18653710186481476 2023-01-22 16:22:34.538377: step: 322/459, loss: 0.044236235320568085 2023-01-22 16:22:35.144177: step: 324/459, loss: 0.043203093111515045 2023-01-22 16:22:35.763790: step: 326/459, loss: 0.1008564680814743 2023-01-22 16:22:36.365167: step: 328/459, loss: 0.10758522897958755 2023-01-22 16:22:37.114028: step: 330/459, loss: 0.04766101762652397 2023-01-22 16:22:37.715565: step: 332/459, loss: 0.12776702642440796 2023-01-22 16:22:38.306378: step: 334/459, loss: 0.10224311798810959 2023-01-22 16:22:38.914627: step: 336/459, loss: 0.043369222432374954 2023-01-22 16:22:39.563118: step: 338/459, loss: 0.08268517255783081 2023-01-22 16:22:40.196915: step: 340/459, loss: 0.03749764338135719 2023-01-22 16:22:40.818591: step: 342/459, loss: 0.1489589512348175 2023-01-22 16:22:41.497906: step: 344/459, loss: 0.07285585254430771 2023-01-22 16:22:42.097698: step: 346/459, loss: 0.08438001573085785 2023-01-22 16:22:42.756756: step: 348/459, loss: 0.10672702640295029 2023-01-22 16:22:43.419104: step: 350/459, loss: 0.14821627736091614 2023-01-22 16:22:44.072379: step: 352/459, loss: 1.7046703100204468 2023-01-22 16:22:44.728350: step: 354/459, loss: 0.2455647587776184 2023-01-22 16:22:45.399769: step: 356/459, loss: 0.0616142712533474 2023-01-22 16:22:46.021244: step: 358/459, loss: 0.8370824456214905 2023-01-22 16:22:46.620269: step: 360/459, loss: 0.04065103828907013 2023-01-22 16:22:47.215290: step: 362/459, loss: 0.028982101008296013 2023-01-22 16:22:47.784522: step: 364/459, loss: 0.08164241164922714 2023-01-22 16:22:48.413530: step: 366/459, loss: 0.09720020741224289 2023-01-22 16:22:49.064025: step: 368/459, loss: 0.1177835613489151 2023-01-22 16:22:49.653991: step: 370/459, loss: 0.13128447532653809 2023-01-22 16:22:50.280944: step: 372/459, loss: 0.4007541835308075 2023-01-22 16:22:50.863936: step: 374/459, loss: 0.20391987264156342 2023-01-22 16:22:51.485225: step: 376/459, loss: 0.08523490279912949 2023-01-22 16:22:52.162394: step: 378/459, loss: 5.785248756408691 2023-01-22 16:22:52.863527: step: 380/459, loss: 0.047764867544174194 2023-01-22 16:22:53.556893: step: 382/459, loss: 0.21394607424736023 2023-01-22 16:22:54.197226: step: 384/459, loss: 0.14828188717365265 2023-01-22 16:22:54.889526: step: 386/459, loss: 0.0934901162981987 2023-01-22 16:22:55.445152: step: 388/459, loss: 0.27927079796791077 2023-01-22 16:22:56.068928: step: 390/459, loss: 0.049980029463768005 2023-01-22 16:22:56.653671: step: 392/459, loss: 0.26837122440338135 2023-01-22 16:22:57.304634: step: 394/459, loss: 0.02327386476099491 2023-01-22 16:22:57.889381: step: 396/459, loss: 0.13844193518161774 2023-01-22 16:22:58.551944: step: 398/459, loss: 0.07648002356290817 2023-01-22 16:22:59.165153: step: 400/459, loss: 0.0517784059047699 2023-01-22 16:22:59.836702: step: 402/459, loss: 0.049390144646167755 2023-01-22 16:23:00.421420: step: 404/459, loss: 0.13214673101902008 2023-01-22 16:23:01.036641: step: 406/459, loss: 0.10317973047494888 2023-01-22 16:23:01.655945: step: 408/459, loss: 0.07660428434610367 2023-01-22 16:23:02.253139: step: 410/459, loss: 0.04034743085503578 2023-01-22 16:23:02.878990: step: 412/459, loss: 0.07730445265769958 2023-01-22 16:23:03.519856: step: 414/459, loss: 0.11486756801605225 2023-01-22 16:23:04.101465: step: 416/459, loss: 0.1728941798210144 2023-01-22 16:23:04.763254: step: 418/459, loss: 0.4245995879173279 2023-01-22 16:23:05.354060: step: 420/459, loss: 0.03510960191488266 2023-01-22 16:23:06.024218: step: 422/459, loss: 0.19269749522209167 2023-01-22 16:23:06.584026: step: 424/459, loss: 0.08104848116636276 2023-01-22 16:23:07.183279: step: 426/459, loss: 0.11721151322126389 2023-01-22 16:23:07.730191: step: 428/459, loss: 0.09560471773147583 2023-01-22 16:23:08.356343: step: 430/459, loss: 0.06096576526761055 2023-01-22 16:23:09.020287: step: 432/459, loss: 0.06840670853853226 2023-01-22 16:23:09.679691: step: 434/459, loss: 0.03257577121257782 2023-01-22 16:23:10.325658: step: 436/459, loss: 0.9241122603416443 2023-01-22 16:23:10.991152: step: 438/459, loss: 0.15438951551914215 2023-01-22 16:23:11.663406: step: 440/459, loss: 0.06192932277917862 2023-01-22 16:23:12.331979: step: 442/459, loss: 0.10063299536705017 2023-01-22 16:23:12.938610: step: 444/459, loss: 0.09826772660017014 2023-01-22 16:23:13.551684: step: 446/459, loss: 0.48317044973373413 2023-01-22 16:23:14.218142: step: 448/459, loss: 0.08194407820701599 2023-01-22 16:23:14.884972: step: 450/459, loss: 0.33721572160720825 2023-01-22 16:23:15.479899: step: 452/459, loss: 0.4177247881889343 2023-01-22 16:23:16.148530: step: 454/459, loss: 0.049857743084430695 2023-01-22 16:23:16.847302: step: 456/459, loss: 0.06802026182413101 2023-01-22 16:23:17.449977: step: 458/459, loss: 0.021420303732156754 2023-01-22 16:23:18.063704: step: 460/459, loss: 0.12654490768909454 2023-01-22 16:23:18.641988: step: 462/459, loss: 0.08624296635389328 2023-01-22 16:23:19.259133: step: 464/459, loss: 0.34278762340545654 2023-01-22 16:23:19.903911: step: 466/459, loss: 0.03354499116539955 2023-01-22 16:23:20.516259: step: 468/459, loss: 2.1276679039001465 2023-01-22 16:23:21.124431: step: 470/459, loss: 0.24155183136463165 2023-01-22 16:23:21.675686: step: 472/459, loss: 0.06828545033931732 2023-01-22 16:23:22.370792: step: 474/459, loss: 0.2527075409889221 2023-01-22 16:23:22.961898: step: 476/459, loss: 0.02124977856874466 2023-01-22 16:23:23.586427: step: 478/459, loss: 0.39764872193336487 2023-01-22 16:23:24.207891: step: 480/459, loss: 0.11018143594264984 2023-01-22 16:23:24.801845: step: 482/459, loss: 0.15672901272773743 2023-01-22 16:23:25.463778: step: 484/459, loss: 1.0497149229049683 2023-01-22 16:23:26.075670: step: 486/459, loss: 0.3668913245201111 2023-01-22 16:23:26.704363: step: 488/459, loss: 0.08832050114870071 2023-01-22 16:23:27.337934: step: 490/459, loss: 0.0657416433095932 2023-01-22 16:23:27.955038: step: 492/459, loss: 0.1151801347732544 2023-01-22 16:23:28.561823: step: 494/459, loss: 0.14075328409671783 2023-01-22 16:23:29.223055: step: 496/459, loss: 0.13086363673210144 2023-01-22 16:23:29.870339: step: 498/459, loss: 0.07432903349399567 2023-01-22 16:23:30.487851: step: 500/459, loss: 0.06859371066093445 2023-01-22 16:23:31.182850: step: 502/459, loss: 0.3604695796966553 2023-01-22 16:23:31.773373: step: 504/459, loss: 4.746042251586914 2023-01-22 16:23:32.330482: step: 506/459, loss: 0.019115403294563293 2023-01-22 16:23:32.986485: step: 508/459, loss: 0.2635763883590698 2023-01-22 16:23:33.542756: step: 510/459, loss: 0.24438302218914032 2023-01-22 16:23:34.125285: step: 512/459, loss: 0.06822407245635986 2023-01-22 16:23:34.700256: step: 514/459, loss: 0.11425280570983887 2023-01-22 16:23:35.317936: step: 516/459, loss: 0.27335649728775024 2023-01-22 16:23:35.963772: step: 518/459, loss: 4.555061340332031 2023-01-22 16:23:36.592812: step: 520/459, loss: 0.17336434125900269 2023-01-22 16:23:37.184309: step: 522/459, loss: 0.01780852861702442 2023-01-22 16:23:37.810424: step: 524/459, loss: 0.13307854533195496 2023-01-22 16:23:38.353696: step: 526/459, loss: 0.05859757587313652 2023-01-22 16:23:39.021122: step: 528/459, loss: 0.044598087668418884 2023-01-22 16:23:39.620935: step: 530/459, loss: 0.15959309041500092 2023-01-22 16:23:40.248296: step: 532/459, loss: 0.1262839138507843 2023-01-22 16:23:40.998313: step: 534/459, loss: 0.1383325457572937 2023-01-22 16:23:41.588637: step: 536/459, loss: 0.17561329901218414 2023-01-22 16:23:42.195263: step: 538/459, loss: 0.09374985098838806 2023-01-22 16:23:42.756434: step: 540/459, loss: 0.22331416606903076 2023-01-22 16:23:43.281232: step: 542/459, loss: 0.17585532367229462 2023-01-22 16:23:43.902656: step: 544/459, loss: 0.0829617902636528 2023-01-22 16:23:44.558743: step: 546/459, loss: 0.9492756128311157 2023-01-22 16:23:45.159087: step: 548/459, loss: 0.17642751336097717 2023-01-22 16:23:45.806174: step: 550/459, loss: 0.1915162354707718 2023-01-22 16:23:46.430063: step: 552/459, loss: 0.33816400170326233 2023-01-22 16:23:47.043984: step: 554/459, loss: 0.034371547400951385 2023-01-22 16:23:47.661594: step: 556/459, loss: 0.08479378372430801 2023-01-22 16:23:48.332971: step: 558/459, loss: 0.10787676274776459 2023-01-22 16:23:48.989706: step: 560/459, loss: 0.24675242602825165 2023-01-22 16:23:49.598568: step: 562/459, loss: 0.08971472829580307 2023-01-22 16:23:50.264729: step: 564/459, loss: 0.08851435035467148 2023-01-22 16:23:50.926197: step: 566/459, loss: 0.2919875979423523 2023-01-22 16:23:51.525038: step: 568/459, loss: 0.05922119319438934 2023-01-22 16:23:52.130711: step: 570/459, loss: 0.25354665517807007 2023-01-22 16:23:52.823325: step: 572/459, loss: 0.08742742240428925 2023-01-22 16:23:53.451723: step: 574/459, loss: 0.4074710011482239 2023-01-22 16:23:54.062032: step: 576/459, loss: 0.10425861924886703 2023-01-22 16:23:54.710540: step: 578/459, loss: 0.10839827358722687 2023-01-22 16:23:55.335960: step: 580/459, loss: 0.17718730866909027 2023-01-22 16:23:55.992581: step: 582/459, loss: 0.07955067604780197 2023-01-22 16:23:56.649266: step: 584/459, loss: 0.04593948647379875 2023-01-22 16:23:57.301824: step: 586/459, loss: 0.1502593457698822 2023-01-22 16:23:57.948242: step: 588/459, loss: 0.21792159974575043 2023-01-22 16:23:58.559048: step: 590/459, loss: 0.36064812541007996 2023-01-22 16:23:59.174389: step: 592/459, loss: 0.11893970519304276 2023-01-22 16:23:59.778280: step: 594/459, loss: 0.03208649531006813 2023-01-22 16:24:00.521754: step: 596/459, loss: 0.1605861783027649 2023-01-22 16:24:01.177442: step: 598/459, loss: 0.3033224940299988 2023-01-22 16:24:01.784976: step: 600/459, loss: 0.049942102283239365 2023-01-22 16:24:02.358538: step: 602/459, loss: 0.013289282098412514 2023-01-22 16:24:02.979710: step: 604/459, loss: 0.04765566438436508 2023-01-22 16:24:03.635797: step: 606/459, loss: 0.1808035671710968 2023-01-22 16:24:04.272155: step: 608/459, loss: 0.11141428351402283 2023-01-22 16:24:04.879522: step: 610/459, loss: 0.06235422566533089 2023-01-22 16:24:05.471122: step: 612/459, loss: 0.03451959788799286 2023-01-22 16:24:06.099197: step: 614/459, loss: 0.0967143103480339 2023-01-22 16:24:06.749118: step: 616/459, loss: 0.23114973306655884 2023-01-22 16:24:07.356320: step: 618/459, loss: 0.12156976759433746 2023-01-22 16:24:08.005931: step: 620/459, loss: 0.31609785556793213 2023-01-22 16:24:08.595160: step: 622/459, loss: 0.16025526821613312 2023-01-22 16:24:09.249947: step: 624/459, loss: 0.10482238233089447 2023-01-22 16:24:09.913124: step: 626/459, loss: 0.20690646767616272 2023-01-22 16:24:10.500425: step: 628/459, loss: 0.9083549976348877 2023-01-22 16:24:11.072536: step: 630/459, loss: 0.23199217021465302 2023-01-22 16:24:11.641075: step: 632/459, loss: 0.0786832794547081 2023-01-22 16:24:12.243911: step: 634/459, loss: 0.02816798724234104 2023-01-22 16:24:12.867033: step: 636/459, loss: 0.14440679550170898 2023-01-22 16:24:13.405634: step: 638/459, loss: 0.05286209285259247 2023-01-22 16:24:14.029789: step: 640/459, loss: 0.1207784116268158 2023-01-22 16:24:14.636413: step: 642/459, loss: 0.03392457962036133 2023-01-22 16:24:15.289947: step: 644/459, loss: 0.06673998385667801 2023-01-22 16:24:15.915247: step: 646/459, loss: 0.113188236951828 2023-01-22 16:24:16.518952: step: 648/459, loss: 0.06829463690519333 2023-01-22 16:24:17.162500: step: 650/459, loss: 0.06262204051017761 2023-01-22 16:24:17.773082: step: 652/459, loss: 0.046639543026685715 2023-01-22 16:24:18.360851: step: 654/459, loss: 0.04701884835958481 2023-01-22 16:24:19.055653: step: 656/459, loss: 0.02455955184996128 2023-01-22 16:24:19.692761: step: 658/459, loss: 1.4135867357254028 2023-01-22 16:24:20.325264: step: 660/459, loss: 0.060525454580783844 2023-01-22 16:24:20.971687: step: 662/459, loss: 0.10150265693664551 2023-01-22 16:24:21.539166: step: 664/459, loss: 0.1648922711610794 2023-01-22 16:24:22.133870: step: 666/459, loss: 0.15738268196582794 2023-01-22 16:24:22.775909: step: 668/459, loss: 0.48707059025764465 2023-01-22 16:24:23.376844: step: 670/459, loss: 0.08065757900476456 2023-01-22 16:24:24.010228: step: 672/459, loss: 0.1273612082004547 2023-01-22 16:24:24.557649: step: 674/459, loss: 0.1339716911315918 2023-01-22 16:24:25.144223: step: 676/459, loss: 0.8464416861534119 2023-01-22 16:24:25.797528: step: 678/459, loss: 0.03313465416431427 2023-01-22 16:24:26.421314: step: 680/459, loss: 0.7946802973747253 2023-01-22 16:24:27.138420: step: 682/459, loss: 0.04043581336736679 2023-01-22 16:24:27.826801: step: 684/459, loss: 0.04383474960923195 2023-01-22 16:24:28.454082: step: 686/459, loss: 0.1301778256893158 2023-01-22 16:24:29.071286: step: 688/459, loss: 0.27779003977775574 2023-01-22 16:24:29.679660: step: 690/459, loss: 0.03094465658068657 2023-01-22 16:24:30.307111: step: 692/459, loss: 0.05769069120287895 2023-01-22 16:24:30.941638: step: 694/459, loss: 0.09426134824752808 2023-01-22 16:24:31.574811: step: 696/459, loss: 0.11281517893075943 2023-01-22 16:24:32.176652: step: 698/459, loss: 0.17040300369262695 2023-01-22 16:24:32.808532: step: 700/459, loss: 0.22418160736560822 2023-01-22 16:24:33.441667: step: 702/459, loss: 0.06680037081241608 2023-01-22 16:24:34.061549: step: 704/459, loss: 0.09927865117788315 2023-01-22 16:24:34.719913: step: 706/459, loss: 0.2807328402996063 2023-01-22 16:24:35.350529: step: 708/459, loss: 0.11914856731891632 2023-01-22 16:24:35.964049: step: 710/459, loss: 0.11808769404888153 2023-01-22 16:24:36.615300: step: 712/459, loss: 0.11046499013900757 2023-01-22 16:24:37.221928: step: 714/459, loss: 0.1902889907360077 2023-01-22 16:24:37.820617: step: 716/459, loss: 0.09878397732973099 2023-01-22 16:24:38.499583: step: 718/459, loss: 0.03911047428846359 2023-01-22 16:24:39.161444: step: 720/459, loss: 0.12672363221645355 2023-01-22 16:24:39.782739: step: 722/459, loss: 0.1335163563489914 2023-01-22 16:24:40.444702: step: 724/459, loss: 0.0730169415473938 2023-01-22 16:24:41.068631: step: 726/459, loss: 0.09712959825992584 2023-01-22 16:24:41.699585: step: 728/459, loss: 0.2896866202354431 2023-01-22 16:24:42.344700: step: 730/459, loss: 0.10525329411029816 2023-01-22 16:24:42.944381: step: 732/459, loss: 0.03804433345794678 2023-01-22 16:24:43.548002: step: 734/459, loss: 0.047234758734703064 2023-01-22 16:24:44.136730: step: 736/459, loss: 0.4169715940952301 2023-01-22 16:24:44.750763: step: 738/459, loss: 0.1400170624256134 2023-01-22 16:24:45.422748: step: 740/459, loss: 0.19945509731769562 2023-01-22 16:24:46.051071: step: 742/459, loss: 0.35617539286613464 2023-01-22 16:24:46.656833: step: 744/459, loss: 0.029249699786305428 2023-01-22 16:24:47.298741: step: 746/459, loss: 0.07087953388690948 2023-01-22 16:24:47.900816: step: 748/459, loss: 0.1579747349023819 2023-01-22 16:24:48.472501: step: 750/459, loss: 0.25137627124786377 2023-01-22 16:24:49.166255: step: 752/459, loss: 0.04891891032457352 2023-01-22 16:24:49.805502: step: 754/459, loss: 0.05966755002737045 2023-01-22 16:24:50.454493: step: 756/459, loss: 0.08920136839151382 2023-01-22 16:24:51.236210: step: 758/459, loss: 0.1348571926355362 2023-01-22 16:24:51.838956: step: 760/459, loss: 0.15010423958301544 2023-01-22 16:24:52.450544: step: 762/459, loss: 0.11280161887407303 2023-01-22 16:24:53.027187: step: 764/459, loss: 0.2631674110889435 2023-01-22 16:24:53.655014: step: 766/459, loss: 0.12199173867702484 2023-01-22 16:24:54.296807: step: 768/459, loss: 0.4532184898853302 2023-01-22 16:24:54.892758: step: 770/459, loss: 0.39643317461013794 2023-01-22 16:24:55.497166: step: 772/459, loss: 0.03370276466012001 2023-01-22 16:24:56.188861: step: 774/459, loss: 0.18387499451637268 2023-01-22 16:24:56.754504: step: 776/459, loss: 0.05170874670147896 2023-01-22 16:24:57.336153: step: 778/459, loss: 0.03978317975997925 2023-01-22 16:24:57.972940: step: 780/459, loss: 0.1854645311832428 2023-01-22 16:24:58.599218: step: 782/459, loss: 0.10616506636142731 2023-01-22 16:24:59.182921: step: 784/459, loss: 0.11322058737277985 2023-01-22 16:24:59.761741: step: 786/459, loss: 0.129385843873024 2023-01-22 16:25:00.432729: step: 788/459, loss: 0.1037244126200676 2023-01-22 16:25:01.035833: step: 790/459, loss: 0.13066475093364716 2023-01-22 16:25:01.677702: step: 792/459, loss: 0.04663245379924774 2023-01-22 16:25:02.292720: step: 794/459, loss: 0.07115887850522995 2023-01-22 16:25:02.871708: step: 796/459, loss: 0.022110415622591972 2023-01-22 16:25:03.493465: step: 798/459, loss: 0.09536222368478775 2023-01-22 16:25:04.114918: step: 800/459, loss: 0.09564674645662308 2023-01-22 16:25:04.763112: step: 802/459, loss: 0.17475619912147522 2023-01-22 16:25:05.412901: step: 804/459, loss: 0.19228708744049072 2023-01-22 16:25:06.038680: step: 806/459, loss: 0.07478566467761993 2023-01-22 16:25:06.699798: step: 808/459, loss: 0.32532015442848206 2023-01-22 16:25:07.330073: step: 810/459, loss: 0.17316530644893646 2023-01-22 16:25:07.910873: step: 812/459, loss: 0.6891698837280273 2023-01-22 16:25:08.577242: step: 814/459, loss: 0.27573174238204956 2023-01-22 16:25:09.292249: step: 816/459, loss: 0.0390947051346302 2023-01-22 16:25:09.872600: step: 818/459, loss: 0.03452770784497261 2023-01-22 16:25:10.495104: step: 820/459, loss: 0.3188793361186981 2023-01-22 16:25:11.057865: step: 822/459, loss: 0.053490910679101944 2023-01-22 16:25:11.684273: step: 824/459, loss: 2.4637012481689453 2023-01-22 16:25:12.371094: step: 826/459, loss: 0.32630157470703125 2023-01-22 16:25:12.978196: step: 828/459, loss: 0.026783736422657967 2023-01-22 16:25:13.633293: step: 830/459, loss: 0.33674782514572144 2023-01-22 16:25:14.277647: step: 832/459, loss: 0.37168991565704346 2023-01-22 16:25:14.899179: step: 834/459, loss: 0.13487665355205536 2023-01-22 16:25:15.558860: step: 836/459, loss: 0.0986691415309906 2023-01-22 16:25:16.173922: step: 838/459, loss: 0.12196309864521027 2023-01-22 16:25:16.850172: step: 840/459, loss: 0.09645374119281769 2023-01-22 16:25:17.511367: step: 842/459, loss: 0.07680221647024155 2023-01-22 16:25:18.263496: step: 844/459, loss: 0.08769435435533524 2023-01-22 16:25:18.805545: step: 846/459, loss: 0.0032552294433116913 2023-01-22 16:25:19.476494: step: 848/459, loss: 0.3883289098739624 2023-01-22 16:25:20.098135: step: 850/459, loss: 0.42544931173324585 2023-01-22 16:25:20.716402: step: 852/459, loss: 0.06978535652160645 2023-01-22 16:25:21.343016: step: 854/459, loss: 0.3711180090904236 2023-01-22 16:25:21.958500: step: 856/459, loss: 0.08473961055278778 2023-01-22 16:25:22.600188: step: 858/459, loss: 0.6738617420196533 2023-01-22 16:25:23.220764: step: 860/459, loss: 0.06016272306442261 2023-01-22 16:25:23.848217: step: 862/459, loss: 0.1515260934829712 2023-01-22 16:25:24.424511: step: 864/459, loss: 0.06114427372813225 2023-01-22 16:25:25.032721: step: 866/459, loss: 0.024301204830408096 2023-01-22 16:25:25.698348: step: 868/459, loss: 0.03693966567516327 2023-01-22 16:25:26.368371: step: 870/459, loss: 0.11785560846328735 2023-01-22 16:25:27.017598: step: 872/459, loss: 0.2840283513069153 2023-01-22 16:25:27.736808: step: 874/459, loss: 0.1774817258119583 2023-01-22 16:25:28.360538: step: 876/459, loss: 0.11782927811145782 2023-01-22 16:25:28.958625: step: 878/459, loss: 0.09599194675683975 2023-01-22 16:25:29.593589: step: 880/459, loss: 0.18278378248214722 2023-01-22 16:25:30.155389: step: 882/459, loss: 0.315634161233902 2023-01-22 16:25:30.725254: step: 884/459, loss: 0.0557362362742424 2023-01-22 16:25:31.386279: step: 886/459, loss: 0.12295199185609818 2023-01-22 16:25:32.033545: step: 888/459, loss: 0.13714657723903656 2023-01-22 16:25:32.642833: step: 890/459, loss: 0.15591411292552948 2023-01-22 16:25:33.327601: step: 892/459, loss: 0.0897250548005104 2023-01-22 16:25:33.967191: step: 894/459, loss: 0.40753793716430664 2023-01-22 16:25:34.565212: step: 896/459, loss: 0.32816189527511597 2023-01-22 16:25:35.197959: step: 898/459, loss: 0.31129008531570435 2023-01-22 16:25:35.836515: step: 900/459, loss: 0.08267886191606522 2023-01-22 16:25:36.503248: step: 902/459, loss: 0.20005983114242554 2023-01-22 16:25:37.100876: step: 904/459, loss: 0.1510181576013565 2023-01-22 16:25:37.756047: step: 906/459, loss: 0.2172638177871704 2023-01-22 16:25:38.348843: step: 908/459, loss: 0.04315739497542381 2023-01-22 16:25:38.930167: step: 910/459, loss: 0.1782074272632599 2023-01-22 16:25:39.560756: step: 912/459, loss: 0.05332005023956299 2023-01-22 16:25:40.174735: step: 914/459, loss: 0.08795734494924545 2023-01-22 16:25:40.804502: step: 916/459, loss: 0.22083303332328796 2023-01-22 16:25:41.435296: step: 918/459, loss: 0.08341339975595474 2023-01-22 16:25:41.943668: step: 920/459, loss: 0.003675771178677678 ================================================== Loss: 0.222 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3048269576699326, 'r': 0.3285421479250887, 'f1': 0.3162405697836013}, 'combined': 0.23301936720896937, 'epoch': 15} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34042618010052256, 'r': 0.2939621882760654, 'f1': 0.3154926219876913}, 'combined': 0.2019152780721224, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30454634744511644, 'r': 0.33575223503911317, 'f1': 0.31938885896319974}, 'combined': 0.2353391592360419, 'epoch': 15} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.35241869240045853, 'r': 0.3014781723171195, 'f1': 0.3249642229777868}, 'combined': 0.20797710270578354, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3189032241859154, 'r': 0.3376622373733222, 'f1': 0.32801474487694154}, 'combined': 0.24169507517248323, 'epoch': 15} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34157316773046453, 'r': 0.3008578947798814, 'f1': 0.3199253278791385}, 'combined': 0.22938042376240123, 'epoch': 15} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2296296296296296, 'r': 0.2952380952380952, 'f1': 0.2583333333333333}, 'combined': 0.1722222222222222, 'epoch': 15} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2878787878787879, 'r': 0.41304347826086957, 'f1': 0.3392857142857143}, 'combined': 0.16964285714285715, 'epoch': 15} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35294117647058826, 'r': 0.20689655172413793, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 15} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2965306761268781, 'r': 0.33704340607210626, 'f1': 0.315491785079929}, 'combined': 0.2324676311115266, 'epoch': 12} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31983745334792824, 'r': 0.32478489348160866, 'f1': 0.3222921877528559}, 'combined': 0.20626700016182775, 'epoch': 12} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23270440251572325, 'r': 0.35238095238095235, 'f1': 0.2803030303030303}, 'combined': 0.18686868686868685, 'epoch': 12} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29418702499903554, 'r': 0.3594998939267151, 'f1': 0.32358060478117656}, 'combined': 0.23842781404928798, 'epoch': 13} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3202762492367604, 'r': 0.31212376289255195, 'f1': 0.31614745781013553}, 'combined': 0.20233437299848672, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2878787878787879, 'r': 0.41304347826086957, 'f1': 0.3392857142857143}, 'combined': 0.16964285714285715, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3189032241859154, 'r': 0.3376622373733222, 'f1': 0.32801474487694154}, 'combined': 0.24169507517248323, 'epoch': 15} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34157316773046453, 'r': 0.3008578947798814, 'f1': 0.3199253278791385}, 'combined': 0.22938042376240123, 'epoch': 15} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35294117647058826, 'r': 0.20689655172413793, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 15} ****************************** Epoch: 16 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 16:28:30.094682: step: 2/459, loss: 0.01931356079876423 2023-01-22 16:28:30.671526: step: 4/459, loss: 0.05071929097175598 2023-01-22 16:28:31.275549: step: 6/459, loss: 0.1022384986281395 2023-01-22 16:28:31.948515: step: 8/459, loss: 0.052067287266254425 2023-01-22 16:28:32.661192: step: 10/459, loss: 0.0677531510591507 2023-01-22 16:28:33.267614: step: 12/459, loss: 0.067044697701931 2023-01-22 16:28:33.896705: step: 14/459, loss: 0.026747241616249084 2023-01-22 16:28:34.484847: step: 16/459, loss: 0.4047658145427704 2023-01-22 16:28:35.131347: step: 18/459, loss: 0.2315514087677002 2023-01-22 16:28:35.708529: step: 20/459, loss: 0.05906762182712555 2023-01-22 16:28:36.313283: step: 22/459, loss: 0.19989833235740662 2023-01-22 16:28:36.921657: step: 24/459, loss: 0.05496235936880112 2023-01-22 16:28:37.535743: step: 26/459, loss: 0.07041577994823456 2023-01-22 16:28:38.209483: step: 28/459, loss: 0.06678183376789093 2023-01-22 16:28:38.785405: step: 30/459, loss: 0.07673346996307373 2023-01-22 16:28:39.381025: step: 32/459, loss: 0.13332974910736084 2023-01-22 16:28:39.983130: step: 34/459, loss: 0.06029756739735603 2023-01-22 16:28:40.619540: step: 36/459, loss: 0.30716532468795776 2023-01-22 16:28:41.257244: step: 38/459, loss: 0.0973428338766098 2023-01-22 16:28:41.881753: step: 40/459, loss: 0.1761714667081833 2023-01-22 16:28:42.463843: step: 42/459, loss: 0.17198798060417175 2023-01-22 16:28:43.146583: step: 44/459, loss: 0.20850372314453125 2023-01-22 16:28:43.818690: step: 46/459, loss: 0.3430839478969574 2023-01-22 16:28:44.377435: step: 48/459, loss: 0.022775230929255486 2023-01-22 16:28:45.025071: step: 50/459, loss: 0.15061022341251373 2023-01-22 16:28:45.690959: step: 52/459, loss: 0.24040807783603668 2023-01-22 16:28:46.280242: step: 54/459, loss: 0.019507642835378647 2023-01-22 16:28:46.911727: step: 56/459, loss: 0.0173790380358696 2023-01-22 16:28:47.493987: step: 58/459, loss: 0.08190690726041794 2023-01-22 16:28:48.128850: step: 60/459, loss: 0.2120600789785385 2023-01-22 16:28:48.689169: step: 62/459, loss: 0.09232837706804276 2023-01-22 16:28:49.339560: step: 64/459, loss: 0.18219131231307983 2023-01-22 16:28:49.969658: step: 66/459, loss: 0.032263435423374176 2023-01-22 16:28:50.575479: step: 68/459, loss: 0.10839977860450745 2023-01-22 16:28:51.202141: step: 70/459, loss: 0.03550161048769951 2023-01-22 16:28:51.821150: step: 72/459, loss: 0.05555253103375435 2023-01-22 16:28:52.431666: step: 74/459, loss: 0.054376136511564255 2023-01-22 16:28:53.047335: step: 76/459, loss: 0.06596235930919647 2023-01-22 16:28:53.743326: step: 78/459, loss: 0.09666019678115845 2023-01-22 16:28:54.373820: step: 80/459, loss: 0.09964393824338913 2023-01-22 16:28:55.063646: step: 82/459, loss: 0.045899149030447006 2023-01-22 16:28:55.681392: step: 84/459, loss: 0.10431969165802002 2023-01-22 16:28:56.287582: step: 86/459, loss: 0.07527284324169159 2023-01-22 16:28:56.858941: step: 88/459, loss: 0.04407572001218796 2023-01-22 16:28:57.493009: step: 90/459, loss: 0.019990896806120872 2023-01-22 16:28:58.019721: step: 92/459, loss: 0.017803454771637917 2023-01-22 16:28:58.693208: step: 94/459, loss: 0.05873236805200577 2023-01-22 16:28:59.367243: step: 96/459, loss: 0.18810760974884033 2023-01-22 16:28:59.913357: step: 98/459, loss: 0.34958821535110474 2023-01-22 16:29:00.533976: step: 100/459, loss: 0.08640673011541367 2023-01-22 16:29:01.122947: step: 102/459, loss: 0.17225149273872375 2023-01-22 16:29:01.784619: step: 104/459, loss: 0.11682149767875671 2023-01-22 16:29:02.313162: step: 106/459, loss: 0.0222601518034935 2023-01-22 16:29:02.919863: step: 108/459, loss: 0.14780133962631226 2023-01-22 16:29:03.557869: step: 110/459, loss: 0.06648165732622147 2023-01-22 16:29:04.211499: step: 112/459, loss: 0.0649031400680542 2023-01-22 16:29:04.878124: step: 114/459, loss: 0.049819570034742355 2023-01-22 16:29:05.484002: step: 116/459, loss: 0.38073036074638367 2023-01-22 16:29:06.144856: step: 118/459, loss: 0.0627637654542923 2023-01-22 16:29:06.809308: step: 120/459, loss: 0.014064174145460129 2023-01-22 16:29:07.462768: step: 122/459, loss: 0.1550702005624771 2023-01-22 16:29:08.067212: step: 124/459, loss: 0.03739543631672859 2023-01-22 16:29:08.675451: step: 126/459, loss: 0.41671109199523926 2023-01-22 16:29:09.264315: step: 128/459, loss: 0.1254136562347412 2023-01-22 16:29:09.880667: step: 130/459, loss: 0.1445484310388565 2023-01-22 16:29:10.464394: step: 132/459, loss: 0.1258227825164795 2023-01-22 16:29:11.057597: step: 134/459, loss: 0.049373019486665726 2023-01-22 16:29:11.677455: step: 136/459, loss: 0.03908264636993408 2023-01-22 16:29:12.262834: step: 138/459, loss: 0.04292367771267891 2023-01-22 16:29:12.941290: step: 140/459, loss: 0.10196442902088165 2023-01-22 16:29:13.577051: step: 142/459, loss: 0.018234698101878166 2023-01-22 16:29:14.222740: step: 144/459, loss: 0.08380237221717834 2023-01-22 16:29:14.830150: step: 146/459, loss: 0.0705084279179573 2023-01-22 16:29:15.454428: step: 148/459, loss: 0.15315116941928864 2023-01-22 16:29:16.098125: step: 150/459, loss: 0.07690802961587906 2023-01-22 16:29:16.745672: step: 152/459, loss: 0.08082899451255798 2023-01-22 16:29:17.367922: step: 154/459, loss: 0.2708396911621094 2023-01-22 16:29:18.060119: step: 156/459, loss: 0.10123477876186371 2023-01-22 16:29:18.709734: step: 158/459, loss: 0.0854659155011177 2023-01-22 16:29:19.322765: step: 160/459, loss: 0.8391792178153992 2023-01-22 16:29:19.975407: step: 162/459, loss: 0.14004118740558624 2023-01-22 16:29:20.552926: step: 164/459, loss: 0.11132725328207016 2023-01-22 16:29:21.171033: step: 166/459, loss: 0.11886531114578247 2023-01-22 16:29:21.773741: step: 168/459, loss: 0.034186799079179764 2023-01-22 16:29:22.387617: step: 170/459, loss: 0.09251759201288223 2023-01-22 16:29:23.003069: step: 172/459, loss: 0.1643432378768921 2023-01-22 16:29:23.628944: step: 174/459, loss: 0.07986556738615036 2023-01-22 16:29:24.272464: step: 176/459, loss: 0.1332751214504242 2023-01-22 16:29:24.917774: step: 178/459, loss: 0.06364081799983978 2023-01-22 16:29:25.521596: step: 180/459, loss: 0.016720179468393326 2023-01-22 16:29:26.146002: step: 182/459, loss: 0.10158473998308182 2023-01-22 16:29:26.747062: step: 184/459, loss: 0.07296829670667648 2023-01-22 16:29:27.343948: step: 186/459, loss: 0.06694729626178741 2023-01-22 16:29:28.014920: step: 188/459, loss: 0.08557794988155365 2023-01-22 16:29:28.690575: step: 190/459, loss: 0.04677531495690346 2023-01-22 16:29:29.343338: step: 192/459, loss: 0.027862900868058205 2023-01-22 16:29:29.939196: step: 194/459, loss: 0.11771175265312195 2023-01-22 16:29:30.591740: step: 196/459, loss: 0.05265508592128754 2023-01-22 16:29:31.250329: step: 198/459, loss: 0.05309395119547844 2023-01-22 16:29:31.891192: step: 200/459, loss: 1.1270195245742798 2023-01-22 16:29:32.526419: step: 202/459, loss: 0.06370670348405838 2023-01-22 16:29:33.187295: step: 204/459, loss: 0.10461830347776413 2023-01-22 16:29:33.827692: step: 206/459, loss: 0.37700870633125305 2023-01-22 16:29:34.374889: step: 208/459, loss: 0.11872740089893341 2023-01-22 16:29:34.956509: step: 210/459, loss: 0.6936201453208923 2023-01-22 16:29:35.657447: step: 212/459, loss: 0.0820017084479332 2023-01-22 16:29:36.224431: step: 214/459, loss: 0.026011716574430466 2023-01-22 16:29:36.859214: step: 216/459, loss: 0.09367741644382477 2023-01-22 16:29:37.508192: step: 218/459, loss: 0.04797079414129257 2023-01-22 16:29:38.140108: step: 220/459, loss: 0.05814728885889053 2023-01-22 16:29:38.740503: step: 222/459, loss: 0.08903458714485168 2023-01-22 16:29:39.350941: step: 224/459, loss: 0.02530730329453945 2023-01-22 16:29:40.043254: step: 226/459, loss: 0.03358269855380058 2023-01-22 16:29:40.684977: step: 228/459, loss: 0.059349510818719864 2023-01-22 16:29:41.390850: step: 230/459, loss: 0.16483013331890106 2023-01-22 16:29:42.050141: step: 232/459, loss: 0.11237235367298126 2023-01-22 16:29:42.661782: step: 234/459, loss: 0.0607307069003582 2023-01-22 16:29:43.307797: step: 236/459, loss: 0.024711722508072853 2023-01-22 16:29:43.937849: step: 238/459, loss: 0.1563442200422287 2023-01-22 16:29:44.565958: step: 240/459, loss: 0.3948640525341034 2023-01-22 16:29:45.192458: step: 242/459, loss: 0.359493225812912 2023-01-22 16:29:45.820057: step: 244/459, loss: 0.06619556248188019 2023-01-22 16:29:46.477159: step: 246/459, loss: 2.824965238571167 2023-01-22 16:29:47.111338: step: 248/459, loss: 0.08963656425476074 2023-01-22 16:29:47.738907: step: 250/459, loss: 0.04801926761865616 2023-01-22 16:29:48.364620: step: 252/459, loss: 0.057705964893102646 2023-01-22 16:29:48.990126: step: 254/459, loss: 0.036856140941381454 2023-01-22 16:29:49.646248: step: 256/459, loss: 0.08039944618940353 2023-01-22 16:29:50.227397: step: 258/459, loss: 0.04998581111431122 2023-01-22 16:29:50.904576: step: 260/459, loss: 0.08670539408922195 2023-01-22 16:29:51.584008: step: 262/459, loss: 0.11289578676223755 2023-01-22 16:29:52.223188: step: 264/459, loss: 0.05322916805744171 2023-01-22 16:29:52.797151: step: 266/459, loss: 0.11820245534181595 2023-01-22 16:29:53.428560: step: 268/459, loss: 0.6952579617500305 2023-01-22 16:29:54.023208: step: 270/459, loss: 0.04893031343817711 2023-01-22 16:29:54.636887: step: 272/459, loss: 0.025678224861621857 2023-01-22 16:29:55.262010: step: 274/459, loss: 0.06178047135472298 2023-01-22 16:29:55.887936: step: 276/459, loss: 0.18752366304397583 2023-01-22 16:29:56.491114: step: 278/459, loss: 0.05175314098596573 2023-01-22 16:29:57.105928: step: 280/459, loss: 0.03623977303504944 2023-01-22 16:29:57.732976: step: 282/459, loss: 0.1178644448518753 2023-01-22 16:29:58.386053: step: 284/459, loss: 0.08921278268098831 2023-01-22 16:29:58.967450: step: 286/459, loss: 0.07563696801662445 2023-01-22 16:29:59.673629: step: 288/459, loss: 0.37661653757095337 2023-01-22 16:30:00.213079: step: 290/459, loss: 0.06587998569011688 2023-01-22 16:30:00.889698: step: 292/459, loss: 0.07367648184299469 2023-01-22 16:30:01.631512: step: 294/459, loss: 0.06730613857507706 2023-01-22 16:30:02.240766: step: 296/459, loss: 0.030435631051659584 2023-01-22 16:30:02.896682: step: 298/459, loss: 0.07919187843799591 2023-01-22 16:30:03.501632: step: 300/459, loss: 0.11444911360740662 2023-01-22 16:30:04.084621: step: 302/459, loss: 0.20229960978031158 2023-01-22 16:30:04.648562: step: 304/459, loss: 0.23474031686782837 2023-01-22 16:30:05.238963: step: 306/459, loss: 0.10930656641721725 2023-01-22 16:30:05.877032: step: 308/459, loss: 0.06435523182153702 2023-01-22 16:30:06.538983: step: 310/459, loss: 0.010808300226926804 2023-01-22 16:30:07.168232: step: 312/459, loss: 0.10238995403051376 2023-01-22 16:30:07.862402: step: 314/459, loss: 0.18910610675811768 2023-01-22 16:30:08.486193: step: 316/459, loss: 0.06660126149654388 2023-01-22 16:30:09.043831: step: 318/459, loss: 0.06136489659547806 2023-01-22 16:30:09.691249: step: 320/459, loss: 0.01659010350704193 2023-01-22 16:30:10.339951: step: 322/459, loss: 0.13467466831207275 2023-01-22 16:30:10.905527: step: 324/459, loss: 0.48547589778900146 2023-01-22 16:30:11.573055: step: 326/459, loss: 0.0644373819231987 2023-01-22 16:30:12.203855: step: 328/459, loss: 0.09406941384077072 2023-01-22 16:30:12.834989: step: 330/459, loss: 0.10472267866134644 2023-01-22 16:30:13.483000: step: 332/459, loss: 0.414918452501297 2023-01-22 16:30:14.095041: step: 334/459, loss: 0.06725715100765228 2023-01-22 16:30:14.711736: step: 336/459, loss: 0.019609598442912102 2023-01-22 16:30:15.320173: step: 338/459, loss: 0.19656264781951904 2023-01-22 16:30:16.009942: step: 340/459, loss: 2.2407796382904053 2023-01-22 16:30:16.654098: step: 342/459, loss: 0.5040199756622314 2023-01-22 16:30:17.260759: step: 344/459, loss: 0.13217633962631226 2023-01-22 16:30:17.989750: step: 346/459, loss: 0.1435088813304901 2023-01-22 16:30:18.707414: step: 348/459, loss: 0.5504677295684814 2023-01-22 16:30:19.341509: step: 350/459, loss: 0.08689773827791214 2023-01-22 16:30:19.979701: step: 352/459, loss: 0.024734672158956528 2023-01-22 16:30:20.686736: step: 354/459, loss: 0.08755392581224442 2023-01-22 16:30:21.291456: step: 356/459, loss: 0.019753649830818176 2023-01-22 16:30:21.933912: step: 358/459, loss: 0.07046405971050262 2023-01-22 16:30:22.581335: step: 360/459, loss: 0.18215250968933105 2023-01-22 16:30:23.206923: step: 362/459, loss: 0.03776020184159279 2023-01-22 16:30:23.840773: step: 364/459, loss: 0.14024491608142853 2023-01-22 16:30:24.395274: step: 366/459, loss: 0.03014683537185192 2023-01-22 16:30:25.019991: step: 368/459, loss: 0.2079888880252838 2023-01-22 16:30:25.627931: step: 370/459, loss: 0.11475293338298798 2023-01-22 16:30:26.277098: step: 372/459, loss: 0.09771841764450073 2023-01-22 16:30:26.898236: step: 374/459, loss: 0.07386401295661926 2023-01-22 16:30:27.596363: step: 376/459, loss: 0.01784946769475937 2023-01-22 16:30:28.229510: step: 378/459, loss: 0.05118865892291069 2023-01-22 16:30:28.828607: step: 380/459, loss: 0.23377835750579834 2023-01-22 16:30:29.459150: step: 382/459, loss: 0.11667245626449585 2023-01-22 16:30:30.125896: step: 384/459, loss: 0.02964131534099579 2023-01-22 16:30:30.762039: step: 386/459, loss: 0.08486756682395935 2023-01-22 16:30:31.410674: step: 388/459, loss: 0.0857916921377182 2023-01-22 16:30:32.095760: step: 390/459, loss: 0.06475162506103516 2023-01-22 16:30:32.753511: step: 392/459, loss: 0.07593490183353424 2023-01-22 16:30:33.353917: step: 394/459, loss: 0.11231763660907745 2023-01-22 16:30:33.961581: step: 396/459, loss: 0.0363011471927166 2023-01-22 16:30:34.573998: step: 398/459, loss: 0.12308439612388611 2023-01-22 16:30:35.112205: step: 400/459, loss: 0.006558162160217762 2023-01-22 16:30:35.676757: step: 402/459, loss: 0.08329534530639648 2023-01-22 16:30:36.279368: step: 404/459, loss: 0.18536347150802612 2023-01-22 16:30:36.837037: step: 406/459, loss: 0.24677503108978271 2023-01-22 16:30:37.458773: step: 408/459, loss: 0.008606220595538616 2023-01-22 16:30:38.121835: step: 410/459, loss: 0.04923606663942337 2023-01-22 16:30:38.823216: step: 412/459, loss: 0.04975922405719757 2023-01-22 16:30:39.468110: step: 414/459, loss: 0.030368441715836525 2023-01-22 16:30:40.060234: step: 416/459, loss: 0.0863150954246521 2023-01-22 16:30:40.673440: step: 418/459, loss: 0.11022179573774338 2023-01-22 16:30:41.331326: step: 420/459, loss: 0.010061169043183327 2023-01-22 16:30:41.961719: step: 422/459, loss: 0.058705467730760574 2023-01-22 16:30:42.605750: step: 424/459, loss: 0.1476324498653412 2023-01-22 16:30:43.234348: step: 426/459, loss: 0.030196459963917732 2023-01-22 16:30:43.862196: step: 428/459, loss: 0.21896418929100037 2023-01-22 16:30:44.463052: step: 430/459, loss: 0.08825073391199112 2023-01-22 16:30:44.985009: step: 432/459, loss: 0.18864662945270538 2023-01-22 16:30:45.590646: step: 434/459, loss: 0.4043020009994507 2023-01-22 16:30:46.188325: step: 436/459, loss: 0.531916618347168 2023-01-22 16:30:46.886985: step: 438/459, loss: 0.23923565447330475 2023-01-22 16:30:47.463142: step: 440/459, loss: 0.07527695596218109 2023-01-22 16:30:48.079007: step: 442/459, loss: 0.30435076355934143 2023-01-22 16:30:48.697245: step: 444/459, loss: 0.06795012950897217 2023-01-22 16:30:49.339858: step: 446/459, loss: 0.3743436634540558 2023-01-22 16:30:49.951807: step: 448/459, loss: 0.05156278237700462 2023-01-22 16:30:50.594664: step: 450/459, loss: 0.053839001804590225 2023-01-22 16:30:51.281950: step: 452/459, loss: 0.09755496680736542 2023-01-22 16:30:51.876469: step: 454/459, loss: 0.04727427661418915 2023-01-22 16:30:52.465637: step: 456/459, loss: 0.03529256954789162 2023-01-22 16:30:53.140205: step: 458/459, loss: 0.010532301850616932 2023-01-22 16:30:53.768095: step: 460/459, loss: 0.0855116918683052 2023-01-22 16:30:54.379603: step: 462/459, loss: 0.1114676371216774 2023-01-22 16:30:55.032042: step: 464/459, loss: 0.192707359790802 2023-01-22 16:30:55.614703: step: 466/459, loss: 0.10680980235338211 2023-01-22 16:30:56.253896: step: 468/459, loss: 0.05201512947678566 2023-01-22 16:30:56.834094: step: 470/459, loss: 0.1233862042427063 2023-01-22 16:30:57.501367: step: 472/459, loss: 0.15335248410701752 2023-01-22 16:30:58.154256: step: 474/459, loss: 0.08478038758039474 2023-01-22 16:30:58.802140: step: 476/459, loss: 0.1775749772787094 2023-01-22 16:30:59.393523: step: 478/459, loss: 0.09209296852350235 2023-01-22 16:31:00.040128: step: 480/459, loss: 0.5480735301971436 2023-01-22 16:31:00.711940: step: 482/459, loss: 0.09851270169019699 2023-01-22 16:31:01.390517: step: 484/459, loss: 0.10303562134504318 2023-01-22 16:31:02.022058: step: 486/459, loss: 0.1628599613904953 2023-01-22 16:31:02.629143: step: 488/459, loss: 0.17045418918132782 2023-01-22 16:31:03.235443: step: 490/459, loss: 0.06378761678934097 2023-01-22 16:31:03.908780: step: 492/459, loss: 0.4557214677333832 2023-01-22 16:31:04.513546: step: 494/459, loss: 0.03688172250986099 2023-01-22 16:31:05.135605: step: 496/459, loss: 0.03970596566796303 2023-01-22 16:31:05.742578: step: 498/459, loss: 0.17322856187820435 2023-01-22 16:31:06.400114: step: 500/459, loss: 0.19446343183517456 2023-01-22 16:31:06.962816: step: 502/459, loss: 0.08993135392665863 2023-01-22 16:31:07.589952: step: 504/459, loss: 0.1238139346241951 2023-01-22 16:31:08.176980: step: 506/459, loss: 0.03235333412885666 2023-01-22 16:31:08.837709: step: 508/459, loss: 0.05165242403745651 2023-01-22 16:31:09.495959: step: 510/459, loss: 0.029594816267490387 2023-01-22 16:31:10.093404: step: 512/459, loss: 0.08270543813705444 2023-01-22 16:31:10.649364: step: 514/459, loss: 0.08992709219455719 2023-01-22 16:31:11.249285: step: 516/459, loss: 0.02572793699800968 2023-01-22 16:31:11.829878: step: 518/459, loss: 0.03457919508218765 2023-01-22 16:31:12.406298: step: 520/459, loss: 0.030231868848204613 2023-01-22 16:31:13.003599: step: 522/459, loss: 0.04673770070075989 2023-01-22 16:31:13.698730: step: 524/459, loss: 0.33260053396224976 2023-01-22 16:31:14.270057: step: 526/459, loss: 0.03988060727715492 2023-01-22 16:31:14.899497: step: 528/459, loss: 0.16370749473571777 2023-01-22 16:31:15.585966: step: 530/459, loss: 0.030957728624343872 2023-01-22 16:31:16.222796: step: 532/459, loss: 0.25999653339385986 2023-01-22 16:31:16.856921: step: 534/459, loss: 0.017902806401252747 2023-01-22 16:31:17.505508: step: 536/459, loss: 0.32408663630485535 2023-01-22 16:31:18.168348: step: 538/459, loss: 0.050637610256671906 2023-01-22 16:31:18.818189: step: 540/459, loss: 0.5947838425636292 2023-01-22 16:31:19.453941: step: 542/459, loss: 0.19449803233146667 2023-01-22 16:31:20.149373: step: 544/459, loss: 0.8912656903266907 2023-01-22 16:31:20.770212: step: 546/459, loss: 0.08127059042453766 2023-01-22 16:31:21.383179: step: 548/459, loss: 0.12588383257389069 2023-01-22 16:31:22.024633: step: 550/459, loss: 0.04491003602743149 2023-01-22 16:31:22.620534: step: 552/459, loss: 0.04908056929707527 2023-01-22 16:31:23.310257: step: 554/459, loss: 0.18206322193145752 2023-01-22 16:31:23.923038: step: 556/459, loss: 0.18627113103866577 2023-01-22 16:31:24.625102: step: 558/459, loss: 0.08949729800224304 2023-01-22 16:31:25.219050: step: 560/459, loss: 0.2984250783920288 2023-01-22 16:31:25.819489: step: 562/459, loss: 0.08157337456941605 2023-01-22 16:31:26.462933: step: 564/459, loss: 0.13471746444702148 2023-01-22 16:31:27.126529: step: 566/459, loss: 0.030956992879509926 2023-01-22 16:31:27.719588: step: 568/459, loss: 0.1107364073395729 2023-01-22 16:31:28.441796: step: 570/459, loss: 0.6310507655143738 2023-01-22 16:31:29.071634: step: 572/459, loss: 0.070053331553936 2023-01-22 16:31:29.678500: step: 574/459, loss: 0.08911691606044769 2023-01-22 16:31:30.307643: step: 576/459, loss: 0.1088302955031395 2023-01-22 16:31:30.883993: step: 578/459, loss: 0.0674404725432396 2023-01-22 16:31:31.430446: step: 580/459, loss: 0.08770214021205902 2023-01-22 16:31:32.083735: step: 582/459, loss: 0.3349892497062683 2023-01-22 16:31:32.732475: step: 584/459, loss: 1.421939730644226 2023-01-22 16:31:33.319832: step: 586/459, loss: 32.30384826660156 2023-01-22 16:31:33.967918: step: 588/459, loss: 0.11479255557060242 2023-01-22 16:31:34.576139: step: 590/459, loss: 0.12627309560775757 2023-01-22 16:31:35.241757: step: 592/459, loss: 0.0680653378367424 2023-01-22 16:31:35.834894: step: 594/459, loss: 0.050430573523044586 2023-01-22 16:31:36.420755: step: 596/459, loss: 0.06511525064706802 2023-01-22 16:31:37.081131: step: 598/459, loss: 0.1455654352903366 2023-01-22 16:31:37.670088: step: 600/459, loss: 0.06606967002153397 2023-01-22 16:31:38.278120: step: 602/459, loss: 0.03629513829946518 2023-01-22 16:31:38.925363: step: 604/459, loss: 0.055378757417201996 2023-01-22 16:31:39.606537: step: 606/459, loss: 0.1716824769973755 2023-01-22 16:31:40.235880: step: 608/459, loss: 0.040373001247644424 2023-01-22 16:31:40.907061: step: 610/459, loss: 0.21023251116275787 2023-01-22 16:31:41.505996: step: 612/459, loss: 0.15172630548477173 2023-01-22 16:31:42.090535: step: 614/459, loss: 0.09405255317687988 2023-01-22 16:31:42.705481: step: 616/459, loss: 0.08798904716968536 2023-01-22 16:31:43.256235: step: 618/459, loss: 0.04935498535633087 2023-01-22 16:31:43.877802: step: 620/459, loss: 0.10149157792329788 2023-01-22 16:31:44.489189: step: 622/459, loss: 0.023541463539004326 2023-01-22 16:31:45.136439: step: 624/459, loss: 0.0366288460791111 2023-01-22 16:31:45.838191: step: 626/459, loss: 0.025340542197227478 2023-01-22 16:31:46.494686: step: 628/459, loss: 0.21186181902885437 2023-01-22 16:31:47.140608: step: 630/459, loss: 0.09865696728229523 2023-01-22 16:31:47.764124: step: 632/459, loss: 0.1856561154127121 2023-01-22 16:31:48.302074: step: 634/459, loss: 0.06591346114873886 2023-01-22 16:31:48.921488: step: 636/459, loss: 0.26647818088531494 2023-01-22 16:31:49.576077: step: 638/459, loss: 0.10923212766647339 2023-01-22 16:31:50.328057: step: 640/459, loss: 0.10725235193967819 2023-01-22 16:31:50.938017: step: 642/459, loss: 0.03376438468694687 2023-01-22 16:31:51.537387: step: 644/459, loss: 0.04120311141014099 2023-01-22 16:31:52.236624: step: 646/459, loss: 0.09205885231494904 2023-01-22 16:31:52.847749: step: 648/459, loss: 0.05519772693514824 2023-01-22 16:31:53.419056: step: 650/459, loss: 0.09757176041603088 2023-01-22 16:31:54.124746: step: 652/459, loss: 0.04484139010310173 2023-01-22 16:31:54.829159: step: 654/459, loss: 0.05152864754199982 2023-01-22 16:31:55.422416: step: 656/459, loss: 0.06883178651332855 2023-01-22 16:31:56.050279: step: 658/459, loss: 0.24058979749679565 2023-01-22 16:31:56.711166: step: 660/459, loss: 0.3878398537635803 2023-01-22 16:31:57.331569: step: 662/459, loss: 0.01546313427388668 2023-01-22 16:31:57.899405: step: 664/459, loss: 0.03635892644524574 2023-01-22 16:31:58.472906: step: 666/459, loss: 0.09898176044225693 2023-01-22 16:31:59.121843: step: 668/459, loss: 0.3112559914588928 2023-01-22 16:31:59.766903: step: 670/459, loss: 0.053926654160022736 2023-01-22 16:32:00.453777: step: 672/459, loss: 0.05192828178405762 2023-01-22 16:32:01.070445: step: 674/459, loss: 0.06879007071256638 2023-01-22 16:32:01.750841: step: 676/459, loss: 0.039390478283166885 2023-01-22 16:32:02.378056: step: 678/459, loss: 0.02297990396618843 2023-01-22 16:32:03.059551: step: 680/459, loss: 0.03876163437962532 2023-01-22 16:32:03.754009: step: 682/459, loss: 0.25130823254585266 2023-01-22 16:32:04.358432: step: 684/459, loss: 0.049961768090724945 2023-01-22 16:32:04.963213: step: 686/459, loss: 0.01000999752432108 2023-01-22 16:32:05.599539: step: 688/459, loss: 0.06922328472137451 2023-01-22 16:32:06.256506: step: 690/459, loss: 0.44203463196754456 2023-01-22 16:32:06.942431: step: 692/459, loss: 0.1639186143875122 2023-01-22 16:32:07.535577: step: 694/459, loss: 0.08340214192867279 2023-01-22 16:32:08.195786: step: 696/459, loss: 0.04706002399325371 2023-01-22 16:32:08.787722: step: 698/459, loss: 0.10721082240343094 2023-01-22 16:32:09.363671: step: 700/459, loss: 0.10124266147613525 2023-01-22 16:32:09.995403: step: 702/459, loss: 0.18653595447540283 2023-01-22 16:32:10.708461: step: 704/459, loss: 0.0676918774843216 2023-01-22 16:32:11.320828: step: 706/459, loss: 0.012187056243419647 2023-01-22 16:32:11.941321: step: 708/459, loss: 0.23220545053482056 2023-01-22 16:32:12.601197: step: 710/459, loss: 0.3411718010902405 2023-01-22 16:32:13.230122: step: 712/459, loss: 0.019707070663571358 2023-01-22 16:32:13.843060: step: 714/459, loss: 0.21983906626701355 2023-01-22 16:32:14.459877: step: 716/459, loss: 0.008521830663084984 2023-01-22 16:32:15.056901: step: 718/459, loss: 0.1220218688249588 2023-01-22 16:32:15.800310: step: 720/459, loss: 0.09376772493124008 2023-01-22 16:32:16.394078: step: 722/459, loss: 0.11291556805372238 2023-01-22 16:32:17.016484: step: 724/459, loss: 0.15233831107616425 2023-01-22 16:32:17.634486: step: 726/459, loss: 0.1142633706331253 2023-01-22 16:32:18.277666: step: 728/459, loss: 0.03693414479494095 2023-01-22 16:32:18.862207: step: 730/459, loss: 0.04971098527312279 2023-01-22 16:32:19.554263: step: 732/459, loss: 0.06538019329309464 2023-01-22 16:32:20.135523: step: 734/459, loss: 0.01672871969640255 2023-01-22 16:32:20.746603: step: 736/459, loss: 0.2400113344192505 2023-01-22 16:32:21.347327: step: 738/459, loss: 0.19907689094543457 2023-01-22 16:32:21.951245: step: 740/459, loss: 0.12710218131542206 2023-01-22 16:32:22.624140: step: 742/459, loss: 0.14440418779850006 2023-01-22 16:32:23.274097: step: 744/459, loss: 0.09551601111888885 2023-01-22 16:32:23.904647: step: 746/459, loss: 0.49644631147384644 2023-01-22 16:32:24.491486: step: 748/459, loss: 0.026100128889083862 2023-01-22 16:32:25.150040: step: 750/459, loss: 0.02740365080535412 2023-01-22 16:32:25.771655: step: 752/459, loss: 2.5307319164276123 2023-01-22 16:32:26.442135: step: 754/459, loss: 0.14884591102600098 2023-01-22 16:32:27.076566: step: 756/459, loss: 0.024486567825078964 2023-01-22 16:32:27.669564: step: 758/459, loss: 0.11660891771316528 2023-01-22 16:32:28.361770: step: 760/459, loss: 0.0750233605504036 2023-01-22 16:32:28.988509: step: 762/459, loss: 0.07561202347278595 2023-01-22 16:32:29.563165: step: 764/459, loss: 0.018648723140358925 2023-01-22 16:32:30.255895: step: 766/459, loss: 0.0850503072142601 2023-01-22 16:32:30.905118: step: 768/459, loss: 5.1808552742004395 2023-01-22 16:32:31.502965: step: 770/459, loss: 0.06491079181432724 2023-01-22 16:32:32.158142: step: 772/459, loss: 0.10398449748754501 2023-01-22 16:32:32.783731: step: 774/459, loss: 0.12852570414543152 2023-01-22 16:32:33.349996: step: 776/459, loss: 0.266038179397583 2023-01-22 16:32:33.944588: step: 778/459, loss: 0.07103952020406723 2023-01-22 16:32:34.509897: step: 780/459, loss: 0.07764387875795364 2023-01-22 16:32:35.083066: step: 782/459, loss: 0.1273649036884308 2023-01-22 16:32:35.678621: step: 784/459, loss: 0.06870914250612259 2023-01-22 16:32:36.247695: step: 786/459, loss: 0.03585587069392204 2023-01-22 16:32:36.836100: step: 788/459, loss: 0.15447506308555603 2023-01-22 16:32:37.471323: step: 790/459, loss: 0.15548604726791382 2023-01-22 16:32:38.092731: step: 792/459, loss: 0.5037197470664978 2023-01-22 16:32:38.694381: step: 794/459, loss: 0.015677466988563538 2023-01-22 16:32:39.316707: step: 796/459, loss: 0.29170945286750793 2023-01-22 16:32:39.875653: step: 798/459, loss: 0.11056486517190933 2023-01-22 16:32:40.589913: step: 800/459, loss: 0.06686298549175262 2023-01-22 16:32:41.244109: step: 802/459, loss: 0.22492662072181702 2023-01-22 16:32:41.873439: step: 804/459, loss: 0.1582270711660385 2023-01-22 16:32:42.490718: step: 806/459, loss: 0.11327268928289413 2023-01-22 16:32:43.068595: step: 808/459, loss: 0.3088478446006775 2023-01-22 16:32:43.679466: step: 810/459, loss: 0.0344325453042984 2023-01-22 16:32:44.256280: step: 812/459, loss: 0.18189756572246552 2023-01-22 16:32:44.854025: step: 814/459, loss: 0.03003609925508499 2023-01-22 16:32:45.552369: step: 816/459, loss: 0.5278926491737366 2023-01-22 16:32:46.191835: step: 818/459, loss: 0.4508969783782959 2023-01-22 16:32:46.843030: step: 820/459, loss: 0.08245141804218292 2023-01-22 16:32:47.455591: step: 822/459, loss: 0.08782432228326797 2023-01-22 16:32:48.142315: step: 824/459, loss: 0.0507502518594265 2023-01-22 16:32:48.764214: step: 826/459, loss: 0.05976211652159691 2023-01-22 16:32:49.346545: step: 828/459, loss: 0.14259961247444153 2023-01-22 16:32:49.940921: step: 830/459, loss: 0.03218244016170502 2023-01-22 16:32:50.577793: step: 832/459, loss: 0.008628204464912415 2023-01-22 16:32:51.190860: step: 834/459, loss: 0.12664316594600677 2023-01-22 16:32:51.790490: step: 836/459, loss: 0.10005903244018555 2023-01-22 16:32:52.397841: step: 838/459, loss: 0.28290510177612305 2023-01-22 16:32:53.062423: step: 840/459, loss: 0.03955516964197159 2023-01-22 16:32:53.637563: step: 842/459, loss: 0.21589279174804688 2023-01-22 16:32:54.245440: step: 844/459, loss: 0.0953596755862236 2023-01-22 16:32:54.933138: step: 846/459, loss: 0.08653610199689865 2023-01-22 16:32:55.582916: step: 848/459, loss: 1.2919983863830566 2023-01-22 16:32:56.152606: step: 850/459, loss: 0.09891129285097122 2023-01-22 16:32:56.828443: step: 852/459, loss: 0.028276149183511734 2023-01-22 16:32:57.530084: step: 854/459, loss: 0.10328690707683563 2023-01-22 16:32:58.149914: step: 856/459, loss: 0.046352557837963104 2023-01-22 16:32:58.760902: step: 858/459, loss: 0.08626148849725723 2023-01-22 16:32:59.389147: step: 860/459, loss: 0.10156378149986267 2023-01-22 16:33:00.058713: step: 862/459, loss: 0.010855760425329208 2023-01-22 16:33:00.687514: step: 864/459, loss: 0.12340010702610016 2023-01-22 16:33:01.354828: step: 866/459, loss: 0.21393786370754242 2023-01-22 16:33:01.996489: step: 868/459, loss: 0.01993715763092041 2023-01-22 16:33:02.560181: step: 870/459, loss: 0.09584330022335052 2023-01-22 16:33:03.192312: step: 872/459, loss: 0.09407295286655426 2023-01-22 16:33:03.853859: step: 874/459, loss: 0.13090740144252777 2023-01-22 16:33:04.458468: step: 876/459, loss: 0.12118003517389297 2023-01-22 16:33:05.048316: step: 878/459, loss: 0.031470414251089096 2023-01-22 16:33:05.728623: step: 880/459, loss: 0.5347956418991089 2023-01-22 16:33:06.312181: step: 882/459, loss: 0.02833881974220276 2023-01-22 16:33:06.818968: step: 884/459, loss: 0.09282902628183365 2023-01-22 16:33:07.443250: step: 886/459, loss: 0.11542994529008865 2023-01-22 16:33:08.052986: step: 888/459, loss: 0.08748286962509155 2023-01-22 16:33:08.600029: step: 890/459, loss: 0.04704218730330467 2023-01-22 16:33:09.220161: step: 892/459, loss: 0.17559397220611572 2023-01-22 16:33:09.805418: step: 894/459, loss: 1.3653079271316528 2023-01-22 16:33:10.435110: step: 896/459, loss: 0.09628830850124359 2023-01-22 16:33:11.098988: step: 898/459, loss: 0.2295190840959549 2023-01-22 16:33:11.777775: step: 900/459, loss: 0.09657265990972519 2023-01-22 16:33:12.422672: step: 902/459, loss: 0.17035169899463654 2023-01-22 16:33:13.018733: step: 904/459, loss: 0.06970127671957016 2023-01-22 16:33:13.605690: step: 906/459, loss: 0.3700084686279297 2023-01-22 16:33:14.193750: step: 908/459, loss: 0.11204694956541061 2023-01-22 16:33:14.836440: step: 910/459, loss: 0.08953094482421875 2023-01-22 16:33:15.467277: step: 912/459, loss: 0.16464336216449738 2023-01-22 16:33:16.109740: step: 914/459, loss: 0.4274744391441345 2023-01-22 16:33:16.755084: step: 916/459, loss: 0.10624717175960541 2023-01-22 16:33:17.423611: step: 918/459, loss: 0.367862731218338 2023-01-22 16:33:17.857919: step: 920/459, loss: 0.007964755408465862 ================================================== Loss: 0.234 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2844807841614907, 'r': 0.3130908060980353, 'f1': 0.2981009120391411}, 'combined': 0.21965330360778815, 'epoch': 16} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33418816168445836, 'r': 0.3043879434450799, 'f1': 0.31859271413918366}, 'combined': 0.2038993370490775, 'epoch': 16} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28236791999408456, 'r': 0.3150518727827737, 'f1': 0.2978158510430883}, 'combined': 0.2194432586633282, 'epoch': 16} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33730392731873343, 'r': 0.3118296543729146, 'f1': 0.3240669410457051}, 'combined': 0.20740284226925124, 'epoch': 16} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3034446444568798, 'r': 0.31726375539988766, 'f1': 0.310200369379853}, 'combined': 0.2285686932272601, 'epoch': 16} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3409246559262686, 'r': 0.31728389511386307, 'f1': 0.32867972499507936}, 'combined': 0.23565716131722672, 'epoch': 16} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24666666666666665, 'r': 0.35238095238095235, 'f1': 0.2901960784313725}, 'combined': 0.19346405228758168, 'epoch': 16} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2702702702702703, 'r': 0.43478260869565216, 'f1': 0.3333333333333333}, 'combined': 0.16666666666666666, 'epoch': 16} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2777777777777778, 'r': 0.1724137931034483, 'f1': 0.21276595744680854}, 'combined': 0.14184397163120568, 'epoch': 16} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2965306761268781, 'r': 0.33704340607210626, 'f1': 0.315491785079929}, 'combined': 0.2324676311115266, 'epoch': 12} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31983745334792824, 'r': 0.32478489348160866, 'f1': 0.3222921877528559}, 'combined': 0.20626700016182775, 'epoch': 12} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23270440251572325, 'r': 0.35238095238095235, 'f1': 0.2803030303030303}, 'combined': 0.18686868686868685, 'epoch': 12} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29418702499903554, 'r': 0.3594998939267151, 'f1': 0.32358060478117656}, 'combined': 0.23842781404928798, 'epoch': 13} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3202762492367604, 'r': 0.31212376289255195, 'f1': 0.31614745781013553}, 'combined': 0.20233437299848672, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2878787878787879, 'r': 0.41304347826086957, 'f1': 0.3392857142857143}, 'combined': 0.16964285714285715, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3189032241859154, 'r': 0.3376622373733222, 'f1': 0.32801474487694154}, 'combined': 0.24169507517248323, 'epoch': 15} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34157316773046453, 'r': 0.3008578947798814, 'f1': 0.3199253278791385}, 'combined': 0.22938042376240123, 'epoch': 15} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35294117647058826, 'r': 0.20689655172413793, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 15} ****************************** Epoch: 17 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 16:35:55.874360: step: 2/459, loss: 0.0226190984249115 2023-01-22 16:35:56.558399: step: 4/459, loss: 0.03529379889369011 2023-01-22 16:35:57.135058: step: 6/459, loss: 0.2590015232563019 2023-01-22 16:35:57.778031: step: 8/459, loss: 0.3523576557636261 2023-01-22 16:35:58.423113: step: 10/459, loss: 0.10192857682704926 2023-01-22 16:35:59.122161: step: 12/459, loss: 0.08469638973474503 2023-01-22 16:35:59.733032: step: 14/459, loss: 0.26782482862472534 2023-01-22 16:36:00.359957: step: 16/459, loss: 0.1263800412416458 2023-01-22 16:36:00.907602: step: 18/459, loss: 0.07599637657403946 2023-01-22 16:36:01.477241: step: 20/459, loss: 0.045984897762537 2023-01-22 16:36:02.112858: step: 22/459, loss: 0.027757011353969574 2023-01-22 16:36:02.786690: step: 24/459, loss: 0.058458905667066574 2023-01-22 16:36:03.403871: step: 26/459, loss: 0.21552860736846924 2023-01-22 16:36:04.104518: step: 28/459, loss: 0.06474911421537399 2023-01-22 16:36:04.772873: step: 30/459, loss: 0.07021252065896988 2023-01-22 16:36:05.436769: step: 32/459, loss: 0.015940167009830475 2023-01-22 16:36:06.032387: step: 34/459, loss: 0.06814922392368317 2023-01-22 16:36:06.618554: step: 36/459, loss: 0.016211269423365593 2023-01-22 16:36:07.231630: step: 38/459, loss: 0.045073866844177246 2023-01-22 16:36:07.907451: step: 40/459, loss: 0.08995107561349869 2023-01-22 16:36:08.565038: step: 42/459, loss: 0.255002498626709 2023-01-22 16:36:09.234322: step: 44/459, loss: 0.022879691794514656 2023-01-22 16:36:09.853751: step: 46/459, loss: 0.06257915496826172 2023-01-22 16:36:10.466847: step: 48/459, loss: 0.11619651317596436 2023-01-22 16:36:11.090485: step: 50/459, loss: 0.024520408362150192 2023-01-22 16:36:11.680019: step: 52/459, loss: 0.019433829933404922 2023-01-22 16:36:12.302688: step: 54/459, loss: 0.24324040114879608 2023-01-22 16:36:12.913675: step: 56/459, loss: 0.09304536879062653 2023-01-22 16:36:13.647854: step: 58/459, loss: 0.02985798940062523 2023-01-22 16:36:14.218271: step: 60/459, loss: 0.02789018303155899 2023-01-22 16:36:14.841886: step: 62/459, loss: 0.04830945283174515 2023-01-22 16:36:15.470480: step: 64/459, loss: 2.348137855529785 2023-01-22 16:36:16.082131: step: 66/459, loss: 0.019591206684708595 2023-01-22 16:36:16.726356: step: 68/459, loss: 0.0610213503241539 2023-01-22 16:36:17.317233: step: 70/459, loss: 0.024994419887661934 2023-01-22 16:36:17.860205: step: 72/459, loss: 0.03376520425081253 2023-01-22 16:36:18.490251: step: 74/459, loss: 0.07706993073225021 2023-01-22 16:36:19.125999: step: 76/459, loss: 1.0301122665405273 2023-01-22 16:36:19.759500: step: 78/459, loss: 0.06063727289438248 2023-01-22 16:36:20.415867: step: 80/459, loss: 0.04840623959898949 2023-01-22 16:36:21.047876: step: 82/459, loss: 0.23986659944057465 2023-01-22 16:36:21.707165: step: 84/459, loss: 0.10852351784706116 2023-01-22 16:36:22.294771: step: 86/459, loss: 0.061025142669677734 2023-01-22 16:36:22.902478: step: 88/459, loss: 0.03342805057764053 2023-01-22 16:36:23.632962: step: 90/459, loss: 0.10734865814447403 2023-01-22 16:36:24.206432: step: 92/459, loss: 0.05435201898217201 2023-01-22 16:36:24.823806: step: 94/459, loss: 0.08620511740446091 2023-01-22 16:36:25.430726: step: 96/459, loss: 0.059245433658361435 2023-01-22 16:36:26.018043: step: 98/459, loss: 0.08935914188623428 2023-01-22 16:36:26.660431: step: 100/459, loss: 0.03576292097568512 2023-01-22 16:36:27.284217: step: 102/459, loss: 0.07226675003767014 2023-01-22 16:36:27.993706: step: 104/459, loss: 0.3202812671661377 2023-01-22 16:36:28.531775: step: 106/459, loss: 0.007778709288686514 2023-01-22 16:36:29.114080: step: 108/459, loss: 0.04522336274385452 2023-01-22 16:36:29.766922: step: 110/459, loss: 1.2918485403060913 2023-01-22 16:36:30.433696: step: 112/459, loss: 0.0449567548930645 2023-01-22 16:36:31.019220: step: 114/459, loss: 0.15102477371692657 2023-01-22 16:36:31.645638: step: 116/459, loss: 0.04215138405561447 2023-01-22 16:36:32.253080: step: 118/459, loss: 0.40142834186553955 2023-01-22 16:36:32.838098: step: 120/459, loss: 0.10582423955202103 2023-01-22 16:36:33.462515: step: 122/459, loss: 0.11537880450487137 2023-01-22 16:36:34.118616: step: 124/459, loss: 0.03137029707431793 2023-01-22 16:36:34.821415: step: 126/459, loss: 0.15642213821411133 2023-01-22 16:36:35.527106: step: 128/459, loss: 0.06223905459046364 2023-01-22 16:36:36.111673: step: 130/459, loss: 0.15314941108226776 2023-01-22 16:36:36.709885: step: 132/459, loss: 2.2644734382629395 2023-01-22 16:36:37.354499: step: 134/459, loss: 0.04459386691451073 2023-01-22 16:36:37.971060: step: 136/459, loss: 0.04050346091389656 2023-01-22 16:36:38.647415: step: 138/459, loss: 0.03961852937936783 2023-01-22 16:36:39.325518: step: 140/459, loss: 0.07778133451938629 2023-01-22 16:36:39.885539: step: 142/459, loss: 0.11239825189113617 2023-01-22 16:36:40.567237: step: 144/459, loss: 0.03582390770316124 2023-01-22 16:36:41.174567: step: 146/459, loss: 0.06874121725559235 2023-01-22 16:36:41.923966: step: 148/459, loss: 0.07385246455669403 2023-01-22 16:36:42.602971: step: 150/459, loss: 0.07043734937906265 2023-01-22 16:36:43.248978: step: 152/459, loss: 0.04708825796842575 2023-01-22 16:36:43.844234: step: 154/459, loss: 0.019653119146823883 2023-01-22 16:36:44.468447: step: 156/459, loss: 0.39006465673446655 2023-01-22 16:36:45.098884: step: 158/459, loss: 0.15348513424396515 2023-01-22 16:36:45.772539: step: 160/459, loss: 0.09382452070713043 2023-01-22 16:36:46.333009: step: 162/459, loss: 0.10678225010633469 2023-01-22 16:36:46.932696: step: 164/459, loss: 0.17927195131778717 2023-01-22 16:36:47.578514: step: 166/459, loss: 0.06318739801645279 2023-01-22 16:36:48.223895: step: 168/459, loss: 0.12340334802865982 2023-01-22 16:36:48.856955: step: 170/459, loss: 0.03600132837891579 2023-01-22 16:36:49.517080: step: 172/459, loss: 0.26235780119895935 2023-01-22 16:36:50.170273: step: 174/459, loss: 0.11083702743053436 2023-01-22 16:36:50.777820: step: 176/459, loss: 0.07322274148464203 2023-01-22 16:36:51.432832: step: 178/459, loss: 0.11904281377792358 2023-01-22 16:36:52.027920: step: 180/459, loss: 0.013243947178125381 2023-01-22 16:36:52.610862: step: 182/459, loss: 0.019140755757689476 2023-01-22 16:36:53.234034: step: 184/459, loss: 0.04663524404168129 2023-01-22 16:36:53.884009: step: 186/459, loss: 0.2772245407104492 2023-01-22 16:36:54.509958: step: 188/459, loss: 0.057375356554985046 2023-01-22 16:36:55.134541: step: 190/459, loss: 1.0641883611679077 2023-01-22 16:36:55.713025: step: 192/459, loss: 0.07098899036645889 2023-01-22 16:36:56.301115: step: 194/459, loss: 0.07579025626182556 2023-01-22 16:36:56.949400: step: 196/459, loss: 0.015021344646811485 2023-01-22 16:36:57.513590: step: 198/459, loss: 0.07299932837486267 2023-01-22 16:36:58.109393: step: 200/459, loss: 0.011287749744951725 2023-01-22 16:36:58.721802: step: 202/459, loss: 0.041193194687366486 2023-01-22 16:36:59.409884: step: 204/459, loss: 0.16358576714992523 2023-01-22 16:37:00.028870: step: 206/459, loss: 0.049519870430231094 2023-01-22 16:37:00.690165: step: 208/459, loss: 0.06840280443429947 2023-01-22 16:37:01.248109: step: 210/459, loss: 0.12746348977088928 2023-01-22 16:37:01.869256: step: 212/459, loss: 0.08188238739967346 2023-01-22 16:37:02.484540: step: 214/459, loss: 0.43958908319473267 2023-01-22 16:37:03.146138: step: 216/459, loss: 0.21615906059741974 2023-01-22 16:37:03.790417: step: 218/459, loss: 0.025017056614160538 2023-01-22 16:37:04.399462: step: 220/459, loss: 0.06779380142688751 2023-01-22 16:37:04.987949: step: 222/459, loss: 0.10247702151536942 2023-01-22 16:37:05.600324: step: 224/459, loss: 0.4134225845336914 2023-01-22 16:37:06.236451: step: 226/459, loss: 0.07948349416255951 2023-01-22 16:37:06.888262: step: 228/459, loss: 0.19022570550441742 2023-01-22 16:37:07.538003: step: 230/459, loss: 0.057643648236989975 2023-01-22 16:37:08.200744: step: 232/459, loss: 0.09898307919502258 2023-01-22 16:37:08.825338: step: 234/459, loss: 0.07345885783433914 2023-01-22 16:37:09.415558: step: 236/459, loss: 0.12747201323509216 2023-01-22 16:37:09.988975: step: 238/459, loss: 0.0735468715429306 2023-01-22 16:37:10.597393: step: 240/459, loss: 0.22112107276916504 2023-01-22 16:37:11.236211: step: 242/459, loss: 0.1738436073064804 2023-01-22 16:37:11.864337: step: 244/459, loss: 0.12261100113391876 2023-01-22 16:37:12.479463: step: 246/459, loss: 0.04281124472618103 2023-01-22 16:37:13.112933: step: 248/459, loss: 0.24012352526187897 2023-01-22 16:37:13.847141: step: 250/459, loss: 0.07400960475206375 2023-01-22 16:37:14.430423: step: 252/459, loss: 0.08669819682836533 2023-01-22 16:37:15.031464: step: 254/459, loss: 0.05449916794896126 2023-01-22 16:37:15.723281: step: 256/459, loss: 0.041366685181856155 2023-01-22 16:37:16.307313: step: 258/459, loss: 0.13975276052951813 2023-01-22 16:37:16.929786: step: 260/459, loss: 0.08194644749164581 2023-01-22 16:37:17.655674: step: 262/459, loss: 0.03902914375066757 2023-01-22 16:37:18.339049: step: 264/459, loss: 0.03473089262843132 2023-01-22 16:37:18.965101: step: 266/459, loss: 0.06892494857311249 2023-01-22 16:37:19.598665: step: 268/459, loss: 0.06244364753365517 2023-01-22 16:37:20.191534: step: 270/459, loss: 0.07171644270420074 2023-01-22 16:37:20.842925: step: 272/459, loss: 0.09546049684286118 2023-01-22 16:37:21.480366: step: 274/459, loss: 0.06281570345163345 2023-01-22 16:37:22.103816: step: 276/459, loss: 0.0925997868180275 2023-01-22 16:37:22.766061: step: 278/459, loss: 0.053407829254865646 2023-01-22 16:37:23.339229: step: 280/459, loss: 0.2946621775627136 2023-01-22 16:37:23.942664: step: 282/459, loss: 0.12305590510368347 2023-01-22 16:37:24.535365: step: 284/459, loss: 0.14393532276153564 2023-01-22 16:37:25.110756: step: 286/459, loss: 0.11888694018125534 2023-01-22 16:37:25.666745: step: 288/459, loss: 0.05261756107211113 2023-01-22 16:37:26.292952: step: 290/459, loss: 0.21149133145809174 2023-01-22 16:37:26.886406: step: 292/459, loss: 0.02318679541349411 2023-01-22 16:37:27.532316: step: 294/459, loss: 0.11732055991888046 2023-01-22 16:37:28.245963: step: 296/459, loss: 0.4831003248691559 2023-01-22 16:37:28.820001: step: 298/459, loss: 0.05546312779188156 2023-01-22 16:37:29.451505: step: 300/459, loss: 0.06928793340921402 2023-01-22 16:37:30.083947: step: 302/459, loss: 0.054580509662628174 2023-01-22 16:37:30.723793: step: 304/459, loss: 0.24619364738464355 2023-01-22 16:37:31.341926: step: 306/459, loss: 0.03563022240996361 2023-01-22 16:37:32.085666: step: 308/459, loss: 0.14634720981121063 2023-01-22 16:37:32.722314: step: 310/459, loss: 0.09717052429914474 2023-01-22 16:37:33.383459: step: 312/459, loss: 0.06681129336357117 2023-01-22 16:37:33.990407: step: 314/459, loss: 0.1202440932393074 2023-01-22 16:37:34.610441: step: 316/459, loss: 0.07661222666501999 2023-01-22 16:37:35.281404: step: 318/459, loss: 0.04424034431576729 2023-01-22 16:37:35.949705: step: 320/459, loss: 0.12957195937633514 2023-01-22 16:37:36.600384: step: 322/459, loss: 0.07786662876605988 2023-01-22 16:37:37.267847: step: 324/459, loss: 0.03827553614974022 2023-01-22 16:37:37.872753: step: 326/459, loss: 0.5539201498031616 2023-01-22 16:37:38.500910: step: 328/459, loss: 0.08098413050174713 2023-01-22 16:37:39.083720: step: 330/459, loss: 0.13491769134998322 2023-01-22 16:37:39.665625: step: 332/459, loss: 0.04449357092380524 2023-01-22 16:37:40.262403: step: 334/459, loss: 0.11292774975299835 2023-01-22 16:37:40.884932: step: 336/459, loss: 0.10381443798542023 2023-01-22 16:37:41.471291: step: 338/459, loss: 0.08735895901918411 2023-01-22 16:37:42.147209: step: 340/459, loss: 0.022649236023426056 2023-01-22 16:37:42.746744: step: 342/459, loss: 0.08879528939723969 2023-01-22 16:37:43.376781: step: 344/459, loss: 0.04243733733892441 2023-01-22 16:37:43.962724: step: 346/459, loss: 0.036786969751119614 2023-01-22 16:37:44.624799: step: 348/459, loss: 1.5114189386367798 2023-01-22 16:37:45.319538: step: 350/459, loss: 0.07911691069602966 2023-01-22 16:37:45.936441: step: 352/459, loss: 0.0780545249581337 2023-01-22 16:37:46.577069: step: 354/459, loss: 0.07674381881952286 2023-01-22 16:37:47.199029: step: 356/459, loss: 0.023973552510142326 2023-01-22 16:37:47.780487: step: 358/459, loss: 0.034271810203790665 2023-01-22 16:37:48.421325: step: 360/459, loss: 0.027626093477010727 2023-01-22 16:37:49.035850: step: 362/459, loss: 0.03213593363761902 2023-01-22 16:37:49.641601: step: 364/459, loss: 0.031698260456323624 2023-01-22 16:37:50.269674: step: 366/459, loss: 0.016858292743563652 2023-01-22 16:37:50.909846: step: 368/459, loss: 0.0203403253108263 2023-01-22 16:37:51.548914: step: 370/459, loss: 0.050379421561956406 2023-01-22 16:37:52.095369: step: 372/459, loss: 0.06954978406429291 2023-01-22 16:37:52.675835: step: 374/459, loss: 1.5813164710998535 2023-01-22 16:37:53.303253: step: 376/459, loss: 2.703169345855713 2023-01-22 16:37:53.854831: step: 378/459, loss: 0.21990159153938293 2023-01-22 16:37:54.486060: step: 380/459, loss: 0.20268236100673676 2023-01-22 16:37:55.169866: step: 382/459, loss: 0.11188065260648727 2023-01-22 16:37:55.794764: step: 384/459, loss: 0.041432835161685944 2023-01-22 16:37:56.391561: step: 386/459, loss: 0.01689930260181427 2023-01-22 16:37:57.006779: step: 388/459, loss: 0.1070152148604393 2023-01-22 16:37:57.627911: step: 390/459, loss: 0.09842966496944427 2023-01-22 16:37:58.261155: step: 392/459, loss: 0.055762551724910736 2023-01-22 16:37:58.897203: step: 394/459, loss: 0.03972569853067398 2023-01-22 16:37:59.492164: step: 396/459, loss: 0.03962523117661476 2023-01-22 16:38:00.134532: step: 398/459, loss: 0.05983889847993851 2023-01-22 16:38:00.759141: step: 400/459, loss: 0.10453197360038757 2023-01-22 16:38:01.365546: step: 402/459, loss: 0.015522785484790802 2023-01-22 16:38:02.020834: step: 404/459, loss: 0.07022076100111008 2023-01-22 16:38:02.610078: step: 406/459, loss: 0.0935448408126831 2023-01-22 16:38:03.216325: step: 408/459, loss: 0.028232766315340996 2023-01-22 16:38:03.782838: step: 410/459, loss: 0.1744021326303482 2023-01-22 16:38:04.453990: step: 412/459, loss: 0.09008517861366272 2023-01-22 16:38:05.125587: step: 414/459, loss: 0.21061651408672333 2023-01-22 16:38:05.700558: step: 416/459, loss: 0.05652464181184769 2023-01-22 16:38:06.328868: step: 418/459, loss: 0.046342119574546814 2023-01-22 16:38:06.956626: step: 420/459, loss: 0.008701956830918789 2023-01-22 16:38:07.590011: step: 422/459, loss: 0.06661341339349747 2023-01-22 16:38:08.309386: step: 424/459, loss: 0.20687001943588257 2023-01-22 16:38:08.946234: step: 426/459, loss: 0.04599497467279434 2023-01-22 16:38:09.541992: step: 428/459, loss: 0.05258144065737724 2023-01-22 16:38:10.197988: step: 430/459, loss: 0.0058744833804667 2023-01-22 16:38:10.792414: step: 432/459, loss: 0.20495785772800446 2023-01-22 16:38:11.429382: step: 434/459, loss: 0.0635644793510437 2023-01-22 16:38:12.009840: step: 436/459, loss: 0.030549094080924988 2023-01-22 16:38:12.711199: step: 438/459, loss: 0.12406887859106064 2023-01-22 16:38:13.351974: step: 440/459, loss: 0.011748535558581352 2023-01-22 16:38:13.971236: step: 442/459, loss: 0.4964453876018524 2023-01-22 16:38:14.711418: step: 444/459, loss: 0.13142791390419006 2023-01-22 16:38:15.359860: step: 446/459, loss: 0.015975257381796837 2023-01-22 16:38:15.970171: step: 448/459, loss: 0.19493427872657776 2023-01-22 16:38:16.635220: step: 450/459, loss: 0.06050697714090347 2023-01-22 16:38:17.278982: step: 452/459, loss: 0.0996752679347992 2023-01-22 16:38:17.899580: step: 454/459, loss: 0.01445807795971632 2023-01-22 16:38:18.570087: step: 456/459, loss: 0.13935434818267822 2023-01-22 16:38:19.200330: step: 458/459, loss: 0.23168939352035522 2023-01-22 16:38:19.782856: step: 460/459, loss: 0.011708037927746773 2023-01-22 16:38:20.384106: step: 462/459, loss: 0.3938480615615845 2023-01-22 16:38:21.008668: step: 464/459, loss: 3.9725496768951416 2023-01-22 16:38:21.634452: step: 466/459, loss: 0.14161552488803864 2023-01-22 16:38:22.247543: step: 468/459, loss: 1.0138821601867676 2023-01-22 16:38:22.841586: step: 470/459, loss: 0.32088178396224976 2023-01-22 16:38:23.493497: step: 472/459, loss: 0.047578830271959305 2023-01-22 16:38:24.196606: step: 474/459, loss: 0.04982667788863182 2023-01-22 16:38:24.894218: step: 476/459, loss: 0.023769646883010864 2023-01-22 16:38:25.483602: step: 478/459, loss: 0.02422056719660759 2023-01-22 16:38:26.106629: step: 480/459, loss: 0.02937036007642746 2023-01-22 16:38:26.698758: step: 482/459, loss: 0.05573542043566704 2023-01-22 16:38:27.246629: step: 484/459, loss: 0.17392857372760773 2023-01-22 16:38:27.878191: step: 486/459, loss: 0.19941039383411407 2023-01-22 16:38:28.499213: step: 488/459, loss: 0.012867371551692486 2023-01-22 16:38:29.095541: step: 490/459, loss: 0.25541186332702637 2023-01-22 16:38:29.686158: step: 492/459, loss: 0.1580924540758133 2023-01-22 16:38:30.243823: step: 494/459, loss: 0.05947078764438629 2023-01-22 16:38:30.773910: step: 496/459, loss: 0.04087104648351669 2023-01-22 16:38:31.356665: step: 498/459, loss: 0.05700166895985603 2023-01-22 16:38:32.052085: step: 500/459, loss: 7.642635822296143 2023-01-22 16:38:32.681182: step: 502/459, loss: 0.497416615486145 2023-01-22 16:38:33.336253: step: 504/459, loss: 0.105469711124897 2023-01-22 16:38:33.980971: step: 506/459, loss: 0.07882914692163467 2023-01-22 16:38:34.574368: step: 508/459, loss: 0.16796091198921204 2023-01-22 16:38:35.171565: step: 510/459, loss: 0.2506042420864105 2023-01-22 16:38:35.762714: step: 512/459, loss: 0.10715793073177338 2023-01-22 16:38:36.390465: step: 514/459, loss: 0.14893773198127747 2023-01-22 16:38:36.974653: step: 516/459, loss: 0.12284407019615173 2023-01-22 16:38:37.571777: step: 518/459, loss: 0.07207246124744415 2023-01-22 16:38:38.184788: step: 520/459, loss: 0.04315793141722679 2023-01-22 16:38:38.814825: step: 522/459, loss: 0.14236164093017578 2023-01-22 16:38:39.459608: step: 524/459, loss: 0.038890548050403595 2023-01-22 16:38:40.084390: step: 526/459, loss: 0.054369810968637466 2023-01-22 16:38:40.756057: step: 528/459, loss: 0.06407813727855682 2023-01-22 16:38:41.400746: step: 530/459, loss: 0.13652962446212769 2023-01-22 16:38:42.016310: step: 532/459, loss: 0.08155030012130737 2023-01-22 16:38:42.607747: step: 534/459, loss: 0.4784477949142456 2023-01-22 16:38:43.266531: step: 536/459, loss: 0.11420230567455292 2023-01-22 16:38:43.886269: step: 538/459, loss: 0.08054330199956894 2023-01-22 16:38:44.568135: step: 540/459, loss: 0.058077193796634674 2023-01-22 16:38:45.177204: step: 542/459, loss: 0.04866713657975197 2023-01-22 16:38:45.803854: step: 544/459, loss: 0.17648082971572876 2023-01-22 16:38:46.502637: step: 546/459, loss: 0.047203078866004944 2023-01-22 16:38:47.110640: step: 548/459, loss: 0.382712721824646 2023-01-22 16:38:47.699162: step: 550/459, loss: 0.018081746995449066 2023-01-22 16:38:48.290285: step: 552/459, loss: 0.12514257431030273 2023-01-22 16:38:48.918800: step: 554/459, loss: 0.5346789956092834 2023-01-22 16:38:49.599888: step: 556/459, loss: 0.1327831745147705 2023-01-22 16:38:50.221179: step: 558/459, loss: 0.04352327808737755 2023-01-22 16:38:50.843043: step: 560/459, loss: 0.036332231014966965 2023-01-22 16:38:51.470531: step: 562/459, loss: 0.3079025447368622 2023-01-22 16:38:52.168350: step: 564/459, loss: 0.023303352296352386 2023-01-22 16:38:52.791547: step: 566/459, loss: 0.13391026854515076 2023-01-22 16:38:53.433822: step: 568/459, loss: 0.09228402376174927 2023-01-22 16:38:54.108464: step: 570/459, loss: 0.1738479733467102 2023-01-22 16:38:54.704501: step: 572/459, loss: 0.08182685077190399 2023-01-22 16:38:55.272222: step: 574/459, loss: 0.06296449899673462 2023-01-22 16:38:55.958337: step: 576/459, loss: 0.0878196582198143 2023-01-22 16:38:56.586027: step: 578/459, loss: 0.08877350389957428 2023-01-22 16:38:57.219028: step: 580/459, loss: 0.04606643691658974 2023-01-22 16:38:57.920812: step: 582/459, loss: 0.04419446364045143 2023-01-22 16:38:58.599394: step: 584/459, loss: 0.10406481474637985 2023-01-22 16:38:59.239881: step: 586/459, loss: 0.11091474443674088 2023-01-22 16:38:59.893113: step: 588/459, loss: 0.13539834320545197 2023-01-22 16:39:00.512660: step: 590/459, loss: 0.08791746199131012 2023-01-22 16:39:01.117954: step: 592/459, loss: 0.03808167204260826 2023-01-22 16:39:01.733746: step: 594/459, loss: 0.889700174331665 2023-01-22 16:39:02.351870: step: 596/459, loss: 0.011606106534600258 2023-01-22 16:39:02.956207: step: 598/459, loss: 0.04024066403508186 2023-01-22 16:39:03.651644: step: 600/459, loss: 0.023934388533234596 2023-01-22 16:39:04.253969: step: 602/459, loss: 0.061381977051496506 2023-01-22 16:39:04.882370: step: 604/459, loss: 0.08606468141078949 2023-01-22 16:39:05.439598: step: 606/459, loss: 0.11655927449464798 2023-01-22 16:39:06.113597: step: 608/459, loss: 0.1389087587594986 2023-01-22 16:39:06.699102: step: 610/459, loss: 0.024915117770433426 2023-01-22 16:39:07.312368: step: 612/459, loss: 0.055444296449422836 2023-01-22 16:39:07.889049: step: 614/459, loss: 0.0655502900481224 2023-01-22 16:39:08.541513: step: 616/459, loss: 0.13309986889362335 2023-01-22 16:39:09.159540: step: 618/459, loss: 0.08508802950382233 2023-01-22 16:39:09.791470: step: 620/459, loss: 0.05870969593524933 2023-01-22 16:39:10.376430: step: 622/459, loss: 0.04141663759946823 2023-01-22 16:39:11.001230: step: 624/459, loss: 0.041943199932575226 2023-01-22 16:39:11.601006: step: 626/459, loss: 0.03341648727655411 2023-01-22 16:39:12.213948: step: 628/459, loss: 0.02958584763109684 2023-01-22 16:39:12.826294: step: 630/459, loss: 0.11547474563121796 2023-01-22 16:39:13.426657: step: 632/459, loss: 0.10805100202560425 2023-01-22 16:39:14.017793: step: 634/459, loss: 0.07075667381286621 2023-01-22 16:39:14.640023: step: 636/459, loss: 0.06310361623764038 2023-01-22 16:39:15.268458: step: 638/459, loss: 0.03716906160116196 2023-01-22 16:39:15.901610: step: 640/459, loss: 0.3339294195175171 2023-01-22 16:39:16.553907: step: 642/459, loss: 0.1368045061826706 2023-01-22 16:39:17.197571: step: 644/459, loss: 0.17632371187210083 2023-01-22 16:39:17.813114: step: 646/459, loss: 0.035174254328012466 2023-01-22 16:39:18.546045: step: 648/459, loss: 0.1353309452533722 2023-01-22 16:39:19.151581: step: 650/459, loss: 0.08942495286464691 2023-01-22 16:39:19.720517: step: 652/459, loss: 0.03595377132296562 2023-01-22 16:39:20.366808: step: 654/459, loss: 0.03920774161815643 2023-01-22 16:39:20.957751: step: 656/459, loss: 0.013756465166807175 2023-01-22 16:39:21.606896: step: 658/459, loss: 0.06631498038768768 2023-01-22 16:39:22.207105: step: 660/459, loss: 0.05153050646185875 2023-01-22 16:39:22.763071: step: 662/459, loss: 0.034770216792821884 2023-01-22 16:39:23.319893: step: 664/459, loss: 0.013488632626831532 2023-01-22 16:39:23.912108: step: 666/459, loss: 0.11399305611848831 2023-01-22 16:39:24.564169: step: 668/459, loss: 0.09216897189617157 2023-01-22 16:39:25.162971: step: 670/459, loss: 0.024257924407720566 2023-01-22 16:39:25.739303: step: 672/459, loss: 0.20946262776851654 2023-01-22 16:39:26.383281: step: 674/459, loss: 0.08639010041952133 2023-01-22 16:39:26.996658: step: 676/459, loss: 0.04631488397717476 2023-01-22 16:39:27.620574: step: 678/459, loss: 0.033540304750204086 2023-01-22 16:39:28.274396: step: 680/459, loss: 0.05767352133989334 2023-01-22 16:39:28.909066: step: 682/459, loss: 3.39371657371521 2023-01-22 16:39:29.494993: step: 684/459, loss: 0.12376079708337784 2023-01-22 16:39:30.153903: step: 686/459, loss: 0.00886446051299572 2023-01-22 16:39:30.758078: step: 688/459, loss: 0.0412302240729332 2023-01-22 16:39:31.351350: step: 690/459, loss: 0.1375507265329361 2023-01-22 16:39:31.986279: step: 692/459, loss: 0.06381123512983322 2023-01-22 16:39:32.656572: step: 694/459, loss: 0.34613487124443054 2023-01-22 16:39:33.302834: step: 696/459, loss: 0.09578805416822433 2023-01-22 16:39:33.887497: step: 698/459, loss: 0.04006124660372734 2023-01-22 16:39:34.504086: step: 700/459, loss: 0.11099801957607269 2023-01-22 16:39:35.155713: step: 702/459, loss: 0.5062176585197449 2023-01-22 16:39:35.823899: step: 704/459, loss: 0.2787242829799652 2023-01-22 16:39:36.455166: step: 706/459, loss: 0.11305487155914307 2023-01-22 16:39:37.134589: step: 708/459, loss: 0.0361800380051136 2023-01-22 16:39:37.799052: step: 710/459, loss: 0.43916627764701843 2023-01-22 16:39:38.400027: step: 712/459, loss: 0.565156877040863 2023-01-22 16:39:38.976321: step: 714/459, loss: 0.12025880813598633 2023-01-22 16:39:39.680939: step: 716/459, loss: 0.11039142310619354 2023-01-22 16:39:40.304226: step: 718/459, loss: 0.15842723846435547 2023-01-22 16:39:40.914754: step: 720/459, loss: 0.03732123225927353 2023-01-22 16:39:41.471708: step: 722/459, loss: 0.14157818257808685 2023-01-22 16:39:42.223768: step: 724/459, loss: 0.11912128329277039 2023-01-22 16:39:42.828655: step: 726/459, loss: 0.1442282348871231 2023-01-22 16:39:43.556059: step: 728/459, loss: 0.08625755459070206 2023-01-22 16:39:44.164047: step: 730/459, loss: 0.04558468237519264 2023-01-22 16:39:44.786467: step: 732/459, loss: 0.10837836563587189 2023-01-22 16:39:45.402371: step: 734/459, loss: 0.052067819982767105 2023-01-22 16:39:46.034652: step: 736/459, loss: 0.09042655676603317 2023-01-22 16:39:46.677398: step: 738/459, loss: 0.1513449102640152 2023-01-22 16:39:47.316732: step: 740/459, loss: 0.07803641259670258 2023-01-22 16:39:47.958556: step: 742/459, loss: 0.09189755469560623 2023-01-22 16:39:48.643000: step: 744/459, loss: 0.12148813158273697 2023-01-22 16:39:49.280553: step: 746/459, loss: 0.09521518647670746 2023-01-22 16:39:49.847132: step: 748/459, loss: 0.19277696311473846 2023-01-22 16:39:50.513057: step: 750/459, loss: 0.34338971972465515 2023-01-22 16:39:51.153096: step: 752/459, loss: 0.09531759470701218 2023-01-22 16:39:51.776463: step: 754/459, loss: 0.09077014774084091 2023-01-22 16:39:52.403798: step: 756/459, loss: 0.05063566192984581 2023-01-22 16:39:53.045673: step: 758/459, loss: 0.0978822261095047 2023-01-22 16:39:53.658237: step: 760/459, loss: 0.040600016713142395 2023-01-22 16:39:54.278743: step: 762/459, loss: 0.04120824486017227 2023-01-22 16:39:54.837293: step: 764/459, loss: 0.12963800132274628 2023-01-22 16:39:55.465809: step: 766/459, loss: 0.28271690011024475 2023-01-22 16:39:56.096493: step: 768/459, loss: 0.09325085580348969 2023-01-22 16:39:56.682030: step: 770/459, loss: 0.040906865149736404 2023-01-22 16:39:57.324416: step: 772/459, loss: 0.016813797876238823 2023-01-22 16:39:57.987500: step: 774/459, loss: 0.11133258789777756 2023-01-22 16:39:58.579412: step: 776/459, loss: 0.23792873322963715 2023-01-22 16:39:59.163723: step: 778/459, loss: 0.246930330991745 2023-01-22 16:39:59.800877: step: 780/459, loss: 0.4502424895763397 2023-01-22 16:40:00.433344: step: 782/459, loss: 0.05796670913696289 2023-01-22 16:40:01.094146: step: 784/459, loss: 0.16376975178718567 2023-01-22 16:40:01.739432: step: 786/459, loss: 0.04326638579368591 2023-01-22 16:40:02.363310: step: 788/459, loss: 0.7212471961975098 2023-01-22 16:40:02.993516: step: 790/459, loss: 0.06741322576999664 2023-01-22 16:40:03.616513: step: 792/459, loss: 0.03265785798430443 2023-01-22 16:40:04.223126: step: 794/459, loss: 0.04889584705233574 2023-01-22 16:40:04.839123: step: 796/459, loss: 0.0690174251794815 2023-01-22 16:40:05.525789: step: 798/459, loss: 0.16460713744163513 2023-01-22 16:40:06.174336: step: 800/459, loss: 0.1522631049156189 2023-01-22 16:40:06.791403: step: 802/459, loss: 0.03760905563831329 2023-01-22 16:40:07.405837: step: 804/459, loss: 0.056172262877225876 2023-01-22 16:40:08.040428: step: 806/459, loss: 0.049944061785936356 2023-01-22 16:40:08.683996: step: 808/459, loss: 0.01903534308075905 2023-01-22 16:40:09.296444: step: 810/459, loss: 0.2693248391151428 2023-01-22 16:40:09.921891: step: 812/459, loss: 0.08891575038433075 2023-01-22 16:40:10.511560: step: 814/459, loss: 0.08002731949090958 2023-01-22 16:40:11.216258: step: 816/459, loss: 0.7192894220352173 2023-01-22 16:40:11.911548: step: 818/459, loss: 0.1666012704372406 2023-01-22 16:40:12.481043: step: 820/459, loss: 0.07138966023921967 2023-01-22 16:40:13.080320: step: 822/459, loss: 0.450344055891037 2023-01-22 16:40:13.736359: step: 824/459, loss: 0.06457008421421051 2023-01-22 16:40:14.391327: step: 826/459, loss: 0.08821283280849457 2023-01-22 16:40:15.009027: step: 828/459, loss: 0.37116143107414246 2023-01-22 16:40:15.634661: step: 830/459, loss: 0.03647668659687042 2023-01-22 16:40:16.281739: step: 832/459, loss: 0.024241134524345398 2023-01-22 16:40:16.963459: step: 834/459, loss: 0.2746947109699249 2023-01-22 16:40:17.599670: step: 836/459, loss: 0.06685367226600647 2023-01-22 16:40:18.230590: step: 838/459, loss: 0.1265377402305603 2023-01-22 16:40:18.834856: step: 840/459, loss: 0.18362504243850708 2023-01-22 16:40:19.447606: step: 842/459, loss: 0.030790254473686218 2023-01-22 16:40:20.051623: step: 844/459, loss: 0.25082752108573914 2023-01-22 16:40:20.710510: step: 846/459, loss: 0.17694848775863647 2023-01-22 16:40:21.326659: step: 848/459, loss: 0.08286664634943008 2023-01-22 16:40:21.983595: step: 850/459, loss: 0.2955385446548462 2023-01-22 16:40:22.563945: step: 852/459, loss: 0.015113789588212967 2023-01-22 16:40:23.180991: step: 854/459, loss: 0.055542830377817154 2023-01-22 16:40:23.839266: step: 856/459, loss: 0.09365937113761902 2023-01-22 16:40:24.444617: step: 858/459, loss: 0.06068319082260132 2023-01-22 16:40:25.131620: step: 860/459, loss: 0.38057273626327515 2023-01-22 16:40:25.767692: step: 862/459, loss: 0.03953362628817558 2023-01-22 16:40:26.493168: step: 864/459, loss: 0.23629504442214966 2023-01-22 16:40:27.182086: step: 866/459, loss: 0.31381407380104065 2023-01-22 16:40:27.752728: step: 868/459, loss: 0.07678572088479996 2023-01-22 16:40:28.347017: step: 870/459, loss: 0.06592600792646408 2023-01-22 16:40:28.962174: step: 872/459, loss: 0.08193004876375198 2023-01-22 16:40:29.584950: step: 874/459, loss: 0.0635322704911232 2023-01-22 16:40:30.187533: step: 876/459, loss: 0.11143606901168823 2023-01-22 16:40:30.853537: step: 878/459, loss: 0.06813451647758484 2023-01-22 16:40:31.503268: step: 880/459, loss: 0.1352187842130661 2023-01-22 16:40:32.160658: step: 882/459, loss: 0.04442784562706947 2023-01-22 16:40:32.830074: step: 884/459, loss: 0.06384018808603287 2023-01-22 16:40:33.522493: step: 886/459, loss: 0.0875781774520874 2023-01-22 16:40:34.118579: step: 888/459, loss: 0.022100219503045082 2023-01-22 16:40:34.749211: step: 890/459, loss: 0.11703900247812271 2023-01-22 16:40:35.315047: step: 892/459, loss: 0.09152207523584366 2023-01-22 16:40:35.970733: step: 894/459, loss: 0.27747681736946106 2023-01-22 16:40:36.563597: step: 896/459, loss: 0.1579284518957138 2023-01-22 16:40:37.175142: step: 898/459, loss: 0.09124882519245148 2023-01-22 16:40:37.783351: step: 900/459, loss: 0.04492473974823952 2023-01-22 16:40:38.398227: step: 902/459, loss: 0.03993421420454979 2023-01-22 16:40:39.074814: step: 904/459, loss: 0.0835069939494133 2023-01-22 16:40:39.682751: step: 906/459, loss: 0.03453861176967621 2023-01-22 16:40:40.380716: step: 908/459, loss: 0.06946581602096558 2023-01-22 16:40:41.017013: step: 910/459, loss: 0.09529326111078262 2023-01-22 16:40:41.683915: step: 912/459, loss: 0.10344228148460388 2023-01-22 16:40:42.275382: step: 914/459, loss: 0.10859185457229614 2023-01-22 16:40:42.916651: step: 916/459, loss: 2.258355140686035 2023-01-22 16:40:43.583946: step: 918/459, loss: 0.18854430317878723 2023-01-22 16:40:44.057867: step: 920/459, loss: 0.004039850551635027 ================================================== Loss: 0.180 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29719270833333333, 'r': 0.33835981973434537, 'f1': 0.3164429902395741}, 'combined': 0.23316851912389672, 'epoch': 17} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31466824660710857, 'r': 0.3112323785822812, 'f1': 0.3129408820328701}, 'combined': 0.20028216450103686, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2875256598240469, 'r': 0.33826548214593755, 'f1': 0.3108385511611318}, 'combined': 0.22903893243451814, 'epoch': 17} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.30144188949068956, 'r': 0.3047333386934996, 'f1': 0.3030786780309105}, 'combined': 0.19397035393978268, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3028695266426218, 'r': 0.33735182569111766, 'f1': 0.3191820684725655}, 'combined': 0.23518678729557457, 'epoch': 17} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.30741788966422634, 'r': 0.31554470716674465, 'f1': 0.3114282894844075}, 'combined': 0.2232882075548582, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2222222222222222, 'r': 0.38095238095238093, 'f1': 0.2807017543859649}, 'combined': 0.1871345029239766, 'epoch': 17} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.21794871794871795, 'r': 0.3695652173913043, 'f1': 0.27419354838709675}, 'combined': 0.13709677419354838, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3125, 'r': 0.1724137931034483, 'f1': 0.22222222222222224}, 'combined': 0.14814814814814814, 'epoch': 17} New best chinese model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29719270833333333, 'r': 0.33835981973434537, 'f1': 0.3164429902395741}, 'combined': 0.23316851912389672, 'epoch': 17} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31466824660710857, 'r': 0.3112323785822812, 'f1': 0.3129408820328701}, 'combined': 0.20028216450103686, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2222222222222222, 'r': 0.38095238095238093, 'f1': 0.2807017543859649}, 'combined': 0.1871345029239766, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29418702499903554, 'r': 0.3594998939267151, 'f1': 0.32358060478117656}, 'combined': 0.23842781404928798, 'epoch': 13} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3202762492367604, 'r': 0.31212376289255195, 'f1': 0.31614745781013553}, 'combined': 0.20233437299848672, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2878787878787879, 'r': 0.41304347826086957, 'f1': 0.3392857142857143}, 'combined': 0.16964285714285715, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3189032241859154, 'r': 0.3376622373733222, 'f1': 0.32801474487694154}, 'combined': 0.24169507517248323, 'epoch': 15} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34157316773046453, 'r': 0.3008578947798814, 'f1': 0.3199253278791385}, 'combined': 0.22938042376240123, 'epoch': 15} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35294117647058826, 'r': 0.20689655172413793, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 15} ****************************** Epoch: 18 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 16:43:31.514262: step: 2/459, loss: 0.05346100032329559 2023-01-22 16:43:32.135491: step: 4/459, loss: 0.05913763493299484 2023-01-22 16:43:32.714594: step: 6/459, loss: 0.021352050825953484 2023-01-22 16:43:33.378934: step: 8/459, loss: 0.063618965446949 2023-01-22 16:43:33.981852: step: 10/459, loss: 0.022244349122047424 2023-01-22 16:43:34.646788: step: 12/459, loss: 0.02963201515376568 2023-01-22 16:43:35.260729: step: 14/459, loss: 0.028751306235790253 2023-01-22 16:43:35.909038: step: 16/459, loss: 0.18293286859989166 2023-01-22 16:43:36.521262: step: 18/459, loss: 0.11532185971736908 2023-01-22 16:43:37.065663: step: 20/459, loss: 0.14692234992980957 2023-01-22 16:43:37.713550: step: 22/459, loss: 0.07930877059698105 2023-01-22 16:43:38.369085: step: 24/459, loss: 0.06137746199965477 2023-01-22 16:43:38.996576: step: 26/459, loss: 0.04524914547801018 2023-01-22 16:43:39.685327: step: 28/459, loss: 0.027739033102989197 2023-01-22 16:43:40.296895: step: 30/459, loss: 0.5092236399650574 2023-01-22 16:43:40.935783: step: 32/459, loss: 0.13672605156898499 2023-01-22 16:43:41.526060: step: 34/459, loss: 0.043003201484680176 2023-01-22 16:43:42.181955: step: 36/459, loss: 0.0937446877360344 2023-01-22 16:43:42.823607: step: 38/459, loss: 0.0820828378200531 2023-01-22 16:43:43.423214: step: 40/459, loss: 0.056727491319179535 2023-01-22 16:43:44.053740: step: 42/459, loss: 0.0951480120420456 2023-01-22 16:43:44.613742: step: 44/459, loss: 0.3547365665435791 2023-01-22 16:43:45.341758: step: 46/459, loss: 0.013008134439587593 2023-01-22 16:43:45.935453: step: 48/459, loss: 0.03142639249563217 2023-01-22 16:43:46.612425: step: 50/459, loss: 0.06233704090118408 2023-01-22 16:43:47.288722: step: 52/459, loss: 0.09406264126300812 2023-01-22 16:43:47.897544: step: 54/459, loss: 0.0631914958357811 2023-01-22 16:43:48.520680: step: 56/459, loss: 0.03872083127498627 2023-01-22 16:43:49.113212: step: 58/459, loss: 0.05512724444270134 2023-01-22 16:43:49.726657: step: 60/459, loss: 0.03456186130642891 2023-01-22 16:43:50.266858: step: 62/459, loss: 0.008603779599070549 2023-01-22 16:43:50.907740: step: 64/459, loss: 0.09629219770431519 2023-01-22 16:43:51.444643: step: 66/459, loss: 0.3238354027271271 2023-01-22 16:43:52.057769: step: 68/459, loss: 0.04186465963721275 2023-01-22 16:43:52.691257: step: 70/459, loss: 0.05535939708352089 2023-01-22 16:43:53.319079: step: 72/459, loss: 0.08295807242393494 2023-01-22 16:43:53.874238: step: 74/459, loss: 0.05401318520307541 2023-01-22 16:43:54.546508: step: 76/459, loss: 0.05523856356739998 2023-01-22 16:43:55.131483: step: 78/459, loss: 0.06813941895961761 2023-01-22 16:43:55.760997: step: 80/459, loss: 0.05992984026670456 2023-01-22 16:43:56.369003: step: 82/459, loss: 0.32806310057640076 2023-01-22 16:43:56.957046: step: 84/459, loss: 0.004716029856353998 2023-01-22 16:43:57.546047: step: 86/459, loss: 0.07794470340013504 2023-01-22 16:43:58.122184: step: 88/459, loss: 0.020131798461079597 2023-01-22 16:43:58.743038: step: 90/459, loss: 0.028973788022994995 2023-01-22 16:43:59.325188: step: 92/459, loss: 0.06829780340194702 2023-01-22 16:43:59.918493: step: 94/459, loss: 0.108960822224617 2023-01-22 16:44:00.559697: step: 96/459, loss: 0.05704909935593605 2023-01-22 16:44:01.229694: step: 98/459, loss: 0.0728960782289505 2023-01-22 16:44:01.969829: step: 100/459, loss: 0.04655252769589424 2023-01-22 16:44:02.630215: step: 102/459, loss: 0.08365191519260406 2023-01-22 16:44:03.275571: step: 104/459, loss: 0.031054239720106125 2023-01-22 16:44:03.916460: step: 106/459, loss: 0.04685111716389656 2023-01-22 16:44:04.493955: step: 108/459, loss: 0.0875857025384903 2023-01-22 16:44:05.118337: step: 110/459, loss: 0.011423681862652302 2023-01-22 16:44:05.692029: step: 112/459, loss: 0.013203948736190796 2023-01-22 16:44:06.294955: step: 114/459, loss: 0.062149204313755035 2023-01-22 16:44:06.844187: step: 116/459, loss: 0.027100475504994392 2023-01-22 16:44:07.486646: step: 118/459, loss: 0.0633736401796341 2023-01-22 16:44:08.077177: step: 120/459, loss: 0.1202281191945076 2023-01-22 16:44:08.672102: step: 122/459, loss: 0.010333728045225143 2023-01-22 16:44:09.262128: step: 124/459, loss: 0.8763865232467651 2023-01-22 16:44:09.843905: step: 126/459, loss: 0.037693798542022705 2023-01-22 16:44:10.438145: step: 128/459, loss: 0.05038963258266449 2023-01-22 16:44:11.074749: step: 130/459, loss: 0.024578435346484184 2023-01-22 16:44:11.670925: step: 132/459, loss: 0.5459612607955933 2023-01-22 16:44:12.294816: step: 134/459, loss: 0.11301619559526443 2023-01-22 16:44:12.916420: step: 136/459, loss: 0.07596281915903091 2023-01-22 16:44:13.531107: step: 138/459, loss: 0.038311559706926346 2023-01-22 16:44:14.115605: step: 140/459, loss: 0.031467732042074203 2023-01-22 16:44:14.738224: step: 142/459, loss: 0.7375898957252502 2023-01-22 16:44:15.346512: step: 144/459, loss: 0.06720395386219025 2023-01-22 16:44:15.988184: step: 146/459, loss: 0.0812506303191185 2023-01-22 16:44:16.657148: step: 148/459, loss: 0.0904693752527237 2023-01-22 16:44:17.334814: step: 150/459, loss: 0.08629387617111206 2023-01-22 16:44:17.987983: step: 152/459, loss: 0.17918911576271057 2023-01-22 16:44:18.604904: step: 154/459, loss: 0.048269160091876984 2023-01-22 16:44:19.223889: step: 156/459, loss: 6.455811023712158 2023-01-22 16:44:19.897463: step: 158/459, loss: 0.048557348549366 2023-01-22 16:44:20.480139: step: 160/459, loss: 0.08517688512802124 2023-01-22 16:44:21.128028: step: 162/459, loss: 0.07353177666664124 2023-01-22 16:44:21.800972: step: 164/459, loss: 0.007789887022227049 2023-01-22 16:44:22.413991: step: 166/459, loss: 0.028410015627741814 2023-01-22 16:44:23.047777: step: 168/459, loss: 0.024298351258039474 2023-01-22 16:44:23.669712: step: 170/459, loss: 0.08993580937385559 2023-01-22 16:44:24.270181: step: 172/459, loss: 0.013120484538376331 2023-01-22 16:44:24.902651: step: 174/459, loss: 0.10025325417518616 2023-01-22 16:44:25.508095: step: 176/459, loss: 0.028966033831238747 2023-01-22 16:44:26.203888: step: 178/459, loss: 0.011340652592480183 2023-01-22 16:44:26.772733: step: 180/459, loss: 0.06147002801299095 2023-01-22 16:44:27.417541: step: 182/459, loss: 0.04302888736128807 2023-01-22 16:44:27.970104: step: 184/459, loss: 0.03985420614480972 2023-01-22 16:44:28.613911: step: 186/459, loss: 0.03129641339182854 2023-01-22 16:44:29.210993: step: 188/459, loss: 0.06336456537246704 2023-01-22 16:44:29.825451: step: 190/459, loss: 0.07772348076105118 2023-01-22 16:44:30.410844: step: 192/459, loss: 0.05454317480325699 2023-01-22 16:44:31.056479: step: 194/459, loss: 0.0265592560172081 2023-01-22 16:44:31.628903: step: 196/459, loss: 0.04974993318319321 2023-01-22 16:44:32.212495: step: 198/459, loss: 0.07644502818584442 2023-01-22 16:44:32.824169: step: 200/459, loss: 0.10054710507392883 2023-01-22 16:44:33.443720: step: 202/459, loss: 0.039171963930130005 2023-01-22 16:44:34.113893: step: 204/459, loss: 0.06778719276189804 2023-01-22 16:44:34.706538: step: 206/459, loss: 0.07276531308889389 2023-01-22 16:44:35.306785: step: 208/459, loss: 0.1838439255952835 2023-01-22 16:44:35.944970: step: 210/459, loss: 0.12507811188697815 2023-01-22 16:44:36.648126: step: 212/459, loss: 0.03488181158900261 2023-01-22 16:44:37.266652: step: 214/459, loss: 0.14767807722091675 2023-01-22 16:44:37.975395: step: 216/459, loss: 0.09678028523921967 2023-01-22 16:44:38.652918: step: 218/459, loss: 0.012287321500480175 2023-01-22 16:44:39.270922: step: 220/459, loss: 0.057741936296224594 2023-01-22 16:44:39.970027: step: 222/459, loss: 0.08147388696670532 2023-01-22 16:44:40.654543: step: 224/459, loss: 0.10976623743772507 2023-01-22 16:44:41.187016: step: 226/459, loss: 0.02385665290057659 2023-01-22 16:44:41.848754: step: 228/459, loss: 0.09987145662307739 2023-01-22 16:44:42.502999: step: 230/459, loss: 0.007859901525080204 2023-01-22 16:44:43.141131: step: 232/459, loss: 0.061350639909505844 2023-01-22 16:44:43.814053: step: 234/459, loss: 0.10200343281030655 2023-01-22 16:44:44.455480: step: 236/459, loss: 0.07148648798465729 2023-01-22 16:44:45.055955: step: 238/459, loss: 0.019390175119042397 2023-01-22 16:44:45.619324: step: 240/459, loss: 0.024739330634474754 2023-01-22 16:44:46.272700: step: 242/459, loss: 0.13348719477653503 2023-01-22 16:44:46.895334: step: 244/459, loss: 0.08936150372028351 2023-01-22 16:44:47.483337: step: 246/459, loss: 0.04066132754087448 2023-01-22 16:44:48.046016: step: 248/459, loss: 0.04519509896636009 2023-01-22 16:44:48.665768: step: 250/459, loss: 0.051021724939346313 2023-01-22 16:44:49.293548: step: 252/459, loss: 0.06537435203790665 2023-01-22 16:44:49.862052: step: 254/459, loss: 0.6918044090270996 2023-01-22 16:44:50.575469: step: 256/459, loss: 0.03566872328519821 2023-01-22 16:44:51.224042: step: 258/459, loss: 0.657372772693634 2023-01-22 16:44:51.833322: step: 260/459, loss: 0.0091579370200634 2023-01-22 16:44:52.515310: step: 262/459, loss: 0.01748722977936268 2023-01-22 16:44:53.200328: step: 264/459, loss: 0.23975805938243866 2023-01-22 16:44:53.821950: step: 266/459, loss: 0.047606874257326126 2023-01-22 16:44:54.468419: step: 268/459, loss: 0.10824992507696152 2023-01-22 16:44:55.090166: step: 270/459, loss: 0.017463982105255127 2023-01-22 16:44:55.637755: step: 272/459, loss: 0.05113428086042404 2023-01-22 16:44:56.270242: step: 274/459, loss: 0.07966379076242447 2023-01-22 16:44:56.880243: step: 276/459, loss: 0.018669087439775467 2023-01-22 16:44:57.479621: step: 278/459, loss: 0.9054864645004272 2023-01-22 16:44:58.093012: step: 280/459, loss: 0.08383411169052124 2023-01-22 16:44:58.674590: step: 282/459, loss: 0.14030534029006958 2023-01-22 16:44:59.340663: step: 284/459, loss: 0.02621239237487316 2023-01-22 16:44:59.989438: step: 286/459, loss: 0.10758289694786072 2023-01-22 16:45:00.649244: step: 288/459, loss: 0.10023441910743713 2023-01-22 16:45:01.283854: step: 290/459, loss: 0.08644817769527435 2023-01-22 16:45:01.844274: step: 292/459, loss: 0.028419049456715584 2023-01-22 16:45:02.439619: step: 294/459, loss: 0.07090795040130615 2023-01-22 16:45:03.073830: step: 296/459, loss: 0.04088492691516876 2023-01-22 16:45:03.721181: step: 298/459, loss: 0.01874092034995556 2023-01-22 16:45:04.321887: step: 300/459, loss: 0.009390808641910553 2023-01-22 16:45:04.932081: step: 302/459, loss: 0.036651719361543655 2023-01-22 16:45:05.633158: step: 304/459, loss: 0.06675086915493011 2023-01-22 16:45:06.346248: step: 306/459, loss: 0.10042503476142883 2023-01-22 16:45:06.991231: step: 308/459, loss: 0.15915349125862122 2023-01-22 16:45:07.522877: step: 310/459, loss: 0.0881909728050232 2023-01-22 16:45:08.154936: step: 312/459, loss: 0.024669170379638672 2023-01-22 16:45:08.887780: step: 314/459, loss: 0.06160861998796463 2023-01-22 16:45:09.436694: step: 316/459, loss: 0.03954675421118736 2023-01-22 16:45:10.017321: step: 318/459, loss: 0.13706110417842865 2023-01-22 16:45:10.689338: step: 320/459, loss: 0.042208652943372726 2023-01-22 16:45:11.313152: step: 322/459, loss: 0.13250552117824554 2023-01-22 16:45:11.930392: step: 324/459, loss: 0.07068297266960144 2023-01-22 16:45:12.528438: step: 326/459, loss: 0.11881685256958008 2023-01-22 16:45:13.184867: step: 328/459, loss: 0.03557323291897774 2023-01-22 16:45:13.743340: step: 330/459, loss: 0.03155729919672012 2023-01-22 16:45:14.423685: step: 332/459, loss: 0.04149679094552994 2023-01-22 16:45:15.017875: step: 334/459, loss: 0.01848447136580944 2023-01-22 16:45:15.605232: step: 336/459, loss: 0.1343211680650711 2023-01-22 16:45:16.221041: step: 338/459, loss: 0.09267884492874146 2023-01-22 16:45:16.858051: step: 340/459, loss: 0.07401582598686218 2023-01-22 16:45:17.457754: step: 342/459, loss: 0.010911349207162857 2023-01-22 16:45:18.073688: step: 344/459, loss: 0.020135780796408653 2023-01-22 16:45:18.668455: step: 346/459, loss: 0.04714033380150795 2023-01-22 16:45:19.298004: step: 348/459, loss: 0.541075587272644 2023-01-22 16:45:19.985014: step: 350/459, loss: 0.20642787218093872 2023-01-22 16:45:20.578782: step: 352/459, loss: 0.06928475946187973 2023-01-22 16:45:21.133380: step: 354/459, loss: 0.08214838057756424 2023-01-22 16:45:21.685646: step: 356/459, loss: 0.16356615722179413 2023-01-22 16:45:22.302717: step: 358/459, loss: 0.07929275929927826 2023-01-22 16:45:22.875501: step: 360/459, loss: 1.0689303874969482 2023-01-22 16:45:23.547889: step: 362/459, loss: 0.12265557050704956 2023-01-22 16:45:24.127529: step: 364/459, loss: 0.029942287132143974 2023-01-22 16:45:24.745994: step: 366/459, loss: 0.10229063034057617 2023-01-22 16:45:25.404367: step: 368/459, loss: 0.06597569584846497 2023-01-22 16:45:26.000605: step: 370/459, loss: 0.4430593252182007 2023-01-22 16:45:26.624248: step: 372/459, loss: 0.03964323550462723 2023-01-22 16:45:27.313969: step: 374/459, loss: 0.05287628620862961 2023-01-22 16:45:27.882787: step: 376/459, loss: 0.19470305740833282 2023-01-22 16:45:28.496871: step: 378/459, loss: 0.9635472297668457 2023-01-22 16:45:29.129742: step: 380/459, loss: 0.019069170579314232 2023-01-22 16:45:29.745840: step: 382/459, loss: 0.1614234447479248 2023-01-22 16:45:30.306562: step: 384/459, loss: 0.06234690546989441 2023-01-22 16:45:30.899269: step: 386/459, loss: 0.010001587681472301 2023-01-22 16:45:31.535410: step: 388/459, loss: 0.1475697010755539 2023-01-22 16:45:32.191480: step: 390/459, loss: 0.0814564898610115 2023-01-22 16:45:32.805840: step: 392/459, loss: 0.09011489897966385 2023-01-22 16:45:33.427967: step: 394/459, loss: 0.02943314239382744 2023-01-22 16:45:34.047961: step: 396/459, loss: 0.3755403757095337 2023-01-22 16:45:34.594732: step: 398/459, loss: 0.05391167476773262 2023-01-22 16:45:35.139665: step: 400/459, loss: 0.01723998226225376 2023-01-22 16:45:35.764612: step: 402/459, loss: 0.04933418706059456 2023-01-22 16:45:36.391791: step: 404/459, loss: 0.09485770016908646 2023-01-22 16:45:36.969724: step: 406/459, loss: 0.03237356245517731 2023-01-22 16:45:37.601298: step: 408/459, loss: 0.0618608221411705 2023-01-22 16:45:38.179067: step: 410/459, loss: 0.023041650652885437 2023-01-22 16:45:38.835596: step: 412/459, loss: 0.07594982534646988 2023-01-22 16:45:39.432998: step: 414/459, loss: 1.129152774810791 2023-01-22 16:45:40.005114: step: 416/459, loss: 0.08019348978996277 2023-01-22 16:45:40.604679: step: 418/459, loss: 0.028616981580853462 2023-01-22 16:45:41.229485: step: 420/459, loss: 0.4556719660758972 2023-01-22 16:45:41.999311: step: 422/459, loss: 0.037569884210824966 2023-01-22 16:45:42.659178: step: 424/459, loss: 0.056941159069538116 2023-01-22 16:45:43.255840: step: 426/459, loss: 0.23476210236549377 2023-01-22 16:45:43.903838: step: 428/459, loss: 0.004366088192909956 2023-01-22 16:45:44.519346: step: 430/459, loss: 0.05427195504307747 2023-01-22 16:45:45.191049: step: 432/459, loss: 0.03708488494157791 2023-01-22 16:45:45.820463: step: 434/459, loss: 0.4011690616607666 2023-01-22 16:45:46.476146: step: 436/459, loss: 0.2285120189189911 2023-01-22 16:45:47.053956: step: 438/459, loss: 0.09953758120536804 2023-01-22 16:45:47.656318: step: 440/459, loss: 0.15873168408870697 2023-01-22 16:45:48.297844: step: 442/459, loss: 0.07061606645584106 2023-01-22 16:45:48.951389: step: 444/459, loss: 0.0617765337228775 2023-01-22 16:45:49.566086: step: 446/459, loss: 0.06293389946222305 2023-01-22 16:45:50.164064: step: 448/459, loss: 0.03512075915932655 2023-01-22 16:45:50.779948: step: 450/459, loss: 0.0019254852086305618 2023-01-22 16:45:51.379734: step: 452/459, loss: 0.040261160582304 2023-01-22 16:45:51.986956: step: 454/459, loss: 0.054155658930540085 2023-01-22 16:45:52.614525: step: 456/459, loss: 0.03600851818919182 2023-01-22 16:45:53.262885: step: 458/459, loss: 0.1399046778678894 2023-01-22 16:45:53.880713: step: 460/459, loss: 0.1442776769399643 2023-01-22 16:45:54.477854: step: 462/459, loss: 0.10866142809391022 2023-01-22 16:45:55.043690: step: 464/459, loss: 0.1783614307641983 2023-01-22 16:45:55.726474: step: 466/459, loss: 0.0059614940546453 2023-01-22 16:45:56.385142: step: 468/459, loss: 0.08493456989526749 2023-01-22 16:45:57.031327: step: 470/459, loss: 0.05017702281475067 2023-01-22 16:45:57.627726: step: 472/459, loss: 0.029146134853363037 2023-01-22 16:45:58.260311: step: 474/459, loss: 0.2103261649608612 2023-01-22 16:45:58.889004: step: 476/459, loss: 0.03889176622033119 2023-01-22 16:45:59.481632: step: 478/459, loss: 0.061008669435977936 2023-01-22 16:46:00.101808: step: 480/459, loss: 0.039768919348716736 2023-01-22 16:46:00.701689: step: 482/459, loss: 0.026643691584467888 2023-01-22 16:46:01.299772: step: 484/459, loss: 0.010346617549657822 2023-01-22 16:46:01.945881: step: 486/459, loss: 0.22377869486808777 2023-01-22 16:46:02.555899: step: 488/459, loss: 0.07204742729663849 2023-01-22 16:46:03.139995: step: 490/459, loss: 0.08418828248977661 2023-01-22 16:46:03.752731: step: 492/459, loss: 0.03342689573764801 2023-01-22 16:46:04.308748: step: 494/459, loss: 0.03697150945663452 2023-01-22 16:46:04.983650: step: 496/459, loss: 0.1985706388950348 2023-01-22 16:46:05.625242: step: 498/459, loss: 0.11228727549314499 2023-01-22 16:46:06.301687: step: 500/459, loss: 0.03816017508506775 2023-01-22 16:46:06.945138: step: 502/459, loss: 0.04945036396384239 2023-01-22 16:46:07.487331: step: 504/459, loss: 0.01377645693719387 2023-01-22 16:46:08.147466: step: 506/459, loss: 0.4544815421104431 2023-01-22 16:46:08.725802: step: 508/459, loss: 0.08727859705686569 2023-01-22 16:46:09.396570: step: 510/459, loss: 0.1249663233757019 2023-01-22 16:46:10.021690: step: 512/459, loss: 0.09200556576251984 2023-01-22 16:46:10.640545: step: 514/459, loss: 0.07794243842363358 2023-01-22 16:46:11.282343: step: 516/459, loss: 0.041633762419223785 2023-01-22 16:46:11.973989: step: 518/459, loss: 0.035412706434726715 2023-01-22 16:46:12.582575: step: 520/459, loss: 0.012071368284523487 2023-01-22 16:46:13.167801: step: 522/459, loss: 0.04200519993901253 2023-01-22 16:46:13.795541: step: 524/459, loss: 0.19792501628398895 2023-01-22 16:46:14.463066: step: 526/459, loss: 0.0104666193947196 2023-01-22 16:46:15.097264: step: 528/459, loss: 0.13894087076187134 2023-01-22 16:46:15.718614: step: 530/459, loss: 0.06512139737606049 2023-01-22 16:46:16.340277: step: 532/459, loss: 0.028412630781531334 2023-01-22 16:46:16.963432: step: 534/459, loss: 0.10816632211208344 2023-01-22 16:46:17.596948: step: 536/459, loss: 0.05129527673125267 2023-01-22 16:46:18.233218: step: 538/459, loss: 0.15077972412109375 2023-01-22 16:46:18.827453: step: 540/459, loss: 0.09759873151779175 2023-01-22 16:46:19.458996: step: 542/459, loss: 0.07060377299785614 2023-01-22 16:46:20.123548: step: 544/459, loss: 0.0371258482336998 2023-01-22 16:46:20.720495: step: 546/459, loss: 0.05552984029054642 2023-01-22 16:46:21.345601: step: 548/459, loss: 0.016676006838679314 2023-01-22 16:46:21.950446: step: 550/459, loss: 0.018143562600016594 2023-01-22 16:46:22.593825: step: 552/459, loss: 0.02429545857012272 2023-01-22 16:46:23.171804: step: 554/459, loss: 0.0958019495010376 2023-01-22 16:46:23.780021: step: 556/459, loss: 0.023054128512740135 2023-01-22 16:46:24.341159: step: 558/459, loss: 0.1250004768371582 2023-01-22 16:46:24.932151: step: 560/459, loss: 0.04949987307190895 2023-01-22 16:46:25.580426: step: 562/459, loss: 0.1532232165336609 2023-01-22 16:46:26.159049: step: 564/459, loss: 0.06458042562007904 2023-01-22 16:46:26.743168: step: 566/459, loss: 0.043190374970436096 2023-01-22 16:46:27.405192: step: 568/459, loss: 0.3716271221637726 2023-01-22 16:46:28.066934: step: 570/459, loss: 0.028064679354429245 2023-01-22 16:46:28.703883: step: 572/459, loss: 0.11789781600236893 2023-01-22 16:46:29.331424: step: 574/459, loss: 0.10502124577760696 2023-01-22 16:46:29.961813: step: 576/459, loss: 0.281949907541275 2023-01-22 16:46:30.554039: step: 578/459, loss: 0.03356491029262543 2023-01-22 16:46:31.185846: step: 580/459, loss: 0.28127798438072205 2023-01-22 16:46:31.766292: step: 582/459, loss: 0.5430603623390198 2023-01-22 16:46:32.431697: step: 584/459, loss: 0.08766601234674454 2023-01-22 16:46:33.138983: step: 586/459, loss: 0.0947529748082161 2023-01-22 16:46:33.778442: step: 588/459, loss: 0.3701959550380707 2023-01-22 16:46:34.447872: step: 590/459, loss: 0.15840740501880646 2023-01-22 16:46:35.018101: step: 592/459, loss: 0.04917715862393379 2023-01-22 16:46:35.683747: step: 594/459, loss: 0.10379461944103241 2023-01-22 16:46:36.262309: step: 596/459, loss: 0.04355023801326752 2023-01-22 16:46:36.843689: step: 598/459, loss: 0.09489044547080994 2023-01-22 16:46:37.445220: step: 600/459, loss: 0.024723561480641365 2023-01-22 16:46:38.057898: step: 602/459, loss: 0.04896465688943863 2023-01-22 16:46:38.742521: step: 604/459, loss: 0.051299914717674255 2023-01-22 16:46:39.480768: step: 606/459, loss: 0.045137375593185425 2023-01-22 16:46:40.153335: step: 608/459, loss: 0.1259990930557251 2023-01-22 16:46:40.799035: step: 610/459, loss: 0.024859720841050148 2023-01-22 16:46:41.497043: step: 612/459, loss: 0.01965874619781971 2023-01-22 16:46:42.154205: step: 614/459, loss: 0.14710260927677155 2023-01-22 16:46:42.750982: step: 616/459, loss: 0.04034070670604706 2023-01-22 16:46:43.446628: step: 618/459, loss: 0.07177021354436874 2023-01-22 16:46:44.070959: step: 620/459, loss: 0.07349206507205963 2023-01-22 16:46:44.675295: step: 622/459, loss: 0.08984249830245972 2023-01-22 16:46:45.252921: step: 624/459, loss: 0.26151394844055176 2023-01-22 16:46:45.809582: step: 626/459, loss: 0.0995243638753891 2023-01-22 16:46:46.398580: step: 628/459, loss: 0.24234791100025177 2023-01-22 16:46:47.037374: step: 630/459, loss: 0.03228859603404999 2023-01-22 16:46:47.725128: step: 632/459, loss: 0.06063751131296158 2023-01-22 16:46:48.430082: step: 634/459, loss: 2.309478282928467 2023-01-22 16:46:49.109294: step: 636/459, loss: 0.01860075816512108 2023-01-22 16:46:49.722980: step: 638/459, loss: 0.027825728058815002 2023-01-22 16:46:50.379474: step: 640/459, loss: 0.027265062555670738 2023-01-22 16:46:50.960598: step: 642/459, loss: 0.24369151890277863 2023-01-22 16:46:51.574857: step: 644/459, loss: 0.04232015460729599 2023-01-22 16:46:52.249721: step: 646/459, loss: 0.1448332965373993 2023-01-22 16:46:52.893367: step: 648/459, loss: 0.028683023527264595 2023-01-22 16:46:53.535423: step: 650/459, loss: 0.07188218086957932 2023-01-22 16:46:54.124449: step: 652/459, loss: 0.125137597322464 2023-01-22 16:46:54.807368: step: 654/459, loss: 0.07380376756191254 2023-01-22 16:46:55.389017: step: 656/459, loss: 0.17901377379894257 2023-01-22 16:46:56.003683: step: 658/459, loss: 0.03168317675590515 2023-01-22 16:46:56.661795: step: 660/459, loss: 0.1665235310792923 2023-01-22 16:46:57.249107: step: 662/459, loss: 0.6788172125816345 2023-01-22 16:46:57.843520: step: 664/459, loss: 0.11272546648979187 2023-01-22 16:46:58.516045: step: 666/459, loss: 0.06425986438989639 2023-01-22 16:46:59.112020: step: 668/459, loss: 0.15545295178890228 2023-01-22 16:46:59.799624: step: 670/459, loss: 0.03415561467409134 2023-01-22 16:47:00.450669: step: 672/459, loss: 0.06802842766046524 2023-01-22 16:47:00.998605: step: 674/459, loss: 0.033234499394893646 2023-01-22 16:47:01.595165: step: 676/459, loss: 0.08689046651124954 2023-01-22 16:47:02.199939: step: 678/459, loss: 0.027751941233873367 2023-01-22 16:47:02.782169: step: 680/459, loss: 0.3368563950061798 2023-01-22 16:47:03.385804: step: 682/459, loss: 0.059047289192676544 2023-01-22 16:47:03.963830: step: 684/459, loss: 0.02439240925014019 2023-01-22 16:47:04.644749: step: 686/459, loss: 0.017048165202140808 2023-01-22 16:47:05.230218: step: 688/459, loss: 0.022696755826473236 2023-01-22 16:47:05.798729: step: 690/459, loss: 0.046518679708242416 2023-01-22 16:47:06.455419: step: 692/459, loss: 0.012873743660748005 2023-01-22 16:47:07.025690: step: 694/459, loss: 0.7430551052093506 2023-01-22 16:47:07.699442: step: 696/459, loss: 0.02169869653880596 2023-01-22 16:47:08.286941: step: 698/459, loss: 0.04769236221909523 2023-01-22 16:47:08.907251: step: 700/459, loss: 0.03850403055548668 2023-01-22 16:47:09.594481: step: 702/459, loss: 0.28655141592025757 2023-01-22 16:47:10.174757: step: 704/459, loss: 0.12491967529058456 2023-01-22 16:47:10.844029: step: 706/459, loss: 0.12235256284475327 2023-01-22 16:47:11.458383: step: 708/459, loss: 0.10169564187526703 2023-01-22 16:47:12.047129: step: 710/459, loss: 0.08894705772399902 2023-01-22 16:47:12.658978: step: 712/459, loss: 0.034460585564374924 2023-01-22 16:47:13.271868: step: 714/459, loss: 0.08122759312391281 2023-01-22 16:47:13.847519: step: 716/459, loss: 0.051886655390262604 2023-01-22 16:47:14.525301: step: 718/459, loss: 0.0768001601099968 2023-01-22 16:47:15.130731: step: 720/459, loss: 0.1707136034965515 2023-01-22 16:47:15.815348: step: 722/459, loss: 0.04592875763773918 2023-01-22 16:47:16.438771: step: 724/459, loss: 0.05703800916671753 2023-01-22 16:47:16.987383: step: 726/459, loss: 0.3364502489566803 2023-01-22 16:47:17.653024: step: 728/459, loss: 0.13422317802906036 2023-01-22 16:47:18.263099: step: 730/459, loss: 0.08920254558324814 2023-01-22 16:47:18.851153: step: 732/459, loss: 0.0661138966679573 2023-01-22 16:47:19.514910: step: 734/459, loss: 0.0817049965262413 2023-01-22 16:47:20.214110: step: 736/459, loss: 0.026414304971694946 2023-01-22 16:47:20.858991: step: 738/459, loss: 0.10205746442079544 2023-01-22 16:47:21.491577: step: 740/459, loss: 0.10639548301696777 2023-01-22 16:47:22.152615: step: 742/459, loss: 0.09692167490720749 2023-01-22 16:47:22.764779: step: 744/459, loss: 0.10824550688266754 2023-01-22 16:47:23.321320: step: 746/459, loss: 0.0658331960439682 2023-01-22 16:47:23.920739: step: 748/459, loss: 0.14704306423664093 2023-01-22 16:47:24.525596: step: 750/459, loss: 0.16685032844543457 2023-01-22 16:47:25.165998: step: 752/459, loss: 0.07633430510759354 2023-01-22 16:47:25.764020: step: 754/459, loss: 0.019763967022299767 2023-01-22 16:47:26.420957: step: 756/459, loss: 0.09872082620859146 2023-01-22 16:47:27.024919: step: 758/459, loss: 0.09111758321523666 2023-01-22 16:47:27.664130: step: 760/459, loss: 0.03536752983927727 2023-01-22 16:47:28.269847: step: 762/459, loss: 0.17472481727600098 2023-01-22 16:47:28.922664: step: 764/459, loss: 0.019400186836719513 2023-01-22 16:47:29.523499: step: 766/459, loss: 0.03572757542133331 2023-01-22 16:47:30.060167: step: 768/459, loss: 0.041711367666721344 2023-01-22 16:47:30.706863: step: 770/459, loss: 0.07316067069768906 2023-01-22 16:47:31.330098: step: 772/459, loss: 0.1271355301141739 2023-01-22 16:47:31.936430: step: 774/459, loss: 0.09620354324579239 2023-01-22 16:47:32.544397: step: 776/459, loss: 0.2531862258911133 2023-01-22 16:47:33.243136: step: 778/459, loss: 0.06546211242675781 2023-01-22 16:47:33.881100: step: 780/459, loss: 0.07942058891057968 2023-01-22 16:47:34.566577: step: 782/459, loss: 0.05079041048884392 2023-01-22 16:47:35.172949: step: 784/459, loss: 0.018056929111480713 2023-01-22 16:47:35.787423: step: 786/459, loss: 0.08114388585090637 2023-01-22 16:47:36.410172: step: 788/459, loss: 0.03932151943445206 2023-01-22 16:47:37.006479: step: 790/459, loss: 0.03185522183775902 2023-01-22 16:47:37.555167: step: 792/459, loss: 1.5641160011291504 2023-01-22 16:47:38.156909: step: 794/459, loss: 0.044021427631378174 2023-01-22 16:47:38.755219: step: 796/459, loss: 0.03443581238389015 2023-01-22 16:47:39.382426: step: 798/459, loss: 0.03595861792564392 2023-01-22 16:47:39.975631: step: 800/459, loss: 0.04957573115825653 2023-01-22 16:47:40.620418: step: 802/459, loss: 0.04862571880221367 2023-01-22 16:47:41.300955: step: 804/459, loss: 0.010294655337929726 2023-01-22 16:47:41.870499: step: 806/459, loss: 0.033893465995788574 2023-01-22 16:47:42.555885: step: 808/459, loss: 0.026866471394896507 2023-01-22 16:47:43.217935: step: 810/459, loss: 0.10005602985620499 2023-01-22 16:47:43.812616: step: 812/459, loss: 0.08109891414642334 2023-01-22 16:47:44.489440: step: 814/459, loss: 0.1149824932217598 2023-01-22 16:47:45.092983: step: 816/459, loss: 0.012341326102614403 2023-01-22 16:47:45.764257: step: 818/459, loss: 0.11000432819128036 2023-01-22 16:47:46.419703: step: 820/459, loss: 1.4965062141418457 2023-01-22 16:47:47.065601: step: 822/459, loss: 0.02378295734524727 2023-01-22 16:47:47.721597: step: 824/459, loss: 0.5111196041107178 2023-01-22 16:47:48.359323: step: 826/459, loss: 0.14078257977962494 2023-01-22 16:47:48.995211: step: 828/459, loss: 0.065251424908638 2023-01-22 16:47:49.618727: step: 830/459, loss: 0.17842689156532288 2023-01-22 16:47:50.228814: step: 832/459, loss: 0.04866514354944229 2023-01-22 16:47:50.859678: step: 834/459, loss: 0.1224013939499855 2023-01-22 16:47:51.519135: step: 836/459, loss: 0.04646917060017586 2023-01-22 16:47:52.158609: step: 838/459, loss: 0.10612420737743378 2023-01-22 16:47:52.778448: step: 840/459, loss: 0.03267592936754227 2023-01-22 16:47:53.410801: step: 842/459, loss: 0.0650893971323967 2023-01-22 16:47:54.098476: step: 844/459, loss: 0.38908129930496216 2023-01-22 16:47:54.711511: step: 846/459, loss: 0.044997069984674454 2023-01-22 16:47:55.237403: step: 848/459, loss: 0.043273016810417175 2023-01-22 16:47:55.842540: step: 850/459, loss: 0.07747216522693634 2023-01-22 16:47:56.479053: step: 852/459, loss: 0.21269670128822327 2023-01-22 16:47:57.066506: step: 854/459, loss: 0.035039570182561874 2023-01-22 16:47:57.684949: step: 856/459, loss: 0.03290571644902229 2023-01-22 16:47:58.297442: step: 858/459, loss: 0.07134772837162018 2023-01-22 16:47:58.877453: step: 860/459, loss: 0.013203918002545834 2023-01-22 16:47:59.482349: step: 862/459, loss: 0.17637185752391815 2023-01-22 16:48:00.103615: step: 864/459, loss: 0.01316221896559 2023-01-22 16:48:00.718896: step: 866/459, loss: 0.08788970112800598 2023-01-22 16:48:01.344430: step: 868/459, loss: 0.13441972434520721 2023-01-22 16:48:01.911068: step: 870/459, loss: 0.023385057225823402 2023-01-22 16:48:02.535763: step: 872/459, loss: 0.10251281410455704 2023-01-22 16:48:03.107747: step: 874/459, loss: 0.03708559647202492 2023-01-22 16:48:03.751742: step: 876/459, loss: 0.19971586763858795 2023-01-22 16:48:04.377720: step: 878/459, loss: 0.10291758179664612 2023-01-22 16:48:04.981375: step: 880/459, loss: 0.07359448075294495 2023-01-22 16:48:05.606985: step: 882/459, loss: 0.4843934178352356 2023-01-22 16:48:06.249645: step: 884/459, loss: 0.057063519954681396 2023-01-22 16:48:06.855399: step: 886/459, loss: 0.542678713798523 2023-01-22 16:48:07.442593: step: 888/459, loss: 3.318286180496216 2023-01-22 16:48:08.065412: step: 890/459, loss: 0.09397625923156738 2023-01-22 16:48:08.723763: step: 892/459, loss: 0.05803747475147247 2023-01-22 16:48:09.420598: step: 894/459, loss: 0.10569605231285095 2023-01-22 16:48:10.110355: step: 896/459, loss: 0.04361460730433464 2023-01-22 16:48:10.749262: step: 898/459, loss: 0.018322976306080818 2023-01-22 16:48:11.347624: step: 900/459, loss: 0.03334629163146019 2023-01-22 16:48:11.999873: step: 902/459, loss: 0.057478874921798706 2023-01-22 16:48:12.633426: step: 904/459, loss: 0.030509721487760544 2023-01-22 16:48:13.285042: step: 906/459, loss: 0.022177288308739662 2023-01-22 16:48:13.975159: step: 908/459, loss: 0.09350035339593887 2023-01-22 16:48:14.599095: step: 910/459, loss: 0.16668815910816193 2023-01-22 16:48:15.183682: step: 912/459, loss: 0.15866781771183014 2023-01-22 16:48:15.815262: step: 914/459, loss: 0.21004772186279297 2023-01-22 16:48:16.387774: step: 916/459, loss: 0.1424548476934433 2023-01-22 16:48:16.975702: step: 918/459, loss: 0.19201156497001648 2023-01-22 16:48:17.390813: step: 920/459, loss: 0.006582234520465136 ================================================== Loss: 0.138 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3165846273291925, 'r': 0.3147858510375494, 'f1': 0.3156826768239811}, 'combined': 0.23260828818609133, 'epoch': 18} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33618176522498033, 'r': 0.28422640150839246, 'f1': 0.3080286124721495}, 'combined': 0.19713831198217566, 'epoch': 18} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30828354779411765, 'r': 0.3176254734848485, 'f1': 0.31288479477611936}, 'combined': 0.23054669088766688, 'epoch': 18} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33685396263931755, 'r': 0.2805074816160135, 'f1': 0.30610935493811003}, 'combined': 0.1959099871603904, 'epoch': 18} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32350178500825644, 'r': 0.3136800989358995, 'f1': 0.31851524496959355}, 'combined': 0.23469544366180575, 'epoch': 18} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3496871102034305, 'r': 0.2882211609600224, 'f1': 0.3159928454659057}, 'combined': 0.22656090806989468, 'epoch': 18} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25157232704402516, 'r': 0.38095238095238093, 'f1': 0.30303030303030304}, 'combined': 0.20202020202020202, 'epoch': 18} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2642857142857143, 'r': 0.40217391304347827, 'f1': 0.31896551724137934}, 'combined': 0.15948275862068967, 'epoch': 18} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.27941176470588236, 'r': 0.16379310344827586, 'f1': 0.20652173913043476}, 'combined': 0.13768115942028983, 'epoch': 18} New best chinese model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3165846273291925, 'r': 0.3147858510375494, 'f1': 0.3156826768239811}, 'combined': 0.23260828818609133, 'epoch': 18} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33618176522498033, 'r': 0.28422640150839246, 'f1': 0.3080286124721495}, 'combined': 0.19713831198217566, 'epoch': 18} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25157232704402516, 'r': 0.38095238095238093, 'f1': 0.30303030303030304}, 'combined': 0.20202020202020202, 'epoch': 18} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29418702499903554, 'r': 0.3594998939267151, 'f1': 0.32358060478117656}, 'combined': 0.23842781404928798, 'epoch': 13} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3202762492367604, 'r': 0.31212376289255195, 'f1': 0.31614745781013553}, 'combined': 0.20233437299848672, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2878787878787879, 'r': 0.41304347826086957, 'f1': 0.3392857142857143}, 'combined': 0.16964285714285715, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3189032241859154, 'r': 0.3376622373733222, 'f1': 0.32801474487694154}, 'combined': 0.24169507517248323, 'epoch': 15} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34157316773046453, 'r': 0.3008578947798814, 'f1': 0.3199253278791385}, 'combined': 0.22938042376240123, 'epoch': 15} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35294117647058826, 'r': 0.20689655172413793, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 15} ****************************** Epoch: 19 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 16:51:02.761103: step: 2/459, loss: 0.07806821167469025 2023-01-22 16:51:03.424041: step: 4/459, loss: 0.053808391094207764 2023-01-22 16:51:03.993446: step: 6/459, loss: 0.014116642065346241 2023-01-22 16:51:04.616268: step: 8/459, loss: 0.0855446308851242 2023-01-22 16:51:05.214683: step: 10/459, loss: 0.04946175962686539 2023-01-22 16:51:05.831219: step: 12/459, loss: 0.010980721563100815 2023-01-22 16:51:06.433176: step: 14/459, loss: 0.03135364502668381 2023-01-22 16:51:07.039061: step: 16/459, loss: 0.45043420791625977 2023-01-22 16:51:07.655798: step: 18/459, loss: 0.039380915462970734 2023-01-22 16:51:08.263566: step: 20/459, loss: 8.12999439239502 2023-01-22 16:51:08.928665: step: 22/459, loss: 0.018217500299215317 2023-01-22 16:51:09.555251: step: 24/459, loss: 0.07790228724479675 2023-01-22 16:51:10.219282: step: 26/459, loss: 0.09395729005336761 2023-01-22 16:51:10.864872: step: 28/459, loss: 0.0638892725110054 2023-01-22 16:51:11.453257: step: 30/459, loss: 0.039185650646686554 2023-01-22 16:51:12.052970: step: 32/459, loss: 0.036317408084869385 2023-01-22 16:51:12.684589: step: 34/459, loss: 0.08605368435382843 2023-01-22 16:51:13.327483: step: 36/459, loss: 0.18352341651916504 2023-01-22 16:51:13.924077: step: 38/459, loss: 0.03434718772768974 2023-01-22 16:51:14.562364: step: 40/459, loss: 0.13509708642959595 2023-01-22 16:51:15.102646: step: 42/459, loss: 0.057412005960941315 2023-01-22 16:51:15.717644: step: 44/459, loss: 0.02480188198387623 2023-01-22 16:51:16.317835: step: 46/459, loss: 0.04770028963685036 2023-01-22 16:51:16.927320: step: 48/459, loss: 0.08100080490112305 2023-01-22 16:51:17.617196: step: 50/459, loss: 0.02407694049179554 2023-01-22 16:51:18.221747: step: 52/459, loss: 0.032368242740631104 2023-01-22 16:51:18.842561: step: 54/459, loss: 0.2909230589866638 2023-01-22 16:51:19.509616: step: 56/459, loss: 0.08083527535200119 2023-01-22 16:51:20.064271: step: 58/459, loss: 0.016034672036767006 2023-01-22 16:51:20.730077: step: 60/459, loss: 0.07307702302932739 2023-01-22 16:51:21.342977: step: 62/459, loss: 0.1115296483039856 2023-01-22 16:51:22.023676: step: 64/459, loss: 0.0266606193035841 2023-01-22 16:51:22.582380: step: 66/459, loss: 0.14659567177295685 2023-01-22 16:51:23.169249: step: 68/459, loss: 12.221670150756836 2023-01-22 16:51:23.798707: step: 70/459, loss: 0.08807044476270676 2023-01-22 16:51:24.421491: step: 72/459, loss: 0.084469735622406 2023-01-22 16:51:24.992594: step: 74/459, loss: 0.2431037575006485 2023-01-22 16:51:25.552939: step: 76/459, loss: 0.03473161160945892 2023-01-22 16:51:26.230021: step: 78/459, loss: 0.03142084181308746 2023-01-22 16:51:26.805438: step: 80/459, loss: 0.03403417766094208 2023-01-22 16:51:27.429272: step: 82/459, loss: 0.027035586535930634 2023-01-22 16:51:28.031626: step: 84/459, loss: 0.09563162177801132 2023-01-22 16:51:28.628554: step: 86/459, loss: 0.13520899415016174 2023-01-22 16:51:29.236876: step: 88/459, loss: 0.03907875344157219 2023-01-22 16:51:29.850179: step: 90/459, loss: 0.06430704891681671 2023-01-22 16:51:30.482052: step: 92/459, loss: 0.023926202207803726 2023-01-22 16:51:31.102285: step: 94/459, loss: 0.05604263022542 2023-01-22 16:51:31.627693: step: 96/459, loss: 0.013267558999359608 2023-01-22 16:51:32.286170: step: 98/459, loss: 0.1183263286948204 2023-01-22 16:51:32.890487: step: 100/459, loss: 0.07702021300792694 2023-01-22 16:51:33.498055: step: 102/459, loss: 0.046797432005405426 2023-01-22 16:51:34.199865: step: 104/459, loss: 0.08650176227092743 2023-01-22 16:51:34.825786: step: 106/459, loss: 0.19926568865776062 2023-01-22 16:51:35.412070: step: 108/459, loss: 0.14588779211044312 2023-01-22 16:51:36.006199: step: 110/459, loss: 0.09315995126962662 2023-01-22 16:51:36.629050: step: 112/459, loss: 0.07535996288061142 2023-01-22 16:51:37.199386: step: 114/459, loss: 0.011820236220955849 2023-01-22 16:51:37.817221: step: 116/459, loss: 0.003143953625112772 2023-01-22 16:51:38.390218: step: 118/459, loss: 0.010869319550693035 2023-01-22 16:51:39.038521: step: 120/459, loss: 0.9215289950370789 2023-01-22 16:51:39.699542: step: 122/459, loss: 0.04972256347537041 2023-01-22 16:51:40.260171: step: 124/459, loss: 0.028293967247009277 2023-01-22 16:51:40.812746: step: 126/459, loss: 0.07127924263477325 2023-01-22 16:51:41.422203: step: 128/459, loss: 0.06868112832307816 2023-01-22 16:51:42.117542: step: 130/459, loss: 0.04040024057030678 2023-01-22 16:51:42.788144: step: 132/459, loss: 0.012609817087650299 2023-01-22 16:51:43.493941: step: 134/459, loss: 0.02693508192896843 2023-01-22 16:51:44.061018: step: 136/459, loss: 0.02203119546175003 2023-01-22 16:51:44.659225: step: 138/459, loss: 0.060060106217861176 2023-01-22 16:51:45.251399: step: 140/459, loss: 0.1412290632724762 2023-01-22 16:51:45.883374: step: 142/459, loss: 2.0180165767669678 2023-01-22 16:51:46.514508: step: 144/459, loss: 0.040186282247304916 2023-01-22 16:51:47.139889: step: 146/459, loss: 0.12633951008319855 2023-01-22 16:51:47.860659: step: 148/459, loss: 0.21621982753276825 2023-01-22 16:51:48.564584: step: 150/459, loss: 0.14103247225284576 2023-01-22 16:51:49.148689: step: 152/459, loss: 0.062136195600032806 2023-01-22 16:51:49.770522: step: 154/459, loss: 0.06119069457054138 2023-01-22 16:51:50.433721: step: 156/459, loss: 0.05711417272686958 2023-01-22 16:51:50.993922: step: 158/459, loss: 0.028628455474972725 2023-01-22 16:51:51.596583: step: 160/459, loss: 0.060944754630327225 2023-01-22 16:51:52.211288: step: 162/459, loss: 0.03155810013413429 2023-01-22 16:51:52.863714: step: 164/459, loss: 0.051336437463760376 2023-01-22 16:51:53.459936: step: 166/459, loss: 0.10220381617546082 2023-01-22 16:51:54.102778: step: 168/459, loss: 0.06795176863670349 2023-01-22 16:51:54.760572: step: 170/459, loss: 0.013007434085011482 2023-01-22 16:51:55.349626: step: 172/459, loss: 0.04409888759255409 2023-01-22 16:51:55.969508: step: 174/459, loss: 0.06301357597112656 2023-01-22 16:51:56.580901: step: 176/459, loss: 0.022369369864463806 2023-01-22 16:51:57.217365: step: 178/459, loss: 0.1629677265882492 2023-01-22 16:51:57.834994: step: 180/459, loss: 0.06051493436098099 2023-01-22 16:51:58.400650: step: 182/459, loss: 0.03519703075289726 2023-01-22 16:51:59.074135: step: 184/459, loss: 0.018172921612858772 2023-01-22 16:51:59.731546: step: 186/459, loss: 0.026252491399645805 2023-01-22 16:52:00.338371: step: 188/459, loss: 0.057286765426397324 2023-01-22 16:52:00.996581: step: 190/459, loss: 0.06030688062310219 2023-01-22 16:52:01.650914: step: 192/459, loss: 0.14837555587291718 2023-01-22 16:52:02.285898: step: 194/459, loss: 0.004948818124830723 2023-01-22 16:52:02.875685: step: 196/459, loss: 0.09740400314331055 2023-01-22 16:52:03.575297: step: 198/459, loss: 0.04365503787994385 2023-01-22 16:52:04.191493: step: 200/459, loss: 0.603210985660553 2023-01-22 16:52:04.846072: step: 202/459, loss: 0.04550163075327873 2023-01-22 16:52:05.436998: step: 204/459, loss: 0.3096429109573364 2023-01-22 16:52:05.987204: step: 206/459, loss: 0.05189787223935127 2023-01-22 16:52:06.660670: step: 208/459, loss: 0.060935258865356445 2023-01-22 16:52:07.306855: step: 210/459, loss: 0.01918981224298477 2023-01-22 16:52:07.939214: step: 212/459, loss: 0.2578226923942566 2023-01-22 16:52:08.552095: step: 214/459, loss: 0.02953050285577774 2023-01-22 16:52:09.129780: step: 216/459, loss: 0.04894363135099411 2023-01-22 16:52:09.703560: step: 218/459, loss: 0.007692015264183283 2023-01-22 16:52:10.271374: step: 220/459, loss: 0.3816676139831543 2023-01-22 16:52:10.901402: step: 222/459, loss: 0.014287366531789303 2023-01-22 16:52:11.615954: step: 224/459, loss: 0.2504187226295471 2023-01-22 16:52:12.286085: step: 226/459, loss: 0.3352997899055481 2023-01-22 16:52:13.021801: step: 228/459, loss: 0.020457018166780472 2023-01-22 16:52:13.659498: step: 230/459, loss: 0.2449796050786972 2023-01-22 16:52:14.248491: step: 232/459, loss: 1.5838464498519897 2023-01-22 16:52:14.872493: step: 234/459, loss: 0.01823676936328411 2023-01-22 16:52:15.547611: step: 236/459, loss: 0.039712708443403244 2023-01-22 16:52:16.152619: step: 238/459, loss: 0.05510875955224037 2023-01-22 16:52:16.755809: step: 240/459, loss: 0.026090653613209724 2023-01-22 16:52:17.428538: step: 242/459, loss: 0.09939420223236084 2023-01-22 16:52:18.055947: step: 244/459, loss: 0.055071160197257996 2023-01-22 16:52:18.652347: step: 246/459, loss: 0.004399648867547512 2023-01-22 16:52:19.263641: step: 248/459, loss: 0.1863432377576828 2023-01-22 16:52:19.892331: step: 250/459, loss: 0.05073116347193718 2023-01-22 16:52:20.513025: step: 252/459, loss: 0.022778330370783806 2023-01-22 16:52:21.167718: step: 254/459, loss: 0.061998363584280014 2023-01-22 16:52:21.814987: step: 256/459, loss: 0.029039667919278145 2023-01-22 16:52:22.528586: step: 258/459, loss: 0.04452308267354965 2023-01-22 16:52:23.159553: step: 260/459, loss: 0.017335329204797745 2023-01-22 16:52:23.754669: step: 262/459, loss: 0.13631916046142578 2023-01-22 16:52:24.362137: step: 264/459, loss: 0.17912694811820984 2023-01-22 16:52:25.083721: step: 266/459, loss: 0.07537680864334106 2023-01-22 16:52:25.678636: step: 268/459, loss: 0.005662201903760433 2023-01-22 16:52:26.258832: step: 270/459, loss: 0.008821776136755943 2023-01-22 16:52:26.923142: step: 272/459, loss: 0.07160379737615585 2023-01-22 16:52:27.585493: step: 274/459, loss: 1.588599681854248 2023-01-22 16:52:28.287470: step: 276/459, loss: 0.08180908113718033 2023-01-22 16:52:28.921991: step: 278/459, loss: 0.08914253860712051 2023-01-22 16:52:29.510572: step: 280/459, loss: 0.060048025101423264 2023-01-22 16:52:30.185456: step: 282/459, loss: 0.0981522649526596 2023-01-22 16:52:30.822342: step: 284/459, loss: 0.026833977550268173 2023-01-22 16:52:31.411788: step: 286/459, loss: 0.039359815418720245 2023-01-22 16:52:32.085247: step: 288/459, loss: 0.12716174125671387 2023-01-22 16:52:32.680431: step: 290/459, loss: 0.003698864718899131 2023-01-22 16:52:33.348404: step: 292/459, loss: 0.11845415085554123 2023-01-22 16:52:33.949060: step: 294/459, loss: 0.03945623338222504 2023-01-22 16:52:34.531935: step: 296/459, loss: 0.050383973866701126 2023-01-22 16:52:35.139830: step: 298/459, loss: 0.6309406757354736 2023-01-22 16:52:35.802653: step: 300/459, loss: 0.013814058154821396 2023-01-22 16:52:36.421589: step: 302/459, loss: 0.1450561285018921 2023-01-22 16:52:36.995236: step: 304/459, loss: 0.2429662048816681 2023-01-22 16:52:37.607505: step: 306/459, loss: 0.09207235276699066 2023-01-22 16:52:38.256733: step: 308/459, loss: 0.8981939554214478 2023-01-22 16:52:38.836135: step: 310/459, loss: 0.057423222810029984 2023-01-22 16:52:39.455877: step: 312/459, loss: 0.022368401288986206 2023-01-22 16:52:40.066608: step: 314/459, loss: 0.1169661357998848 2023-01-22 16:52:40.716859: step: 316/459, loss: 0.10909855365753174 2023-01-22 16:52:41.348707: step: 318/459, loss: 0.05050847306847572 2023-01-22 16:52:41.922643: step: 320/459, loss: 0.0525834821164608 2023-01-22 16:52:42.590991: step: 322/459, loss: 0.0434744693338871 2023-01-22 16:52:43.147924: step: 324/459, loss: 0.06485892832279205 2023-01-22 16:52:43.768037: step: 326/459, loss: 0.040790855884552 2023-01-22 16:52:44.366515: step: 328/459, loss: 0.04444364085793495 2023-01-22 16:52:44.976004: step: 330/459, loss: 0.012510924600064754 2023-01-22 16:52:45.565782: step: 332/459, loss: 0.01673901081085205 2023-01-22 16:52:46.175641: step: 334/459, loss: 0.43007054924964905 2023-01-22 16:52:46.815490: step: 336/459, loss: 0.12067227065563202 2023-01-22 16:52:47.457692: step: 338/459, loss: 0.022134406492114067 2023-01-22 16:52:48.115198: step: 340/459, loss: 0.05333522707223892 2023-01-22 16:52:48.791962: step: 342/459, loss: 0.5661528706550598 2023-01-22 16:52:49.459109: step: 344/459, loss: 0.006246526725590229 2023-01-22 16:52:50.007941: step: 346/459, loss: 0.006064717657864094 2023-01-22 16:52:50.655237: step: 348/459, loss: 0.0204228013753891 2023-01-22 16:52:51.333927: step: 350/459, loss: 0.04484117403626442 2023-01-22 16:52:51.912034: step: 352/459, loss: 0.2411363124847412 2023-01-22 16:52:52.543212: step: 354/459, loss: 0.12288696318864822 2023-01-22 16:52:53.159480: step: 356/459, loss: 0.07312557846307755 2023-01-22 16:52:53.794941: step: 358/459, loss: 0.07278715819120407 2023-01-22 16:52:54.407850: step: 360/459, loss: 0.05853430926799774 2023-01-22 16:52:55.045377: step: 362/459, loss: 0.05001004412770271 2023-01-22 16:52:55.600173: step: 364/459, loss: 0.03961656615138054 2023-01-22 16:52:56.262008: step: 366/459, loss: 0.07923755794763565 2023-01-22 16:52:56.916382: step: 368/459, loss: 0.1267145276069641 2023-01-22 16:52:57.537299: step: 370/459, loss: 0.03684331849217415 2023-01-22 16:52:58.161533: step: 372/459, loss: 0.024448027834296227 2023-01-22 16:52:58.861348: step: 374/459, loss: 0.3192734718322754 2023-01-22 16:52:59.483336: step: 376/459, loss: 0.06728874146938324 2023-01-22 16:53:00.115681: step: 378/459, loss: 0.05607064813375473 2023-01-22 16:53:00.797966: step: 380/459, loss: 0.04306817054748535 2023-01-22 16:53:01.356937: step: 382/459, loss: 0.0016482890350744128 2023-01-22 16:53:01.951994: step: 384/459, loss: 0.027984051033854485 2023-01-22 16:53:02.559916: step: 386/459, loss: 0.050297193229198456 2023-01-22 16:53:03.194629: step: 388/459, loss: 0.20440587401390076 2023-01-22 16:53:03.778951: step: 390/459, loss: 0.0455477237701416 2023-01-22 16:53:04.471735: step: 392/459, loss: 0.15706327557563782 2023-01-22 16:53:05.165695: step: 394/459, loss: 0.03830566629767418 2023-01-22 16:53:05.869071: step: 396/459, loss: 0.1135689914226532 2023-01-22 16:53:06.609534: step: 398/459, loss: 0.019075067713856697 2023-01-22 16:53:07.207578: step: 400/459, loss: 0.009041273035109043 2023-01-22 16:53:07.805225: step: 402/459, loss: 0.015650663524866104 2023-01-22 16:53:08.389108: step: 404/459, loss: 0.104852095246315 2023-01-22 16:53:09.014452: step: 406/459, loss: 0.019897278398275375 2023-01-22 16:53:09.563747: step: 408/459, loss: 0.03973732888698578 2023-01-22 16:53:10.184177: step: 410/459, loss: 0.06493841111660004 2023-01-22 16:53:10.888350: step: 412/459, loss: 0.034014150500297546 2023-01-22 16:53:11.519683: step: 414/459, loss: 0.0732174664735794 2023-01-22 16:53:12.123279: step: 416/459, loss: 0.0456249974668026 2023-01-22 16:53:12.772687: step: 418/459, loss: 0.04507865756750107 2023-01-22 16:53:13.500851: step: 420/459, loss: 0.05259424448013306 2023-01-22 16:53:14.056474: step: 422/459, loss: 0.012811033055186272 2023-01-22 16:53:14.687312: step: 424/459, loss: 0.1471346914768219 2023-01-22 16:53:15.312817: step: 426/459, loss: 0.009660816751420498 2023-01-22 16:53:15.917281: step: 428/459, loss: 0.0017188603524118662 2023-01-22 16:53:16.492929: step: 430/459, loss: 0.03379829600453377 2023-01-22 16:53:17.135348: step: 432/459, loss: 0.19839248061180115 2023-01-22 16:53:17.760454: step: 434/459, loss: 0.044022440910339355 2023-01-22 16:53:18.527394: step: 436/459, loss: 0.10311343520879745 2023-01-22 16:53:19.164948: step: 438/459, loss: 0.3727658987045288 2023-01-22 16:53:19.778833: step: 440/459, loss: 0.012156271375715733 2023-01-22 16:53:20.383267: step: 442/459, loss: 0.01895398087799549 2023-01-22 16:53:20.971609: step: 444/459, loss: 0.017347892746329308 2023-01-22 16:53:21.593266: step: 446/459, loss: 0.017637142911553383 2023-01-22 16:53:22.231175: step: 448/459, loss: 0.056647032499313354 2023-01-22 16:53:22.912842: step: 450/459, loss: 0.06568159908056259 2023-01-22 16:53:23.442150: step: 452/459, loss: 0.04556339234113693 2023-01-22 16:53:24.023902: step: 454/459, loss: 0.05157574266195297 2023-01-22 16:53:24.656987: step: 456/459, loss: 0.020495835691690445 2023-01-22 16:53:25.283236: step: 458/459, loss: 0.03646979108452797 2023-01-22 16:53:25.885127: step: 460/459, loss: 0.019308630377054214 2023-01-22 16:53:26.515860: step: 462/459, loss: 0.5413766503334045 2023-01-22 16:53:27.116729: step: 464/459, loss: 0.02341311424970627 2023-01-22 16:53:27.861682: step: 466/459, loss: 0.019245373085141182 2023-01-22 16:53:28.536929: step: 468/459, loss: 0.06721381098031998 2023-01-22 16:53:29.168210: step: 470/459, loss: 0.01904725655913353 2023-01-22 16:53:29.757370: step: 472/459, loss: 0.08194570988416672 2023-01-22 16:53:30.349590: step: 474/459, loss: 0.012368959374725819 2023-01-22 16:53:30.990280: step: 476/459, loss: 0.09143030643463135 2023-01-22 16:53:31.564289: step: 478/459, loss: 0.07606247812509537 2023-01-22 16:53:32.127786: step: 480/459, loss: 0.00797201693058014 2023-01-22 16:53:32.680732: step: 482/459, loss: 1.1360845565795898 2023-01-22 16:53:33.242257: step: 484/459, loss: 0.12916535139083862 2023-01-22 16:53:33.819609: step: 486/459, loss: 0.0458512157201767 2023-01-22 16:53:34.366585: step: 488/459, loss: 0.008756515569984913 2023-01-22 16:53:35.004542: step: 490/459, loss: 0.04125141724944115 2023-01-22 16:53:35.575586: step: 492/459, loss: 0.06458313018083572 2023-01-22 16:53:36.173693: step: 494/459, loss: 0.09440933167934418 2023-01-22 16:53:36.851234: step: 496/459, loss: 0.05955049768090248 2023-01-22 16:53:37.440542: step: 498/459, loss: 0.01909560337662697 2023-01-22 16:53:38.079350: step: 500/459, loss: 0.008516060188412666 2023-01-22 16:53:38.721988: step: 502/459, loss: 0.11826205998659134 2023-01-22 16:53:39.343940: step: 504/459, loss: 0.0783848762512207 2023-01-22 16:53:39.986030: step: 506/459, loss: 0.007848921231925488 2023-01-22 16:53:40.593616: step: 508/459, loss: 0.05985372141003609 2023-01-22 16:53:41.205280: step: 510/459, loss: 0.009111099876463413 2023-01-22 16:53:41.810310: step: 512/459, loss: 0.05883515253663063 2023-01-22 16:53:42.473915: step: 514/459, loss: 0.13078095018863678 2023-01-22 16:53:43.101903: step: 516/459, loss: 0.17282944917678833 2023-01-22 16:53:43.699771: step: 518/459, loss: 0.051789019256830215 2023-01-22 16:53:44.376337: step: 520/459, loss: 0.0628538727760315 2023-01-22 16:53:45.020201: step: 522/459, loss: 0.1660318672657013 2023-01-22 16:53:45.657848: step: 524/459, loss: 0.07446978241205215 2023-01-22 16:53:46.245569: step: 526/459, loss: 0.03642286732792854 2023-01-22 16:53:46.937979: step: 528/459, loss: 0.0200937632471323 2023-01-22 16:53:47.474389: step: 530/459, loss: 0.017854711040854454 2023-01-22 16:53:48.111545: step: 532/459, loss: 0.06038819998502731 2023-01-22 16:53:48.808675: step: 534/459, loss: 0.07085015624761581 2023-01-22 16:53:49.461826: step: 536/459, loss: 0.040731094777584076 2023-01-22 16:53:50.048095: step: 538/459, loss: 0.07887811958789825 2023-01-22 16:53:50.685995: step: 540/459, loss: 0.021012835204601288 2023-01-22 16:53:51.296386: step: 542/459, loss: 0.06567184627056122 2023-01-22 16:53:51.883299: step: 544/459, loss: 0.3213575780391693 2023-01-22 16:53:52.559494: step: 546/459, loss: 0.06888820976018906 2023-01-22 16:53:53.169268: step: 548/459, loss: 0.044706813991069794 2023-01-22 16:53:53.807447: step: 550/459, loss: 0.11985288560390472 2023-01-22 16:53:54.446350: step: 552/459, loss: 0.07485827803611755 2023-01-22 16:53:55.049149: step: 554/459, loss: 0.05197512358427048 2023-01-22 16:53:55.611515: step: 556/459, loss: 0.02553795464336872 2023-01-22 16:53:56.266675: step: 558/459, loss: 0.0784917101264 2023-01-22 16:53:56.891081: step: 560/459, loss: 0.04178362712264061 2023-01-22 16:53:57.603798: step: 562/459, loss: 0.0450725294649601 2023-01-22 16:53:58.237412: step: 564/459, loss: 0.4276464283466339 2023-01-22 16:53:58.811104: step: 566/459, loss: 0.08126475661993027 2023-01-22 16:53:59.480136: step: 568/459, loss: 0.019030537456274033 2023-01-22 16:54:00.136160: step: 570/459, loss: 0.04636233672499657 2023-01-22 16:54:00.722333: step: 572/459, loss: 0.17811526358127594 2023-01-22 16:54:01.350561: step: 574/459, loss: 0.025734882801771164 2023-01-22 16:54:01.951182: step: 576/459, loss: 0.2807765007019043 2023-01-22 16:54:02.586618: step: 578/459, loss: 0.04264673590660095 2023-01-22 16:54:03.184806: step: 580/459, loss: 0.030184024944901466 2023-01-22 16:54:03.824541: step: 582/459, loss: 0.047185249626636505 2023-01-22 16:54:04.446900: step: 584/459, loss: 0.16512101888656616 2023-01-22 16:54:05.045858: step: 586/459, loss: 0.023160602897405624 2023-01-22 16:54:05.707319: step: 588/459, loss: 0.06149701774120331 2023-01-22 16:54:06.332737: step: 590/459, loss: 0.07265780866146088 2023-01-22 16:54:06.905251: step: 592/459, loss: 0.061340656131505966 2023-01-22 16:54:07.565701: step: 594/459, loss: 0.4114360809326172 2023-01-22 16:54:08.160679: step: 596/459, loss: 0.053624603897333145 2023-01-22 16:54:08.744865: step: 598/459, loss: 0.05442585423588753 2023-01-22 16:54:09.424688: step: 600/459, loss: 0.29500940442085266 2023-01-22 16:54:10.013212: step: 602/459, loss: 0.02621873840689659 2023-01-22 16:54:10.660188: step: 604/459, loss: 0.040714509785175323 2023-01-22 16:54:11.254854: step: 606/459, loss: 0.032849233597517014 2023-01-22 16:54:11.924941: step: 608/459, loss: 0.013506417162716389 2023-01-22 16:54:12.566167: step: 610/459, loss: 0.0652952715754509 2023-01-22 16:54:13.142498: step: 612/459, loss: 0.0055878846906125546 2023-01-22 16:54:13.772159: step: 614/459, loss: 0.10628878325223923 2023-01-22 16:54:14.387065: step: 616/459, loss: 0.06248045340180397 2023-01-22 16:54:14.967702: step: 618/459, loss: 0.02595207467675209 2023-01-22 16:54:15.598374: step: 620/459, loss: 0.023633552715182304 2023-01-22 16:54:16.208661: step: 622/459, loss: 0.03886827081441879 2023-01-22 16:54:16.836876: step: 624/459, loss: 0.09139174222946167 2023-01-22 16:54:17.433391: step: 626/459, loss: 0.00951039232313633 2023-01-22 16:54:17.988506: step: 628/459, loss: 0.0784825012087822 2023-01-22 16:54:18.675869: step: 630/459, loss: 0.08720705658197403 2023-01-22 16:54:19.300620: step: 632/459, loss: 0.029913684353232384 2023-01-22 16:54:19.963167: step: 634/459, loss: 0.04193555936217308 2023-01-22 16:54:20.508859: step: 636/459, loss: 0.0451163649559021 2023-01-22 16:54:21.170509: step: 638/459, loss: 0.08224747329950333 2023-01-22 16:54:21.775643: step: 640/459, loss: 0.012714694254100323 2023-01-22 16:54:22.372554: step: 642/459, loss: 0.018822479993104935 2023-01-22 16:54:22.976992: step: 644/459, loss: 0.007535866927355528 2023-01-22 16:54:23.555391: step: 646/459, loss: 0.01519091334193945 2023-01-22 16:54:24.245988: step: 648/459, loss: 0.045429669320583344 2023-01-22 16:54:24.919404: step: 650/459, loss: 0.04523537680506706 2023-01-22 16:54:25.534835: step: 652/459, loss: 0.020360443741083145 2023-01-22 16:54:26.163111: step: 654/459, loss: 0.02196146920323372 2023-01-22 16:54:26.724184: step: 656/459, loss: 0.012511519715189934 2023-01-22 16:54:27.353548: step: 658/459, loss: 0.08438675105571747 2023-01-22 16:54:27.985978: step: 660/459, loss: 0.07368183135986328 2023-01-22 16:54:28.697675: step: 662/459, loss: 0.05903848260641098 2023-01-22 16:54:29.295569: step: 664/459, loss: 0.038618799299001694 2023-01-22 16:54:29.908605: step: 666/459, loss: 0.014066135510802269 2023-01-22 16:54:30.470239: step: 668/459, loss: 0.029532097280025482 2023-01-22 16:54:31.141614: step: 670/459, loss: 0.09816791117191315 2023-01-22 16:54:31.839200: step: 672/459, loss: 0.08739590644836426 2023-01-22 16:54:32.442654: step: 674/459, loss: 0.018681177869439125 2023-01-22 16:54:33.046702: step: 676/459, loss: 0.04813528060913086 2023-01-22 16:54:33.630602: step: 678/459, loss: 0.13237841427326202 2023-01-22 16:54:34.253378: step: 680/459, loss: 0.0343746580183506 2023-01-22 16:54:34.930675: step: 682/459, loss: 0.07078292965888977 2023-01-22 16:54:35.463704: step: 684/459, loss: 0.026636414229869843 2023-01-22 16:54:36.045649: step: 686/459, loss: 0.06817056238651276 2023-01-22 16:54:36.679372: step: 688/459, loss: 0.06000012904405594 2023-01-22 16:54:37.325513: step: 690/459, loss: 0.0373588465154171 2023-01-22 16:54:37.943109: step: 692/459, loss: 0.06368768960237503 2023-01-22 16:54:38.541010: step: 694/459, loss: 0.14548924565315247 2023-01-22 16:54:39.177060: step: 696/459, loss: 0.03159354254603386 2023-01-22 16:54:39.833258: step: 698/459, loss: 0.05067628249526024 2023-01-22 16:54:40.472379: step: 700/459, loss: 0.042572423815727234 2023-01-22 16:54:41.169825: step: 702/459, loss: 0.040350742638111115 2023-01-22 16:54:41.762447: step: 704/459, loss: 0.04203552380204201 2023-01-22 16:54:42.450289: step: 706/459, loss: 0.034758683294057846 2023-01-22 16:54:43.118173: step: 708/459, loss: 0.024523356929421425 2023-01-22 16:54:43.784743: step: 710/459, loss: 0.08829626441001892 2023-01-22 16:54:44.362651: step: 712/459, loss: 0.03615858033299446 2023-01-22 16:54:44.961662: step: 714/459, loss: 0.12608018517494202 2023-01-22 16:54:45.661388: step: 716/459, loss: 0.196508526802063 2023-01-22 16:54:46.324997: step: 718/459, loss: 0.04248116910457611 2023-01-22 16:54:46.945221: step: 720/459, loss: 0.0498238205909729 2023-01-22 16:54:47.537439: step: 722/459, loss: 0.0243215449154377 2023-01-22 16:54:48.165097: step: 724/459, loss: 0.30093371868133545 2023-01-22 16:54:48.788881: step: 726/459, loss: 0.022538157179951668 2023-01-22 16:54:49.415916: step: 728/459, loss: 0.11590714007616043 2023-01-22 16:54:50.092856: step: 730/459, loss: 0.8417569398880005 2023-01-22 16:54:50.712727: step: 732/459, loss: 0.10227639228105545 2023-01-22 16:54:51.273972: step: 734/459, loss: 0.028988467529416084 2023-01-22 16:54:51.924532: step: 736/459, loss: 0.16233721375465393 2023-01-22 16:54:52.536423: step: 738/459, loss: 0.22100500762462616 2023-01-22 16:54:53.206269: step: 740/459, loss: 0.009719747118651867 2023-01-22 16:54:53.846657: step: 742/459, loss: 0.014111795462667942 2023-01-22 16:54:54.450600: step: 744/459, loss: 0.1484522670507431 2023-01-22 16:54:55.083909: step: 746/459, loss: 0.5076615810394287 2023-01-22 16:54:55.721288: step: 748/459, loss: 0.05237176641821861 2023-01-22 16:54:56.319314: step: 750/459, loss: 0.1827717125415802 2023-01-22 16:54:56.938274: step: 752/459, loss: 0.057476285845041275 2023-01-22 16:54:57.595144: step: 754/459, loss: 0.037842217832803726 2023-01-22 16:54:58.309054: step: 756/459, loss: 0.026611628010869026 2023-01-22 16:54:58.945185: step: 758/459, loss: 0.09410920739173889 2023-01-22 16:54:59.531383: step: 760/459, loss: 0.16346976161003113 2023-01-22 16:55:00.107655: step: 762/459, loss: 0.029730023816227913 2023-01-22 16:55:00.751105: step: 764/459, loss: 0.07173711806535721 2023-01-22 16:55:01.422937: step: 766/459, loss: 0.13771399855613708 2023-01-22 16:55:02.028275: step: 768/459, loss: 0.026737792417407036 2023-01-22 16:55:02.649566: step: 770/459, loss: 0.01272384263575077 2023-01-22 16:55:03.262376: step: 772/459, loss: 0.14210115373134613 2023-01-22 16:55:03.896898: step: 774/459, loss: 0.0535258948802948 2023-01-22 16:55:04.515198: step: 776/459, loss: 0.030291302129626274 2023-01-22 16:55:05.105525: step: 778/459, loss: 0.8768541216850281 2023-01-22 16:55:05.739833: step: 780/459, loss: 0.046301718801259995 2023-01-22 16:55:06.338344: step: 782/459, loss: 0.05607893690466881 2023-01-22 16:55:06.944003: step: 784/459, loss: 0.09412380307912827 2023-01-22 16:55:07.503170: step: 786/459, loss: 0.01600470207631588 2023-01-22 16:55:08.103342: step: 788/459, loss: 0.046328939497470856 2023-01-22 16:55:08.680782: step: 790/459, loss: 0.0978398472070694 2023-01-22 16:55:09.282788: step: 792/459, loss: 0.088990718126297 2023-01-22 16:55:09.924681: step: 794/459, loss: 0.032136447727680206 2023-01-22 16:55:10.541527: step: 796/459, loss: 0.031694553792476654 2023-01-22 16:55:11.158507: step: 798/459, loss: 0.06013484299182892 2023-01-22 16:55:11.772920: step: 800/459, loss: 0.0360369011759758 2023-01-22 16:55:12.430085: step: 802/459, loss: 0.3821277916431427 2023-01-22 16:55:13.067170: step: 804/459, loss: 0.06983546912670135 2023-01-22 16:55:13.648656: step: 806/459, loss: 0.026751309633255005 2023-01-22 16:55:14.234268: step: 808/459, loss: 0.025875002145767212 2023-01-22 16:55:14.792067: step: 810/459, loss: 0.5054068565368652 2023-01-22 16:55:15.432373: step: 812/459, loss: 0.0664668083190918 2023-01-22 16:55:16.025767: step: 814/459, loss: 0.25825661420822144 2023-01-22 16:55:16.631364: step: 816/459, loss: 0.2768920958042145 2023-01-22 16:55:17.218650: step: 818/459, loss: 0.013669253326952457 2023-01-22 16:55:17.850353: step: 820/459, loss: 0.07091953605413437 2023-01-22 16:55:18.499972: step: 822/459, loss: 0.14212842285633087 2023-01-22 16:55:19.165642: step: 824/459, loss: 0.084327831864357 2023-01-22 16:55:19.813769: step: 826/459, loss: 0.012151743285357952 2023-01-22 16:55:20.421447: step: 828/459, loss: 0.009631875902414322 2023-01-22 16:55:21.037319: step: 830/459, loss: 0.053881097584962845 2023-01-22 16:55:21.637950: step: 832/459, loss: 0.03377237170934677 2023-01-22 16:55:22.255039: step: 834/459, loss: 0.5832617878913879 2023-01-22 16:55:22.968079: step: 836/459, loss: 0.04745929315686226 2023-01-22 16:55:23.546440: step: 838/459, loss: 0.02788577228784561 2023-01-22 16:55:24.149512: step: 840/459, loss: 0.06570357084274292 2023-01-22 16:55:24.744212: step: 842/459, loss: 0.058085888624191284 2023-01-22 16:55:25.339619: step: 844/459, loss: 0.029686298221349716 2023-01-22 16:55:26.017110: step: 846/459, loss: 0.06367189437150955 2023-01-22 16:55:26.568976: step: 848/459, loss: 0.09462186694145203 2023-01-22 16:55:27.255928: step: 850/459, loss: 0.08435549587011337 2023-01-22 16:55:27.844446: step: 852/459, loss: 0.10915666818618774 2023-01-22 16:55:28.473026: step: 854/459, loss: 0.018536638468503952 2023-01-22 16:55:29.101221: step: 856/459, loss: 0.1345360428094864 2023-01-22 16:55:29.777598: step: 858/459, loss: 0.09500298649072647 2023-01-22 16:55:30.400077: step: 860/459, loss: 0.05598185211420059 2023-01-22 16:55:31.063323: step: 862/459, loss: 0.12720869481563568 2023-01-22 16:55:31.684363: step: 864/459, loss: 0.018472939729690552 2023-01-22 16:55:32.315307: step: 866/459, loss: 0.041280169039964676 2023-01-22 16:55:32.932628: step: 868/459, loss: 0.03988105431199074 2023-01-22 16:55:33.569869: step: 870/459, loss: 0.1324407309293747 2023-01-22 16:55:34.172429: step: 872/459, loss: 0.1016283705830574 2023-01-22 16:55:34.822885: step: 874/459, loss: 0.08061406016349792 2023-01-22 16:55:35.408706: step: 876/459, loss: 0.14978595077991486 2023-01-22 16:55:36.097881: step: 878/459, loss: 0.028919395059347153 2023-01-22 16:55:36.739524: step: 880/459, loss: 0.06864556670188904 2023-01-22 16:55:37.383681: step: 882/459, loss: 0.09691867977380753 2023-01-22 16:55:37.903964: step: 884/459, loss: 0.05097085237503052 2023-01-22 16:55:38.485639: step: 886/459, loss: 0.017005465924739838 2023-01-22 16:55:39.142027: step: 888/459, loss: 0.2906736135482788 2023-01-22 16:55:39.795353: step: 890/459, loss: 0.1430058479309082 2023-01-22 16:55:40.378963: step: 892/459, loss: 0.013199142180383205 2023-01-22 16:55:41.011821: step: 894/459, loss: 0.033328209072351456 2023-01-22 16:55:41.651158: step: 896/459, loss: 0.06762955337762833 2023-01-22 16:55:42.317742: step: 898/459, loss: 0.08397633582353592 2023-01-22 16:55:42.979578: step: 900/459, loss: 0.2079596221446991 2023-01-22 16:55:43.577301: step: 902/459, loss: 0.07673574984073639 2023-01-22 16:55:44.224546: step: 904/459, loss: 0.01810038462281227 2023-01-22 16:55:44.854185: step: 906/459, loss: 0.07278335839509964 2023-01-22 16:55:45.446692: step: 908/459, loss: 0.030927183106541634 2023-01-22 16:55:46.048736: step: 910/459, loss: 0.03518429398536682 2023-01-22 16:55:46.723199: step: 912/459, loss: 0.09772902727127075 2023-01-22 16:55:47.299075: step: 914/459, loss: 0.0742645263671875 2023-01-22 16:55:47.880596: step: 916/459, loss: 0.014007151126861572 2023-01-22 16:55:48.458730: step: 918/459, loss: 0.3254907727241516 2023-01-22 16:55:48.891850: step: 920/459, loss: 0.00028322788421064615 ================================================== Loss: 0.146 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3066986908783784, 'r': 0.3445268026565465, 'f1': 0.3245140750670242}, 'combined': 0.23911563425991253, 'epoch': 19} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31271473684488904, 'r': 0.30873473110322686, 'f1': 0.31071198921642224}, 'combined': 0.1988556730985102, 'epoch': 19} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30432287753568743, 'r': 0.34936497326203203, 'f1': 0.3252921217475104}, 'combined': 0.239688931813955, 'epoch': 19} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31873378117554807, 'r': 0.3169952332782269, 'f1': 0.3178621299963989}, 'combined': 0.20343176319769526, 'epoch': 19} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3207681791399267, 'r': 0.34024556383153515, 'f1': 0.3302199118585986}, 'combined': 0.24331993505370422, 'epoch': 19} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3329995961329248, 'r': 0.3333031509151791, 'f1': 0.3331513043771767}, 'combined': 0.23886319936476824, 'epoch': 19} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2564102564102564, 'r': 0.38095238095238093, 'f1': 0.3065134099616858}, 'combined': 0.20434227330779053, 'epoch': 19} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2426470588235294, 'r': 0.358695652173913, 'f1': 0.2894736842105263}, 'combined': 0.14473684210526316, 'epoch': 19} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3125, 'r': 0.1724137931034483, 'f1': 0.22222222222222224}, 'combined': 0.14814814814814814, 'epoch': 19} New best chinese model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3066986908783784, 'r': 0.3445268026565465, 'f1': 0.3245140750670242}, 'combined': 0.23911563425991253, 'epoch': 19} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31271473684488904, 'r': 0.30873473110322686, 'f1': 0.31071198921642224}, 'combined': 0.1988556730985102, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2564102564102564, 'r': 0.38095238095238093, 'f1': 0.3065134099616858}, 'combined': 0.20434227330779053, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29418702499903554, 'r': 0.3594998939267151, 'f1': 0.32358060478117656}, 'combined': 0.23842781404928798, 'epoch': 13} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3202762492367604, 'r': 0.31212376289255195, 'f1': 0.31614745781013553}, 'combined': 0.20233437299848672, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2878787878787879, 'r': 0.41304347826086957, 'f1': 0.3392857142857143}, 'combined': 0.16964285714285715, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3189032241859154, 'r': 0.3376622373733222, 'f1': 0.32801474487694154}, 'combined': 0.24169507517248323, 'epoch': 15} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34157316773046453, 'r': 0.3008578947798814, 'f1': 0.3199253278791385}, 'combined': 0.22938042376240123, 'epoch': 15} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35294117647058826, 'r': 0.20689655172413793, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 15} ****************************** Epoch: 20 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 16:58:39.576522: step: 2/459, loss: 0.07470521330833435 2023-01-22 16:58:40.245245: step: 4/459, loss: 0.10932993143796921 2023-01-22 16:58:40.900973: step: 6/459, loss: 0.023760944604873657 2023-01-22 16:58:41.544085: step: 8/459, loss: 0.1315298229455948 2023-01-22 16:58:42.171640: step: 10/459, loss: 0.02189754694700241 2023-01-22 16:58:42.764558: step: 12/459, loss: 0.018965842202305794 2023-01-22 16:58:43.313065: step: 14/459, loss: 0.013515274971723557 2023-01-22 16:58:43.883486: step: 16/459, loss: 0.08557847142219543 2023-01-22 16:58:44.491517: step: 18/459, loss: 0.07357529550790787 2023-01-22 16:58:45.096949: step: 20/459, loss: 0.0380052849650383 2023-01-22 16:58:45.636747: step: 22/459, loss: 0.03295587748289108 2023-01-22 16:58:46.272606: step: 24/459, loss: 0.009474709630012512 2023-01-22 16:58:46.863434: step: 26/459, loss: 0.024355459958314896 2023-01-22 16:58:47.533781: step: 28/459, loss: 0.07785923033952713 2023-01-22 16:58:48.229405: step: 30/459, loss: 0.01432728860527277 2023-01-22 16:58:48.831902: step: 32/459, loss: 0.05780802667140961 2023-01-22 16:58:49.489567: step: 34/459, loss: 0.04481916129589081 2023-01-22 16:58:50.094070: step: 36/459, loss: 0.035423822700977325 2023-01-22 16:58:50.686489: step: 38/459, loss: 0.008202098309993744 2023-01-22 16:58:51.343797: step: 40/459, loss: 0.03897996246814728 2023-01-22 16:58:52.022191: step: 42/459, loss: 0.021299000829458237 2023-01-22 16:58:52.636180: step: 44/459, loss: 0.024328801780939102 2023-01-22 16:58:53.251624: step: 46/459, loss: 0.054680779576301575 2023-01-22 16:58:53.857130: step: 48/459, loss: 0.2715844511985779 2023-01-22 16:58:54.495058: step: 50/459, loss: 0.045310866087675095 2023-01-22 16:58:55.144522: step: 52/459, loss: 0.034058794379234314 2023-01-22 16:58:55.754691: step: 54/459, loss: 0.023770052939653397 2023-01-22 16:58:56.349768: step: 56/459, loss: 0.15159960091114044 2023-01-22 16:58:56.960917: step: 58/459, loss: 0.014318381436169147 2023-01-22 16:58:57.577409: step: 60/459, loss: 0.0518992617726326 2023-01-22 16:58:58.212927: step: 62/459, loss: 0.03090200200676918 2023-01-22 16:58:58.840021: step: 64/459, loss: 0.11464698612689972 2023-01-22 16:58:59.428390: step: 66/459, loss: 0.03711942210793495 2023-01-22 16:59:00.052557: step: 68/459, loss: 0.12683676183223724 2023-01-22 16:59:00.691605: step: 70/459, loss: 0.020109672099351883 2023-01-22 16:59:01.286972: step: 72/459, loss: 0.033914715051651 2023-01-22 16:59:02.007654: step: 74/459, loss: 0.029316583648324013 2023-01-22 16:59:02.645812: step: 76/459, loss: 0.030630310997366905 2023-01-22 16:59:03.341343: step: 78/459, loss: 0.07579075545072556 2023-01-22 16:59:04.015493: step: 80/459, loss: 0.18983787298202515 2023-01-22 16:59:04.645006: step: 82/459, loss: 0.010848655365407467 2023-01-22 16:59:05.276468: step: 84/459, loss: 0.0996270701289177 2023-01-22 16:59:05.915011: step: 86/459, loss: 0.06044477969408035 2023-01-22 16:59:06.513008: step: 88/459, loss: 0.2774375081062317 2023-01-22 16:59:07.161826: step: 90/459, loss: 0.023317622020840645 2023-01-22 16:59:07.762208: step: 92/459, loss: 0.03474913164973259 2023-01-22 16:59:08.385258: step: 94/459, loss: 0.05339209735393524 2023-01-22 16:59:09.004918: step: 96/459, loss: 0.4378610849380493 2023-01-22 16:59:09.638287: step: 98/459, loss: 0.07663140445947647 2023-01-22 16:59:10.256363: step: 100/459, loss: 0.025069842115044594 2023-01-22 16:59:10.966069: step: 102/459, loss: 0.034488849341869354 2023-01-22 16:59:11.542403: step: 104/459, loss: 0.0009506430360488594 2023-01-22 16:59:12.165732: step: 106/459, loss: 0.01749313622713089 2023-01-22 16:59:12.727164: step: 108/459, loss: 0.020381733775138855 2023-01-22 16:59:13.296979: step: 110/459, loss: 0.18079407513141632 2023-01-22 16:59:13.929842: step: 112/459, loss: 0.06637118756771088 2023-01-22 16:59:14.548674: step: 114/459, loss: 0.11456461250782013 2023-01-22 16:59:15.201306: step: 116/459, loss: 0.413664311170578 2023-01-22 16:59:15.973216: step: 118/459, loss: 0.051005296409130096 2023-01-22 16:59:16.546425: step: 120/459, loss: 0.025535304099321365 2023-01-22 16:59:17.141401: step: 122/459, loss: 0.06031080335378647 2023-01-22 16:59:17.760156: step: 124/459, loss: 0.03341873362660408 2023-01-22 16:59:18.359013: step: 126/459, loss: 0.006575940642505884 2023-01-22 16:59:18.997738: step: 128/459, loss: 0.026239128783345222 2023-01-22 16:59:19.610928: step: 130/459, loss: 0.021035222336649895 2023-01-22 16:59:20.173378: step: 132/459, loss: 0.917841911315918 2023-01-22 16:59:20.786626: step: 134/459, loss: 0.010917187668383121 2023-01-22 16:59:21.435628: step: 136/459, loss: 0.07757078856229782 2023-01-22 16:59:22.107634: step: 138/459, loss: 0.18106138706207275 2023-01-22 16:59:22.740394: step: 140/459, loss: 0.07090955972671509 2023-01-22 16:59:23.339226: step: 142/459, loss: 0.023600388318300247 2023-01-22 16:59:23.965590: step: 144/459, loss: 0.07302413135766983 2023-01-22 16:59:24.619055: step: 146/459, loss: 0.022212620824575424 2023-01-22 16:59:25.276485: step: 148/459, loss: 0.05428776517510414 2023-01-22 16:59:25.990513: step: 150/459, loss: 0.08946312218904495 2023-01-22 16:59:26.674906: step: 152/459, loss: 0.01660596951842308 2023-01-22 16:59:27.329878: step: 154/459, loss: 0.04551423713564873 2023-01-22 16:59:27.926069: step: 156/459, loss: 0.010163622908294201 2023-01-22 16:59:28.489262: step: 158/459, loss: 0.013179643079638481 2023-01-22 16:59:29.099395: step: 160/459, loss: 0.02066226303577423 2023-01-22 16:59:29.696397: step: 162/459, loss: 0.004925037734210491 2023-01-22 16:59:30.297611: step: 164/459, loss: 0.0014625594485551119 2023-01-22 16:59:30.889321: step: 166/459, loss: 0.0298256017267704 2023-01-22 16:59:31.554105: step: 168/459, loss: 0.0010691303759813309 2023-01-22 16:59:32.161927: step: 170/459, loss: 0.051986388862133026 2023-01-22 16:59:32.755395: step: 172/459, loss: 0.04556310549378395 2023-01-22 16:59:33.408423: step: 174/459, loss: 0.03567812964320183 2023-01-22 16:59:34.085824: step: 176/459, loss: 0.07021137326955795 2023-01-22 16:59:34.719553: step: 178/459, loss: 0.3310394883155823 2023-01-22 16:59:35.344393: step: 180/459, loss: 0.07564850896596909 2023-01-22 16:59:36.004842: step: 182/459, loss: 0.05482872202992439 2023-01-22 16:59:36.619588: step: 184/459, loss: 0.030451253056526184 2023-01-22 16:59:37.238764: step: 186/459, loss: 0.2767855226993561 2023-01-22 16:59:37.853232: step: 188/459, loss: 0.040636103600263596 2023-01-22 16:59:38.521622: step: 190/459, loss: 0.07016197592020035 2023-01-22 16:59:39.134346: step: 192/459, loss: 0.21796372532844543 2023-01-22 16:59:39.739437: step: 194/459, loss: 0.022792981937527657 2023-01-22 16:59:40.317164: step: 196/459, loss: 0.015149074606597424 2023-01-22 16:59:40.946406: step: 198/459, loss: 0.1953006535768509 2023-01-22 16:59:41.617376: step: 200/459, loss: 0.021133707836270332 2023-01-22 16:59:42.214995: step: 202/459, loss: 0.02343875728547573 2023-01-22 16:59:42.873665: step: 204/459, loss: 0.04428006336092949 2023-01-22 16:59:43.498749: step: 206/459, loss: 0.10320693999528885 2023-01-22 16:59:44.114681: step: 208/459, loss: 0.09409085661172867 2023-01-22 16:59:44.754526: step: 210/459, loss: 0.023778662085533142 2023-01-22 16:59:45.443987: step: 212/459, loss: 0.021064244210720062 2023-01-22 16:59:46.023511: step: 214/459, loss: 0.013511089608073235 2023-01-22 16:59:46.586876: step: 216/459, loss: 0.03660643845796585 2023-01-22 16:59:47.219579: step: 218/459, loss: 0.1003895029425621 2023-01-22 16:59:47.850564: step: 220/459, loss: 0.06641336530447006 2023-01-22 16:59:48.437674: step: 222/459, loss: 0.012097152881324291 2023-01-22 16:59:49.037827: step: 224/459, loss: 0.0926191508769989 2023-01-22 16:59:49.764623: step: 226/459, loss: 0.00372349307872355 2023-01-22 16:59:50.342999: step: 228/459, loss: 0.025557024404406548 2023-01-22 16:59:51.046529: step: 230/459, loss: 0.059351202100515366 2023-01-22 16:59:51.619981: step: 232/459, loss: 0.011658880859613419 2023-01-22 16:59:52.192378: step: 234/459, loss: 0.0037552807480096817 2023-01-22 16:59:52.791050: step: 236/459, loss: 0.0553792342543602 2023-01-22 16:59:53.400121: step: 238/459, loss: 0.013929390348494053 2023-01-22 16:59:54.026611: step: 240/459, loss: 0.009333419613540173 2023-01-22 16:59:54.573193: step: 242/459, loss: 0.15186598896980286 2023-01-22 16:59:55.188939: step: 244/459, loss: 0.14192058145999908 2023-01-22 16:59:55.792020: step: 246/459, loss: 0.053616005927324295 2023-01-22 16:59:56.346167: step: 248/459, loss: 0.01496982667595148 2023-01-22 16:59:56.955604: step: 250/459, loss: 0.09458352625370026 2023-01-22 16:59:57.660276: step: 252/459, loss: 0.0805136114358902 2023-01-22 16:59:58.329135: step: 254/459, loss: 0.13560090959072113 2023-01-22 16:59:59.026470: step: 256/459, loss: 0.04563348740339279 2023-01-22 16:59:59.638998: step: 258/459, loss: 0.06827323883771896 2023-01-22 17:00:00.308629: step: 260/459, loss: 0.037384260445833206 2023-01-22 17:00:00.971242: step: 262/459, loss: 0.027922242879867554 2023-01-22 17:00:01.574857: step: 264/459, loss: 0.00659040967002511 2023-01-22 17:00:02.271815: step: 266/459, loss: 0.03724820539355278 2023-01-22 17:00:02.909624: step: 268/459, loss: 0.035875894129276276 2023-01-22 17:00:03.634920: step: 270/459, loss: 0.028968336060643196 2023-01-22 17:00:04.234011: step: 272/459, loss: 0.11431773006916046 2023-01-22 17:00:04.791719: step: 274/459, loss: 0.7102066874504089 2023-01-22 17:00:05.338942: step: 276/459, loss: 0.0031249162275344133 2023-01-22 17:00:05.942764: step: 278/459, loss: 0.10098564624786377 2023-01-22 17:00:06.545044: step: 280/459, loss: 0.04965241625905037 2023-01-22 17:00:07.116989: step: 282/459, loss: 0.0005786462570540607 2023-01-22 17:00:07.706046: step: 284/459, loss: 0.007607356645166874 2023-01-22 17:00:08.347544: step: 286/459, loss: 0.05698813125491142 2023-01-22 17:00:08.966926: step: 288/459, loss: 0.054301802068948746 2023-01-22 17:00:09.666598: step: 290/459, loss: 0.394819051027298 2023-01-22 17:00:10.278506: step: 292/459, loss: 0.03732365742325783 2023-01-22 17:00:10.881104: step: 294/459, loss: 0.10386335104703903 2023-01-22 17:00:11.491951: step: 296/459, loss: 0.0970069169998169 2023-01-22 17:00:12.146501: step: 298/459, loss: 0.055556491017341614 2023-01-22 17:00:12.762512: step: 300/459, loss: 0.023973001167178154 2023-01-22 17:00:13.390392: step: 302/459, loss: 0.04531499743461609 2023-01-22 17:00:14.058407: step: 304/459, loss: 0.06579045951366425 2023-01-22 17:00:14.734173: step: 306/459, loss: 0.028921375051140785 2023-01-22 17:00:15.360160: step: 308/459, loss: 0.05577724054455757 2023-01-22 17:00:15.889290: step: 310/459, loss: 0.061956003308296204 2023-01-22 17:00:16.454785: step: 312/459, loss: 0.05605258047580719 2023-01-22 17:00:17.144849: step: 314/459, loss: 0.20059974491596222 2023-01-22 17:00:17.788959: step: 316/459, loss: 0.03825554996728897 2023-01-22 17:00:18.361975: step: 318/459, loss: 0.0057137468829751015 2023-01-22 17:00:18.979581: step: 320/459, loss: 0.060677964240312576 2023-01-22 17:00:19.676607: step: 322/459, loss: 0.04808647185564041 2023-01-22 17:00:20.295447: step: 324/459, loss: 0.039781708270311356 2023-01-22 17:00:20.914277: step: 326/459, loss: 0.12763668596744537 2023-01-22 17:00:21.509058: step: 328/459, loss: 0.03802170231938362 2023-01-22 17:00:22.219009: step: 330/459, loss: 0.047334954142570496 2023-01-22 17:00:22.840825: step: 332/459, loss: 0.022704022005200386 2023-01-22 17:00:23.439559: step: 334/459, loss: 0.04379318654537201 2023-01-22 17:00:24.075689: step: 336/459, loss: 0.09576316922903061 2023-01-22 17:00:24.726956: step: 338/459, loss: 0.05331173166632652 2023-01-22 17:00:25.327884: step: 340/459, loss: 0.049110572785139084 2023-01-22 17:00:25.992267: step: 342/459, loss: 0.03824935853481293 2023-01-22 17:00:26.644601: step: 344/459, loss: 0.029216453433036804 2023-01-22 17:00:27.225014: step: 346/459, loss: 0.012050400488078594 2023-01-22 17:00:27.799810: step: 348/459, loss: 0.08008033037185669 2023-01-22 17:00:28.457251: step: 350/459, loss: 0.08247989416122437 2023-01-22 17:00:29.093293: step: 352/459, loss: 0.1040826365351677 2023-01-22 17:00:29.787298: step: 354/459, loss: 0.028005016967654228 2023-01-22 17:00:30.392429: step: 356/459, loss: 0.5166982412338257 2023-01-22 17:00:31.023213: step: 358/459, loss: 0.011080273427069187 2023-01-22 17:00:31.679192: step: 360/459, loss: 0.01942683942615986 2023-01-22 17:00:32.316885: step: 362/459, loss: 0.21715405583381653 2023-01-22 17:00:32.973605: step: 364/459, loss: 0.0377415232360363 2023-01-22 17:00:33.573156: step: 366/459, loss: 0.02265748381614685 2023-01-22 17:00:34.177524: step: 368/459, loss: 0.06841293722391129 2023-01-22 17:00:34.834648: step: 370/459, loss: 0.05744689702987671 2023-01-22 17:00:35.468195: step: 372/459, loss: 0.06394843757152557 2023-01-22 17:00:35.989597: step: 374/459, loss: 0.03374779224395752 2023-01-22 17:00:36.692286: step: 376/459, loss: 0.04140923172235489 2023-01-22 17:00:37.315289: step: 378/459, loss: 0.032978422939777374 2023-01-22 17:00:37.946693: step: 380/459, loss: 0.8007550239562988 2023-01-22 17:00:38.521936: step: 382/459, loss: 0.052710775285959244 2023-01-22 17:00:39.219195: step: 384/459, loss: 0.029791517183184624 2023-01-22 17:00:39.823909: step: 386/459, loss: 0.0796152651309967 2023-01-22 17:00:40.468955: step: 388/459, loss: 0.029246380552649498 2023-01-22 17:00:41.110595: step: 390/459, loss: 0.019106674939393997 2023-01-22 17:00:41.778180: step: 392/459, loss: 0.0769248902797699 2023-01-22 17:00:42.427722: step: 394/459, loss: 0.09686250984668732 2023-01-22 17:00:43.087062: step: 396/459, loss: 0.05006745085120201 2023-01-22 17:00:43.695745: step: 398/459, loss: 0.04573042690753937 2023-01-22 17:00:44.278142: step: 400/459, loss: 0.05021078139543533 2023-01-22 17:00:44.850834: step: 402/459, loss: 0.09259038418531418 2023-01-22 17:00:45.408294: step: 404/459, loss: 0.12266980111598969 2023-01-22 17:00:46.060826: step: 406/459, loss: 0.40256938338279724 2023-01-22 17:00:46.707920: step: 408/459, loss: 0.06224486231803894 2023-01-22 17:00:47.334927: step: 410/459, loss: 0.011711819097399712 2023-01-22 17:00:47.911082: step: 412/459, loss: 0.12120562791824341 2023-01-22 17:00:48.464131: step: 414/459, loss: 0.0673501119017601 2023-01-22 17:00:49.104204: step: 416/459, loss: 0.05499453842639923 2023-01-22 17:00:49.743316: step: 418/459, loss: 0.04999115690588951 2023-01-22 17:00:50.456634: step: 420/459, loss: 0.08329056203365326 2023-01-22 17:00:51.049293: step: 422/459, loss: 0.14974655210971832 2023-01-22 17:00:51.651002: step: 424/459, loss: 0.01786421611905098 2023-01-22 17:00:52.288273: step: 426/459, loss: 0.013404449447989464 2023-01-22 17:00:52.893151: step: 428/459, loss: 0.2907252311706543 2023-01-22 17:00:53.432408: step: 430/459, loss: 0.10256446897983551 2023-01-22 17:00:54.033747: step: 432/459, loss: 0.22714999318122864 2023-01-22 17:00:54.770207: step: 434/459, loss: 0.10803370922803879 2023-01-22 17:00:55.484628: step: 436/459, loss: 0.0395452156662941 2023-01-22 17:00:56.088189: step: 438/459, loss: 0.02334635518491268 2023-01-22 17:00:56.672669: step: 440/459, loss: 0.08768557757139206 2023-01-22 17:00:57.330320: step: 442/459, loss: 0.06905203312635422 2023-01-22 17:00:57.954482: step: 444/459, loss: 0.012981434352695942 2023-01-22 17:00:58.545915: step: 446/459, loss: 0.03623326122760773 2023-01-22 17:00:59.193023: step: 448/459, loss: 0.032555777579545975 2023-01-22 17:00:59.794904: step: 450/459, loss: 0.02694508619606495 2023-01-22 17:01:00.358789: step: 452/459, loss: 0.15979452431201935 2023-01-22 17:01:00.965953: step: 454/459, loss: 0.04494030401110649 2023-01-22 17:01:01.573427: step: 456/459, loss: 0.013247435912489891 2023-01-22 17:01:02.212093: step: 458/459, loss: 0.03305288031697273 2023-01-22 17:01:02.823033: step: 460/459, loss: 0.08349739015102386 2023-01-22 17:01:03.523695: step: 462/459, loss: 0.015975065529346466 2023-01-22 17:01:04.114271: step: 464/459, loss: 0.03025933727622032 2023-01-22 17:01:04.670475: step: 466/459, loss: 0.08348551392555237 2023-01-22 17:01:05.278493: step: 468/459, loss: 0.03090248629450798 2023-01-22 17:01:05.840877: step: 470/459, loss: 0.02081601694226265 2023-01-22 17:01:06.468397: step: 472/459, loss: 0.06959852576255798 2023-01-22 17:01:07.153626: step: 474/459, loss: 0.012882841750979424 2023-01-22 17:01:07.837351: step: 476/459, loss: 0.0492078959941864 2023-01-22 17:01:08.500263: step: 478/459, loss: 0.03302006796002388 2023-01-22 17:01:09.077748: step: 480/459, loss: 0.058174602687358856 2023-01-22 17:01:09.641332: step: 482/459, loss: 0.0812818855047226 2023-01-22 17:01:10.226171: step: 484/459, loss: 0.04636014625430107 2023-01-22 17:01:10.970475: step: 486/459, loss: 0.13973069190979004 2023-01-22 17:01:11.577287: step: 488/459, loss: 0.0809292122721672 2023-01-22 17:01:12.165506: step: 490/459, loss: 0.01750250533223152 2023-01-22 17:01:12.782671: step: 492/459, loss: 0.03636382147669792 2023-01-22 17:01:13.450960: step: 494/459, loss: 0.006173378322273493 2023-01-22 17:01:14.092221: step: 496/459, loss: 0.028694763779640198 2023-01-22 17:01:14.740359: step: 498/459, loss: 0.010962216183543205 2023-01-22 17:01:15.383427: step: 500/459, loss: 0.042921122163534164 2023-01-22 17:01:15.967413: step: 502/459, loss: 0.0026878679636865854 2023-01-22 17:01:16.694824: step: 504/459, loss: 0.012958127073943615 2023-01-22 17:01:17.293482: step: 506/459, loss: 0.06694658100605011 2023-01-22 17:01:17.936802: step: 508/459, loss: 0.05157662183046341 2023-01-22 17:01:18.546101: step: 510/459, loss: 0.009531402960419655 2023-01-22 17:01:19.238129: step: 512/459, loss: 0.09802338480949402 2023-01-22 17:01:19.814709: step: 514/459, loss: 0.015163056552410126 2023-01-22 17:01:20.418161: step: 516/459, loss: 0.12360221892595291 2023-01-22 17:01:20.992627: step: 518/459, loss: 0.03645819425582886 2023-01-22 17:01:21.553651: step: 520/459, loss: 0.005559597630053759 2023-01-22 17:01:22.225650: step: 522/459, loss: 0.11881968379020691 2023-01-22 17:01:22.883186: step: 524/459, loss: 0.04628906771540642 2023-01-22 17:01:23.504104: step: 526/459, loss: 0.058730777353048325 2023-01-22 17:01:24.138858: step: 528/459, loss: 0.02441442757844925 2023-01-22 17:01:24.769246: step: 530/459, loss: 0.01646512560546398 2023-01-22 17:01:25.508922: step: 532/459, loss: 0.055115874856710434 2023-01-22 17:01:26.070862: step: 534/459, loss: 0.14649511873722076 2023-01-22 17:01:26.681000: step: 536/459, loss: 0.032486140727996826 2023-01-22 17:01:27.367263: step: 538/459, loss: 0.07945225387811661 2023-01-22 17:01:28.016863: step: 540/459, loss: 0.015738481655716896 2023-01-22 17:01:28.688995: step: 542/459, loss: 0.06846271455287933 2023-01-22 17:01:29.507646: step: 544/459, loss: 0.04983101785182953 2023-01-22 17:01:30.081510: step: 546/459, loss: 0.019669678062200546 2023-01-22 17:01:30.713636: step: 548/459, loss: 0.014479370787739754 2023-01-22 17:01:31.333877: step: 550/459, loss: 0.024007447063922882 2023-01-22 17:01:31.958027: step: 552/459, loss: 0.0949258804321289 2023-01-22 17:01:32.554360: step: 554/459, loss: 0.04490743204951286 2023-01-22 17:01:33.149235: step: 556/459, loss: 0.023689359426498413 2023-01-22 17:01:33.822960: step: 558/459, loss: 0.051058944314718246 2023-01-22 17:01:34.398018: step: 560/459, loss: 0.05832243710756302 2023-01-22 17:01:35.007129: step: 562/459, loss: 0.0285752322524786 2023-01-22 17:01:35.601856: step: 564/459, loss: 0.022437933832406998 2023-01-22 17:01:36.183314: step: 566/459, loss: 0.03484615683555603 2023-01-22 17:01:36.790442: step: 568/459, loss: 0.008916087448596954 2023-01-22 17:01:37.369504: step: 570/459, loss: 0.018826236948370934 2023-01-22 17:01:38.040049: step: 572/459, loss: 0.09362314641475677 2023-01-22 17:01:38.646233: step: 574/459, loss: 0.06315428763628006 2023-01-22 17:01:39.250629: step: 576/459, loss: 0.10474980622529984 2023-01-22 17:01:39.832811: step: 578/459, loss: 0.12123598158359528 2023-01-22 17:01:40.432584: step: 580/459, loss: 0.03250649943947792 2023-01-22 17:01:41.114947: step: 582/459, loss: 0.06482744216918945 2023-01-22 17:01:41.775404: step: 584/459, loss: 0.1283603012561798 2023-01-22 17:01:42.378799: step: 586/459, loss: 0.2042866051197052 2023-01-22 17:01:42.996914: step: 588/459, loss: 0.06555379182100296 2023-01-22 17:01:43.621882: step: 590/459, loss: 0.01733950525522232 2023-01-22 17:01:44.181736: step: 592/459, loss: 0.010796530172228813 2023-01-22 17:01:44.821959: step: 594/459, loss: 0.045743588358163834 2023-01-22 17:01:45.463025: step: 596/459, loss: 0.4103584587574005 2023-01-22 17:01:46.048616: step: 598/459, loss: 0.007984532043337822 2023-01-22 17:01:46.615327: step: 600/459, loss: 0.05088353902101517 2023-01-22 17:01:47.251205: step: 602/459, loss: 0.03250141069293022 2023-01-22 17:01:47.822484: step: 604/459, loss: 0.07439916580915451 2023-01-22 17:01:48.461123: step: 606/459, loss: 0.023283246904611588 2023-01-22 17:01:49.015165: step: 608/459, loss: 0.014723222702741623 2023-01-22 17:01:49.609853: step: 610/459, loss: 0.04132024943828583 2023-01-22 17:01:50.245104: step: 612/459, loss: 0.1255100816488266 2023-01-22 17:01:50.917277: step: 614/459, loss: 0.08075017482042313 2023-01-22 17:01:51.556053: step: 616/459, loss: 0.01794544793665409 2023-01-22 17:01:52.185946: step: 618/459, loss: 0.051288411021232605 2023-01-22 17:01:52.854312: step: 620/459, loss: 0.3541657626628876 2023-01-22 17:01:53.458041: step: 622/459, loss: 0.40335744619369507 2023-01-22 17:01:54.070673: step: 624/459, loss: 0.04729708656668663 2023-01-22 17:01:54.690691: step: 626/459, loss: 0.03480703756213188 2023-01-22 17:01:55.315545: step: 628/459, loss: 0.5896925330162048 2023-01-22 17:01:55.936002: step: 630/459, loss: 0.025472266599535942 2023-01-22 17:01:56.572806: step: 632/459, loss: 0.04050981625914574 2023-01-22 17:01:57.232735: step: 634/459, loss: 0.12532418966293335 2023-01-22 17:01:57.883282: step: 636/459, loss: 0.015164973214268684 2023-01-22 17:01:58.604991: step: 638/459, loss: 0.0254242904484272 2023-01-22 17:01:59.220930: step: 640/459, loss: 0.031191227957606316 2023-01-22 17:01:59.812595: step: 642/459, loss: 0.07873635739088058 2023-01-22 17:02:00.439822: step: 644/459, loss: 0.007392171770334244 2023-01-22 17:02:01.018747: step: 646/459, loss: 0.0035790351685136557 2023-01-22 17:02:01.641351: step: 648/459, loss: 0.03896341100335121 2023-01-22 17:02:02.233362: step: 650/459, loss: 0.029291203245520592 2023-01-22 17:02:02.908447: step: 652/459, loss: 0.0731748566031456 2023-01-22 17:02:03.528307: step: 654/459, loss: 0.11165726184844971 2023-01-22 17:02:04.144354: step: 656/459, loss: 0.07784903794527054 2023-01-22 17:02:04.749942: step: 658/459, loss: 0.04310945048928261 2023-01-22 17:02:05.403586: step: 660/459, loss: 0.053291670978069305 2023-01-22 17:02:06.009679: step: 662/459, loss: 0.108383908867836 2023-01-22 17:02:06.617465: step: 664/459, loss: 0.03011743165552616 2023-01-22 17:02:07.256618: step: 666/459, loss: 0.04878503456711769 2023-01-22 17:02:08.007490: step: 668/459, loss: 0.0333598293364048 2023-01-22 17:02:08.602322: step: 670/459, loss: 0.04710008576512337 2023-01-22 17:02:09.230359: step: 672/459, loss: 0.7953053712844849 2023-01-22 17:02:09.849168: step: 674/459, loss: 0.04955623671412468 2023-01-22 17:02:10.386899: step: 676/459, loss: 0.07468301057815552 2023-01-22 17:02:11.060315: step: 678/459, loss: 0.14007174968719482 2023-01-22 17:02:11.718917: step: 680/459, loss: 0.32083818316459656 2023-01-22 17:02:12.362579: step: 682/459, loss: 0.02679501846432686 2023-01-22 17:02:12.998039: step: 684/459, loss: 0.11373432725667953 2023-01-22 17:02:13.607104: step: 686/459, loss: 0.03948090597987175 2023-01-22 17:02:14.225950: step: 688/459, loss: 0.013588064350187778 2023-01-22 17:02:14.932161: step: 690/459, loss: 0.07667003571987152 2023-01-22 17:02:15.579825: step: 692/459, loss: 0.24515031278133392 2023-01-22 17:02:16.196417: step: 694/459, loss: 0.023314056918025017 2023-01-22 17:02:16.778319: step: 696/459, loss: 0.14367039501667023 2023-01-22 17:02:17.404975: step: 698/459, loss: 0.10681440681219101 2023-01-22 17:02:18.067959: step: 700/459, loss: 0.04398532584309578 2023-01-22 17:02:18.658854: step: 702/459, loss: 0.07214000821113586 2023-01-22 17:02:19.296984: step: 704/459, loss: 0.11255055665969849 2023-01-22 17:02:19.866348: step: 706/459, loss: 0.036472685635089874 2023-01-22 17:02:20.458514: step: 708/459, loss: 0.025226924568414688 2023-01-22 17:02:21.005277: step: 710/459, loss: 0.006470364052802324 2023-01-22 17:02:21.620665: step: 712/459, loss: 0.014192083850502968 2023-01-22 17:02:22.298294: step: 714/459, loss: 0.12953591346740723 2023-01-22 17:02:22.903539: step: 716/459, loss: 0.0526556558907032 2023-01-22 17:02:23.519808: step: 718/459, loss: 0.0805651843547821 2023-01-22 17:02:24.115785: step: 720/459, loss: 0.047147080302238464 2023-01-22 17:02:24.706445: step: 722/459, loss: 0.03203088045120239 2023-01-22 17:02:25.391197: step: 724/459, loss: 0.026131173595786095 2023-01-22 17:02:25.982386: step: 726/459, loss: 0.028955023735761642 2023-01-22 17:02:26.550054: step: 728/459, loss: 0.0899924486875534 2023-01-22 17:02:27.112809: step: 730/459, loss: 0.0181586854159832 2023-01-22 17:02:27.673310: step: 732/459, loss: 0.03172290325164795 2023-01-22 17:02:28.281298: step: 734/459, loss: 0.13251443207263947 2023-01-22 17:02:28.862047: step: 736/459, loss: 0.06645863503217697 2023-01-22 17:02:29.418127: step: 738/459, loss: 0.014924367889761925 2023-01-22 17:02:30.109611: step: 740/459, loss: 0.04575205594301224 2023-01-22 17:02:30.709077: step: 742/459, loss: 0.032098494470119476 2023-01-22 17:02:31.334641: step: 744/459, loss: 0.06847930699586868 2023-01-22 17:02:32.009288: step: 746/459, loss: 0.035230450332164764 2023-01-22 17:02:32.603194: step: 748/459, loss: 0.021325942128896713 2023-01-22 17:02:33.240908: step: 750/459, loss: 0.43881043791770935 2023-01-22 17:02:33.893297: step: 752/459, loss: 0.2969907522201538 2023-01-22 17:02:34.474787: step: 754/459, loss: 0.06817200034856796 2023-01-22 17:02:35.120308: step: 756/459, loss: 0.01284728292375803 2023-01-22 17:02:35.777562: step: 758/459, loss: 0.04843015968799591 2023-01-22 17:02:36.394617: step: 760/459, loss: 0.009736558422446251 2023-01-22 17:02:37.015018: step: 762/459, loss: 0.03933177515864372 2023-01-22 17:02:37.733926: step: 764/459, loss: 0.11352979391813278 2023-01-22 17:02:38.353011: step: 766/459, loss: 0.012480957433581352 2023-01-22 17:02:38.969655: step: 768/459, loss: 0.038633864372968674 2023-01-22 17:02:39.585884: step: 770/459, loss: 0.05158255994319916 2023-01-22 17:02:40.213021: step: 772/459, loss: 0.08326686173677444 2023-01-22 17:02:40.832168: step: 774/459, loss: 0.11317575722932816 2023-01-22 17:02:41.436517: step: 776/459, loss: 0.07761102169752121 2023-01-22 17:02:42.053983: step: 778/459, loss: 0.020852649584412575 2023-01-22 17:02:42.687146: step: 780/459, loss: 0.01217423751950264 2023-01-22 17:02:43.298602: step: 782/459, loss: 0.04027571529150009 2023-01-22 17:02:44.014970: step: 784/459, loss: 0.013875330798327923 2023-01-22 17:02:44.645481: step: 786/459, loss: 0.04685995727777481 2023-01-22 17:02:45.315313: step: 788/459, loss: 0.04433273524045944 2023-01-22 17:02:45.880733: step: 790/459, loss: 0.12302420288324356 2023-01-22 17:02:46.528073: step: 792/459, loss: 0.08999717235565186 2023-01-22 17:02:47.155149: step: 794/459, loss: 0.17481039464473724 2023-01-22 17:02:47.770020: step: 796/459, loss: 0.00577508145943284 2023-01-22 17:02:48.385919: step: 798/459, loss: 0.11964546144008636 2023-01-22 17:02:48.974509: step: 800/459, loss: 0.05482054501771927 2023-01-22 17:02:49.586578: step: 802/459, loss: 0.01343041192740202 2023-01-22 17:02:50.164275: step: 804/459, loss: 0.030994247645139694 2023-01-22 17:02:50.738938: step: 806/459, loss: 0.02867540344595909 2023-01-22 17:02:51.364493: step: 808/459, loss: 0.3143928050994873 2023-01-22 17:02:51.944028: step: 810/459, loss: 0.06727652251720428 2023-01-22 17:02:52.575300: step: 812/459, loss: 0.03663233667612076 2023-01-22 17:02:53.163650: step: 814/459, loss: 0.04307188838720322 2023-01-22 17:02:53.765375: step: 816/459, loss: 0.03637206554412842 2023-01-22 17:02:54.403466: step: 818/459, loss: 0.034618400037288666 2023-01-22 17:02:55.038803: step: 820/459, loss: 0.20656991004943848 2023-01-22 17:02:55.631640: step: 822/459, loss: 0.04579215496778488 2023-01-22 17:02:56.239516: step: 824/459, loss: 0.03625321015715599 2023-01-22 17:02:56.882428: step: 826/459, loss: 0.004678001627326012 2023-01-22 17:02:57.454523: step: 828/459, loss: 0.0633644163608551 2023-01-22 17:02:58.133525: step: 830/459, loss: 0.027715642005205154 2023-01-22 17:02:58.832518: step: 832/459, loss: 0.022343548014760017 2023-01-22 17:02:59.433473: step: 834/459, loss: 0.01938195526599884 2023-01-22 17:03:00.048129: step: 836/459, loss: 0.06828559935092926 2023-01-22 17:03:00.661687: step: 838/459, loss: 0.08909180760383606 2023-01-22 17:03:01.328404: step: 840/459, loss: 0.11152797937393188 2023-01-22 17:03:02.020813: step: 842/459, loss: 0.06352880597114563 2023-01-22 17:03:02.621114: step: 844/459, loss: 0.057997677475214005 2023-01-22 17:03:03.211056: step: 846/459, loss: 0.11889591068029404 2023-01-22 17:03:03.808037: step: 848/459, loss: 0.024112991988658905 2023-01-22 17:03:04.474915: step: 850/459, loss: 0.5109362602233887 2023-01-22 17:03:05.056821: step: 852/459, loss: 0.07157865911722183 2023-01-22 17:03:05.626034: step: 854/459, loss: 0.046580828726291656 2023-01-22 17:03:06.331331: step: 856/459, loss: 0.05625399947166443 2023-01-22 17:03:06.942928: step: 858/459, loss: 0.021941186860203743 2023-01-22 17:03:07.578617: step: 860/459, loss: 0.011431346647441387 2023-01-22 17:03:08.214035: step: 862/459, loss: 0.09021639078855515 2023-01-22 17:03:08.828482: step: 864/459, loss: 0.017766639590263367 2023-01-22 17:03:09.619194: step: 866/459, loss: 0.06612715870141983 2023-01-22 17:03:10.302058: step: 868/459, loss: 0.08613957464694977 2023-01-22 17:03:11.001729: step: 870/459, loss: 0.0786861926317215 2023-01-22 17:03:11.641996: step: 872/459, loss: 0.030604258179664612 2023-01-22 17:03:12.267355: step: 874/459, loss: 0.0770968347787857 2023-01-22 17:03:12.913851: step: 876/459, loss: 0.10372843593358994 2023-01-22 17:03:13.512128: step: 878/459, loss: 0.08466082811355591 2023-01-22 17:03:14.173933: step: 880/459, loss: 0.10827072709798813 2023-01-22 17:03:14.776602: step: 882/459, loss: 0.05812252685427666 2023-01-22 17:03:15.414944: step: 884/459, loss: 0.04787755757570267 2023-01-22 17:03:16.054019: step: 886/459, loss: 0.03161241114139557 2023-01-22 17:03:16.705752: step: 888/459, loss: 0.03817448392510414 2023-01-22 17:03:17.364285: step: 890/459, loss: 0.0916028618812561 2023-01-22 17:03:17.925331: step: 892/459, loss: 0.10929309576749802 2023-01-22 17:03:18.480727: step: 894/459, loss: 0.03171471133828163 2023-01-22 17:03:19.168535: step: 896/459, loss: 0.1822526603937149 2023-01-22 17:03:19.820202: step: 898/459, loss: 0.04223553091287613 2023-01-22 17:03:20.458915: step: 900/459, loss: 0.07601232081651688 2023-01-22 17:03:21.093930: step: 902/459, loss: 0.05606600642204285 2023-01-22 17:03:21.699296: step: 904/459, loss: 0.024905776605010033 2023-01-22 17:03:22.320275: step: 906/459, loss: 0.09164398163557053 2023-01-22 17:03:22.900449: step: 908/459, loss: 0.01517370343208313 2023-01-22 17:03:23.563403: step: 910/459, loss: 0.13223053514957428 2023-01-22 17:03:24.127445: step: 912/459, loss: 0.09349755197763443 2023-01-22 17:03:24.696490: step: 914/459, loss: 0.02853327989578247 2023-01-22 17:03:25.316904: step: 916/459, loss: 0.045493077486753464 2023-01-22 17:03:25.948218: step: 918/459, loss: 0.18305723369121552 2023-01-22 17:03:26.491157: step: 920/459, loss: 1.1369206731615122e-05 ================================================== Loss: 0.076 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29657568993506495, 'r': 0.3466615275142315, 'f1': 0.31966863517060373}, 'combined': 0.235545310125708, 'epoch': 20} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31789711814059685, 'r': 0.3141367336675962, 'f1': 0.3160057394056643}, 'combined': 0.2022436732196251, 'epoch': 20} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2974188816626608, 'r': 0.3628848973607038, 'f1': 0.3269065656565656}, 'combined': 0.24087852206273255, 'epoch': 20} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33030576129844846, 'r': 0.3215977003187621, 'f1': 0.32589357010653}, 'combined': 0.20857188486817915, 'epoch': 20} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3114514392975276, 'r': 0.3540026795810608, 'f1': 0.33136662902170344}, 'combined': 0.24416488454230778, 'epoch': 20} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3284269175650756, 'r': 0.3221340660423871, 'f1': 0.3252500565476336}, 'combined': 0.23319815375113356, 'epoch': 20} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2202380952380952, 'r': 0.35238095238095235, 'f1': 0.271062271062271}, 'combined': 0.18070818070818065, 'epoch': 20} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2558139534883721, 'r': 0.4782608695652174, 'f1': 0.33333333333333337}, 'combined': 0.16666666666666669, 'epoch': 20} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2777777777777778, 'r': 0.1724137931034483, 'f1': 0.21276595744680854}, 'combined': 0.14184397163120568, 'epoch': 20} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3066986908783784, 'r': 0.3445268026565465, 'f1': 0.3245140750670242}, 'combined': 0.23911563425991253, 'epoch': 19} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31271473684488904, 'r': 0.30873473110322686, 'f1': 0.31071198921642224}, 'combined': 0.1988556730985102, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2564102564102564, 'r': 0.38095238095238093, 'f1': 0.3065134099616858}, 'combined': 0.20434227330779053, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29418702499903554, 'r': 0.3594998939267151, 'f1': 0.32358060478117656}, 'combined': 0.23842781404928798, 'epoch': 13} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3202762492367604, 'r': 0.31212376289255195, 'f1': 0.31614745781013553}, 'combined': 0.20233437299848672, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2878787878787879, 'r': 0.41304347826086957, 'f1': 0.3392857142857143}, 'combined': 0.16964285714285715, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3189032241859154, 'r': 0.3376622373733222, 'f1': 0.32801474487694154}, 'combined': 0.24169507517248323, 'epoch': 15} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34157316773046453, 'r': 0.3008578947798814, 'f1': 0.3199253278791385}, 'combined': 0.22938042376240123, 'epoch': 15} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35294117647058826, 'r': 0.20689655172413793, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 15} ****************************** Epoch: 21 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 17:06:03.745040: step: 2/459, loss: 0.06962810456752777 2023-01-22 17:06:04.538436: step: 4/459, loss: 0.034475408494472504 2023-01-22 17:06:05.162166: step: 6/459, loss: 0.006822242867201567 2023-01-22 17:06:05.715944: step: 8/459, loss: 0.02910529263317585 2023-01-22 17:06:06.290221: step: 10/459, loss: 0.041772566735744476 2023-01-22 17:06:06.923500: step: 12/459, loss: 0.11574418842792511 2023-01-22 17:06:07.491295: step: 14/459, loss: 0.019032176584005356 2023-01-22 17:06:08.084288: step: 16/459, loss: 0.07292469590902328 2023-01-22 17:06:08.670249: step: 18/459, loss: 0.1317106932401657 2023-01-22 17:06:09.350705: step: 20/459, loss: 0.1374448984861374 2023-01-22 17:06:09.948732: step: 22/459, loss: 0.056847237050533295 2023-01-22 17:06:10.593902: step: 24/459, loss: 0.04483141377568245 2023-01-22 17:06:11.243087: step: 26/459, loss: 0.018170397728681564 2023-01-22 17:06:11.858511: step: 28/459, loss: 0.022382579743862152 2023-01-22 17:06:12.412931: step: 30/459, loss: 0.02339290641248226 2023-01-22 17:06:13.013922: step: 32/459, loss: 0.03134223446249962 2023-01-22 17:06:13.612862: step: 34/459, loss: 0.007746638264507055 2023-01-22 17:06:14.226279: step: 36/459, loss: 0.02181130461394787 2023-01-22 17:06:14.809411: step: 38/459, loss: 0.02826651558279991 2023-01-22 17:06:15.435427: step: 40/459, loss: 0.00047050867578946054 2023-01-22 17:06:16.029910: step: 42/459, loss: 0.16908815503120422 2023-01-22 17:06:16.613932: step: 44/459, loss: 0.005091594532132149 2023-01-22 17:06:17.242354: step: 46/459, loss: 0.027665209025144577 2023-01-22 17:06:17.908021: step: 48/459, loss: 0.01782211847603321 2023-01-22 17:06:18.613821: step: 50/459, loss: 0.018784884363412857 2023-01-22 17:06:19.200833: step: 52/459, loss: 0.016245193779468536 2023-01-22 17:06:19.867458: step: 54/459, loss: 0.05218629539012909 2023-01-22 17:06:20.522615: step: 56/459, loss: 0.027225436642766 2023-01-22 17:06:21.195135: step: 58/459, loss: 0.01638990268111229 2023-01-22 17:06:21.901551: step: 60/459, loss: 0.03927980363368988 2023-01-22 17:06:22.542246: step: 62/459, loss: 0.0954994186758995 2023-01-22 17:06:23.169905: step: 64/459, loss: 0.8502589464187622 2023-01-22 17:06:23.719057: step: 66/459, loss: 0.021486634388566017 2023-01-22 17:06:24.320910: step: 68/459, loss: 0.34447577595710754 2023-01-22 17:06:24.870718: step: 70/459, loss: 0.02581837959587574 2023-01-22 17:06:25.415536: step: 72/459, loss: 0.00693783164024353 2023-01-22 17:06:26.027118: step: 74/459, loss: 0.04567744955420494 2023-01-22 17:06:26.677406: step: 76/459, loss: 0.17543111741542816 2023-01-22 17:06:27.299126: step: 78/459, loss: 0.007803607266396284 2023-01-22 17:06:27.907949: step: 80/459, loss: 0.15362539887428284 2023-01-22 17:06:28.512863: step: 82/459, loss: 0.28486141562461853 2023-01-22 17:06:29.205170: step: 84/459, loss: 0.06825278699398041 2023-01-22 17:06:29.792398: step: 86/459, loss: 0.05900218337774277 2023-01-22 17:06:30.470174: step: 88/459, loss: 0.08822602778673172 2023-01-22 17:06:31.117965: step: 90/459, loss: 0.01233330275863409 2023-01-22 17:06:31.809223: step: 92/459, loss: 0.025633085519075394 2023-01-22 17:06:32.474284: step: 94/459, loss: 0.04380636289715767 2023-01-22 17:06:33.113925: step: 96/459, loss: 0.10082679241895676 2023-01-22 17:06:33.831001: step: 98/459, loss: 0.07105249911546707 2023-01-22 17:06:34.448902: step: 100/459, loss: 0.005754649173468351 2023-01-22 17:06:35.058285: step: 102/459, loss: 0.03511657565832138 2023-01-22 17:06:35.659881: step: 104/459, loss: 0.015887489542365074 2023-01-22 17:06:36.287260: step: 106/459, loss: 0.6305825710296631 2023-01-22 17:06:36.876225: step: 108/459, loss: 1.849263072013855 2023-01-22 17:06:37.524468: step: 110/459, loss: 0.031701769679784775 2023-01-22 17:06:38.202744: step: 112/459, loss: 0.08379683643579483 2023-01-22 17:06:38.846314: step: 114/459, loss: 0.02969285659492016 2023-01-22 17:06:39.433231: step: 116/459, loss: 0.04922913387417793 2023-01-22 17:06:40.081075: step: 118/459, loss: 0.09876736253499985 2023-01-22 17:06:40.675459: step: 120/459, loss: 0.09378977864980698 2023-01-22 17:06:41.322306: step: 122/459, loss: 0.04369097575545311 2023-01-22 17:06:41.903678: step: 124/459, loss: 0.004301452077925205 2023-01-22 17:06:42.489825: step: 126/459, loss: 0.06653646379709244 2023-01-22 17:06:43.195631: step: 128/459, loss: 0.06850039958953857 2023-01-22 17:06:43.798506: step: 130/459, loss: 0.03715331479907036 2023-01-22 17:06:44.404851: step: 132/459, loss: 0.033479198813438416 2023-01-22 17:06:44.999165: step: 134/459, loss: 0.005165076348930597 2023-01-22 17:06:45.661490: step: 136/459, loss: 0.03787190467119217 2023-01-22 17:06:46.331247: step: 138/459, loss: 0.023118332028388977 2023-01-22 17:06:46.953443: step: 140/459, loss: 0.011210676282644272 2023-01-22 17:06:47.563376: step: 142/459, loss: 0.037180669605731964 2023-01-22 17:06:48.186802: step: 144/459, loss: 0.04676051437854767 2023-01-22 17:06:48.823795: step: 146/459, loss: 0.1516672521829605 2023-01-22 17:06:49.499040: step: 148/459, loss: 0.017936181277036667 2023-01-22 17:06:50.151048: step: 150/459, loss: 0.03233356401324272 2023-01-22 17:06:50.848287: step: 152/459, loss: 0.0037135728634893894 2023-01-22 17:06:51.416476: step: 154/459, loss: 0.2557941973209381 2023-01-22 17:06:52.072054: step: 156/459, loss: 0.0435834638774395 2023-01-22 17:06:52.702635: step: 158/459, loss: 0.02676798403263092 2023-01-22 17:06:53.355614: step: 160/459, loss: 0.05335798114538193 2023-01-22 17:06:53.955439: step: 162/459, loss: 0.05106571689248085 2023-01-22 17:06:54.579464: step: 164/459, loss: 0.047931499779224396 2023-01-22 17:06:55.115310: step: 166/459, loss: 0.11207334697246552 2023-01-22 17:06:55.681168: step: 168/459, loss: 0.08840624243021011 2023-01-22 17:06:56.273460: step: 170/459, loss: 0.013562600128352642 2023-01-22 17:06:56.862181: step: 172/459, loss: 0.05607878789305687 2023-01-22 17:06:57.447307: step: 174/459, loss: 0.038282979279756546 2023-01-22 17:06:58.100174: step: 176/459, loss: 0.03979385271668434 2023-01-22 17:06:58.673535: step: 178/459, loss: 0.01729661598801613 2023-01-22 17:06:59.289126: step: 180/459, loss: 0.05226897448301315 2023-01-22 17:06:59.920240: step: 182/459, loss: 0.05281347408890724 2023-01-22 17:07:00.521392: step: 184/459, loss: 0.002310649026185274 2023-01-22 17:07:01.137005: step: 186/459, loss: 0.03659176826477051 2023-01-22 17:07:01.771637: step: 188/459, loss: 0.09961922466754913 2023-01-22 17:07:02.409108: step: 190/459, loss: 0.05568614974617958 2023-01-22 17:07:02.995620: step: 192/459, loss: 0.04936368390917778 2023-01-22 17:07:03.621194: step: 194/459, loss: 0.09662860631942749 2023-01-22 17:07:04.244263: step: 196/459, loss: 0.053680069744586945 2023-01-22 17:07:04.845463: step: 198/459, loss: 0.019599400460720062 2023-01-22 17:07:05.509591: step: 200/459, loss: 0.025735586881637573 2023-01-22 17:07:06.201472: step: 202/459, loss: 0.18365496397018433 2023-01-22 17:07:06.846820: step: 204/459, loss: 0.1484183669090271 2023-01-22 17:07:07.492067: step: 206/459, loss: 0.04227414354681969 2023-01-22 17:07:08.110301: step: 208/459, loss: 0.18677811324596405 2023-01-22 17:07:08.809179: step: 210/459, loss: 0.01668965071439743 2023-01-22 17:07:09.399026: step: 212/459, loss: 0.031301598995923996 2023-01-22 17:07:10.021544: step: 214/459, loss: 0.1455032080411911 2023-01-22 17:07:10.667576: step: 216/459, loss: 0.01468255091458559 2023-01-22 17:07:11.279069: step: 218/459, loss: 0.05153811722993851 2023-01-22 17:07:11.891114: step: 220/459, loss: 0.06546813994646072 2023-01-22 17:07:12.510295: step: 222/459, loss: 0.020947711542248726 2023-01-22 17:07:13.135142: step: 224/459, loss: 0.17011001706123352 2023-01-22 17:07:13.755699: step: 226/459, loss: 0.3991493582725525 2023-01-22 17:07:14.352416: step: 228/459, loss: 0.039137743413448334 2023-01-22 17:07:14.917533: step: 230/459, loss: 0.0053811161778867245 2023-01-22 17:07:15.580593: step: 232/459, loss: 0.04395284876227379 2023-01-22 17:07:16.206827: step: 234/459, loss: 0.5855132937431335 2023-01-22 17:07:16.793086: step: 236/459, loss: 0.01313477847725153 2023-01-22 17:07:17.344607: step: 238/459, loss: 0.010729895904660225 2023-01-22 17:07:17.940922: step: 240/459, loss: 0.3548297584056854 2023-01-22 17:07:18.553938: step: 242/459, loss: 0.12557388842105865 2023-01-22 17:07:19.185640: step: 244/459, loss: 0.3159484267234802 2023-01-22 17:07:19.920681: step: 246/459, loss: 0.12514707446098328 2023-01-22 17:07:20.563896: step: 248/459, loss: 0.0031648948788642883 2023-01-22 17:07:21.157576: step: 250/459, loss: 0.027567030861973763 2023-01-22 17:07:21.806560: step: 252/459, loss: 0.02413513883948326 2023-01-22 17:07:22.400846: step: 254/459, loss: 0.01215443480759859 2023-01-22 17:07:22.964331: step: 256/459, loss: 0.0037236434873193502 2023-01-22 17:07:23.545213: step: 258/459, loss: 0.02396847866475582 2023-01-22 17:07:24.083046: step: 260/459, loss: 0.06574507802724838 2023-01-22 17:07:24.680811: step: 262/459, loss: 0.010614545084536076 2023-01-22 17:07:25.284604: step: 264/459, loss: 0.03421199321746826 2023-01-22 17:07:25.881926: step: 266/459, loss: 0.1673142910003662 2023-01-22 17:07:26.559785: step: 268/459, loss: 0.03135696426033974 2023-01-22 17:07:27.146266: step: 270/459, loss: 7.44478702545166 2023-01-22 17:07:27.756689: step: 272/459, loss: 0.009730671532452106 2023-01-22 17:07:28.370338: step: 274/459, loss: 1.3294960260391235 2023-01-22 17:07:28.961320: step: 276/459, loss: 0.11695840954780579 2023-01-22 17:07:29.647731: step: 278/459, loss: 0.08816685527563095 2023-01-22 17:07:30.264119: step: 280/459, loss: 0.04278158023953438 2023-01-22 17:07:30.835460: step: 282/459, loss: 0.03385075181722641 2023-01-22 17:07:31.542724: step: 284/459, loss: 0.06113985925912857 2023-01-22 17:07:32.191001: step: 286/459, loss: 0.042212411761283875 2023-01-22 17:07:32.876333: step: 288/459, loss: 0.0303342305123806 2023-01-22 17:07:33.512984: step: 290/459, loss: 0.037736181169748306 2023-01-22 17:07:34.155991: step: 292/459, loss: 0.08144277334213257 2023-01-22 17:07:34.722383: step: 294/459, loss: 0.036357104778289795 2023-01-22 17:07:35.344109: step: 296/459, loss: 0.04022188112139702 2023-01-22 17:07:35.982307: step: 298/459, loss: 0.03333866223692894 2023-01-22 17:07:36.659518: step: 300/459, loss: 0.03649170696735382 2023-01-22 17:07:37.244331: step: 302/459, loss: 0.006130927242338657 2023-01-22 17:07:37.896892: step: 304/459, loss: 0.24477887153625488 2023-01-22 17:07:38.619412: step: 306/459, loss: 0.012194565497338772 2023-01-22 17:07:39.293083: step: 308/459, loss: 0.03457929566502571 2023-01-22 17:07:39.913952: step: 310/459, loss: 0.049254558980464935 2023-01-22 17:07:40.612183: step: 312/459, loss: 0.05195244029164314 2023-01-22 17:07:41.262912: step: 314/459, loss: 0.1627921462059021 2023-01-22 17:07:41.845908: step: 316/459, loss: 0.19415248930454254 2023-01-22 17:07:42.431222: step: 318/459, loss: 0.013683516532182693 2023-01-22 17:07:43.070203: step: 320/459, loss: 0.05271563678979874 2023-01-22 17:07:43.732096: step: 322/459, loss: 0.024325702339410782 2023-01-22 17:07:44.314826: step: 324/459, loss: 0.08375047147274017 2023-01-22 17:07:44.984431: step: 326/459, loss: 0.07933294773101807 2023-01-22 17:07:45.595632: step: 328/459, loss: 0.04276502504944801 2023-01-22 17:07:46.210963: step: 330/459, loss: 0.011767435818910599 2023-01-22 17:07:46.796077: step: 332/459, loss: 0.0424051508307457 2023-01-22 17:07:47.482712: step: 334/459, loss: 0.052796561270952225 2023-01-22 17:07:48.174859: step: 336/459, loss: 0.0052894544787704945 2023-01-22 17:07:48.742970: step: 338/459, loss: 0.09042176604270935 2023-01-22 17:07:49.292433: step: 340/459, loss: 0.022248467430472374 2023-01-22 17:07:49.881099: step: 342/459, loss: 0.038078099489212036 2023-01-22 17:07:50.440178: step: 344/459, loss: 0.03004765510559082 2023-01-22 17:07:51.077863: step: 346/459, loss: 0.45471322536468506 2023-01-22 17:07:51.672522: step: 348/459, loss: 0.8394812345504761 2023-01-22 17:07:52.310122: step: 350/459, loss: 0.06779265403747559 2023-01-22 17:07:52.983685: step: 352/459, loss: 0.11043381690979004 2023-01-22 17:07:53.567933: step: 354/459, loss: 0.005477819591760635 2023-01-22 17:07:54.149026: step: 356/459, loss: 0.028773972764611244 2023-01-22 17:07:54.845890: step: 358/459, loss: 0.03781234845519066 2023-01-22 17:07:55.455147: step: 360/459, loss: 0.015790347009897232 2023-01-22 17:07:56.124704: step: 362/459, loss: 0.223789781332016 2023-01-22 17:07:56.745227: step: 364/459, loss: 0.02197341062128544 2023-01-22 17:07:57.394996: step: 366/459, loss: 0.07268624007701874 2023-01-22 17:07:58.043523: step: 368/459, loss: 0.06635550409555435 2023-01-22 17:07:58.639778: step: 370/459, loss: 0.08054255694150925 2023-01-22 17:07:59.244616: step: 372/459, loss: 0.000999930314719677 2023-01-22 17:07:59.881343: step: 374/459, loss: 0.01593717932701111 2023-01-22 17:08:00.483399: step: 376/459, loss: 0.023155339062213898 2023-01-22 17:08:01.067455: step: 378/459, loss: 0.11343219131231308 2023-01-22 17:08:01.745207: step: 380/459, loss: 0.02858814224600792 2023-01-22 17:08:02.371826: step: 382/459, loss: 0.07612166553735733 2023-01-22 17:08:03.004394: step: 384/459, loss: 0.14275194704532623 2023-01-22 17:08:03.578392: step: 386/459, loss: 0.10569014400243759 2023-01-22 17:08:04.143915: step: 388/459, loss: 0.09758516401052475 2023-01-22 17:08:04.844839: step: 390/459, loss: 0.007503940723836422 2023-01-22 17:08:05.393182: step: 392/459, loss: 0.04869602620601654 2023-01-22 17:08:05.982375: step: 394/459, loss: 0.05389813333749771 2023-01-22 17:08:06.602796: step: 396/459, loss: 0.0291640255600214 2023-01-22 17:08:07.201470: step: 398/459, loss: 0.30187034606933594 2023-01-22 17:08:07.813924: step: 400/459, loss: 0.022940106689929962 2023-01-22 17:08:08.417214: step: 402/459, loss: 0.006315132137387991 2023-01-22 17:08:08.986933: step: 404/459, loss: 0.030824637040495872 2023-01-22 17:08:09.629766: step: 406/459, loss: 0.2532338798046112 2023-01-22 17:08:10.225007: step: 408/459, loss: 0.024136733263731003 2023-01-22 17:08:10.806708: step: 410/459, loss: 0.006977571174502373 2023-01-22 17:08:11.423898: step: 412/459, loss: 0.0517672598361969 2023-01-22 17:08:12.036647: step: 414/459, loss: 0.023037657141685486 2023-01-22 17:08:12.650021: step: 416/459, loss: 0.030054813250899315 2023-01-22 17:08:13.271992: step: 418/459, loss: 0.07322642207145691 2023-01-22 17:08:13.867315: step: 420/459, loss: 0.07879436761140823 2023-01-22 17:08:14.516129: step: 422/459, loss: 0.23567810654640198 2023-01-22 17:08:15.126228: step: 424/459, loss: 0.0286968182772398 2023-01-22 17:08:15.741123: step: 426/459, loss: 0.04480830952525139 2023-01-22 17:08:16.305425: step: 428/459, loss: 0.029084062203764915 2023-01-22 17:08:16.899518: step: 430/459, loss: 0.030856503173708916 2023-01-22 17:08:17.517256: step: 432/459, loss: 0.011494223028421402 2023-01-22 17:08:18.093396: step: 434/459, loss: 0.03089032508432865 2023-01-22 17:08:18.737236: step: 436/459, loss: 0.09207265079021454 2023-01-22 17:08:19.473319: step: 438/459, loss: 0.055316779762506485 2023-01-22 17:08:20.108981: step: 440/459, loss: 0.024327613413333893 2023-01-22 17:08:20.749797: step: 442/459, loss: 0.08643589168787003 2023-01-22 17:08:21.350383: step: 444/459, loss: 0.08873305469751358 2023-01-22 17:08:21.881518: step: 446/459, loss: 0.03569134324789047 2023-01-22 17:08:22.461161: step: 448/459, loss: 0.0031114479061216116 2023-01-22 17:08:23.058334: step: 450/459, loss: 0.02972698211669922 2023-01-22 17:08:23.710670: step: 452/459, loss: 0.02429080568253994 2023-01-22 17:08:24.369981: step: 454/459, loss: 0.0643652155995369 2023-01-22 17:08:24.966820: step: 456/459, loss: 0.010998926125466824 2023-01-22 17:08:25.563099: step: 458/459, loss: 0.11795710027217865 2023-01-22 17:08:26.194748: step: 460/459, loss: 0.09817785024642944 2023-01-22 17:08:26.832620: step: 462/459, loss: 0.04268195480108261 2023-01-22 17:08:27.470881: step: 464/459, loss: 0.07683226466178894 2023-01-22 17:08:28.087763: step: 466/459, loss: 0.054402004927396774 2023-01-22 17:08:28.694609: step: 468/459, loss: 0.04337604343891144 2023-01-22 17:08:29.372705: step: 470/459, loss: 0.08446889370679855 2023-01-22 17:08:30.000424: step: 472/459, loss: 0.03701034188270569 2023-01-22 17:08:30.689851: step: 474/459, loss: 0.17212066054344177 2023-01-22 17:08:31.286999: step: 476/459, loss: 0.01607143133878708 2023-01-22 17:08:31.952017: step: 478/459, loss: 0.26547208428382874 2023-01-22 17:08:32.515083: step: 480/459, loss: 0.03390689566731453 2023-01-22 17:08:33.143189: step: 482/459, loss: 0.05008602514863014 2023-01-22 17:08:33.825712: step: 484/459, loss: 0.2897596061229706 2023-01-22 17:08:34.408384: step: 486/459, loss: 0.00904764048755169 2023-01-22 17:08:35.032520: step: 488/459, loss: 0.03775648772716522 2023-01-22 17:08:35.741890: step: 490/459, loss: 0.03967226296663284 2023-01-22 17:08:36.361361: step: 492/459, loss: 0.031832918524742126 2023-01-22 17:08:36.996748: step: 494/459, loss: 0.040528543293476105 2023-01-22 17:08:37.601214: step: 496/459, loss: 0.008270270191133022 2023-01-22 17:08:38.228945: step: 498/459, loss: 0.027195364236831665 2023-01-22 17:08:38.788110: step: 500/459, loss: 0.1303715705871582 2023-01-22 17:08:39.428015: step: 502/459, loss: 0.08777569979429245 2023-01-22 17:08:40.038344: step: 504/459, loss: 0.0959232747554779 2023-01-22 17:08:40.639213: step: 506/459, loss: 0.05372912064194679 2023-01-22 17:08:41.229648: step: 508/459, loss: 0.0602230578660965 2023-01-22 17:08:41.879970: step: 510/459, loss: 0.03496379032731056 2023-01-22 17:08:42.501865: step: 512/459, loss: 0.6841410398483276 2023-01-22 17:08:43.123863: step: 514/459, loss: 0.025325536727905273 2023-01-22 17:08:43.732897: step: 516/459, loss: 0.05432983115315437 2023-01-22 17:08:44.355166: step: 518/459, loss: 1.0153361558914185 2023-01-22 17:08:44.932119: step: 520/459, loss: 0.021156126633286476 2023-01-22 17:08:45.497086: step: 522/459, loss: 0.0320606455206871 2023-01-22 17:08:46.107297: step: 524/459, loss: 0.01791592501103878 2023-01-22 17:08:46.734957: step: 526/459, loss: 0.008896227926015854 2023-01-22 17:08:47.354324: step: 528/459, loss: 0.055420782417058945 2023-01-22 17:08:47.952301: step: 530/459, loss: 0.04146575927734375 2023-01-22 17:08:48.562041: step: 532/459, loss: 0.0006849484634585679 2023-01-22 17:08:49.139955: step: 534/459, loss: 0.028790680691599846 2023-01-22 17:08:49.754858: step: 536/459, loss: 0.29817837476730347 2023-01-22 17:08:50.404769: step: 538/459, loss: 0.23594032227993011 2023-01-22 17:08:51.037046: step: 540/459, loss: 0.08240245282649994 2023-01-22 17:08:51.564937: step: 542/459, loss: 0.17268384993076324 2023-01-22 17:08:52.149576: step: 544/459, loss: 0.06278242915868759 2023-01-22 17:08:52.784019: step: 546/459, loss: 0.02892162837088108 2023-01-22 17:08:53.369250: step: 548/459, loss: 1.035400629043579 2023-01-22 17:08:54.001008: step: 550/459, loss: 0.016732830554246902 2023-01-22 17:08:54.642502: step: 552/459, loss: 0.04452865198254585 2023-01-22 17:08:55.418662: step: 554/459, loss: 0.037959836423397064 2023-01-22 17:08:56.119130: step: 556/459, loss: 0.03305688127875328 2023-01-22 17:08:56.721716: step: 558/459, loss: 0.03459461033344269 2023-01-22 17:08:57.435809: step: 560/459, loss: 0.19824163615703583 2023-01-22 17:08:58.036136: step: 562/459, loss: 0.026296960189938545 2023-01-22 17:08:58.621764: step: 564/459, loss: 0.0318724624812603 2023-01-22 17:08:59.219919: step: 566/459, loss: 0.04281560704112053 2023-01-22 17:08:59.927112: step: 568/459, loss: 0.0677160769701004 2023-01-22 17:09:00.539069: step: 570/459, loss: 0.13952668011188507 2023-01-22 17:09:01.183131: step: 572/459, loss: 0.023611046373844147 2023-01-22 17:09:01.786917: step: 574/459, loss: 0.03811974078416824 2023-01-22 17:09:02.409109: step: 576/459, loss: 0.08350081741809845 2023-01-22 17:09:03.053088: step: 578/459, loss: 0.170767679810524 2023-01-22 17:09:03.631905: step: 580/459, loss: 0.048407770693302155 2023-01-22 17:09:04.241495: step: 582/459, loss: 0.08924128115177155 2023-01-22 17:09:04.868813: step: 584/459, loss: 0.03497880697250366 2023-01-22 17:09:05.477445: step: 586/459, loss: 0.005457647610455751 2023-01-22 17:09:06.075914: step: 588/459, loss: 0.06301120668649673 2023-01-22 17:09:06.726751: step: 590/459, loss: 0.03987009823322296 2023-01-22 17:09:07.347421: step: 592/459, loss: 0.06058087199926376 2023-01-22 17:09:07.945732: step: 594/459, loss: 0.014418719336390495 2023-01-22 17:09:08.601391: step: 596/459, loss: 0.02783622033894062 2023-01-22 17:09:09.289279: step: 598/459, loss: 0.0744682252407074 2023-01-22 17:09:09.899655: step: 600/459, loss: 0.06966905295848846 2023-01-22 17:09:10.438149: step: 602/459, loss: 0.04269642010331154 2023-01-22 17:09:11.049846: step: 604/459, loss: 0.00619933195412159 2023-01-22 17:09:11.663666: step: 606/459, loss: 0.0328347384929657 2023-01-22 17:09:12.307831: step: 608/459, loss: 0.06362452358007431 2023-01-22 17:09:12.979547: step: 610/459, loss: 0.07118628174066544 2023-01-22 17:09:13.693245: step: 612/459, loss: 0.018214542418718338 2023-01-22 17:09:14.340489: step: 614/459, loss: 0.07115773856639862 2023-01-22 17:09:14.867495: step: 616/459, loss: 0.014687877148389816 2023-01-22 17:09:15.466290: step: 618/459, loss: 0.011150038801133633 2023-01-22 17:09:16.072520: step: 620/459, loss: 0.04063272476196289 2023-01-22 17:09:16.667698: step: 622/459, loss: 2.0383899211883545 2023-01-22 17:09:17.245660: step: 624/459, loss: 0.09373392164707184 2023-01-22 17:09:17.848799: step: 626/459, loss: 0.024713687598705292 2023-01-22 17:09:18.488379: step: 628/459, loss: 0.0812135860323906 2023-01-22 17:09:19.046192: step: 630/459, loss: 0.13056840002536774 2023-01-22 17:09:19.728337: step: 632/459, loss: 0.01606117933988571 2023-01-22 17:09:20.366327: step: 634/459, loss: 0.044599998742341995 2023-01-22 17:09:20.986477: step: 636/459, loss: 0.005533107090741396 2023-01-22 17:09:21.618334: step: 638/459, loss: 0.016084985807538033 2023-01-22 17:09:22.237888: step: 640/459, loss: 0.018376849591732025 2023-01-22 17:09:22.851774: step: 642/459, loss: 0.006862836889922619 2023-01-22 17:09:23.542555: step: 644/459, loss: 0.09132091701030731 2023-01-22 17:09:24.182438: step: 646/459, loss: 0.1905173361301422 2023-01-22 17:09:24.780193: step: 648/459, loss: 0.038463007658720016 2023-01-22 17:09:25.332793: step: 650/459, loss: 1.0441938638687134 2023-01-22 17:09:25.905872: step: 652/459, loss: 0.08333275467157364 2023-01-22 17:09:26.624239: step: 654/459, loss: 0.04433933272957802 2023-01-22 17:09:27.273021: step: 656/459, loss: 0.01631101965904236 2023-01-22 17:09:27.825324: step: 658/459, loss: 0.006272132974117994 2023-01-22 17:09:28.398355: step: 660/459, loss: 0.03269575908780098 2023-01-22 17:09:28.999594: step: 662/459, loss: 0.052458301186561584 2023-01-22 17:09:29.723874: step: 664/459, loss: 0.04338430240750313 2023-01-22 17:09:30.372070: step: 666/459, loss: 0.30409708619117737 2023-01-22 17:09:30.973914: step: 668/459, loss: 0.32818764448165894 2023-01-22 17:09:31.542171: step: 670/459, loss: 0.17444676160812378 2023-01-22 17:09:32.155888: step: 672/459, loss: 0.017003636807203293 2023-01-22 17:09:32.767801: step: 674/459, loss: 0.6543542742729187 2023-01-22 17:09:33.473962: step: 676/459, loss: 0.09543561190366745 2023-01-22 17:09:34.103259: step: 678/459, loss: 0.043750859797000885 2023-01-22 17:09:34.679708: step: 680/459, loss: 0.051585715264081955 2023-01-22 17:09:35.349447: step: 682/459, loss: 0.026672469452023506 2023-01-22 17:09:35.949220: step: 684/459, loss: 0.3620958626270294 2023-01-22 17:09:36.584729: step: 686/459, loss: 0.022602306678891182 2023-01-22 17:09:37.175168: step: 688/459, loss: 0.02470436505973339 2023-01-22 17:09:37.771910: step: 690/459, loss: 0.09186240285634995 2023-01-22 17:09:38.359757: step: 692/459, loss: 0.14812055230140686 2023-01-22 17:09:38.980664: step: 694/459, loss: 0.04023469239473343 2023-01-22 17:09:39.537863: step: 696/459, loss: 0.027251359075307846 2023-01-22 17:09:40.105492: step: 698/459, loss: 0.013398260809481144 2023-01-22 17:09:40.709985: step: 700/459, loss: 0.11069802939891815 2023-01-22 17:09:41.345667: step: 702/459, loss: 0.026202738285064697 2023-01-22 17:09:41.961000: step: 704/459, loss: 0.00911814346909523 2023-01-22 17:09:42.581403: step: 706/459, loss: 0.01837187260389328 2023-01-22 17:09:43.252422: step: 708/459, loss: 0.014116784557700157 2023-01-22 17:09:43.866488: step: 710/459, loss: 0.009888328611850739 2023-01-22 17:09:44.518720: step: 712/459, loss: 0.04827408865094185 2023-01-22 17:09:45.117348: step: 714/459, loss: 0.022427065297961235 2023-01-22 17:09:45.815993: step: 716/459, loss: 0.13535526394844055 2023-01-22 17:09:46.494981: step: 718/459, loss: 0.027535632252693176 2023-01-22 17:09:47.114387: step: 720/459, loss: 0.050093378871679306 2023-01-22 17:09:47.865964: step: 722/459, loss: 0.1266290545463562 2023-01-22 17:09:48.495960: step: 724/459, loss: 0.0006268041324801743 2023-01-22 17:09:49.117898: step: 726/459, loss: 0.028952039778232574 2023-01-22 17:09:49.762700: step: 728/459, loss: 0.02061973512172699 2023-01-22 17:09:50.339003: step: 730/459, loss: 0.01980031281709671 2023-01-22 17:09:50.955178: step: 732/459, loss: 0.030816994607448578 2023-01-22 17:09:51.579103: step: 734/459, loss: 0.0577092207968235 2023-01-22 17:09:52.166372: step: 736/459, loss: 0.010055608116090298 2023-01-22 17:09:52.721312: step: 738/459, loss: 0.08790303021669388 2023-01-22 17:09:53.312644: step: 740/459, loss: 0.027153095230460167 2023-01-22 17:09:54.012473: step: 742/459, loss: 0.08983062207698822 2023-01-22 17:09:54.631271: step: 744/459, loss: 0.05922381207346916 2023-01-22 17:09:55.284494: step: 746/459, loss: 0.02697344310581684 2023-01-22 17:09:55.896339: step: 748/459, loss: 0.298384428024292 2023-01-22 17:09:56.522525: step: 750/459, loss: 0.025646740570664406 2023-01-22 17:09:57.189943: step: 752/459, loss: 0.019862301647663116 2023-01-22 17:09:57.823057: step: 754/459, loss: 0.04165469482541084 2023-01-22 17:09:58.479452: step: 756/459, loss: 0.015659764409065247 2023-01-22 17:09:59.104824: step: 758/459, loss: 0.04033395275473595 2023-01-22 17:09:59.765940: step: 760/459, loss: 0.059591758996248245 2023-01-22 17:10:00.408024: step: 762/459, loss: 0.042618077248334885 2023-01-22 17:10:01.018865: step: 764/459, loss: 0.024883272126317024 2023-01-22 17:10:01.622874: step: 766/459, loss: 0.04935704544186592 2023-01-22 17:10:02.201075: step: 768/459, loss: 0.06508719176054001 2023-01-22 17:10:02.806132: step: 770/459, loss: 0.03490280732512474 2023-01-22 17:10:03.420971: step: 772/459, loss: 0.05050167068839073 2023-01-22 17:10:04.058095: step: 774/459, loss: 0.07082483172416687 2023-01-22 17:10:04.634235: step: 776/459, loss: 0.022155245766043663 2023-01-22 17:10:05.205118: step: 778/459, loss: 0.06403708457946777 2023-01-22 17:10:05.785712: step: 780/459, loss: 0.04355843365192413 2023-01-22 17:10:06.448900: step: 782/459, loss: 0.009303390048444271 2023-01-22 17:10:07.029655: step: 784/459, loss: 0.010791837237775326 2023-01-22 17:10:07.708114: step: 786/459, loss: 0.06556699424982071 2023-01-22 17:10:08.357668: step: 788/459, loss: 0.04688836261630058 2023-01-22 17:10:08.911228: step: 790/459, loss: 0.0026328829117119312 2023-01-22 17:10:09.567464: step: 792/459, loss: 0.01884455420076847 2023-01-22 17:10:10.146052: step: 794/459, loss: 0.39903274178504944 2023-01-22 17:10:10.689587: step: 796/459, loss: 0.056254349648952484 2023-01-22 17:10:11.244190: step: 798/459, loss: 0.02437078393995762 2023-01-22 17:10:11.916904: step: 800/459, loss: 0.030443141236901283 2023-01-22 17:10:12.517386: step: 802/459, loss: 0.0015869957860559225 2023-01-22 17:10:13.164496: step: 804/459, loss: 0.06421378254890442 2023-01-22 17:10:13.713241: step: 806/459, loss: 0.024880660697817802 2023-01-22 17:10:14.300526: step: 808/459, loss: 0.013302222825586796 2023-01-22 17:10:14.928876: step: 810/459, loss: 0.06135137379169464 2023-01-22 17:10:15.490351: step: 812/459, loss: 0.031095581129193306 2023-01-22 17:10:16.179753: step: 814/459, loss: 0.09487888216972351 2023-01-22 17:10:16.802268: step: 816/459, loss: 0.03766593709588051 2023-01-22 17:10:17.419316: step: 818/459, loss: 0.05772780254483223 2023-01-22 17:10:17.982821: step: 820/459, loss: 0.04593160003423691 2023-01-22 17:10:18.631046: step: 822/459, loss: 0.0450328066945076 2023-01-22 17:10:19.287304: step: 824/459, loss: 0.019522173330187798 2023-01-22 17:10:19.909781: step: 826/459, loss: 0.018175119534134865 2023-01-22 17:10:20.458274: step: 828/459, loss: 0.04293341562151909 2023-01-22 17:10:21.076933: step: 830/459, loss: 0.07963168621063232 2023-01-22 17:10:21.675860: step: 832/459, loss: 0.10267185419797897 2023-01-22 17:10:22.381696: step: 834/459, loss: 0.06854265183210373 2023-01-22 17:10:22.995030: step: 836/459, loss: 0.07227293401956558 2023-01-22 17:10:23.600731: step: 838/459, loss: 0.10177715122699738 2023-01-22 17:10:24.172251: step: 840/459, loss: 0.14890339970588684 2023-01-22 17:10:24.761534: step: 842/459, loss: 0.03864680230617523 2023-01-22 17:10:25.393711: step: 844/459, loss: 0.01455527450889349 2023-01-22 17:10:26.050151: step: 846/459, loss: 0.11426743865013123 2023-01-22 17:10:26.721775: step: 848/459, loss: 0.041558887809515 2023-01-22 17:10:27.323885: step: 850/459, loss: 0.05419929325580597 2023-01-22 17:10:27.932418: step: 852/459, loss: 0.04745161533355713 2023-01-22 17:10:28.528682: step: 854/459, loss: 0.011593121103942394 2023-01-22 17:10:29.092909: step: 856/459, loss: 0.03135685250163078 2023-01-22 17:10:29.703782: step: 858/459, loss: 0.05189639702439308 2023-01-22 17:10:30.351079: step: 860/459, loss: 0.014372549019753933 2023-01-22 17:10:30.995289: step: 862/459, loss: 0.11032935976982117 2023-01-22 17:10:31.510369: step: 864/459, loss: 0.03464547172188759 2023-01-22 17:10:32.047082: step: 866/459, loss: 0.047763168811798096 2023-01-22 17:10:32.602062: step: 868/459, loss: 0.08189395070075989 2023-01-22 17:10:33.227499: step: 870/459, loss: 0.10312726348638535 2023-01-22 17:10:33.859347: step: 872/459, loss: 0.023095590993762016 2023-01-22 17:10:34.485713: step: 874/459, loss: 1.0420503616333008 2023-01-22 17:10:35.089110: step: 876/459, loss: 0.16925649344921112 2023-01-22 17:10:35.793296: step: 878/459, loss: 0.008748591877520084 2023-01-22 17:10:36.460969: step: 880/459, loss: 0.09279181808233261 2023-01-22 17:10:37.169384: step: 882/459, loss: 0.05507230386137962 2023-01-22 17:10:37.799210: step: 884/459, loss: 0.039823565632104874 2023-01-22 17:10:38.443096: step: 886/459, loss: 0.024265026673674583 2023-01-22 17:10:39.041820: step: 888/459, loss: 0.08550796657800674 2023-01-22 17:10:39.724029: step: 890/459, loss: 0.04712861031293869 2023-01-22 17:10:40.391680: step: 892/459, loss: 0.03831730782985687 2023-01-22 17:10:41.011757: step: 894/459, loss: 0.039274681359529495 2023-01-22 17:10:41.602281: step: 896/459, loss: 0.008581345900893211 2023-01-22 17:10:42.203644: step: 898/459, loss: 0.029534876346588135 2023-01-22 17:10:42.830064: step: 900/459, loss: 0.07328962534666061 2023-01-22 17:10:43.489272: step: 902/459, loss: 0.10432709753513336 2023-01-22 17:10:44.051057: step: 904/459, loss: 0.04279569163918495 2023-01-22 17:10:44.598014: step: 906/459, loss: 0.013411427848041058 2023-01-22 17:10:45.171579: step: 908/459, loss: 0.0063079544343054295 2023-01-22 17:10:45.726717: step: 910/459, loss: 0.02458120696246624 2023-01-22 17:10:46.379978: step: 912/459, loss: 0.09551279991865158 2023-01-22 17:10:46.958583: step: 914/459, loss: 0.014269753359258175 2023-01-22 17:10:47.589646: step: 916/459, loss: 0.012027394957840443 2023-01-22 17:10:48.167579: step: 918/459, loss: 0.04686097428202629 2023-01-22 17:10:48.581218: step: 920/459, loss: 0.0015634888550266623 ================================================== Loss: 0.106 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2977693492169595, 'r': 0.3473975740864528, 'f1': 0.3206746837721103}, 'combined': 0.23628660909523913, 'epoch': 21} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33328485943874636, 'r': 0.3190445063536363, 'f1': 0.3260092494091964}, 'combined': 0.20864591962188567, 'epoch': 21} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.294472977809592, 'r': 0.35414837293388424, 'f1': 0.32156550457281324}, 'combined': 0.23694300336944132, 'epoch': 21} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33869066896262745, 'r': 0.32729834646115724, 'f1': 0.3328970699096376}, 'combined': 0.213054124742168, 'epoch': 21} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3084881671773272, 'r': 0.34997047753639965, 'f1': 0.327922647984417}, 'combined': 0.24162721430430725, 'epoch': 21} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.33389718035955884, 'r': 0.3268966013729865, 'f1': 0.33035980811254373}, 'combined': 0.23686174921276723, 'epoch': 21} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.22988505747126434, 'r': 0.38095238095238093, 'f1': 0.2867383512544803}, 'combined': 0.19115890083632017, 'epoch': 21} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.22727272727272727, 'r': 0.32608695652173914, 'f1': 0.26785714285714285}, 'combined': 0.13392857142857142, 'epoch': 21} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29411764705882354, 'r': 0.1724137931034483, 'f1': 0.2173913043478261}, 'combined': 0.14492753623188406, 'epoch': 21} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3066986908783784, 'r': 0.3445268026565465, 'f1': 0.3245140750670242}, 'combined': 0.23911563425991253, 'epoch': 19} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31271473684488904, 'r': 0.30873473110322686, 'f1': 0.31071198921642224}, 'combined': 0.1988556730985102, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2564102564102564, 'r': 0.38095238095238093, 'f1': 0.3065134099616858}, 'combined': 0.20434227330779053, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29418702499903554, 'r': 0.3594998939267151, 'f1': 0.32358060478117656}, 'combined': 0.23842781404928798, 'epoch': 13} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3202762492367604, 'r': 0.31212376289255195, 'f1': 0.31614745781013553}, 'combined': 0.20233437299848672, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2878787878787879, 'r': 0.41304347826086957, 'f1': 0.3392857142857143}, 'combined': 0.16964285714285715, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3189032241859154, 'r': 0.3376622373733222, 'f1': 0.32801474487694154}, 'combined': 0.24169507517248323, 'epoch': 15} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34157316773046453, 'r': 0.3008578947798814, 'f1': 0.3199253278791385}, 'combined': 0.22938042376240123, 'epoch': 15} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35294117647058826, 'r': 0.20689655172413793, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 15} ****************************** Epoch: 22 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 17:13:26.788144: step: 2/459, loss: 0.026346124708652496 2023-01-22 17:13:27.423680: step: 4/459, loss: 0.04566538706421852 2023-01-22 17:13:27.992672: step: 6/459, loss: 0.04429998993873596 2023-01-22 17:13:28.664051: step: 8/459, loss: 0.07990197837352753 2023-01-22 17:13:29.279073: step: 10/459, loss: 0.05762810632586479 2023-01-22 17:13:29.908927: step: 12/459, loss: 0.1237635686993599 2023-01-22 17:13:30.514832: step: 14/459, loss: 0.03112717717885971 2023-01-22 17:13:31.057859: step: 16/459, loss: 0.03809545934200287 2023-01-22 17:13:31.627275: step: 18/459, loss: 0.027139581739902496 2023-01-22 17:13:32.220437: step: 20/459, loss: 0.012463323771953583 2023-01-22 17:13:32.858396: step: 22/459, loss: 0.04990972578525543 2023-01-22 17:13:33.428579: step: 24/459, loss: 0.011956119909882545 2023-01-22 17:13:34.005427: step: 26/459, loss: 0.05647145211696625 2023-01-22 17:13:34.636773: step: 28/459, loss: 0.053167011588811874 2023-01-22 17:13:35.192050: step: 30/459, loss: 0.02213391661643982 2023-01-22 17:13:35.802445: step: 32/459, loss: 0.018077930435538292 2023-01-22 17:13:36.401236: step: 34/459, loss: 0.05191947892308235 2023-01-22 17:13:37.058518: step: 36/459, loss: 0.042555008083581924 2023-01-22 17:13:37.632275: step: 38/459, loss: 0.03236010670661926 2023-01-22 17:13:38.181973: step: 40/459, loss: 0.040648721158504486 2023-01-22 17:13:38.815265: step: 42/459, loss: 0.016845980659127235 2023-01-22 17:13:39.425898: step: 44/459, loss: 0.01268764678388834 2023-01-22 17:13:40.169408: step: 46/459, loss: 0.04483975097537041 2023-01-22 17:13:40.754965: step: 48/459, loss: 0.028159640729427338 2023-01-22 17:13:41.340095: step: 50/459, loss: 0.024264821782708168 2023-01-22 17:13:41.896859: step: 52/459, loss: 0.009710555896162987 2023-01-22 17:13:42.572924: step: 54/459, loss: 0.020755840465426445 2023-01-22 17:13:43.233928: step: 56/459, loss: 0.2782325744628906 2023-01-22 17:13:43.858444: step: 58/459, loss: 0.04276804253458977 2023-01-22 17:13:44.512489: step: 60/459, loss: 0.038927242159843445 2023-01-22 17:13:45.126189: step: 62/459, loss: 3.0185964107513428 2023-01-22 17:13:45.803590: step: 64/459, loss: 0.040247175842523575 2023-01-22 17:13:46.400599: step: 66/459, loss: 0.09014195948839188 2023-01-22 17:13:47.101579: step: 68/459, loss: 0.12846770882606506 2023-01-22 17:13:47.708897: step: 70/459, loss: 0.003628802252933383 2023-01-22 17:13:48.355898: step: 72/459, loss: 0.025929564610123634 2023-01-22 17:13:49.000551: step: 74/459, loss: 0.023532623425126076 2023-01-22 17:13:49.639844: step: 76/459, loss: 0.11495717614889145 2023-01-22 17:13:50.273376: step: 78/459, loss: 0.070429228246212 2023-01-22 17:13:50.843297: step: 80/459, loss: 0.028058044612407684 2023-01-22 17:13:51.504846: step: 82/459, loss: 0.04712716117501259 2023-01-22 17:13:52.158617: step: 84/459, loss: 0.05399499833583832 2023-01-22 17:13:52.803701: step: 86/459, loss: 0.006845901254564524 2023-01-22 17:13:53.555890: step: 88/459, loss: 0.007844832725822926 2023-01-22 17:13:54.107010: step: 90/459, loss: 0.016264736652374268 2023-01-22 17:13:54.681898: step: 92/459, loss: 0.019178710877895355 2023-01-22 17:13:55.307505: step: 94/459, loss: 0.17619189620018005 2023-01-22 17:13:55.888985: step: 96/459, loss: 0.003245798172429204 2023-01-22 17:13:56.475516: step: 98/459, loss: 0.0022701474372297525 2023-01-22 17:13:57.116277: step: 100/459, loss: 0.0348513200879097 2023-01-22 17:13:57.773055: step: 102/459, loss: 0.07891477644443512 2023-01-22 17:13:58.452020: step: 104/459, loss: 0.0987682193517685 2023-01-22 17:13:59.038374: step: 106/459, loss: 0.0024048772174865007 2023-01-22 17:13:59.606876: step: 108/459, loss: 0.034079693257808685 2023-01-22 17:14:00.248494: step: 110/459, loss: 0.09490132331848145 2023-01-22 17:14:00.879944: step: 112/459, loss: 0.02208867110311985 2023-01-22 17:14:01.425170: step: 114/459, loss: 0.03762305900454521 2023-01-22 17:14:02.001322: step: 116/459, loss: 0.07453132420778275 2023-01-22 17:14:02.623773: step: 118/459, loss: 0.02698238007724285 2023-01-22 17:14:03.181947: step: 120/459, loss: 0.006639184430241585 2023-01-22 17:14:03.763608: step: 122/459, loss: 0.011093111708760262 2023-01-22 17:14:04.342217: step: 124/459, loss: 0.04782804101705551 2023-01-22 17:14:04.943788: step: 126/459, loss: 0.03525000438094139 2023-01-22 17:14:05.536719: step: 128/459, loss: 0.017497297376394272 2023-01-22 17:14:06.151024: step: 130/459, loss: 0.005246711429208517 2023-01-22 17:14:06.894469: step: 132/459, loss: 0.034949760884046555 2023-01-22 17:14:07.508907: step: 134/459, loss: 0.013458567671477795 2023-01-22 17:14:08.134508: step: 136/459, loss: 0.03815441578626633 2023-01-22 17:14:08.748464: step: 138/459, loss: 0.011080188676714897 2023-01-22 17:14:09.270829: step: 140/459, loss: 0.021171117201447487 2023-01-22 17:14:09.870576: step: 142/459, loss: 0.005734983831644058 2023-01-22 17:14:10.518218: step: 144/459, loss: 0.019338665530085564 2023-01-22 17:14:11.164094: step: 146/459, loss: 0.041768040508031845 2023-01-22 17:14:11.791279: step: 148/459, loss: 0.24233973026275635 2023-01-22 17:14:12.378770: step: 150/459, loss: 0.28781014680862427 2023-01-22 17:14:12.937271: step: 152/459, loss: 0.1669972985982895 2023-01-22 17:14:13.533463: step: 154/459, loss: 0.018051104620099068 2023-01-22 17:14:14.087695: step: 156/459, loss: 0.10977909713983536 2023-01-22 17:14:14.722574: step: 158/459, loss: 0.018279284238815308 2023-01-22 17:14:15.361204: step: 160/459, loss: 0.1925724893808365 2023-01-22 17:14:15.963888: step: 162/459, loss: 0.02487138658761978 2023-01-22 17:14:16.645203: step: 164/459, loss: 0.024437466636300087 2023-01-22 17:14:17.242231: step: 166/459, loss: 0.0772494450211525 2023-01-22 17:14:17.895781: step: 168/459, loss: 0.03844528645277023 2023-01-22 17:14:18.531873: step: 170/459, loss: 0.016046294942498207 2023-01-22 17:14:19.157851: step: 172/459, loss: 0.8894335627555847 2023-01-22 17:14:19.764708: step: 174/459, loss: 0.037680260837078094 2023-01-22 17:14:20.424977: step: 176/459, loss: 0.03921224921941757 2023-01-22 17:14:21.088331: step: 178/459, loss: 0.021351337432861328 2023-01-22 17:14:21.707650: step: 180/459, loss: 0.05164683237671852 2023-01-22 17:14:22.356833: step: 182/459, loss: 0.12373287230730057 2023-01-22 17:14:22.972813: step: 184/459, loss: 0.7750673890113831 2023-01-22 17:14:23.605774: step: 186/459, loss: 0.017808176577091217 2023-01-22 17:14:24.202473: step: 188/459, loss: 0.023830890655517578 2023-01-22 17:14:24.804762: step: 190/459, loss: 0.0025184545665979385 2023-01-22 17:14:25.396811: step: 192/459, loss: 0.06932085752487183 2023-01-22 17:14:26.049570: step: 194/459, loss: 0.03303677961230278 2023-01-22 17:14:26.680634: step: 196/459, loss: 0.010136366821825504 2023-01-22 17:14:27.344652: step: 198/459, loss: 0.009977549314498901 2023-01-22 17:14:27.983971: step: 200/459, loss: 0.0572221614420414 2023-01-22 17:14:28.585035: step: 202/459, loss: 0.08027958869934082 2023-01-22 17:14:29.211205: step: 204/459, loss: 0.056547604501247406 2023-01-22 17:14:29.853510: step: 206/459, loss: 0.012737813405692577 2023-01-22 17:14:30.378059: step: 208/459, loss: 0.04908161237835884 2023-01-22 17:14:30.953782: step: 210/459, loss: 0.00400513457134366 2023-01-22 17:14:31.605382: step: 212/459, loss: 0.04712236300110817 2023-01-22 17:14:32.228836: step: 214/459, loss: 0.050320953130722046 2023-01-22 17:14:32.867222: step: 216/459, loss: 1.3865139484405518 2023-01-22 17:14:33.462717: step: 218/459, loss: 0.029726970940828323 2023-01-22 17:14:34.131761: step: 220/459, loss: 0.08438760787248611 2023-01-22 17:14:34.838000: step: 222/459, loss: 0.2955694794654846 2023-01-22 17:14:35.451719: step: 224/459, loss: 0.017496079206466675 2023-01-22 17:14:36.136213: step: 226/459, loss: 0.02155141718685627 2023-01-22 17:14:36.710225: step: 228/459, loss: 0.017025133594870567 2023-01-22 17:14:37.366691: step: 230/459, loss: 0.002616557292640209 2023-01-22 17:14:38.044835: step: 232/459, loss: 0.02329319715499878 2023-01-22 17:14:38.657118: step: 234/459, loss: 0.10638178884983063 2023-01-22 17:14:39.265735: step: 236/459, loss: 0.017231764271855354 2023-01-22 17:14:39.844486: step: 238/459, loss: 0.0571727529168129 2023-01-22 17:14:40.449207: step: 240/459, loss: 0.021477974951267242 2023-01-22 17:14:41.118474: step: 242/459, loss: 0.043130747973918915 2023-01-22 17:14:41.733926: step: 244/459, loss: 0.029137980192899704 2023-01-22 17:14:42.442519: step: 246/459, loss: 0.2126033455133438 2023-01-22 17:14:43.113391: step: 248/459, loss: 0.004861151333898306 2023-01-22 17:14:43.723476: step: 250/459, loss: 0.01700451783835888 2023-01-22 17:14:44.352056: step: 252/459, loss: 0.06181621924042702 2023-01-22 17:14:44.953482: step: 254/459, loss: 0.035560593008995056 2023-01-22 17:14:45.564808: step: 256/459, loss: 0.09881936013698578 2023-01-22 17:14:46.310288: step: 258/459, loss: 0.057163745164871216 2023-01-22 17:14:46.952766: step: 260/459, loss: 0.012476208619773388 2023-01-22 17:14:47.611549: step: 262/459, loss: 0.07921157777309418 2023-01-22 17:14:48.335358: step: 264/459, loss: 0.06812220811843872 2023-01-22 17:14:48.998477: step: 266/459, loss: 0.24388806521892548 2023-01-22 17:14:49.633758: step: 268/459, loss: 0.009056903421878815 2023-01-22 17:14:50.249359: step: 270/459, loss: 0.02041330747306347 2023-01-22 17:14:50.814744: step: 272/459, loss: 0.23191604018211365 2023-01-22 17:14:51.414039: step: 274/459, loss: 0.0440036877989769 2023-01-22 17:14:52.061231: step: 276/459, loss: 0.006307473871856928 2023-01-22 17:14:52.681281: step: 278/459, loss: 0.016738813370466232 2023-01-22 17:14:53.335849: step: 280/459, loss: 0.016178026795387268 2023-01-22 17:14:53.954646: step: 282/459, loss: 0.05966642126441002 2023-01-22 17:14:54.579966: step: 284/459, loss: 1.5185487270355225 2023-01-22 17:14:55.294229: step: 286/459, loss: 0.014358755201101303 2023-01-22 17:14:55.844798: step: 288/459, loss: 0.025959115475416183 2023-01-22 17:14:56.476462: step: 290/459, loss: 0.05645442754030228 2023-01-22 17:14:57.135596: step: 292/459, loss: 0.012365003116428852 2023-01-22 17:14:57.779142: step: 294/459, loss: 0.04068297520279884 2023-01-22 17:14:58.350360: step: 296/459, loss: 0.004662307444959879 2023-01-22 17:14:58.949992: step: 298/459, loss: 0.22195199131965637 2023-01-22 17:14:59.564511: step: 300/459, loss: 0.06065337359905243 2023-01-22 17:15:00.228569: step: 302/459, loss: 0.36951401829719543 2023-01-22 17:15:00.828775: step: 304/459, loss: 0.04873509705066681 2023-01-22 17:15:01.417759: step: 306/459, loss: 0.01951543428003788 2023-01-22 17:15:01.990425: step: 308/459, loss: 0.014267228543758392 2023-01-22 17:15:02.571461: step: 310/459, loss: 0.22016148269176483 2023-01-22 17:15:03.212274: step: 312/459, loss: 0.01823374629020691 2023-01-22 17:15:03.797080: step: 314/459, loss: 0.04538235068321228 2023-01-22 17:15:04.517558: step: 316/459, loss: 0.07549505680799484 2023-01-22 17:15:05.069260: step: 318/459, loss: 0.00487141590565443 2023-01-22 17:15:05.657037: step: 320/459, loss: 0.2394665777683258 2023-01-22 17:15:06.272735: step: 322/459, loss: 0.2735185921192169 2023-01-22 17:15:06.837002: step: 324/459, loss: 0.0031948573887348175 2023-01-22 17:15:07.479071: step: 326/459, loss: 0.006490461062639952 2023-01-22 17:15:08.110920: step: 328/459, loss: 0.010079638101160526 2023-01-22 17:15:08.705170: step: 330/459, loss: 0.013745789416134357 2023-01-22 17:15:09.346371: step: 332/459, loss: 0.03473980724811554 2023-01-22 17:15:10.013535: step: 334/459, loss: 0.06368331611156464 2023-01-22 17:15:10.609378: step: 336/459, loss: 0.010316764004528522 2023-01-22 17:15:11.320004: step: 338/459, loss: 0.05769922211766243 2023-01-22 17:15:11.928517: step: 340/459, loss: 0.025059735402464867 2023-01-22 17:15:12.557880: step: 342/459, loss: 0.08455561101436615 2023-01-22 17:15:13.255207: step: 344/459, loss: 0.04761756211519241 2023-01-22 17:15:13.951248: step: 346/459, loss: 0.005555951036512852 2023-01-22 17:15:14.591039: step: 348/459, loss: 0.007528517860919237 2023-01-22 17:15:15.134473: step: 350/459, loss: 0.054692238569259644 2023-01-22 17:15:15.709318: step: 352/459, loss: 0.02567819319665432 2023-01-22 17:15:16.373857: step: 354/459, loss: 0.036692749708890915 2023-01-22 17:15:16.930826: step: 356/459, loss: 0.05592750757932663 2023-01-22 17:15:17.541073: step: 358/459, loss: 0.06382086873054504 2023-01-22 17:15:18.207087: step: 360/459, loss: 0.14979341626167297 2023-01-22 17:15:18.832374: step: 362/459, loss: 0.04784433916211128 2023-01-22 17:15:19.493875: step: 364/459, loss: 0.014615454711019993 2023-01-22 17:15:20.176634: step: 366/459, loss: 0.019627267494797707 2023-01-22 17:15:20.798152: step: 368/459, loss: 0.015010179951786995 2023-01-22 17:15:21.384466: step: 370/459, loss: 0.0045725577510893345 2023-01-22 17:15:21.991586: step: 372/459, loss: 0.019586579874157906 2023-01-22 17:15:22.605431: step: 374/459, loss: 0.015141193754971027 2023-01-22 17:15:23.190962: step: 376/459, loss: 0.01417064294219017 2023-01-22 17:15:23.848295: step: 378/459, loss: 0.037457335740327835 2023-01-22 17:15:24.423705: step: 380/459, loss: 0.03424796462059021 2023-01-22 17:15:25.062681: step: 382/459, loss: 0.011495736427605152 2023-01-22 17:15:25.685600: step: 384/459, loss: 0.008041814900934696 2023-01-22 17:15:26.238132: step: 386/459, loss: 0.35848289728164673 2023-01-22 17:15:26.871583: step: 388/459, loss: 0.017756387591362 2023-01-22 17:15:27.455454: step: 390/459, loss: 0.028421053662896156 2023-01-22 17:15:28.046141: step: 392/459, loss: 0.05287051945924759 2023-01-22 17:15:28.652228: step: 394/459, loss: 0.08868866413831711 2023-01-22 17:15:29.271047: step: 396/459, loss: 0.025552693754434586 2023-01-22 17:15:29.953818: step: 398/459, loss: 0.026343677192926407 2023-01-22 17:15:30.544334: step: 400/459, loss: 0.019448967650532722 2023-01-22 17:15:31.141370: step: 402/459, loss: 0.1506904810667038 2023-01-22 17:15:31.760744: step: 404/459, loss: 0.02461448311805725 2023-01-22 17:15:32.399491: step: 406/459, loss: 0.31263285875320435 2023-01-22 17:15:33.058406: step: 408/459, loss: 0.0336935818195343 2023-01-22 17:15:33.679322: step: 410/459, loss: 0.03369084745645523 2023-01-22 17:15:34.225048: step: 412/459, loss: 0.004112513270229101 2023-01-22 17:15:34.862405: step: 414/459, loss: 0.06522492319345474 2023-01-22 17:15:35.468272: step: 416/459, loss: 0.03770206868648529 2023-01-22 17:15:36.104555: step: 418/459, loss: 0.015811465680599213 2023-01-22 17:15:36.724109: step: 420/459, loss: 0.07692104578018188 2023-01-22 17:15:37.372513: step: 422/459, loss: 0.052361391484737396 2023-01-22 17:15:37.984688: step: 424/459, loss: 0.0751284658908844 2023-01-22 17:15:38.508058: step: 426/459, loss: 0.010947153903543949 2023-01-22 17:15:39.110759: step: 428/459, loss: 0.09933919459581375 2023-01-22 17:15:39.675693: step: 430/459, loss: 0.018033349886536598 2023-01-22 17:15:40.251137: step: 432/459, loss: 0.009221258573234081 2023-01-22 17:15:40.807943: step: 434/459, loss: 0.0353260263800621 2023-01-22 17:15:41.463150: step: 436/459, loss: 0.0635896846652031 2023-01-22 17:15:42.113131: step: 438/459, loss: 0.02773154154419899 2023-01-22 17:15:42.730887: step: 440/459, loss: 0.02767075039446354 2023-01-22 17:15:43.281650: step: 442/459, loss: 0.21961349248886108 2023-01-22 17:15:43.921865: step: 444/459, loss: 0.06725914776325226 2023-01-22 17:15:44.557034: step: 446/459, loss: 0.05583101883530617 2023-01-22 17:15:45.137389: step: 448/459, loss: 0.0453258715569973 2023-01-22 17:15:45.763468: step: 450/459, loss: 0.10601557791233063 2023-01-22 17:15:46.344015: step: 452/459, loss: 0.0013704823795706034 2023-01-22 17:15:46.969986: step: 454/459, loss: 0.036667827516794205 2023-01-22 17:15:47.601048: step: 456/459, loss: 0.06447137892246246 2023-01-22 17:15:48.239432: step: 458/459, loss: 0.026300624012947083 2023-01-22 17:15:48.809516: step: 460/459, loss: 1.94223952293396 2023-01-22 17:15:49.458756: step: 462/459, loss: 0.0700061172246933 2023-01-22 17:15:50.069807: step: 464/459, loss: 0.02833566628396511 2023-01-22 17:15:50.737723: step: 466/459, loss: 0.009097084403038025 2023-01-22 17:15:51.361843: step: 468/459, loss: 0.04847263917326927 2023-01-22 17:15:52.007351: step: 470/459, loss: 0.006385414395481348 2023-01-22 17:15:52.622641: step: 472/459, loss: 0.034837279468774796 2023-01-22 17:15:53.200525: step: 474/459, loss: 0.0013880339683964849 2023-01-22 17:15:53.781306: step: 476/459, loss: 0.03244774043560028 2023-01-22 17:15:54.352201: step: 478/459, loss: 0.01286572590470314 2023-01-22 17:15:54.947507: step: 480/459, loss: 0.021153034642338753 2023-01-22 17:15:55.495618: step: 482/459, loss: 0.03628551587462425 2023-01-22 17:15:56.069449: step: 484/459, loss: 0.030607879161834717 2023-01-22 17:15:56.699846: step: 486/459, loss: 0.03087986633181572 2023-01-22 17:15:57.324719: step: 488/459, loss: 0.4994218945503235 2023-01-22 17:15:57.916001: step: 490/459, loss: 0.04619431495666504 2023-01-22 17:15:58.597894: step: 492/459, loss: 0.19139990210533142 2023-01-22 17:15:59.209672: step: 494/459, loss: 0.6031819581985474 2023-01-22 17:15:59.854779: step: 496/459, loss: 0.004484820645302534 2023-01-22 17:16:00.546694: step: 498/459, loss: 0.054506346583366394 2023-01-22 17:16:01.143558: step: 500/459, loss: 0.3393665850162506 2023-01-22 17:16:01.898678: step: 502/459, loss: 0.012491557747125626 2023-01-22 17:16:02.570752: step: 504/459, loss: 0.0742330253124237 2023-01-22 17:16:03.237397: step: 506/459, loss: 0.06560003012418747 2023-01-22 17:16:03.833035: step: 508/459, loss: 0.005259087309241295 2023-01-22 17:16:04.449445: step: 510/459, loss: 0.06130737066268921 2023-01-22 17:16:05.002441: step: 512/459, loss: 0.06752491742372513 2023-01-22 17:16:05.619612: step: 514/459, loss: 0.06485892832279205 2023-01-22 17:16:06.279512: step: 516/459, loss: 0.007411536760628223 2023-01-22 17:16:06.881281: step: 518/459, loss: 0.06229359656572342 2023-01-22 17:16:07.485370: step: 520/459, loss: 0.009851649403572083 2023-01-22 17:16:08.167771: step: 522/459, loss: 0.02347351796925068 2023-01-22 17:16:08.801811: step: 524/459, loss: 0.07585801184177399 2023-01-22 17:16:09.399220: step: 526/459, loss: 0.022511741146445274 2023-01-22 17:16:10.024844: step: 528/459, loss: 0.03910200297832489 2023-01-22 17:16:10.639784: step: 530/459, loss: 0.042918335646390915 2023-01-22 17:16:11.255728: step: 532/459, loss: 0.030814552679657936 2023-01-22 17:16:11.834957: step: 534/459, loss: 0.022115349769592285 2023-01-22 17:16:12.434991: step: 536/459, loss: 0.8133385181427002 2023-01-22 17:16:13.095391: step: 538/459, loss: 0.04520424082875252 2023-01-22 17:16:13.854628: step: 540/459, loss: 0.013294505886733532 2023-01-22 17:16:14.527792: step: 542/459, loss: 0.027102535590529442 2023-01-22 17:16:15.189550: step: 544/459, loss: 0.03001679852604866 2023-01-22 17:16:15.802835: step: 546/459, loss: 0.0172171238809824 2023-01-22 17:16:16.400614: step: 548/459, loss: 0.0749741643667221 2023-01-22 17:16:17.070370: step: 550/459, loss: 0.00439429609104991 2023-01-22 17:16:17.733645: step: 552/459, loss: 0.01771623082458973 2023-01-22 17:16:18.290619: step: 554/459, loss: 0.0068047381937503815 2023-01-22 17:16:18.897744: step: 556/459, loss: 0.029301568865776062 2023-01-22 17:16:19.525523: step: 558/459, loss: 0.013697724789381027 2023-01-22 17:16:20.144897: step: 560/459, loss: 0.010710817761719227 2023-01-22 17:16:20.773923: step: 562/459, loss: 0.13912881910800934 2023-01-22 17:16:21.452401: step: 564/459, loss: 0.03787193074822426 2023-01-22 17:16:22.053981: step: 566/459, loss: 0.18330077826976776 2023-01-22 17:16:22.629846: step: 568/459, loss: 0.019763192161917686 2023-01-22 17:16:23.258545: step: 570/459, loss: 0.1068534404039383 2023-01-22 17:16:23.854421: step: 572/459, loss: 0.07377493381500244 2023-01-22 17:16:24.527668: step: 574/459, loss: 0.16739703714847565 2023-01-22 17:16:25.131157: step: 576/459, loss: 0.015127421356737614 2023-01-22 17:16:25.687669: step: 578/459, loss: 0.023985834792256355 2023-01-22 17:16:26.368521: step: 580/459, loss: 0.20785550773143768 2023-01-22 17:16:26.996543: step: 582/459, loss: 0.020923703908920288 2023-01-22 17:16:27.640008: step: 584/459, loss: 0.012601137161254883 2023-01-22 17:16:28.321529: step: 586/459, loss: 0.0504632294178009 2023-01-22 17:16:28.938436: step: 588/459, loss: 0.010526206344366074 2023-01-22 17:16:29.607751: step: 590/459, loss: 0.08760897815227509 2023-01-22 17:16:30.272929: step: 592/459, loss: 0.0306037999689579 2023-01-22 17:16:30.845129: step: 594/459, loss: 0.013976583257317543 2023-01-22 17:16:31.441638: step: 596/459, loss: 0.004304125439375639 2023-01-22 17:16:32.068609: step: 598/459, loss: 0.010031676851212978 2023-01-22 17:16:32.642492: step: 600/459, loss: 0.022077931091189384 2023-01-22 17:16:33.271169: step: 602/459, loss: 0.05231672525405884 2023-01-22 17:16:33.805030: step: 604/459, loss: 0.008731868118047714 2023-01-22 17:16:34.401865: step: 606/459, loss: 0.020286159589886665 2023-01-22 17:16:34.967859: step: 608/459, loss: 0.015759943053126335 2023-01-22 17:16:35.671505: step: 610/459, loss: 0.00947621464729309 2023-01-22 17:16:36.262602: step: 612/459, loss: 0.08187827467918396 2023-01-22 17:16:36.953059: step: 614/459, loss: 0.06620460748672485 2023-01-22 17:16:37.590516: step: 616/459, loss: 0.04332459717988968 2023-01-22 17:16:38.188526: step: 618/459, loss: 0.019241228699684143 2023-01-22 17:16:38.854315: step: 620/459, loss: 0.028191443532705307 2023-01-22 17:16:39.456885: step: 622/459, loss: 0.01999499276280403 2023-01-22 17:16:40.079618: step: 624/459, loss: 0.0038684457540512085 2023-01-22 17:16:40.693670: step: 626/459, loss: 0.0034544796217232943 2023-01-22 17:16:41.309850: step: 628/459, loss: 0.013194660656154156 2023-01-22 17:16:41.941421: step: 630/459, loss: 0.08757342398166656 2023-01-22 17:16:42.505756: step: 632/459, loss: 0.034149300307035446 2023-01-22 17:16:43.116713: step: 634/459, loss: 0.029539519920945168 2023-01-22 17:16:43.702700: step: 636/459, loss: 0.34492895007133484 2023-01-22 17:16:44.310944: step: 638/459, loss: 0.1164543479681015 2023-01-22 17:16:44.952801: step: 640/459, loss: 0.01934254728257656 2023-01-22 17:16:45.557471: step: 642/459, loss: 0.04128773882985115 2023-01-22 17:16:46.183141: step: 644/459, loss: 0.026061244308948517 2023-01-22 17:16:46.801980: step: 646/459, loss: 0.05815793573856354 2023-01-22 17:16:47.454335: step: 648/459, loss: 0.04567207023501396 2023-01-22 17:16:48.113984: step: 650/459, loss: 0.0236348956823349 2023-01-22 17:16:48.743947: step: 652/459, loss: 0.02609976753592491 2023-01-22 17:16:49.371042: step: 654/459, loss: 0.007761191576719284 2023-01-22 17:16:49.980536: step: 656/459, loss: 0.049124348908662796 2023-01-22 17:16:50.677035: step: 658/459, loss: 0.33483803272247314 2023-01-22 17:16:51.337672: step: 660/459, loss: 0.05216114968061447 2023-01-22 17:16:51.916167: step: 662/459, loss: 0.03311688452959061 2023-01-22 17:16:52.550591: step: 664/459, loss: 0.06010989099740982 2023-01-22 17:16:53.212856: step: 666/459, loss: 0.02707153931260109 2023-01-22 17:16:53.826996: step: 668/459, loss: 0.06278768181800842 2023-01-22 17:16:54.457062: step: 670/459, loss: 0.1821589320898056 2023-01-22 17:16:55.089932: step: 672/459, loss: 0.04274550452828407 2023-01-22 17:16:55.792362: step: 674/459, loss: 0.02857544831931591 2023-01-22 17:16:56.386352: step: 676/459, loss: 0.024565724655985832 2023-01-22 17:16:57.034437: step: 678/459, loss: 0.024261239916086197 2023-01-22 17:16:57.713787: step: 680/459, loss: 0.06549413502216339 2023-01-22 17:16:58.303285: step: 682/459, loss: 0.016888564452528954 2023-01-22 17:16:58.941514: step: 684/459, loss: 0.241699680685997 2023-01-22 17:16:59.547317: step: 686/459, loss: 0.13715356588363647 2023-01-22 17:17:00.199903: step: 688/459, loss: 0.047702450305223465 2023-01-22 17:17:00.903853: step: 690/459, loss: 0.10275678336620331 2023-01-22 17:17:01.550962: step: 692/459, loss: 0.0312688872218132 2023-01-22 17:17:02.183553: step: 694/459, loss: 0.028700659051537514 2023-01-22 17:17:02.872634: step: 696/459, loss: 0.008833990432322025 2023-01-22 17:17:03.492467: step: 698/459, loss: 0.015873488038778305 2023-01-22 17:17:04.185151: step: 700/459, loss: 0.025503940880298615 2023-01-22 17:17:04.758768: step: 702/459, loss: 0.04091525822877884 2023-01-22 17:17:05.396209: step: 704/459, loss: 0.39028283953666687 2023-01-22 17:17:06.024402: step: 706/459, loss: 0.0745469406247139 2023-01-22 17:17:06.615256: step: 708/459, loss: 0.01639268361032009 2023-01-22 17:17:07.188566: step: 710/459, loss: 0.035337310284376144 2023-01-22 17:17:07.869421: step: 712/459, loss: 5.940037250518799 2023-01-22 17:17:08.511419: step: 714/459, loss: 0.01097420509904623 2023-01-22 17:17:09.160768: step: 716/459, loss: 0.025968121364712715 2023-01-22 17:17:09.777061: step: 718/459, loss: 0.02698172815144062 2023-01-22 17:17:10.401137: step: 720/459, loss: 0.029203681275248528 2023-01-22 17:17:10.990069: step: 722/459, loss: 0.017215857282280922 2023-01-22 17:17:11.586853: step: 724/459, loss: 0.024809319525957108 2023-01-22 17:17:12.162691: step: 726/459, loss: 0.06200660020112991 2023-01-22 17:17:12.718887: step: 728/459, loss: 0.8759531378746033 2023-01-22 17:17:13.308385: step: 730/459, loss: 0.011805787682533264 2023-01-22 17:17:13.928901: step: 732/459, loss: 0.08634456992149353 2023-01-22 17:17:14.558161: step: 734/459, loss: 0.12635862827301025 2023-01-22 17:17:15.163008: step: 736/459, loss: 0.0003686492855194956 2023-01-22 17:17:15.738552: step: 738/459, loss: 0.02627079002559185 2023-01-22 17:17:16.367384: step: 740/459, loss: 0.023761678487062454 2023-01-22 17:17:17.022209: step: 742/459, loss: 0.1037098839879036 2023-01-22 17:17:17.625510: step: 744/459, loss: 0.049269113689661026 2023-01-22 17:17:18.239704: step: 746/459, loss: 0.17002254724502563 2023-01-22 17:17:18.888334: step: 748/459, loss: 0.16266314685344696 2023-01-22 17:17:19.606544: step: 750/459, loss: 0.16009916365146637 2023-01-22 17:17:20.214058: step: 752/459, loss: 0.7774741053581238 2023-01-22 17:17:20.862182: step: 754/459, loss: 0.04822846129536629 2023-01-22 17:17:21.481697: step: 756/459, loss: 0.05717698112130165 2023-01-22 17:17:22.031615: step: 758/459, loss: 0.033003583550453186 2023-01-22 17:17:22.611987: step: 760/459, loss: 0.0905524343252182 2023-01-22 17:17:23.168704: step: 762/459, loss: 0.048433102667331696 2023-01-22 17:17:23.734081: step: 764/459, loss: 0.03837824612855911 2023-01-22 17:17:24.333452: step: 766/459, loss: 0.06432680040597916 2023-01-22 17:17:24.967627: step: 768/459, loss: 0.05403800308704376 2023-01-22 17:17:25.631185: step: 770/459, loss: 0.21634043753147125 2023-01-22 17:17:26.160814: step: 772/459, loss: 0.036848895251750946 2023-01-22 17:17:26.783017: step: 774/459, loss: 0.05508994683623314 2023-01-22 17:17:27.348018: step: 776/459, loss: 0.007275050971657038 2023-01-22 17:17:27.978656: step: 778/459, loss: 0.012403182685375214 2023-01-22 17:17:28.637472: step: 780/459, loss: 0.10270638018846512 2023-01-22 17:17:29.241833: step: 782/459, loss: 0.0770593136548996 2023-01-22 17:17:29.892818: step: 784/459, loss: 0.014948186464607716 2023-01-22 17:17:30.455345: step: 786/459, loss: 0.05195844545960426 2023-01-22 17:17:31.132310: step: 788/459, loss: 0.16843312978744507 2023-01-22 17:17:31.794406: step: 790/459, loss: 0.2573939263820648 2023-01-22 17:17:32.404804: step: 792/459, loss: 0.18907125294208527 2023-01-22 17:17:33.086107: step: 794/459, loss: 0.17564135789871216 2023-01-22 17:17:33.700780: step: 796/459, loss: 0.05086379498243332 2023-01-22 17:17:34.294788: step: 798/459, loss: 0.01736379973590374 2023-01-22 17:17:34.943825: step: 800/459, loss: 0.02694394625723362 2023-01-22 17:17:35.510616: step: 802/459, loss: 0.037567608058452606 2023-01-22 17:17:36.138103: step: 804/459, loss: 0.005208540242165327 2023-01-22 17:17:36.791016: step: 806/459, loss: 0.19682230055332184 2023-01-22 17:17:37.449850: step: 808/459, loss: 0.02230658195912838 2023-01-22 17:17:38.052268: step: 810/459, loss: 0.04600980132818222 2023-01-22 17:17:38.630006: step: 812/459, loss: 0.7404766082763672 2023-01-22 17:17:39.252465: step: 814/459, loss: 0.018147923052310944 2023-01-22 17:17:39.919524: step: 816/459, loss: 0.029244672507047653 2023-01-22 17:17:40.546137: step: 818/459, loss: 0.3575712740421295 2023-01-22 17:17:41.140865: step: 820/459, loss: 0.09043259918689728 2023-01-22 17:17:41.708848: step: 822/459, loss: 0.004887314047664404 2023-01-22 17:17:42.355919: step: 824/459, loss: 0.025286555290222168 2023-01-22 17:17:42.943974: step: 826/459, loss: 0.02576921135187149 2023-01-22 17:17:43.582018: step: 828/459, loss: 0.03326186165213585 2023-01-22 17:17:44.232861: step: 830/459, loss: 0.1284000426530838 2023-01-22 17:17:44.908419: step: 832/459, loss: 0.19677087664604187 2023-01-22 17:17:45.591520: step: 834/459, loss: 0.053813811391592026 2023-01-22 17:17:46.206349: step: 836/459, loss: 0.11758706718683243 2023-01-22 17:17:46.847756: step: 838/459, loss: 0.6713539361953735 2023-01-22 17:17:47.442190: step: 840/459, loss: 0.2209496945142746 2023-01-22 17:17:48.051843: step: 842/459, loss: 0.06985381245613098 2023-01-22 17:17:48.645769: step: 844/459, loss: 0.06685090810060501 2023-01-22 17:17:49.257692: step: 846/459, loss: 0.025711989030241966 2023-01-22 17:17:49.938734: step: 848/459, loss: 0.039702024310827255 2023-01-22 17:17:50.561553: step: 850/459, loss: 0.05915825814008713 2023-01-22 17:17:51.130653: step: 852/459, loss: 0.003894324880093336 2023-01-22 17:17:51.756909: step: 854/459, loss: 0.07864288240671158 2023-01-22 17:17:52.338022: step: 856/459, loss: 0.11727706342935562 2023-01-22 17:17:52.954150: step: 858/459, loss: 0.33039039373397827 2023-01-22 17:17:53.587604: step: 860/459, loss: 0.21867568790912628 2023-01-22 17:17:54.222299: step: 862/459, loss: 0.03777242824435234 2023-01-22 17:17:54.813714: step: 864/459, loss: 0.028931815177202225 2023-01-22 17:17:55.457317: step: 866/459, loss: 0.05200423672795296 2023-01-22 17:17:56.051735: step: 868/459, loss: 0.030135739594697952 2023-01-22 17:17:56.762599: step: 870/459, loss: 0.005960527807474136 2023-01-22 17:17:57.355238: step: 872/459, loss: 0.006475598085671663 2023-01-22 17:17:57.897998: step: 874/459, loss: 0.22794947028160095 2023-01-22 17:17:58.485195: step: 876/459, loss: 0.023363526910543442 2023-01-22 17:17:59.105544: step: 878/459, loss: 0.022140292450785637 2023-01-22 17:17:59.791532: step: 880/459, loss: 0.0012999719474464655 2023-01-22 17:18:00.412069: step: 882/459, loss: 0.02847246453166008 2023-01-22 17:18:01.041076: step: 884/459, loss: 0.10185854136943817 2023-01-22 17:18:01.598173: step: 886/459, loss: 0.07725100964307785 2023-01-22 17:18:02.310424: step: 888/459, loss: 0.03612612932920456 2023-01-22 17:18:02.919076: step: 890/459, loss: 0.03800543397665024 2023-01-22 17:18:03.547045: step: 892/459, loss: 0.010089004412293434 2023-01-22 17:18:04.243757: step: 894/459, loss: 0.07320858538150787 2023-01-22 17:18:04.874470: step: 896/459, loss: 0.04084397479891777 2023-01-22 17:18:05.455210: step: 898/459, loss: 0.012818755581974983 2023-01-22 17:18:06.147146: step: 900/459, loss: 0.05754971504211426 2023-01-22 17:18:06.722106: step: 902/459, loss: 0.014546235091984272 2023-01-22 17:18:07.425522: step: 904/459, loss: 0.0433381088078022 2023-01-22 17:18:08.055323: step: 906/459, loss: 0.10205716639757156 2023-01-22 17:18:08.695303: step: 908/459, loss: 0.06716811656951904 2023-01-22 17:18:09.328649: step: 910/459, loss: 0.06528419256210327 2023-01-22 17:18:09.923644: step: 912/459, loss: 0.012115002609789371 2023-01-22 17:18:10.514092: step: 914/459, loss: 0.18304064869880676 2023-01-22 17:18:11.193738: step: 916/459, loss: 0.031166477128863335 2023-01-22 17:18:11.820716: step: 918/459, loss: 0.04557126760482788 2023-01-22 17:18:12.245048: step: 920/459, loss: 0.00010487787221791223 ================================================== Loss: 0.101 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3134982701807599, 'r': 0.33134447151932306, 'f1': 0.3221744215695263}, 'combined': 0.2373916790512299, 'epoch': 22} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3311319082448348, 'r': 0.32179151774839265, 'f1': 0.32639490355836037}, 'combined': 0.2088927382773506, 'epoch': 22} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3085316828530489, 'r': 0.33429144385026743, 'f1': 0.32089542970690516}, 'combined': 0.23644926399456168, 'epoch': 22} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33038242748816987, 'r': 0.3258772125678766, 'f1': 0.32811435590358284}, 'combined': 0.20999318777829298, 'epoch': 22} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32612066166128206, 'r': 0.3360218582202584, 'f1': 0.3309972323029461}, 'combined': 0.24389269748638132, 'epoch': 22} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3271180418217118, 'r': 0.3357656473028692, 'f1': 0.3313854386785852}, 'combined': 0.23759710697709885, 'epoch': 22} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.259963768115942, 'r': 0.3416666666666666, 'f1': 0.2952674897119341}, 'combined': 0.1968449931412894, 'epoch': 22} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.27941176470588236, 'r': 0.41304347826086957, 'f1': 0.33333333333333337}, 'combined': 0.16666666666666669, 'epoch': 22} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.1724137931034483, 'f1': 0.22727272727272724}, 'combined': 0.1515151515151515, 'epoch': 22} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3066986908783784, 'r': 0.3445268026565465, 'f1': 0.3245140750670242}, 'combined': 0.23911563425991253, 'epoch': 19} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31271473684488904, 'r': 0.30873473110322686, 'f1': 0.31071198921642224}, 'combined': 0.1988556730985102, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2564102564102564, 'r': 0.38095238095238093, 'f1': 0.3065134099616858}, 'combined': 0.20434227330779053, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29418702499903554, 'r': 0.3594998939267151, 'f1': 0.32358060478117656}, 'combined': 0.23842781404928798, 'epoch': 13} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3202762492367604, 'r': 0.31212376289255195, 'f1': 0.31614745781013553}, 'combined': 0.20233437299848672, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2878787878787879, 'r': 0.41304347826086957, 'f1': 0.3392857142857143}, 'combined': 0.16964285714285715, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3189032241859154, 'r': 0.3376622373733222, 'f1': 0.32801474487694154}, 'combined': 0.24169507517248323, 'epoch': 15} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34157316773046453, 'r': 0.3008578947798814, 'f1': 0.3199253278791385}, 'combined': 0.22938042376240123, 'epoch': 15} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35294117647058826, 'r': 0.20689655172413793, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 15} ****************************** Epoch: 23 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 17:20:47.913766: step: 2/459, loss: 0.08627480268478394 2023-01-22 17:20:48.530603: step: 4/459, loss: 0.03302153944969177 2023-01-22 17:20:49.285667: step: 6/459, loss: 0.07973629981279373 2023-01-22 17:20:49.955985: step: 8/459, loss: 0.013533671386539936 2023-01-22 17:20:50.611197: step: 10/459, loss: 0.029807686805725098 2023-01-22 17:20:51.258025: step: 12/459, loss: 0.003256802447140217 2023-01-22 17:20:51.868280: step: 14/459, loss: 0.1382424384355545 2023-01-22 17:20:52.498580: step: 16/459, loss: 0.03893426060676575 2023-01-22 17:20:53.153880: step: 18/459, loss: 0.07182061672210693 2023-01-22 17:20:53.742977: step: 20/459, loss: 0.02694215439260006 2023-01-22 17:20:54.348988: step: 22/459, loss: 0.04553443193435669 2023-01-22 17:20:54.974753: step: 24/459, loss: 0.008797201327979565 2023-01-22 17:20:55.570326: step: 26/459, loss: 0.8531522154808044 2023-01-22 17:20:56.128048: step: 28/459, loss: 0.07958583533763885 2023-01-22 17:20:56.797685: step: 30/459, loss: 0.0044428324326872826 2023-01-22 17:20:57.428003: step: 32/459, loss: 0.12259700894355774 2023-01-22 17:20:58.103783: step: 34/459, loss: 0.023946823552250862 2023-01-22 17:20:58.708195: step: 36/459, loss: 0.03203597664833069 2023-01-22 17:20:59.325828: step: 38/459, loss: 0.05041579157114029 2023-01-22 17:20:59.964352: step: 40/459, loss: 0.03662065789103508 2023-01-22 17:21:00.633123: step: 42/459, loss: 0.021469194442033768 2023-01-22 17:21:01.265984: step: 44/459, loss: 0.005933227948844433 2023-01-22 17:21:01.881412: step: 46/459, loss: 0.0910499170422554 2023-01-22 17:21:02.533805: step: 48/459, loss: 0.061675671488046646 2023-01-22 17:21:03.176169: step: 50/459, loss: 0.022055042907595634 2023-01-22 17:21:03.769131: step: 52/459, loss: 0.05096252262592316 2023-01-22 17:21:04.437319: step: 54/459, loss: 0.049728233367204666 2023-01-22 17:21:05.014088: step: 56/459, loss: 0.04489203542470932 2023-01-22 17:21:05.642843: step: 58/459, loss: 0.015598439611494541 2023-01-22 17:21:06.237474: step: 60/459, loss: 0.028355680406093597 2023-01-22 17:21:06.806558: step: 62/459, loss: 0.0056373849511146545 2023-01-22 17:21:07.425900: step: 64/459, loss: 0.019454535096883774 2023-01-22 17:21:08.055815: step: 66/459, loss: 0.00410076417028904 2023-01-22 17:21:08.669537: step: 68/459, loss: 0.01834918186068535 2023-01-22 17:21:09.311507: step: 70/459, loss: 0.13957636058330536 2023-01-22 17:21:09.961310: step: 72/459, loss: 0.4333879053592682 2023-01-22 17:21:10.596348: step: 74/459, loss: 0.03833833336830139 2023-01-22 17:21:11.173873: step: 76/459, loss: 1.2951456308364868 2023-01-22 17:21:11.786034: step: 78/459, loss: 0.009863047860562801 2023-01-22 17:21:12.406855: step: 80/459, loss: 0.0431496724486351 2023-01-22 17:21:13.024279: step: 82/459, loss: 0.08600941300392151 2023-01-22 17:21:13.635645: step: 84/459, loss: 0.01698673702776432 2023-01-22 17:21:14.230305: step: 86/459, loss: 0.031414926052093506 2023-01-22 17:21:14.878467: step: 88/459, loss: 0.008186540566384792 2023-01-22 17:21:15.533996: step: 90/459, loss: 0.020903339609503746 2023-01-22 17:21:16.109034: step: 92/459, loss: 0.03385529667139053 2023-01-22 17:21:16.635109: step: 94/459, loss: 0.032321564853191376 2023-01-22 17:21:17.240012: step: 96/459, loss: 0.024783242493867874 2023-01-22 17:21:17.798503: step: 98/459, loss: 0.06923066824674606 2023-01-22 17:21:18.394580: step: 100/459, loss: 0.047564033418893814 2023-01-22 17:21:19.033260: step: 102/459, loss: 0.056281108409166336 2023-01-22 17:21:19.642212: step: 104/459, loss: 0.0512361116707325 2023-01-22 17:21:20.243194: step: 106/459, loss: 0.04753617197275162 2023-01-22 17:21:20.856628: step: 108/459, loss: 0.013952820561826229 2023-01-22 17:21:21.433557: step: 110/459, loss: 0.012793589383363724 2023-01-22 17:21:22.044306: step: 112/459, loss: 0.03614550456404686 2023-01-22 17:21:22.651642: step: 114/459, loss: 0.04681338369846344 2023-01-22 17:21:23.268746: step: 116/459, loss: 0.08573553711175919 2023-01-22 17:21:23.895896: step: 118/459, loss: 0.0650978833436966 2023-01-22 17:21:24.496576: step: 120/459, loss: 0.017680954188108444 2023-01-22 17:21:25.077254: step: 122/459, loss: 0.02785453572869301 2023-01-22 17:21:25.685176: step: 124/459, loss: 0.05343882367014885 2023-01-22 17:21:26.325562: step: 126/459, loss: 0.009142217226326466 2023-01-22 17:21:26.992244: step: 128/459, loss: 0.022594138979911804 2023-01-22 17:21:27.667302: step: 130/459, loss: 0.040068671107292175 2023-01-22 17:21:28.224274: step: 132/459, loss: 0.3424657881259918 2023-01-22 17:21:28.768469: step: 134/459, loss: 0.031511496752500534 2023-01-22 17:21:29.333136: step: 136/459, loss: 0.014830965548753738 2023-01-22 17:21:29.937600: step: 138/459, loss: 0.014689805917441845 2023-01-22 17:21:30.554603: step: 140/459, loss: 0.05281566083431244 2023-01-22 17:21:31.218802: step: 142/459, loss: 0.14595790207386017 2023-01-22 17:21:31.906041: step: 144/459, loss: 0.002532534534111619 2023-01-22 17:21:32.570227: step: 146/459, loss: 0.06003907322883606 2023-01-22 17:21:33.138716: step: 148/459, loss: 0.0028246331494301558 2023-01-22 17:21:33.750766: step: 150/459, loss: 0.10398107022047043 2023-01-22 17:21:34.402168: step: 152/459, loss: 0.012647167779505253 2023-01-22 17:21:35.048601: step: 154/459, loss: 0.056552641093730927 2023-01-22 17:21:35.740688: step: 156/459, loss: 0.016029033809900284 2023-01-22 17:21:36.305586: step: 158/459, loss: 0.01812479831278324 2023-01-22 17:21:36.955026: step: 160/459, loss: 0.03872637450695038 2023-01-22 17:21:37.600955: step: 162/459, loss: 0.009962227195501328 2023-01-22 17:21:38.254924: step: 164/459, loss: 0.7418813109397888 2023-01-22 17:21:38.877049: step: 166/459, loss: 0.21723107993602753 2023-01-22 17:21:39.513255: step: 168/459, loss: 0.013914971612393856 2023-01-22 17:21:40.141516: step: 170/459, loss: 0.024982091039419174 2023-01-22 17:21:40.731949: step: 172/459, loss: 0.015950795263051987 2023-01-22 17:21:41.354109: step: 174/459, loss: 0.14222311973571777 2023-01-22 17:21:42.043029: step: 176/459, loss: 0.37950006127357483 2023-01-22 17:21:42.699371: step: 178/459, loss: 0.04590025916695595 2023-01-22 17:21:43.392002: step: 180/459, loss: 0.008111425675451756 2023-01-22 17:21:43.971249: step: 182/459, loss: 0.009809991344809532 2023-01-22 17:21:44.665549: step: 184/459, loss: 0.4350547790527344 2023-01-22 17:21:45.291465: step: 186/459, loss: 0.009150789119303226 2023-01-22 17:21:45.927126: step: 188/459, loss: 0.02723940834403038 2023-01-22 17:21:46.545266: step: 190/459, loss: 1.3680834770202637 2023-01-22 17:21:47.079885: step: 192/459, loss: 0.021531766280531883 2023-01-22 17:21:47.682981: step: 194/459, loss: 0.005127781070768833 2023-01-22 17:21:48.328875: step: 196/459, loss: 0.02562432549893856 2023-01-22 17:21:48.949784: step: 198/459, loss: 0.1500994861125946 2023-01-22 17:21:49.629103: step: 200/459, loss: 0.0359063446521759 2023-01-22 17:21:50.236571: step: 202/459, loss: 0.017917780205607414 2023-01-22 17:21:50.865684: step: 204/459, loss: 0.01464832667261362 2023-01-22 17:21:51.455905: step: 206/459, loss: 0.05808012932538986 2023-01-22 17:21:52.093276: step: 208/459, loss: 0.03505484759807587 2023-01-22 17:21:52.690880: step: 210/459, loss: 0.07930980622768402 2023-01-22 17:21:53.285615: step: 212/459, loss: 0.07424855977296829 2023-01-22 17:21:53.903510: step: 214/459, loss: 0.038320641964673996 2023-01-22 17:21:54.481980: step: 216/459, loss: 0.03664332628250122 2023-01-22 17:21:55.112479: step: 218/459, loss: 0.00567859411239624 2023-01-22 17:21:55.762634: step: 220/459, loss: 0.031259287148714066 2023-01-22 17:21:56.388680: step: 222/459, loss: 0.052787113934755325 2023-01-22 17:21:56.980994: step: 224/459, loss: 0.014944338239729404 2023-01-22 17:21:57.547903: step: 226/459, loss: 0.282993882894516 2023-01-22 17:21:58.133218: step: 228/459, loss: 0.0006968651432543993 2023-01-22 17:21:58.744415: step: 230/459, loss: 0.1355431228876114 2023-01-22 17:21:59.347792: step: 232/459, loss: 0.014861891977488995 2023-01-22 17:21:59.972102: step: 234/459, loss: 0.04805498942732811 2023-01-22 17:22:00.638862: step: 236/459, loss: 0.06877084076404572 2023-01-22 17:22:01.298832: step: 238/459, loss: 0.0029047068674117327 2023-01-22 17:22:01.898667: step: 240/459, loss: 0.052191492170095444 2023-01-22 17:22:02.511366: step: 242/459, loss: 0.00793328694999218 2023-01-22 17:22:03.115302: step: 244/459, loss: 0.016262833029031754 2023-01-22 17:22:03.786240: step: 246/459, loss: 0.07135670632123947 2023-01-22 17:22:04.380056: step: 248/459, loss: 0.2759213447570801 2023-01-22 17:22:05.007799: step: 250/459, loss: 0.043351706117391586 2023-01-22 17:22:05.617676: step: 252/459, loss: 0.04087109863758087 2023-01-22 17:22:06.143590: step: 254/459, loss: 0.07491416484117508 2023-01-22 17:22:06.772004: step: 256/459, loss: 0.005117062013596296 2023-01-22 17:22:07.390299: step: 258/459, loss: 0.023986445739865303 2023-01-22 17:22:07.999321: step: 260/459, loss: 0.04290802776813507 2023-01-22 17:22:08.652457: step: 262/459, loss: 0.08199816197156906 2023-01-22 17:22:09.277802: step: 264/459, loss: 0.03209898620843887 2023-01-22 17:22:09.953194: step: 266/459, loss: 0.15472671389579773 2023-01-22 17:22:10.555148: step: 268/459, loss: 0.10133934766054153 2023-01-22 17:22:11.135430: step: 270/459, loss: 0.07316446304321289 2023-01-22 17:22:11.771683: step: 272/459, loss: 0.010442809201776981 2023-01-22 17:22:12.396628: step: 274/459, loss: 0.1416461318731308 2023-01-22 17:22:13.000938: step: 276/459, loss: 0.015155657194554806 2023-01-22 17:22:13.623455: step: 278/459, loss: 0.025075513869524002 2023-01-22 17:22:14.280961: step: 280/459, loss: 0.0017982330173254013 2023-01-22 17:22:14.902655: step: 282/459, loss: 0.03898642212152481 2023-01-22 17:22:15.529836: step: 284/459, loss: 1.039948582649231 2023-01-22 17:22:16.230016: step: 286/459, loss: 0.06217994913458824 2023-01-22 17:22:16.859628: step: 288/459, loss: 0.015742473304271698 2023-01-22 17:22:17.510347: step: 290/459, loss: 0.025965431705117226 2023-01-22 17:22:18.162828: step: 292/459, loss: 0.011567634530365467 2023-01-22 17:22:18.778119: step: 294/459, loss: 0.18412260711193085 2023-01-22 17:22:19.495019: step: 296/459, loss: 0.020994210615754128 2023-01-22 17:22:20.058766: step: 298/459, loss: 0.0009078008006326854 2023-01-22 17:22:20.692778: step: 300/459, loss: 0.0028688672464340925 2023-01-22 17:22:21.314062: step: 302/459, loss: 0.16158413887023926 2023-01-22 17:22:21.921285: step: 304/459, loss: 0.03436434641480446 2023-01-22 17:22:22.558676: step: 306/459, loss: 0.011973277665674686 2023-01-22 17:22:23.197197: step: 308/459, loss: 0.09083597362041473 2023-01-22 17:22:23.835712: step: 310/459, loss: 0.007074462249875069 2023-01-22 17:22:24.400681: step: 312/459, loss: 0.010907858610153198 2023-01-22 17:22:25.016018: step: 314/459, loss: 0.052062857896089554 2023-01-22 17:22:25.614556: step: 316/459, loss: 0.038011688739061356 2023-01-22 17:22:26.272834: step: 318/459, loss: 0.050265517085790634 2023-01-22 17:22:26.941156: step: 320/459, loss: 0.08120229095220566 2023-01-22 17:22:27.534743: step: 322/459, loss: 0.09737682342529297 2023-01-22 17:22:28.323362: step: 324/459, loss: 0.12042336165904999 2023-01-22 17:22:28.893607: step: 326/459, loss: 0.048932865262031555 2023-01-22 17:22:29.507976: step: 328/459, loss: 0.02027304284274578 2023-01-22 17:22:30.155325: step: 330/459, loss: 0.02541283518075943 2023-01-22 17:22:30.709997: step: 332/459, loss: 0.012168185785412788 2023-01-22 17:22:31.341130: step: 334/459, loss: 0.0014145594323053956 2023-01-22 17:22:31.900952: step: 336/459, loss: 0.04354682192206383 2023-01-22 17:22:32.495176: step: 338/459, loss: 0.04926249012351036 2023-01-22 17:22:33.089688: step: 340/459, loss: 0.005208548624068499 2023-01-22 17:22:33.685849: step: 342/459, loss: 0.007167977746576071 2023-01-22 17:22:34.319666: step: 344/459, loss: 0.03821176663041115 2023-01-22 17:22:34.911378: step: 346/459, loss: 0.005527811590582132 2023-01-22 17:22:35.533648: step: 348/459, loss: 0.08376617729663849 2023-01-22 17:22:36.105859: step: 350/459, loss: 0.10337590426206589 2023-01-22 17:22:36.773572: step: 352/459, loss: 0.024455899372696877 2023-01-22 17:22:37.380545: step: 354/459, loss: 0.05704716593027115 2023-01-22 17:22:38.023400: step: 356/459, loss: 0.05892262980341911 2023-01-22 17:22:38.710192: step: 358/459, loss: 0.17841222882270813 2023-01-22 17:22:39.265014: step: 360/459, loss: 0.04586251452565193 2023-01-22 17:22:39.873902: step: 362/459, loss: 0.1322636604309082 2023-01-22 17:22:40.518581: step: 364/459, loss: 0.0007297742995433509 2023-01-22 17:22:41.142208: step: 366/459, loss: 0.01368697639554739 2023-01-22 17:22:41.758842: step: 368/459, loss: 0.006010224111378193 2023-01-22 17:22:42.331081: step: 370/459, loss: 0.02780664712190628 2023-01-22 17:22:42.965651: step: 372/459, loss: 0.062259044498205185 2023-01-22 17:22:43.600097: step: 374/459, loss: 0.01003385428339243 2023-01-22 17:22:44.223024: step: 376/459, loss: 0.04706886038184166 2023-01-22 17:22:44.840282: step: 378/459, loss: 0.06770870834589005 2023-01-22 17:22:45.450049: step: 380/459, loss: 0.003733854740858078 2023-01-22 17:22:46.065295: step: 382/459, loss: 0.03182694688439369 2023-01-22 17:22:46.697727: step: 384/459, loss: 0.0381680466234684 2023-01-22 17:22:47.333061: step: 386/459, loss: 0.013581687584519386 2023-01-22 17:22:47.995857: step: 388/459, loss: 0.028861133381724358 2023-01-22 17:22:48.660159: step: 390/459, loss: 0.001302835182286799 2023-01-22 17:22:49.334686: step: 392/459, loss: 0.013083875179290771 2023-01-22 17:22:49.924674: step: 394/459, loss: 0.0030179063323885202 2023-01-22 17:22:50.552848: step: 396/459, loss: 0.03583589941263199 2023-01-22 17:22:51.154350: step: 398/459, loss: 0.003412493271753192 2023-01-22 17:22:51.830453: step: 400/459, loss: 0.048477042466402054 2023-01-22 17:22:52.463130: step: 402/459, loss: 0.08937758207321167 2023-01-22 17:22:53.112369: step: 404/459, loss: 0.005932438187301159 2023-01-22 17:22:53.708447: step: 406/459, loss: 0.03710130974650383 2023-01-22 17:22:54.330828: step: 408/459, loss: 0.14107519388198853 2023-01-22 17:22:54.950827: step: 410/459, loss: 0.01864292100071907 2023-01-22 17:22:55.569425: step: 412/459, loss: 0.020329352468252182 2023-01-22 17:22:56.148066: step: 414/459, loss: 0.0007914012530818582 2023-01-22 17:22:56.760266: step: 416/459, loss: 0.03510862961411476 2023-01-22 17:22:57.373233: step: 418/459, loss: 0.0550374835729599 2023-01-22 17:22:58.050758: step: 420/459, loss: 0.039951175451278687 2023-01-22 17:22:58.684009: step: 422/459, loss: 0.023357439786195755 2023-01-22 17:22:59.263545: step: 424/459, loss: 0.030301902443170547 2023-01-22 17:22:59.907702: step: 426/459, loss: 0.042458683252334595 2023-01-22 17:23:00.520074: step: 428/459, loss: 0.141988605260849 2023-01-22 17:23:01.166481: step: 430/459, loss: 0.06375636905431747 2023-01-22 17:23:01.808771: step: 432/459, loss: 0.028632624074816704 2023-01-22 17:23:02.401114: step: 434/459, loss: 0.016150452196598053 2023-01-22 17:23:03.042270: step: 436/459, loss: 0.006647998932749033 2023-01-22 17:23:03.687590: step: 438/459, loss: 0.06509237736463547 2023-01-22 17:23:04.325380: step: 440/459, loss: 0.0824255421757698 2023-01-22 17:23:04.950159: step: 442/459, loss: 0.022789819166064262 2023-01-22 17:23:05.624512: step: 444/459, loss: 0.002397359348833561 2023-01-22 17:23:06.244143: step: 446/459, loss: 0.03110719844698906 2023-01-22 17:23:06.833595: step: 448/459, loss: 0.012367240153253078 2023-01-22 17:23:07.504821: step: 450/459, loss: 0.04528667777776718 2023-01-22 17:23:08.060109: step: 452/459, loss: 0.015939565375447273 2023-01-22 17:23:08.688175: step: 454/459, loss: 0.04416550323367119 2023-01-22 17:23:09.296830: step: 456/459, loss: 0.15298335254192352 2023-01-22 17:23:09.887366: step: 458/459, loss: 0.018790055066347122 2023-01-22 17:23:10.479231: step: 460/459, loss: 0.005302248056977987 2023-01-22 17:23:11.050029: step: 462/459, loss: 0.004761774558573961 2023-01-22 17:23:11.671937: step: 464/459, loss: 0.005685572512447834 2023-01-22 17:23:12.246453: step: 466/459, loss: 0.01253502257168293 2023-01-22 17:23:12.846955: step: 468/459, loss: 0.031334683299064636 2023-01-22 17:23:13.484068: step: 470/459, loss: 0.014429938048124313 2023-01-22 17:23:14.075254: step: 472/459, loss: 0.06484458595514297 2023-01-22 17:23:14.695737: step: 474/459, loss: 0.12649060785770416 2023-01-22 17:23:15.287578: step: 476/459, loss: 0.032982125878334045 2023-01-22 17:23:15.857460: step: 478/459, loss: 0.12741893529891968 2023-01-22 17:23:16.470174: step: 480/459, loss: 0.012636019848287106 2023-01-22 17:23:17.050024: step: 482/459, loss: 0.09595763683319092 2023-01-22 17:23:17.631534: step: 484/459, loss: 0.03388001769781113 2023-01-22 17:23:18.234826: step: 486/459, loss: 0.1752392202615738 2023-01-22 17:23:18.811031: step: 488/459, loss: 0.0006838826229795814 2023-01-22 17:23:19.451756: step: 490/459, loss: 0.039235714823007584 2023-01-22 17:23:20.087668: step: 492/459, loss: 0.01202980987727642 2023-01-22 17:23:20.722594: step: 494/459, loss: 0.01376819796860218 2023-01-22 17:23:21.289169: step: 496/459, loss: 0.941684901714325 2023-01-22 17:23:21.968925: step: 498/459, loss: 0.012485415674746037 2023-01-22 17:23:22.593729: step: 500/459, loss: 3.17387056350708 2023-01-22 17:23:23.183617: step: 502/459, loss: 0.006592405494302511 2023-01-22 17:23:23.790940: step: 504/459, loss: 0.08208411186933517 2023-01-22 17:23:24.353497: step: 506/459, loss: 0.023171095177531242 2023-01-22 17:23:25.000794: step: 508/459, loss: 0.1219007596373558 2023-01-22 17:23:25.665162: step: 510/459, loss: 0.027485797181725502 2023-01-22 17:23:26.280355: step: 512/459, loss: 0.04042671620845795 2023-01-22 17:23:26.911388: step: 514/459, loss: 0.02810647338628769 2023-01-22 17:23:27.484025: step: 516/459, loss: 0.03969293087720871 2023-01-22 17:23:28.124148: step: 518/459, loss: 0.0101367412135005 2023-01-22 17:23:28.755851: step: 520/459, loss: 0.08979977667331696 2023-01-22 17:23:29.326987: step: 522/459, loss: 0.008515375666320324 2023-01-22 17:23:29.966986: step: 524/459, loss: 0.1203165128827095 2023-01-22 17:23:30.611165: step: 526/459, loss: 0.066526398062706 2023-01-22 17:23:31.194079: step: 528/459, loss: 0.014821610413491726 2023-01-22 17:23:31.752109: step: 530/459, loss: 0.34888139367103577 2023-01-22 17:23:32.367465: step: 532/459, loss: 0.020176442340016365 2023-01-22 17:23:32.959771: step: 534/459, loss: 0.006460756063461304 2023-01-22 17:23:33.580423: step: 536/459, loss: 0.13633622229099274 2023-01-22 17:23:34.197542: step: 538/459, loss: 0.07648888230323792 2023-01-22 17:23:34.827527: step: 540/459, loss: 0.20856201648712158 2023-01-22 17:23:35.415374: step: 542/459, loss: 0.06899577379226685 2023-01-22 17:23:36.076618: step: 544/459, loss: 0.1830524355173111 2023-01-22 17:23:36.670358: step: 546/459, loss: 0.0728459432721138 2023-01-22 17:23:37.239705: step: 548/459, loss: 0.15491588413715363 2023-01-22 17:23:37.870751: step: 550/459, loss: 0.22158993780612946 2023-01-22 17:23:38.500248: step: 552/459, loss: 0.023318588733673096 2023-01-22 17:23:39.144919: step: 554/459, loss: 0.11909421533346176 2023-01-22 17:23:39.743570: step: 556/459, loss: 0.08287063986063004 2023-01-22 17:23:40.317654: step: 558/459, loss: 0.02961014397442341 2023-01-22 17:23:40.982833: step: 560/459, loss: 0.025154974311590195 2023-01-22 17:23:41.557282: step: 562/459, loss: 0.47467240691185 2023-01-22 17:23:42.139428: step: 564/459, loss: 0.010706230066716671 2023-01-22 17:23:42.851548: step: 566/459, loss: 0.035729654133319855 2023-01-22 17:23:43.520499: step: 568/459, loss: 0.033275291323661804 2023-01-22 17:23:44.167311: step: 570/459, loss: 0.02481965906918049 2023-01-22 17:23:44.814402: step: 572/459, loss: 0.032114963978528976 2023-01-22 17:23:45.451197: step: 574/459, loss: 0.02783932164311409 2023-01-22 17:23:46.083373: step: 576/459, loss: 0.0664207860827446 2023-01-22 17:23:46.682282: step: 578/459, loss: 0.008546116761863232 2023-01-22 17:23:47.301923: step: 580/459, loss: 0.06001521274447441 2023-01-22 17:23:47.874990: step: 582/459, loss: 0.000594617857132107 2023-01-22 17:23:48.485088: step: 584/459, loss: 0.034523848444223404 2023-01-22 17:23:49.070395: step: 586/459, loss: 0.09903448820114136 2023-01-22 17:23:49.666619: step: 588/459, loss: 0.0560692623257637 2023-01-22 17:23:50.249846: step: 590/459, loss: 0.06440544128417969 2023-01-22 17:23:50.857292: step: 592/459, loss: 0.0421396940946579 2023-01-22 17:23:51.573304: step: 594/459, loss: 0.03285801038146019 2023-01-22 17:23:52.214085: step: 596/459, loss: 0.024141667410731316 2023-01-22 17:23:52.882257: step: 598/459, loss: 0.052182428538799286 2023-01-22 17:23:53.460672: step: 600/459, loss: 0.038584113121032715 2023-01-22 17:23:53.978599: step: 602/459, loss: 0.02681732550263405 2023-01-22 17:23:54.552128: step: 604/459, loss: 0.010044309310615063 2023-01-22 17:23:55.229204: step: 606/459, loss: 0.053498391062021255 2023-01-22 17:23:55.862228: step: 608/459, loss: 0.0175472442060709 2023-01-22 17:23:56.535430: step: 610/459, loss: 0.1283857822418213 2023-01-22 17:23:57.191006: step: 612/459, loss: 0.01870698481798172 2023-01-22 17:23:57.830553: step: 614/459, loss: 0.07209707796573639 2023-01-22 17:23:58.444674: step: 616/459, loss: 0.06765498220920563 2023-01-22 17:23:59.040062: step: 618/459, loss: 0.03469505161046982 2023-01-22 17:23:59.660053: step: 620/459, loss: 0.017407672479748726 2023-01-22 17:24:00.343750: step: 622/459, loss: 0.17810097336769104 2023-01-22 17:24:00.880959: step: 624/459, loss: 0.014440138824284077 2023-01-22 17:24:01.443153: step: 626/459, loss: 0.022237280383706093 2023-01-22 17:24:02.029664: step: 628/459, loss: 0.03245306760072708 2023-01-22 17:24:02.592748: step: 630/459, loss: 0.015389066189527512 2023-01-22 17:24:03.246760: step: 632/459, loss: 0.03505747392773628 2023-01-22 17:24:03.874945: step: 634/459, loss: 0.06157718598842621 2023-01-22 17:24:04.536477: step: 636/459, loss: 0.02352411299943924 2023-01-22 17:24:05.296362: step: 638/459, loss: 0.023740258067846298 2023-01-22 17:24:05.899270: step: 640/459, loss: 0.004520630929619074 2023-01-22 17:24:06.537217: step: 642/459, loss: 0.0186782106757164 2023-01-22 17:24:07.181354: step: 644/459, loss: 0.4452179968357086 2023-01-22 17:24:07.775800: step: 646/459, loss: 0.030085664242506027 2023-01-22 17:24:08.436277: step: 648/459, loss: 0.07845152914524078 2023-01-22 17:24:09.059755: step: 650/459, loss: 7.771837408654392e-05 2023-01-22 17:24:09.777248: step: 652/459, loss: 0.5006208419799805 2023-01-22 17:24:10.364241: step: 654/459, loss: 0.02215568721294403 2023-01-22 17:24:11.037501: step: 656/459, loss: 0.003917305264621973 2023-01-22 17:24:11.614409: step: 658/459, loss: 0.035129498690366745 2023-01-22 17:24:12.194843: step: 660/459, loss: 0.038250237703323364 2023-01-22 17:24:12.826799: step: 662/459, loss: 0.030814100056886673 2023-01-22 17:24:13.533419: step: 664/459, loss: 0.01617497019469738 2023-01-22 17:24:14.165301: step: 666/459, loss: 0.028071217238903046 2023-01-22 17:24:14.782712: step: 668/459, loss: 0.1411135047674179 2023-01-22 17:24:15.470063: step: 670/459, loss: 0.06467308104038239 2023-01-22 17:24:16.131500: step: 672/459, loss: 0.08921609818935394 2023-01-22 17:24:16.717994: step: 674/459, loss: 0.010283363051712513 2023-01-22 17:24:17.279328: step: 676/459, loss: 0.03494598716497421 2023-01-22 17:24:17.904900: step: 678/459, loss: 0.04492684826254845 2023-01-22 17:24:18.500648: step: 680/459, loss: 0.0598372258245945 2023-01-22 17:24:19.106097: step: 682/459, loss: 0.6701768040657043 2023-01-22 17:24:19.806146: step: 684/459, loss: 0.005999053828418255 2023-01-22 17:24:20.463927: step: 686/459, loss: 0.5801866054534912 2023-01-22 17:24:21.065697: step: 688/459, loss: 0.04591070115566254 2023-01-22 17:24:21.670498: step: 690/459, loss: 0.015308861620724201 2023-01-22 17:24:22.275244: step: 692/459, loss: 0.13982610404491425 2023-01-22 17:24:22.842876: step: 694/459, loss: 0.02239038795232773 2023-01-22 17:24:23.440705: step: 696/459, loss: 0.03497892618179321 2023-01-22 17:24:24.046456: step: 698/459, loss: 0.018172508105635643 2023-01-22 17:24:24.671810: step: 700/459, loss: 0.021459607407450676 2023-01-22 17:24:25.292949: step: 702/459, loss: 0.054022323340177536 2023-01-22 17:24:26.008636: step: 704/459, loss: 0.05802542343735695 2023-01-22 17:24:26.744103: step: 706/459, loss: 0.02152281254529953 2023-01-22 17:24:27.383389: step: 708/459, loss: 0.0028955857269465923 2023-01-22 17:24:28.061447: step: 710/459, loss: 0.07662633806467056 2023-01-22 17:24:28.730281: step: 712/459, loss: 0.05352451652288437 2023-01-22 17:24:29.340748: step: 714/459, loss: 0.04393748566508293 2023-01-22 17:24:29.998663: step: 716/459, loss: 0.03293988108634949 2023-01-22 17:24:30.652106: step: 718/459, loss: 0.021319938823580742 2023-01-22 17:24:31.257663: step: 720/459, loss: 0.025697309523820877 2023-01-22 17:24:31.843083: step: 722/459, loss: 0.010953713208436966 2023-01-22 17:24:32.437359: step: 724/459, loss: 0.05303457751870155 2023-01-22 17:24:33.070748: step: 726/459, loss: 0.0611993633210659 2023-01-22 17:24:33.686582: step: 728/459, loss: 0.04956426844000816 2023-01-22 17:24:34.297161: step: 730/459, loss: 1.1682957410812378 2023-01-22 17:24:34.883798: step: 732/459, loss: 0.023065760731697083 2023-01-22 17:24:35.491052: step: 734/459, loss: 0.05745159462094307 2023-01-22 17:24:36.076774: step: 736/459, loss: 0.006388711277395487 2023-01-22 17:24:36.738129: step: 738/459, loss: 0.06960338354110718 2023-01-22 17:24:37.343517: step: 740/459, loss: 0.026513922959566116 2023-01-22 17:24:37.986729: step: 742/459, loss: 0.046966180205345154 2023-01-22 17:24:38.642096: step: 744/459, loss: 0.16208291053771973 2023-01-22 17:24:39.258485: step: 746/459, loss: 0.010775376111268997 2023-01-22 17:24:39.871701: step: 748/459, loss: 0.023194625973701477 2023-01-22 17:24:40.427280: step: 750/459, loss: 0.0168790053576231 2023-01-22 17:24:41.066228: step: 752/459, loss: 0.03170133754611015 2023-01-22 17:24:41.635558: step: 754/459, loss: 0.004335256293416023 2023-01-22 17:24:42.307339: step: 756/459, loss: 0.1068827286362648 2023-01-22 17:24:42.957302: step: 758/459, loss: 0.041127294301986694 2023-01-22 17:24:43.582736: step: 760/459, loss: 0.017822016030550003 2023-01-22 17:24:44.178069: step: 762/459, loss: 0.1179901733994484 2023-01-22 17:24:44.889097: step: 764/459, loss: 0.05156347155570984 2023-01-22 17:24:45.612283: step: 766/459, loss: 0.1058984324336052 2023-01-22 17:24:46.397335: step: 768/459, loss: 0.047750748693943024 2023-01-22 17:24:47.020761: step: 770/459, loss: 0.05540177971124649 2023-01-22 17:24:47.626462: step: 772/459, loss: 0.04387734830379486 2023-01-22 17:24:48.210086: step: 774/459, loss: 0.030908463522791862 2023-01-22 17:24:48.862334: step: 776/459, loss: 0.022646034136414528 2023-01-22 17:24:49.530822: step: 778/459, loss: 0.0279940664768219 2023-01-22 17:24:50.103119: step: 780/459, loss: 0.013225158676505089 2023-01-22 17:24:50.780405: step: 782/459, loss: 0.07585899531841278 2023-01-22 17:24:51.410890: step: 784/459, loss: 0.061041515320539474 2023-01-22 17:24:52.054689: step: 786/459, loss: 0.01824662648141384 2023-01-22 17:24:52.777163: step: 788/459, loss: 0.028424574062228203 2023-01-22 17:24:53.341632: step: 790/459, loss: 0.13201287388801575 2023-01-22 17:24:53.910171: step: 792/459, loss: 0.03558709844946861 2023-01-22 17:24:54.463369: step: 794/459, loss: 0.040252793580293655 2023-01-22 17:24:55.062996: step: 796/459, loss: 0.041825491935014725 2023-01-22 17:24:55.670861: step: 798/459, loss: 0.02266046777367592 2023-01-22 17:24:56.328045: step: 800/459, loss: 0.021287184208631516 2023-01-22 17:24:56.937704: step: 802/459, loss: 0.032967839390039444 2023-01-22 17:24:57.517813: step: 804/459, loss: 0.03891510143876076 2023-01-22 17:24:58.104143: step: 806/459, loss: 0.05250013619661331 2023-01-22 17:24:58.745772: step: 808/459, loss: 0.05571910738945007 2023-01-22 17:24:59.402211: step: 810/459, loss: 0.5605465769767761 2023-01-22 17:25:00.065931: step: 812/459, loss: 0.05514345318078995 2023-01-22 17:25:00.721348: step: 814/459, loss: 0.013447940349578857 2023-01-22 17:25:01.299540: step: 816/459, loss: 0.0547759011387825 2023-01-22 17:25:01.906923: step: 818/459, loss: 0.05424008145928383 2023-01-22 17:25:02.499139: step: 820/459, loss: 0.010526158846914768 2023-01-22 17:25:03.108733: step: 822/459, loss: 0.08043567836284637 2023-01-22 17:25:03.794830: step: 824/459, loss: 0.023630043491721153 2023-01-22 17:25:04.387915: step: 826/459, loss: 0.10371125489473343 2023-01-22 17:25:05.031299: step: 828/459, loss: 0.43073636293411255 2023-01-22 17:25:05.648435: step: 830/459, loss: 0.008698207326233387 2023-01-22 17:25:06.343957: step: 832/459, loss: 0.3520076870918274 2023-01-22 17:25:06.999620: step: 834/459, loss: 0.09704400599002838 2023-01-22 17:25:07.596319: step: 836/459, loss: 0.28332287073135376 2023-01-22 17:25:08.215405: step: 838/459, loss: 0.0597277507185936 2023-01-22 17:25:08.910262: step: 840/459, loss: 0.5645433664321899 2023-01-22 17:25:09.437901: step: 842/459, loss: 0.33287712931632996 2023-01-22 17:25:10.097011: step: 844/459, loss: 0.030096709728240967 2023-01-22 17:25:10.676920: step: 846/459, loss: 0.031483590602874756 2023-01-22 17:25:11.300513: step: 848/459, loss: 0.04095800966024399 2023-01-22 17:25:11.890741: step: 850/459, loss: 0.12304207682609558 2023-01-22 17:25:12.469175: step: 852/459, loss: 0.008424277417361736 2023-01-22 17:25:13.069429: step: 854/459, loss: 0.03322190046310425 2023-01-22 17:25:13.717216: step: 856/459, loss: 0.005253645591437817 2023-01-22 17:25:14.373198: step: 858/459, loss: 0.031321458518505096 2023-01-22 17:25:15.015150: step: 860/459, loss: 0.10999950766563416 2023-01-22 17:25:15.636841: step: 862/459, loss: 0.015470885671675205 2023-01-22 17:25:16.233696: step: 864/459, loss: 0.04763171821832657 2023-01-22 17:25:16.915137: step: 866/459, loss: 0.009038747288286686 2023-01-22 17:25:17.476575: step: 868/459, loss: 0.0020437007769942284 2023-01-22 17:25:18.083112: step: 870/459, loss: 0.05130446329712868 2023-01-22 17:25:18.707713: step: 872/459, loss: 1.8459959030151367 2023-01-22 17:25:19.337758: step: 874/459, loss: 0.08758866786956787 2023-01-22 17:25:19.940667: step: 876/459, loss: 0.006065867375582457 2023-01-22 17:25:20.538373: step: 878/459, loss: 0.019404519349336624 2023-01-22 17:25:21.100060: step: 880/459, loss: 0.04187808930873871 2023-01-22 17:25:21.787670: step: 882/459, loss: 0.4353110194206238 2023-01-22 17:25:22.414758: step: 884/459, loss: 0.07378815114498138 2023-01-22 17:25:23.009039: step: 886/459, loss: 0.0687570869922638 2023-01-22 17:25:23.594985: step: 888/459, loss: 0.10778500139713287 2023-01-22 17:25:24.178913: step: 890/459, loss: 0.017278103157877922 2023-01-22 17:25:24.731126: step: 892/459, loss: 0.2593228220939636 2023-01-22 17:25:25.296193: step: 894/459, loss: 0.011286402121186256 2023-01-22 17:25:25.935546: step: 896/459, loss: 0.024015633389353752 2023-01-22 17:25:26.651497: step: 898/459, loss: 0.04252889007329941 2023-01-22 17:25:27.226297: step: 900/459, loss: 0.06732986867427826 2023-01-22 17:25:27.847979: step: 902/459, loss: 0.0423046238720417 2023-01-22 17:25:28.482450: step: 904/459, loss: 0.017478732392191887 2023-01-22 17:25:29.100471: step: 906/459, loss: 0.056421421468257904 2023-01-22 17:25:29.678840: step: 908/459, loss: 0.24288314580917358 2023-01-22 17:25:30.282797: step: 910/459, loss: 0.02987118437886238 2023-01-22 17:25:30.860026: step: 912/459, loss: 0.006775592919439077 2023-01-22 17:25:31.461050: step: 914/459, loss: 0.5402472019195557 2023-01-22 17:25:32.059447: step: 916/459, loss: 0.02070109359920025 2023-01-22 17:25:32.720765: step: 918/459, loss: 0.02593689225614071 2023-01-22 17:25:33.201453: step: 920/459, loss: 3.615976083892747e-06 ================================================== Loss: 0.089 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2884865702479339, 'r': 0.33118477229601523, 'f1': 0.3083646201413428}, 'combined': 0.227216035893621, 'epoch': 23} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3161144695137364, 'r': 0.3261726571800826, 'f1': 0.32106480796249737}, 'combined': 0.2054814770959983, 'epoch': 23} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29589717741935484, 'r': 0.3481143263757116, 'f1': 0.3198888404533566}, 'combined': 0.2357075666498417, 'epoch': 23} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3155395791808253, 'r': 0.32415303448148475, 'f1': 0.31978831678200337}, 'combined': 0.20466452274048214, 'epoch': 23} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31758303033490093, 'r': 0.34048275548238904, 'f1': 0.3286344544674341}, 'combined': 0.24215170329179353, 'epoch': 23} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3216394799326119, 'r': 0.3377801472649966, 'f1': 0.32951227539157657}, 'combined': 0.2362540842430172, 'epoch': 23} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23270440251572325, 'r': 0.35238095238095235, 'f1': 0.2803030303030303}, 'combined': 0.18686868686868685, 'epoch': 23} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25675675675675674, 'r': 0.41304347826086957, 'f1': 0.31666666666666665}, 'combined': 0.15833333333333333, 'epoch': 23} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.1724137931034483, 'f1': 0.22727272727272724}, 'combined': 0.1515151515151515, 'epoch': 23} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3066986908783784, 'r': 0.3445268026565465, 'f1': 0.3245140750670242}, 'combined': 0.23911563425991253, 'epoch': 19} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31271473684488904, 'r': 0.30873473110322686, 'f1': 0.31071198921642224}, 'combined': 0.1988556730985102, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2564102564102564, 'r': 0.38095238095238093, 'f1': 0.3065134099616858}, 'combined': 0.20434227330779053, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29418702499903554, 'r': 0.3594998939267151, 'f1': 0.32358060478117656}, 'combined': 0.23842781404928798, 'epoch': 13} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3202762492367604, 'r': 0.31212376289255195, 'f1': 0.31614745781013553}, 'combined': 0.20233437299848672, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2878787878787879, 'r': 0.41304347826086957, 'f1': 0.3392857142857143}, 'combined': 0.16964285714285715, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3189032241859154, 'r': 0.3376622373733222, 'f1': 0.32801474487694154}, 'combined': 0.24169507517248323, 'epoch': 15} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34157316773046453, 'r': 0.3008578947798814, 'f1': 0.3199253278791385}, 'combined': 0.22938042376240123, 'epoch': 15} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35294117647058826, 'r': 0.20689655172413793, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 15} ****************************** Epoch: 24 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 17:28:10.892068: step: 2/459, loss: 0.047084007412195206 2023-01-22 17:28:11.537933: step: 4/459, loss: 0.02673051320016384 2023-01-22 17:28:12.169914: step: 6/459, loss: 0.20898757874965668 2023-01-22 17:28:12.780776: step: 8/459, loss: 0.08564101904630661 2023-01-22 17:28:13.377002: step: 10/459, loss: 0.5229640007019043 2023-01-22 17:28:14.052286: step: 12/459, loss: 0.017357727512717247 2023-01-22 17:28:14.673556: step: 14/459, loss: 0.003162266919389367 2023-01-22 17:28:15.313183: step: 16/459, loss: 0.05582832545042038 2023-01-22 17:28:16.008773: step: 18/459, loss: 0.04714591056108475 2023-01-22 17:28:16.667399: step: 20/459, loss: 0.047545067965984344 2023-01-22 17:28:17.309593: step: 22/459, loss: 0.01663302630186081 2023-01-22 17:28:17.861945: step: 24/459, loss: 0.003726863767951727 2023-01-22 17:28:18.516407: step: 26/459, loss: 0.05073040351271629 2023-01-22 17:28:19.177879: step: 28/459, loss: 0.08314822614192963 2023-01-22 17:28:19.816045: step: 30/459, loss: 0.03880385309457779 2023-01-22 17:28:20.452114: step: 32/459, loss: 0.01251937821507454 2023-01-22 17:28:21.132213: step: 34/459, loss: 0.6902560591697693 2023-01-22 17:28:21.725952: step: 36/459, loss: 0.032874107360839844 2023-01-22 17:28:22.317076: step: 38/459, loss: 0.0029821060597896576 2023-01-22 17:28:22.940304: step: 40/459, loss: 0.006304925307631493 2023-01-22 17:28:23.542312: step: 42/459, loss: 0.048292309045791626 2023-01-22 17:28:24.140729: step: 44/459, loss: 0.006788281723856926 2023-01-22 17:28:24.854604: step: 46/459, loss: 0.031423307955265045 2023-01-22 17:28:25.534323: step: 48/459, loss: 0.019995110109448433 2023-01-22 17:28:26.119972: step: 50/459, loss: 0.006372619885951281 2023-01-22 17:28:26.765803: step: 52/459, loss: 0.1641993373632431 2023-01-22 17:28:27.382819: step: 54/459, loss: 0.055018581449985504 2023-01-22 17:28:27.994386: step: 56/459, loss: 0.0010401038452982903 2023-01-22 17:28:28.579032: step: 58/459, loss: 0.06540512293577194 2023-01-22 17:28:29.181812: step: 60/459, loss: 0.056846097111701965 2023-01-22 17:28:29.782480: step: 62/459, loss: 0.02475569024682045 2023-01-22 17:28:30.369145: step: 64/459, loss: 0.01398357655853033 2023-01-22 17:28:30.912341: step: 66/459, loss: 0.05749718099832535 2023-01-22 17:28:31.571955: step: 68/459, loss: 0.01246891263872385 2023-01-22 17:28:32.155774: step: 70/459, loss: 0.037309322506189346 2023-01-22 17:28:32.779364: step: 72/459, loss: 0.03675243258476257 2023-01-22 17:28:33.380088: step: 74/459, loss: 0.003401476191356778 2023-01-22 17:28:33.993799: step: 76/459, loss: 0.0052030147053301334 2023-01-22 17:28:34.665019: step: 78/459, loss: 0.015728527680039406 2023-01-22 17:28:35.304086: step: 80/459, loss: 0.07667097449302673 2023-01-22 17:28:35.906893: step: 82/459, loss: 0.01462208665907383 2023-01-22 17:28:36.514392: step: 84/459, loss: 0.00022533131414093077 2023-01-22 17:28:37.101159: step: 86/459, loss: 0.008982416242361069 2023-01-22 17:28:37.753586: step: 88/459, loss: 0.2011738419532776 2023-01-22 17:28:38.290378: step: 90/459, loss: 0.018933380022644997 2023-01-22 17:28:38.934977: step: 92/459, loss: 0.03071061335504055 2023-01-22 17:28:39.539158: step: 94/459, loss: 0.0038907986599951982 2023-01-22 17:28:40.153743: step: 96/459, loss: 0.027166886255145073 2023-01-22 17:28:40.735323: step: 98/459, loss: 0.007109090685844421 2023-01-22 17:28:41.302279: step: 100/459, loss: 0.02205871045589447 2023-01-22 17:28:41.863161: step: 102/459, loss: 0.004213309846818447 2023-01-22 17:28:42.441351: step: 104/459, loss: 0.013449412770569324 2023-01-22 17:28:43.104904: step: 106/459, loss: 0.07196351885795593 2023-01-22 17:28:43.720027: step: 108/459, loss: 0.09122282266616821 2023-01-22 17:28:44.338312: step: 110/459, loss: 0.003362891497090459 2023-01-22 17:28:44.926544: step: 112/459, loss: 0.01990852877497673 2023-01-22 17:28:45.484760: step: 114/459, loss: 0.012143729254603386 2023-01-22 17:28:46.096592: step: 116/459, loss: 0.050873927772045135 2023-01-22 17:28:46.709284: step: 118/459, loss: 0.0580194853246212 2023-01-22 17:28:47.360754: step: 120/459, loss: 0.01777065359055996 2023-01-22 17:28:47.967805: step: 122/459, loss: 0.044194672256708145 2023-01-22 17:28:48.562942: step: 124/459, loss: 0.029035374522209167 2023-01-22 17:28:49.170913: step: 126/459, loss: 0.0019143427489325404 2023-01-22 17:28:49.808208: step: 128/459, loss: 0.04398776590824127 2023-01-22 17:28:50.421304: step: 130/459, loss: 0.032640229910612106 2023-01-22 17:28:51.003095: step: 132/459, loss: 0.02236166223883629 2023-01-22 17:28:51.650244: step: 134/459, loss: 0.8550947308540344 2023-01-22 17:28:52.266908: step: 136/459, loss: 0.4497445821762085 2023-01-22 17:28:52.894972: step: 138/459, loss: 0.011673812754452229 2023-01-22 17:28:53.462898: step: 140/459, loss: 0.004660515114665031 2023-01-22 17:28:54.146673: step: 142/459, loss: 0.006811492145061493 2023-01-22 17:28:54.780516: step: 144/459, loss: 0.15849104523658752 2023-01-22 17:28:55.397484: step: 146/459, loss: 0.07386138290166855 2023-01-22 17:28:56.026636: step: 148/459, loss: 0.052300870418548584 2023-01-22 17:28:56.589038: step: 150/459, loss: 0.012261220254004002 2023-01-22 17:28:57.200478: step: 152/459, loss: 0.023399153724312782 2023-01-22 17:28:57.813696: step: 154/459, loss: 0.06666760891675949 2023-01-22 17:28:58.467028: step: 156/459, loss: 0.08196154981851578 2023-01-22 17:28:59.017471: step: 158/459, loss: 0.017144232988357544 2023-01-22 17:28:59.752130: step: 160/459, loss: 0.04000276327133179 2023-01-22 17:29:00.354261: step: 162/459, loss: 0.09801151603460312 2023-01-22 17:29:00.983721: step: 164/459, loss: 0.033083271235227585 2023-01-22 17:29:01.566195: step: 166/459, loss: 0.051780425012111664 2023-01-22 17:29:02.179287: step: 168/459, loss: 0.005383175797760487 2023-01-22 17:29:02.787271: step: 170/459, loss: 0.09339700639247894 2023-01-22 17:29:03.387229: step: 172/459, loss: 0.023819321766495705 2023-01-22 17:29:04.001360: step: 174/459, loss: 0.0018045150209218264 2023-01-22 17:29:04.601168: step: 176/459, loss: 0.007238712627440691 2023-01-22 17:29:05.285557: step: 178/459, loss: 0.03392142057418823 2023-01-22 17:29:05.909187: step: 180/459, loss: 0.008840883150696754 2023-01-22 17:29:06.617209: step: 182/459, loss: 0.0034433328546583652 2023-01-22 17:29:07.208325: step: 184/459, loss: 0.04897715151309967 2023-01-22 17:29:07.792961: step: 186/459, loss: 0.029549896717071533 2023-01-22 17:29:08.404623: step: 188/459, loss: 0.01793571375310421 2023-01-22 17:29:09.043141: step: 190/459, loss: 0.025524884462356567 2023-01-22 17:29:09.667064: step: 192/459, loss: 0.005218002945184708 2023-01-22 17:29:10.301739: step: 194/459, loss: 0.08175579458475113 2023-01-22 17:29:10.909745: step: 196/459, loss: 0.17771203815937042 2023-01-22 17:29:11.468423: step: 198/459, loss: 0.012794628739356995 2023-01-22 17:29:12.128793: step: 200/459, loss: 0.030566375702619553 2023-01-22 17:29:12.728348: step: 202/459, loss: 0.0011204298352822661 2023-01-22 17:29:13.321920: step: 204/459, loss: 0.025353476405143738 2023-01-22 17:29:13.963378: step: 206/459, loss: 0.00144704629201442 2023-01-22 17:29:14.651704: step: 208/459, loss: 0.03232686594128609 2023-01-22 17:29:15.390447: step: 210/459, loss: 0.12373503297567368 2023-01-22 17:29:16.032579: step: 212/459, loss: 0.025408197194337845 2023-01-22 17:29:16.653146: step: 214/459, loss: 0.011883789673447609 2023-01-22 17:29:17.295507: step: 216/459, loss: 0.007301203906536102 2023-01-22 17:29:17.954291: step: 218/459, loss: 0.019820090383291245 2023-01-22 17:29:18.685704: step: 220/459, loss: 0.023543182760477066 2023-01-22 17:29:19.345269: step: 222/459, loss: 0.04659811779856682 2023-01-22 17:29:19.974825: step: 224/459, loss: 0.024646969512104988 2023-01-22 17:29:20.644144: step: 226/459, loss: 0.03234701603651047 2023-01-22 17:29:21.272174: step: 228/459, loss: 0.027776844799518585 2023-01-22 17:29:21.867355: step: 230/459, loss: 0.021065857261419296 2023-01-22 17:29:22.532552: step: 232/459, loss: 0.012411470524966717 2023-01-22 17:29:23.201538: step: 234/459, loss: 0.0008365408866666257 2023-01-22 17:29:23.837710: step: 236/459, loss: 0.007119207642972469 2023-01-22 17:29:24.521069: step: 238/459, loss: 0.04156377166509628 2023-01-22 17:29:25.138412: step: 240/459, loss: 0.002489926526322961 2023-01-22 17:29:25.771673: step: 242/459, loss: 0.06479490548372269 2023-01-22 17:29:26.394849: step: 244/459, loss: 0.06747336685657501 2023-01-22 17:29:27.028832: step: 246/459, loss: 0.01171522494405508 2023-01-22 17:29:27.654569: step: 248/459, loss: 0.007008770946413279 2023-01-22 17:29:28.244316: step: 250/459, loss: 0.03927210718393326 2023-01-22 17:29:28.848805: step: 252/459, loss: 0.007170611061155796 2023-01-22 17:29:29.510628: step: 254/459, loss: 0.029224591329693794 2023-01-22 17:29:30.116653: step: 256/459, loss: 0.04331120476126671 2023-01-22 17:29:30.709332: step: 258/459, loss: 0.047057367861270905 2023-01-22 17:29:31.299286: step: 260/459, loss: 0.09820511937141418 2023-01-22 17:29:31.915486: step: 262/459, loss: 0.12570683658123016 2023-01-22 17:29:32.529166: step: 264/459, loss: 0.05907128006219864 2023-01-22 17:29:33.113893: step: 266/459, loss: 0.03568997234106064 2023-01-22 17:29:33.774840: step: 268/459, loss: 0.013417055830359459 2023-01-22 17:29:34.431457: step: 270/459, loss: 0.02215283177793026 2023-01-22 17:29:35.081503: step: 272/459, loss: 0.07307875156402588 2023-01-22 17:29:35.699756: step: 274/459, loss: 0.008729935623705387 2023-01-22 17:29:36.319773: step: 276/459, loss: 0.006414141971617937 2023-01-22 17:29:36.895229: step: 278/459, loss: 0.15186406672000885 2023-01-22 17:29:37.507460: step: 280/459, loss: 0.009407359175384045 2023-01-22 17:29:38.102105: step: 282/459, loss: 0.009967871010303497 2023-01-22 17:29:38.674975: step: 284/459, loss: 0.011072198860347271 2023-01-22 17:29:39.349430: step: 286/459, loss: 0.0035697261337190866 2023-01-22 17:29:39.936449: step: 288/459, loss: 0.044224925339221954 2023-01-22 17:29:40.529600: step: 290/459, loss: 0.008462893776595592 2023-01-22 17:29:41.118318: step: 292/459, loss: 0.016448481008410454 2023-01-22 17:29:41.794793: step: 294/459, loss: 0.022876236587762833 2023-01-22 17:29:42.352307: step: 296/459, loss: 0.07504778355360031 2023-01-22 17:29:43.035335: step: 298/459, loss: 0.0470120906829834 2023-01-22 17:29:43.648697: step: 300/459, loss: 0.0008274897700175643 2023-01-22 17:29:44.188931: step: 302/459, loss: 0.09499936550855637 2023-01-22 17:29:44.794364: step: 304/459, loss: 0.09518428146839142 2023-01-22 17:29:45.364616: step: 306/459, loss: 0.05478556454181671 2023-01-22 17:29:46.026362: step: 308/459, loss: 0.02740149199962616 2023-01-22 17:29:46.578833: step: 310/459, loss: 0.018726080656051636 2023-01-22 17:29:47.190354: step: 312/459, loss: 0.13450370728969574 2023-01-22 17:29:47.844118: step: 314/459, loss: 0.0030188935343176126 2023-01-22 17:29:48.464038: step: 316/459, loss: 0.07327716797590256 2023-01-22 17:29:49.076095: step: 318/459, loss: 0.0013489185366779566 2023-01-22 17:29:49.759804: step: 320/459, loss: 0.024088190868496895 2023-01-22 17:29:50.427834: step: 322/459, loss: 0.05740426480770111 2023-01-22 17:29:50.983946: step: 324/459, loss: 0.0037260393146425486 2023-01-22 17:29:51.614547: step: 326/459, loss: 0.00210999371483922 2023-01-22 17:29:52.173479: step: 328/459, loss: 0.0031563392840325832 2023-01-22 17:29:52.771586: step: 330/459, loss: 0.0007586986175738275 2023-01-22 17:29:53.361753: step: 332/459, loss: 0.02723735384643078 2023-01-22 17:29:53.979299: step: 334/459, loss: 0.028851592913269997 2023-01-22 17:29:54.589742: step: 336/459, loss: 0.04571404308080673 2023-01-22 17:29:55.231662: step: 338/459, loss: 0.017905689775943756 2023-01-22 17:29:55.890382: step: 340/459, loss: 0.01935652643442154 2023-01-22 17:29:56.497860: step: 342/459, loss: 0.022267362102866173 2023-01-22 17:29:57.051011: step: 344/459, loss: 0.21662437915802002 2023-01-22 17:29:57.711341: step: 346/459, loss: 0.010897412896156311 2023-01-22 17:29:58.411220: step: 348/459, loss: 0.04260272532701492 2023-01-22 17:29:59.097899: step: 350/459, loss: 0.02267679013311863 2023-01-22 17:29:59.714932: step: 352/459, loss: 0.028318749740719795 2023-01-22 17:30:00.346054: step: 354/459, loss: 0.08638878911733627 2023-01-22 17:30:00.965997: step: 356/459, loss: 0.05589190870523453 2023-01-22 17:30:01.625563: step: 358/459, loss: 0.036008428782224655 2023-01-22 17:30:02.133840: step: 360/459, loss: 0.011075137183070183 2023-01-22 17:30:02.752824: step: 362/459, loss: 0.8310810327529907 2023-01-22 17:30:03.375279: step: 364/459, loss: 0.04460754990577698 2023-01-22 17:30:03.972393: step: 366/459, loss: 0.006146349012851715 2023-01-22 17:30:04.607697: step: 368/459, loss: 0.028728485107421875 2023-01-22 17:30:05.181495: step: 370/459, loss: 0.011883074417710304 2023-01-22 17:30:05.794177: step: 372/459, loss: 0.03708215430378914 2023-01-22 17:30:06.440858: step: 374/459, loss: 0.010863824747502804 2023-01-22 17:30:07.019513: step: 376/459, loss: 0.01667112670838833 2023-01-22 17:30:07.640414: step: 378/459, loss: 0.040397658944129944 2023-01-22 17:30:08.288387: step: 380/459, loss: 0.02753378264605999 2023-01-22 17:30:08.951403: step: 382/459, loss: 0.004098786506801844 2023-01-22 17:30:09.527183: step: 384/459, loss: 0.10912112146615982 2023-01-22 17:30:10.161005: step: 386/459, loss: 0.06130761280655861 2023-01-22 17:30:10.768721: step: 388/459, loss: 0.007756899576634169 2023-01-22 17:30:11.419464: step: 390/459, loss: 0.009891838766634464 2023-01-22 17:30:12.047850: step: 392/459, loss: 0.015105849131941795 2023-01-22 17:30:12.679278: step: 394/459, loss: 0.006866544485092163 2023-01-22 17:30:13.383696: step: 396/459, loss: 0.07629069685935974 2023-01-22 17:30:13.952425: step: 398/459, loss: 0.05828893557190895 2023-01-22 17:30:14.558239: step: 400/459, loss: 0.39183229207992554 2023-01-22 17:30:15.194127: step: 402/459, loss: 0.03673488274216652 2023-01-22 17:30:15.838126: step: 404/459, loss: 0.006211625877767801 2023-01-22 17:30:16.424436: step: 406/459, loss: 0.002474738284945488 2023-01-22 17:30:17.061375: step: 408/459, loss: 0.07389585673809052 2023-01-22 17:30:17.686272: step: 410/459, loss: 0.023051954805850983 2023-01-22 17:30:18.328152: step: 412/459, loss: 0.059362951666116714 2023-01-22 17:30:18.923855: step: 414/459, loss: 0.013256178237497807 2023-01-22 17:30:19.541738: step: 416/459, loss: 0.020865054801106453 2023-01-22 17:30:20.111123: step: 418/459, loss: 0.0016972084995359182 2023-01-22 17:30:20.818271: step: 420/459, loss: 0.007237361744046211 2023-01-22 17:30:21.431793: step: 422/459, loss: 0.01285796333104372 2023-01-22 17:30:22.035347: step: 424/459, loss: 0.02062300592660904 2023-01-22 17:30:22.754232: step: 426/459, loss: 0.21100914478302002 2023-01-22 17:30:23.430093: step: 428/459, loss: 0.06890407204627991 2023-01-22 17:30:23.984060: step: 430/459, loss: 0.038749031722545624 2023-01-22 17:30:24.552505: step: 432/459, loss: 0.027905898168683052 2023-01-22 17:30:25.129242: step: 434/459, loss: 0.20402100682258606 2023-01-22 17:30:25.839459: step: 436/459, loss: 0.017415983602404594 2023-01-22 17:30:26.483546: step: 438/459, loss: 0.020434560254216194 2023-01-22 17:30:27.122619: step: 440/459, loss: 0.020828189328312874 2023-01-22 17:30:27.734073: step: 442/459, loss: 0.049139734357595444 2023-01-22 17:30:28.336928: step: 444/459, loss: 0.01415524072945118 2023-01-22 17:30:28.955269: step: 446/459, loss: 0.11132676154375076 2023-01-22 17:30:29.547238: step: 448/459, loss: 0.031090492382645607 2023-01-22 17:30:30.181875: step: 450/459, loss: 0.00983670074492693 2023-01-22 17:30:30.823170: step: 452/459, loss: 0.006629394832998514 2023-01-22 17:30:31.434550: step: 454/459, loss: 0.020365754142403603 2023-01-22 17:30:31.980475: step: 456/459, loss: 0.021048642694950104 2023-01-22 17:30:32.632445: step: 458/459, loss: 0.0037975760642439127 2023-01-22 17:30:33.352212: step: 460/459, loss: 0.051169220358133316 2023-01-22 17:30:33.964498: step: 462/459, loss: 0.025370702147483826 2023-01-22 17:30:34.565270: step: 464/459, loss: 0.01056431420147419 2023-01-22 17:30:35.134593: step: 466/459, loss: 0.00955763179808855 2023-01-22 17:30:35.745544: step: 468/459, loss: 0.07913697510957718 2023-01-22 17:30:36.351683: step: 470/459, loss: 0.021680746227502823 2023-01-22 17:30:37.049585: step: 472/459, loss: 0.14744186401367188 2023-01-22 17:30:37.674188: step: 474/459, loss: 0.02474161796271801 2023-01-22 17:30:38.306688: step: 476/459, loss: 0.03340521827340126 2023-01-22 17:30:38.976592: step: 478/459, loss: 0.0007765039335936308 2023-01-22 17:30:39.627240: step: 480/459, loss: 0.1734689325094223 2023-01-22 17:30:40.215500: step: 482/459, loss: 0.08976420760154724 2023-01-22 17:30:40.907874: step: 484/459, loss: 0.003285980550572276 2023-01-22 17:30:41.518594: step: 486/459, loss: 0.04094323143362999 2023-01-22 17:30:42.108231: step: 488/459, loss: 0.111933633685112 2023-01-22 17:30:42.704725: step: 490/459, loss: 0.026926320046186447 2023-01-22 17:30:43.352613: step: 492/459, loss: 0.05112963169813156 2023-01-22 17:30:43.996515: step: 494/459, loss: 0.4249143600463867 2023-01-22 17:30:44.596387: step: 496/459, loss: 0.03644000366330147 2023-01-22 17:30:45.288727: step: 498/459, loss: 0.014635439962148666 2023-01-22 17:30:45.921295: step: 500/459, loss: 0.03186901658773422 2023-01-22 17:30:46.518280: step: 502/459, loss: 0.013514330610632896 2023-01-22 17:30:47.129186: step: 504/459, loss: 0.09427928924560547 2023-01-22 17:30:47.726072: step: 506/459, loss: 0.001031401683576405 2023-01-22 17:30:48.435241: step: 508/459, loss: 0.07083006203174591 2023-01-22 17:30:48.993923: step: 510/459, loss: 0.004751982167363167 2023-01-22 17:30:49.598340: step: 512/459, loss: 0.03644780069589615 2023-01-22 17:30:50.148870: step: 514/459, loss: 0.08853523433208466 2023-01-22 17:30:50.759967: step: 516/459, loss: 0.03213000297546387 2023-01-22 17:30:51.357332: step: 518/459, loss: 0.060345087200403214 2023-01-22 17:30:51.987331: step: 520/459, loss: 0.019147271290421486 2023-01-22 17:30:52.623713: step: 522/459, loss: 0.06564149260520935 2023-01-22 17:30:53.287854: step: 524/459, loss: 0.010129543021321297 2023-01-22 17:30:53.935390: step: 526/459, loss: 0.04144278168678284 2023-01-22 17:30:54.561320: step: 528/459, loss: 0.16162154078483582 2023-01-22 17:30:55.190041: step: 530/459, loss: 0.02654232271015644 2023-01-22 17:30:55.842952: step: 532/459, loss: 5.751251220703125 2023-01-22 17:30:56.486757: step: 534/459, loss: 0.0643952265381813 2023-01-22 17:30:57.108456: step: 536/459, loss: 0.037519339472055435 2023-01-22 17:30:57.661501: step: 538/459, loss: 0.006927755661308765 2023-01-22 17:30:58.314361: step: 540/459, loss: 0.02179042249917984 2023-01-22 17:30:58.932985: step: 542/459, loss: 0.0010854514548555017 2023-01-22 17:30:59.567978: step: 544/459, loss: 0.047863055020570755 2023-01-22 17:31:00.181464: step: 546/459, loss: 5.542479991912842 2023-01-22 17:31:01.453944: step: 548/459, loss: 0.024458490312099457 2023-01-22 17:31:02.074949: step: 550/459, loss: 0.5495651364326477 2023-01-22 17:31:02.698031: step: 552/459, loss: 0.0006563551141880453 2023-01-22 17:31:03.327621: step: 554/459, loss: 0.026605656370520592 2023-01-22 17:31:03.928907: step: 556/459, loss: 0.010977125726640224 2023-01-22 17:31:04.499020: step: 558/459, loss: 0.037644162774086 2023-01-22 17:31:05.168823: step: 560/459, loss: 0.045487016439437866 2023-01-22 17:31:05.764574: step: 562/459, loss: 0.009802722372114658 2023-01-22 17:31:06.358313: step: 564/459, loss: 0.0057118553668260574 2023-01-22 17:31:06.936820: step: 566/459, loss: 0.06683515012264252 2023-01-22 17:31:07.549225: step: 568/459, loss: 0.007363267242908478 2023-01-22 17:31:08.109713: step: 570/459, loss: 0.05511775612831116 2023-01-22 17:31:08.770549: step: 572/459, loss: 0.0297094639390707 2023-01-22 17:31:09.494256: step: 574/459, loss: 0.908475935459137 2023-01-22 17:31:10.103547: step: 576/459, loss: 0.04191069304943085 2023-01-22 17:31:10.738760: step: 578/459, loss: 0.005009212531149387 2023-01-22 17:31:11.396780: step: 580/459, loss: 0.10154452919960022 2023-01-22 17:31:12.077847: step: 582/459, loss: 0.017708472907543182 2023-01-22 17:31:12.660869: step: 584/459, loss: 0.005757106468081474 2023-01-22 17:31:13.257946: step: 586/459, loss: 0.04239577800035477 2023-01-22 17:31:13.944041: step: 588/459, loss: 0.03721484914422035 2023-01-22 17:31:14.569925: step: 590/459, loss: 0.02922661602497101 2023-01-22 17:31:15.205397: step: 592/459, loss: 0.033519644290208817 2023-01-22 17:31:15.842427: step: 594/459, loss: 0.0580122247338295 2023-01-22 17:31:16.460253: step: 596/459, loss: 0.02497921884059906 2023-01-22 17:31:17.076423: step: 598/459, loss: 0.02610209584236145 2023-01-22 17:31:17.687279: step: 600/459, loss: 0.038301244378089905 2023-01-22 17:31:18.284085: step: 602/459, loss: 0.03363842889666557 2023-01-22 17:31:18.873448: step: 604/459, loss: 0.008936560712754726 2023-01-22 17:31:19.491408: step: 606/459, loss: 0.04283912479877472 2023-01-22 17:31:20.125703: step: 608/459, loss: 0.03800855204463005 2023-01-22 17:31:20.711706: step: 610/459, loss: 0.038040246814489365 2023-01-22 17:31:21.316894: step: 612/459, loss: 0.06449640542268753 2023-01-22 17:31:21.926689: step: 614/459, loss: 0.006695647723972797 2023-01-22 17:31:22.624751: step: 616/459, loss: 0.03378463536500931 2023-01-22 17:31:23.294894: step: 618/459, loss: 0.021038146689534187 2023-01-22 17:31:23.904943: step: 620/459, loss: 0.012092210352420807 2023-01-22 17:31:24.536213: step: 622/459, loss: 0.019565846771001816 2023-01-22 17:31:25.128653: step: 624/459, loss: 0.05253884196281433 2023-01-22 17:31:25.754551: step: 626/459, loss: 0.018799472600221634 2023-01-22 17:31:26.376979: step: 628/459, loss: 0.017247764393687248 2023-01-22 17:31:27.024667: step: 630/459, loss: 0.009317516349256039 2023-01-22 17:31:27.649213: step: 632/459, loss: 0.009126310236752033 2023-01-22 17:31:28.259708: step: 634/459, loss: 0.0196724534034729 2023-01-22 17:31:28.852347: step: 636/459, loss: 1.6296930313110352 2023-01-22 17:31:29.534907: step: 638/459, loss: 0.014761727303266525 2023-01-22 17:31:30.140817: step: 640/459, loss: 0.2701621353626251 2023-01-22 17:31:30.704979: step: 642/459, loss: 0.11686715483665466 2023-01-22 17:31:31.350225: step: 644/459, loss: 0.02304048277437687 2023-01-22 17:31:31.951316: step: 646/459, loss: 0.013915415853261948 2023-01-22 17:31:32.660266: step: 648/459, loss: 0.015018954873085022 2023-01-22 17:31:33.281110: step: 650/459, loss: 0.03615604713559151 2023-01-22 17:31:33.901320: step: 652/459, loss: 2.5052621364593506 2023-01-22 17:31:34.511795: step: 654/459, loss: 0.011504078283905983 2023-01-22 17:31:35.097337: step: 656/459, loss: 0.01299243699759245 2023-01-22 17:31:35.772944: step: 658/459, loss: 0.0004307858762331307 2023-01-22 17:31:36.375590: step: 660/459, loss: 0.0295903030782938 2023-01-22 17:31:36.976061: step: 662/459, loss: 0.002934493590146303 2023-01-22 17:31:37.610576: step: 664/459, loss: 0.45567598938941956 2023-01-22 17:31:38.248619: step: 666/459, loss: 0.03139790520071983 2023-01-22 17:31:38.821458: step: 668/459, loss: 0.019774414598941803 2023-01-22 17:31:39.399274: step: 670/459, loss: 0.06496177613735199 2023-01-22 17:31:40.016216: step: 672/459, loss: 0.1425761580467224 2023-01-22 17:31:40.636303: step: 674/459, loss: 0.0629580095410347 2023-01-22 17:31:41.270977: step: 676/459, loss: 0.6909782290458679 2023-01-22 17:31:41.902664: step: 678/459, loss: 0.010260282084345818 2023-01-22 17:31:42.535037: step: 680/459, loss: 0.01557548251003027 2023-01-22 17:31:43.147109: step: 682/459, loss: 0.02428780496120453 2023-01-22 17:31:43.719600: step: 684/459, loss: 0.011366133578121662 2023-01-22 17:31:44.513537: step: 686/459, loss: 0.021274596452713013 2023-01-22 17:31:45.107352: step: 688/459, loss: 0.22677665948867798 2023-01-22 17:31:45.735460: step: 690/459, loss: 0.04704762250185013 2023-01-22 17:31:46.356494: step: 692/459, loss: 0.03088376484811306 2023-01-22 17:31:46.958398: step: 694/459, loss: 0.025558719411492348 2023-01-22 17:31:47.622695: step: 696/459, loss: 0.007346552796661854 2023-01-22 17:31:48.208900: step: 698/459, loss: 0.009055946953594685 2023-01-22 17:31:48.787048: step: 700/459, loss: 0.008503350429236889 2023-01-22 17:31:49.367572: step: 702/459, loss: 0.004163193050771952 2023-01-22 17:31:49.935673: step: 704/459, loss: 0.005185345187783241 2023-01-22 17:31:50.552645: step: 706/459, loss: 0.038668856024742126 2023-01-22 17:31:51.195335: step: 708/459, loss: 0.017605792731046677 2023-01-22 17:31:51.814896: step: 710/459, loss: 0.017790205776691437 2023-01-22 17:31:52.416983: step: 712/459, loss: 0.0637589767575264 2023-01-22 17:31:53.052875: step: 714/459, loss: 0.0016574130859225988 2023-01-22 17:31:53.663070: step: 716/459, loss: 0.055774953216314316 2023-01-22 17:31:54.284144: step: 718/459, loss: 0.0024685391690582037 2023-01-22 17:31:54.919947: step: 720/459, loss: 0.032189320772886276 2023-01-22 17:31:55.492907: step: 722/459, loss: 0.02751813642680645 2023-01-22 17:31:56.146019: step: 724/459, loss: 0.7445820569992065 2023-01-22 17:31:56.749793: step: 726/459, loss: 0.014363833703100681 2023-01-22 17:31:57.331643: step: 728/459, loss: 0.01391809992492199 2023-01-22 17:31:57.917145: step: 730/459, loss: 0.03580464795231819 2023-01-22 17:31:58.533896: step: 732/459, loss: 0.011702410876750946 2023-01-22 17:31:59.084438: step: 734/459, loss: 0.03685874864459038 2023-01-22 17:31:59.664598: step: 736/459, loss: 0.060033638030290604 2023-01-22 17:32:00.296905: step: 738/459, loss: 0.013653836213052273 2023-01-22 17:32:00.896466: step: 740/459, loss: 0.08850466459989548 2023-01-22 17:32:01.479093: step: 742/459, loss: 0.005042435601353645 2023-01-22 17:32:02.116961: step: 744/459, loss: 0.03475680947303772 2023-01-22 17:32:02.796868: step: 746/459, loss: 0.05060116574168205 2023-01-22 17:32:03.432122: step: 748/459, loss: 0.037060875445604324 2023-01-22 17:32:04.087422: step: 750/459, loss: 0.046955108642578125 2023-01-22 17:32:04.662178: step: 752/459, loss: 0.02120811492204666 2023-01-22 17:32:05.274304: step: 754/459, loss: 0.011390168219804764 2023-01-22 17:32:05.844776: step: 756/459, loss: 0.110373355448246 2023-01-22 17:32:06.459679: step: 758/459, loss: 0.03628093749284744 2023-01-22 17:32:07.089909: step: 760/459, loss: 0.2074795961380005 2023-01-22 17:32:07.659104: step: 762/459, loss: 0.25668150186538696 2023-01-22 17:32:08.253086: step: 764/459, loss: 0.007625050377100706 2023-01-22 17:32:08.875682: step: 766/459, loss: 0.006116581615060568 2023-01-22 17:32:09.501560: step: 768/459, loss: 0.008135228417813778 2023-01-22 17:32:10.110946: step: 770/459, loss: 0.097684346139431 2023-01-22 17:32:10.680393: step: 772/459, loss: 0.015662381425499916 2023-01-22 17:32:11.254260: step: 774/459, loss: 0.005629403050988913 2023-01-22 17:32:11.806044: step: 776/459, loss: 0.007022624369710684 2023-01-22 17:32:12.445299: step: 778/459, loss: 0.0941726341843605 2023-01-22 17:32:13.007366: step: 780/459, loss: 0.0012258451897650957 2023-01-22 17:32:13.555918: step: 782/459, loss: 0.2073964774608612 2023-01-22 17:32:14.290462: step: 784/459, loss: 0.08812826126813889 2023-01-22 17:32:14.921082: step: 786/459, loss: 0.04014333337545395 2023-01-22 17:32:15.528047: step: 788/459, loss: 0.008741251192986965 2023-01-22 17:32:16.155401: step: 790/459, loss: 0.03690696135163307 2023-01-22 17:32:16.860108: step: 792/459, loss: 0.06772755831480026 2023-01-22 17:32:17.510931: step: 794/459, loss: 0.03460952267050743 2023-01-22 17:32:18.132640: step: 796/459, loss: 0.04983443021774292 2023-01-22 17:32:18.737727: step: 798/459, loss: 0.04244077205657959 2023-01-22 17:32:19.470016: step: 800/459, loss: 0.04663332179188728 2023-01-22 17:32:20.128803: step: 802/459, loss: 0.041255731135606766 2023-01-22 17:32:20.710694: step: 804/459, loss: 0.00803519506007433 2023-01-22 17:32:21.406023: step: 806/459, loss: 0.13854587078094482 2023-01-22 17:32:22.091288: step: 808/459, loss: 0.07391373068094254 2023-01-22 17:32:22.769029: step: 810/459, loss: 0.023574309423565865 2023-01-22 17:32:23.412203: step: 812/459, loss: 0.028337424620985985 2023-01-22 17:32:24.029828: step: 814/459, loss: 0.03372451290488243 2023-01-22 17:32:24.635427: step: 816/459, loss: 0.048107970505952835 2023-01-22 17:32:25.233817: step: 818/459, loss: 0.019371582195162773 2023-01-22 17:32:25.882945: step: 820/459, loss: 0.06150500103831291 2023-01-22 17:32:26.490697: step: 822/459, loss: 0.052157774567604065 2023-01-22 17:32:27.205956: step: 824/459, loss: 0.022944316267967224 2023-01-22 17:32:27.793488: step: 826/459, loss: 0.15677478909492493 2023-01-22 17:32:28.411955: step: 828/459, loss: 0.04342680796980858 2023-01-22 17:32:29.064071: step: 830/459, loss: 0.0056196823716163635 2023-01-22 17:32:29.672765: step: 832/459, loss: 0.027646193280816078 2023-01-22 17:32:30.256028: step: 834/459, loss: 2.8180665969848633 2023-01-22 17:32:30.862799: step: 836/459, loss: 0.06536545604467392 2023-01-22 17:32:31.397389: step: 838/459, loss: 0.05563417077064514 2023-01-22 17:32:32.019668: step: 840/459, loss: 0.001480183214880526 2023-01-22 17:32:32.653918: step: 842/459, loss: 0.05436326190829277 2023-01-22 17:32:33.221799: step: 844/459, loss: 0.06171036511659622 2023-01-22 17:32:33.818769: step: 846/459, loss: 0.02820236049592495 2023-01-22 17:32:34.435541: step: 848/459, loss: 0.017604492604732513 2023-01-22 17:32:35.042359: step: 850/459, loss: 0.10797881335020065 2023-01-22 17:32:35.606190: step: 852/459, loss: 0.018329601734876633 2023-01-22 17:32:36.261629: step: 854/459, loss: 0.03452352434396744 2023-01-22 17:32:36.943374: step: 856/459, loss: 0.0092709269374609 2023-01-22 17:32:37.557846: step: 858/459, loss: 0.14209552109241486 2023-01-22 17:32:38.196212: step: 860/459, loss: 0.04317631945014 2023-01-22 17:32:38.813104: step: 862/459, loss: 0.01948372647166252 2023-01-22 17:32:39.436909: step: 864/459, loss: 0.02715989015996456 2023-01-22 17:32:40.020396: step: 866/459, loss: 0.03274625912308693 2023-01-22 17:32:40.637600: step: 868/459, loss: 0.05429830402135849 2023-01-22 17:32:41.218952: step: 870/459, loss: 0.0014986537862569094 2023-01-22 17:32:41.825241: step: 872/459, loss: 0.0021451711654663086 2023-01-22 17:32:42.464892: step: 874/459, loss: 0.005313169676810503 2023-01-22 17:32:43.097872: step: 876/459, loss: 0.02233775518834591 2023-01-22 17:32:43.672259: step: 878/459, loss: 0.03986942768096924 2023-01-22 17:32:44.268053: step: 880/459, loss: 0.0331604965031147 2023-01-22 17:32:44.914335: step: 882/459, loss: 0.10624771565198898 2023-01-22 17:32:45.472496: step: 884/459, loss: 0.03232525661587715 2023-01-22 17:32:46.085461: step: 886/459, loss: 0.019384941086173058 2023-01-22 17:32:46.710446: step: 888/459, loss: 0.038704775273799896 2023-01-22 17:32:47.331817: step: 890/459, loss: 0.049461543560028076 2023-01-22 17:32:47.939760: step: 892/459, loss: 0.045684490352869034 2023-01-22 17:32:48.578417: step: 894/459, loss: 0.014822143130004406 2023-01-22 17:32:49.244485: step: 896/459, loss: 0.01864583231508732 2023-01-22 17:32:49.788208: step: 898/459, loss: 0.00010603434930089861 2023-01-22 17:32:50.392985: step: 900/459, loss: 0.015569229610264301 2023-01-22 17:32:51.027492: step: 902/459, loss: 0.02844519354403019 2023-01-22 17:32:51.671113: step: 904/459, loss: 0.003523878753185272 2023-01-22 17:32:52.201913: step: 906/459, loss: 0.11548186093568802 2023-01-22 17:32:52.790414: step: 908/459, loss: 0.25323447585105896 2023-01-22 17:32:53.416595: step: 910/459, loss: 0.4727080762386322 2023-01-22 17:32:54.030546: step: 912/459, loss: 0.03169140964746475 2023-01-22 17:32:54.675857: step: 914/459, loss: 0.047139059752225876 2023-01-22 17:32:55.282070: step: 916/459, loss: 0.3105873763561249 2023-01-22 17:32:55.919780: step: 918/459, loss: 0.055480360984802246 2023-01-22 17:32:56.389578: step: 920/459, loss: 4.8872578190639615e-05 ================================================== Loss: 0.096 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29701829805996477, 'r': 0.3195623814041746, 'f1': 0.30787819926873866}, 'combined': 0.22685762051380742, 'epoch': 24} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3277896756102292, 'r': 0.31406963459287657, 'f1': 0.32078301897543804}, 'combined': 0.2053011321442803, 'epoch': 24} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3018645966709347, 'r': 0.3253493186130757, 'f1': 0.31316728933167287}, 'combined': 0.23075484477070632, 'epoch': 24} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3432826469652689, 'r': 0.3214175102158887, 'f1': 0.33199045463088506}, 'combined': 0.2124738909637664, 'epoch': 24} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3194539127564537, 'r': 0.3236971336090062, 'f1': 0.3215615257529618}, 'combined': 0.23694007160744554, 'epoch': 24} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3378972408464454, 'r': 0.3314229323995701, 'f1': 0.3346287737539649}, 'combined': 0.23992251703114464, 'epoch': 24} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2312925170068027, 'r': 0.32380952380952377, 'f1': 0.26984126984126977}, 'combined': 0.17989417989417983, 'epoch': 24} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.23125, 'r': 0.40217391304347827, 'f1': 0.29365079365079366}, 'combined': 0.14682539682539683, 'epoch': 24} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3125, 'r': 0.1724137931034483, 'f1': 0.22222222222222224}, 'combined': 0.14814814814814814, 'epoch': 24} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3066986908783784, 'r': 0.3445268026565465, 'f1': 0.3245140750670242}, 'combined': 0.23911563425991253, 'epoch': 19} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31271473684488904, 'r': 0.30873473110322686, 'f1': 0.31071198921642224}, 'combined': 0.1988556730985102, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2564102564102564, 'r': 0.38095238095238093, 'f1': 0.3065134099616858}, 'combined': 0.20434227330779053, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29418702499903554, 'r': 0.3594998939267151, 'f1': 0.32358060478117656}, 'combined': 0.23842781404928798, 'epoch': 13} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3202762492367604, 'r': 0.31212376289255195, 'f1': 0.31614745781013553}, 'combined': 0.20233437299848672, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2878787878787879, 'r': 0.41304347826086957, 'f1': 0.3392857142857143}, 'combined': 0.16964285714285715, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3189032241859154, 'r': 0.3376622373733222, 'f1': 0.32801474487694154}, 'combined': 0.24169507517248323, 'epoch': 15} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34157316773046453, 'r': 0.3008578947798814, 'f1': 0.3199253278791385}, 'combined': 0.22938042376240123, 'epoch': 15} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35294117647058826, 'r': 0.20689655172413793, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 15} ****************************** Epoch: 25 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 17:35:32.060279: step: 2/459, loss: 0.0009467432973906398 2023-01-22 17:35:32.690999: step: 4/459, loss: 0.07925505191087723 2023-01-22 17:35:33.321084: step: 6/459, loss: 0.013624582439661026 2023-01-22 17:35:34.018262: step: 8/459, loss: 0.011337261646986008 2023-01-22 17:35:34.600514: step: 10/459, loss: 0.05914195254445076 2023-01-22 17:35:35.228533: step: 12/459, loss: 0.009118347428739071 2023-01-22 17:35:35.812912: step: 14/459, loss: 0.028683025389909744 2023-01-22 17:35:36.382677: step: 16/459, loss: 4.464139783522114e-05 2023-01-22 17:35:36.970251: step: 18/459, loss: 0.006182099226862192 2023-01-22 17:35:37.641471: step: 20/459, loss: 0.014004338532686234 2023-01-22 17:35:38.299861: step: 22/459, loss: 0.0158466137945652 2023-01-22 17:35:38.878993: step: 24/459, loss: 0.009450498968362808 2023-01-22 17:35:39.526793: step: 26/459, loss: 0.13917171955108643 2023-01-22 17:35:40.097404: step: 28/459, loss: 0.0003115713188890368 2023-01-22 17:35:40.709822: step: 30/459, loss: 0.019104735925793648 2023-01-22 17:35:41.293824: step: 32/459, loss: 0.01834845170378685 2023-01-22 17:35:41.919689: step: 34/459, loss: 0.03261566907167435 2023-01-22 17:35:42.518302: step: 36/459, loss: 0.005552491173148155 2023-01-22 17:35:43.119684: step: 38/459, loss: 0.45042070746421814 2023-01-22 17:35:43.690742: step: 40/459, loss: 0.018667837604880333 2023-01-22 17:35:44.359870: step: 42/459, loss: 0.005000010132789612 2023-01-22 17:35:44.909888: step: 44/459, loss: 0.029388699680566788 2023-01-22 17:35:45.556580: step: 46/459, loss: 0.038266122341156006 2023-01-22 17:35:46.169327: step: 48/459, loss: 0.6106569766998291 2023-01-22 17:35:46.798038: step: 50/459, loss: 0.04925088956952095 2023-01-22 17:35:47.423433: step: 52/459, loss: 0.00038646103348582983 2023-01-22 17:35:48.097251: step: 54/459, loss: 0.03690173104405403 2023-01-22 17:35:48.717277: step: 56/459, loss: 0.00386582943610847 2023-01-22 17:35:49.351156: step: 58/459, loss: 0.011593441478908062 2023-01-22 17:35:49.949982: step: 60/459, loss: 0.005977432243525982 2023-01-22 17:35:50.567160: step: 62/459, loss: 0.25210779905319214 2023-01-22 17:35:51.226447: step: 64/459, loss: 0.019494449719786644 2023-01-22 17:35:51.904204: step: 66/459, loss: 0.008241018280386925 2023-01-22 17:35:52.536162: step: 68/459, loss: 0.22377799451351166 2023-01-22 17:35:53.241521: step: 70/459, loss: 0.021145876497030258 2023-01-22 17:35:53.892772: step: 72/459, loss: 0.04572727531194687 2023-01-22 17:35:54.569801: step: 74/459, loss: 0.11972815543413162 2023-01-22 17:35:55.188583: step: 76/459, loss: 0.009729252196848392 2023-01-22 17:35:55.791661: step: 78/459, loss: 0.01722775772213936 2023-01-22 17:35:56.330538: step: 80/459, loss: 0.011205905117094517 2023-01-22 17:35:56.969390: step: 82/459, loss: 0.044082436710596085 2023-01-22 17:35:57.637088: step: 84/459, loss: 0.03805164992809296 2023-01-22 17:35:58.235367: step: 86/459, loss: 0.017073851078748703 2023-01-22 17:35:58.953543: step: 88/459, loss: 0.11711540073156357 2023-01-22 17:35:59.595293: step: 90/459, loss: 0.001507702050730586 2023-01-22 17:36:00.223303: step: 92/459, loss: 0.37061870098114014 2023-01-22 17:36:00.822512: step: 94/459, loss: 0.003091650316491723 2023-01-22 17:36:01.434558: step: 96/459, loss: 0.005620060954242945 2023-01-22 17:36:02.165650: step: 98/459, loss: 0.03382515534758568 2023-01-22 17:36:02.752686: step: 100/459, loss: 0.03346991166472435 2023-01-22 17:36:03.418491: step: 102/459, loss: 0.0027941304724663496 2023-01-22 17:36:04.108012: step: 104/459, loss: 0.005370601546019316 2023-01-22 17:36:04.736438: step: 106/459, loss: 0.03972449153661728 2023-01-22 17:36:05.373477: step: 108/459, loss: 0.02761339582502842 2023-01-22 17:36:05.953549: step: 110/459, loss: 0.08138282597064972 2023-01-22 17:36:06.611374: step: 112/459, loss: 0.0013767267810180783 2023-01-22 17:36:07.184123: step: 114/459, loss: 0.0004905625828541815 2023-01-22 17:36:07.853520: step: 116/459, loss: 0.007863636128604412 2023-01-22 17:36:08.580210: step: 118/459, loss: 0.012611864134669304 2023-01-22 17:36:09.250570: step: 120/459, loss: 0.0445837564766407 2023-01-22 17:36:09.790008: step: 122/459, loss: 0.031200604513287544 2023-01-22 17:36:10.413994: step: 124/459, loss: 0.02877631224691868 2023-01-22 17:36:11.086404: step: 126/459, loss: 0.03386486694216728 2023-01-22 17:36:11.656909: step: 128/459, loss: 0.03264276683330536 2023-01-22 17:36:12.278320: step: 130/459, loss: 0.006072711199522018 2023-01-22 17:36:12.849371: step: 132/459, loss: 0.026521308347582817 2023-01-22 17:36:13.469951: step: 134/459, loss: 0.06037655100226402 2023-01-22 17:36:14.092262: step: 136/459, loss: 0.024353208020329475 2023-01-22 17:36:14.748068: step: 138/459, loss: 0.06337527930736542 2023-01-22 17:36:15.404260: step: 140/459, loss: 0.07489749789237976 2023-01-22 17:36:16.008309: step: 142/459, loss: 0.04324183985590935 2023-01-22 17:36:16.647388: step: 144/459, loss: 0.049920160323381424 2023-01-22 17:36:17.306890: step: 146/459, loss: 0.022399302572011948 2023-01-22 17:36:17.949149: step: 148/459, loss: 0.03725287690758705 2023-01-22 17:36:18.563370: step: 150/459, loss: 0.05850312486290932 2023-01-22 17:36:19.234728: step: 152/459, loss: 0.017003178596496582 2023-01-22 17:36:19.851322: step: 154/459, loss: 0.0025231700856238604 2023-01-22 17:36:20.492209: step: 156/459, loss: 0.03696570545434952 2023-01-22 17:36:21.127302: step: 158/459, loss: 0.01118171215057373 2023-01-22 17:36:21.802545: step: 160/459, loss: 0.022735651582479477 2023-01-22 17:36:22.378926: step: 162/459, loss: 0.002561112865805626 2023-01-22 17:36:22.992266: step: 164/459, loss: 0.008520194329321384 2023-01-22 17:36:23.614313: step: 166/459, loss: 0.04738996550440788 2023-01-22 17:36:24.249110: step: 168/459, loss: 0.024203529581427574 2023-01-22 17:36:24.907147: step: 170/459, loss: 0.1316337287425995 2023-01-22 17:36:25.528745: step: 172/459, loss: 0.10802776366472244 2023-01-22 17:36:26.097411: step: 174/459, loss: 0.018355030566453934 2023-01-22 17:36:26.710057: step: 176/459, loss: 0.02377774380147457 2023-01-22 17:36:27.278028: step: 178/459, loss: 0.1804056614637375 2023-01-22 17:36:27.968359: step: 180/459, loss: 0.0019356379052624106 2023-01-22 17:36:28.572401: step: 182/459, loss: 0.1686805933713913 2023-01-22 17:36:29.198743: step: 184/459, loss: 0.16219840943813324 2023-01-22 17:36:29.823970: step: 186/459, loss: 0.0007506382535211742 2023-01-22 17:36:30.398917: step: 188/459, loss: 0.007482224144041538 2023-01-22 17:36:31.039760: step: 190/459, loss: 0.04120827838778496 2023-01-22 17:36:31.666138: step: 192/459, loss: 0.0912710651755333 2023-01-22 17:36:32.289570: step: 194/459, loss: 0.30036598443984985 2023-01-22 17:36:32.895709: step: 196/459, loss: 0.018753916025161743 2023-01-22 17:36:33.473270: step: 198/459, loss: 0.08868653327226639 2023-01-22 17:36:34.110889: step: 200/459, loss: 0.014076277613639832 2023-01-22 17:36:34.730807: step: 202/459, loss: 0.029581787064671516 2023-01-22 17:36:35.342272: step: 204/459, loss: 0.011097093112766743 2023-01-22 17:36:35.931367: step: 206/459, loss: 0.016699761152267456 2023-01-22 17:36:36.563407: step: 208/459, loss: 0.006091120187193155 2023-01-22 17:36:37.196377: step: 210/459, loss: 0.013691221363842487 2023-01-22 17:36:37.884921: step: 212/459, loss: 0.0023301090113818645 2023-01-22 17:36:38.447751: step: 214/459, loss: 0.009676637127995491 2023-01-22 17:36:39.055491: step: 216/459, loss: 0.05751131847500801 2023-01-22 17:36:39.676536: step: 218/459, loss: 0.04126618802547455 2023-01-22 17:36:40.251486: step: 220/459, loss: 0.03665662556886673 2023-01-22 17:36:40.871100: step: 222/459, loss: 0.007308376021683216 2023-01-22 17:36:41.510306: step: 224/459, loss: 0.0020590275526046753 2023-01-22 17:36:42.158139: step: 226/459, loss: 0.021580729633569717 2023-01-22 17:36:42.875072: step: 228/459, loss: 0.002891826443374157 2023-01-22 17:36:43.599034: step: 230/459, loss: 0.0789915919303894 2023-01-22 17:36:44.253648: step: 232/459, loss: 0.006126896012574434 2023-01-22 17:36:44.886369: step: 234/459, loss: 0.028965193778276443 2023-01-22 17:36:45.486796: step: 236/459, loss: 0.04624786972999573 2023-01-22 17:36:46.170166: step: 238/459, loss: 0.0014110736083239317 2023-01-22 17:36:46.851208: step: 240/459, loss: 0.3240031599998474 2023-01-22 17:36:47.481166: step: 242/459, loss: 0.02006884478032589 2023-01-22 17:36:48.118132: step: 244/459, loss: 0.007014357950538397 2023-01-22 17:36:48.704328: step: 246/459, loss: 0.09020472317934036 2023-01-22 17:36:49.297027: step: 248/459, loss: 0.06198996305465698 2023-01-22 17:36:49.957280: step: 250/459, loss: 0.16061551868915558 2023-01-22 17:36:50.586386: step: 252/459, loss: 0.013582334853708744 2023-01-22 17:36:51.187551: step: 254/459, loss: 0.04635150730609894 2023-01-22 17:36:51.849147: step: 256/459, loss: 0.021270066499710083 2023-01-22 17:36:52.515940: step: 258/459, loss: 0.005876732990145683 2023-01-22 17:36:53.109531: step: 260/459, loss: 0.020502958446741104 2023-01-22 17:36:53.751786: step: 262/459, loss: 0.0015900400467216969 2023-01-22 17:36:54.391955: step: 264/459, loss: 0.009071974083781242 2023-01-22 17:36:55.066057: step: 266/459, loss: 0.006861268077045679 2023-01-22 17:36:55.673957: step: 268/459, loss: 0.012350348755717278 2023-01-22 17:36:56.264949: step: 270/459, loss: 0.008950939401984215 2023-01-22 17:36:56.831848: step: 272/459, loss: 0.015229074284434319 2023-01-22 17:36:57.459990: step: 274/459, loss: 0.03060613013803959 2023-01-22 17:36:58.182804: step: 276/459, loss: 0.016828155145049095 2023-01-22 17:36:58.784459: step: 278/459, loss: 0.03017355129122734 2023-01-22 17:36:59.439541: step: 280/459, loss: 0.030493909493088722 2023-01-22 17:37:00.214581: step: 282/459, loss: 0.017291417345404625 2023-01-22 17:37:00.914502: step: 284/459, loss: 0.0035357277374714613 2023-01-22 17:37:01.459542: step: 286/459, loss: 0.03147123008966446 2023-01-22 17:37:02.078549: step: 288/459, loss: 0.009722255170345306 2023-01-22 17:37:02.767184: step: 290/459, loss: 0.04919544979929924 2023-01-22 17:37:03.324746: step: 292/459, loss: 0.0961112454533577 2023-01-22 17:37:03.915925: step: 294/459, loss: 0.2610752582550049 2023-01-22 17:37:04.547536: step: 296/459, loss: 0.0013041550992056727 2023-01-22 17:37:05.151941: step: 298/459, loss: 0.02009868249297142 2023-01-22 17:37:05.856519: step: 300/459, loss: 0.011021619662642479 2023-01-22 17:37:06.492353: step: 302/459, loss: 0.045731838792562485 2023-01-22 17:37:07.067610: step: 304/459, loss: 0.008400876075029373 2023-01-22 17:37:07.707344: step: 306/459, loss: 0.044108014553785324 2023-01-22 17:37:08.413072: step: 308/459, loss: 0.02835063263773918 2023-01-22 17:37:09.068224: step: 310/459, loss: 0.0074440669268369675 2023-01-22 17:37:09.669717: step: 312/459, loss: 0.08330876380205154 2023-01-22 17:37:10.206776: step: 314/459, loss: 0.009069802239537239 2023-01-22 17:37:10.827726: step: 316/459, loss: 0.020042845979332924 2023-01-22 17:37:11.482792: step: 318/459, loss: 0.03044980764389038 2023-01-22 17:37:12.167783: step: 320/459, loss: 0.018629590049386024 2023-01-22 17:37:12.759822: step: 322/459, loss: 0.014651015400886536 2023-01-22 17:37:13.406501: step: 324/459, loss: 0.03582810238003731 2023-01-22 17:37:13.975077: step: 326/459, loss: 0.009560903534293175 2023-01-22 17:37:14.590312: step: 328/459, loss: 0.03766295313835144 2023-01-22 17:37:15.244963: step: 330/459, loss: 0.036018405109643936 2023-01-22 17:37:15.868212: step: 332/459, loss: 0.03508475795388222 2023-01-22 17:37:16.430025: step: 334/459, loss: 0.017647793516516685 2023-01-22 17:37:17.040446: step: 336/459, loss: 0.022001922130584717 2023-01-22 17:37:17.698321: step: 338/459, loss: 0.06960055977106094 2023-01-22 17:37:18.307390: step: 340/459, loss: 0.025852588936686516 2023-01-22 17:37:18.910407: step: 342/459, loss: 0.004259223118424416 2023-01-22 17:37:19.515151: step: 344/459, loss: 0.011951619759202003 2023-01-22 17:37:20.094934: step: 346/459, loss: 0.037990618497133255 2023-01-22 17:37:20.714477: step: 348/459, loss: 0.1984451413154602 2023-01-22 17:37:21.454653: step: 350/459, loss: 0.03260447829961777 2023-01-22 17:37:22.027232: step: 352/459, loss: 0.7111073732376099 2023-01-22 17:37:22.619373: step: 354/459, loss: 0.005898024421185255 2023-01-22 17:37:23.184362: step: 356/459, loss: 0.030215898528695107 2023-01-22 17:37:23.779484: step: 358/459, loss: 0.014576800167560577 2023-01-22 17:37:24.430153: step: 360/459, loss: 0.020522544160485268 2023-01-22 17:37:25.078525: step: 362/459, loss: 0.012895860709249973 2023-01-22 17:37:25.692682: step: 364/459, loss: 0.011322258971631527 2023-01-22 17:37:26.312313: step: 366/459, loss: 0.03550180420279503 2023-01-22 17:37:26.884678: step: 368/459, loss: 0.07011913508176804 2023-01-22 17:37:27.521656: step: 370/459, loss: 0.21281524002552032 2023-01-22 17:37:28.106881: step: 372/459, loss: 0.08080485463142395 2023-01-22 17:37:28.718271: step: 374/459, loss: 0.0616132877767086 2023-01-22 17:37:29.378477: step: 376/459, loss: 0.054426681250333786 2023-01-22 17:37:30.055614: step: 378/459, loss: 0.017212988808751106 2023-01-22 17:37:30.767648: step: 380/459, loss: 0.03147655725479126 2023-01-22 17:37:31.387335: step: 382/459, loss: 0.011109724640846252 2023-01-22 17:37:32.044498: step: 384/459, loss: 0.007119243498891592 2023-01-22 17:37:32.597284: step: 386/459, loss: 0.16559374332427979 2023-01-22 17:37:33.321086: step: 388/459, loss: 0.00023086532019078732 2023-01-22 17:37:33.891855: step: 390/459, loss: 0.006378692574799061 2023-01-22 17:37:34.508402: step: 392/459, loss: 0.016498487442731857 2023-01-22 17:37:35.122237: step: 394/459, loss: 0.05966395139694214 2023-01-22 17:37:35.700418: step: 396/459, loss: 0.018233709037303925 2023-01-22 17:37:36.305783: step: 398/459, loss: 0.004497546702623367 2023-01-22 17:37:36.926269: step: 400/459, loss: 0.02401125244796276 2023-01-22 17:37:37.547274: step: 402/459, loss: 0.00034420302836224437 2023-01-22 17:37:38.220455: step: 404/459, loss: 0.06223900243639946 2023-01-22 17:37:38.871672: step: 406/459, loss: 0.22028547525405884 2023-01-22 17:37:39.459682: step: 408/459, loss: 0.01960931532084942 2023-01-22 17:37:40.137692: step: 410/459, loss: 0.0435306653380394 2023-01-22 17:37:40.754313: step: 412/459, loss: 0.03889598324894905 2023-01-22 17:37:41.341143: step: 414/459, loss: 0.027374161407351494 2023-01-22 17:37:41.936954: step: 416/459, loss: 0.008799523115158081 2023-01-22 17:37:42.562220: step: 418/459, loss: 0.359744668006897 2023-01-22 17:37:43.179284: step: 420/459, loss: 0.027536282315850258 2023-01-22 17:37:43.772233: step: 422/459, loss: 0.0874478742480278 2023-01-22 17:37:44.344856: step: 424/459, loss: 0.0056052315048873425 2023-01-22 17:37:44.913811: step: 426/459, loss: 0.038594912737607956 2023-01-22 17:37:45.514822: step: 428/459, loss: 0.003436112077906728 2023-01-22 17:37:46.115126: step: 430/459, loss: 0.0018727319547906518 2023-01-22 17:37:46.723435: step: 432/459, loss: 0.02372078225016594 2023-01-22 17:37:47.356102: step: 434/459, loss: 0.0422201082110405 2023-01-22 17:37:47.968360: step: 436/459, loss: 0.017492033541202545 2023-01-22 17:37:48.589941: step: 438/459, loss: 0.06984184682369232 2023-01-22 17:37:49.177027: step: 440/459, loss: 0.011623811908066273 2023-01-22 17:37:49.781336: step: 442/459, loss: 0.006393024232238531 2023-01-22 17:37:50.380974: step: 444/459, loss: 0.05011364445090294 2023-01-22 17:37:51.026440: step: 446/459, loss: 0.17774483561515808 2023-01-22 17:37:51.664508: step: 448/459, loss: 0.001969914883375168 2023-01-22 17:37:52.247790: step: 450/459, loss: 0.0018369159661233425 2023-01-22 17:37:52.905146: step: 452/459, loss: 0.10141005367040634 2023-01-22 17:37:53.562882: step: 454/459, loss: 0.04642583429813385 2023-01-22 17:37:54.152930: step: 456/459, loss: 0.01805245131254196 2023-01-22 17:37:54.780608: step: 458/459, loss: 0.03523952141404152 2023-01-22 17:37:55.400408: step: 460/459, loss: 0.00944028701633215 2023-01-22 17:37:55.974690: step: 462/459, loss: 0.009793940000236034 2023-01-22 17:37:56.583644: step: 464/459, loss: 0.3698002099990845 2023-01-22 17:37:57.219450: step: 466/459, loss: 0.004128694534301758 2023-01-22 17:37:57.849627: step: 468/459, loss: 0.016522245481610298 2023-01-22 17:37:58.450767: step: 470/459, loss: 0.08059055358171463 2023-01-22 17:37:59.054551: step: 472/459, loss: 0.008874192833900452 2023-01-22 17:37:59.669669: step: 474/459, loss: 0.0007282074075192213 2023-01-22 17:38:00.327378: step: 476/459, loss: 0.0246480293571949 2023-01-22 17:38:00.962783: step: 478/459, loss: 0.016686689108610153 2023-01-22 17:38:01.614286: step: 480/459, loss: 0.05614545941352844 2023-01-22 17:38:02.172525: step: 482/459, loss: 0.03926508128643036 2023-01-22 17:38:02.832001: step: 484/459, loss: 0.042626235634088516 2023-01-22 17:38:03.474978: step: 486/459, loss: 0.027626903727650642 2023-01-22 17:38:04.101778: step: 488/459, loss: 0.04126422852277756 2023-01-22 17:38:04.748327: step: 490/459, loss: 0.05922874063253403 2023-01-22 17:38:05.405040: step: 492/459, loss: 0.006400101352483034 2023-01-22 17:38:06.104583: step: 494/459, loss: 0.02295343019068241 2023-01-22 17:38:06.740649: step: 496/459, loss: 0.004874182865023613 2023-01-22 17:38:07.313539: step: 498/459, loss: 0.08993487805128098 2023-01-22 17:38:07.884296: step: 500/459, loss: 0.04137659817934036 2023-01-22 17:38:08.545695: step: 502/459, loss: 0.07500283420085907 2023-01-22 17:38:09.131690: step: 504/459, loss: 0.001033448614180088 2023-01-22 17:38:09.720358: step: 506/459, loss: 0.02448935993015766 2023-01-22 17:38:10.350108: step: 508/459, loss: 0.08719567209482193 2023-01-22 17:38:10.967377: step: 510/459, loss: 0.015505757182836533 2023-01-22 17:38:11.622160: step: 512/459, loss: 0.1464967429637909 2023-01-22 17:38:12.193346: step: 514/459, loss: 0.7649298310279846 2023-01-22 17:38:12.834890: step: 516/459, loss: 0.014556948095560074 2023-01-22 17:38:13.409121: step: 518/459, loss: 0.6049810647964478 2023-01-22 17:38:14.124218: step: 520/459, loss: 0.07564927637577057 2023-01-22 17:38:14.731828: step: 522/459, loss: 0.004444632213562727 2023-01-22 17:38:15.375644: step: 524/459, loss: 0.07080993056297302 2023-01-22 17:38:16.009567: step: 526/459, loss: 0.031033148989081383 2023-01-22 17:38:16.623175: step: 528/459, loss: 1.2784172296524048 2023-01-22 17:38:17.238729: step: 530/459, loss: 0.6611230373382568 2023-01-22 17:38:17.867037: step: 532/459, loss: 0.05769646167755127 2023-01-22 17:38:18.478103: step: 534/459, loss: 0.021383190527558327 2023-01-22 17:38:19.016017: step: 536/459, loss: 0.027086997404694557 2023-01-22 17:38:19.658368: step: 538/459, loss: 0.5421872735023499 2023-01-22 17:38:20.236463: step: 540/459, loss: 0.05512205883860588 2023-01-22 17:38:20.849886: step: 542/459, loss: 0.006544857285916805 2023-01-22 17:38:21.461724: step: 544/459, loss: 0.0111231142655015 2023-01-22 17:38:22.120761: step: 546/459, loss: 0.4302021265029907 2023-01-22 17:38:22.851748: step: 548/459, loss: 0.28801941871643066 2023-01-22 17:38:23.446347: step: 550/459, loss: 0.080411396920681 2023-01-22 17:38:24.113939: step: 552/459, loss: 0.01213913131505251 2023-01-22 17:38:24.742921: step: 554/459, loss: 0.053495701402425766 2023-01-22 17:38:25.259952: step: 556/459, loss: 0.008357508108019829 2023-01-22 17:38:25.850569: step: 558/459, loss: 0.012444382533431053 2023-01-22 17:38:26.459054: step: 560/459, loss: 0.01279216818511486 2023-01-22 17:38:27.101925: step: 562/459, loss: 0.009143856354057789 2023-01-22 17:38:27.712926: step: 564/459, loss: 0.0061926585622131824 2023-01-22 17:38:28.349026: step: 566/459, loss: 0.04086308181285858 2023-01-22 17:38:28.961851: step: 568/459, loss: 0.008197430521249771 2023-01-22 17:38:29.646551: step: 570/459, loss: 0.03296371176838875 2023-01-22 17:38:30.218308: step: 572/459, loss: 0.08503114432096481 2023-01-22 17:38:30.868134: step: 574/459, loss: 0.048412173986434937 2023-01-22 17:38:31.464968: step: 576/459, loss: 0.032784994691610336 2023-01-22 17:38:32.071669: step: 578/459, loss: 0.019548499956727028 2023-01-22 17:38:32.740988: step: 580/459, loss: 0.012263496406376362 2023-01-22 17:38:33.352026: step: 582/459, loss: 0.0010267922189086676 2023-01-22 17:38:33.997792: step: 584/459, loss: 0.030179578810930252 2023-01-22 17:38:34.672021: step: 586/459, loss: 0.06247628852725029 2023-01-22 17:38:35.343577: step: 588/459, loss: 0.013151374645531178 2023-01-22 17:38:35.964498: step: 590/459, loss: 0.048208024352788925 2023-01-22 17:38:36.562097: step: 592/459, loss: 0.025025397539138794 2023-01-22 17:38:37.225233: step: 594/459, loss: 0.03781125321984291 2023-01-22 17:38:37.864055: step: 596/459, loss: 0.0008866806165315211 2023-01-22 17:38:38.482774: step: 598/459, loss: 0.021783089265227318 2023-01-22 17:38:39.129625: step: 600/459, loss: 0.001532072201371193 2023-01-22 17:38:39.723889: step: 602/459, loss: 0.02142316661775112 2023-01-22 17:38:40.377529: step: 604/459, loss: 0.04030454903841019 2023-01-22 17:38:40.982559: step: 606/459, loss: 0.11517372727394104 2023-01-22 17:38:41.553193: step: 608/459, loss: 0.08820441365242004 2023-01-22 17:38:42.152967: step: 610/459, loss: 0.02060212939977646 2023-01-22 17:38:42.759989: step: 612/459, loss: 0.0004976072232238948 2023-01-22 17:38:43.405412: step: 614/459, loss: 0.07415910065174103 2023-01-22 17:38:43.943078: step: 616/459, loss: 0.012820359319448471 2023-01-22 17:38:44.574880: step: 618/459, loss: 0.01416313648223877 2023-01-22 17:38:45.247123: step: 620/459, loss: 0.01810065098106861 2023-01-22 17:38:45.871770: step: 622/459, loss: 0.029502611607313156 2023-01-22 17:38:46.452931: step: 624/459, loss: 0.05277271568775177 2023-01-22 17:38:47.005823: step: 626/459, loss: 0.03382152318954468 2023-01-22 17:38:47.643610: step: 628/459, loss: 0.01987583190202713 2023-01-22 17:38:48.179235: step: 630/459, loss: 0.028389524668455124 2023-01-22 17:38:48.732329: step: 632/459, loss: 0.13439971208572388 2023-01-22 17:38:49.364898: step: 634/459, loss: 0.018408337607979774 2023-01-22 17:38:49.941406: step: 636/459, loss: 0.113894023001194 2023-01-22 17:38:50.551627: step: 638/459, loss: 0.021424425765872 2023-01-22 17:38:51.155175: step: 640/459, loss: 0.10276538878679276 2023-01-22 17:38:51.783225: step: 642/459, loss: 0.025133909657597542 2023-01-22 17:38:52.399027: step: 644/459, loss: 0.034376345574855804 2023-01-22 17:38:53.062883: step: 646/459, loss: 0.013961468823254108 2023-01-22 17:38:53.705255: step: 648/459, loss: 0.08655797690153122 2023-01-22 17:38:54.349305: step: 650/459, loss: 0.09898625314235687 2023-01-22 17:38:54.930245: step: 652/459, loss: 0.026188526302576065 2023-01-22 17:38:55.544882: step: 654/459, loss: 0.05163942649960518 2023-01-22 17:38:56.113682: step: 656/459, loss: 0.050688356161117554 2023-01-22 17:38:56.739507: step: 658/459, loss: 0.010328209027647972 2023-01-22 17:38:57.304667: step: 660/459, loss: 0.01997159793972969 2023-01-22 17:38:57.956510: step: 662/459, loss: 0.04130484163761139 2023-01-22 17:38:58.583268: step: 664/459, loss: 0.020578261464834213 2023-01-22 17:38:59.180003: step: 666/459, loss: 0.01038252841681242 2023-01-22 17:38:59.772521: step: 668/459, loss: 0.003250379115343094 2023-01-22 17:39:00.356689: step: 670/459, loss: 0.020241739228367805 2023-01-22 17:39:00.994313: step: 672/459, loss: 0.003436553990468383 2023-01-22 17:39:01.609211: step: 674/459, loss: 0.3232094943523407 2023-01-22 17:39:02.235788: step: 676/459, loss: 0.02783888205885887 2023-01-22 17:39:02.842149: step: 678/459, loss: 0.012669711373746395 2023-01-22 17:39:03.491186: step: 680/459, loss: 0.030733788385987282 2023-01-22 17:39:04.158586: step: 682/459, loss: 0.18667519092559814 2023-01-22 17:39:04.796174: step: 684/459, loss: 0.034885674715042114 2023-01-22 17:39:05.378492: step: 686/459, loss: 0.024218467995524406 2023-01-22 17:39:06.003029: step: 688/459, loss: 0.054170235991477966 2023-01-22 17:39:06.652081: step: 690/459, loss: 0.015353420749306679 2023-01-22 17:39:07.274151: step: 692/459, loss: 0.039651528000831604 2023-01-22 17:39:07.892948: step: 694/459, loss: 0.01988074742257595 2023-01-22 17:39:08.459731: step: 696/459, loss: 0.006527806632220745 2023-01-22 17:39:09.086825: step: 698/459, loss: 0.04037994146347046 2023-01-22 17:39:09.721418: step: 700/459, loss: 0.000785886193625629 2023-01-22 17:39:10.297368: step: 702/459, loss: 0.005700233392417431 2023-01-22 17:39:10.843830: step: 704/459, loss: 0.024164069443941116 2023-01-22 17:39:11.402248: step: 706/459, loss: 0.011742044240236282 2023-01-22 17:39:12.035688: step: 708/459, loss: 0.04478015750646591 2023-01-22 17:39:12.664232: step: 710/459, loss: 0.022125156596302986 2023-01-22 17:39:13.248797: step: 712/459, loss: 0.009025130420923233 2023-01-22 17:39:13.860452: step: 714/459, loss: 0.0017980127595365047 2023-01-22 17:39:14.469228: step: 716/459, loss: 0.013881674036383629 2023-01-22 17:39:15.045002: step: 718/459, loss: 0.01303835865110159 2023-01-22 17:39:15.682680: step: 720/459, loss: 0.024789830669760704 2023-01-22 17:39:16.291361: step: 722/459, loss: 0.012207699939608574 2023-01-22 17:39:16.925427: step: 724/459, loss: 0.0370994433760643 2023-01-22 17:39:17.565260: step: 726/459, loss: 0.06568877398967743 2023-01-22 17:39:18.171702: step: 728/459, loss: 0.0506381094455719 2023-01-22 17:39:18.832354: step: 730/459, loss: 0.0031644045375287533 2023-01-22 17:39:19.435531: step: 732/459, loss: 0.02021116018295288 2023-01-22 17:39:19.982780: step: 734/459, loss: 0.11282217502593994 2023-01-22 17:39:20.635183: step: 736/459, loss: 0.036181602627038956 2023-01-22 17:39:21.245975: step: 738/459, loss: 0.032428078353405 2023-01-22 17:39:21.788828: step: 740/459, loss: 0.01750682108104229 2023-01-22 17:39:22.407077: step: 742/459, loss: 0.05737170949578285 2023-01-22 17:39:23.086043: step: 744/459, loss: 0.011510637588799 2023-01-22 17:39:23.671335: step: 746/459, loss: 0.007332121953368187 2023-01-22 17:39:24.242723: step: 748/459, loss: 0.0084617855027318 2023-01-22 17:39:24.803400: step: 750/459, loss: 0.05557670816779137 2023-01-22 17:39:25.423129: step: 752/459, loss: 0.004147137049585581 2023-01-22 17:39:26.055802: step: 754/459, loss: 0.04099806398153305 2023-01-22 17:39:26.721788: step: 756/459, loss: 0.002565014874562621 2023-01-22 17:39:27.363934: step: 758/459, loss: 0.049986038357019424 2023-01-22 17:39:27.966753: step: 760/459, loss: 0.020575840026140213 2023-01-22 17:39:28.563041: step: 762/459, loss: 0.050979599356651306 2023-01-22 17:39:29.201962: step: 764/459, loss: 0.001958137145265937 2023-01-22 17:39:29.815933: step: 766/459, loss: 0.007274237461388111 2023-01-22 17:39:30.492416: step: 768/459, loss: 0.11564977467060089 2023-01-22 17:39:31.130182: step: 770/459, loss: 0.01864958554506302 2023-01-22 17:39:31.753777: step: 772/459, loss: 0.03821555897593498 2023-01-22 17:39:32.473741: step: 774/459, loss: 0.08021369576454163 2023-01-22 17:39:33.075132: step: 776/459, loss: 0.031246405094861984 2023-01-22 17:39:33.819423: step: 778/459, loss: 0.0022721984423696995 2023-01-22 17:39:34.391641: step: 780/459, loss: 0.021205415949225426 2023-01-22 17:39:35.031673: step: 782/459, loss: 0.06518136709928513 2023-01-22 17:39:35.601999: step: 784/459, loss: 0.0057944003492593765 2023-01-22 17:39:36.226321: step: 786/459, loss: 0.03307517617940903 2023-01-22 17:39:36.819582: step: 788/459, loss: 0.0040773432701826096 2023-01-22 17:39:37.487370: step: 790/459, loss: 0.011110742576420307 2023-01-22 17:39:38.135483: step: 792/459, loss: 1.0083941221237183 2023-01-22 17:39:38.757328: step: 794/459, loss: 0.005325483623892069 2023-01-22 17:39:39.425678: step: 796/459, loss: 0.16236767172813416 2023-01-22 17:39:40.105940: step: 798/459, loss: 0.034570515155792236 2023-01-22 17:39:40.674488: step: 800/459, loss: 0.007792357355356216 2023-01-22 17:39:41.333627: step: 802/459, loss: 0.08292108029127121 2023-01-22 17:39:41.943815: step: 804/459, loss: 0.024574004113674164 2023-01-22 17:39:42.596155: step: 806/459, loss: 0.05385570973157883 2023-01-22 17:39:43.180991: step: 808/459, loss: 0.015066668391227722 2023-01-22 17:39:43.789893: step: 810/459, loss: 0.05436383932828903 2023-01-22 17:39:44.365171: step: 812/459, loss: 0.019697362557053566 2023-01-22 17:39:44.985149: step: 814/459, loss: 0.053512092679739 2023-01-22 17:39:45.580904: step: 816/459, loss: 0.04049590602517128 2023-01-22 17:39:46.247003: step: 818/459, loss: 0.005127241834998131 2023-01-22 17:39:46.876473: step: 820/459, loss: 0.032480236142873764 2023-01-22 17:39:47.495410: step: 822/459, loss: 0.06649237871170044 2023-01-22 17:39:48.114081: step: 824/459, loss: 0.04174155369400978 2023-01-22 17:39:48.701211: step: 826/459, loss: 0.012561355717480183 2023-01-22 17:39:49.273592: step: 828/459, loss: 0.008859534747898579 2023-01-22 17:39:49.970811: step: 830/459, loss: 0.010361015796661377 2023-01-22 17:39:50.584964: step: 832/459, loss: 0.016979224979877472 2023-01-22 17:39:51.235662: step: 834/459, loss: 0.01138446293771267 2023-01-22 17:39:51.828547: step: 836/459, loss: 0.3285406231880188 2023-01-22 17:39:52.509922: step: 838/459, loss: 0.05122237652540207 2023-01-22 17:39:53.118606: step: 840/459, loss: 0.020081913098692894 2023-01-22 17:39:53.680375: step: 842/459, loss: 0.008089934475719929 2023-01-22 17:39:54.252032: step: 844/459, loss: 0.05861780792474747 2023-01-22 17:39:54.841666: step: 846/459, loss: 0.05633487179875374 2023-01-22 17:39:55.398163: step: 848/459, loss: 0.021960807964205742 2023-01-22 17:39:56.039837: step: 850/459, loss: 0.27692073583602905 2023-01-22 17:39:56.629347: step: 852/459, loss: 0.0470670647919178 2023-01-22 17:39:57.237697: step: 854/459, loss: 0.018454622477293015 2023-01-22 17:39:57.879322: step: 856/459, loss: 0.03967173025012016 2023-01-22 17:39:58.415701: step: 858/459, loss: 0.01956779696047306 2023-01-22 17:39:59.026396: step: 860/459, loss: 0.01996075175702572 2023-01-22 17:39:59.638852: step: 862/459, loss: 0.0011321466881781816 2023-01-22 17:40:00.220658: step: 864/459, loss: 0.018521223217248917 2023-01-22 17:40:00.827353: step: 866/459, loss: 0.04263531044125557 2023-01-22 17:40:01.405950: step: 868/459, loss: 0.009262536652386189 2023-01-22 17:40:02.018759: step: 870/459, loss: 0.09623543173074722 2023-01-22 17:40:02.654171: step: 872/459, loss: 0.35052594542503357 2023-01-22 17:40:03.320451: step: 874/459, loss: 0.03078421764075756 2023-01-22 17:40:03.859111: step: 876/459, loss: 0.0038462590891867876 2023-01-22 17:40:04.398892: step: 878/459, loss: 0.0597432516515255 2023-01-22 17:40:05.006045: step: 880/459, loss: 0.0880049467086792 2023-01-22 17:40:05.650227: step: 882/459, loss: 0.08492162078619003 2023-01-22 17:40:06.281392: step: 884/459, loss: 0.055516283959150314 2023-01-22 17:40:06.883040: step: 886/459, loss: 0.051844630390405655 2023-01-22 17:40:07.394698: step: 888/459, loss: 0.3050972819328308 2023-01-22 17:40:08.006779: step: 890/459, loss: 0.01938626542687416 2023-01-22 17:40:08.591335: step: 892/459, loss: 0.006880119442939758 2023-01-22 17:40:09.211847: step: 894/459, loss: 0.004236876033246517 2023-01-22 17:40:09.805658: step: 896/459, loss: 0.4391285479068756 2023-01-22 17:40:10.454395: step: 898/459, loss: 0.059718526899814606 2023-01-22 17:40:11.077250: step: 900/459, loss: 0.02687220461666584 2023-01-22 17:40:11.714474: step: 902/459, loss: 0.18877536058425903 2023-01-22 17:40:12.279437: step: 904/459, loss: 0.032363634556531906 2023-01-22 17:40:12.882757: step: 906/459, loss: 0.06496664881706238 2023-01-22 17:40:13.530517: step: 908/459, loss: 0.03103497438132763 2023-01-22 17:40:14.196101: step: 910/459, loss: 0.06397145986557007 2023-01-22 17:40:14.752528: step: 912/459, loss: 0.008311821147799492 2023-01-22 17:40:15.412948: step: 914/459, loss: 0.04532633349299431 2023-01-22 17:40:16.087173: step: 916/459, loss: 1.928360939025879 2023-01-22 17:40:16.687954: step: 918/459, loss: 0.01741509512066841 2023-01-22 17:40:17.146495: step: 920/459, loss: 1.4305103945844166e-07 ================================================== Loss: 0.063 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28876906706421224, 'r': 0.33370087636073104, 'f1': 0.309613313102298}, 'combined': 0.22813612544379852, 'epoch': 25} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3314494809847326, 'r': 0.3209033611352184, 'f1': 0.32609117528751985}, 'combined': 0.20869835218401267, 'epoch': 25} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2972784182699369, 'r': 0.349175219941349, 'f1': 0.32114370141202603}, 'combined': 0.23663220104044022, 'epoch': 25} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3397855985866235, 'r': 0.3221625056662981, 'f1': 0.3307394616788992}, 'combined': 0.21167325547449548, 'epoch': 25} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3139955611307359, 'r': 0.34736131335715187, 'f1': 0.3298367786292235}, 'combined': 0.2430376263583752, 'epoch': 25} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3403014146979324, 'r': 0.32916427748963645, 'f1': 0.33464020816876167}, 'combined': 0.23993071529081028, 'epoch': 25} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.19270833333333331, 'r': 0.35238095238095235, 'f1': 0.24915824915824908}, 'combined': 0.1661054994388327, 'epoch': 25} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.23717948717948717, 'r': 0.40217391304347827, 'f1': 0.2983870967741935}, 'combined': 0.14919354838709675, 'epoch': 25} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29411764705882354, 'r': 0.1724137931034483, 'f1': 0.2173913043478261}, 'combined': 0.14492753623188406, 'epoch': 25} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3066986908783784, 'r': 0.3445268026565465, 'f1': 0.3245140750670242}, 'combined': 0.23911563425991253, 'epoch': 19} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31271473684488904, 'r': 0.30873473110322686, 'f1': 0.31071198921642224}, 'combined': 0.1988556730985102, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2564102564102564, 'r': 0.38095238095238093, 'f1': 0.3065134099616858}, 'combined': 0.20434227330779053, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29418702499903554, 'r': 0.3594998939267151, 'f1': 0.32358060478117656}, 'combined': 0.23842781404928798, 'epoch': 13} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3202762492367604, 'r': 0.31212376289255195, 'f1': 0.31614745781013553}, 'combined': 0.20233437299848672, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2878787878787879, 'r': 0.41304347826086957, 'f1': 0.3392857142857143}, 'combined': 0.16964285714285715, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3189032241859154, 'r': 0.3376622373733222, 'f1': 0.32801474487694154}, 'combined': 0.24169507517248323, 'epoch': 15} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34157316773046453, 'r': 0.3008578947798814, 'f1': 0.3199253278791385}, 'combined': 0.22938042376240123, 'epoch': 15} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35294117647058826, 'r': 0.20689655172413793, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 15} ****************************** Epoch: 26 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 17:42:54.002331: step: 2/459, loss: 0.03934232518076897 2023-01-22 17:42:54.651774: step: 4/459, loss: 0.04295053333044052 2023-01-22 17:42:55.246424: step: 6/459, loss: 0.01736203022301197 2023-01-22 17:42:55.819460: step: 8/459, loss: 0.01184728741645813 2023-01-22 17:42:56.405660: step: 10/459, loss: 0.02145238406956196 2023-01-22 17:42:57.014702: step: 12/459, loss: 0.007710769306868315 2023-01-22 17:42:57.667826: step: 14/459, loss: 0.0005474172066897154 2023-01-22 17:42:58.291606: step: 16/459, loss: 0.0028862743638455868 2023-01-22 17:42:58.890294: step: 18/459, loss: 0.02119308151304722 2023-01-22 17:42:59.470202: step: 20/459, loss: 0.025260213762521744 2023-01-22 17:43:00.067950: step: 22/459, loss: 0.002610695781186223 2023-01-22 17:43:00.656423: step: 24/459, loss: 0.024868709966540337 2023-01-22 17:43:01.305648: step: 26/459, loss: 0.0016135128680616617 2023-01-22 17:43:01.971594: step: 28/459, loss: 0.02676362730562687 2023-01-22 17:43:02.563678: step: 30/459, loss: 0.2145170122385025 2023-01-22 17:43:03.193621: step: 32/459, loss: 0.058131806552410126 2023-01-22 17:43:03.870946: step: 34/459, loss: 0.026129554957151413 2023-01-22 17:43:04.400358: step: 36/459, loss: 0.00022864187485538423 2023-01-22 17:43:04.975269: step: 38/459, loss: 0.03963181748986244 2023-01-22 17:43:05.639434: step: 40/459, loss: 0.05041920021176338 2023-01-22 17:43:06.194220: step: 42/459, loss: 0.09955771267414093 2023-01-22 17:43:06.843252: step: 44/459, loss: 0.006006279028952122 2023-01-22 17:43:07.377164: step: 46/459, loss: 0.05150366574525833 2023-01-22 17:43:07.979320: step: 48/459, loss: 0.0014149330090731382 2023-01-22 17:43:08.654226: step: 50/459, loss: 0.47332075238227844 2023-01-22 17:43:09.331611: step: 52/459, loss: 0.010471108369529247 2023-01-22 17:43:10.014586: step: 54/459, loss: 0.013910415582358837 2023-01-22 17:43:10.583657: step: 56/459, loss: 0.002842640969902277 2023-01-22 17:43:11.233404: step: 58/459, loss: 0.056149378418922424 2023-01-22 17:43:11.863839: step: 60/459, loss: 0.0028798647690564394 2023-01-22 17:43:12.419283: step: 62/459, loss: 0.012677257880568504 2023-01-22 17:43:13.010775: step: 64/459, loss: 0.02466837875545025 2023-01-22 17:43:13.666916: step: 66/459, loss: 0.04925774782896042 2023-01-22 17:43:14.333918: step: 68/459, loss: 0.026911649852991104 2023-01-22 17:43:14.962992: step: 70/459, loss: 0.1015445739030838 2023-01-22 17:43:15.553289: step: 72/459, loss: 0.005114750470966101 2023-01-22 17:43:16.289241: step: 74/459, loss: 0.019842077046632767 2023-01-22 17:43:16.862920: step: 76/459, loss: 0.020931938663125038 2023-01-22 17:43:17.446076: step: 78/459, loss: 0.014168748632073402 2023-01-22 17:43:18.084261: step: 80/459, loss: 0.8942745923995972 2023-01-22 17:43:18.711623: step: 82/459, loss: 0.06326597929000854 2023-01-22 17:43:19.359008: step: 84/459, loss: 0.053016502410173416 2023-01-22 17:43:19.997261: step: 86/459, loss: 0.016204705461859703 2023-01-22 17:43:20.635211: step: 88/459, loss: 0.00807035993784666 2023-01-22 17:43:21.239701: step: 90/459, loss: 0.018629757687449455 2023-01-22 17:43:21.863997: step: 92/459, loss: 0.010417163372039795 2023-01-22 17:43:22.474012: step: 94/459, loss: 0.005843418184667826 2023-01-22 17:43:23.101188: step: 96/459, loss: 0.7130578756332397 2023-01-22 17:43:23.704100: step: 98/459, loss: 0.06139165908098221 2023-01-22 17:43:24.317645: step: 100/459, loss: 0.04972994327545166 2023-01-22 17:43:24.947496: step: 102/459, loss: 0.025313660502433777 2023-01-22 17:43:25.560685: step: 104/459, loss: 0.011807645671069622 2023-01-22 17:43:26.194959: step: 106/459, loss: 0.038285546004772186 2023-01-22 17:43:26.802442: step: 108/459, loss: 0.010912319645285606 2023-01-22 17:43:27.370045: step: 110/459, loss: 0.06298273056745529 2023-01-22 17:43:28.053204: step: 112/459, loss: 0.1365220546722412 2023-01-22 17:43:28.722081: step: 114/459, loss: 0.2845999300479889 2023-01-22 17:43:29.372482: step: 116/459, loss: 0.03728961944580078 2023-01-22 17:43:29.935413: step: 118/459, loss: 0.013370448723435402 2023-01-22 17:43:30.578055: step: 120/459, loss: 0.042746949940919876 2023-01-22 17:43:31.183836: step: 122/459, loss: 0.020087918266654015 2023-01-22 17:43:31.746433: step: 124/459, loss: 0.00042227699304930866 2023-01-22 17:43:32.384141: step: 126/459, loss: 0.8658772706985474 2023-01-22 17:43:32.994161: step: 128/459, loss: 0.12511172890663147 2023-01-22 17:43:33.620472: step: 130/459, loss: 0.003920333459973335 2023-01-22 17:43:34.305889: step: 132/459, loss: 0.06641004234552383 2023-01-22 17:43:34.877318: step: 134/459, loss: 0.0025893182028084993 2023-01-22 17:43:35.474543: step: 136/459, loss: 0.05865073204040527 2023-01-22 17:43:36.058298: step: 138/459, loss: 0.006447951775044203 2023-01-22 17:43:36.654712: step: 140/459, loss: 0.032987289130687714 2023-01-22 17:43:37.231737: step: 142/459, loss: 14.857780456542969 2023-01-22 17:43:37.824970: step: 144/459, loss: 0.08818661421537399 2023-01-22 17:43:38.408992: step: 146/459, loss: 0.051506754010915756 2023-01-22 17:43:39.097779: step: 148/459, loss: 0.004870849661529064 2023-01-22 17:43:39.642923: step: 150/459, loss: 0.040570322424173355 2023-01-22 17:43:40.185728: step: 152/459, loss: 0.023720022290945053 2023-01-22 17:43:40.810555: step: 154/459, loss: 0.0408426858484745 2023-01-22 17:43:41.399782: step: 156/459, loss: 0.012061728164553642 2023-01-22 17:43:42.038507: step: 158/459, loss: 0.01570805534720421 2023-01-22 17:43:42.652550: step: 160/459, loss: 0.020848477259278297 2023-01-22 17:43:43.311600: step: 162/459, loss: 0.04653196781873703 2023-01-22 17:43:43.990402: step: 164/459, loss: 0.06920981407165527 2023-01-22 17:43:44.623993: step: 166/459, loss: 0.011564026586711407 2023-01-22 17:43:45.218660: step: 168/459, loss: 0.017870556563138962 2023-01-22 17:43:45.796041: step: 170/459, loss: 0.00672060064971447 2023-01-22 17:43:46.389418: step: 172/459, loss: 0.005121238995343447 2023-01-22 17:43:46.981063: step: 174/459, loss: 0.2495914101600647 2023-01-22 17:43:47.606593: step: 176/459, loss: 0.004440987017005682 2023-01-22 17:43:48.232532: step: 178/459, loss: 0.02428465709090233 2023-01-22 17:43:48.912816: step: 180/459, loss: 0.14434745907783508 2023-01-22 17:43:49.569773: step: 182/459, loss: 0.11638445407152176 2023-01-22 17:43:50.195246: step: 184/459, loss: 0.14655955135822296 2023-01-22 17:43:50.869944: step: 186/459, loss: 0.23068393766880035 2023-01-22 17:43:51.507523: step: 188/459, loss: 0.011270434595644474 2023-01-22 17:43:52.163694: step: 190/459, loss: 0.01912982203066349 2023-01-22 17:43:52.776059: step: 192/459, loss: 0.024335214868187904 2023-01-22 17:43:53.428176: step: 194/459, loss: 0.05015501379966736 2023-01-22 17:43:54.099792: step: 196/459, loss: 0.0613505020737648 2023-01-22 17:43:54.727486: step: 198/459, loss: 0.07341717928647995 2023-01-22 17:43:55.338008: step: 200/459, loss: 0.08364692330360413 2023-01-22 17:43:55.980629: step: 202/459, loss: 0.0211639404296875 2023-01-22 17:43:56.568393: step: 204/459, loss: 0.014088904485106468 2023-01-22 17:43:57.235117: step: 206/459, loss: 0.028115034103393555 2023-01-22 17:43:57.802286: step: 208/459, loss: 0.004927425179630518 2023-01-22 17:43:58.452597: step: 210/459, loss: 0.4585459530353546 2023-01-22 17:43:59.022746: step: 212/459, loss: 0.035541024059057236 2023-01-22 17:43:59.659973: step: 214/459, loss: 0.02124963514506817 2023-01-22 17:44:00.226430: step: 216/459, loss: 0.029192065820097923 2023-01-22 17:44:00.832269: step: 218/459, loss: 0.015813248232007027 2023-01-22 17:44:01.490325: step: 220/459, loss: 0.03373774513602257 2023-01-22 17:44:02.049590: step: 222/459, loss: 0.10861983895301819 2023-01-22 17:44:02.742347: step: 224/459, loss: 0.005047668237239122 2023-01-22 17:44:03.343997: step: 226/459, loss: 0.17422474920749664 2023-01-22 17:44:03.947629: step: 228/459, loss: 0.01607690379023552 2023-01-22 17:44:04.642958: step: 230/459, loss: 0.2607530355453491 2023-01-22 17:44:05.301204: step: 232/459, loss: 0.006331949029117823 2023-01-22 17:44:05.901326: step: 234/459, loss: 0.00829971581697464 2023-01-22 17:44:06.543401: step: 236/459, loss: 0.2566775977611542 2023-01-22 17:44:07.099693: step: 238/459, loss: 0.0478798970580101 2023-01-22 17:44:07.784356: step: 240/459, loss: 0.07924435287714005 2023-01-22 17:44:08.424293: step: 242/459, loss: 0.019187506288290024 2023-01-22 17:44:09.095052: step: 244/459, loss: 0.018264951184391975 2023-01-22 17:44:09.685381: step: 246/459, loss: 0.024430306628346443 2023-01-22 17:44:10.332680: step: 248/459, loss: 0.01695755124092102 2023-01-22 17:44:10.911725: step: 250/459, loss: 0.003953781444579363 2023-01-22 17:44:11.620261: step: 252/459, loss: 0.015563243068754673 2023-01-22 17:44:12.226267: step: 254/459, loss: 0.01803540624678135 2023-01-22 17:44:12.851427: step: 256/459, loss: 0.08067846298217773 2023-01-22 17:44:13.462978: step: 258/459, loss: 0.004448754712939262 2023-01-22 17:44:14.080566: step: 260/459, loss: 0.1078592836856842 2023-01-22 17:44:14.701556: step: 262/459, loss: 0.03413406014442444 2023-01-22 17:44:15.321321: step: 264/459, loss: 0.12701265513896942 2023-01-22 17:44:15.889214: step: 266/459, loss: 0.026038825511932373 2023-01-22 17:44:16.536428: step: 268/459, loss: 0.060274869203567505 2023-01-22 17:44:17.157388: step: 270/459, loss: 0.026890775188803673 2023-01-22 17:44:17.753748: step: 272/459, loss: 0.004405649844557047 2023-01-22 17:44:18.301705: step: 274/459, loss: 0.04335831478238106 2023-01-22 17:44:18.922395: step: 276/459, loss: 0.037310708314180374 2023-01-22 17:44:19.542842: step: 278/459, loss: 0.04104654863476753 2023-01-22 17:44:20.154920: step: 280/459, loss: 0.03756015747785568 2023-01-22 17:44:20.951743: step: 282/459, loss: 0.005292600952088833 2023-01-22 17:44:21.534096: step: 284/459, loss: 0.037213221192359924 2023-01-22 17:44:22.173520: step: 286/459, loss: 0.027246497571468353 2023-01-22 17:44:22.731550: step: 288/459, loss: 0.019284356385469437 2023-01-22 17:44:23.357762: step: 290/459, loss: 0.010061700828373432 2023-01-22 17:44:23.983915: step: 292/459, loss: 0.016839392483234406 2023-01-22 17:44:24.672119: step: 294/459, loss: 0.05066603049635887 2023-01-22 17:44:25.261683: step: 296/459, loss: 0.09841960668563843 2023-01-22 17:44:25.811302: step: 298/459, loss: 0.16017819941043854 2023-01-22 17:44:26.461241: step: 300/459, loss: 0.01965760439634323 2023-01-22 17:44:27.067919: step: 302/459, loss: 0.023093635216355324 2023-01-22 17:44:27.629578: step: 304/459, loss: 0.3197527229785919 2023-01-22 17:44:28.201515: step: 306/459, loss: 0.0014295437140390277 2023-01-22 17:44:28.870828: step: 308/459, loss: 0.04679868370294571 2023-01-22 17:44:29.449507: step: 310/459, loss: 0.002305374015122652 2023-01-22 17:44:30.073937: step: 312/459, loss: 0.00915362685918808 2023-01-22 17:44:30.757002: step: 314/459, loss: 6.984035491943359 2023-01-22 17:44:31.353508: step: 316/459, loss: 0.012210165150463581 2023-01-22 17:44:31.972360: step: 318/459, loss: 0.051281530410051346 2023-01-22 17:44:32.674377: step: 320/459, loss: 0.051499143242836 2023-01-22 17:44:33.285354: step: 322/459, loss: 0.025917042046785355 2023-01-22 17:44:33.902185: step: 324/459, loss: 0.0239424929022789 2023-01-22 17:44:34.544441: step: 326/459, loss: 0.058225855231285095 2023-01-22 17:44:35.164441: step: 328/459, loss: 0.22749005258083344 2023-01-22 17:44:35.794938: step: 330/459, loss: 0.04050732031464577 2023-01-22 17:44:36.420599: step: 332/459, loss: 0.02444276586174965 2023-01-22 17:44:37.153886: step: 334/459, loss: 0.06686536222696304 2023-01-22 17:44:37.703649: step: 336/459, loss: 0.04392822086811066 2023-01-22 17:44:38.287921: step: 338/459, loss: 0.08913838863372803 2023-01-22 17:44:38.894905: step: 340/459, loss: 0.014405853115022182 2023-01-22 17:44:39.465876: step: 342/459, loss: 0.007141273934394121 2023-01-22 17:44:40.115453: step: 344/459, loss: 0.03960961475968361 2023-01-22 17:44:40.676665: step: 346/459, loss: 0.03953235596418381 2023-01-22 17:44:41.206534: step: 348/459, loss: 0.008060937747359276 2023-01-22 17:44:41.823367: step: 350/459, loss: 0.021871475502848625 2023-01-22 17:44:42.342984: step: 352/459, loss: 0.02165510132908821 2023-01-22 17:44:42.941886: step: 354/459, loss: 0.016264118254184723 2023-01-22 17:44:43.580178: step: 356/459, loss: 0.008020594716072083 2023-01-22 17:44:44.245623: step: 358/459, loss: 0.03476768732070923 2023-01-22 17:44:44.872458: step: 360/459, loss: 0.018458224833011627 2023-01-22 17:44:45.471371: step: 362/459, loss: 0.04004218429327011 2023-01-22 17:44:46.079541: step: 364/459, loss: 0.02738083340227604 2023-01-22 17:44:46.710109: step: 366/459, loss: 0.10505522787570953 2023-01-22 17:44:47.249373: step: 368/459, loss: 0.056164585053920746 2023-01-22 17:44:47.899324: step: 370/459, loss: 0.08191130310297012 2023-01-22 17:44:48.508119: step: 372/459, loss: 0.016264405101537704 2023-01-22 17:44:49.153952: step: 374/459, loss: 0.028218545019626617 2023-01-22 17:44:49.821018: step: 376/459, loss: 0.035477180033922195 2023-01-22 17:44:50.441644: step: 378/459, loss: 0.05862700939178467 2023-01-22 17:44:51.039360: step: 380/459, loss: 0.0031622375827282667 2023-01-22 17:44:51.609101: step: 382/459, loss: 0.08281778544187546 2023-01-22 17:44:52.181869: step: 384/459, loss: 0.46284767985343933 2023-01-22 17:44:52.760802: step: 386/459, loss: 0.032648906111717224 2023-01-22 17:44:53.286722: step: 388/459, loss: 0.02682616002857685 2023-01-22 17:44:53.950567: step: 390/459, loss: 0.012664820067584515 2023-01-22 17:44:54.630452: step: 392/459, loss: 0.07438403367996216 2023-01-22 17:44:55.277017: step: 394/459, loss: 0.002040772931650281 2023-01-22 17:44:55.928705: step: 396/459, loss: 0.007418474182486534 2023-01-22 17:44:56.583587: step: 398/459, loss: 3.221067428588867 2023-01-22 17:44:57.235736: step: 400/459, loss: 0.04634217545390129 2023-01-22 17:44:57.842369: step: 402/459, loss: 0.01703193411231041 2023-01-22 17:44:58.386370: step: 404/459, loss: 0.031204812228679657 2023-01-22 17:44:59.021478: step: 406/459, loss: 0.131965771317482 2023-01-22 17:44:59.646285: step: 408/459, loss: 0.21779821813106537 2023-01-22 17:45:00.291961: step: 410/459, loss: 0.06829607486724854 2023-01-22 17:45:00.910380: step: 412/459, loss: 0.4127400815486908 2023-01-22 17:45:01.525007: step: 414/459, loss: 0.016030041500926018 2023-01-22 17:45:02.169481: step: 416/459, loss: 0.09905023127794266 2023-01-22 17:45:02.752950: step: 418/459, loss: 0.015430550090968609 2023-01-22 17:45:03.364564: step: 420/459, loss: 0.10768890380859375 2023-01-22 17:45:03.973061: step: 422/459, loss: 0.01837792992591858 2023-01-22 17:45:04.659708: step: 424/459, loss: 0.04470491781830788 2023-01-22 17:45:05.282849: step: 426/459, loss: 0.032256145030260086 2023-01-22 17:45:05.939515: step: 428/459, loss: 0.33813604712486267 2023-01-22 17:45:06.504028: step: 430/459, loss: 0.034354981034994125 2023-01-22 17:45:07.144558: step: 432/459, loss: 0.01749001257121563 2023-01-22 17:45:07.775428: step: 434/459, loss: 0.0327569879591465 2023-01-22 17:45:08.422969: step: 436/459, loss: 0.014071992598474026 2023-01-22 17:45:09.005765: step: 438/459, loss: 0.04418570548295975 2023-01-22 17:45:09.590713: step: 440/459, loss: 0.014016923494637012 2023-01-22 17:45:10.233752: step: 442/459, loss: 0.25044241547584534 2023-01-22 17:45:10.880706: step: 444/459, loss: 0.06627418845891953 2023-01-22 17:45:11.499058: step: 446/459, loss: 0.07890655100345612 2023-01-22 17:45:12.214653: step: 448/459, loss: 0.3414706587791443 2023-01-22 17:45:12.850053: step: 450/459, loss: 0.08330859988927841 2023-01-22 17:45:13.557667: step: 452/459, loss: 0.4399689733982086 2023-01-22 17:45:14.169682: step: 454/459, loss: 0.014366351999342442 2023-01-22 17:45:14.752952: step: 456/459, loss: 0.02279217727482319 2023-01-22 17:45:15.318047: step: 458/459, loss: 0.020296610891819 2023-01-22 17:45:15.974833: step: 460/459, loss: 0.0072305891662836075 2023-01-22 17:45:16.596309: step: 462/459, loss: 0.011999884620308876 2023-01-22 17:45:17.285095: step: 464/459, loss: 0.015549827367067337 2023-01-22 17:45:18.002912: step: 466/459, loss: 0.013996937312185764 2023-01-22 17:45:18.623967: step: 468/459, loss: 0.027490893378853798 2023-01-22 17:45:19.282787: step: 470/459, loss: 0.009637786075472832 2023-01-22 17:45:19.963068: step: 472/459, loss: 0.01977917179465294 2023-01-22 17:45:20.560905: step: 474/459, loss: 0.0076415445655584335 2023-01-22 17:45:21.121003: step: 476/459, loss: 0.5571322441101074 2023-01-22 17:45:21.714939: step: 478/459, loss: 0.0019058905309066176 2023-01-22 17:45:22.290823: step: 480/459, loss: 0.03738779202103615 2023-01-22 17:45:22.836347: step: 482/459, loss: 0.043229617178440094 2023-01-22 17:45:23.379570: step: 484/459, loss: 0.1369795948266983 2023-01-22 17:45:23.981869: step: 486/459, loss: 0.01718072220683098 2023-01-22 17:45:24.609071: step: 488/459, loss: 0.04544859379529953 2023-01-22 17:45:25.275983: step: 490/459, loss: 0.2962128520011902 2023-01-22 17:45:25.881894: step: 492/459, loss: 0.03792634233832359 2023-01-22 17:45:26.472147: step: 494/459, loss: 0.027798375114798546 2023-01-22 17:45:27.134852: step: 496/459, loss: 0.1540001630783081 2023-01-22 17:45:27.756843: step: 498/459, loss: 0.01527972612529993 2023-01-22 17:45:28.431590: step: 500/459, loss: 0.03474974259734154 2023-01-22 17:45:29.029883: step: 502/459, loss: 0.008024622686207294 2023-01-22 17:45:29.692837: step: 504/459, loss: 0.03736163675785065 2023-01-22 17:45:30.311338: step: 506/459, loss: 0.00017937303346116096 2023-01-22 17:45:30.941783: step: 508/459, loss: 0.017746370285749435 2023-01-22 17:45:31.546201: step: 510/459, loss: 0.007677425164729357 2023-01-22 17:45:32.228879: step: 512/459, loss: 0.10930877923965454 2023-01-22 17:45:32.811111: step: 514/459, loss: 0.0028836738783866167 2023-01-22 17:45:33.383231: step: 516/459, loss: 0.0008193933172151446 2023-01-22 17:45:34.012664: step: 518/459, loss: 0.04585826024413109 2023-01-22 17:45:34.531481: step: 520/459, loss: 0.08335886895656586 2023-01-22 17:45:35.126488: step: 522/459, loss: 0.037074360996484756 2023-01-22 17:45:35.742635: step: 524/459, loss: 0.009427223354578018 2023-01-22 17:45:36.349962: step: 526/459, loss: 0.025642268359661102 2023-01-22 17:45:36.989978: step: 528/459, loss: 0.03076484613120556 2023-01-22 17:45:37.569934: step: 530/459, loss: 0.0012802764540538192 2023-01-22 17:45:38.203160: step: 532/459, loss: 0.009528516791760921 2023-01-22 17:45:38.883680: step: 534/459, loss: 0.021810177713632584 2023-01-22 17:45:39.474994: step: 536/459, loss: 0.22155021131038666 2023-01-22 17:45:40.066549: step: 538/459, loss: 0.17518767714500427 2023-01-22 17:45:40.746994: step: 540/459, loss: 0.04133465886116028 2023-01-22 17:45:41.377597: step: 542/459, loss: 0.030698183923959732 2023-01-22 17:45:42.039462: step: 544/459, loss: 0.0177402775734663 2023-01-22 17:45:42.661699: step: 546/459, loss: 0.06001794710755348 2023-01-22 17:45:43.237505: step: 548/459, loss: 0.017792267724871635 2023-01-22 17:45:43.879832: step: 550/459, loss: 0.01429742481559515 2023-01-22 17:45:44.544037: step: 552/459, loss: 0.02896074205636978 2023-01-22 17:45:45.135146: step: 554/459, loss: 0.001459785969927907 2023-01-22 17:45:45.786210: step: 556/459, loss: 0.04937002807855606 2023-01-22 17:45:46.359103: step: 558/459, loss: 0.23109298944473267 2023-01-22 17:45:47.020443: step: 560/459, loss: 0.002473793225362897 2023-01-22 17:45:47.613247: step: 562/459, loss: 0.021080635488033295 2023-01-22 17:45:48.191697: step: 564/459, loss: 0.005115637090057135 2023-01-22 17:45:48.823886: step: 566/459, loss: 0.6545935869216919 2023-01-22 17:45:49.464527: step: 568/459, loss: 0.0018708958523347974 2023-01-22 17:45:50.073519: step: 570/459, loss: 0.04296974465250969 2023-01-22 17:45:50.660876: step: 572/459, loss: 0.0019911956042051315 2023-01-22 17:45:51.314366: step: 574/459, loss: 0.04187099263072014 2023-01-22 17:45:51.930605: step: 576/459, loss: 0.016176579520106316 2023-01-22 17:45:52.582365: step: 578/459, loss: 0.025777265429496765 2023-01-22 17:45:53.255708: step: 580/459, loss: 0.018278611823916435 2023-01-22 17:45:53.813070: step: 582/459, loss: 0.009238175116479397 2023-01-22 17:45:54.432948: step: 584/459, loss: 0.03608061745762825 2023-01-22 17:45:55.008617: step: 586/459, loss: 0.02117164433002472 2023-01-22 17:45:55.617807: step: 588/459, loss: 0.06329478323459625 2023-01-22 17:45:56.211394: step: 590/459, loss: 0.010640764608979225 2023-01-22 17:45:56.879041: step: 592/459, loss: 0.021609535440802574 2023-01-22 17:45:57.467351: step: 594/459, loss: 0.04022269695997238 2023-01-22 17:45:58.110025: step: 596/459, loss: 0.014611008577048779 2023-01-22 17:45:58.715243: step: 598/459, loss: 0.1388273686170578 2023-01-22 17:45:59.356870: step: 600/459, loss: 0.04274583235383034 2023-01-22 17:45:59.964086: step: 602/459, loss: 0.03576911613345146 2023-01-22 17:46:00.534807: step: 604/459, loss: 0.0223550945520401 2023-01-22 17:46:01.146009: step: 606/459, loss: 0.003876556409522891 2023-01-22 17:46:01.742712: step: 608/459, loss: 0.01784966140985489 2023-01-22 17:46:02.372397: step: 610/459, loss: 0.03898947685956955 2023-01-22 17:46:03.001417: step: 612/459, loss: 0.002526714699342847 2023-01-22 17:46:03.688581: step: 614/459, loss: 0.001484686741605401 2023-01-22 17:46:04.325056: step: 616/459, loss: 0.03787422925233841 2023-01-22 17:46:04.945245: step: 618/459, loss: 0.0386069193482399 2023-01-22 17:46:05.551669: step: 620/459, loss: 0.011696519330143929 2023-01-22 17:46:06.209647: step: 622/459, loss: 0.005161181557923555 2023-01-22 17:46:06.844664: step: 624/459, loss: 0.023512501269578934 2023-01-22 17:46:07.467355: step: 626/459, loss: 0.04038706421852112 2023-01-22 17:46:08.123564: step: 628/459, loss: 0.6513887047767639 2023-01-22 17:46:08.780269: step: 630/459, loss: 0.3835689425468445 2023-01-22 17:46:09.420871: step: 632/459, loss: 0.030877161771059036 2023-01-22 17:46:10.076759: step: 634/459, loss: 0.01457283552736044 2023-01-22 17:46:10.722464: step: 636/459, loss: 0.15682588517665863 2023-01-22 17:46:11.329391: step: 638/459, loss: 0.022394899278879166 2023-01-22 17:46:12.054669: step: 640/459, loss: 0.016182437539100647 2023-01-22 17:46:12.721933: step: 642/459, loss: 0.03284122422337532 2023-01-22 17:46:13.283738: step: 644/459, loss: 0.19467586278915405 2023-01-22 17:46:13.865623: step: 646/459, loss: 0.06202385202050209 2023-01-22 17:46:14.517606: step: 648/459, loss: 0.06265541911125183 2023-01-22 17:46:15.101492: step: 650/459, loss: 0.008900345303118229 2023-01-22 17:46:15.733297: step: 652/459, loss: 0.03294380009174347 2023-01-22 17:46:16.363244: step: 654/459, loss: 0.006543938536196947 2023-01-22 17:46:17.004266: step: 656/459, loss: 0.03551902249455452 2023-01-22 17:46:17.601983: step: 658/459, loss: 0.005803567357361317 2023-01-22 17:46:18.211551: step: 660/459, loss: 0.009429418481886387 2023-01-22 17:46:18.997807: step: 662/459, loss: 0.060431044548749924 2023-01-22 17:46:19.637740: step: 664/459, loss: 0.031757060438394547 2023-01-22 17:46:20.234464: step: 666/459, loss: 0.01652299426496029 2023-01-22 17:46:20.833549: step: 668/459, loss: 0.0027532274834811687 2023-01-22 17:46:21.511844: step: 670/459, loss: 0.016595695167779922 2023-01-22 17:46:22.160908: step: 672/459, loss: 0.0022334831301122904 2023-01-22 17:46:22.782170: step: 674/459, loss: 0.006832063663750887 2023-01-22 17:46:23.466765: step: 676/459, loss: 0.04422454908490181 2023-01-22 17:46:24.101174: step: 678/459, loss: 0.01101303193718195 2023-01-22 17:46:24.744518: step: 680/459, loss: 0.013871155679225922 2023-01-22 17:46:25.325069: step: 682/459, loss: 0.035258982330560684 2023-01-22 17:46:25.998129: step: 684/459, loss: 0.053052838891744614 2023-01-22 17:46:26.616989: step: 686/459, loss: 0.001296844333410263 2023-01-22 17:46:27.269360: step: 688/459, loss: 0.09371978789567947 2023-01-22 17:46:27.896300: step: 690/459, loss: 0.04415563493967056 2023-01-22 17:46:28.469121: step: 692/459, loss: 0.007956723682582378 2023-01-22 17:46:29.072794: step: 694/459, loss: 0.03896617516875267 2023-01-22 17:46:29.656847: step: 696/459, loss: 0.0070441290736198425 2023-01-22 17:46:30.300275: step: 698/459, loss: 1.195624828338623 2023-01-22 17:46:31.012410: step: 700/459, loss: 0.25832003355026245 2023-01-22 17:46:31.591283: step: 702/459, loss: 0.06253194063901901 2023-01-22 17:46:32.219854: step: 704/459, loss: 0.6379082202911377 2023-01-22 17:46:32.800196: step: 706/459, loss: 0.05173865333199501 2023-01-22 17:46:33.335888: step: 708/459, loss: 0.22617921233177185 2023-01-22 17:46:33.979059: step: 710/459, loss: 0.01016996055841446 2023-01-22 17:46:34.542589: step: 712/459, loss: 0.008398262783885002 2023-01-22 17:46:35.160291: step: 714/459, loss: 0.012618409469723701 2023-01-22 17:46:35.725168: step: 716/459, loss: 0.04043165221810341 2023-01-22 17:46:36.378591: step: 718/459, loss: 0.11809688806533813 2023-01-22 17:46:37.029112: step: 720/459, loss: 0.013507095165550709 2023-01-22 17:46:37.647794: step: 722/459, loss: 0.039231520146131516 2023-01-22 17:46:38.269231: step: 724/459, loss: 0.07611386477947235 2023-01-22 17:46:38.953409: step: 726/459, loss: 0.03354640677571297 2023-01-22 17:46:39.510233: step: 728/459, loss: 0.12174548208713531 2023-01-22 17:46:40.097137: step: 730/459, loss: 0.03347906470298767 2023-01-22 17:46:40.690230: step: 732/459, loss: 0.06775523722171783 2023-01-22 17:46:41.282729: step: 734/459, loss: 0.011806577444076538 2023-01-22 17:46:41.925287: step: 736/459, loss: 0.02163851074874401 2023-01-22 17:46:42.559967: step: 738/459, loss: 0.010285435244441032 2023-01-22 17:46:43.158058: step: 740/459, loss: 0.012129929848015308 2023-01-22 17:46:43.758244: step: 742/459, loss: 0.037383563816547394 2023-01-22 17:46:44.367565: step: 744/459, loss: 0.06986545026302338 2023-01-22 17:46:44.971654: step: 746/459, loss: 0.033175550401210785 2023-01-22 17:46:45.570316: step: 748/459, loss: 0.006445023696869612 2023-01-22 17:46:46.183894: step: 750/459, loss: 0.011448879726231098 2023-01-22 17:46:46.795452: step: 752/459, loss: 0.05719899386167526 2023-01-22 17:46:47.384094: step: 754/459, loss: 0.13326159119606018 2023-01-22 17:46:47.961931: step: 756/459, loss: 0.01773810386657715 2023-01-22 17:46:48.608470: step: 758/459, loss: 0.012385285459458828 2023-01-22 17:46:49.279194: step: 760/459, loss: 0.008856807835400105 2023-01-22 17:46:49.861584: step: 762/459, loss: 0.003745195223018527 2023-01-22 17:46:50.399970: step: 764/459, loss: 0.0071546826511621475 2023-01-22 17:46:51.020495: step: 766/459, loss: 0.020138302817940712 2023-01-22 17:46:51.620306: step: 768/459, loss: 0.009842629544436932 2023-01-22 17:46:52.300427: step: 770/459, loss: 0.15462526679039001 2023-01-22 17:46:52.931139: step: 772/459, loss: 0.21192771196365356 2023-01-22 17:46:53.548886: step: 774/459, loss: 0.025400839745998383 2023-01-22 17:46:54.158608: step: 776/459, loss: 0.004108484368771315 2023-01-22 17:46:54.768614: step: 778/459, loss: 0.05109096318483353 2023-01-22 17:46:55.293669: step: 780/459, loss: 0.02067968063056469 2023-01-22 17:46:55.865260: step: 782/459, loss: 0.007601361256092787 2023-01-22 17:46:56.450689: step: 784/459, loss: 0.015037128701806068 2023-01-22 17:46:57.044507: step: 786/459, loss: 0.04697691649198532 2023-01-22 17:46:57.657610: step: 788/459, loss: 0.05971086025238037 2023-01-22 17:46:58.329929: step: 790/459, loss: 0.009744939394295216 2023-01-22 17:46:59.042275: step: 792/459, loss: 0.19630439579486847 2023-01-22 17:46:59.547540: step: 794/459, loss: 0.00965244509279728 2023-01-22 17:47:00.141855: step: 796/459, loss: 0.004381702281534672 2023-01-22 17:47:00.723208: step: 798/459, loss: 0.2544443905353546 2023-01-22 17:47:01.291761: step: 800/459, loss: 0.003947042860090733 2023-01-22 17:47:01.918990: step: 802/459, loss: 0.012215330265462399 2023-01-22 17:47:02.521303: step: 804/459, loss: 0.13581110537052155 2023-01-22 17:47:03.098329: step: 806/459, loss: 0.00513692619279027 2023-01-22 17:47:03.731560: step: 808/459, loss: 0.01872774213552475 2023-01-22 17:47:04.295084: step: 810/459, loss: 0.0022971010766923428 2023-01-22 17:47:04.849201: step: 812/459, loss: 0.004399368539452553 2023-01-22 17:47:05.539340: step: 814/459, loss: 0.03619520366191864 2023-01-22 17:47:06.138429: step: 816/459, loss: 0.06281745433807373 2023-01-22 17:47:06.788786: step: 818/459, loss: 0.02181599847972393 2023-01-22 17:47:07.435132: step: 820/459, loss: 0.05311461165547371 2023-01-22 17:47:08.082108: step: 822/459, loss: 0.100587859749794 2023-01-22 17:47:08.737644: step: 824/459, loss: 0.024775218218564987 2023-01-22 17:47:09.421920: step: 826/459, loss: 0.02402334101498127 2023-01-22 17:47:10.051290: step: 828/459, loss: 0.017186982557177544 2023-01-22 17:47:10.653700: step: 830/459, loss: 0.0012907214695587754 2023-01-22 17:47:11.236110: step: 832/459, loss: 0.00042950999340973794 2023-01-22 17:47:11.829913: step: 834/459, loss: 0.0172859039157629 2023-01-22 17:47:12.483980: step: 836/459, loss: 0.027673475444316864 2023-01-22 17:47:13.090114: step: 838/459, loss: 0.05120200291275978 2023-01-22 17:47:13.659438: step: 840/459, loss: 0.006151366978883743 2023-01-22 17:47:14.195307: step: 842/459, loss: 0.015104551799595356 2023-01-22 17:47:14.793389: step: 844/459, loss: 0.210410937666893 2023-01-22 17:47:15.299558: step: 846/459, loss: 0.00015671206347178668 2023-01-22 17:47:15.891493: step: 848/459, loss: 0.008273006416857243 2023-01-22 17:47:16.491075: step: 850/459, loss: 0.3051973879337311 2023-01-22 17:47:17.090508: step: 852/459, loss: 0.0004886930109933019 2023-01-22 17:47:17.706562: step: 854/459, loss: 0.0007338497089222074 2023-01-22 17:47:18.445635: step: 856/459, loss: 0.04337003454566002 2023-01-22 17:47:19.083839: step: 858/459, loss: 0.049767110496759415 2023-01-22 17:47:19.779644: step: 860/459, loss: 1.6814770698547363 2023-01-22 17:47:20.385716: step: 862/459, loss: 0.006381612736731768 2023-01-22 17:47:20.993046: step: 864/459, loss: 0.012228050269186497 2023-01-22 17:47:21.643446: step: 866/459, loss: 0.01070424634963274 2023-01-22 17:47:22.284573: step: 868/459, loss: 0.09152449667453766 2023-01-22 17:47:22.856457: step: 870/459, loss: 0.001348621677607298 2023-01-22 17:47:23.460125: step: 872/459, loss: 0.013759825378656387 2023-01-22 17:47:24.070173: step: 874/459, loss: 0.02177426964044571 2023-01-22 17:47:24.746511: step: 876/459, loss: 0.03943878039717674 2023-01-22 17:47:25.328904: step: 878/459, loss: 0.022462308406829834 2023-01-22 17:47:25.926659: step: 880/459, loss: 0.10934241861104965 2023-01-22 17:47:26.550803: step: 882/459, loss: 0.11176341027021408 2023-01-22 17:47:27.127279: step: 884/459, loss: 0.021425975486636162 2023-01-22 17:47:27.729533: step: 886/459, loss: 0.04354483261704445 2023-01-22 17:47:28.363248: step: 888/459, loss: 0.008463235571980476 2023-01-22 17:47:28.988437: step: 890/459, loss: 0.03559131175279617 2023-01-22 17:47:29.683364: step: 892/459, loss: 0.013117202557623386 2023-01-22 17:47:30.333298: step: 894/459, loss: 0.020317167043685913 2023-01-22 17:47:30.943213: step: 896/459, loss: 0.06574270874261856 2023-01-22 17:47:31.620903: step: 898/459, loss: 0.09993796795606613 2023-01-22 17:47:32.272458: step: 900/459, loss: 0.04925423488020897 2023-01-22 17:47:32.929919: step: 902/459, loss: 0.11890912801027298 2023-01-22 17:47:33.559891: step: 904/459, loss: 0.0018011138308793306 2023-01-22 17:47:34.259457: step: 906/459, loss: 0.30414077639579773 2023-01-22 17:47:34.880518: step: 908/459, loss: 0.07884429395198822 2023-01-22 17:47:35.529490: step: 910/459, loss: 0.045692894607782364 2023-01-22 17:47:36.108905: step: 912/459, loss: 0.039607733488082886 2023-01-22 17:47:36.707944: step: 914/459, loss: 0.11917826533317566 2023-01-22 17:47:37.323659: step: 916/459, loss: 0.9664004445075989 2023-01-22 17:47:37.913194: step: 918/459, loss: 0.04900089278817177 2023-01-22 17:47:38.388027: step: 920/459, loss: 0.00030217503081075847 ================================================== Loss: 0.124 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2969129367350967, 'r': 0.33578768556758565, 'f1': 0.31515602901890943}, 'combined': 0.23222023190867008, 'epoch': 26} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.32359916276751743, 'r': 0.3109493773138781, 'f1': 0.3171481827030746}, 'combined': 0.2029748369299677, 'epoch': 26} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3001050420168067, 'r': 0.34281448822413213, 'f1': 0.32004116083988954}, 'combined': 0.2358198027241291, 'epoch': 26} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33251177688986344, 'r': 0.31587106012103494, 'f1': 0.3239778768763578}, 'combined': 0.20734584120086896, 'epoch': 26} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3143424780303089, 'r': 0.3411838660974131, 'f1': 0.32721364410070375}, 'combined': 0.24110479038999222, 'epoch': 26} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3335241545423996, 'r': 0.3238039606030947, 'f1': 0.3285921892256913}, 'combined': 0.23559439982219377, 'epoch': 26} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24666666666666665, 'r': 0.35238095238095235, 'f1': 0.2901960784313725}, 'combined': 0.19346405228758168, 'epoch': 26} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.21022727272727273, 'r': 0.40217391304347827, 'f1': 0.27611940298507465}, 'combined': 0.13805970149253732, 'epoch': 26} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3125, 'r': 0.1724137931034483, 'f1': 0.22222222222222224}, 'combined': 0.14814814814814814, 'epoch': 26} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3066986908783784, 'r': 0.3445268026565465, 'f1': 0.3245140750670242}, 'combined': 0.23911563425991253, 'epoch': 19} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31271473684488904, 'r': 0.30873473110322686, 'f1': 0.31071198921642224}, 'combined': 0.1988556730985102, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2564102564102564, 'r': 0.38095238095238093, 'f1': 0.3065134099616858}, 'combined': 0.20434227330779053, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29418702499903554, 'r': 0.3594998939267151, 'f1': 0.32358060478117656}, 'combined': 0.23842781404928798, 'epoch': 13} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3202762492367604, 'r': 0.31212376289255195, 'f1': 0.31614745781013553}, 'combined': 0.20233437299848672, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2878787878787879, 'r': 0.41304347826086957, 'f1': 0.3392857142857143}, 'combined': 0.16964285714285715, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3189032241859154, 'r': 0.3376622373733222, 'f1': 0.32801474487694154}, 'combined': 0.24169507517248323, 'epoch': 15} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34157316773046453, 'r': 0.3008578947798814, 'f1': 0.3199253278791385}, 'combined': 0.22938042376240123, 'epoch': 15} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35294117647058826, 'r': 0.20689655172413793, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 15} ****************************** Epoch: 27 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 17:50:15.722349: step: 2/459, loss: 0.002556796418502927 2023-01-22 17:50:16.353372: step: 4/459, loss: 0.011949285864830017 2023-01-22 17:50:16.993517: step: 6/459, loss: 0.008967150002717972 2023-01-22 17:50:17.603345: step: 8/459, loss: 0.008934385143220425 2023-01-22 17:50:18.188583: step: 10/459, loss: 0.023935087025165558 2023-01-22 17:50:18.755679: step: 12/459, loss: 0.01759631559252739 2023-01-22 17:50:19.389290: step: 14/459, loss: 0.0161521527916193 2023-01-22 17:50:20.056037: step: 16/459, loss: 0.02392333373427391 2023-01-22 17:50:20.769239: step: 18/459, loss: 0.0035976702347397804 2023-01-22 17:50:21.352262: step: 20/459, loss: 0.004094540607184172 2023-01-22 17:50:21.958422: step: 22/459, loss: 0.021454621106386185 2023-01-22 17:50:22.560709: step: 24/459, loss: 0.4697837829589844 2023-01-22 17:50:23.181813: step: 26/459, loss: 0.013474516570568085 2023-01-22 17:50:23.814561: step: 28/459, loss: 0.055214736610651016 2023-01-22 17:50:24.387746: step: 30/459, loss: 0.003712661797180772 2023-01-22 17:50:24.999965: step: 32/459, loss: 0.058554504066705704 2023-01-22 17:50:25.633519: step: 34/459, loss: 0.014639431610703468 2023-01-22 17:50:26.191049: step: 36/459, loss: 0.04408085346221924 2023-01-22 17:50:26.862521: step: 38/459, loss: 0.06239192932844162 2023-01-22 17:50:27.440464: step: 40/459, loss: 0.010191058740019798 2023-01-22 17:50:28.055795: step: 42/459, loss: 0.00993895623832941 2023-01-22 17:50:28.652131: step: 44/459, loss: 0.03147495537996292 2023-01-22 17:50:29.261866: step: 46/459, loss: 0.058519985526800156 2023-01-22 17:50:29.858520: step: 48/459, loss: 0.10616017132997513 2023-01-22 17:50:30.432202: step: 50/459, loss: 0.01778385601937771 2023-01-22 17:50:31.179952: step: 52/459, loss: 0.05147829279303551 2023-01-22 17:50:31.764778: step: 54/459, loss: 0.07477321475744247 2023-01-22 17:50:32.406543: step: 56/459, loss: 0.010621995665133 2023-01-22 17:50:32.996136: step: 58/459, loss: 0.01745029352605343 2023-01-22 17:50:33.596895: step: 60/459, loss: 0.00854093674570322 2023-01-22 17:50:34.218261: step: 62/459, loss: 0.0038002957589924335 2023-01-22 17:50:34.822694: step: 64/459, loss: 0.03181346133351326 2023-01-22 17:50:35.417656: step: 66/459, loss: 1.4860001802444458 2023-01-22 17:50:36.039138: step: 68/459, loss: 0.10342404991388321 2023-01-22 17:50:36.644188: step: 70/459, loss: 0.047787122428417206 2023-01-22 17:50:37.214955: step: 72/459, loss: 0.004414758179336786 2023-01-22 17:50:37.766084: step: 74/459, loss: 0.1572715789079666 2023-01-22 17:50:38.396252: step: 76/459, loss: 0.034122515469789505 2023-01-22 17:50:39.039008: step: 78/459, loss: 0.03991858288645744 2023-01-22 17:50:39.694103: step: 80/459, loss: 0.017909644171595573 2023-01-22 17:50:40.280287: step: 82/459, loss: 0.3175327777862549 2023-01-22 17:50:40.905397: step: 84/459, loss: 0.01766190305352211 2023-01-22 17:50:41.539777: step: 86/459, loss: 0.012496829964220524 2023-01-22 17:50:42.176299: step: 88/459, loss: 0.20148637890815735 2023-01-22 17:50:42.797867: step: 90/459, loss: 0.004516961984336376 2023-01-22 17:50:43.386573: step: 92/459, loss: 0.011671796441078186 2023-01-22 17:50:43.978232: step: 94/459, loss: 0.04074464365839958 2023-01-22 17:50:44.570719: step: 96/459, loss: 0.023059219121932983 2023-01-22 17:50:45.243578: step: 98/459, loss: 0.08593697100877762 2023-01-22 17:50:45.895189: step: 100/459, loss: 0.04543840140104294 2023-01-22 17:50:46.497642: step: 102/459, loss: 0.012710457667708397 2023-01-22 17:50:47.228912: step: 104/459, loss: 0.008056298829615116 2023-01-22 17:50:47.801510: step: 106/459, loss: 0.004196443594992161 2023-01-22 17:50:48.392562: step: 108/459, loss: 0.011641720309853554 2023-01-22 17:50:48.927454: step: 110/459, loss: 0.04477481544017792 2023-01-22 17:50:49.555835: step: 112/459, loss: 0.007503591012209654 2023-01-22 17:50:50.194710: step: 114/459, loss: 0.0025759597774595022 2023-01-22 17:50:50.751148: step: 116/459, loss: 0.0019034267170354724 2023-01-22 17:50:51.368233: step: 118/459, loss: 0.023490458726882935 2023-01-22 17:50:52.010194: step: 120/459, loss: 0.2510875165462494 2023-01-22 17:50:52.748337: step: 122/459, loss: 0.027160795405507088 2023-01-22 17:50:53.406800: step: 124/459, loss: 0.04289604350924492 2023-01-22 17:50:54.094272: step: 126/459, loss: 0.03694195672869682 2023-01-22 17:50:54.724732: step: 128/459, loss: 0.016014594584703445 2023-01-22 17:50:55.301760: step: 130/459, loss: 0.0038654261734336615 2023-01-22 17:50:55.919054: step: 132/459, loss: 0.027562333270907402 2023-01-22 17:50:56.562623: step: 134/459, loss: 0.003604487283155322 2023-01-22 17:50:57.199894: step: 136/459, loss: 0.04579988494515419 2023-01-22 17:50:57.770519: step: 138/459, loss: 0.0034128697589039803 2023-01-22 17:50:58.399957: step: 140/459, loss: 0.0009795352816581726 2023-01-22 17:50:59.047389: step: 142/459, loss: 0.11536331474781036 2023-01-22 17:50:59.726426: step: 144/459, loss: 0.017205050215125084 2023-01-22 17:51:00.327427: step: 146/459, loss: 0.021088572219014168 2023-01-22 17:51:00.971467: step: 148/459, loss: 0.019172193482518196 2023-01-22 17:51:01.527624: step: 150/459, loss: 0.0021820347756147385 2023-01-22 17:51:02.133542: step: 152/459, loss: 0.18651838600635529 2023-01-22 17:51:02.756381: step: 154/459, loss: 0.011181052774190903 2023-01-22 17:51:03.320239: step: 156/459, loss: 0.036003291606903076 2023-01-22 17:51:03.913016: step: 158/459, loss: 1.0470305681228638 2023-01-22 17:51:04.551742: step: 160/459, loss: 0.056533362716436386 2023-01-22 17:51:05.154054: step: 162/459, loss: 0.015101904980838299 2023-01-22 17:51:05.788777: step: 164/459, loss: 0.0083169499412179 2023-01-22 17:51:06.463723: step: 166/459, loss: 0.31797415018081665 2023-01-22 17:51:07.034387: step: 168/459, loss: 0.17839868366718292 2023-01-22 17:51:07.690898: step: 170/459, loss: 0.017501579597592354 2023-01-22 17:51:08.282531: step: 172/459, loss: 0.05467083305120468 2023-01-22 17:51:08.915273: step: 174/459, loss: 0.06544414162635803 2023-01-22 17:51:09.535780: step: 176/459, loss: 0.023273933678865433 2023-01-22 17:51:10.240440: step: 178/459, loss: 0.030519526451826096 2023-01-22 17:51:10.856025: step: 180/459, loss: 0.0011437288485467434 2023-01-22 17:51:11.436536: step: 182/459, loss: 0.024779152125120163 2023-01-22 17:51:12.042557: step: 184/459, loss: 0.02122773602604866 2023-01-22 17:51:12.668200: step: 186/459, loss: 0.2401570826768875 2023-01-22 17:51:13.312673: step: 188/459, loss: 0.019077131524682045 2023-01-22 17:51:13.894280: step: 190/459, loss: 0.011322595179080963 2023-01-22 17:51:14.532451: step: 192/459, loss: 0.036544427275657654 2023-01-22 17:51:15.228794: step: 194/459, loss: 0.12524791061878204 2023-01-22 17:51:15.937650: step: 196/459, loss: 0.023693503811955452 2023-01-22 17:51:16.611698: step: 198/459, loss: 0.002328252885490656 2023-01-22 17:51:17.222225: step: 200/459, loss: 0.001275019720196724 2023-01-22 17:51:17.925792: step: 202/459, loss: 0.01807336136698723 2023-01-22 17:51:18.550096: step: 204/459, loss: 0.0673595443367958 2023-01-22 17:51:19.270651: step: 206/459, loss: 0.0022589382715523243 2023-01-22 17:51:19.935118: step: 208/459, loss: 0.08751749247312546 2023-01-22 17:51:20.602933: step: 210/459, loss: 0.003929988015443087 2023-01-22 17:51:21.223666: step: 212/459, loss: 0.036136332899332047 2023-01-22 17:51:21.875013: step: 214/459, loss: 0.026283614337444305 2023-01-22 17:51:22.530727: step: 216/459, loss: 0.02538437768816948 2023-01-22 17:51:23.152270: step: 218/459, loss: 0.023630572482943535 2023-01-22 17:51:23.670912: step: 220/459, loss: 0.0058891954831779 2023-01-22 17:51:24.301182: step: 222/459, loss: 0.002457413822412491 2023-01-22 17:51:24.969154: step: 224/459, loss: 0.02470390871167183 2023-01-22 17:51:25.600837: step: 226/459, loss: 0.05953550338745117 2023-01-22 17:51:26.209243: step: 228/459, loss: 0.007077408488839865 2023-01-22 17:51:26.776723: step: 230/459, loss: 8.061016082763672 2023-01-22 17:51:27.364714: step: 232/459, loss: 0.014573056250810623 2023-01-22 17:51:27.950184: step: 234/459, loss: 0.11404011398553848 2023-01-22 17:51:28.568243: step: 236/459, loss: 0.021129468455910683 2023-01-22 17:51:29.285210: step: 238/459, loss: 0.04573642462491989 2023-01-22 17:51:29.879077: step: 240/459, loss: 0.0921097919344902 2023-01-22 17:51:30.507217: step: 242/459, loss: 0.0076972562819719315 2023-01-22 17:51:31.179263: step: 244/459, loss: 0.028112055733799934 2023-01-22 17:51:31.823300: step: 246/459, loss: 0.43876591324806213 2023-01-22 17:51:32.492573: step: 248/459, loss: 0.030814357101917267 2023-01-22 17:51:33.123417: step: 250/459, loss: 0.0002884101995732635 2023-01-22 17:51:33.691424: step: 252/459, loss: 0.0054551600478589535 2023-01-22 17:51:34.301529: step: 254/459, loss: 0.004631435964256525 2023-01-22 17:51:34.930106: step: 256/459, loss: 0.4258734881877899 2023-01-22 17:51:35.502714: step: 258/459, loss: 0.006869328673928976 2023-01-22 17:51:36.116520: step: 260/459, loss: 0.02820771373808384 2023-01-22 17:51:36.705781: step: 262/459, loss: 0.008422531187534332 2023-01-22 17:51:37.350415: step: 264/459, loss: 0.03042769804596901 2023-01-22 17:51:37.982556: step: 266/459, loss: 0.0527377687394619 2023-01-22 17:51:38.636415: step: 268/459, loss: 0.047027792781591415 2023-01-22 17:51:39.258255: step: 270/459, loss: 0.004063123371452093 2023-01-22 17:51:39.799112: step: 272/459, loss: 0.040443290024995804 2023-01-22 17:51:40.418636: step: 274/459, loss: 0.00889017153531313 2023-01-22 17:51:41.006390: step: 276/459, loss: 0.05140143260359764 2023-01-22 17:51:41.621263: step: 278/459, loss: 0.0337640717625618 2023-01-22 17:51:42.215963: step: 280/459, loss: 0.005902176722884178 2023-01-22 17:51:42.789787: step: 282/459, loss: 0.04354343190789223 2023-01-22 17:51:43.383190: step: 284/459, loss: 0.013645930215716362 2023-01-22 17:51:44.008134: step: 286/459, loss: 0.24026905000209808 2023-01-22 17:51:44.595732: step: 288/459, loss: 0.03469518572092056 2023-01-22 17:51:45.178980: step: 290/459, loss: 0.06996407359838486 2023-01-22 17:51:45.794259: step: 292/459, loss: 0.012694415636360645 2023-01-22 17:51:46.396400: step: 294/459, loss: 0.09923427551984787 2023-01-22 17:51:46.941684: step: 296/459, loss: 0.0627911388874054 2023-01-22 17:51:47.595395: step: 298/459, loss: 0.008296230807900429 2023-01-22 17:51:48.223587: step: 300/459, loss: 0.016037384048104286 2023-01-22 17:51:48.829872: step: 302/459, loss: 0.28355756402015686 2023-01-22 17:51:49.517135: step: 304/459, loss: 0.019728563725948334 2023-01-22 17:51:50.137658: step: 306/459, loss: 0.0783148929476738 2023-01-22 17:51:50.702380: step: 308/459, loss: 0.012165835127234459 2023-01-22 17:51:51.292528: step: 310/459, loss: 0.26141881942749023 2023-01-22 17:51:51.889727: step: 312/459, loss: 0.17018185555934906 2023-01-22 17:51:52.507020: step: 314/459, loss: 0.041706010699272156 2023-01-22 17:51:53.091225: step: 316/459, loss: 0.007256557233631611 2023-01-22 17:51:53.717940: step: 318/459, loss: 0.0052497899159789085 2023-01-22 17:51:54.269953: step: 320/459, loss: 0.004286355338990688 2023-01-22 17:51:54.921148: step: 322/459, loss: 0.0012033989187330008 2023-01-22 17:51:55.520590: step: 324/459, loss: 0.014917487278580666 2023-01-22 17:51:56.117165: step: 326/459, loss: 0.016805274412035942 2023-01-22 17:51:56.694582: step: 328/459, loss: 0.010540321469306946 2023-01-22 17:51:57.319813: step: 330/459, loss: 0.006339258048683405 2023-01-22 17:51:57.926286: step: 332/459, loss: 0.0668330043554306 2023-01-22 17:51:58.573575: step: 334/459, loss: 0.15006178617477417 2023-01-22 17:51:59.229735: step: 336/459, loss: 0.1956474334001541 2023-01-22 17:51:59.797109: step: 338/459, loss: 0.029066288843750954 2023-01-22 17:52:00.384596: step: 340/459, loss: 0.0014270521933212876 2023-01-22 17:52:00.978442: step: 342/459, loss: 0.015324708074331284 2023-01-22 17:52:01.579543: step: 344/459, loss: 0.06053978577256203 2023-01-22 17:52:02.173303: step: 346/459, loss: 0.0009688126156106591 2023-01-22 17:52:02.870296: step: 348/459, loss: 0.0025080523919314146 2023-01-22 17:52:03.507513: step: 350/459, loss: 0.019815756008028984 2023-01-22 17:52:04.155361: step: 352/459, loss: 0.02237054519355297 2023-01-22 17:52:04.718352: step: 354/459, loss: 0.05631101131439209 2023-01-22 17:52:05.379793: step: 356/459, loss: 0.011681577190756798 2023-01-22 17:52:05.970960: step: 358/459, loss: 0.01359567977488041 2023-01-22 17:52:06.557977: step: 360/459, loss: 0.02565860003232956 2023-01-22 17:52:07.259697: step: 362/459, loss: 0.03746772184967995 2023-01-22 17:52:07.828361: step: 364/459, loss: 0.005726039409637451 2023-01-22 17:52:08.403790: step: 366/459, loss: 0.002301862696185708 2023-01-22 17:52:08.995547: step: 368/459, loss: 0.02172192372381687 2023-01-22 17:52:09.589627: step: 370/459, loss: 0.041412074118852615 2023-01-22 17:52:10.247111: step: 372/459, loss: 0.010972986929118633 2023-01-22 17:52:10.904837: step: 374/459, loss: 0.028295962139964104 2023-01-22 17:52:11.497251: step: 376/459, loss: 0.18700237572193146 2023-01-22 17:52:12.083592: step: 378/459, loss: 0.413794606924057 2023-01-22 17:52:12.682954: step: 380/459, loss: 0.05205227807164192 2023-01-22 17:52:13.315175: step: 382/459, loss: 0.018478671088814735 2023-01-22 17:52:13.914098: step: 384/459, loss: 0.7406625151634216 2023-01-22 17:52:14.530760: step: 386/459, loss: 0.0026912614703178406 2023-01-22 17:52:15.096834: step: 388/459, loss: 0.005099714733660221 2023-01-22 17:52:15.676432: step: 390/459, loss: 0.0030165158677846193 2023-01-22 17:52:16.355057: step: 392/459, loss: 0.017273826524615288 2023-01-22 17:52:16.966524: step: 394/459, loss: 0.018132219091057777 2023-01-22 17:52:17.554940: step: 396/459, loss: 0.007703475188463926 2023-01-22 17:52:18.169338: step: 398/459, loss: 0.052820924669504166 2023-01-22 17:52:18.808811: step: 400/459, loss: 0.027292853221297264 2023-01-22 17:52:19.449673: step: 402/459, loss: 0.10962878912687302 2023-01-22 17:52:20.176866: step: 404/459, loss: 0.0010272851213812828 2023-01-22 17:52:20.784633: step: 406/459, loss: 0.03275613486766815 2023-01-22 17:52:21.447781: step: 408/459, loss: 0.011089660227298737 2023-01-22 17:52:22.052830: step: 410/459, loss: 0.07048100978136063 2023-01-22 17:52:22.718463: step: 412/459, loss: 0.013046231120824814 2023-01-22 17:52:23.253041: step: 414/459, loss: 0.008058743551373482 2023-01-22 17:52:23.829561: step: 416/459, loss: 0.015493053011596203 2023-01-22 17:52:24.477428: step: 418/459, loss: 0.06353022903203964 2023-01-22 17:52:25.143265: step: 420/459, loss: 0.021550726145505905 2023-01-22 17:52:25.770316: step: 422/459, loss: 0.046770062297582626 2023-01-22 17:52:26.338807: step: 424/459, loss: 0.03024168312549591 2023-01-22 17:52:26.895971: step: 426/459, loss: 0.010370414704084396 2023-01-22 17:52:27.596668: step: 428/459, loss: 0.03381287679076195 2023-01-22 17:52:28.243732: step: 430/459, loss: 0.01399981789290905 2023-01-22 17:52:28.881503: step: 432/459, loss: 0.001425381749868393 2023-01-22 17:52:29.507524: step: 434/459, loss: 0.007176969200372696 2023-01-22 17:52:30.295263: step: 436/459, loss: 0.0005590888322331011 2023-01-22 17:52:30.927803: step: 438/459, loss: 0.010133866220712662 2023-01-22 17:52:31.526359: step: 440/459, loss: 0.02055775374174118 2023-01-22 17:52:32.200158: step: 442/459, loss: 0.0006659890641458333 2023-01-22 17:52:32.827386: step: 444/459, loss: 1.113370656967163 2023-01-22 17:52:33.384126: step: 446/459, loss: 0.02262764796614647 2023-01-22 17:52:34.013541: step: 448/459, loss: 0.01994205079972744 2023-01-22 17:52:34.675435: step: 450/459, loss: 0.006208812817931175 2023-01-22 17:52:35.300575: step: 452/459, loss: 0.4174884259700775 2023-01-22 17:52:35.979062: step: 454/459, loss: 0.10925702005624771 2023-01-22 17:52:36.645545: step: 456/459, loss: 0.03568735346198082 2023-01-22 17:52:37.314771: step: 458/459, loss: 0.23171117901802063 2023-01-22 17:52:37.965565: step: 460/459, loss: 0.07421164959669113 2023-01-22 17:52:38.614872: step: 462/459, loss: 0.002019045874476433 2023-01-22 17:52:39.284449: step: 464/459, loss: 0.008020770736038685 2023-01-22 17:52:39.901474: step: 466/459, loss: 0.0402887724339962 2023-01-22 17:52:40.575344: step: 468/459, loss: 0.022867033258080482 2023-01-22 17:52:41.151823: step: 470/459, loss: 0.0069313920103013515 2023-01-22 17:52:41.750225: step: 472/459, loss: 0.0235899630934 2023-01-22 17:52:42.438785: step: 474/459, loss: 0.053368933498859406 2023-01-22 17:52:43.064412: step: 476/459, loss: 0.0007293525850400329 2023-01-22 17:52:43.670917: step: 478/459, loss: 0.0035057144705206156 2023-01-22 17:52:44.266544: step: 480/459, loss: 0.005204401444643736 2023-01-22 17:52:44.936312: step: 482/459, loss: 0.06132529303431511 2023-01-22 17:52:45.532878: step: 484/459, loss: 0.05123179033398628 2023-01-22 17:52:46.127491: step: 486/459, loss: 0.013362487778067589 2023-01-22 17:52:46.737809: step: 488/459, loss: 0.03526352345943451 2023-01-22 17:52:47.343489: step: 490/459, loss: 0.001816153759136796 2023-01-22 17:52:48.009451: step: 492/459, loss: 0.07865867018699646 2023-01-22 17:52:48.623466: step: 494/459, loss: 0.015161196701228619 2023-01-22 17:52:49.288437: step: 496/459, loss: 0.07322859019041061 2023-01-22 17:52:49.951074: step: 498/459, loss: 0.000421662291046232 2023-01-22 17:52:50.512239: step: 500/459, loss: 0.03861309587955475 2023-01-22 17:52:51.090047: step: 502/459, loss: 0.05439207702875137 2023-01-22 17:52:51.717874: step: 504/459, loss: 0.0075822449289262295 2023-01-22 17:52:52.350644: step: 506/459, loss: 0.00017917223158292472 2023-01-22 17:52:53.002368: step: 508/459, loss: 0.004109605215489864 2023-01-22 17:52:53.567041: step: 510/459, loss: 0.0216212198138237 2023-01-22 17:52:54.212309: step: 512/459, loss: 0.034125201404094696 2023-01-22 17:52:54.926580: step: 514/459, loss: 0.017042765393853188 2023-01-22 17:52:55.554681: step: 516/459, loss: 0.015794361010193825 2023-01-22 17:52:56.158955: step: 518/459, loss: 0.06557834893465042 2023-01-22 17:52:56.736844: step: 520/459, loss: 0.03783280402421951 2023-01-22 17:52:57.375991: step: 522/459, loss: 0.017981380224227905 2023-01-22 17:52:58.086156: step: 524/459, loss: 0.07274020463228226 2023-01-22 17:52:58.712792: step: 526/459, loss: 0.014853447675704956 2023-01-22 17:52:59.248971: step: 528/459, loss: 1.0548157691955566 2023-01-22 17:52:59.832188: step: 530/459, loss: 0.022586600854992867 2023-01-22 17:53:00.411280: step: 532/459, loss: 0.00606783851981163 2023-01-22 17:53:01.053675: step: 534/459, loss: 0.04482012614607811 2023-01-22 17:53:01.663250: step: 536/459, loss: 0.005236268974840641 2023-01-22 17:53:02.270287: step: 538/459, loss: 0.022830110043287277 2023-01-22 17:53:02.883008: step: 540/459, loss: 0.3820991814136505 2023-01-22 17:53:03.445759: step: 542/459, loss: 0.016744408756494522 2023-01-22 17:53:04.004291: step: 544/459, loss: 0.017917416989803314 2023-01-22 17:53:04.621505: step: 546/459, loss: 0.40536558628082275 2023-01-22 17:53:05.239777: step: 548/459, loss: 0.011955685913562775 2023-01-22 17:53:05.853609: step: 550/459, loss: 0.011089346371591091 2023-01-22 17:53:06.458754: step: 552/459, loss: 0.05182916298508644 2023-01-22 17:53:07.147055: step: 554/459, loss: 0.005498868878930807 2023-01-22 17:53:07.718134: step: 556/459, loss: 0.001339538604952395 2023-01-22 17:53:08.345937: step: 558/459, loss: 0.22435371577739716 2023-01-22 17:53:08.994039: step: 560/459, loss: 0.002568336669355631 2023-01-22 17:53:09.625542: step: 562/459, loss: 0.04764203354716301 2023-01-22 17:53:10.298565: step: 564/459, loss: 0.03131382539868355 2023-01-22 17:53:10.959639: step: 566/459, loss: 0.013241435401141644 2023-01-22 17:53:11.607632: step: 568/459, loss: 0.01569027081131935 2023-01-22 17:53:12.236202: step: 570/459, loss: 0.07460205256938934 2023-01-22 17:53:12.900734: step: 572/459, loss: 0.03660262003540993 2023-01-22 17:53:13.529459: step: 574/459, loss: 0.011534315533936024 2023-01-22 17:53:14.119319: step: 576/459, loss: 0.004238510504364967 2023-01-22 17:53:14.741837: step: 578/459, loss: 0.009915018454194069 2023-01-22 17:53:15.361206: step: 580/459, loss: 0.003667992539703846 2023-01-22 17:53:16.055364: step: 582/459, loss: 0.015749391168355942 2023-01-22 17:53:16.716822: step: 584/459, loss: 0.029031582176685333 2023-01-22 17:53:17.439194: step: 586/459, loss: 0.007842348888516426 2023-01-22 17:53:18.041612: step: 588/459, loss: 0.13776002824306488 2023-01-22 17:53:18.680555: step: 590/459, loss: 0.011314480565488338 2023-01-22 17:53:19.336234: step: 592/459, loss: 0.007745593320578337 2023-01-22 17:53:19.926810: step: 594/459, loss: 0.022692358121275902 2023-01-22 17:53:20.508596: step: 596/459, loss: 0.02020348608493805 2023-01-22 17:53:21.083970: step: 598/459, loss: 0.0018270120490342379 2023-01-22 17:53:21.667970: step: 600/459, loss: 0.048009250313043594 2023-01-22 17:53:22.312465: step: 602/459, loss: 0.013464498333632946 2023-01-22 17:53:22.956693: step: 604/459, loss: 0.01009698398411274 2023-01-22 17:53:23.571228: step: 606/459, loss: 0.017389100044965744 2023-01-22 17:53:24.128650: step: 608/459, loss: 0.025212448090314865 2023-01-22 17:53:24.787046: step: 610/459, loss: 0.2687627375125885 2023-01-22 17:53:25.369185: step: 612/459, loss: 0.06852442771196365 2023-01-22 17:53:25.953999: step: 614/459, loss: 0.008279979228973389 2023-01-22 17:53:26.562830: step: 616/459, loss: 0.00034404723555780947 2023-01-22 17:53:27.201279: step: 618/459, loss: 0.016248345375061035 2023-01-22 17:53:27.853207: step: 620/459, loss: 0.013861670158803463 2023-01-22 17:53:28.464444: step: 622/459, loss: 0.052149076014757156 2023-01-22 17:53:29.062146: step: 624/459, loss: 0.024811483919620514 2023-01-22 17:53:29.632832: step: 626/459, loss: 0.028002170845866203 2023-01-22 17:53:30.275246: step: 628/459, loss: 0.042968183755874634 2023-01-22 17:53:30.884061: step: 630/459, loss: 0.00025976187316700816 2023-01-22 17:53:31.485196: step: 632/459, loss: 0.998838484287262 2023-01-22 17:53:32.073057: step: 634/459, loss: 0.008922360837459564 2023-01-22 17:53:32.658076: step: 636/459, loss: 0.09696422517299652 2023-01-22 17:53:33.322263: step: 638/459, loss: 0.03464225307106972 2023-01-22 17:53:33.963625: step: 640/459, loss: 0.12437614798545837 2023-01-22 17:53:34.613859: step: 642/459, loss: 0.03452873229980469 2023-01-22 17:53:35.221355: step: 644/459, loss: 0.0035033440217375755 2023-01-22 17:53:35.835693: step: 646/459, loss: 0.09482638537883759 2023-01-22 17:53:36.472215: step: 648/459, loss: 0.005336654372513294 2023-01-22 17:53:37.107212: step: 650/459, loss: 0.004811717662960291 2023-01-22 17:53:37.764904: step: 652/459, loss: 0.017512938007712364 2023-01-22 17:53:38.386226: step: 654/459, loss: 0.0001908241683850065 2023-01-22 17:53:39.009390: step: 656/459, loss: 0.03200798109173775 2023-01-22 17:53:39.699152: step: 658/459, loss: 0.0012799898395314813 2023-01-22 17:53:40.277929: step: 660/459, loss: 0.0008137557888403535 2023-01-22 17:53:40.911141: step: 662/459, loss: 0.01522554736584425 2023-01-22 17:53:41.539019: step: 664/459, loss: 0.026821764186024666 2023-01-22 17:53:42.181446: step: 666/459, loss: 0.010329365730285645 2023-01-22 17:53:42.774565: step: 668/459, loss: 0.06518066674470901 2023-01-22 17:53:43.443180: step: 670/459, loss: 0.04673764854669571 2023-01-22 17:53:44.032117: step: 672/459, loss: 0.01805119961500168 2023-01-22 17:53:44.648629: step: 674/459, loss: 0.007177308667451143 2023-01-22 17:53:45.305166: step: 676/459, loss: 0.049789056181907654 2023-01-22 17:53:45.947611: step: 678/459, loss: 0.013861939311027527 2023-01-22 17:53:46.611317: step: 680/459, loss: 0.20416097342967987 2023-01-22 17:53:47.219727: step: 682/459, loss: 0.013752831146121025 2023-01-22 17:53:47.816595: step: 684/459, loss: 0.00055104517377913 2023-01-22 17:53:48.374466: step: 686/459, loss: 0.06678423285484314 2023-01-22 17:53:49.116029: step: 688/459, loss: 0.015636947005987167 2023-01-22 17:53:49.811338: step: 690/459, loss: 0.005245404317975044 2023-01-22 17:53:50.457791: step: 692/459, loss: 0.03633078560233116 2023-01-22 17:53:51.068382: step: 694/459, loss: 0.02461899444460869 2023-01-22 17:53:51.656677: step: 696/459, loss: 0.04218187928199768 2023-01-22 17:53:52.243256: step: 698/459, loss: 0.030974891036748886 2023-01-22 17:53:52.831793: step: 700/459, loss: 0.0029584537260234356 2023-01-22 17:53:53.456615: step: 702/459, loss: 0.032843396067619324 2023-01-22 17:53:54.081222: step: 704/459, loss: 0.008197687566280365 2023-01-22 17:53:54.719118: step: 706/459, loss: 0.04038338363170624 2023-01-22 17:53:55.293362: step: 708/459, loss: 0.009391425177454948 2023-01-22 17:53:55.876438: step: 710/459, loss: 0.0051580192521214485 2023-01-22 17:53:56.475652: step: 712/459, loss: 0.003247020998969674 2023-01-22 17:53:57.078380: step: 714/459, loss: 0.40431463718414307 2023-01-22 17:53:57.689578: step: 716/459, loss: 0.013380136340856552 2023-01-22 17:53:58.343703: step: 718/459, loss: 0.00141344522126019 2023-01-22 17:53:58.937196: step: 720/459, loss: 0.12437529861927032 2023-01-22 17:53:59.578381: step: 722/459, loss: 0.15506799519062042 2023-01-22 17:54:00.219235: step: 724/459, loss: 0.03652463108301163 2023-01-22 17:54:00.824308: step: 726/459, loss: 0.0116523252800107 2023-01-22 17:54:01.445147: step: 728/459, loss: 0.0011880947276949883 2023-01-22 17:54:02.038251: step: 730/459, loss: 0.010874259285628796 2023-01-22 17:54:02.630072: step: 732/459, loss: 0.003364544128999114 2023-01-22 17:54:03.263917: step: 734/459, loss: 0.04069482535123825 2023-01-22 17:54:03.905410: step: 736/459, loss: 0.4540940821170807 2023-01-22 17:54:04.467085: step: 738/459, loss: 0.0029874886386096478 2023-01-22 17:54:05.035934: step: 740/459, loss: 0.0007233611540868878 2023-01-22 17:54:05.638027: step: 742/459, loss: 0.4043594002723694 2023-01-22 17:54:06.210534: step: 744/459, loss: 0.0033944384194910526 2023-01-22 17:54:06.833775: step: 746/459, loss: 0.010725623928010464 2023-01-22 17:54:07.439917: step: 748/459, loss: 0.028864065185189247 2023-01-22 17:54:08.072933: step: 750/459, loss: 0.2869672477245331 2023-01-22 17:54:08.647156: step: 752/459, loss: 0.0027520591393113136 2023-01-22 17:54:09.183850: step: 754/459, loss: 0.009986941702663898 2023-01-22 17:54:09.781410: step: 756/459, loss: 0.026975994929671288 2023-01-22 17:54:10.399404: step: 758/459, loss: 0.06463133543729782 2023-01-22 17:54:10.949014: step: 760/459, loss: 0.06619944423437119 2023-01-22 17:54:11.584441: step: 762/459, loss: 0.017195194959640503 2023-01-22 17:54:12.169394: step: 764/459, loss: 0.08432790637016296 2023-01-22 17:54:12.859974: step: 766/459, loss: 0.03046559914946556 2023-01-22 17:54:13.508062: step: 768/459, loss: 0.04024222865700722 2023-01-22 17:54:14.051836: step: 770/459, loss: 0.012401219457387924 2023-01-22 17:54:14.605928: step: 772/459, loss: 0.000431227294029668 2023-01-22 17:54:15.144346: step: 774/459, loss: 0.0017664716579020023 2023-01-22 17:54:15.847297: step: 776/459, loss: 0.04535061866044998 2023-01-22 17:54:16.481384: step: 778/459, loss: 0.6193322539329529 2023-01-22 17:54:17.195112: step: 780/459, loss: 0.0036046206951141357 2023-01-22 17:54:17.770199: step: 782/459, loss: 0.03738383576273918 2023-01-22 17:54:18.361917: step: 784/459, loss: 0.05519206076860428 2023-01-22 17:54:18.976625: step: 786/459, loss: 0.019211390987038612 2023-01-22 17:54:19.613181: step: 788/459, loss: 0.09702827036380768 2023-01-22 17:54:20.212459: step: 790/459, loss: 1.3568575382232666 2023-01-22 17:54:20.839578: step: 792/459, loss: 0.052546314895153046 2023-01-22 17:54:21.456172: step: 794/459, loss: 0.21985237300395966 2023-01-22 17:54:22.058443: step: 796/459, loss: 0.019692828878760338 2023-01-22 17:54:22.688025: step: 798/459, loss: 0.011581703089177608 2023-01-22 17:54:23.347636: step: 800/459, loss: 0.026210656389594078 2023-01-22 17:54:23.967154: step: 802/459, loss: 0.0006143710925243795 2023-01-22 17:54:24.534124: step: 804/459, loss: 0.020162807777523994 2023-01-22 17:54:25.181730: step: 806/459, loss: 0.376101553440094 2023-01-22 17:54:25.761689: step: 808/459, loss: 0.029508575797080994 2023-01-22 17:54:26.286960: step: 810/459, loss: 0.08182810992002487 2023-01-22 17:54:26.933668: step: 812/459, loss: 0.19281324744224548 2023-01-22 17:54:27.601992: step: 814/459, loss: 0.0017768770921975374 2023-01-22 17:54:28.223492: step: 816/459, loss: 0.015965241938829422 2023-01-22 17:54:28.828180: step: 818/459, loss: 0.00940035842359066 2023-01-22 17:54:29.472430: step: 820/459, loss: 0.005889589432626963 2023-01-22 17:54:30.139252: step: 822/459, loss: 0.02764943614602089 2023-01-22 17:54:30.737395: step: 824/459, loss: 0.10249798744916916 2023-01-22 17:54:31.356953: step: 826/459, loss: 0.3175942003726959 2023-01-22 17:54:31.988700: step: 828/459, loss: 0.004580629989504814 2023-01-22 17:54:32.561255: step: 830/459, loss: 0.031698957085609436 2023-01-22 17:54:33.199566: step: 832/459, loss: 0.5364214777946472 2023-01-22 17:54:33.766251: step: 834/459, loss: 0.005095324013382196 2023-01-22 17:54:34.337970: step: 836/459, loss: 0.005257736425846815 2023-01-22 17:54:35.004836: step: 838/459, loss: 0.007961424998939037 2023-01-22 17:54:35.609009: step: 840/459, loss: 0.011277742683887482 2023-01-22 17:54:36.225194: step: 842/459, loss: 0.007714628241956234 2023-01-22 17:54:36.905275: step: 844/459, loss: 0.0322132408618927 2023-01-22 17:54:37.663087: step: 846/459, loss: 0.007852588780224323 2023-01-22 17:54:38.238899: step: 848/459, loss: 0.04088585451245308 2023-01-22 17:54:38.820377: step: 850/459, loss: 0.06182702258229256 2023-01-22 17:54:39.453362: step: 852/459, loss: 0.039107996970415115 2023-01-22 17:54:40.113130: step: 854/459, loss: 0.007276168093085289 2023-01-22 17:54:40.698850: step: 856/459, loss: 0.058183472603559494 2023-01-22 17:54:41.251468: step: 858/459, loss: 0.0028085410594940186 2023-01-22 17:54:41.856706: step: 860/459, loss: 0.0007765153422951698 2023-01-22 17:54:42.428129: step: 862/459, loss: 0.009314765222370625 2023-01-22 17:54:43.117325: step: 864/459, loss: 0.0009922339813783765 2023-01-22 17:54:43.818389: step: 866/459, loss: 0.011936710216104984 2023-01-22 17:54:44.462387: step: 868/459, loss: 0.021143410354852676 2023-01-22 17:54:45.045547: step: 870/459, loss: 0.020123818889260292 2023-01-22 17:54:45.636023: step: 872/459, loss: 0.048482973128557205 2023-01-22 17:54:46.280096: step: 874/459, loss: 0.21939843893051147 2023-01-22 17:54:46.917136: step: 876/459, loss: 0.23864196240901947 2023-01-22 17:54:47.526231: step: 878/459, loss: 0.0015059307916089892 2023-01-22 17:54:48.093260: step: 880/459, loss: 0.0005756149766966701 2023-01-22 17:54:48.714922: step: 882/459, loss: 0.15634377300739288 2023-01-22 17:54:49.366117: step: 884/459, loss: 0.03005383536219597 2023-01-22 17:54:50.087896: step: 886/459, loss: 0.04370687156915665 2023-01-22 17:54:50.686498: step: 888/459, loss: 0.020334968343377113 2023-01-22 17:54:51.302931: step: 890/459, loss: 0.05364178121089935 2023-01-22 17:54:51.938645: step: 892/459, loss: 0.029651861637830734 2023-01-22 17:54:52.574365: step: 894/459, loss: 0.05204910412430763 2023-01-22 17:54:53.152222: step: 896/459, loss: 0.0005118152475915849 2023-01-22 17:54:53.838294: step: 898/459, loss: 0.22540368139743805 2023-01-22 17:54:54.502952: step: 900/459, loss: 0.03290703520178795 2023-01-22 17:54:55.150195: step: 902/459, loss: 0.12195084244012833 2023-01-22 17:54:55.768457: step: 904/459, loss: 0.02915540337562561 2023-01-22 17:54:56.409488: step: 906/459, loss: 0.008719928562641144 2023-01-22 17:54:57.021502: step: 908/459, loss: 0.03201425448060036 2023-01-22 17:54:57.680903: step: 910/459, loss: 0.01940903067588806 2023-01-22 17:54:58.304815: step: 912/459, loss: 0.03932953625917435 2023-01-22 17:54:58.921020: step: 914/459, loss: 0.012671761214733124 2023-01-22 17:54:59.607865: step: 916/459, loss: 0.002777149435132742 2023-01-22 17:55:00.247127: step: 918/459, loss: 0.49226972460746765 2023-01-22 17:55:00.714445: step: 920/459, loss: 0.00011090601765317842 ================================================== Loss: 0.085 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2875845559363176, 'r': 0.332332057998515, 'f1': 0.3083433002908757}, 'combined': 0.22720032653011893, 'epoch': 27} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31493362898821015, 'r': 0.3258032728588296, 'f1': 0.32027625305140306}, 'combined': 0.2049768019528979, 'epoch': 27} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2880498546660849, 'r': 0.3366958453022928, 'f1': 0.3104789334633566}, 'combined': 0.22877395097299957, 'epoch': 27} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.32760248210845166, 'r': 0.33266542955921863, 'f1': 0.33011454444306765}, 'combined': 0.21127330844356326, 'epoch': 27} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3105603849035355, 'r': 0.3394360184144904, 'f1': 0.3243568117940824}, 'combined': 0.23899975605879753, 'epoch': 27} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.32439698175795506, 'r': 0.3441736858323709, 'f1': 0.3339928306664685}, 'combined': 0.23946655783633594, 'epoch': 27} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23270440251572325, 'r': 0.35238095238095235, 'f1': 0.2803030303030303}, 'combined': 0.18686868686868685, 'epoch': 27} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.21323529411764705, 'r': 0.31521739130434784, 'f1': 0.2543859649122807}, 'combined': 0.12719298245614036, 'epoch': 27} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35294117647058826, 'r': 0.20689655172413793, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 27} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3066986908783784, 'r': 0.3445268026565465, 'f1': 0.3245140750670242}, 'combined': 0.23911563425991253, 'epoch': 19} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31271473684488904, 'r': 0.30873473110322686, 'f1': 0.31071198921642224}, 'combined': 0.1988556730985102, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2564102564102564, 'r': 0.38095238095238093, 'f1': 0.3065134099616858}, 'combined': 0.20434227330779053, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29418702499903554, 'r': 0.3594998939267151, 'f1': 0.32358060478117656}, 'combined': 0.23842781404928798, 'epoch': 13} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3202762492367604, 'r': 0.31212376289255195, 'f1': 0.31614745781013553}, 'combined': 0.20233437299848672, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2878787878787879, 'r': 0.41304347826086957, 'f1': 0.3392857142857143}, 'combined': 0.16964285714285715, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3189032241859154, 'r': 0.3376622373733222, 'f1': 0.32801474487694154}, 'combined': 0.24169507517248323, 'epoch': 15} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34157316773046453, 'r': 0.3008578947798814, 'f1': 0.3199253278791385}, 'combined': 0.22938042376240123, 'epoch': 15} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35294117647058826, 'r': 0.20689655172413793, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 15} ****************************** Epoch: 28 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 17:57:37.127950: step: 2/459, loss: 0.05941145867109299 2023-01-22 17:57:37.731715: step: 4/459, loss: 0.3108316957950592 2023-01-22 17:57:38.365244: step: 6/459, loss: 0.0012117669684812427 2023-01-22 17:57:38.963878: step: 8/459, loss: 0.01650828681886196 2023-01-22 17:57:39.581209: step: 10/459, loss: 0.01861696131527424 2023-01-22 17:57:40.190882: step: 12/459, loss: 0.014489592984318733 2023-01-22 17:57:40.833507: step: 14/459, loss: 0.00851240660995245 2023-01-22 17:57:41.452152: step: 16/459, loss: 0.02217547781765461 2023-01-22 17:57:42.098163: step: 18/459, loss: 0.003222203813493252 2023-01-22 17:57:42.778174: step: 20/459, loss: 0.010330821387469769 2023-01-22 17:57:43.401429: step: 22/459, loss: 0.0036225265357643366 2023-01-22 17:57:44.037607: step: 24/459, loss: 0.14840340614318848 2023-01-22 17:57:44.668384: step: 26/459, loss: 0.04664261266589165 2023-01-22 17:57:45.326278: step: 28/459, loss: 0.00899600237607956 2023-01-22 17:57:46.065774: step: 30/459, loss: 0.01649501360952854 2023-01-22 17:57:46.699629: step: 32/459, loss: 0.0299787949770689 2023-01-22 17:57:47.322708: step: 34/459, loss: 0.11176665127277374 2023-01-22 17:57:47.888849: step: 36/459, loss: 0.018528882414102554 2023-01-22 17:57:48.546535: step: 38/459, loss: 0.00686380872502923 2023-01-22 17:57:49.186279: step: 40/459, loss: 0.024618694558739662 2023-01-22 17:57:49.794794: step: 42/459, loss: 0.013065200299024582 2023-01-22 17:57:50.338939: step: 44/459, loss: 0.03507450968027115 2023-01-22 17:57:50.928262: step: 46/459, loss: 0.01341476570814848 2023-01-22 17:57:51.538496: step: 48/459, loss: 19.679689407348633 2023-01-22 17:57:52.131873: step: 50/459, loss: 0.029960349202156067 2023-01-22 17:57:52.786208: step: 52/459, loss: 0.007052543107420206 2023-01-22 17:57:53.417881: step: 54/459, loss: 0.0051376838237047195 2023-01-22 17:57:54.047203: step: 56/459, loss: 0.07997991889715195 2023-01-22 17:57:54.665365: step: 58/459, loss: 0.021123331040143967 2023-01-22 17:57:55.287589: step: 60/459, loss: 0.01774473302066326 2023-01-22 17:57:55.907478: step: 62/459, loss: 0.018804721534252167 2023-01-22 17:57:56.534806: step: 64/459, loss: 0.03568895161151886 2023-01-22 17:57:57.145264: step: 66/459, loss: 0.012593463994562626 2023-01-22 17:57:57.794464: step: 68/459, loss: 0.0015226148534566164 2023-01-22 17:57:58.477733: step: 70/459, loss: 0.026785096153616905 2023-01-22 17:57:59.084667: step: 72/459, loss: 0.035806868225336075 2023-01-22 17:57:59.767723: step: 74/459, loss: 0.013290898874402046 2023-01-22 17:58:00.306450: step: 76/459, loss: 0.007704571820795536 2023-01-22 17:58:00.903623: step: 78/459, loss: 0.0623183473944664 2023-01-22 17:58:01.547015: step: 80/459, loss: 0.01886957511305809 2023-01-22 17:58:02.135000: step: 82/459, loss: 0.014950178563594818 2023-01-22 17:58:02.802834: step: 84/459, loss: 0.2628275156021118 2023-01-22 17:58:03.376994: step: 86/459, loss: 0.04347718134522438 2023-01-22 17:58:03.967991: step: 88/459, loss: 0.007925002835690975 2023-01-22 17:58:04.610002: step: 90/459, loss: 0.06334137171506882 2023-01-22 17:58:05.222114: step: 92/459, loss: 0.002868188312277198 2023-01-22 17:58:05.861273: step: 94/459, loss: 0.024867307394742966 2023-01-22 17:58:06.451942: step: 96/459, loss: 0.01487242616713047 2023-01-22 17:58:07.114815: step: 98/459, loss: 0.038512829691171646 2023-01-22 17:58:07.735775: step: 100/459, loss: 0.0192011259496212 2023-01-22 17:58:08.399086: step: 102/459, loss: 0.0003619556373450905 2023-01-22 17:58:09.085673: step: 104/459, loss: 0.033639680594205856 2023-01-22 17:58:09.682078: step: 106/459, loss: 0.053204406052827835 2023-01-22 17:58:10.329755: step: 108/459, loss: 0.003586823819205165 2023-01-22 17:58:10.876604: step: 110/459, loss: 0.00018499493307899684 2023-01-22 17:58:11.467130: step: 112/459, loss: 0.007049934938549995 2023-01-22 17:58:12.054504: step: 114/459, loss: 0.03912494704127312 2023-01-22 17:58:12.639522: step: 116/459, loss: 0.02387193776667118 2023-01-22 17:58:13.376046: step: 118/459, loss: 0.01630781777203083 2023-01-22 17:58:13.940665: step: 120/459, loss: 0.017884759232401848 2023-01-22 17:58:14.563052: step: 122/459, loss: 0.10711070895195007 2023-01-22 17:58:15.165497: step: 124/459, loss: 0.0011218404397368431 2023-01-22 17:58:15.776067: step: 126/459, loss: 0.04745154082775116 2023-01-22 17:58:16.407459: step: 128/459, loss: 0.007678241468966007 2023-01-22 17:58:16.988884: step: 130/459, loss: 0.0002696895389817655 2023-01-22 17:58:17.527774: step: 132/459, loss: 0.0009508781949989498 2023-01-22 17:58:18.105055: step: 134/459, loss: 0.03761230409145355 2023-01-22 17:58:18.724196: step: 136/459, loss: 0.003964663948863745 2023-01-22 17:58:19.292416: step: 138/459, loss: 0.010352225042879581 2023-01-22 17:58:19.947317: step: 140/459, loss: 0.013466878794133663 2023-01-22 17:58:20.575346: step: 142/459, loss: 0.02094256691634655 2023-01-22 17:58:21.174355: step: 144/459, loss: 0.002302338369190693 2023-01-22 17:58:21.806213: step: 146/459, loss: 0.3109467029571533 2023-01-22 17:58:22.460906: step: 148/459, loss: 0.0018064514733850956 2023-01-22 17:58:23.075349: step: 150/459, loss: 0.5624876022338867 2023-01-22 17:58:23.713173: step: 152/459, loss: 0.01632050797343254 2023-01-22 17:58:24.377361: step: 154/459, loss: 0.010674025863409042 2023-01-22 17:58:24.984857: step: 156/459, loss: 0.00631437823176384 2023-01-22 17:58:25.618035: step: 158/459, loss: 2.6513497829437256 2023-01-22 17:58:26.302025: step: 160/459, loss: 0.0013768289936706424 2023-01-22 17:58:26.944986: step: 162/459, loss: 0.0013707326725125313 2023-01-22 17:58:27.564560: step: 164/459, loss: 0.03579749912023544 2023-01-22 17:58:28.205298: step: 166/459, loss: 0.024504566565155983 2023-01-22 17:58:28.876656: step: 168/459, loss: 0.0069246068596839905 2023-01-22 17:58:29.472856: step: 170/459, loss: 0.004520331043750048 2023-01-22 17:58:30.097539: step: 172/459, loss: 0.02847166545689106 2023-01-22 17:58:30.745454: step: 174/459, loss: 0.07268811017274857 2023-01-22 17:58:31.328300: step: 176/459, loss: 0.11926604807376862 2023-01-22 17:58:31.941327: step: 178/459, loss: 1.296430230140686 2023-01-22 17:58:32.544887: step: 180/459, loss: 0.002065723529085517 2023-01-22 17:58:33.178719: step: 182/459, loss: 0.08210490643978119 2023-01-22 17:58:33.800081: step: 184/459, loss: 0.05082903802394867 2023-01-22 17:58:34.378195: step: 186/459, loss: 0.0334903858602047 2023-01-22 17:58:34.993115: step: 188/459, loss: 0.04440271481871605 2023-01-22 17:58:35.602154: step: 190/459, loss: 0.03550637513399124 2023-01-22 17:58:36.194609: step: 192/459, loss: 0.010683873668313026 2023-01-22 17:58:36.793407: step: 194/459, loss: 0.004009505733847618 2023-01-22 17:58:37.396730: step: 196/459, loss: 0.0012221512151882052 2023-01-22 17:58:37.974507: step: 198/459, loss: 0.01642214134335518 2023-01-22 17:58:38.609417: step: 200/459, loss: 0.023447467014193535 2023-01-22 17:58:39.216581: step: 202/459, loss: 0.006082043517380953 2023-01-22 17:58:39.780868: step: 204/459, loss: 0.012367188930511475 2023-01-22 17:58:40.423101: step: 206/459, loss: 0.013343884609639645 2023-01-22 17:58:40.980300: step: 208/459, loss: 0.01635829359292984 2023-01-22 17:58:41.609525: step: 210/459, loss: 0.0405820831656456 2023-01-22 17:58:42.184833: step: 212/459, loss: 0.23440426588058472 2023-01-22 17:58:42.804760: step: 214/459, loss: 0.006955968681722879 2023-01-22 17:58:43.452384: step: 216/459, loss: 1.1034609079360962 2023-01-22 17:58:44.141097: step: 218/459, loss: 0.06541580706834793 2023-01-22 17:58:44.775954: step: 220/459, loss: 0.0029013906605541706 2023-01-22 17:58:45.387368: step: 222/459, loss: 0.028185436502099037 2023-01-22 17:58:45.962576: step: 224/459, loss: 0.10377515107393265 2023-01-22 17:58:46.627115: step: 226/459, loss: 0.060113225132226944 2023-01-22 17:58:47.167653: step: 228/459, loss: 0.001665513846091926 2023-01-22 17:58:47.732690: step: 230/459, loss: 0.14440801739692688 2023-01-22 17:58:48.313938: step: 232/459, loss: 0.07103420794010162 2023-01-22 17:58:48.916195: step: 234/459, loss: 0.007640192285180092 2023-01-22 17:58:49.582810: step: 236/459, loss: 0.005103878676891327 2023-01-22 17:58:50.204785: step: 238/459, loss: 0.030739178881049156 2023-01-22 17:58:50.842856: step: 240/459, loss: 0.08021114021539688 2023-01-22 17:58:51.474056: step: 242/459, loss: 0.0016296525718644261 2023-01-22 17:58:52.081137: step: 244/459, loss: 0.04656157270073891 2023-01-22 17:58:52.666796: step: 246/459, loss: 0.008917789906263351 2023-01-22 17:58:53.281272: step: 248/459, loss: 0.03076837584376335 2023-01-22 17:58:53.890571: step: 250/459, loss: 0.005306843668222427 2023-01-22 17:58:54.535776: step: 252/459, loss: 0.015764828771352768 2023-01-22 17:58:55.107967: step: 254/459, loss: 0.04200305789709091 2023-01-22 17:58:55.770026: step: 256/459, loss: 0.02685621567070484 2023-01-22 17:58:56.374867: step: 258/459, loss: 0.3682519495487213 2023-01-22 17:58:56.956763: step: 260/459, loss: 0.005189188756048679 2023-01-22 17:58:57.591530: step: 262/459, loss: 0.03016764298081398 2023-01-22 17:58:58.151023: step: 264/459, loss: 0.0012459148420020938 2023-01-22 17:58:58.793033: step: 266/459, loss: 0.01891731470823288 2023-01-22 17:58:59.402739: step: 268/459, loss: 0.018758926540613174 2023-01-22 17:59:00.005140: step: 270/459, loss: 0.08259087800979614 2023-01-22 17:59:00.666210: step: 272/459, loss: 0.02647973783314228 2023-01-22 17:59:01.263676: step: 274/459, loss: 0.08994212001562119 2023-01-22 17:59:01.895934: step: 276/459, loss: 0.005924654193222523 2023-01-22 17:59:02.518512: step: 278/459, loss: 0.01356319896876812 2023-01-22 17:59:03.095746: step: 280/459, loss: 0.004370101261883974 2023-01-22 17:59:03.721728: step: 282/459, loss: 0.0022065136581659317 2023-01-22 17:59:04.328696: step: 284/459, loss: 0.09375505149364471 2023-01-22 17:59:04.919633: step: 286/459, loss: 0.08628882467746735 2023-01-22 17:59:05.585613: step: 288/459, loss: 0.010583351366221905 2023-01-22 17:59:06.207671: step: 290/459, loss: 0.10336955636739731 2023-01-22 17:59:06.832778: step: 292/459, loss: 0.0002978017146233469 2023-01-22 17:59:07.448453: step: 294/459, loss: 0.4375552535057068 2023-01-22 17:59:08.136401: step: 296/459, loss: 0.04078018292784691 2023-01-22 17:59:08.703999: step: 298/459, loss: 0.10267192125320435 2023-01-22 17:59:09.276179: step: 300/459, loss: 0.12996084988117218 2023-01-22 17:59:09.872984: step: 302/459, loss: 0.024798313155770302 2023-01-22 17:59:10.504276: step: 304/459, loss: 0.005599912256002426 2023-01-22 17:59:11.072482: step: 306/459, loss: 0.015479405410587788 2023-01-22 17:59:11.684205: step: 308/459, loss: 0.017914168536663055 2023-01-22 17:59:12.322471: step: 310/459, loss: 0.038924962282180786 2023-01-22 17:59:12.981969: step: 312/459, loss: 0.020186321809887886 2023-01-22 17:59:13.577279: step: 314/459, loss: 0.02663654275238514 2023-01-22 17:59:14.208440: step: 316/459, loss: 0.006189205218106508 2023-01-22 17:59:14.827320: step: 318/459, loss: 0.008552856743335724 2023-01-22 17:59:15.487693: step: 320/459, loss: 0.009978863410651684 2023-01-22 17:59:16.113419: step: 322/459, loss: 0.06043631583452225 2023-01-22 17:59:16.714192: step: 324/459, loss: 0.03629247099161148 2023-01-22 17:59:17.316351: step: 326/459, loss: 0.05501700937747955 2023-01-22 17:59:17.939323: step: 328/459, loss: 0.008449778892099857 2023-01-22 17:59:18.540583: step: 330/459, loss: 0.00018524671031627804 2023-01-22 17:59:19.174914: step: 332/459, loss: 0.00990121066570282 2023-01-22 17:59:19.787432: step: 334/459, loss: 0.043547432869672775 2023-01-22 17:59:20.364392: step: 336/459, loss: 0.05256111919879913 2023-01-22 17:59:20.974700: step: 338/459, loss: 0.05020645633339882 2023-01-22 17:59:21.568998: step: 340/459, loss: 0.5370038151741028 2023-01-22 17:59:22.209446: step: 342/459, loss: 0.007150703109800816 2023-01-22 17:59:22.833212: step: 344/459, loss: 0.0060127489268779755 2023-01-22 17:59:23.479174: step: 346/459, loss: 0.053062062710523605 2023-01-22 17:59:24.086443: step: 348/459, loss: 0.0049169231206178665 2023-01-22 17:59:24.679263: step: 350/459, loss: 0.021853335201740265 2023-01-22 17:59:25.323375: step: 352/459, loss: 0.0080977538600564 2023-01-22 17:59:25.903636: step: 354/459, loss: 0.17690831422805786 2023-01-22 17:59:26.539810: step: 356/459, loss: 0.020695827901363373 2023-01-22 17:59:27.192069: step: 358/459, loss: 0.1427389234304428 2023-01-22 17:59:27.807809: step: 360/459, loss: 0.00511224614456296 2023-01-22 17:59:28.433545: step: 362/459, loss: 0.031719472259283066 2023-01-22 17:59:29.068313: step: 364/459, loss: 0.025995833799242973 2023-01-22 17:59:29.708939: step: 366/459, loss: 0.010315609164536 2023-01-22 17:59:30.386945: step: 368/459, loss: 0.001977713080123067 2023-01-22 17:59:30.990415: step: 370/459, loss: 0.04872211068868637 2023-01-22 17:59:31.566787: step: 372/459, loss: 0.0164900254458189 2023-01-22 17:59:32.142997: step: 374/459, loss: 0.017171286046504974 2023-01-22 17:59:32.809431: step: 376/459, loss: 0.015457016415894032 2023-01-22 17:59:33.462685: step: 378/459, loss: 0.0012582119088619947 2023-01-22 17:59:34.054819: step: 380/459, loss: 0.004070607014000416 2023-01-22 17:59:34.640678: step: 382/459, loss: 0.0025239039678126574 2023-01-22 17:59:35.219516: step: 384/459, loss: 0.0058722770772874355 2023-01-22 17:59:35.848751: step: 386/459, loss: 0.014411987736821175 2023-01-22 17:59:36.471431: step: 388/459, loss: 0.009386653080582619 2023-01-22 17:59:37.112814: step: 390/459, loss: 0.0224471315741539 2023-01-22 17:59:37.725485: step: 392/459, loss: 0.010551105253398418 2023-01-22 17:59:38.338253: step: 394/459, loss: 0.0239273514598608 2023-01-22 17:59:38.966438: step: 396/459, loss: 0.049413010478019714 2023-01-22 17:59:39.585467: step: 398/459, loss: 0.012004304677248001 2023-01-22 17:59:40.145326: step: 400/459, loss: 0.03866003081202507 2023-01-22 17:59:40.758811: step: 402/459, loss: 0.033173199743032455 2023-01-22 17:59:41.364054: step: 404/459, loss: 0.009427748620510101 2023-01-22 17:59:42.060048: step: 406/459, loss: 0.028201915323734283 2023-01-22 17:59:42.657434: step: 408/459, loss: 0.003192598232999444 2023-01-22 17:59:43.251831: step: 410/459, loss: 0.006053059361875057 2023-01-22 17:59:43.875251: step: 412/459, loss: 0.014847071841359138 2023-01-22 17:59:44.518718: step: 414/459, loss: 0.007970471866428852 2023-01-22 17:59:45.123880: step: 416/459, loss: 0.010114775039255619 2023-01-22 17:59:45.678322: step: 418/459, loss: 0.009753336198627949 2023-01-22 17:59:46.296666: step: 420/459, loss: 0.0771464928984642 2023-01-22 17:59:46.912134: step: 422/459, loss: 0.039731577038764954 2023-01-22 17:59:47.561486: step: 424/459, loss: 0.04537440091371536 2023-01-22 17:59:48.171125: step: 426/459, loss: 0.01023095939308405 2023-01-22 17:59:48.883444: step: 428/459, loss: 0.0365230031311512 2023-01-22 17:59:49.548942: step: 430/459, loss: 0.04935387521982193 2023-01-22 17:59:50.200634: step: 432/459, loss: 0.021411152556538582 2023-01-22 17:59:50.854073: step: 434/459, loss: 0.0035403736401349306 2023-01-22 17:59:51.522471: step: 436/459, loss: 0.010479881428182125 2023-01-22 17:59:52.076157: step: 438/459, loss: 0.01881113462150097 2023-01-22 17:59:52.727133: step: 440/459, loss: 0.020072823390364647 2023-01-22 17:59:53.329714: step: 442/459, loss: 0.00899420864880085 2023-01-22 17:59:53.947207: step: 444/459, loss: 0.03680766373872757 2023-01-22 17:59:54.544237: step: 446/459, loss: 0.015366356819868088 2023-01-22 17:59:55.120152: step: 448/459, loss: 0.013936375267803669 2023-01-22 17:59:55.747295: step: 450/459, loss: 0.11351874470710754 2023-01-22 17:59:56.344763: step: 452/459, loss: 0.005694000516086817 2023-01-22 17:59:56.959889: step: 454/459, loss: 0.03160266950726509 2023-01-22 17:59:57.540207: step: 456/459, loss: 0.008086075074970722 2023-01-22 17:59:58.225185: step: 458/459, loss: 0.0010812137043103576 2023-01-22 17:59:58.824609: step: 460/459, loss: 0.10633828490972519 2023-01-22 17:59:59.426978: step: 462/459, loss: 0.007459621876478195 2023-01-22 18:00:00.118489: step: 464/459, loss: 0.0461590439081192 2023-01-22 18:00:00.711709: step: 466/459, loss: 0.09817032516002655 2023-01-22 18:00:01.334969: step: 468/459, loss: 0.09830009937286377 2023-01-22 18:00:01.940650: step: 470/459, loss: 0.11264216154813766 2023-01-22 18:00:02.556221: step: 472/459, loss: 0.0007968302234075963 2023-01-22 18:00:03.171267: step: 474/459, loss: 0.0284377783536911 2023-01-22 18:00:03.833770: step: 476/459, loss: 0.045863766223192215 2023-01-22 18:00:04.460625: step: 478/459, loss: 0.004170488566160202 2023-01-22 18:00:05.090031: step: 480/459, loss: 0.1537868231534958 2023-01-22 18:00:05.706311: step: 482/459, loss: 0.00987004954367876 2023-01-22 18:00:06.334676: step: 484/459, loss: 0.07848459482192993 2023-01-22 18:00:06.912734: step: 486/459, loss: 0.02983160875737667 2023-01-22 18:00:07.580682: step: 488/459, loss: 0.01956058479845524 2023-01-22 18:00:08.209281: step: 490/459, loss: 0.0037928700912743807 2023-01-22 18:00:08.852051: step: 492/459, loss: 0.065627820789814 2023-01-22 18:00:09.449693: step: 494/459, loss: 0.006119112484157085 2023-01-22 18:00:10.143355: step: 496/459, loss: 0.004895551595836878 2023-01-22 18:00:10.719128: step: 498/459, loss: 0.019466310739517212 2023-01-22 18:00:11.344741: step: 500/459, loss: 0.032974883913993835 2023-01-22 18:00:11.965043: step: 502/459, loss: 0.09846020489931107 2023-01-22 18:00:12.655306: step: 504/459, loss: 0.3592538833618164 2023-01-22 18:00:13.251337: step: 506/459, loss: 0.034909650683403015 2023-01-22 18:00:13.909275: step: 508/459, loss: 0.0027777093928307295 2023-01-22 18:00:14.507221: step: 510/459, loss: 0.05303372070193291 2023-01-22 18:00:15.127271: step: 512/459, loss: 0.045209456235170364 2023-01-22 18:00:15.712119: step: 514/459, loss: 0.054974451661109924 2023-01-22 18:00:16.305962: step: 516/459, loss: 0.008619450964033604 2023-01-22 18:00:16.898301: step: 518/459, loss: 0.2354678213596344 2023-01-22 18:00:17.503959: step: 520/459, loss: 0.0002734030713327229 2023-01-22 18:00:18.072179: step: 522/459, loss: 7.189263124018908e-05 2023-01-22 18:00:18.668448: step: 524/459, loss: 0.006592820398509502 2023-01-22 18:00:19.292232: step: 526/459, loss: 0.43884527683258057 2023-01-22 18:00:19.937882: step: 528/459, loss: 0.4706663489341736 2023-01-22 18:00:20.516598: step: 530/459, loss: 0.006357571575790644 2023-01-22 18:00:21.173164: step: 532/459, loss: 0.014036940410733223 2023-01-22 18:00:21.800294: step: 534/459, loss: 0.024900004267692566 2023-01-22 18:00:22.443307: step: 536/459, loss: 0.012492386624217033 2023-01-22 18:00:23.033321: step: 538/459, loss: 0.027318404987454414 2023-01-22 18:00:23.631492: step: 540/459, loss: 0.007986591197550297 2023-01-22 18:00:24.314705: step: 542/459, loss: 0.023935360834002495 2023-01-22 18:00:24.948946: step: 544/459, loss: 0.008712836541235447 2023-01-22 18:00:25.528228: step: 546/459, loss: 0.002024037064984441 2023-01-22 18:00:26.144903: step: 548/459, loss: 0.034460872411727905 2023-01-22 18:00:26.771492: step: 550/459, loss: 0.11655804514884949 2023-01-22 18:00:27.409336: step: 552/459, loss: 1.6026612520217896 2023-01-22 18:00:28.092964: step: 554/459, loss: 0.049416955560445786 2023-01-22 18:00:28.732800: step: 556/459, loss: 0.012379526160657406 2023-01-22 18:00:29.317923: step: 558/459, loss: 0.011431019753217697 2023-01-22 18:00:29.917650: step: 560/459, loss: 0.014487975277006626 2023-01-22 18:00:30.514983: step: 562/459, loss: 0.031724169850349426 2023-01-22 18:00:31.086246: step: 564/459, loss: 0.03165552392601967 2023-01-22 18:00:31.671300: step: 566/459, loss: 0.04493225738406181 2023-01-22 18:00:32.253623: step: 568/459, loss: 0.009479191154241562 2023-01-22 18:00:32.854821: step: 570/459, loss: 0.0006467282655648887 2023-01-22 18:00:33.473388: step: 572/459, loss: 0.05288791283965111 2023-01-22 18:00:34.072179: step: 574/459, loss: 0.0897417739033699 2023-01-22 18:00:34.641212: step: 576/459, loss: 0.08883938193321228 2023-01-22 18:00:35.265751: step: 578/459, loss: 0.022299136966466904 2023-01-22 18:00:35.938841: step: 580/459, loss: 0.023519640788435936 2023-01-22 18:00:36.551302: step: 582/459, loss: 0.0008086525485850871 2023-01-22 18:00:37.122024: step: 584/459, loss: 0.17986714839935303 2023-01-22 18:00:37.729611: step: 586/459, loss: 0.020425546914339066 2023-01-22 18:00:38.420483: step: 588/459, loss: 0.0507589690387249 2023-01-22 18:00:39.112144: step: 590/459, loss: 0.02224678546190262 2023-01-22 18:00:39.722288: step: 592/459, loss: 0.003676899243146181 2023-01-22 18:00:40.325249: step: 594/459, loss: 0.0025532462168484926 2023-01-22 18:00:40.925669: step: 596/459, loss: 0.02601931430399418 2023-01-22 18:00:41.557066: step: 598/459, loss: 0.009631331078708172 2023-01-22 18:00:42.181144: step: 600/459, loss: 0.0003276934730820358 2023-01-22 18:00:42.781374: step: 602/459, loss: 0.002729474799707532 2023-01-22 18:00:43.614182: step: 604/459, loss: 0.0504467748105526 2023-01-22 18:00:44.186532: step: 606/459, loss: 0.009413693100214005 2023-01-22 18:00:44.759985: step: 608/459, loss: 0.007487486582249403 2023-01-22 18:00:45.387761: step: 610/459, loss: 0.02089555375277996 2023-01-22 18:00:46.041625: step: 612/459, loss: 0.0632053017616272 2023-01-22 18:00:46.569216: step: 614/459, loss: 0.028720544651150703 2023-01-22 18:00:47.214838: step: 616/459, loss: 0.0335274413228035 2023-01-22 18:00:47.866027: step: 618/459, loss: 0.012079049833118916 2023-01-22 18:00:48.476221: step: 620/459, loss: 0.010176225565373898 2023-01-22 18:00:49.144552: step: 622/459, loss: 0.04303912818431854 2023-01-22 18:00:49.770754: step: 624/459, loss: 0.034899476915597916 2023-01-22 18:00:50.372993: step: 626/459, loss: 0.011562592349946499 2023-01-22 18:00:50.973716: step: 628/459, loss: 0.010302478447556496 2023-01-22 18:00:51.593948: step: 630/459, loss: 0.031484492123126984 2023-01-22 18:00:52.262404: step: 632/459, loss: 0.00872462335973978 2023-01-22 18:00:52.826850: step: 634/459, loss: 0.0012763499980792403 2023-01-22 18:00:53.398396: step: 636/459, loss: 0.008533569984138012 2023-01-22 18:00:53.966650: step: 638/459, loss: 0.0012740965466946363 2023-01-22 18:00:54.527375: step: 640/459, loss: 0.0020529988687485456 2023-01-22 18:00:55.152422: step: 642/459, loss: 0.0011606015032157302 2023-01-22 18:00:55.760142: step: 644/459, loss: 0.05729849636554718 2023-01-22 18:00:56.335780: step: 646/459, loss: 0.00036670558620244265 2023-01-22 18:00:57.008763: step: 648/459, loss: 0.018963007256388664 2023-01-22 18:00:57.637567: step: 650/459, loss: 0.0019380050944164395 2023-01-22 18:00:58.304780: step: 652/459, loss: 0.0017830454744398594 2023-01-22 18:00:58.991067: step: 654/459, loss: 0.015319264493882656 2023-01-22 18:00:59.595052: step: 656/459, loss: 0.00637629721313715 2023-01-22 18:01:00.236630: step: 658/459, loss: 0.011996494606137276 2023-01-22 18:01:00.808035: step: 660/459, loss: 0.0026402282528579235 2023-01-22 18:01:01.358848: step: 662/459, loss: 0.033331044018268585 2023-01-22 18:01:01.981466: step: 664/459, loss: 0.04122241586446762 2023-01-22 18:01:02.613742: step: 666/459, loss: 0.05417530983686447 2023-01-22 18:01:03.265057: step: 668/459, loss: 0.7110795378684998 2023-01-22 18:01:03.871642: step: 670/459, loss: 0.0032479162327945232 2023-01-22 18:01:04.588819: step: 672/459, loss: 0.022539736703038216 2023-01-22 18:01:05.173950: step: 674/459, loss: 0.0007265821914188564 2023-01-22 18:01:05.867426: step: 676/459, loss: 0.03753208741545677 2023-01-22 18:01:06.560452: step: 678/459, loss: 0.28915441036224365 2023-01-22 18:01:07.123551: step: 680/459, loss: 0.010008925572037697 2023-01-22 18:01:07.758825: step: 682/459, loss: 0.009016115218400955 2023-01-22 18:01:08.419787: step: 684/459, loss: 0.020173542201519012 2023-01-22 18:01:08.979240: step: 686/459, loss: 0.0035502833779901266 2023-01-22 18:01:09.564080: step: 688/459, loss: 0.02214115485548973 2023-01-22 18:01:10.222211: step: 690/459, loss: 0.014559772796928883 2023-01-22 18:01:10.914166: step: 692/459, loss: 0.003827260807156563 2023-01-22 18:01:11.545933: step: 694/459, loss: 0.031115544959902763 2023-01-22 18:01:12.193355: step: 696/459, loss: 0.01664723828434944 2023-01-22 18:01:12.796815: step: 698/459, loss: 0.0030709202401340008 2023-01-22 18:01:13.460443: step: 700/459, loss: 0.021923886612057686 2023-01-22 18:01:14.090369: step: 702/459, loss: 0.00576819758862257 2023-01-22 18:01:14.784061: step: 704/459, loss: 0.06521624326705933 2023-01-22 18:01:15.449773: step: 706/459, loss: 0.015303825959563255 2023-01-22 18:01:16.035737: step: 708/459, loss: 0.03968990594148636 2023-01-22 18:01:16.635925: step: 710/459, loss: 0.012354903854429722 2023-01-22 18:01:17.268457: step: 712/459, loss: 0.015093996189534664 2023-01-22 18:01:17.890490: step: 714/459, loss: 0.03290937468409538 2023-01-22 18:01:18.476526: step: 716/459, loss: 0.0034538765903562307 2023-01-22 18:01:19.127584: step: 718/459, loss: 0.010222896933555603 2023-01-22 18:01:19.812979: step: 720/459, loss: 0.06758192181587219 2023-01-22 18:01:20.417996: step: 722/459, loss: 0.01182390097528696 2023-01-22 18:01:21.098935: step: 724/459, loss: 0.10681073367595673 2023-01-22 18:01:21.765585: step: 726/459, loss: 0.0033303177915513515 2023-01-22 18:01:22.378436: step: 728/459, loss: 0.0033493144437670708 2023-01-22 18:01:22.955323: step: 730/459, loss: 0.02835802175104618 2023-01-22 18:01:23.589037: step: 732/459, loss: 0.0066292667761445045 2023-01-22 18:01:24.236982: step: 734/459, loss: 0.014458037912845612 2023-01-22 18:01:24.820099: step: 736/459, loss: 0.025379691272974014 2023-01-22 18:01:25.412467: step: 738/459, loss: 0.04083295166492462 2023-01-22 18:01:26.029599: step: 740/459, loss: 0.043660182505846024 2023-01-22 18:01:26.643244: step: 742/459, loss: 0.00542782386764884 2023-01-22 18:01:27.242489: step: 744/459, loss: 0.0066055539064109325 2023-01-22 18:01:27.827441: step: 746/459, loss: 0.0011056537041440606 2023-01-22 18:01:28.434611: step: 748/459, loss: 0.012917914427816868 2023-01-22 18:01:29.030913: step: 750/459, loss: 0.039421193301677704 2023-01-22 18:01:29.710097: step: 752/459, loss: 0.4240401089191437 2023-01-22 18:01:30.293166: step: 754/459, loss: 0.26906484365463257 2023-01-22 18:01:30.883150: step: 756/459, loss: 0.03583712503314018 2023-01-22 18:01:31.506644: step: 758/459, loss: 0.00013370240048971027 2023-01-22 18:01:32.142761: step: 760/459, loss: 0.017914216965436935 2023-01-22 18:01:32.725969: step: 762/459, loss: 0.010735241696238518 2023-01-22 18:01:33.404404: step: 764/459, loss: 0.06860155612230301 2023-01-22 18:01:34.118063: step: 766/459, loss: 0.04328359290957451 2023-01-22 18:01:34.784475: step: 768/459, loss: 0.03425132855772972 2023-01-22 18:01:35.414045: step: 770/459, loss: 0.037463631480932236 2023-01-22 18:01:35.995993: step: 772/459, loss: 0.04248349368572235 2023-01-22 18:01:36.646407: step: 774/459, loss: 0.0023926955182105303 2023-01-22 18:01:37.327957: step: 776/459, loss: 0.001559615135192871 2023-01-22 18:01:37.967768: step: 778/459, loss: 0.016074461862444878 2023-01-22 18:01:38.578868: step: 780/459, loss: 0.0013094214955344796 2023-01-22 18:01:39.279562: step: 782/459, loss: 0.027052994817495346 2023-01-22 18:01:39.893871: step: 784/459, loss: 0.09766388684511185 2023-01-22 18:01:40.577679: step: 786/459, loss: 0.10547588020563126 2023-01-22 18:01:41.283849: step: 788/459, loss: 0.09087724983692169 2023-01-22 18:01:41.809178: step: 790/459, loss: 0.00032631229260005057 2023-01-22 18:01:42.436793: step: 792/459, loss: 0.0023292640689760447 2023-01-22 18:01:43.029498: step: 794/459, loss: 0.33331558108329773 2023-01-22 18:01:43.651486: step: 796/459, loss: 0.013687323778867722 2023-01-22 18:01:44.286483: step: 798/459, loss: 0.029223734512925148 2023-01-22 18:01:44.935119: step: 800/459, loss: 0.059731680899858475 2023-01-22 18:01:45.563087: step: 802/459, loss: 0.025911541655659676 2023-01-22 18:01:46.219683: step: 804/459, loss: 0.010552038438618183 2023-01-22 18:01:46.868771: step: 806/459, loss: 0.031298842281103134 2023-01-22 18:01:47.459368: step: 808/459, loss: 0.00015180069021880627 2023-01-22 18:01:48.096229: step: 810/459, loss: 0.0004317924613133073 2023-01-22 18:01:48.728535: step: 812/459, loss: 0.03708028793334961 2023-01-22 18:01:49.405354: step: 814/459, loss: 0.00539397681131959 2023-01-22 18:01:50.062686: step: 816/459, loss: 0.38016387820243835 2023-01-22 18:01:50.689606: step: 818/459, loss: 0.008098323829472065 2023-01-22 18:01:51.272931: step: 820/459, loss: 0.008099961094558239 2023-01-22 18:01:51.877263: step: 822/459, loss: 0.031771816313266754 2023-01-22 18:01:52.533412: step: 824/459, loss: 0.002131863497197628 2023-01-22 18:01:53.201966: step: 826/459, loss: 0.6110060214996338 2023-01-22 18:01:53.922177: step: 828/459, loss: 0.09498203545808792 2023-01-22 18:01:54.489625: step: 830/459, loss: 0.023690013214945793 2023-01-22 18:01:55.073140: step: 832/459, loss: 0.17716924846172333 2023-01-22 18:01:55.763917: step: 834/459, loss: 0.030982451513409615 2023-01-22 18:01:56.366305: step: 836/459, loss: 0.011706631630659103 2023-01-22 18:01:56.904081: step: 838/459, loss: 0.0016065607778728008 2023-01-22 18:01:57.536697: step: 840/459, loss: 0.03879469633102417 2023-01-22 18:01:58.148222: step: 842/459, loss: 0.004312042612582445 2023-01-22 18:01:58.696688: step: 844/459, loss: 0.013893297873437405 2023-01-22 18:01:59.277734: step: 846/459, loss: 0.0041062962263822556 2023-01-22 18:01:59.892028: step: 848/459, loss: 0.01100216805934906 2023-01-22 18:02:00.588487: step: 850/459, loss: 0.02146124094724655 2023-01-22 18:02:01.232386: step: 852/459, loss: 0.07534580677747726 2023-01-22 18:02:01.828429: step: 854/459, loss: 0.012814421206712723 2023-01-22 18:02:02.438130: step: 856/459, loss: 0.0021530233789235353 2023-01-22 18:02:03.054639: step: 858/459, loss: 0.5146773457527161 2023-01-22 18:02:03.637522: step: 860/459, loss: 0.07562511414289474 2023-01-22 18:02:04.232136: step: 862/459, loss: 0.059799738228321075 2023-01-22 18:02:04.849780: step: 864/459, loss: 2.472139835357666 2023-01-22 18:02:05.606781: step: 866/459, loss: 0.028610147535800934 2023-01-22 18:02:06.213929: step: 868/459, loss: 0.005457169376313686 2023-01-22 18:02:06.873829: step: 870/459, loss: 0.05137787014245987 2023-01-22 18:02:07.517258: step: 872/459, loss: 0.007391621824353933 2023-01-22 18:02:08.106207: step: 874/459, loss: 0.00924049224704504 2023-01-22 18:02:08.691169: step: 876/459, loss: 0.010988622903823853 2023-01-22 18:02:09.322070: step: 878/459, loss: 0.010470833629369736 2023-01-22 18:02:09.879196: step: 880/459, loss: 0.052433717995882034 2023-01-22 18:02:10.485004: step: 882/459, loss: 0.07259542495012283 2023-01-22 18:02:11.049306: step: 884/459, loss: 0.028310947120189667 2023-01-22 18:02:11.629257: step: 886/459, loss: 0.0020409305579960346 2023-01-22 18:02:12.311994: step: 888/459, loss: 0.02610262669622898 2023-01-22 18:02:12.907208: step: 890/459, loss: 0.0010350508382543921 2023-01-22 18:02:13.527437: step: 892/459, loss: 0.08515965938568115 2023-01-22 18:02:14.125516: step: 894/459, loss: 0.011588278226554394 2023-01-22 18:02:14.752695: step: 896/459, loss: 0.02250625006854534 2023-01-22 18:02:15.382420: step: 898/459, loss: 0.000176224421011284 2023-01-22 18:02:16.084201: step: 900/459, loss: 0.005608417559415102 2023-01-22 18:02:16.658301: step: 902/459, loss: 0.034268658608198166 2023-01-22 18:02:17.264141: step: 904/459, loss: 0.007022496312856674 2023-01-22 18:02:17.906935: step: 906/459, loss: 0.013795983046293259 2023-01-22 18:02:18.473354: step: 908/459, loss: 0.02423981763422489 2023-01-22 18:02:19.111307: step: 910/459, loss: 0.01994927041232586 2023-01-22 18:02:19.794914: step: 912/459, loss: 0.006996851414442062 2023-01-22 18:02:20.403291: step: 914/459, loss: 0.035332225263118744 2023-01-22 18:02:21.044786: step: 916/459, loss: 0.12766781449317932 2023-01-22 18:02:21.679584: step: 918/459, loss: 0.011943712830543518 2023-01-22 18:02:22.143480: step: 920/459, loss: 0.0011686473153531551 ================================================== Loss: 0.107 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2974056039873464, 'r': 0.3335231725930204, 'f1': 0.31443061172901915}, 'combined': 0.23168571390559303, 'epoch': 28} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3331670495906327, 'r': 0.3198403676070074, 'f1': 0.32636772204796677}, 'combined': 0.2088753421106987, 'epoch': 28} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2958260755611411, 'r': 0.3407332597070449, 'f1': 0.31669563997462546}, 'combined': 0.23335468208656612, 'epoch': 28} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3357164363253103, 'r': 0.32472935295466376, 'f1': 0.33013150485224596}, 'combined': 0.21128416310543738, 'epoch': 28} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3138436379783406, 'r': 0.33706925824618744, 'f1': 0.3250420843471926}, 'combined': 0.23950469372951033, 'epoch': 28} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.33655531123296756, 'r': 0.32887841088550024, 'f1': 0.3326725780623057}, 'combined': 0.2385199616295777, 'epoch': 28} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.20238095238095236, 'r': 0.32380952380952377, 'f1': 0.24908424908424906}, 'combined': 0.16605616605616602, 'epoch': 28} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2357142857142857, 'r': 0.358695652173913, 'f1': 0.2844827586206896}, 'combined': 0.1422413793103448, 'epoch': 28} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.1724137931034483, 'f1': 0.22727272727272724}, 'combined': 0.1515151515151515, 'epoch': 28} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3066986908783784, 'r': 0.3445268026565465, 'f1': 0.3245140750670242}, 'combined': 0.23911563425991253, 'epoch': 19} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31271473684488904, 'r': 0.30873473110322686, 'f1': 0.31071198921642224}, 'combined': 0.1988556730985102, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2564102564102564, 'r': 0.38095238095238093, 'f1': 0.3065134099616858}, 'combined': 0.20434227330779053, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29418702499903554, 'r': 0.3594998939267151, 'f1': 0.32358060478117656}, 'combined': 0.23842781404928798, 'epoch': 13} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3202762492367604, 'r': 0.31212376289255195, 'f1': 0.31614745781013553}, 'combined': 0.20233437299848672, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2878787878787879, 'r': 0.41304347826086957, 'f1': 0.3392857142857143}, 'combined': 0.16964285714285715, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3189032241859154, 'r': 0.3376622373733222, 'f1': 0.32801474487694154}, 'combined': 0.24169507517248323, 'epoch': 15} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34157316773046453, 'r': 0.3008578947798814, 'f1': 0.3199253278791385}, 'combined': 0.22938042376240123, 'epoch': 15} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35294117647058826, 'r': 0.20689655172413793, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 15} ****************************** Epoch: 29 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 18:04:58.677474: step: 2/459, loss: 0.09506294876337051 2023-01-22 18:04:59.320178: step: 4/459, loss: 0.0004099514917470515 2023-01-22 18:04:59.963653: step: 6/459, loss: 0.05362975224852562 2023-01-22 18:05:00.585585: step: 8/459, loss: 0.00678288983181119 2023-01-22 18:05:01.134136: step: 10/459, loss: 0.04106101766228676 2023-01-22 18:05:01.749195: step: 12/459, loss: 0.07361756265163422 2023-01-22 18:05:02.375737: step: 14/459, loss: 0.0151847954839468 2023-01-22 18:05:02.986355: step: 16/459, loss: 0.003860610071569681 2023-01-22 18:05:03.645411: step: 18/459, loss: 0.007304908707737923 2023-01-22 18:05:04.230334: step: 20/459, loss: 0.01006059255450964 2023-01-22 18:05:04.792113: step: 22/459, loss: 0.02473737858235836 2023-01-22 18:05:05.406712: step: 24/459, loss: 0.005873989313840866 2023-01-22 18:05:05.995726: step: 26/459, loss: 0.0037155328318476677 2023-01-22 18:05:06.568727: step: 28/459, loss: 0.01829911768436432 2023-01-22 18:05:07.153343: step: 30/459, loss: 0.018897486850619316 2023-01-22 18:05:07.750467: step: 32/459, loss: 0.11103769391775131 2023-01-22 18:05:08.289152: step: 34/459, loss: 0.016571883112192154 2023-01-22 18:05:08.945681: step: 36/459, loss: 0.02246231585741043 2023-01-22 18:05:09.541249: step: 38/459, loss: 0.00047842838102951646 2023-01-22 18:05:10.134932: step: 40/459, loss: 0.0071874214336276054 2023-01-22 18:05:10.745801: step: 42/459, loss: 0.0006898611900396645 2023-01-22 18:05:11.381384: step: 44/459, loss: 0.03743324801325798 2023-01-22 18:05:11.980281: step: 46/459, loss: 0.005819715093821287 2023-01-22 18:05:12.639884: step: 48/459, loss: 0.03301220387220383 2023-01-22 18:05:13.321859: step: 50/459, loss: 0.010164864361286163 2023-01-22 18:05:13.961865: step: 52/459, loss: 0.00412159925326705 2023-01-22 18:05:14.581531: step: 54/459, loss: 0.014645959250628948 2023-01-22 18:05:15.219674: step: 56/459, loss: 0.0012815194204449654 2023-01-22 18:05:15.792776: step: 58/459, loss: 0.004080306738615036 2023-01-22 18:05:16.395787: step: 60/459, loss: 0.041351594030857086 2023-01-22 18:05:17.038410: step: 62/459, loss: 0.02847079373896122 2023-01-22 18:05:17.643632: step: 64/459, loss: 0.039085503667593 2023-01-22 18:05:18.346599: step: 66/459, loss: 0.02024432085454464 2023-01-22 18:05:19.004135: step: 68/459, loss: 0.005877702496945858 2023-01-22 18:05:19.635879: step: 70/459, loss: 0.009159103035926819 2023-01-22 18:05:20.241786: step: 72/459, loss: 0.0047263093292713165 2023-01-22 18:05:20.897381: step: 74/459, loss: 0.07815837860107422 2023-01-22 18:05:21.491838: step: 76/459, loss: 0.022079601883888245 2023-01-22 18:05:22.139740: step: 78/459, loss: 0.08078441023826599 2023-01-22 18:05:22.754528: step: 80/459, loss: 8.38843043311499e-05 2023-01-22 18:05:23.362289: step: 82/459, loss: 0.01012772973626852 2023-01-22 18:05:23.976590: step: 84/459, loss: 0.0002275604783790186 2023-01-22 18:05:24.579121: step: 86/459, loss: 0.015270798467099667 2023-01-22 18:05:25.203656: step: 88/459, loss: 0.4403932988643646 2023-01-22 18:05:25.825974: step: 90/459, loss: 0.04843716695904732 2023-01-22 18:05:26.461155: step: 92/459, loss: 0.1557137370109558 2023-01-22 18:05:27.077472: step: 94/459, loss: 0.006818751338869333 2023-01-22 18:05:27.689901: step: 96/459, loss: 0.006172738038003445 2023-01-22 18:05:28.389897: step: 98/459, loss: 0.004751079715788364 2023-01-22 18:05:29.034712: step: 100/459, loss: 0.01636620983481407 2023-01-22 18:05:29.634866: step: 102/459, loss: 0.019500426948070526 2023-01-22 18:05:30.213523: step: 104/459, loss: 2.711756944656372 2023-01-22 18:05:30.854674: step: 106/459, loss: 0.03514890372753143 2023-01-22 18:05:31.467236: step: 108/459, loss: 0.024103818461298943 2023-01-22 18:05:32.083156: step: 110/459, loss: 0.12429874390363693 2023-01-22 18:05:32.686208: step: 112/459, loss: 0.0014483830891549587 2023-01-22 18:05:33.282916: step: 114/459, loss: 0.018563097342848778 2023-01-22 18:05:33.891498: step: 116/459, loss: 0.005763729102909565 2023-01-22 18:05:34.483332: step: 118/459, loss: 0.03171611949801445 2023-01-22 18:05:35.082472: step: 120/459, loss: 0.17810757458209991 2023-01-22 18:05:35.673532: step: 122/459, loss: 0.01239104475826025 2023-01-22 18:05:36.273365: step: 124/459, loss: 0.0017421204829588532 2023-01-22 18:05:36.865470: step: 126/459, loss: 0.04861907288432121 2023-01-22 18:05:37.476636: step: 128/459, loss: 0.004046943038702011 2023-01-22 18:05:38.054970: step: 130/459, loss: 0.02102583833038807 2023-01-22 18:05:38.683745: step: 132/459, loss: 0.6350687742233276 2023-01-22 18:05:39.244799: step: 134/459, loss: 0.009390340186655521 2023-01-22 18:05:39.815018: step: 136/459, loss: 0.0018470926443114877 2023-01-22 18:05:40.433142: step: 138/459, loss: 0.036273349076509476 2023-01-22 18:05:41.036717: step: 140/459, loss: 0.0011134818196296692 2023-01-22 18:05:41.646624: step: 142/459, loss: 0.013169209472835064 2023-01-22 18:05:42.270862: step: 144/459, loss: 0.023766105994582176 2023-01-22 18:05:42.893486: step: 146/459, loss: 0.023665890097618103 2023-01-22 18:05:43.511048: step: 148/459, loss: 0.042678166180849075 2023-01-22 18:05:44.154424: step: 150/459, loss: 0.002138726180419326 2023-01-22 18:05:44.819194: step: 152/459, loss: 0.007168373093008995 2023-01-22 18:05:45.484490: step: 154/459, loss: 0.0297981146723032 2023-01-22 18:05:46.147705: step: 156/459, loss: 0.026149552315473557 2023-01-22 18:05:46.874857: step: 158/459, loss: 1.309334635734558 2023-01-22 18:05:47.460194: step: 160/459, loss: 0.00012745292042382061 2023-01-22 18:05:48.095795: step: 162/459, loss: 0.17565123736858368 2023-01-22 18:05:48.736284: step: 164/459, loss: 0.05070458725094795 2023-01-22 18:05:49.373791: step: 166/459, loss: 0.02812275104224682 2023-01-22 18:05:49.954759: step: 168/459, loss: 0.002778986468911171 2023-01-22 18:05:50.537708: step: 170/459, loss: 0.006161876488476992 2023-01-22 18:05:51.153208: step: 172/459, loss: 0.6928167343139648 2023-01-22 18:05:51.796951: step: 174/459, loss: 0.06639766693115234 2023-01-22 18:05:52.365373: step: 176/459, loss: 0.008554046973586082 2023-01-22 18:05:52.954197: step: 178/459, loss: 0.6639444231987 2023-01-22 18:05:53.571434: step: 180/459, loss: 0.02736486680805683 2023-01-22 18:05:54.142686: step: 182/459, loss: 0.0007687495090067387 2023-01-22 18:05:54.718962: step: 184/459, loss: 0.0027108017820864916 2023-01-22 18:05:55.351657: step: 186/459, loss: 0.023562666028738022 2023-01-22 18:05:55.975228: step: 188/459, loss: 0.026505397632718086 2023-01-22 18:05:56.619200: step: 190/459, loss: 0.011981048621237278 2023-01-22 18:05:57.178584: step: 192/459, loss: 0.020438598468899727 2023-01-22 18:05:57.806908: step: 194/459, loss: 0.002037149854004383 2023-01-22 18:05:58.450435: step: 196/459, loss: 0.04596101865172386 2023-01-22 18:05:59.066425: step: 198/459, loss: 0.0008082315907813609 2023-01-22 18:05:59.788380: step: 200/459, loss: 0.03962026536464691 2023-01-22 18:06:00.480628: step: 202/459, loss: 0.030574267730116844 2023-01-22 18:06:01.126077: step: 204/459, loss: 0.030751651152968407 2023-01-22 18:06:01.826950: step: 206/459, loss: 0.07094991207122803 2023-01-22 18:06:02.433513: step: 208/459, loss: 0.004285713657736778 2023-01-22 18:06:03.069056: step: 210/459, loss: 0.02257639542222023 2023-01-22 18:06:03.724237: step: 212/459, loss: 0.005706014111638069 2023-01-22 18:06:04.328521: step: 214/459, loss: 0.00931136216968298 2023-01-22 18:06:04.896768: step: 216/459, loss: 0.017852632328867912 2023-01-22 18:06:05.565277: step: 218/459, loss: 0.007278607226908207 2023-01-22 18:06:06.148335: step: 220/459, loss: 0.018608197569847107 2023-01-22 18:06:06.815692: step: 222/459, loss: 0.0021300280932337046 2023-01-22 18:06:07.422788: step: 224/459, loss: 0.002514556283131242 2023-01-22 18:06:08.078317: step: 226/459, loss: 0.1883355975151062 2023-01-22 18:06:08.619632: step: 228/459, loss: 0.04825282841920853 2023-01-22 18:06:09.275107: step: 230/459, loss: 0.006150561384856701 2023-01-22 18:06:09.906272: step: 232/459, loss: 0.0022189123556017876 2023-01-22 18:06:10.501654: step: 234/459, loss: 0.00847680401057005 2023-01-22 18:06:11.111692: step: 236/459, loss: 0.5195475220680237 2023-01-22 18:06:11.736093: step: 238/459, loss: 0.002211044542491436 2023-01-22 18:06:12.373098: step: 240/459, loss: 0.004436793737113476 2023-01-22 18:06:12.949784: step: 242/459, loss: 0.03858329728245735 2023-01-22 18:06:13.520455: step: 244/459, loss: 0.01078864187002182 2023-01-22 18:06:14.160498: step: 246/459, loss: 0.0028128738049417734 2023-01-22 18:06:14.763968: step: 248/459, loss: 0.05036405846476555 2023-01-22 18:06:15.398138: step: 250/459, loss: 0.05263567715883255 2023-01-22 18:06:16.025047: step: 252/459, loss: 0.021038789302110672 2023-01-22 18:06:16.627779: step: 254/459, loss: 0.02288280799984932 2023-01-22 18:06:17.229626: step: 256/459, loss: 0.04157564043998718 2023-01-22 18:06:17.900640: step: 258/459, loss: 0.08974619209766388 2023-01-22 18:06:18.529880: step: 260/459, loss: 0.04405491426587105 2023-01-22 18:06:19.157987: step: 262/459, loss: 0.0006515912245959044 2023-01-22 18:06:19.880308: step: 264/459, loss: 0.006907097529619932 2023-01-22 18:06:20.472246: step: 266/459, loss: 0.03261615335941315 2023-01-22 18:06:21.137292: step: 268/459, loss: 0.026911884546279907 2023-01-22 18:06:21.765752: step: 270/459, loss: 0.0009440091671422124 2023-01-22 18:06:22.351400: step: 272/459, loss: 0.013482313603162766 2023-01-22 18:06:22.998440: step: 274/459, loss: 0.0009953612461686134 2023-01-22 18:06:23.610886: step: 276/459, loss: 0.003905420657247305 2023-01-22 18:06:24.173772: step: 278/459, loss: 0.005735867656767368 2023-01-22 18:06:24.768394: step: 280/459, loss: 0.14485721290111542 2023-01-22 18:06:25.351968: step: 282/459, loss: 0.02535228058695793 2023-01-22 18:06:25.924207: step: 284/459, loss: 0.04264013096690178 2023-01-22 18:06:26.576110: step: 286/459, loss: 0.007165634073317051 2023-01-22 18:06:27.264228: step: 288/459, loss: 0.026175202801823616 2023-01-22 18:06:27.971595: step: 290/459, loss: 0.0008419908117502928 2023-01-22 18:06:28.597502: step: 292/459, loss: 0.015466990880668163 2023-01-22 18:06:29.157883: step: 294/459, loss: 0.03046124428510666 2023-01-22 18:06:29.896404: step: 296/459, loss: 0.016938071697950363 2023-01-22 18:06:30.521049: step: 298/459, loss: 0.036702241748571396 2023-01-22 18:06:31.089232: step: 300/459, loss: 0.018376125022768974 2023-01-22 18:06:31.829827: step: 302/459, loss: 0.035688258707523346 2023-01-22 18:06:32.410847: step: 304/459, loss: 0.007449076510965824 2023-01-22 18:06:32.986111: step: 306/459, loss: 0.0066429018042981625 2023-01-22 18:06:33.592087: step: 308/459, loss: 0.003936828579753637 2023-01-22 18:06:34.238986: step: 310/459, loss: 0.011622226797044277 2023-01-22 18:06:34.909141: step: 312/459, loss: 0.0008043270208872855 2023-01-22 18:06:35.469712: step: 314/459, loss: 0.006797509267926216 2023-01-22 18:06:36.087509: step: 316/459, loss: 0.005348175298422575 2023-01-22 18:06:36.723240: step: 318/459, loss: 0.003600666532292962 2023-01-22 18:06:37.318261: step: 320/459, loss: 0.010688304901123047 2023-01-22 18:06:37.945576: step: 322/459, loss: 0.013519540429115295 2023-01-22 18:06:38.580985: step: 324/459, loss: 0.018259579315781593 2023-01-22 18:06:39.230253: step: 326/459, loss: 0.02230878174304962 2023-01-22 18:06:39.828502: step: 328/459, loss: 0.050370894372463226 2023-01-22 18:06:40.550140: step: 330/459, loss: 0.03563181310892105 2023-01-22 18:06:41.139796: step: 332/459, loss: 0.15213437378406525 2023-01-22 18:06:41.751480: step: 334/459, loss: 0.007859093137085438 2023-01-22 18:06:42.381558: step: 336/459, loss: 0.029264546930789948 2023-01-22 18:06:43.036510: step: 338/459, loss: 0.03792832791805267 2023-01-22 18:06:43.629194: step: 340/459, loss: 0.19428451359272003 2023-01-22 18:06:44.210582: step: 342/459, loss: 0.008465709164738655 2023-01-22 18:06:44.864568: step: 344/459, loss: 0.039735306054353714 2023-01-22 18:06:45.495479: step: 346/459, loss: 0.18143856525421143 2023-01-22 18:06:46.091502: step: 348/459, loss: 0.04547838121652603 2023-01-22 18:06:46.637862: step: 350/459, loss: 0.35791492462158203 2023-01-22 18:06:47.246086: step: 352/459, loss: 0.004396247677505016 2023-01-22 18:06:47.858111: step: 354/459, loss: 0.021688830107450485 2023-01-22 18:06:48.472751: step: 356/459, loss: 0.011279783211648464 2023-01-22 18:06:49.091332: step: 358/459, loss: 0.00580679951235652 2023-01-22 18:06:49.690336: step: 360/459, loss: 0.02833249233663082 2023-01-22 18:06:50.288093: step: 362/459, loss: 0.36130645871162415 2023-01-22 18:06:50.887457: step: 364/459, loss: 0.0005783631349913776 2023-01-22 18:06:51.479775: step: 366/459, loss: 0.00440833019092679 2023-01-22 18:06:52.035975: step: 368/459, loss: 0.0014084397116675973 2023-01-22 18:06:52.584024: step: 370/459, loss: 0.011435555294156075 2023-01-22 18:06:53.236688: step: 372/459, loss: 0.009403575211763382 2023-01-22 18:06:53.818784: step: 374/459, loss: 0.015655698254704475 2023-01-22 18:06:54.397718: step: 376/459, loss: 0.02062787301838398 2023-01-22 18:06:55.111513: step: 378/459, loss: 0.0048255943693220615 2023-01-22 18:06:55.690855: step: 380/459, loss: 0.052867211401462555 2023-01-22 18:06:56.300062: step: 382/459, loss: 0.04627783223986626 2023-01-22 18:06:56.945594: step: 384/459, loss: 0.017040548846125603 2023-01-22 18:06:57.608123: step: 386/459, loss: 0.0007358563598245382 2023-01-22 18:06:58.190081: step: 388/459, loss: 0.005770252086222172 2023-01-22 18:06:58.845294: step: 390/459, loss: 0.007877725176513195 2023-01-22 18:06:59.525711: step: 392/459, loss: 0.10429570078849792 2023-01-22 18:07:00.098216: step: 394/459, loss: 0.0008216170826926827 2023-01-22 18:07:00.713279: step: 396/459, loss: 0.13995066285133362 2023-01-22 18:07:01.335713: step: 398/459, loss: 0.06500190496444702 2023-01-22 18:07:01.869465: step: 400/459, loss: 0.008761191740632057 2023-01-22 18:07:02.492186: step: 402/459, loss: 0.008442176505923271 2023-01-22 18:07:03.108096: step: 404/459, loss: 0.0001845623628469184 2023-01-22 18:07:03.771431: step: 406/459, loss: 0.0129794180393219 2023-01-22 18:07:04.375149: step: 408/459, loss: 0.020104262977838516 2023-01-22 18:07:05.016359: step: 410/459, loss: 0.6673191785812378 2023-01-22 18:07:05.667938: step: 412/459, loss: 0.003434925340116024 2023-01-22 18:07:06.319942: step: 414/459, loss: 0.014217616058886051 2023-01-22 18:07:06.965784: step: 416/459, loss: 0.04916172847151756 2023-01-22 18:07:07.532449: step: 418/459, loss: 0.035395003855228424 2023-01-22 18:07:08.162898: step: 420/459, loss: 0.008979106321930885 2023-01-22 18:07:08.805558: step: 422/459, loss: 0.3810172379016876 2023-01-22 18:07:09.388721: step: 424/459, loss: 0.022993357852101326 2023-01-22 18:07:09.914606: step: 426/459, loss: 0.00039060076232999563 2023-01-22 18:07:10.510141: step: 428/459, loss: 0.053961098194122314 2023-01-22 18:07:11.152844: step: 430/459, loss: 0.058003902435302734 2023-01-22 18:07:11.774305: step: 432/459, loss: 0.006626148708164692 2023-01-22 18:07:12.356530: step: 434/459, loss: 0.00588555634021759 2023-01-22 18:07:12.980570: step: 436/459, loss: 0.0840316042304039 2023-01-22 18:07:13.615551: step: 438/459, loss: 0.02197914756834507 2023-01-22 18:07:14.258958: step: 440/459, loss: 0.0006302593974396586 2023-01-22 18:07:14.849873: step: 442/459, loss: 0.02069634571671486 2023-01-22 18:07:15.467222: step: 444/459, loss: 0.028604945167899132 2023-01-22 18:07:16.049039: step: 446/459, loss: 0.02524387650191784 2023-01-22 18:07:16.616528: step: 448/459, loss: 0.08404944837093353 2023-01-22 18:07:17.264674: step: 450/459, loss: 0.24291902780532837 2023-01-22 18:07:17.855701: step: 452/459, loss: 0.08187146484851837 2023-01-22 18:07:18.477723: step: 454/459, loss: 0.005202456843107939 2023-01-22 18:07:19.119732: step: 456/459, loss: 0.06528929620981216 2023-01-22 18:07:19.826314: step: 458/459, loss: 0.014080040156841278 2023-01-22 18:07:20.443834: step: 460/459, loss: 0.018423566594719887 2023-01-22 18:07:21.030730: step: 462/459, loss: 0.010038412176072598 2023-01-22 18:07:21.631806: step: 464/459, loss: 0.3055248260498047 2023-01-22 18:07:22.191750: step: 466/459, loss: 0.02109205164015293 2023-01-22 18:07:22.785613: step: 468/459, loss: 0.001172205084003508 2023-01-22 18:07:23.407135: step: 470/459, loss: 0.03797239437699318 2023-01-22 18:07:23.938810: step: 472/459, loss: 0.017965074628591537 2023-01-22 18:07:24.554940: step: 474/459, loss: 0.008109794929623604 2023-01-22 18:07:25.159100: step: 476/459, loss: 0.20266766846179962 2023-01-22 18:07:25.816082: step: 478/459, loss: 0.011901729740202427 2023-01-22 18:07:26.448872: step: 480/459, loss: 0.036328401416540146 2023-01-22 18:07:27.140355: step: 482/459, loss: 0.0016451975097879767 2023-01-22 18:07:27.808806: step: 484/459, loss: 0.0600278303027153 2023-01-22 18:07:28.426684: step: 486/459, loss: 0.014625447802245617 2023-01-22 18:07:29.082543: step: 488/459, loss: 0.017036717385053635 2023-01-22 18:07:29.751355: step: 490/459, loss: 0.19815808534622192 2023-01-22 18:07:30.357968: step: 492/459, loss: 0.005017109680920839 2023-01-22 18:07:30.993312: step: 494/459, loss: 0.02114185132086277 2023-01-22 18:07:31.599135: step: 496/459, loss: 0.04902274161577225 2023-01-22 18:07:32.268105: step: 498/459, loss: 0.003423331305384636 2023-01-22 18:07:32.796757: step: 500/459, loss: 0.10277093946933746 2023-01-22 18:07:33.423691: step: 502/459, loss: 0.035779695957899094 2023-01-22 18:07:34.072960: step: 504/459, loss: 0.02318069338798523 2023-01-22 18:07:34.743006: step: 506/459, loss: 0.1777963936328888 2023-01-22 18:07:35.295039: step: 508/459, loss: 0.005577618721872568 2023-01-22 18:07:35.884933: step: 510/459, loss: 0.06224065646529198 2023-01-22 18:07:36.482493: step: 512/459, loss: 0.019477711990475655 2023-01-22 18:07:37.125581: step: 514/459, loss: 0.024222057312726974 2023-01-22 18:07:37.801555: step: 516/459, loss: 0.043952032923698425 2023-01-22 18:07:38.445733: step: 518/459, loss: 0.017939578741788864 2023-01-22 18:07:39.069310: step: 520/459, loss: 0.0004764180921483785 2023-01-22 18:07:39.717097: step: 522/459, loss: 0.009416427463293076 2023-01-22 18:07:40.328549: step: 524/459, loss: 0.0008215789566747844 2023-01-22 18:07:40.897596: step: 526/459, loss: 0.0073473453521728516 2023-01-22 18:07:41.513863: step: 528/459, loss: 0.005913450848311186 2023-01-22 18:07:42.074853: step: 530/459, loss: 0.0032058500219136477 2023-01-22 18:07:42.716907: step: 532/459, loss: 0.0018353434279561043 2023-01-22 18:07:43.281202: step: 534/459, loss: 0.003950029145926237 2023-01-22 18:07:43.992265: step: 536/459, loss: 0.012339805252850056 2023-01-22 18:07:44.615117: step: 538/459, loss: 0.0919424369931221 2023-01-22 18:07:45.216060: step: 540/459, loss: 0.0197400264441967 2023-01-22 18:07:45.917198: step: 542/459, loss: 0.04744870588183403 2023-01-22 18:07:46.550949: step: 544/459, loss: 0.049766071140766144 2023-01-22 18:07:47.215403: step: 546/459, loss: 0.054161373525857925 2023-01-22 18:07:47.822535: step: 548/459, loss: 0.023033274337649345 2023-01-22 18:07:48.443151: step: 550/459, loss: 0.0022348458878695965 2023-01-22 18:07:49.121603: step: 552/459, loss: 0.12786738574504852 2023-01-22 18:07:49.817938: step: 554/459, loss: 0.006633817683905363 2023-01-22 18:07:50.420908: step: 556/459, loss: 0.005124045070260763 2023-01-22 18:07:51.041021: step: 558/459, loss: 0.008865662850439548 2023-01-22 18:07:51.626374: step: 560/459, loss: 0.0375056266784668 2023-01-22 18:07:52.246248: step: 562/459, loss: 0.01769411191344261 2023-01-22 18:07:52.878055: step: 564/459, loss: 0.0292666032910347 2023-01-22 18:07:53.419371: step: 566/459, loss: 0.0003054812259506434 2023-01-22 18:07:54.042356: step: 568/459, loss: 0.011448009870946407 2023-01-22 18:07:54.644586: step: 570/459, loss: 0.023655373603105545 2023-01-22 18:07:55.214153: step: 572/459, loss: 0.003962038550525904 2023-01-22 18:07:55.832548: step: 574/459, loss: 0.02558564767241478 2023-01-22 18:07:56.529048: step: 576/459, loss: 0.05110438913106918 2023-01-22 18:07:57.200742: step: 578/459, loss: 0.004991638939827681 2023-01-22 18:07:57.901562: step: 580/459, loss: 0.014380799606442451 2023-01-22 18:07:58.514223: step: 582/459, loss: 0.014383900910615921 2023-01-22 18:07:59.229287: step: 584/459, loss: 0.005523162428289652 2023-01-22 18:07:59.910994: step: 586/459, loss: 0.023058971390128136 2023-01-22 18:08:00.518618: step: 588/459, loss: 0.0005534823867492378 2023-01-22 18:08:01.201633: step: 590/459, loss: 0.37171119451522827 2023-01-22 18:08:01.866567: step: 592/459, loss: 0.00938004907220602 2023-01-22 18:08:02.468859: step: 594/459, loss: 0.014691843651235104 2023-01-22 18:08:02.999686: step: 596/459, loss: 0.02485569566488266 2023-01-22 18:08:03.696599: step: 598/459, loss: 0.008570261299610138 2023-01-22 18:08:04.302430: step: 600/459, loss: 1.3920533657073975 2023-01-22 18:08:04.871398: step: 602/459, loss: 0.015821028500795364 2023-01-22 18:08:05.479810: step: 604/459, loss: 0.24252542853355408 2023-01-22 18:08:06.037968: step: 606/459, loss: 0.028365090489387512 2023-01-22 18:08:06.583591: step: 608/459, loss: 0.013932344503700733 2023-01-22 18:08:07.124403: step: 610/459, loss: 0.006251933053135872 2023-01-22 18:08:07.775998: step: 612/459, loss: 0.025382399559020996 2023-01-22 18:08:08.377058: step: 614/459, loss: 0.007046802435070276 2023-01-22 18:08:09.036094: step: 616/459, loss: 0.020060289651155472 2023-01-22 18:08:09.693440: step: 618/459, loss: 0.00796076375991106 2023-01-22 18:08:10.359907: step: 620/459, loss: 0.022578835487365723 2023-01-22 18:08:10.971210: step: 622/459, loss: 0.0075783333741128445 2023-01-22 18:08:11.583600: step: 624/459, loss: 0.00042217710870318115 2023-01-22 18:08:12.124661: step: 626/459, loss: 0.04002712666988373 2023-01-22 18:08:12.771581: step: 628/459, loss: 0.040286414325237274 2023-01-22 18:08:13.360097: step: 630/459, loss: 0.008767703548073769 2023-01-22 18:08:13.995869: step: 632/459, loss: 0.019287504255771637 2023-01-22 18:08:14.636577: step: 634/459, loss: 0.013578822836279869 2023-01-22 18:08:15.286804: step: 636/459, loss: 0.024241723120212555 2023-01-22 18:08:15.962216: step: 638/459, loss: 0.001744585344567895 2023-01-22 18:08:16.532923: step: 640/459, loss: 0.001033161417581141 2023-01-22 18:08:17.092126: step: 642/459, loss: 0.006159567274153233 2023-01-22 18:08:17.674725: step: 644/459, loss: 0.008017907850444317 2023-01-22 18:08:18.301246: step: 646/459, loss: 0.008018946275115013 2023-01-22 18:08:18.899823: step: 648/459, loss: 0.008869122713804245 2023-01-22 18:08:19.574006: step: 650/459, loss: 0.004427915904670954 2023-01-22 18:08:20.190170: step: 652/459, loss: 0.04279812425374985 2023-01-22 18:08:20.793539: step: 654/459, loss: 0.028289733454585075 2023-01-22 18:08:21.433072: step: 656/459, loss: 0.023368462920188904 2023-01-22 18:08:22.079700: step: 658/459, loss: 0.025027422234416008 2023-01-22 18:08:22.731421: step: 660/459, loss: 0.10498842597007751 2023-01-22 18:08:23.408695: step: 662/459, loss: 0.03527127206325531 2023-01-22 18:08:23.994911: step: 664/459, loss: 0.0011718443129211664 2023-01-22 18:08:24.582922: step: 666/459, loss: 0.03782712295651436 2023-01-22 18:08:25.352897: step: 668/459, loss: 0.053949616849422455 2023-01-22 18:08:26.011413: step: 670/459, loss: 0.0053547746501863 2023-01-22 18:08:26.614912: step: 672/459, loss: 0.026598887518048286 2023-01-22 18:08:27.198180: step: 674/459, loss: 0.005372873041778803 2023-01-22 18:08:27.830776: step: 676/459, loss: 0.014527661725878716 2023-01-22 18:08:28.435975: step: 678/459, loss: 0.007179783657193184 2023-01-22 18:08:29.104134: step: 680/459, loss: 0.030232200399041176 2023-01-22 18:08:29.827550: step: 682/459, loss: 0.02097865752875805 2023-01-22 18:08:30.439044: step: 684/459, loss: 0.4184260070323944 2023-01-22 18:08:31.067918: step: 686/459, loss: 0.0005153791862539947 2023-01-22 18:08:31.756689: step: 688/459, loss: 0.020933955907821655 2023-01-22 18:08:32.375955: step: 690/459, loss: 0.0010170561727136374 2023-01-22 18:08:32.960554: step: 692/459, loss: 0.052912287414073944 2023-01-22 18:08:33.540507: step: 694/459, loss: 0.08296399563550949 2023-01-22 18:08:34.083999: step: 696/459, loss: 0.01805885136127472 2023-01-22 18:08:34.700138: step: 698/459, loss: 0.006504121236503124 2023-01-22 18:08:35.352083: step: 700/459, loss: 0.017409000545740128 2023-01-22 18:08:35.989572: step: 702/459, loss: 0.028296196833252907 2023-01-22 18:08:36.604850: step: 704/459, loss: 0.004041624721139669 2023-01-22 18:08:37.187331: step: 706/459, loss: 0.007573659997433424 2023-01-22 18:08:37.748666: step: 708/459, loss: 0.026548050343990326 2023-01-22 18:08:38.414687: step: 710/459, loss: 0.0005039083771407604 2023-01-22 18:08:39.002498: step: 712/459, loss: 0.025432877242565155 2023-01-22 18:08:39.601754: step: 714/459, loss: 0.21091631054878235 2023-01-22 18:08:40.184188: step: 716/459, loss: 0.006329445168375969 2023-01-22 18:08:40.823180: step: 718/459, loss: 0.07092879712581635 2023-01-22 18:08:41.507044: step: 720/459, loss: 0.035343095660209656 2023-01-22 18:08:42.115460: step: 722/459, loss: 0.004288628231734037 2023-01-22 18:08:42.756387: step: 724/459, loss: 0.007482781074941158 2023-01-22 18:08:43.320234: step: 726/459, loss: 0.2999779284000397 2023-01-22 18:08:43.901678: step: 728/459, loss: 0.022348452359437943 2023-01-22 18:08:44.555154: step: 730/459, loss: 0.009071452543139458 2023-01-22 18:08:45.130217: step: 732/459, loss: 0.018955690786242485 2023-01-22 18:08:45.669364: step: 734/459, loss: 0.021883433684706688 2023-01-22 18:08:46.353635: step: 736/459, loss: 0.03744450956583023 2023-01-22 18:08:47.060035: step: 738/459, loss: 0.3276454210281372 2023-01-22 18:08:47.662781: step: 740/459, loss: 0.005704994313418865 2023-01-22 18:08:48.248875: step: 742/459, loss: 0.08010192960500717 2023-01-22 18:08:48.799492: step: 744/459, loss: 0.027004893869161606 2023-01-22 18:08:49.483332: step: 746/459, loss: 0.021898213773965836 2023-01-22 18:08:50.079229: step: 748/459, loss: 0.0019375269766896963 2023-01-22 18:08:50.771931: step: 750/459, loss: 0.012667023576796055 2023-01-22 18:08:51.368524: step: 752/459, loss: 0.014084717258810997 2023-01-22 18:08:51.929162: step: 754/459, loss: 0.013453278690576553 2023-01-22 18:08:52.480529: step: 756/459, loss: 0.0009975602151826024 2023-01-22 18:08:53.113745: step: 758/459, loss: 0.02831319533288479 2023-01-22 18:08:53.709157: step: 760/459, loss: 0.0028659519739449024 2023-01-22 18:08:54.313474: step: 762/459, loss: 0.008460380136966705 2023-01-22 18:08:54.895466: step: 764/459, loss: 0.02881794422864914 2023-01-22 18:08:55.541843: step: 766/459, loss: 0.01664869487285614 2023-01-22 18:08:56.206525: step: 768/459, loss: 0.034149765968322754 2023-01-22 18:08:56.810926: step: 770/459, loss: 0.028289761394262314 2023-01-22 18:08:57.402278: step: 772/459, loss: 0.014332090504467487 2023-01-22 18:08:57.997041: step: 774/459, loss: 0.05655835196375847 2023-01-22 18:08:58.672493: step: 776/459, loss: 0.04883177578449249 2023-01-22 18:08:59.390680: step: 778/459, loss: 0.0018175747245550156 2023-01-22 18:09:00.112732: step: 780/459, loss: 0.013406412675976753 2023-01-22 18:09:00.718393: step: 782/459, loss: 0.05667152255773544 2023-01-22 18:09:01.268808: step: 784/459, loss: 0.010290547274053097 2023-01-22 18:09:01.959754: step: 786/459, loss: 0.0340069942176342 2023-01-22 18:09:02.584518: step: 788/459, loss: 0.002032516524195671 2023-01-22 18:09:03.181451: step: 790/459, loss: 0.033821951597929 2023-01-22 18:09:03.746223: step: 792/459, loss: 0.1160951554775238 2023-01-22 18:09:04.493246: step: 794/459, loss: 0.048485107719898224 2023-01-22 18:09:05.112652: step: 796/459, loss: 0.039761438965797424 2023-01-22 18:09:05.790767: step: 798/459, loss: 0.0033300200011581182 2023-01-22 18:09:06.361857: step: 800/459, loss: 0.0628899335861206 2023-01-22 18:09:07.001695: step: 802/459, loss: 0.005583528894931078 2023-01-22 18:09:07.581947: step: 804/459, loss: 0.013567838817834854 2023-01-22 18:09:08.202753: step: 806/459, loss: 0.004947857465595007 2023-01-22 18:09:08.837275: step: 808/459, loss: 0.03626111149787903 2023-01-22 18:09:09.439683: step: 810/459, loss: 0.00261063314974308 2023-01-22 18:09:10.064058: step: 812/459, loss: 0.037888914346694946 2023-01-22 18:09:10.694079: step: 814/459, loss: 0.037190526723861694 2023-01-22 18:09:11.320023: step: 816/459, loss: 0.013476832769811153 2023-01-22 18:09:11.944488: step: 818/459, loss: 0.004042983986437321 2023-01-22 18:09:12.526779: step: 820/459, loss: 0.07535413652658463 2023-01-22 18:09:13.135397: step: 822/459, loss: 0.11490937322378159 2023-01-22 18:09:13.766427: step: 824/459, loss: 0.05115649849176407 2023-01-22 18:09:14.386712: step: 826/459, loss: 0.014443134889006615 2023-01-22 18:09:14.966165: step: 828/459, loss: 0.21705634891986847 2023-01-22 18:09:15.577041: step: 830/459, loss: 0.01671600714325905 2023-01-22 18:09:16.260778: step: 832/459, loss: 0.00843905285000801 2023-01-22 18:09:16.968840: step: 834/459, loss: 0.11586400121450424 2023-01-22 18:09:17.604820: step: 836/459, loss: 0.08101072907447815 2023-01-22 18:09:18.198225: step: 838/459, loss: 0.011461331509053707 2023-01-22 18:09:18.874393: step: 840/459, loss: 0.009531029500067234 2023-01-22 18:09:19.533362: step: 842/459, loss: 0.18797457218170166 2023-01-22 18:09:20.118304: step: 844/459, loss: 0.01846967823803425 2023-01-22 18:09:20.711801: step: 846/459, loss: 0.6820302605628967 2023-01-22 18:09:21.321210: step: 848/459, loss: 0.05169803649187088 2023-01-22 18:09:22.001238: step: 850/459, loss: 0.006934535223990679 2023-01-22 18:09:22.598840: step: 852/459, loss: 0.02458152174949646 2023-01-22 18:09:23.227244: step: 854/459, loss: 0.11834771931171417 2023-01-22 18:09:23.909872: step: 856/459, loss: 0.0691138505935669 2023-01-22 18:09:24.555763: step: 858/459, loss: 0.6024131774902344 2023-01-22 18:09:25.146858: step: 860/459, loss: 0.01692531444132328 2023-01-22 18:09:25.774889: step: 862/459, loss: 0.004669561982154846 2023-01-22 18:09:26.306619: step: 864/459, loss: 0.012947647832334042 2023-01-22 18:09:26.942738: step: 866/459, loss: 0.006569486577063799 2023-01-22 18:09:27.541001: step: 868/459, loss: 0.027596691623330116 2023-01-22 18:09:28.258288: step: 870/459, loss: 0.08019176870584488 2023-01-22 18:09:28.880945: step: 872/459, loss: 0.16056352853775024 2023-01-22 18:09:29.460175: step: 874/459, loss: 0.5849565863609314 2023-01-22 18:09:30.059637: step: 876/459, loss: 0.026595553383231163 2023-01-22 18:09:30.659139: step: 878/459, loss: 0.003393305465579033 2023-01-22 18:09:31.340106: step: 880/459, loss: 0.012393954209983349 2023-01-22 18:09:31.931977: step: 882/459, loss: 0.2438741773366928 2023-01-22 18:09:32.484526: step: 884/459, loss: 0.014643429778516293 2023-01-22 18:09:33.062589: step: 886/459, loss: 0.009137582965195179 2023-01-22 18:09:33.687540: step: 888/459, loss: 0.005241394508630037 2023-01-22 18:09:34.288106: step: 890/459, loss: 0.11985930800437927 2023-01-22 18:09:34.936154: step: 892/459, loss: 0.0031968753319233656 2023-01-22 18:09:35.566351: step: 894/459, loss: 0.00691532576456666 2023-01-22 18:09:36.104991: step: 896/459, loss: 0.015808627009391785 2023-01-22 18:09:36.763170: step: 898/459, loss: 0.04150779917836189 2023-01-22 18:09:37.379394: step: 900/459, loss: 0.0026998594403266907 2023-01-22 18:09:38.007363: step: 902/459, loss: 0.008448168635368347 2023-01-22 18:09:38.641559: step: 904/459, loss: 0.008148484863340855 2023-01-22 18:09:39.286734: step: 906/459, loss: 0.03016464039683342 2023-01-22 18:09:39.882861: step: 908/459, loss: 0.13236787915229797 2023-01-22 18:09:40.415423: step: 910/459, loss: 0.01604619435966015 2023-01-22 18:09:41.115314: step: 912/459, loss: 0.019607778638601303 2023-01-22 18:09:41.690998: step: 914/459, loss: 0.37311598658561707 2023-01-22 18:09:42.384635: step: 916/459, loss: 0.019997533410787582 2023-01-22 18:09:42.993080: step: 918/459, loss: 0.020144494250416756 2023-01-22 18:09:43.464955: step: 920/459, loss: 1.639127233943327e-08 ================================================== Loss: 0.060 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3009389283990574, 'r': 0.3409118410896343, 'f1': 0.3196806766089631}, 'combined': 0.2355541827644991, 'epoch': 29} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.32232560855931963, 'r': 0.3006221553897203, 'f1': 0.3110958086377614}, 'combined': 0.19910131752816726, 'epoch': 29} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2913348169828428, 'r': 0.3438524784882888, 'f1': 0.31542255206845643}, 'combined': 0.23241661731359947, 'epoch': 29} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3164475519318612, 'r': 0.29801930504956897, 'f1': 0.3069570911429018}, 'combined': 0.19645253833145712, 'epoch': 29} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29931295280932707, 'r': 0.3322544163063687, 'f1': 0.3149245996285186}, 'combined': 0.23204970498943475, 'epoch': 29} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.33085391247421697, 'r': 0.31213772399483614, 'f1': 0.3212234230031365}, 'combined': 0.23031113347394694, 'epoch': 29} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.17989417989417988, 'r': 0.32380952380952377, 'f1': 0.2312925170068027}, 'combined': 0.15419501133786845, 'epoch': 29} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.24404761904761904, 'r': 0.44565217391304346, 'f1': 0.3153846153846154}, 'combined': 0.1576923076923077, 'epoch': 29} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29411764705882354, 'r': 0.1724137931034483, 'f1': 0.2173913043478261}, 'combined': 0.14492753623188406, 'epoch': 29} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3066986908783784, 'r': 0.3445268026565465, 'f1': 0.3245140750670242}, 'combined': 0.23911563425991253, 'epoch': 19} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31271473684488904, 'r': 0.30873473110322686, 'f1': 0.31071198921642224}, 'combined': 0.1988556730985102, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2564102564102564, 'r': 0.38095238095238093, 'f1': 0.3065134099616858}, 'combined': 0.20434227330779053, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29418702499903554, 'r': 0.3594998939267151, 'f1': 0.32358060478117656}, 'combined': 0.23842781404928798, 'epoch': 13} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3202762492367604, 'r': 0.31212376289255195, 'f1': 0.31614745781013553}, 'combined': 0.20233437299848672, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2878787878787879, 'r': 0.41304347826086957, 'f1': 0.3392857142857143}, 'combined': 0.16964285714285715, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3189032241859154, 'r': 0.3376622373733222, 'f1': 0.32801474487694154}, 'combined': 0.24169507517248323, 'epoch': 15} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34157316773046453, 'r': 0.3008578947798814, 'f1': 0.3199253278791385}, 'combined': 0.22938042376240123, 'epoch': 15} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35294117647058826, 'r': 0.20689655172413793, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 15} ****************************** Epoch: 30 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 18:12:19.676106: step: 2/459, loss: 0.05245495215058327 2023-01-22 18:12:20.256389: step: 4/459, loss: 0.029251297935843468 2023-01-22 18:12:20.920319: step: 6/459, loss: 0.057226743549108505 2023-01-22 18:12:21.507439: step: 8/459, loss: 0.004668266512453556 2023-01-22 18:12:22.126638: step: 10/459, loss: 0.0009234229219146073 2023-01-22 18:12:22.740171: step: 12/459, loss: 0.015065123327076435 2023-01-22 18:12:23.363427: step: 14/459, loss: 0.1189657673239708 2023-01-22 18:12:23.938420: step: 16/459, loss: 0.007440629880875349 2023-01-22 18:12:24.580759: step: 18/459, loss: 0.002416798612102866 2023-01-22 18:12:25.159273: step: 20/459, loss: 0.01988920383155346 2023-01-22 18:12:25.841775: step: 22/459, loss: 0.01188320480287075 2023-01-22 18:12:26.475263: step: 24/459, loss: 0.00045717120519839227 2023-01-22 18:12:27.110984: step: 26/459, loss: 0.018756939098238945 2023-01-22 18:12:27.737210: step: 28/459, loss: 0.29080769419670105 2023-01-22 18:12:28.358378: step: 30/459, loss: 0.008654487319290638 2023-01-22 18:12:28.962721: step: 32/459, loss: 0.009174495004117489 2023-01-22 18:12:29.591375: step: 34/459, loss: 0.08456864207983017 2023-01-22 18:12:30.090926: step: 36/459, loss: 2.0747476810356602e-05 2023-01-22 18:12:30.696895: step: 38/459, loss: 0.07281530648469925 2023-01-22 18:12:31.241005: step: 40/459, loss: 0.007965142838656902 2023-01-22 18:12:31.945877: step: 42/459, loss: 0.036511681973934174 2023-01-22 18:12:32.575240: step: 44/459, loss: 0.0046946825459599495 2023-01-22 18:12:33.211875: step: 46/459, loss: 0.015734830871224403 2023-01-22 18:12:33.790890: step: 48/459, loss: 0.020653678104281425 2023-01-22 18:12:34.417795: step: 50/459, loss: 0.0022787940688431263 2023-01-22 18:12:35.007728: step: 52/459, loss: 0.0006861652946099639 2023-01-22 18:12:35.608983: step: 54/459, loss: 0.00021124207705724984 2023-01-22 18:12:36.265398: step: 56/459, loss: 0.012795764021575451 2023-01-22 18:12:36.829116: step: 58/459, loss: 0.013208286836743355 2023-01-22 18:12:37.426567: step: 60/459, loss: 0.003922197036445141 2023-01-22 18:12:38.057711: step: 62/459, loss: 0.00433814711868763 2023-01-22 18:12:38.684156: step: 64/459, loss: 0.2963241934776306 2023-01-22 18:12:39.276598: step: 66/459, loss: 0.0005237239529378712 2023-01-22 18:12:39.937901: step: 68/459, loss: 0.02666778303682804 2023-01-22 18:12:40.502766: step: 70/459, loss: 0.006364449393004179 2023-01-22 18:12:41.122563: step: 72/459, loss: 0.007320385426282883 2023-01-22 18:12:41.702961: step: 74/459, loss: 0.024461695924401283 2023-01-22 18:12:42.362063: step: 76/459, loss: 0.005176987033337355 2023-01-22 18:12:43.011151: step: 78/459, loss: 0.02107144147157669 2023-01-22 18:12:43.623558: step: 80/459, loss: 0.00018643555813468993 2023-01-22 18:12:44.230545: step: 82/459, loss: 0.0037827298510819674 2023-01-22 18:12:44.821448: step: 84/459, loss: 0.003824847750365734 2023-01-22 18:12:45.424169: step: 86/459, loss: 0.026200320571660995 2023-01-22 18:12:46.104179: step: 88/459, loss: 0.04922819882631302 2023-01-22 18:12:46.743743: step: 90/459, loss: 0.0042509520426392555 2023-01-22 18:12:47.343052: step: 92/459, loss: 0.017068030312657356 2023-01-22 18:12:48.033595: step: 94/459, loss: 0.00515802763402462 2023-01-22 18:12:48.666933: step: 96/459, loss: 0.02264484390616417 2023-01-22 18:12:49.292350: step: 98/459, loss: 0.03707432001829147 2023-01-22 18:12:49.897806: step: 100/459, loss: 0.4468246102333069 2023-01-22 18:12:50.559274: step: 102/459, loss: 0.014801619574427605 2023-01-22 18:12:51.178669: step: 104/459, loss: 0.007515430450439453 2023-01-22 18:12:51.817866: step: 106/459, loss: 0.04547467827796936 2023-01-22 18:12:52.466116: step: 108/459, loss: 0.004816296976059675 2023-01-22 18:12:53.088779: step: 110/459, loss: 0.1075577437877655 2023-01-22 18:12:53.727449: step: 112/459, loss: 0.0018954855622723699 2023-01-22 18:12:54.324873: step: 114/459, loss: 0.1361733078956604 2023-01-22 18:12:55.016373: step: 116/459, loss: 0.05312167480587959 2023-01-22 18:12:55.631987: step: 118/459, loss: 0.05936865881085396 2023-01-22 18:12:56.255094: step: 120/459, loss: 0.0027606592047959566 2023-01-22 18:12:56.848496: step: 122/459, loss: 0.010076228529214859 2023-01-22 18:12:57.440044: step: 124/459, loss: 0.007692383136600256 2023-01-22 18:12:58.000879: step: 126/459, loss: 0.052283357828855515 2023-01-22 18:12:58.514325: step: 128/459, loss: 0.00015641011123079807 2023-01-22 18:12:59.152079: step: 130/459, loss: 0.00931833777576685 2023-01-22 18:12:59.761600: step: 132/459, loss: 0.00704349996522069 2023-01-22 18:13:00.358756: step: 134/459, loss: 0.007058671675622463 2023-01-22 18:13:00.951636: step: 136/459, loss: 0.007059279829263687 2023-01-22 18:13:01.545414: step: 138/459, loss: 0.0010138358920812607 2023-01-22 18:13:02.218029: step: 140/459, loss: 0.03418111428618431 2023-01-22 18:13:02.830340: step: 142/459, loss: 0.053715791553258896 2023-01-22 18:13:03.480963: step: 144/459, loss: 0.004698708653450012 2023-01-22 18:13:04.162273: step: 146/459, loss: 0.0003445943584665656 2023-01-22 18:13:04.850553: step: 148/459, loss: 0.005682784132659435 2023-01-22 18:13:05.488916: step: 150/459, loss: 0.03534476459026337 2023-01-22 18:13:06.151382: step: 152/459, loss: 0.004213517066091299 2023-01-22 18:13:06.713920: step: 154/459, loss: 0.009189153090119362 2023-01-22 18:13:07.304665: step: 156/459, loss: 0.0013710312778130174 2023-01-22 18:13:07.876010: step: 158/459, loss: 0.002123909769579768 2023-01-22 18:13:08.531538: step: 160/459, loss: 0.021460039541125298 2023-01-22 18:13:09.125237: step: 162/459, loss: 0.00427776575088501 2023-01-22 18:13:09.738721: step: 164/459, loss: 0.0029900067020207644 2023-01-22 18:13:10.340215: step: 166/459, loss: 0.02270074188709259 2023-01-22 18:13:11.061589: step: 168/459, loss: 0.00730576366186142 2023-01-22 18:13:11.671615: step: 170/459, loss: 0.0125905005261302 2023-01-22 18:13:12.235192: step: 172/459, loss: 0.06900046765804291 2023-01-22 18:13:12.861452: step: 174/459, loss: 0.0007279383135028183 2023-01-22 18:13:13.472478: step: 176/459, loss: 0.02904847450554371 2023-01-22 18:13:14.154792: step: 178/459, loss: 0.25870227813720703 2023-01-22 18:13:14.784648: step: 180/459, loss: 3.2752279366832227e-05 2023-01-22 18:13:15.412767: step: 182/459, loss: 0.02823549136519432 2023-01-22 18:13:16.039633: step: 184/459, loss: 0.008624699898064137 2023-01-22 18:13:16.659750: step: 186/459, loss: 0.010006478056311607 2023-01-22 18:13:17.288483: step: 188/459, loss: 0.04187233746051788 2023-01-22 18:13:17.921880: step: 190/459, loss: 0.21141819655895233 2023-01-22 18:13:18.569181: step: 192/459, loss: 0.008890276774764061 2023-01-22 18:13:19.162692: step: 194/459, loss: 0.006377889774739742 2023-01-22 18:13:19.748248: step: 196/459, loss: 0.04014892503619194 2023-01-22 18:13:20.369767: step: 198/459, loss: 0.004494927357882261 2023-01-22 18:13:20.972961: step: 200/459, loss: 0.005381635390222073 2023-01-22 18:13:21.591346: step: 202/459, loss: 0.08234607428312302 2023-01-22 18:13:22.187875: step: 204/459, loss: 0.0021013955119997263 2023-01-22 18:13:22.829166: step: 206/459, loss: 0.016282346099615097 2023-01-22 18:13:23.461848: step: 208/459, loss: 0.08328348398208618 2023-01-22 18:13:24.074246: step: 210/459, loss: 0.07649140805006027 2023-01-22 18:13:24.707366: step: 212/459, loss: 0.02337401732802391 2023-01-22 18:13:25.253802: step: 214/459, loss: 0.024936256930232048 2023-01-22 18:13:25.864956: step: 216/459, loss: 0.036827921867370605 2023-01-22 18:13:26.479683: step: 218/459, loss: 0.005895356182008982 2023-01-22 18:13:27.115403: step: 220/459, loss: 0.002954827854409814 2023-01-22 18:13:27.765409: step: 222/459, loss: 0.004738161340355873 2023-01-22 18:13:28.405563: step: 224/459, loss: 0.008844240568578243 2023-01-22 18:13:29.041831: step: 226/459, loss: 0.02155870944261551 2023-01-22 18:13:29.633270: step: 228/459, loss: 0.0019574558828026056 2023-01-22 18:13:30.219685: step: 230/459, loss: 0.009430624544620514 2023-01-22 18:13:30.835940: step: 232/459, loss: 0.006399424280971289 2023-01-22 18:13:31.536712: step: 234/459, loss: 0.022548306733369827 2023-01-22 18:13:32.127781: step: 236/459, loss: 0.01471507828682661 2023-01-22 18:13:32.818244: step: 238/459, loss: 0.049957823008298874 2023-01-22 18:13:33.414446: step: 240/459, loss: 0.024766651913523674 2023-01-22 18:13:34.038786: step: 242/459, loss: 0.006605522241443396 2023-01-22 18:13:34.664367: step: 244/459, loss: 0.012327753938734531 2023-01-22 18:13:35.309436: step: 246/459, loss: 0.001009032828733325 2023-01-22 18:13:35.872745: step: 248/459, loss: 0.030127376317977905 2023-01-22 18:13:36.482723: step: 250/459, loss: 0.034893251955509186 2023-01-22 18:13:37.127340: step: 252/459, loss: 0.023795755580067635 2023-01-22 18:13:37.677977: step: 254/459, loss: 6.234262400539592e-05 2023-01-22 18:13:38.392448: step: 256/459, loss: 0.008827339857816696 2023-01-22 18:13:38.993142: step: 258/459, loss: 0.060589514672756195 2023-01-22 18:13:39.521054: step: 260/459, loss: 0.010875198058784008 2023-01-22 18:13:40.156044: step: 262/459, loss: 0.0021311810705810785 2023-01-22 18:13:40.780604: step: 264/459, loss: 0.005094106309115887 2023-01-22 18:13:41.471721: step: 266/459, loss: 0.24459616839885712 2023-01-22 18:13:42.193692: step: 268/459, loss: 0.006128250155597925 2023-01-22 18:13:42.852599: step: 270/459, loss: 0.004805877339094877 2023-01-22 18:13:43.466368: step: 272/459, loss: 0.16826167702674866 2023-01-22 18:13:44.171897: step: 274/459, loss: 0.007595236413180828 2023-01-22 18:13:44.821078: step: 276/459, loss: 0.0014603956369683146 2023-01-22 18:13:45.400032: step: 278/459, loss: 0.0721047967672348 2023-01-22 18:13:46.020830: step: 280/459, loss: 0.001262181089259684 2023-01-22 18:13:46.648392: step: 282/459, loss: 0.010484393686056137 2023-01-22 18:13:47.244419: step: 284/459, loss: 0.0029746994841843843 2023-01-22 18:13:47.899194: step: 286/459, loss: 0.034860435873270035 2023-01-22 18:13:48.425338: step: 288/459, loss: 0.0014094373909756541 2023-01-22 18:13:49.014912: step: 290/459, loss: 0.0158942062407732 2023-01-22 18:13:49.701969: step: 292/459, loss: 0.2065984606742859 2023-01-22 18:13:50.354181: step: 294/459, loss: 0.022232791408896446 2023-01-22 18:13:50.953024: step: 296/459, loss: 0.008537765592336655 2023-01-22 18:13:51.653636: step: 298/459, loss: 0.04576633498072624 2023-01-22 18:13:52.207535: step: 300/459, loss: 0.03564140200614929 2023-01-22 18:13:52.824878: step: 302/459, loss: 0.006533507723361254 2023-01-22 18:13:53.554925: step: 304/459, loss: 0.006097136531025171 2023-01-22 18:13:54.136658: step: 306/459, loss: 0.019780926406383514 2023-01-22 18:13:54.873308: step: 308/459, loss: 0.0118198087438941 2023-01-22 18:13:55.519250: step: 310/459, loss: 0.002812266116961837 2023-01-22 18:13:56.137248: step: 312/459, loss: 0.013657514937222004 2023-01-22 18:13:56.685012: step: 314/459, loss: 0.003576167393475771 2023-01-22 18:13:57.312679: step: 316/459, loss: 0.061334915459156036 2023-01-22 18:13:57.939148: step: 318/459, loss: 0.0310454610735178 2023-01-22 18:13:58.599318: step: 320/459, loss: 0.0012048432836309075 2023-01-22 18:13:59.246000: step: 322/459, loss: 0.004642959218472242 2023-01-22 18:13:59.959160: step: 324/459, loss: 0.017364470288157463 2023-01-22 18:14:00.588750: step: 326/459, loss: 0.001046669203788042 2023-01-22 18:14:01.142337: step: 328/459, loss: 0.010442081838846207 2023-01-22 18:14:01.747653: step: 330/459, loss: 0.056794118136167526 2023-01-22 18:14:02.355592: step: 332/459, loss: 0.020593922585248947 2023-01-22 18:14:02.947501: step: 334/459, loss: 0.002645647618919611 2023-01-22 18:14:03.567878: step: 336/459, loss: 0.13286368548870087 2023-01-22 18:14:04.220737: step: 338/459, loss: 0.027546895667910576 2023-01-22 18:14:04.777893: step: 340/459, loss: 0.023797333240509033 2023-01-22 18:14:05.344096: step: 342/459, loss: 0.0025489884428679943 2023-01-22 18:14:05.924799: step: 344/459, loss: 0.05551740527153015 2023-01-22 18:14:06.528389: step: 346/459, loss: 0.0117504196241498 2023-01-22 18:14:07.144365: step: 348/459, loss: 0.006345089990645647 2023-01-22 18:14:07.862694: step: 350/459, loss: 5.9409831010270864e-05 2023-01-22 18:14:08.476246: step: 352/459, loss: 0.27162015438079834 2023-01-22 18:14:09.118969: step: 354/459, loss: 0.03499038517475128 2023-01-22 18:14:09.700589: step: 356/459, loss: 0.011422967538237572 2023-01-22 18:14:10.255616: step: 358/459, loss: 0.0038863997906446457 2023-01-22 18:14:10.848610: step: 360/459, loss: 0.0054791877046227455 2023-01-22 18:14:11.410725: step: 362/459, loss: 0.0012025826144963503 2023-01-22 18:14:12.018899: step: 364/459, loss: 0.004369880072772503 2023-01-22 18:14:12.595309: step: 366/459, loss: 0.03248889744281769 2023-01-22 18:14:13.167652: step: 368/459, loss: 0.012059167958796024 2023-01-22 18:14:13.793004: step: 370/459, loss: 0.008152713999152184 2023-01-22 18:14:14.375466: step: 372/459, loss: 0.028989044949412346 2023-01-22 18:14:15.025284: step: 374/459, loss: 0.09606532007455826 2023-01-22 18:14:15.622249: step: 376/459, loss: 0.001193084754049778 2023-01-22 18:14:16.290246: step: 378/459, loss: 0.01814092881977558 2023-01-22 18:14:16.951078: step: 380/459, loss: 0.02659834548830986 2023-01-22 18:14:17.550002: step: 382/459, loss: 0.003008452709764242 2023-01-22 18:14:18.148252: step: 384/459, loss: 2.980283898068592e-05 2023-01-22 18:14:18.783018: step: 386/459, loss: 0.008354700170457363 2023-01-22 18:14:19.370797: step: 388/459, loss: 0.27589550614356995 2023-01-22 18:14:19.990135: step: 390/459, loss: 0.0004837003943976015 2023-01-22 18:14:20.606197: step: 392/459, loss: 0.00458872364833951 2023-01-22 18:14:21.185174: step: 394/459, loss: 0.5462557673454285 2023-01-22 18:14:21.767474: step: 396/459, loss: 0.02226927876472473 2023-01-22 18:14:22.361865: step: 398/459, loss: 0.06008315086364746 2023-01-22 18:14:22.978478: step: 400/459, loss: 0.0058961547911167145 2023-01-22 18:14:23.566145: step: 402/459, loss: 0.001811720198020339 2023-01-22 18:14:24.168823: step: 404/459, loss: 0.007398507092148066 2023-01-22 18:14:24.739047: step: 406/459, loss: 0.002802419476211071 2023-01-22 18:14:25.317187: step: 408/459, loss: 0.0005716014420613647 2023-01-22 18:14:25.893367: step: 410/459, loss: 0.06107381358742714 2023-01-22 18:14:26.517759: step: 412/459, loss: 0.018064789474010468 2023-01-22 18:14:27.139415: step: 414/459, loss: 0.007164722308516502 2023-01-22 18:14:27.770871: step: 416/459, loss: 0.005652378313243389 2023-01-22 18:14:28.391748: step: 418/459, loss: 0.00160032301209867 2023-01-22 18:14:29.021444: step: 420/459, loss: 0.0032218610867857933 2023-01-22 18:14:29.664978: step: 422/459, loss: 0.07476861774921417 2023-01-22 18:14:30.306664: step: 424/459, loss: 0.015681447461247444 2023-01-22 18:14:30.935135: step: 426/459, loss: 0.0208895280957222 2023-01-22 18:14:31.548986: step: 428/459, loss: 0.0027082334272563457 2023-01-22 18:14:32.137771: step: 430/459, loss: 0.08343174308538437 2023-01-22 18:14:32.742486: step: 432/459, loss: 0.004368281923234463 2023-01-22 18:14:33.401263: step: 434/459, loss: 0.02712017484009266 2023-01-22 18:14:34.068761: step: 436/459, loss: 0.026987841352820396 2023-01-22 18:14:34.621460: step: 438/459, loss: 0.057286906987428665 2023-01-22 18:14:35.280813: step: 440/459, loss: 0.004299965687096119 2023-01-22 18:14:35.887452: step: 442/459, loss: 0.01765941083431244 2023-01-22 18:14:36.522378: step: 444/459, loss: 0.01643248274922371 2023-01-22 18:14:37.161471: step: 446/459, loss: 0.003282553283497691 2023-01-22 18:14:37.785367: step: 448/459, loss: 0.06360652297735214 2023-01-22 18:14:38.406642: step: 450/459, loss: 0.019707540050148964 2023-01-22 18:14:39.035071: step: 452/459, loss: 0.02786531299352646 2023-01-22 18:14:39.685587: step: 454/459, loss: 0.12280585616827011 2023-01-22 18:14:40.326520: step: 456/459, loss: 0.3874233365058899 2023-01-22 18:14:40.959753: step: 458/459, loss: 0.022738568484783173 2023-01-22 18:14:41.529906: step: 460/459, loss: 0.07797639816999435 2023-01-22 18:14:42.098204: step: 462/459, loss: 0.0016130417352542281 2023-01-22 18:14:42.677852: step: 464/459, loss: 0.007965948432683945 2023-01-22 18:14:43.302713: step: 466/459, loss: 0.001506027183495462 2023-01-22 18:14:43.910401: step: 468/459, loss: 0.17383447289466858 2023-01-22 18:14:44.537751: step: 470/459, loss: 0.006718483753502369 2023-01-22 18:14:45.151066: step: 472/459, loss: 0.010835807770490646 2023-01-22 18:14:45.755283: step: 474/459, loss: 0.017368000000715256 2023-01-22 18:14:46.365314: step: 476/459, loss: 0.009141786023974419 2023-01-22 18:14:46.990421: step: 478/459, loss: 0.012754354625940323 2023-01-22 18:14:47.599221: step: 480/459, loss: 0.005228786263614893 2023-01-22 18:14:48.188592: step: 482/459, loss: 0.001972962636500597 2023-01-22 18:14:48.748289: step: 484/459, loss: 0.0041792020201683044 2023-01-22 18:14:49.414590: step: 486/459, loss: 0.004355050157755613 2023-01-22 18:14:50.012667: step: 488/459, loss: 0.00129012914840132 2023-01-22 18:14:50.620725: step: 490/459, loss: 0.005823321640491486 2023-01-22 18:14:51.237462: step: 492/459, loss: 0.003394076833501458 2023-01-22 18:14:51.832839: step: 494/459, loss: 0.231731116771698 2023-01-22 18:14:52.420228: step: 496/459, loss: 0.0014036425855010748 2023-01-22 18:14:52.984865: step: 498/459, loss: 0.28712403774261475 2023-01-22 18:14:53.665996: step: 500/459, loss: 0.002315365709364414 2023-01-22 18:14:54.291606: step: 502/459, loss: 0.07200886309146881 2023-01-22 18:14:54.925862: step: 504/459, loss: 0.0035803895443677902 2023-01-22 18:14:55.544769: step: 506/459, loss: 0.028188426047563553 2023-01-22 18:14:56.175200: step: 508/459, loss: 0.013239997439086437 2023-01-22 18:14:56.960743: step: 510/459, loss: 0.012026202864944935 2023-01-22 18:14:57.595534: step: 512/459, loss: 0.026362180709838867 2023-01-22 18:14:58.215072: step: 514/459, loss: 0.01620306819677353 2023-01-22 18:14:58.779335: step: 516/459, loss: 0.005939077585935593 2023-01-22 18:14:59.395891: step: 518/459, loss: 0.09866874665021896 2023-01-22 18:14:59.952651: step: 520/459, loss: 0.0019494229927659035 2023-01-22 18:15:00.587738: step: 522/459, loss: 0.038452837616205215 2023-01-22 18:15:01.255238: step: 524/459, loss: 0.005672241561114788 2023-01-22 18:15:01.902388: step: 526/459, loss: 0.12514755129814148 2023-01-22 18:15:02.424062: step: 528/459, loss: 0.03848697245121002 2023-01-22 18:15:03.006262: step: 530/459, loss: 0.00034174908068962395 2023-01-22 18:15:03.591812: step: 532/459, loss: 0.011865893378853798 2023-01-22 18:15:04.216644: step: 534/459, loss: 0.03447820246219635 2023-01-22 18:15:04.850268: step: 536/459, loss: 0.02096041850745678 2023-01-22 18:15:05.480119: step: 538/459, loss: 0.07127783447504044 2023-01-22 18:15:06.171166: step: 540/459, loss: 0.0324137881398201 2023-01-22 18:15:06.825085: step: 542/459, loss: 0.0003777140227612108 2023-01-22 18:15:07.422271: step: 544/459, loss: 0.00011603112216107547 2023-01-22 18:15:08.035913: step: 546/459, loss: 0.026963884010910988 2023-01-22 18:15:08.653677: step: 548/459, loss: 0.0038743962068110704 2023-01-22 18:15:09.274887: step: 550/459, loss: 0.0010451750131323934 2023-01-22 18:15:09.880712: step: 552/459, loss: 0.003899593139067292 2023-01-22 18:15:10.478559: step: 554/459, loss: 0.015741486102342606 2023-01-22 18:15:11.133955: step: 556/459, loss: 0.004616181366145611 2023-01-22 18:15:11.720011: step: 558/459, loss: 0.5178422331809998 2023-01-22 18:15:12.337140: step: 560/459, loss: 0.05920511856675148 2023-01-22 18:15:12.975503: step: 562/459, loss: 0.000795458909124136 2023-01-22 18:15:13.627741: step: 564/459, loss: 0.012743337079882622 2023-01-22 18:15:14.223280: step: 566/459, loss: 0.100367471575737 2023-01-22 18:15:14.834490: step: 568/459, loss: 0.11932706087827682 2023-01-22 18:15:15.482678: step: 570/459, loss: 0.046638473868370056 2023-01-22 18:15:16.067482: step: 572/459, loss: 0.02004067599773407 2023-01-22 18:15:16.746497: step: 574/459, loss: 0.011717035435140133 2023-01-22 18:15:17.338477: step: 576/459, loss: 0.01016183104366064 2023-01-22 18:15:17.933647: step: 578/459, loss: 0.00374898174777627 2023-01-22 18:15:18.564581: step: 580/459, loss: 0.023432429879903793 2023-01-22 18:15:19.172277: step: 582/459, loss: 0.025487426668405533 2023-01-22 18:15:19.775784: step: 584/459, loss: 0.2897360920906067 2023-01-22 18:15:20.376746: step: 586/459, loss: 0.015121260657906532 2023-01-22 18:15:21.002850: step: 588/459, loss: 0.046651072800159454 2023-01-22 18:15:21.559703: step: 590/459, loss: 0.011823986656963825 2023-01-22 18:15:22.156055: step: 592/459, loss: 0.008459745906293392 2023-01-22 18:15:22.764832: step: 594/459, loss: 0.017614953219890594 2023-01-22 18:15:23.372457: step: 596/459, loss: 0.015314060263335705 2023-01-22 18:15:24.056385: step: 598/459, loss: 0.0457351915538311 2023-01-22 18:15:24.677911: step: 600/459, loss: 0.009263108484447002 2023-01-22 18:15:25.374432: step: 602/459, loss: 0.012620950117707253 2023-01-22 18:15:25.985355: step: 604/459, loss: 0.0009465546463616192 2023-01-22 18:15:26.634557: step: 606/459, loss: 0.012105172500014305 2023-01-22 18:15:27.267213: step: 608/459, loss: 0.04033985361456871 2023-01-22 18:15:27.881537: step: 610/459, loss: 0.06650696694850922 2023-01-22 18:15:28.504929: step: 612/459, loss: 0.013293324038386345 2023-01-22 18:15:29.120510: step: 614/459, loss: 0.08334282040596008 2023-01-22 18:15:29.721427: step: 616/459, loss: 0.04427178576588631 2023-01-22 18:15:30.327368: step: 618/459, loss: 0.004512656480073929 2023-01-22 18:15:30.971013: step: 620/459, loss: 0.037743400782346725 2023-01-22 18:15:31.523573: step: 622/459, loss: 0.0018185355002060533 2023-01-22 18:15:32.131972: step: 624/459, loss: 0.15983830392360687 2023-01-22 18:15:32.714636: step: 626/459, loss: 0.4444078803062439 2023-01-22 18:15:33.294070: step: 628/459, loss: 0.07803860306739807 2023-01-22 18:15:33.930828: step: 630/459, loss: 0.0317688025534153 2023-01-22 18:15:34.540485: step: 632/459, loss: 0.04308919981122017 2023-01-22 18:15:35.209835: step: 634/459, loss: 0.10719258338212967 2023-01-22 18:15:35.806035: step: 636/459, loss: 0.0274638868868351 2023-01-22 18:15:36.445457: step: 638/459, loss: 0.01928916946053505 2023-01-22 18:15:37.083408: step: 640/459, loss: 0.0043794079683721066 2023-01-22 18:15:37.634204: step: 642/459, loss: 0.004063847474753857 2023-01-22 18:15:38.199386: step: 644/459, loss: 0.012487547472119331 2023-01-22 18:15:38.859450: step: 646/459, loss: 0.033227063715457916 2023-01-22 18:15:39.497815: step: 648/459, loss: 0.024627966806292534 2023-01-22 18:15:40.081493: step: 650/459, loss: 0.002128639491274953 2023-01-22 18:15:40.699696: step: 652/459, loss: 0.14982374012470245 2023-01-22 18:15:41.370549: step: 654/459, loss: 0.0015597004676237702 2023-01-22 18:15:41.916804: step: 656/459, loss: 0.0017310424009338021 2023-01-22 18:15:42.515231: step: 658/459, loss: 0.01592702977359295 2023-01-22 18:15:43.105427: step: 660/459, loss: 0.030806556344032288 2023-01-22 18:15:43.736828: step: 662/459, loss: 0.037882205098867416 2023-01-22 18:15:44.378670: step: 664/459, loss: 0.20892642438411713 2023-01-22 18:15:44.984922: step: 666/459, loss: 1.1922272443771362 2023-01-22 18:15:45.622614: step: 668/459, loss: 0.0016457069432362914 2023-01-22 18:15:46.225372: step: 670/459, loss: 0.025892693549394608 2023-01-22 18:15:46.816810: step: 672/459, loss: 0.008962000720202923 2023-01-22 18:15:47.481287: step: 674/459, loss: 0.014451262541115284 2023-01-22 18:15:48.152721: step: 676/459, loss: 0.020385242998600006 2023-01-22 18:15:48.719670: step: 678/459, loss: 0.016033222898840904 2023-01-22 18:15:49.292802: step: 680/459, loss: 0.056574802845716476 2023-01-22 18:15:49.853916: step: 682/459, loss: 0.04148086905479431 2023-01-22 18:15:50.473275: step: 684/459, loss: 0.0064240493811666965 2023-01-22 18:15:51.112156: step: 686/459, loss: 0.008608432486653328 2023-01-22 18:15:51.776911: step: 688/459, loss: 0.03458913415670395 2023-01-22 18:15:52.384800: step: 690/459, loss: 0.3127951920032501 2023-01-22 18:15:52.983831: step: 692/459, loss: 0.04966873675584793 2023-01-22 18:15:53.554392: step: 694/459, loss: 0.007317657582461834 2023-01-22 18:15:54.265409: step: 696/459, loss: 0.011755547486245632 2023-01-22 18:15:54.885105: step: 698/459, loss: 0.060494475066661835 2023-01-22 18:15:55.431678: step: 700/459, loss: 0.018820500001311302 2023-01-22 18:15:56.085003: step: 702/459, loss: 0.007354554254561663 2023-01-22 18:15:56.722191: step: 704/459, loss: 0.027396293357014656 2023-01-22 18:15:57.394146: step: 706/459, loss: 0.0003882733581122011 2023-01-22 18:15:58.034705: step: 708/459, loss: 0.007906162180006504 2023-01-22 18:15:58.670172: step: 710/459, loss: 0.03532816469669342 2023-01-22 18:15:59.291724: step: 712/459, loss: 0.010213240049779415 2023-01-22 18:15:59.896186: step: 714/459, loss: 0.0003450537333264947 2023-01-22 18:16:00.522871: step: 716/459, loss: 0.03114059753715992 2023-01-22 18:16:01.095025: step: 718/459, loss: 0.02031620591878891 2023-01-22 18:16:01.664048: step: 720/459, loss: 0.42555204033851624 2023-01-22 18:16:02.345082: step: 722/459, loss: 0.01795092597603798 2023-01-22 18:16:02.935244: step: 724/459, loss: 0.040072593837976456 2023-01-22 18:16:03.520659: step: 726/459, loss: 0.0037519000470638275 2023-01-22 18:16:04.130747: step: 728/459, loss: 0.01671850122511387 2023-01-22 18:16:04.822021: step: 730/459, loss: 0.00308238179422915 2023-01-22 18:16:05.540579: step: 732/459, loss: 0.015787826851010323 2023-01-22 18:16:06.153387: step: 734/459, loss: 0.004700925201177597 2023-01-22 18:16:06.824002: step: 736/459, loss: 0.041897304356098175 2023-01-22 18:16:07.445992: step: 738/459, loss: 0.015907669439911842 2023-01-22 18:16:08.055693: step: 740/459, loss: 0.002028548624366522 2023-01-22 18:16:08.787380: step: 742/459, loss: 0.021283965557813644 2023-01-22 18:16:09.384293: step: 744/459, loss: 0.019219109788537025 2023-01-22 18:16:10.076878: step: 746/459, loss: 0.0006245728582143784 2023-01-22 18:16:10.724672: step: 748/459, loss: 0.046945903450250626 2023-01-22 18:16:11.327737: step: 750/459, loss: 0.5810630917549133 2023-01-22 18:16:11.949452: step: 752/459, loss: 0.14268161356449127 2023-01-22 18:16:12.542126: step: 754/459, loss: 0.017904194071888924 2023-01-22 18:16:13.175453: step: 756/459, loss: 0.016772184520959854 2023-01-22 18:16:13.901408: step: 758/459, loss: 0.022129010409116745 2023-01-22 18:16:14.493368: step: 760/459, loss: 0.07722749561071396 2023-01-22 18:16:15.113287: step: 762/459, loss: 0.03634714335203171 2023-01-22 18:16:15.751088: step: 764/459, loss: 0.18332284688949585 2023-01-22 18:16:16.456000: step: 766/459, loss: 0.06990350782871246 2023-01-22 18:16:17.074963: step: 768/459, loss: 0.03037531115114689 2023-01-22 18:16:17.698391: step: 770/459, loss: 0.04114643856883049 2023-01-22 18:16:18.332827: step: 772/459, loss: 0.030788758769631386 2023-01-22 18:16:19.007648: step: 774/459, loss: 0.07193885743618011 2023-01-22 18:16:19.671128: step: 776/459, loss: 0.025720510631799698 2023-01-22 18:16:20.312993: step: 778/459, loss: 0.0005244708736427128 2023-01-22 18:16:20.939075: step: 780/459, loss: 0.0011143978917971253 2023-01-22 18:16:21.528715: step: 782/459, loss: 0.0287458635866642 2023-01-22 18:16:22.129931: step: 784/459, loss: 0.0004842874768655747 2023-01-22 18:16:22.726453: step: 786/459, loss: 0.01568751037120819 2023-01-22 18:16:23.337704: step: 788/459, loss: 0.15407401323318481 2023-01-22 18:16:23.998233: step: 790/459, loss: 0.04794188216328621 2023-01-22 18:16:24.607382: step: 792/459, loss: 0.04680788516998291 2023-01-22 18:16:25.243126: step: 794/459, loss: 0.004941402934491634 2023-01-22 18:16:25.830029: step: 796/459, loss: 0.0009769470198079944 2023-01-22 18:16:26.411867: step: 798/459, loss: 0.0025395245756953955 2023-01-22 18:16:27.019015: step: 800/459, loss: 0.01459923479706049 2023-01-22 18:16:27.677553: step: 802/459, loss: 0.0019171856110915542 2023-01-22 18:16:28.333664: step: 804/459, loss: 0.02217291109263897 2023-01-22 18:16:29.023885: step: 806/459, loss: 0.00444024009630084 2023-01-22 18:16:29.616516: step: 808/459, loss: 0.025908542796969414 2023-01-22 18:16:30.201024: step: 810/459, loss: 0.8343669772148132 2023-01-22 18:16:30.835794: step: 812/459, loss: 0.04658350348472595 2023-01-22 18:16:31.420525: step: 814/459, loss: 0.02579822950065136 2023-01-22 18:16:32.020742: step: 816/459, loss: 0.18973703682422638 2023-01-22 18:16:32.608781: step: 818/459, loss: 0.009442084468901157 2023-01-22 18:16:33.215743: step: 820/459, loss: 0.008366435766220093 2023-01-22 18:16:33.776872: step: 822/459, loss: 0.016651233658194542 2023-01-22 18:16:34.375779: step: 824/459, loss: 0.035748191177845 2023-01-22 18:16:35.001984: step: 826/459, loss: 0.0047773378901183605 2023-01-22 18:16:35.624159: step: 828/459, loss: 0.02252700924873352 2023-01-22 18:16:36.218751: step: 830/459, loss: 0.03351079300045967 2023-01-22 18:16:36.774651: step: 832/459, loss: 0.04222866892814636 2023-01-22 18:16:37.372653: step: 834/459, loss: 0.004660348873585463 2023-01-22 18:16:38.003840: step: 836/459, loss: 0.0074849664233624935 2023-01-22 18:16:38.577509: step: 838/459, loss: 0.03715207055211067 2023-01-22 18:16:39.216210: step: 840/459, loss: 0.006808743346482515 2023-01-22 18:16:39.819579: step: 842/459, loss: 0.04018469899892807 2023-01-22 18:16:40.426435: step: 844/459, loss: 0.07408949732780457 2023-01-22 18:16:40.964883: step: 846/459, loss: 0.03692207857966423 2023-01-22 18:16:41.589594: step: 848/459, loss: 0.014838112518191338 2023-01-22 18:16:42.139918: step: 850/459, loss: 0.06874728947877884 2023-01-22 18:16:42.767933: step: 852/459, loss: 0.015783820301294327 2023-01-22 18:16:43.393287: step: 854/459, loss: 0.027096984907984734 2023-01-22 18:16:43.978774: step: 856/459, loss: 0.01010589487850666 2023-01-22 18:16:44.638249: step: 858/459, loss: 0.006819889415055513 2023-01-22 18:16:45.327085: step: 860/459, loss: 0.02246353216469288 2023-01-22 18:16:46.052996: step: 862/459, loss: 0.10209887474775314 2023-01-22 18:16:46.639475: step: 864/459, loss: 0.016753992065787315 2023-01-22 18:16:47.356611: step: 866/459, loss: 0.004498966038227081 2023-01-22 18:16:47.975975: step: 868/459, loss: 0.057907458394765854 2023-01-22 18:16:48.607465: step: 870/459, loss: 0.0037470804527401924 2023-01-22 18:16:49.175286: step: 872/459, loss: 0.07419589906930923 2023-01-22 18:16:49.788208: step: 874/459, loss: 0.07712563127279282 2023-01-22 18:16:50.434251: step: 876/459, loss: 0.0005265516228973866 2023-01-22 18:16:51.056272: step: 878/459, loss: 0.010005437768995762 2023-01-22 18:16:51.644101: step: 880/459, loss: 0.03454165160655975 2023-01-22 18:16:52.258959: step: 882/459, loss: 0.021075302734971046 2023-01-22 18:16:52.862079: step: 884/459, loss: 0.01898873969912529 2023-01-22 18:16:53.538267: step: 886/459, loss: 0.007844381965696812 2023-01-22 18:16:54.128973: step: 888/459, loss: 0.010770742781460285 2023-01-22 18:16:54.732190: step: 890/459, loss: 0.007521795574575663 2023-01-22 18:16:55.280470: step: 892/459, loss: 0.009704595431685448 2023-01-22 18:16:55.893777: step: 894/459, loss: 0.04240182414650917 2023-01-22 18:16:56.473965: step: 896/459, loss: 0.2500852942466736 2023-01-22 18:16:57.101718: step: 898/459, loss: 0.0053294990211725235 2023-01-22 18:16:57.775690: step: 900/459, loss: 0.06181704252958298 2023-01-22 18:16:58.356663: step: 902/459, loss: 0.017620636150240898 2023-01-22 18:16:58.941917: step: 904/459, loss: 0.014870326034724712 2023-01-22 18:16:59.544710: step: 906/459, loss: 0.00045166804920881987 2023-01-22 18:17:00.261661: step: 908/459, loss: 0.056653279811143875 2023-01-22 18:17:00.938554: step: 910/459, loss: 0.24091258645057678 2023-01-22 18:17:01.462428: step: 912/459, loss: 0.004873727913945913 2023-01-22 18:17:02.063404: step: 914/459, loss: 0.00332991243340075 2023-01-22 18:17:02.672071: step: 916/459, loss: 0.01792753115296364 2023-01-22 18:17:03.325653: step: 918/459, loss: 0.004011570941656828 2023-01-22 18:17:03.773595: step: 920/459, loss: 0.0 ================================================== Loss: 0.044 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3008447346600332, 'r': 0.34423031309297913, 'f1': 0.32107853982300893}, 'combined': 0.23658418723800656, 'epoch': 30} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33171634091408964, 'r': 0.315446874602227, 'f1': 0.32337710329334435}, 'combined': 0.20696134610774036, 'epoch': 30} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29496869571511286, 'r': 0.35149969811971704, 'f1': 0.3207624950806769}, 'combined': 0.23635131216470928, 'epoch': 30} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33681465439842134, 'r': 0.3248621780366939, 'f1': 0.33073046193823685}, 'combined': 0.21166749564047155, 'epoch': 30} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31556579679175695, 'r': 0.3473020154444384, 'f1': 0.33067418634005247}, 'combined': 0.24365466361898602, 'epoch': 30} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3410581922280737, 'r': 0.32861081294967687, 'f1': 0.33471882062532143}, 'combined': 0.23998707893890972, 'epoch': 30} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.20987654320987653, 'r': 0.32380952380952377, 'f1': 0.25468164794007486}, 'combined': 0.16978776529338324, 'epoch': 30} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.43478260869565216, 'f1': 0.3174603174603175}, 'combined': 0.15873015873015875, 'epoch': 30} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.20689655172413793, 'f1': 0.2553191489361702}, 'combined': 0.17021276595744678, 'epoch': 30} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3066986908783784, 'r': 0.3445268026565465, 'f1': 0.3245140750670242}, 'combined': 0.23911563425991253, 'epoch': 19} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31271473684488904, 'r': 0.30873473110322686, 'f1': 0.31071198921642224}, 'combined': 0.1988556730985102, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2564102564102564, 'r': 0.38095238095238093, 'f1': 0.3065134099616858}, 'combined': 0.20434227330779053, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29418702499903554, 'r': 0.3594998939267151, 'f1': 0.32358060478117656}, 'combined': 0.23842781404928798, 'epoch': 13} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3202762492367604, 'r': 0.31212376289255195, 'f1': 0.31614745781013553}, 'combined': 0.20233437299848672, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2878787878787879, 'r': 0.41304347826086957, 'f1': 0.3392857142857143}, 'combined': 0.16964285714285715, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3189032241859154, 'r': 0.3376622373733222, 'f1': 0.32801474487694154}, 'combined': 0.24169507517248323, 'epoch': 15} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34157316773046453, 'r': 0.3008578947798814, 'f1': 0.3199253278791385}, 'combined': 0.22938042376240123, 'epoch': 15} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35294117647058826, 'r': 0.20689655172413793, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 15} ****************************** Epoch: 31 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 18:19:40.853394: step: 2/459, loss: 0.028801115229725838 2023-01-22 18:19:41.495945: step: 4/459, loss: 1.9869321584701538 2023-01-22 18:19:42.082573: step: 6/459, loss: 0.0038405261002480984 2023-01-22 18:19:42.659896: step: 8/459, loss: 0.0011563576990738511 2023-01-22 18:19:43.339447: step: 10/459, loss: 0.007229732349514961 2023-01-22 18:19:43.940443: step: 12/459, loss: 0.0004853109421674162 2023-01-22 18:19:44.542082: step: 14/459, loss: 0.014539486728608608 2023-01-22 18:19:45.119565: step: 16/459, loss: 0.004787814803421497 2023-01-22 18:19:45.765597: step: 18/459, loss: 0.013237995095551014 2023-01-22 18:19:46.399322: step: 20/459, loss: 0.006656725890934467 2023-01-22 18:19:46.995945: step: 22/459, loss: 0.01863374374806881 2023-01-22 18:19:47.626312: step: 24/459, loss: 0.008679576218128204 2023-01-22 18:19:48.248200: step: 26/459, loss: 0.018378883600234985 2023-01-22 18:19:48.830074: step: 28/459, loss: 0.004930577706545591 2023-01-22 18:19:49.478187: step: 30/459, loss: 0.009593397378921509 2023-01-22 18:19:50.081830: step: 32/459, loss: 0.008705666288733482 2023-01-22 18:19:50.713042: step: 34/459, loss: 0.019819818437099457 2023-01-22 18:19:51.392619: step: 36/459, loss: 0.014558698050677776 2023-01-22 18:19:51.994590: step: 38/459, loss: 0.00469703646376729 2023-01-22 18:19:52.597776: step: 40/459, loss: 0.004007464740425348 2023-01-22 18:19:53.264354: step: 42/459, loss: 0.0034701365511864424 2023-01-22 18:19:53.830281: step: 44/459, loss: 0.024423258379101753 2023-01-22 18:19:54.493073: step: 46/459, loss: 0.009039225056767464 2023-01-22 18:19:55.100383: step: 48/459, loss: 0.003576849587261677 2023-01-22 18:19:55.704055: step: 50/459, loss: 0.00022852979600429535 2023-01-22 18:19:56.274412: step: 52/459, loss: 0.0016628175508230925 2023-01-22 18:19:56.985882: step: 54/459, loss: 0.028134318068623543 2023-01-22 18:19:57.607953: step: 56/459, loss: 0.05069763958454132 2023-01-22 18:19:58.235253: step: 58/459, loss: 0.0038721999153494835 2023-01-22 18:19:58.826037: step: 60/459, loss: 0.0026344340294599533 2023-01-22 18:19:59.431608: step: 62/459, loss: 0.007862803526222706 2023-01-22 18:20:00.035437: step: 64/459, loss: 0.004361435305327177 2023-01-22 18:20:00.718584: step: 66/459, loss: 0.003259101649746299 2023-01-22 18:20:01.415157: step: 68/459, loss: 0.2378072887659073 2023-01-22 18:20:02.008839: step: 70/459, loss: 0.14349637925624847 2023-01-22 18:20:02.637671: step: 72/459, loss: 0.11613625288009644 2023-01-22 18:20:03.309743: step: 74/459, loss: 0.0021861775312572718 2023-01-22 18:20:03.886170: step: 76/459, loss: 0.008826596662402153 2023-01-22 18:20:04.526721: step: 78/459, loss: 0.2265535295009613 2023-01-22 18:20:05.081033: step: 80/459, loss: 0.0036847833544015884 2023-01-22 18:20:05.693660: step: 82/459, loss: 0.0026764869689941406 2023-01-22 18:20:06.315516: step: 84/459, loss: 0.01152922585606575 2023-01-22 18:20:06.946525: step: 86/459, loss: 0.009285515174269676 2023-01-22 18:20:07.568683: step: 88/459, loss: 0.02711169235408306 2023-01-22 18:20:08.191144: step: 90/459, loss: 0.009661984629929066 2023-01-22 18:20:08.761880: step: 92/459, loss: 0.0064386059530079365 2023-01-22 18:20:09.420316: step: 94/459, loss: 0.06507430225610733 2023-01-22 18:20:10.112167: step: 96/459, loss: 0.01926150918006897 2023-01-22 18:20:10.780729: step: 98/459, loss: 0.1080813854932785 2023-01-22 18:20:11.375332: step: 100/459, loss: 0.008427885361015797 2023-01-22 18:20:11.997236: step: 102/459, loss: 0.0018889219500124454 2023-01-22 18:20:12.625192: step: 104/459, loss: 0.02801145426928997 2023-01-22 18:20:13.266733: step: 106/459, loss: 0.12259948253631592 2023-01-22 18:20:13.910337: step: 108/459, loss: 0.06301140785217285 2023-01-22 18:20:14.451387: step: 110/459, loss: 0.0014896506909281015 2023-01-22 18:20:15.084751: step: 112/459, loss: 0.008460665121674538 2023-01-22 18:20:15.669286: step: 114/459, loss: 0.05153464525938034 2023-01-22 18:20:16.322429: step: 116/459, loss: 0.004616596270352602 2023-01-22 18:20:16.930603: step: 118/459, loss: 0.0018516016425564885 2023-01-22 18:20:17.664656: step: 120/459, loss: 0.02155347168445587 2023-01-22 18:20:18.235733: step: 122/459, loss: 0.21367613971233368 2023-01-22 18:20:18.906156: step: 124/459, loss: 0.0024433324579149485 2023-01-22 18:20:19.453330: step: 126/459, loss: 0.00012796299415640533 2023-01-22 18:20:20.036663: step: 128/459, loss: 0.021287493407726288 2023-01-22 18:20:20.613567: step: 130/459, loss: 0.016263442113995552 2023-01-22 18:20:21.144224: step: 132/459, loss: 0.0002100144192809239 2023-01-22 18:20:21.739354: step: 134/459, loss: 0.023135066032409668 2023-01-22 18:20:22.326027: step: 136/459, loss: 0.01858498714864254 2023-01-22 18:20:22.926650: step: 138/459, loss: 0.023048195987939835 2023-01-22 18:20:23.523716: step: 140/459, loss: 0.005396615248173475 2023-01-22 18:20:24.191884: step: 142/459, loss: 0.004024008754640818 2023-01-22 18:20:24.900184: step: 144/459, loss: 0.06459125876426697 2023-01-22 18:20:25.500900: step: 146/459, loss: 0.005050932057201862 2023-01-22 18:20:26.048649: step: 148/459, loss: 0.0010252164211124182 2023-01-22 18:20:26.614083: step: 150/459, loss: 0.013588575646281242 2023-01-22 18:20:27.170585: step: 152/459, loss: 0.008883114904165268 2023-01-22 18:20:27.799006: step: 154/459, loss: 0.47545281052589417 2023-01-22 18:20:28.539853: step: 156/459, loss: 0.016122642904520035 2023-01-22 18:20:29.148642: step: 158/459, loss: 0.04295096918940544 2023-01-22 18:20:29.762802: step: 160/459, loss: 0.03143119812011719 2023-01-22 18:20:30.399899: step: 162/459, loss: 0.037878137081861496 2023-01-22 18:20:31.007311: step: 164/459, loss: 0.2206847071647644 2023-01-22 18:20:31.626482: step: 166/459, loss: 0.20474611222743988 2023-01-22 18:20:32.249591: step: 168/459, loss: 0.006831422448158264 2023-01-22 18:20:32.879340: step: 170/459, loss: 0.05060090497136116 2023-01-22 18:20:33.507659: step: 172/459, loss: 0.00022488983813673258 2023-01-22 18:20:34.112472: step: 174/459, loss: 0.050941433757543564 2023-01-22 18:20:34.800910: step: 176/459, loss: 1.2552919387817383 2023-01-22 18:20:35.382880: step: 178/459, loss: 0.011815158650279045 2023-01-22 18:20:35.955583: step: 180/459, loss: 0.004012011457234621 2023-01-22 18:20:36.554897: step: 182/459, loss: 0.0009679916547611356 2023-01-22 18:20:37.129196: step: 184/459, loss: 0.0009062773315235972 2023-01-22 18:20:37.738473: step: 186/459, loss: 0.4570258557796478 2023-01-22 18:20:38.356728: step: 188/459, loss: 0.0035400851629674435 2023-01-22 18:20:38.963858: step: 190/459, loss: 0.007888859137892723 2023-01-22 18:20:39.565166: step: 192/459, loss: 0.4915750026702881 2023-01-22 18:20:40.146018: step: 194/459, loss: 0.006178688257932663 2023-01-22 18:20:40.798872: step: 196/459, loss: 0.0032837074249982834 2023-01-22 18:20:41.416437: step: 198/459, loss: 0.004211755935102701 2023-01-22 18:20:41.998510: step: 200/459, loss: 0.010616750456392765 2023-01-22 18:20:42.609805: step: 202/459, loss: 0.011500472202897072 2023-01-22 18:20:43.231358: step: 204/459, loss: 0.05179424211382866 2023-01-22 18:20:43.810955: step: 206/459, loss: 0.036714136600494385 2023-01-22 18:20:44.446822: step: 208/459, loss: 0.04471601918339729 2023-01-22 18:20:45.113976: step: 210/459, loss: 0.016160525381565094 2023-01-22 18:20:45.729697: step: 212/459, loss: 0.025760633870959282 2023-01-22 18:20:46.326421: step: 214/459, loss: 0.0008010404999367893 2023-01-22 18:20:47.045311: step: 216/459, loss: 0.01604985073208809 2023-01-22 18:20:47.607909: step: 218/459, loss: 0.03197415545582771 2023-01-22 18:20:48.181194: step: 220/459, loss: 0.1829397827386856 2023-01-22 18:20:48.842998: step: 222/459, loss: 0.09508302062749863 2023-01-22 18:20:49.571351: step: 224/459, loss: 0.024520188570022583 2023-01-22 18:20:50.269209: step: 226/459, loss: 0.007963084615767002 2023-01-22 18:20:50.886367: step: 228/459, loss: 0.002862185938283801 2023-01-22 18:20:51.453743: step: 230/459, loss: 0.028124283999204636 2023-01-22 18:20:52.095106: step: 232/459, loss: 0.04587544873356819 2023-01-22 18:20:52.717990: step: 234/459, loss: 0.004742840770632029 2023-01-22 18:20:53.342742: step: 236/459, loss: 0.006002012174576521 2023-01-22 18:20:53.936501: step: 238/459, loss: 0.00042687435052357614 2023-01-22 18:20:54.659975: step: 240/459, loss: 0.013618789613246918 2023-01-22 18:20:55.217385: step: 242/459, loss: 0.035684630274772644 2023-01-22 18:20:55.802469: step: 244/459, loss: 0.001958467299118638 2023-01-22 18:20:56.453505: step: 246/459, loss: 0.007205728907138109 2023-01-22 18:20:57.103887: step: 248/459, loss: 0.7048457264900208 2023-01-22 18:20:57.674380: step: 250/459, loss: 0.015986736863851547 2023-01-22 18:20:58.285217: step: 252/459, loss: 0.20065680146217346 2023-01-22 18:20:58.878097: step: 254/459, loss: 0.002845758106559515 2023-01-22 18:20:59.528816: step: 256/459, loss: 0.019794179126620293 2023-01-22 18:21:00.213315: step: 258/459, loss: 0.000247186137130484 2023-01-22 18:21:00.781738: step: 260/459, loss: 0.014680705964565277 2023-01-22 18:21:01.400827: step: 262/459, loss: 0.0026434529572725296 2023-01-22 18:21:02.043929: step: 264/459, loss: 0.004420781508088112 2023-01-22 18:21:02.651701: step: 266/459, loss: 0.002013922668993473 2023-01-22 18:21:03.231343: step: 268/459, loss: 0.010666904039680958 2023-01-22 18:21:03.870848: step: 270/459, loss: 0.008632993325591087 2023-01-22 18:21:04.491264: step: 272/459, loss: 0.009610866196453571 2023-01-22 18:21:05.168083: step: 274/459, loss: 0.07280057668685913 2023-01-22 18:21:05.769087: step: 276/459, loss: 0.01577652245759964 2023-01-22 18:21:06.325624: step: 278/459, loss: 0.05274168401956558 2023-01-22 18:21:06.922455: step: 280/459, loss: 0.002328799106180668 2023-01-22 18:21:07.522317: step: 282/459, loss: 0.00018003738659899682 2023-01-22 18:21:08.048695: step: 284/459, loss: 0.00023927344591356814 2023-01-22 18:21:08.633094: step: 286/459, loss: 0.005285760387778282 2023-01-22 18:21:09.253944: step: 288/459, loss: 0.016758278012275696 2023-01-22 18:21:09.872884: step: 290/459, loss: 0.006594162434339523 2023-01-22 18:21:10.469664: step: 292/459, loss: 0.05407937243580818 2023-01-22 18:21:11.104076: step: 294/459, loss: 0.010681274347007275 2023-01-22 18:21:11.817578: step: 296/459, loss: 0.02152884379029274 2023-01-22 18:21:12.411578: step: 298/459, loss: 0.007205895613878965 2023-01-22 18:21:12.929929: step: 300/459, loss: 0.0027748066931962967 2023-01-22 18:21:13.600196: step: 302/459, loss: 0.0025836187414824963 2023-01-22 18:21:14.233140: step: 304/459, loss: 0.03286390006542206 2023-01-22 18:21:14.810618: step: 306/459, loss: 0.05492495000362396 2023-01-22 18:21:15.438181: step: 308/459, loss: 0.0038540286477655172 2023-01-22 18:21:16.199852: step: 310/459, loss: 0.005758919287472963 2023-01-22 18:21:16.793602: step: 312/459, loss: 0.004575125407427549 2023-01-22 18:21:17.495914: step: 314/459, loss: 0.00028125313110649586 2023-01-22 18:21:18.132790: step: 316/459, loss: 0.001279538730159402 2023-01-22 18:21:18.756949: step: 318/459, loss: 0.003015312133356929 2023-01-22 18:21:19.414483: step: 320/459, loss: 0.0003998266765847802 2023-01-22 18:21:20.035294: step: 322/459, loss: 0.0010966811096295714 2023-01-22 18:21:20.647978: step: 324/459, loss: 0.0007305251201614738 2023-01-22 18:21:21.198979: step: 326/459, loss: 0.056558746844530106 2023-01-22 18:21:21.800972: step: 328/459, loss: 0.018686814233660698 2023-01-22 18:21:22.411725: step: 330/459, loss: 0.0073934439569711685 2023-01-22 18:21:23.034143: step: 332/459, loss: 0.021749068051576614 2023-01-22 18:21:23.625264: step: 334/459, loss: 0.01719621568918228 2023-01-22 18:21:24.255274: step: 336/459, loss: 0.0008193753892555833 2023-01-22 18:21:24.862447: step: 338/459, loss: 0.13445237278938293 2023-01-22 18:21:25.435190: step: 340/459, loss: 0.0283421128988266 2023-01-22 18:21:26.082520: step: 342/459, loss: 0.004419565666466951 2023-01-22 18:21:26.705491: step: 344/459, loss: 0.04900899901986122 2023-01-22 18:21:27.267510: step: 346/459, loss: 0.0034288913011550903 2023-01-22 18:21:27.880255: step: 348/459, loss: 0.07528572529554367 2023-01-22 18:21:28.492046: step: 350/459, loss: 0.001240166020579636 2023-01-22 18:21:29.127742: step: 352/459, loss: 0.02570546418428421 2023-01-22 18:21:29.772647: step: 354/459, loss: 0.007693214807659388 2023-01-22 18:21:30.335507: step: 356/459, loss: 0.04648401588201523 2023-01-22 18:21:30.983016: step: 358/459, loss: 0.017548557370901108 2023-01-22 18:21:31.579206: step: 360/459, loss: 0.06455448269844055 2023-01-22 18:21:32.219960: step: 362/459, loss: 0.007051017135381699 2023-01-22 18:21:32.801134: step: 364/459, loss: 0.007748515345156193 2023-01-22 18:21:33.474198: step: 366/459, loss: 0.0006947817164473236 2023-01-22 18:21:34.176411: step: 368/459, loss: 0.0046484703198075294 2023-01-22 18:21:34.801697: step: 370/459, loss: 0.009123256430029869 2023-01-22 18:21:35.379848: step: 372/459, loss: 0.10108533501625061 2023-01-22 18:21:35.994991: step: 374/459, loss: 0.026483597233891487 2023-01-22 18:21:36.582424: step: 376/459, loss: 0.04072842746973038 2023-01-22 18:21:37.174118: step: 378/459, loss: 0.0020977617241442204 2023-01-22 18:21:37.740326: step: 380/459, loss: 0.0009734958875924349 2023-01-22 18:21:38.383704: step: 382/459, loss: 0.009841876104474068 2023-01-22 18:21:39.017384: step: 384/459, loss: 0.005666283890604973 2023-01-22 18:21:39.677819: step: 386/459, loss: 0.08037623763084412 2023-01-22 18:21:40.349937: step: 388/459, loss: 0.005652488209307194 2023-01-22 18:21:40.955877: step: 390/459, loss: 0.02700229361653328 2023-01-22 18:21:41.585942: step: 392/459, loss: 0.003619895549491048 2023-01-22 18:21:42.166499: step: 394/459, loss: 0.019960224628448486 2023-01-22 18:21:42.821192: step: 396/459, loss: 0.005445925984531641 2023-01-22 18:21:43.447009: step: 398/459, loss: 0.01656234636902809 2023-01-22 18:21:44.008305: step: 400/459, loss: 0.1137341856956482 2023-01-22 18:21:44.594901: step: 402/459, loss: 0.03297612816095352 2023-01-22 18:21:45.174279: step: 404/459, loss: 0.0001365979405818507 2023-01-22 18:21:45.822042: step: 406/459, loss: 0.0019557252526283264 2023-01-22 18:21:46.493286: step: 408/459, loss: 0.02091270685195923 2023-01-22 18:21:47.099151: step: 410/459, loss: 0.012804399244487286 2023-01-22 18:21:47.802460: step: 412/459, loss: 0.017531603574752808 2023-01-22 18:21:48.405028: step: 414/459, loss: 0.046375833451747894 2023-01-22 18:21:48.985905: step: 416/459, loss: 0.11002025753259659 2023-01-22 18:21:49.568688: step: 418/459, loss: 0.01029216032475233 2023-01-22 18:21:50.174554: step: 420/459, loss: 0.05881878361105919 2023-01-22 18:21:50.795801: step: 422/459, loss: 0.0037131670396775007 2023-01-22 18:21:51.431676: step: 424/459, loss: 0.00167024543043226 2023-01-22 18:21:51.998935: step: 426/459, loss: 0.01469134446233511 2023-01-22 18:21:52.639905: step: 428/459, loss: 0.11478906869888306 2023-01-22 18:21:53.261753: step: 430/459, loss: 0.026034019887447357 2023-01-22 18:21:53.866905: step: 432/459, loss: 0.23109248280525208 2023-01-22 18:21:54.503952: step: 434/459, loss: 0.04256438463926315 2023-01-22 18:21:55.190909: step: 436/459, loss: 0.03334687277674675 2023-01-22 18:21:55.756037: step: 438/459, loss: 0.00047927070409059525 2023-01-22 18:21:56.429602: step: 440/459, loss: 0.006644970737397671 2023-01-22 18:21:57.009758: step: 442/459, loss: 0.032653093338012695 2023-01-22 18:21:57.620088: step: 444/459, loss: 0.05658474192023277 2023-01-22 18:21:58.227386: step: 446/459, loss: 0.0647207647562027 2023-01-22 18:21:58.845739: step: 448/459, loss: 0.02918093092739582 2023-01-22 18:21:59.439659: step: 450/459, loss: 0.018390821292996407 2023-01-22 18:22:00.038384: step: 452/459, loss: 0.031814541667699814 2023-01-22 18:22:00.642592: step: 454/459, loss: 0.0364176407456398 2023-01-22 18:22:01.243392: step: 456/459, loss: 0.00024796530487947166 2023-01-22 18:22:01.839023: step: 458/459, loss: 0.013830813579261303 2023-01-22 18:22:02.529876: step: 460/459, loss: 0.008769681677222252 2023-01-22 18:22:03.104383: step: 462/459, loss: 0.0002626323839649558 2023-01-22 18:22:03.716452: step: 464/459, loss: 1.676279067993164 2023-01-22 18:22:04.354171: step: 466/459, loss: 0.009639124386012554 2023-01-22 18:22:04.941036: step: 468/459, loss: 0.08251742273569107 2023-01-22 18:22:05.543931: step: 470/459, loss: 0.08234070986509323 2023-01-22 18:22:06.191414: step: 472/459, loss: 0.057261958718299866 2023-01-22 18:22:06.909179: step: 474/459, loss: 0.05874444916844368 2023-01-22 18:22:07.512875: step: 476/459, loss: 0.017005303874611855 2023-01-22 18:22:08.175281: step: 478/459, loss: 0.08288182318210602 2023-01-22 18:22:08.798205: step: 480/459, loss: 0.004217989277094603 2023-01-22 18:22:09.385669: step: 482/459, loss: 0.03917783126235008 2023-01-22 18:22:09.982370: step: 484/459, loss: 0.007289201486855745 2023-01-22 18:22:10.626956: step: 486/459, loss: 0.019592780619859695 2023-01-22 18:22:11.223256: step: 488/459, loss: 0.1954043209552765 2023-01-22 18:22:11.832998: step: 490/459, loss: 0.31615257263183594 2023-01-22 18:22:12.476809: step: 492/459, loss: 0.057413723319768906 2023-01-22 18:22:13.021739: step: 494/459, loss: 0.007706269156187773 2023-01-22 18:22:13.681583: step: 496/459, loss: 0.022138439118862152 2023-01-22 18:22:14.291887: step: 498/459, loss: 0.020990753546357155 2023-01-22 18:22:14.907923: step: 500/459, loss: 0.004601405002176762 2023-01-22 18:22:15.519486: step: 502/459, loss: 0.039220165461301804 2023-01-22 18:22:16.208318: step: 504/459, loss: 0.010178502649068832 2023-01-22 18:22:16.852228: step: 506/459, loss: 0.004515639040619135 2023-01-22 18:22:17.465960: step: 508/459, loss: 0.046967413276433945 2023-01-22 18:22:18.084794: step: 510/459, loss: 0.012459524907171726 2023-01-22 18:22:18.690048: step: 512/459, loss: 0.03097841888666153 2023-01-22 18:22:19.337542: step: 514/459, loss: 0.6458399891853333 2023-01-22 18:22:19.954448: step: 516/459, loss: 0.19400793313980103 2023-01-22 18:22:20.582692: step: 518/459, loss: 0.0014084569411352277 2023-01-22 18:22:21.243754: step: 520/459, loss: 0.015648899599909782 2023-01-22 18:22:21.834241: step: 522/459, loss: 0.010340893641114235 2023-01-22 18:22:22.443199: step: 524/459, loss: 0.0016647999873384833 2023-01-22 18:22:23.165694: step: 526/459, loss: 0.0012582190101966262 2023-01-22 18:22:23.782994: step: 528/459, loss: 0.11418856680393219 2023-01-22 18:22:24.314241: step: 530/459, loss: 0.01918272115290165 2023-01-22 18:22:24.908117: step: 532/459, loss: 0.006133413407951593 2023-01-22 18:22:25.536149: step: 534/459, loss: 0.009769692085683346 2023-01-22 18:22:26.119871: step: 536/459, loss: 0.019699009135365486 2023-01-22 18:22:26.783357: step: 538/459, loss: 0.10325130075216293 2023-01-22 18:22:27.335367: step: 540/459, loss: 0.010626547038555145 2023-01-22 18:22:28.034974: step: 542/459, loss: 0.031030908226966858 2023-01-22 18:22:28.675791: step: 544/459, loss: 0.004075679928064346 2023-01-22 18:22:29.236117: step: 546/459, loss: 0.0074465712532401085 2023-01-22 18:22:29.807069: step: 548/459, loss: 0.07713166624307632 2023-01-22 18:22:30.478894: step: 550/459, loss: 0.006720115430653095 2023-01-22 18:22:31.021858: step: 552/459, loss: 0.009244942106306553 2023-01-22 18:22:31.622169: step: 554/459, loss: 0.016742806881666183 2023-01-22 18:22:32.234615: step: 556/459, loss: 0.006100622471421957 2023-01-22 18:22:32.785795: step: 558/459, loss: 0.00913793221116066 2023-01-22 18:22:33.390151: step: 560/459, loss: 0.18305747210979462 2023-01-22 18:22:34.133110: step: 562/459, loss: 0.012089629657566547 2023-01-22 18:22:34.732288: step: 564/459, loss: 0.0005413774633780122 2023-01-22 18:22:35.378671: step: 566/459, loss: 0.003407515585422516 2023-01-22 18:22:36.072108: step: 568/459, loss: 0.03262895345687866 2023-01-22 18:22:36.694373: step: 570/459, loss: 0.011479358188807964 2023-01-22 18:22:37.289565: step: 572/459, loss: 0.050094883888959885 2023-01-22 18:22:37.949209: step: 574/459, loss: 2.100395679473877 2023-01-22 18:22:38.634465: step: 576/459, loss: 0.004603299777954817 2023-01-22 18:22:39.226597: step: 578/459, loss: 0.013009211979806423 2023-01-22 18:22:39.878005: step: 580/459, loss: 0.002395421266555786 2023-01-22 18:22:40.487469: step: 582/459, loss: 0.0071515911258757114 2023-01-22 18:22:41.125898: step: 584/459, loss: 0.01894298754632473 2023-01-22 18:22:41.726888: step: 586/459, loss: 0.011178283952176571 2023-01-22 18:22:42.339971: step: 588/459, loss: 0.007230398710817099 2023-01-22 18:22:42.927506: step: 590/459, loss: 0.001695773913525045 2023-01-22 18:22:43.540413: step: 592/459, loss: 0.02999863773584366 2023-01-22 18:22:44.116679: step: 594/459, loss: 0.009609493426978588 2023-01-22 18:22:44.806382: step: 596/459, loss: 0.04610416665673256 2023-01-22 18:22:45.354152: step: 598/459, loss: 0.46515363454818726 2023-01-22 18:22:45.978934: step: 600/459, loss: 0.006987676490098238 2023-01-22 18:22:46.536855: step: 602/459, loss: 0.057828400284051895 2023-01-22 18:22:47.126390: step: 604/459, loss: 0.018199272453784943 2023-01-22 18:22:47.732232: step: 606/459, loss: 0.0046651773154735565 2023-01-22 18:22:48.430117: step: 608/459, loss: 0.0038176467642188072 2023-01-22 18:22:49.055845: step: 610/459, loss: 0.6292706727981567 2023-01-22 18:22:49.727501: step: 612/459, loss: 0.004504315555095673 2023-01-22 18:22:50.340545: step: 614/459, loss: 0.012239298783242702 2023-01-22 18:22:50.898782: step: 616/459, loss: 0.1288180947303772 2023-01-22 18:22:51.472614: step: 618/459, loss: 0.05825919657945633 2023-01-22 18:22:52.041944: step: 620/459, loss: 0.030891556292772293 2023-01-22 18:22:52.604273: step: 622/459, loss: 0.001072761369869113 2023-01-22 18:22:53.188600: step: 624/459, loss: 0.5785819292068481 2023-01-22 18:22:53.850689: step: 626/459, loss: 0.006858604960143566 2023-01-22 18:22:54.486762: step: 628/459, loss: 0.002848381642252207 2023-01-22 18:22:55.096823: step: 630/459, loss: 0.001477941288612783 2023-01-22 18:22:55.711072: step: 632/459, loss: 0.004480161238461733 2023-01-22 18:22:56.364578: step: 634/459, loss: 0.0027127659413963556 2023-01-22 18:22:56.998054: step: 636/459, loss: 0.5212228298187256 2023-01-22 18:22:57.588685: step: 638/459, loss: 0.018116602674126625 2023-01-22 18:22:58.196956: step: 640/459, loss: 0.021772224456071854 2023-01-22 18:22:58.886145: step: 642/459, loss: 0.009606493636965752 2023-01-22 18:22:59.452369: step: 644/459, loss: 0.0490703359246254 2023-01-22 18:23:00.098169: step: 646/459, loss: 0.002801725408062339 2023-01-22 18:23:00.713281: step: 648/459, loss: 0.0413166880607605 2023-01-22 18:23:01.393942: step: 650/459, loss: 0.009462247602641582 2023-01-22 18:23:02.066677: step: 652/459, loss: 0.018306046724319458 2023-01-22 18:23:02.694277: step: 654/459, loss: 0.32428762316703796 2023-01-22 18:23:03.313088: step: 656/459, loss: 0.012638108804821968 2023-01-22 18:23:03.922873: step: 658/459, loss: 0.0027295835316181183 2023-01-22 18:23:04.529577: step: 660/459, loss: 0.006827780045568943 2023-01-22 18:23:05.229836: step: 662/459, loss: 0.012639472261071205 2023-01-22 18:23:05.858388: step: 664/459, loss: 0.009360230527818203 2023-01-22 18:23:06.484469: step: 666/459, loss: 0.0031834510155022144 2023-01-22 18:23:07.102848: step: 668/459, loss: 0.04414955526590347 2023-01-22 18:23:07.684570: step: 670/459, loss: 1.1526979207992554 2023-01-22 18:23:08.368334: step: 672/459, loss: 0.007271300535649061 2023-01-22 18:23:09.016780: step: 674/459, loss: 0.008002307265996933 2023-01-22 18:23:09.617097: step: 676/459, loss: 0.02667863667011261 2023-01-22 18:23:10.219792: step: 678/459, loss: 0.4450950026512146 2023-01-22 18:23:10.860049: step: 680/459, loss: 0.004276152700185776 2023-01-22 18:23:11.477402: step: 682/459, loss: 0.004353318829089403 2023-01-22 18:23:12.143875: step: 684/459, loss: 0.034465644508600235 2023-01-22 18:23:12.758452: step: 686/459, loss: 0.006035847123712301 2023-01-22 18:23:13.371832: step: 688/459, loss: 0.010970605537295341 2023-01-22 18:23:13.946548: step: 690/459, loss: 0.00567459175363183 2023-01-22 18:23:14.634342: step: 692/459, loss: 0.09896210581064224 2023-01-22 18:23:15.249994: step: 694/459, loss: 0.0015048956265673041 2023-01-22 18:23:15.870708: step: 696/459, loss: 0.022032685577869415 2023-01-22 18:23:16.432665: step: 698/459, loss: 0.007737229578197002 2023-01-22 18:23:17.045094: step: 700/459, loss: 0.024344513192772865 2023-01-22 18:23:17.653443: step: 702/459, loss: 0.00598490284755826 2023-01-22 18:23:18.199840: step: 704/459, loss: 0.03278142958879471 2023-01-22 18:23:18.758645: step: 706/459, loss: 0.48431238532066345 2023-01-22 18:23:19.341967: step: 708/459, loss: 0.37862756848335266 2023-01-22 18:23:19.952743: step: 710/459, loss: 0.022966250777244568 2023-01-22 18:23:20.547332: step: 712/459, loss: 0.0018787283916026354 2023-01-22 18:23:21.115540: step: 714/459, loss: 0.01255375612527132 2023-01-22 18:23:21.816113: step: 716/459, loss: 0.035857684910297394 2023-01-22 18:23:22.479995: step: 718/459, loss: 0.00203140452504158 2023-01-22 18:23:23.119882: step: 720/459, loss: 0.019405679777264595 2023-01-22 18:23:23.724073: step: 722/459, loss: 0.03305333852767944 2023-01-22 18:23:24.336358: step: 724/459, loss: 0.010706583969295025 2023-01-22 18:23:24.988869: step: 726/459, loss: 0.015040465630590916 2023-01-22 18:23:25.552900: step: 728/459, loss: 0.0011246073991060257 2023-01-22 18:23:26.141371: step: 730/459, loss: 0.015639180317521095 2023-01-22 18:23:26.767389: step: 732/459, loss: 0.014484918676316738 2023-01-22 18:23:27.350007: step: 734/459, loss: 0.011140255257487297 2023-01-22 18:23:27.953828: step: 736/459, loss: 0.026344994083046913 2023-01-22 18:23:28.514235: step: 738/459, loss: 0.019181936979293823 2023-01-22 18:23:29.114830: step: 740/459, loss: 0.030578849837183952 2023-01-22 18:23:29.766513: step: 742/459, loss: 0.026879549026489258 2023-01-22 18:23:30.363777: step: 744/459, loss: 0.5058932304382324 2023-01-22 18:23:30.977667: step: 746/459, loss: 0.006037920247763395 2023-01-22 18:23:31.539436: step: 748/459, loss: 0.017315857112407684 2023-01-22 18:23:32.129034: step: 750/459, loss: 0.02007729560136795 2023-01-22 18:23:32.686468: step: 752/459, loss: 0.007296297699213028 2023-01-22 18:23:33.279777: step: 754/459, loss: 0.0011006419081240892 2023-01-22 18:23:33.873655: step: 756/459, loss: 0.015025127679109573 2023-01-22 18:23:34.449357: step: 758/459, loss: 0.019772784784436226 2023-01-22 18:23:35.052656: step: 760/459, loss: 0.0050301761366426945 2023-01-22 18:23:35.663594: step: 762/459, loss: 0.001479300670325756 2023-01-22 18:23:36.250485: step: 764/459, loss: 0.00023612476070411503 2023-01-22 18:23:36.864007: step: 766/459, loss: 0.003926795907318592 2023-01-22 18:23:37.467003: step: 768/459, loss: 0.01512060221284628 2023-01-22 18:23:38.145776: step: 770/459, loss: 0.09372639656066895 2023-01-22 18:23:38.756031: step: 772/459, loss: 0.019441548734903336 2023-01-22 18:23:39.420503: step: 774/459, loss: 0.02145972289144993 2023-01-22 18:23:40.018068: step: 776/459, loss: 0.017772706225514412 2023-01-22 18:23:40.627723: step: 778/459, loss: 0.016657650470733643 2023-01-22 18:23:41.174858: step: 780/459, loss: 0.5956637263298035 2023-01-22 18:23:41.816112: step: 782/459, loss: 0.00533566577360034 2023-01-22 18:23:42.419875: step: 784/459, loss: 0.01898176223039627 2023-01-22 18:23:43.099516: step: 786/459, loss: 0.016454512253403664 2023-01-22 18:23:43.764864: step: 788/459, loss: 0.01293948758393526 2023-01-22 18:23:44.356129: step: 790/459, loss: 0.004334777127951384 2023-01-22 18:23:45.009824: step: 792/459, loss: 0.007276391144841909 2023-01-22 18:23:45.691221: step: 794/459, loss: 0.004397972021251917 2023-01-22 18:23:46.332442: step: 796/459, loss: 0.10705967247486115 2023-01-22 18:23:46.986776: step: 798/459, loss: 0.019685745239257812 2023-01-22 18:23:47.698708: step: 800/459, loss: 0.005076737608760595 2023-01-22 18:23:48.316030: step: 802/459, loss: 0.006910678464919329 2023-01-22 18:23:48.897214: step: 804/459, loss: 0.007106830831617117 2023-01-22 18:23:49.553847: step: 806/459, loss: 0.060383886098861694 2023-01-22 18:23:50.157012: step: 808/459, loss: 0.045273978263139725 2023-01-22 18:23:50.747123: step: 810/459, loss: 0.03397113457322121 2023-01-22 18:23:51.337022: step: 812/459, loss: 0.055354248732328415 2023-01-22 18:23:51.942207: step: 814/459, loss: 0.0474126972258091 2023-01-22 18:23:52.580158: step: 816/459, loss: 0.06988150626420975 2023-01-22 18:23:53.172053: step: 818/459, loss: 0.2523580491542816 2023-01-22 18:23:53.779758: step: 820/459, loss: 0.008222239091992378 2023-01-22 18:23:54.415498: step: 822/459, loss: 0.15816067159175873 2023-01-22 18:23:55.005015: step: 824/459, loss: 0.0009249927243217826 2023-01-22 18:23:55.646612: step: 826/459, loss: 0.15059615671634674 2023-01-22 18:23:56.236435: step: 828/459, loss: 0.02089318446815014 2023-01-22 18:23:56.780409: step: 830/459, loss: 0.04154665768146515 2023-01-22 18:23:57.383277: step: 832/459, loss: 0.0524524562060833 2023-01-22 18:23:58.036301: step: 834/459, loss: 0.005730591248720884 2023-01-22 18:23:58.609637: step: 836/459, loss: 0.004746021702885628 2023-01-22 18:23:59.249730: step: 838/459, loss: 0.0008435940835624933 2023-01-22 18:23:59.886899: step: 840/459, loss: 0.0036469365004450083 2023-01-22 18:24:00.529897: step: 842/459, loss: 0.009027802385389805 2023-01-22 18:24:01.136960: step: 844/459, loss: 0.0033078461419790983 2023-01-22 18:24:01.782166: step: 846/459, loss: 0.0005922058480791748 2023-01-22 18:24:02.454747: step: 848/459, loss: 0.04389640688896179 2023-01-22 18:24:03.012411: step: 850/459, loss: 0.0028484349604696035 2023-01-22 18:24:03.605864: step: 852/459, loss: 0.001731951953843236 2023-01-22 18:24:04.377435: step: 854/459, loss: 0.01520884595811367 2023-01-22 18:24:04.934524: step: 856/459, loss: 0.0001820030011003837 2023-01-22 18:24:05.547978: step: 858/459, loss: 0.0002858951629605144 2023-01-22 18:24:06.165154: step: 860/459, loss: 0.011854927986860275 2023-01-22 18:24:06.836109: step: 862/459, loss: 0.03994705528020859 2023-01-22 18:24:07.444054: step: 864/459, loss: 0.001536424970254302 2023-01-22 18:24:08.038587: step: 866/459, loss: 0.0005489622708410025 2023-01-22 18:24:08.610865: step: 868/459, loss: 0.007570748683065176 2023-01-22 18:24:09.181798: step: 870/459, loss: 0.037351418286561966 2023-01-22 18:24:09.839029: step: 872/459, loss: 0.04328145831823349 2023-01-22 18:24:10.457538: step: 874/459, loss: 0.015373364090919495 2023-01-22 18:24:11.084067: step: 876/459, loss: 0.0012390126939862967 2023-01-22 18:24:11.844149: step: 878/459, loss: 0.003993292339146137 2023-01-22 18:24:12.439092: step: 880/459, loss: 0.06096893921494484 2023-01-22 18:24:13.110208: step: 882/459, loss: 0.024202069267630577 2023-01-22 18:24:13.755781: step: 884/459, loss: 0.018048735335469246 2023-01-22 18:24:14.342577: step: 886/459, loss: 0.0011242985492572188 2023-01-22 18:24:14.999426: step: 888/459, loss: 0.00012965765199624002 2023-01-22 18:24:15.588933: step: 890/459, loss: 0.012838969007134438 2023-01-22 18:24:16.178736: step: 892/459, loss: 0.008305399678647518 2023-01-22 18:24:16.854415: step: 894/459, loss: 0.015789687633514404 2023-01-22 18:24:17.509565: step: 896/459, loss: 0.0008995571406558156 2023-01-22 18:24:18.111569: step: 898/459, loss: 0.005653727799654007 2023-01-22 18:24:18.730518: step: 900/459, loss: 0.007662993390113115 2023-01-22 18:24:19.358917: step: 902/459, loss: 0.023132046684622765 2023-01-22 18:24:20.003053: step: 904/459, loss: 0.0011519925901666284 2023-01-22 18:24:20.627649: step: 906/459, loss: 0.00988523755222559 2023-01-22 18:24:21.196209: step: 908/459, loss: 0.0038799296598881483 2023-01-22 18:24:21.934099: step: 910/459, loss: 0.03192579746246338 2023-01-22 18:24:22.522628: step: 912/459, loss: 0.03435351327061653 2023-01-22 18:24:23.122831: step: 914/459, loss: 0.0013062494108453393 2023-01-22 18:24:23.755336: step: 916/459, loss: 0.0005122146103531122 2023-01-22 18:24:24.425928: step: 918/459, loss: 0.01680905558168888 2023-01-22 18:24:24.870108: step: 920/459, loss: 0.04630496725440025 ================================================== Loss: 0.060 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2919751213592233, 'r': 0.3423920777988615, 'f1': 0.3151801310043668}, 'combined': 0.23223799126637554, 'epoch': 31} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3119993948556595, 'r': 0.31710483949875207, 'f1': 0.31453140076521846}, 'combined': 0.2013000964897398, 'epoch': 31} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2877549864675776, 'r': 0.35655409139151456, 'f1': 0.3184813663785223}, 'combined': 0.23467048048943748, 'epoch': 31} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3265609304836545, 'r': 0.3271546776299884, 'f1': 0.3268575344168822}, 'combined': 0.20918882202680458, 'epoch': 31} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3045383787926617, 'r': 0.34498939684861296, 'f1': 0.3235042920626673}, 'combined': 0.23837158362512326, 'epoch': 31} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3216212283609085, 'r': 0.330709886591709, 'f1': 0.3261022432279594}, 'combined': 0.23380915552193315, 'epoch': 31} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.17708333333333331, 'r': 0.32380952380952377, 'f1': 0.22895622895622894}, 'combined': 0.15263748597081928, 'epoch': 31} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.19186046511627908, 'r': 0.358695652173913, 'f1': 0.25000000000000006}, 'combined': 0.12500000000000003, 'epoch': 31} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2777777777777778, 'r': 0.1724137931034483, 'f1': 0.21276595744680854}, 'combined': 0.14184397163120568, 'epoch': 31} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3066986908783784, 'r': 0.3445268026565465, 'f1': 0.3245140750670242}, 'combined': 0.23911563425991253, 'epoch': 19} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31271473684488904, 'r': 0.30873473110322686, 'f1': 0.31071198921642224}, 'combined': 0.1988556730985102, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2564102564102564, 'r': 0.38095238095238093, 'f1': 0.3065134099616858}, 'combined': 0.20434227330779053, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29418702499903554, 'r': 0.3594998939267151, 'f1': 0.32358060478117656}, 'combined': 0.23842781404928798, 'epoch': 13} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3202762492367604, 'r': 0.31212376289255195, 'f1': 0.31614745781013553}, 'combined': 0.20233437299848672, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2878787878787879, 'r': 0.41304347826086957, 'f1': 0.3392857142857143}, 'combined': 0.16964285714285715, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3189032241859154, 'r': 0.3376622373733222, 'f1': 0.32801474487694154}, 'combined': 0.24169507517248323, 'epoch': 15} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34157316773046453, 'r': 0.3008578947798814, 'f1': 0.3199253278791385}, 'combined': 0.22938042376240123, 'epoch': 15} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35294117647058826, 'r': 0.20689655172413793, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 15} ****************************** Epoch: 32 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 18:26:59.975645: step: 2/459, loss: 0.008148059248924255 2023-01-22 18:27:00.553226: step: 4/459, loss: 0.0066770147532224655 2023-01-22 18:27:01.146831: step: 6/459, loss: 0.0006636575562879443 2023-01-22 18:27:01.755147: step: 8/459, loss: 0.0021431632339954376 2023-01-22 18:27:02.364993: step: 10/459, loss: 0.01377494540065527 2023-01-22 18:27:02.978867: step: 12/459, loss: 0.010302451439201832 2023-01-22 18:27:03.584227: step: 14/459, loss: 0.0019617013167589903 2023-01-22 18:27:04.231451: step: 16/459, loss: 0.6071649789810181 2023-01-22 18:27:04.835422: step: 18/459, loss: 0.014659995213150978 2023-01-22 18:27:05.457639: step: 20/459, loss: 0.015133173204958439 2023-01-22 18:27:06.066731: step: 22/459, loss: 0.009799810126423836 2023-01-22 18:27:06.716475: step: 24/459, loss: 0.004026359878480434 2023-01-22 18:27:07.340598: step: 26/459, loss: 0.003431786550208926 2023-01-22 18:27:07.948054: step: 28/459, loss: 0.013759316876530647 2023-01-22 18:27:08.497418: step: 30/459, loss: 0.0070787896402180195 2023-01-22 18:27:09.141714: step: 32/459, loss: 0.003529209876433015 2023-01-22 18:27:09.711645: step: 34/459, loss: 0.006811780389398336 2023-01-22 18:27:10.262384: step: 36/459, loss: 0.003767602611333132 2023-01-22 18:27:10.880720: step: 38/459, loss: 0.03327057883143425 2023-01-22 18:27:11.607588: step: 40/459, loss: 0.08669744431972504 2023-01-22 18:27:12.201228: step: 42/459, loss: 0.04450737684965134 2023-01-22 18:27:12.831144: step: 44/459, loss: 0.005988616496324539 2023-01-22 18:27:13.424629: step: 46/459, loss: 0.015607425011694431 2023-01-22 18:27:13.998869: step: 48/459, loss: 0.05381040647625923 2023-01-22 18:27:14.525142: step: 50/459, loss: 0.02090137079358101 2023-01-22 18:27:15.178185: step: 52/459, loss: 0.05800952389836311 2023-01-22 18:27:15.719565: step: 54/459, loss: 0.4870007336139679 2023-01-22 18:27:16.348327: step: 56/459, loss: 0.12854355573654175 2023-01-22 18:27:16.911751: step: 58/459, loss: 0.0009705807315185666 2023-01-22 18:27:17.491816: step: 60/459, loss: 0.001911835395731032 2023-01-22 18:27:18.077094: step: 62/459, loss: 0.09223572164773941 2023-01-22 18:27:18.704182: step: 64/459, loss: 0.00839613564312458 2023-01-22 18:27:19.345689: step: 66/459, loss: 0.10300400853157043 2023-01-22 18:27:19.905942: step: 68/459, loss: 0.0006145311053842306 2023-01-22 18:27:20.423945: step: 70/459, loss: 0.0025478664319962263 2023-01-22 18:27:21.051801: step: 72/459, loss: 0.014118949882686138 2023-01-22 18:27:21.629806: step: 74/459, loss: 0.01401839591562748 2023-01-22 18:27:22.224487: step: 76/459, loss: 0.0004386363725643605 2023-01-22 18:27:22.820080: step: 78/459, loss: 0.008048043586313725 2023-01-22 18:27:23.368433: step: 80/459, loss: 0.01939927041530609 2023-01-22 18:27:23.952537: step: 82/459, loss: 0.011331909336149693 2023-01-22 18:27:24.614451: step: 84/459, loss: 0.11296077072620392 2023-01-22 18:27:25.235232: step: 86/459, loss: 0.052697379142045975 2023-01-22 18:27:25.885046: step: 88/459, loss: 0.024707484990358353 2023-01-22 18:27:26.429374: step: 90/459, loss: 0.00262812664732337 2023-01-22 18:27:27.034575: step: 92/459, loss: 0.0023854945320636034 2023-01-22 18:27:27.629935: step: 94/459, loss: 7.16392751201056e-05 2023-01-22 18:27:28.231491: step: 96/459, loss: 0.00530041242018342 2023-01-22 18:27:28.899471: step: 98/459, loss: 0.020766008645296097 2023-01-22 18:27:29.493588: step: 100/459, loss: 0.021270625293254852 2023-01-22 18:27:30.055370: step: 102/459, loss: 0.030496804043650627 2023-01-22 18:27:30.665802: step: 104/459, loss: 0.004880545660853386 2023-01-22 18:27:31.262129: step: 106/459, loss: 0.021334674209356308 2023-01-22 18:27:31.846622: step: 108/459, loss: 0.0025974023155868053 2023-01-22 18:27:32.406803: step: 110/459, loss: 0.00022382025781553239 2023-01-22 18:27:32.949509: step: 112/459, loss: 0.0012097562430426478 2023-01-22 18:27:33.543782: step: 114/459, loss: 0.0013740326976403594 2023-01-22 18:27:34.163685: step: 116/459, loss: 0.0019104174571111798 2023-01-22 18:27:34.767775: step: 118/459, loss: 0.5206948518753052 2023-01-22 18:27:35.367121: step: 120/459, loss: 0.0016549776773899794 2023-01-22 18:27:35.956707: step: 122/459, loss: 0.059533778578042984 2023-01-22 18:27:36.581385: step: 124/459, loss: 0.021869374439120293 2023-01-22 18:27:37.199238: step: 126/459, loss: 0.04389239102602005 2023-01-22 18:27:37.772870: step: 128/459, loss: 0.0368104949593544 2023-01-22 18:27:38.362189: step: 130/459, loss: 0.026864411309361458 2023-01-22 18:27:38.915345: step: 132/459, loss: 0.0013397913426160812 2023-01-22 18:27:39.633673: step: 134/459, loss: 0.01273653656244278 2023-01-22 18:27:40.256317: step: 136/459, loss: 3.344167947769165 2023-01-22 18:27:40.948803: step: 138/459, loss: 0.0022867941297590733 2023-01-22 18:27:41.554173: step: 140/459, loss: 0.05522974207997322 2023-01-22 18:27:42.124813: step: 142/459, loss: 0.0009372398490086198 2023-01-22 18:27:42.644792: step: 144/459, loss: 0.014480635523796082 2023-01-22 18:27:43.268339: step: 146/459, loss: 0.00026856648037210107 2023-01-22 18:27:43.876705: step: 148/459, loss: 0.0004680812417063862 2023-01-22 18:27:44.446258: step: 150/459, loss: 0.0029577547684311867 2023-01-22 18:27:45.158692: step: 152/459, loss: 0.00040439082658849657 2023-01-22 18:27:45.787708: step: 154/459, loss: 0.027599919587373734 2023-01-22 18:27:46.454881: step: 156/459, loss: 0.01828007958829403 2023-01-22 18:27:47.027961: step: 158/459, loss: 0.18991898000240326 2023-01-22 18:27:47.659824: step: 160/459, loss: 0.0076036215759813786 2023-01-22 18:27:48.257424: step: 162/459, loss: 0.0017581962747499347 2023-01-22 18:27:48.818105: step: 164/459, loss: 0.014969698153436184 2023-01-22 18:27:49.481778: step: 166/459, loss: 0.01150697935372591 2023-01-22 18:27:50.065823: step: 168/459, loss: 0.00035959211527369916 2023-01-22 18:27:50.626293: step: 170/459, loss: 0.011942505836486816 2023-01-22 18:27:51.197606: step: 172/459, loss: 0.00456284312531352 2023-01-22 18:27:51.795416: step: 174/459, loss: 0.0008964460575953126 2023-01-22 18:27:52.426927: step: 176/459, loss: 0.03743039071559906 2023-01-22 18:27:53.061909: step: 178/459, loss: 0.010205366648733616 2023-01-22 18:27:53.638314: step: 180/459, loss: 0.01121522206813097 2023-01-22 18:27:54.232751: step: 182/459, loss: 0.0011204323964193463 2023-01-22 18:27:54.806507: step: 184/459, loss: 0.0014123143628239632 2023-01-22 18:27:55.422272: step: 186/459, loss: 0.03638826310634613 2023-01-22 18:27:56.019255: step: 188/459, loss: 0.0007661786512471735 2023-01-22 18:27:56.659894: step: 190/459, loss: 0.013175874017179012 2023-01-22 18:27:57.286811: step: 192/459, loss: 0.00427802000194788 2023-01-22 18:27:57.896479: step: 194/459, loss: 0.0011335160816088319 2023-01-22 18:27:58.537980: step: 196/459, loss: 0.00857358518987894 2023-01-22 18:27:59.176314: step: 198/459, loss: 0.03256786987185478 2023-01-22 18:27:59.771700: step: 200/459, loss: 0.0017513078637421131 2023-01-22 18:28:00.400551: step: 202/459, loss: 1.105126142501831 2023-01-22 18:28:01.026215: step: 204/459, loss: 7.078838825691491e-05 2023-01-22 18:28:01.638623: step: 206/459, loss: 0.03565685823559761 2023-01-22 18:28:02.207917: step: 208/459, loss: 0.015491300262510777 2023-01-22 18:28:02.797089: step: 210/459, loss: 0.33849915862083435 2023-01-22 18:28:03.384976: step: 212/459, loss: 0.007612982764840126 2023-01-22 18:28:04.050162: step: 214/459, loss: 0.014185012318193913 2023-01-22 18:28:04.691194: step: 216/459, loss: 0.030007531866431236 2023-01-22 18:28:05.272774: step: 218/459, loss: 0.007876249961555004 2023-01-22 18:28:05.892742: step: 220/459, loss: 0.034596916288137436 2023-01-22 18:28:06.459925: step: 222/459, loss: 0.005469528026878834 2023-01-22 18:28:07.171596: step: 224/459, loss: 0.011779135093092918 2023-01-22 18:28:07.719259: step: 226/459, loss: 0.005964033305644989 2023-01-22 18:28:08.283898: step: 228/459, loss: 0.005537157412618399 2023-01-22 18:28:08.959729: step: 230/459, loss: 0.009612384252250195 2023-01-22 18:28:09.669474: step: 232/459, loss: 0.18359948694705963 2023-01-22 18:28:10.315264: step: 234/459, loss: 0.004352929536253214 2023-01-22 18:28:10.938604: step: 236/459, loss: 0.008678993210196495 2023-01-22 18:28:11.580615: step: 238/459, loss: 0.03239323943853378 2023-01-22 18:28:12.177123: step: 240/459, loss: 0.0515851154923439 2023-01-22 18:28:12.786981: step: 242/459, loss: 0.013794810511171818 2023-01-22 18:28:13.370738: step: 244/459, loss: 0.010587592609226704 2023-01-22 18:28:13.941332: step: 246/459, loss: 0.017596518620848656 2023-01-22 18:28:14.570401: step: 248/459, loss: 0.026068568229675293 2023-01-22 18:28:15.117544: step: 250/459, loss: 0.005088179837912321 2023-01-22 18:28:15.780437: step: 252/459, loss: 0.01115916483104229 2023-01-22 18:28:16.382740: step: 254/459, loss: 0.11726588010787964 2023-01-22 18:28:17.005983: step: 256/459, loss: 0.00904833059757948 2023-01-22 18:28:17.580372: step: 258/459, loss: 0.03722623735666275 2023-01-22 18:28:18.197846: step: 260/459, loss: 0.021836739033460617 2023-01-22 18:28:18.804302: step: 262/459, loss: 0.005030371248722076 2023-01-22 18:28:19.451740: step: 264/459, loss: 0.05350791662931442 2023-01-22 18:28:20.039446: step: 266/459, loss: 0.01112674456089735 2023-01-22 18:28:20.627405: step: 268/459, loss: 0.004339438863098621 2023-01-22 18:28:21.202719: step: 270/459, loss: 0.03550296276807785 2023-01-22 18:28:21.840947: step: 272/459, loss: 0.012806195765733719 2023-01-22 18:28:22.432586: step: 274/459, loss: 0.0537659153342247 2023-01-22 18:28:23.060199: step: 276/459, loss: 0.04164683446288109 2023-01-22 18:28:23.604790: step: 278/459, loss: 0.0025422631297260523 2023-01-22 18:28:24.210499: step: 280/459, loss: 0.2273435741662979 2023-01-22 18:28:24.824226: step: 282/459, loss: 0.0014436671044677496 2023-01-22 18:28:25.390013: step: 284/459, loss: 3.81689715385437 2023-01-22 18:28:26.025671: step: 286/459, loss: 0.0005104521405883133 2023-01-22 18:28:26.564968: step: 288/459, loss: 0.058855824172496796 2023-01-22 18:28:27.196573: step: 290/459, loss: 0.02026013657450676 2023-01-22 18:28:27.794309: step: 292/459, loss: 0.022215792909264565 2023-01-22 18:28:28.414606: step: 294/459, loss: 0.030973009765148163 2023-01-22 18:28:28.977549: step: 296/459, loss: 0.002688673324882984 2023-01-22 18:28:29.574457: step: 298/459, loss: 0.11308299750089645 2023-01-22 18:28:30.141672: step: 300/459, loss: 0.003618089482188225 2023-01-22 18:28:30.759479: step: 302/459, loss: 0.007237148471176624 2023-01-22 18:28:31.362725: step: 304/459, loss: 0.0024757985956966877 2023-01-22 18:28:31.961579: step: 306/459, loss: 0.0027172488626092672 2023-01-22 18:28:32.516063: step: 308/459, loss: 0.012150338850915432 2023-01-22 18:28:33.179744: step: 310/459, loss: 0.06261182576417923 2023-01-22 18:28:33.767027: step: 312/459, loss: 0.002610422670841217 2023-01-22 18:28:34.334149: step: 314/459, loss: 0.19037489593029022 2023-01-22 18:28:34.954044: step: 316/459, loss: 0.004078502766788006 2023-01-22 18:28:35.551494: step: 318/459, loss: 0.11250381916761398 2023-01-22 18:28:36.178502: step: 320/459, loss: 0.031532395631074905 2023-01-22 18:28:36.806136: step: 322/459, loss: 0.20228399336338043 2023-01-22 18:28:37.363043: step: 324/459, loss: 0.009161693044006824 2023-01-22 18:28:37.976020: step: 326/459, loss: 0.022433562204241753 2023-01-22 18:28:38.606848: step: 328/459, loss: 0.025460276752710342 2023-01-22 18:28:39.161272: step: 330/459, loss: 0.00040108515531755984 2023-01-22 18:28:39.739643: step: 332/459, loss: 0.09122519195079803 2023-01-22 18:28:40.428221: step: 334/459, loss: 0.002960823941975832 2023-01-22 18:28:41.017132: step: 336/459, loss: 0.002689975779503584 2023-01-22 18:28:41.613487: step: 338/459, loss: 0.006981514859944582 2023-01-22 18:28:42.181729: step: 340/459, loss: 0.0023056156933307648 2023-01-22 18:28:42.790511: step: 342/459, loss: 0.029072904959321022 2023-01-22 18:28:43.491181: step: 344/459, loss: 0.027912134304642677 2023-01-22 18:28:44.062196: step: 346/459, loss: 0.0035996572114527225 2023-01-22 18:28:44.668031: step: 348/459, loss: 0.023217646405100822 2023-01-22 18:28:45.191540: step: 350/459, loss: 0.012559445574879646 2023-01-22 18:28:45.824905: step: 352/459, loss: 0.02636490762233734 2023-01-22 18:28:46.443005: step: 354/459, loss: 0.01424404513090849 2023-01-22 18:28:47.022485: step: 356/459, loss: 0.005482458043843508 2023-01-22 18:28:47.664996: step: 358/459, loss: 0.00039722659857943654 2023-01-22 18:28:48.349440: step: 360/459, loss: 0.003268652129918337 2023-01-22 18:28:48.934382: step: 362/459, loss: 0.004675100091844797 2023-01-22 18:28:49.570155: step: 364/459, loss: 0.0024130861274898052 2023-01-22 18:28:50.207498: step: 366/459, loss: 0.009143228642642498 2023-01-22 18:28:50.786190: step: 368/459, loss: 0.0064204684458673 2023-01-22 18:28:51.395452: step: 370/459, loss: 0.020430635660886765 2023-01-22 18:28:52.029674: step: 372/459, loss: 0.008099931292235851 2023-01-22 18:28:52.605617: step: 374/459, loss: 0.2858628034591675 2023-01-22 18:28:53.246876: step: 376/459, loss: 0.09692759811878204 2023-01-22 18:28:53.917264: step: 378/459, loss: 0.0035673747770488262 2023-01-22 18:28:54.532355: step: 380/459, loss: 0.004327279515564442 2023-01-22 18:28:55.047463: step: 382/459, loss: 0.0001279384596273303 2023-01-22 18:28:55.633765: step: 384/459, loss: 0.07124899327754974 2023-01-22 18:28:56.209278: step: 386/459, loss: 0.0017125803278759122 2023-01-22 18:28:56.769908: step: 388/459, loss: 0.017816593870520592 2023-01-22 18:28:57.344424: step: 390/459, loss: 0.11428958177566528 2023-01-22 18:28:57.908173: step: 392/459, loss: 0.17031976580619812 2023-01-22 18:28:58.504673: step: 394/459, loss: 0.0035712486132979393 2023-01-22 18:28:59.100114: step: 396/459, loss: 0.005258253775537014 2023-01-22 18:28:59.762014: step: 398/459, loss: 0.06745173037052155 2023-01-22 18:29:00.406362: step: 400/459, loss: 0.0015027946792542934 2023-01-22 18:29:00.982399: step: 402/459, loss: 0.016886748373508453 2023-01-22 18:29:01.543174: step: 404/459, loss: 0.05643637850880623 2023-01-22 18:29:02.159667: step: 406/459, loss: 0.008320698514580727 2023-01-22 18:29:02.770317: step: 408/459, loss: 0.007371270097792149 2023-01-22 18:29:03.382371: step: 410/459, loss: 0.005138150881975889 2023-01-22 18:29:04.009332: step: 412/459, loss: 0.3384832441806793 2023-01-22 18:29:04.547479: step: 414/459, loss: 0.0018273405730724335 2023-01-22 18:29:05.147915: step: 416/459, loss: 0.08488927781581879 2023-01-22 18:29:05.717680: step: 418/459, loss: 0.007447121199220419 2023-01-22 18:29:06.305613: step: 420/459, loss: 0.11158325523138046 2023-01-22 18:29:06.926979: step: 422/459, loss: 0.010250544175505638 2023-01-22 18:29:07.562352: step: 424/459, loss: 0.9621838331222534 2023-01-22 18:29:08.246888: step: 426/459, loss: 0.008301768451929092 2023-01-22 18:29:08.846108: step: 428/459, loss: 0.007777152117341757 2023-01-22 18:29:09.428490: step: 430/459, loss: 0.00987160298973322 2023-01-22 18:29:10.043554: step: 432/459, loss: 0.0021661100909113884 2023-01-22 18:29:10.717295: step: 434/459, loss: 0.0710400938987732 2023-01-22 18:29:11.335714: step: 436/459, loss: 0.14406639337539673 2023-01-22 18:29:11.942457: step: 438/459, loss: 0.0036502243019640446 2023-01-22 18:29:12.586948: step: 440/459, loss: 0.14975155889987946 2023-01-22 18:29:13.197150: step: 442/459, loss: 0.09057191759347916 2023-01-22 18:29:13.828097: step: 444/459, loss: 0.015324295498430729 2023-01-22 18:29:14.408609: step: 446/459, loss: 0.0014771625865250826 2023-01-22 18:29:15.005478: step: 448/459, loss: 0.01960756815969944 2023-01-22 18:29:15.585431: step: 450/459, loss: 0.013189616613090038 2023-01-22 18:29:16.165218: step: 452/459, loss: 0.020623210817575455 2023-01-22 18:29:16.794243: step: 454/459, loss: 0.00010311633377568796 2023-01-22 18:29:17.392642: step: 456/459, loss: 0.020538819953799248 2023-01-22 18:29:18.064337: step: 458/459, loss: 0.01055940706282854 2023-01-22 18:29:18.705604: step: 460/459, loss: 0.0015641917707398534 2023-01-22 18:29:19.456509: step: 462/459, loss: 0.013009244576096535 2023-01-22 18:29:20.054679: step: 464/459, loss: 0.002040938241407275 2023-01-22 18:29:20.623652: step: 466/459, loss: 0.0011213405523449183 2023-01-22 18:29:21.199959: step: 468/459, loss: 0.0009375425870530307 2023-01-22 18:29:21.792066: step: 470/459, loss: 0.004206739831715822 2023-01-22 18:29:22.410433: step: 472/459, loss: 0.13426345586776733 2023-01-22 18:29:23.117120: step: 474/459, loss: 0.003647989360615611 2023-01-22 18:29:23.768613: step: 476/459, loss: 0.04452262818813324 2023-01-22 18:29:24.352015: step: 478/459, loss: 0.0422191247344017 2023-01-22 18:29:24.959869: step: 480/459, loss: 0.004184972494840622 2023-01-22 18:29:25.524226: step: 482/459, loss: 0.011317534372210503 2023-01-22 18:29:26.165417: step: 484/459, loss: 0.08779405057430267 2023-01-22 18:29:26.731586: step: 486/459, loss: 0.0004716973635368049 2023-01-22 18:29:27.321232: step: 488/459, loss: 0.0004271832585800439 2023-01-22 18:29:27.958462: step: 490/459, loss: 0.028455719351768494 2023-01-22 18:29:28.555295: step: 492/459, loss: 0.22753117978572845 2023-01-22 18:29:29.148727: step: 494/459, loss: 0.0021309705916792154 2023-01-22 18:29:29.771142: step: 496/459, loss: 0.06532697379589081 2023-01-22 18:29:30.405402: step: 498/459, loss: 0.001327389501966536 2023-01-22 18:29:31.046130: step: 500/459, loss: 0.016743620857596397 2023-01-22 18:29:31.663887: step: 502/459, loss: 0.052909716963768005 2023-01-22 18:29:32.285251: step: 504/459, loss: 0.00924602709710598 2023-01-22 18:29:32.825581: step: 506/459, loss: 0.046931903809309006 2023-01-22 18:29:33.417802: step: 508/459, loss: 0.002669480862095952 2023-01-22 18:29:34.069165: step: 510/459, loss: 0.027114013209939003 2023-01-22 18:29:34.668926: step: 512/459, loss: 0.044073548167943954 2023-01-22 18:29:35.193595: step: 514/459, loss: 0.007550865411758423 2023-01-22 18:29:35.808807: step: 516/459, loss: 0.007436466868966818 2023-01-22 18:29:36.419019: step: 518/459, loss: 0.021341029554605484 2023-01-22 18:29:37.111541: step: 520/459, loss: 0.007934284396469593 2023-01-22 18:29:37.753443: step: 522/459, loss: 0.007329767104238272 2023-01-22 18:29:38.288440: step: 524/459, loss: 0.007693728897720575 2023-01-22 18:29:38.853259: step: 526/459, loss: 0.182826966047287 2023-01-22 18:29:39.430477: step: 528/459, loss: 0.021265272051095963 2023-01-22 18:29:40.066191: step: 530/459, loss: 0.024679189547896385 2023-01-22 18:29:40.646432: step: 532/459, loss: 0.8966848850250244 2023-01-22 18:29:41.295532: step: 534/459, loss: 0.02004648558795452 2023-01-22 18:29:41.896756: step: 536/459, loss: 0.02901262603700161 2023-01-22 18:29:42.501511: step: 538/459, loss: 2.9881959562771954e-05 2023-01-22 18:29:43.117518: step: 540/459, loss: 0.8521403074264526 2023-01-22 18:29:43.775727: step: 542/459, loss: 0.014357915148139 2023-01-22 18:29:44.337340: step: 544/459, loss: 0.01213186513632536 2023-01-22 18:29:44.901347: step: 546/459, loss: 1.2428461559466086e-05 2023-01-22 18:29:45.531326: step: 548/459, loss: 0.034278206527233124 2023-01-22 18:29:46.101274: step: 550/459, loss: 0.0033448473550379276 2023-01-22 18:29:46.608241: step: 552/459, loss: 0.008372804149985313 2023-01-22 18:29:47.172368: step: 554/459, loss: 0.0030587781220674515 2023-01-22 18:29:47.741804: step: 556/459, loss: 0.011416607536375523 2023-01-22 18:29:48.332215: step: 558/459, loss: 0.10498198121786118 2023-01-22 18:29:48.977483: step: 560/459, loss: 0.005340374540537596 2023-01-22 18:29:49.609388: step: 562/459, loss: 0.057365309447050095 2023-01-22 18:29:50.194289: step: 564/459, loss: 0.012788652442395687 2023-01-22 18:29:50.744796: step: 566/459, loss: 0.004599615931510925 2023-01-22 18:29:51.334378: step: 568/459, loss: 0.10837876796722412 2023-01-22 18:29:51.946676: step: 570/459, loss: 0.0018521120073273778 2023-01-22 18:29:52.524255: step: 572/459, loss: 0.014907168224453926 2023-01-22 18:29:53.169626: step: 574/459, loss: 0.011923305690288544 2023-01-22 18:29:53.756171: step: 576/459, loss: 0.0036433753557503223 2023-01-22 18:29:54.382045: step: 578/459, loss: 0.06492745131254196 2023-01-22 18:29:54.976441: step: 580/459, loss: 0.0470709502696991 2023-01-22 18:29:55.648014: step: 582/459, loss: 0.006006093695759773 2023-01-22 18:29:56.280926: step: 584/459, loss: 0.014616794884204865 2023-01-22 18:29:56.887591: step: 586/459, loss: 0.045272648334503174 2023-01-22 18:29:57.519889: step: 588/459, loss: 0.024319753050804138 2023-01-22 18:29:58.077800: step: 590/459, loss: 0.09540766477584839 2023-01-22 18:29:58.717498: step: 592/459, loss: 0.02192680537700653 2023-01-22 18:29:59.391860: step: 594/459, loss: 0.0036340474616736174 2023-01-22 18:29:59.960067: step: 596/459, loss: 0.0058111571706831455 2023-01-22 18:30:00.602313: step: 598/459, loss: 0.006104103289544582 2023-01-22 18:30:01.261748: step: 600/459, loss: 0.05832574889063835 2023-01-22 18:30:01.942698: step: 602/459, loss: 0.02890695072710514 2023-01-22 18:30:02.561810: step: 604/459, loss: 0.0034624694380909204 2023-01-22 18:30:03.167068: step: 606/459, loss: 0.02875342033803463 2023-01-22 18:30:03.726855: step: 608/459, loss: 0.0032975454814732075 2023-01-22 18:30:04.254165: step: 610/459, loss: 0.002872464247047901 2023-01-22 18:30:04.879696: step: 612/459, loss: 0.0013157837092876434 2023-01-22 18:30:05.653141: step: 614/459, loss: 0.0014205594779923558 2023-01-22 18:30:06.239860: step: 616/459, loss: 0.0008877580403350294 2023-01-22 18:30:06.809093: step: 618/459, loss: 0.0313604399561882 2023-01-22 18:30:07.371863: step: 620/459, loss: 0.0012171933194622397 2023-01-22 18:30:07.961824: step: 622/459, loss: 0.0063144732266664505 2023-01-22 18:30:08.567729: step: 624/459, loss: 0.007227122317999601 2023-01-22 18:30:09.143880: step: 626/459, loss: 0.020086102187633514 2023-01-22 18:30:09.799778: step: 628/459, loss: 0.0347590334713459 2023-01-22 18:30:10.370189: step: 630/459, loss: 0.03537003695964813 2023-01-22 18:30:10.913935: step: 632/459, loss: 0.061258237808942795 2023-01-22 18:30:11.560421: step: 634/459, loss: 0.0015751644968986511 2023-01-22 18:30:12.200819: step: 636/459, loss: 0.016308272257447243 2023-01-22 18:30:12.851899: step: 638/459, loss: 0.00032236697734333575 2023-01-22 18:30:13.469230: step: 640/459, loss: 0.011078651063144207 2023-01-22 18:30:14.014755: step: 642/459, loss: 0.00030173579580150545 2023-01-22 18:30:14.600542: step: 644/459, loss: 0.013170440681278706 2023-01-22 18:30:15.246100: step: 646/459, loss: 0.0006557632004842162 2023-01-22 18:30:15.890957: step: 648/459, loss: 0.009704393334686756 2023-01-22 18:30:16.532163: step: 650/459, loss: 0.0112488167360425 2023-01-22 18:30:17.135965: step: 652/459, loss: 0.00023623931338079274 2023-01-22 18:30:17.749493: step: 654/459, loss: 0.0019988277927041054 2023-01-22 18:30:18.383548: step: 656/459, loss: 0.0008036525687202811 2023-01-22 18:30:19.027630: step: 658/459, loss: 0.021892469376325607 2023-01-22 18:30:19.696115: step: 660/459, loss: 0.0035172177013009787 2023-01-22 18:30:20.312872: step: 662/459, loss: 0.001849631080403924 2023-01-22 18:30:20.955405: step: 664/459, loss: 0.008093730546534061 2023-01-22 18:30:21.530149: step: 666/459, loss: 7.148668373702094e-05 2023-01-22 18:30:22.243775: step: 668/459, loss: 0.0017959419637918472 2023-01-22 18:30:22.881517: step: 670/459, loss: 0.04407283663749695 2023-01-22 18:30:23.533196: step: 672/459, loss: 0.010154498741030693 2023-01-22 18:30:24.178841: step: 674/459, loss: 0.004571187309920788 2023-01-22 18:30:24.846000: step: 676/459, loss: 0.05855986103415489 2023-01-22 18:30:25.394107: step: 678/459, loss: 0.03279520571231842 2023-01-22 18:30:26.017587: step: 680/459, loss: 0.3403117060661316 2023-01-22 18:30:26.668230: step: 682/459, loss: 0.015369665808975697 2023-01-22 18:30:27.310478: step: 684/459, loss: 0.006735701579600573 2023-01-22 18:30:27.880504: step: 686/459, loss: 0.05410154536366463 2023-01-22 18:30:28.486621: step: 688/459, loss: 0.06739787012338638 2023-01-22 18:30:29.082620: step: 690/459, loss: 0.023647328838706017 2023-01-22 18:30:29.773737: step: 692/459, loss: 0.008427153341472149 2023-01-22 18:30:30.425311: step: 694/459, loss: 0.08066233992576599 2023-01-22 18:30:31.109588: step: 696/459, loss: 0.05319619178771973 2023-01-22 18:30:31.701363: step: 698/459, loss: 0.0012337097432464361 2023-01-22 18:30:32.339361: step: 700/459, loss: 0.011203749105334282 2023-01-22 18:30:32.906649: step: 702/459, loss: 0.0012996048899367452 2023-01-22 18:30:33.487606: step: 704/459, loss: 0.08911425620317459 2023-01-22 18:30:34.122077: step: 706/459, loss: 0.021211909130215645 2023-01-22 18:30:34.717131: step: 708/459, loss: 0.0018392677884548903 2023-01-22 18:30:35.289456: step: 710/459, loss: 0.0008058716775849462 2023-01-22 18:30:35.904493: step: 712/459, loss: 0.009576361626386642 2023-01-22 18:30:36.548532: step: 714/459, loss: 0.01879831589758396 2023-01-22 18:30:37.112911: step: 716/459, loss: 0.013181252405047417 2023-01-22 18:30:37.722135: step: 718/459, loss: 0.13126623630523682 2023-01-22 18:30:38.292657: step: 720/459, loss: 0.00047602321137674153 2023-01-22 18:30:38.883853: step: 722/459, loss: 0.0012785461731255054 2023-01-22 18:30:39.522888: step: 724/459, loss: 0.07531910389661789 2023-01-22 18:30:40.106578: step: 726/459, loss: 0.0031726276502013206 2023-01-22 18:30:40.697920: step: 728/459, loss: 0.020753851160407066 2023-01-22 18:30:41.315430: step: 730/459, loss: 0.24442721903324127 2023-01-22 18:30:41.908903: step: 732/459, loss: 0.02056899107992649 2023-01-22 18:30:42.543439: step: 734/459, loss: 0.011536579579114914 2023-01-22 18:30:43.134501: step: 736/459, loss: 0.0019203022820875049 2023-01-22 18:30:43.712312: step: 738/459, loss: 0.003059627488255501 2023-01-22 18:30:44.394573: step: 740/459, loss: 0.13032841682434082 2023-01-22 18:30:45.001929: step: 742/459, loss: 0.005485199391841888 2023-01-22 18:30:45.597849: step: 744/459, loss: 0.1113811656832695 2023-01-22 18:30:46.159041: step: 746/459, loss: 0.003202663501724601 2023-01-22 18:30:46.729296: step: 748/459, loss: 0.02034015581011772 2023-01-22 18:30:47.347108: step: 750/459, loss: 0.004890326410531998 2023-01-22 18:30:47.964934: step: 752/459, loss: 0.03545766696333885 2023-01-22 18:30:48.633317: step: 754/459, loss: 0.03314826264977455 2023-01-22 18:30:49.238332: step: 756/459, loss: 0.013928729109466076 2023-01-22 18:30:49.863301: step: 758/459, loss: 0.006439815275371075 2023-01-22 18:30:50.504662: step: 760/459, loss: 0.0002261567860841751 2023-01-22 18:30:51.209250: step: 762/459, loss: 0.03038681112229824 2023-01-22 18:30:51.850680: step: 764/459, loss: 0.02847125753760338 2023-01-22 18:30:52.464277: step: 766/459, loss: 0.046801500022411346 2023-01-22 18:30:53.112882: step: 768/459, loss: 0.009430193342268467 2023-01-22 18:30:53.717820: step: 770/459, loss: 0.0011056638322770596 2023-01-22 18:30:54.330246: step: 772/459, loss: 0.0020958033856004477 2023-01-22 18:30:54.945925: step: 774/459, loss: 0.3442172110080719 2023-01-22 18:30:55.547690: step: 776/459, loss: 0.08817853033542633 2023-01-22 18:30:56.140682: step: 778/459, loss: 0.009362312033772469 2023-01-22 18:30:56.753236: step: 780/459, loss: 0.15598484873771667 2023-01-22 18:30:57.374510: step: 782/459, loss: 0.06961601227521896 2023-01-22 18:30:57.978695: step: 784/459, loss: 0.3742819130420685 2023-01-22 18:30:58.614772: step: 786/459, loss: 0.035468969494104385 2023-01-22 18:30:59.162938: step: 788/459, loss: 0.8063175082206726 2023-01-22 18:30:59.673121: step: 790/459, loss: 0.056321099400520325 2023-01-22 18:31:00.286436: step: 792/459, loss: 0.02722703479230404 2023-01-22 18:31:00.906805: step: 794/459, loss: 0.0025154175236821175 2023-01-22 18:31:01.524070: step: 796/459, loss: 0.0046972124837338924 2023-01-22 18:31:02.142246: step: 798/459, loss: 0.7363826632499695 2023-01-22 18:31:02.745697: step: 800/459, loss: 0.9682532548904419 2023-01-22 18:31:03.348217: step: 802/459, loss: 0.00031113828299567103 2023-01-22 18:31:03.940823: step: 804/459, loss: 0.02743474207818508 2023-01-22 18:31:04.528796: step: 806/459, loss: 0.011904645711183548 2023-01-22 18:31:05.140722: step: 808/459, loss: 0.0011301263002678752 2023-01-22 18:31:05.751428: step: 810/459, loss: 0.04029640927910805 2023-01-22 18:31:06.322414: step: 812/459, loss: 0.05084746330976486 2023-01-22 18:31:07.020500: step: 814/459, loss: 0.07654024660587311 2023-01-22 18:31:07.640948: step: 816/459, loss: 0.0031709177419543266 2023-01-22 18:31:08.274069: step: 818/459, loss: 0.010187708772718906 2023-01-22 18:31:08.862449: step: 820/459, loss: 0.005908410996198654 2023-01-22 18:31:09.468173: step: 822/459, loss: 0.6332562565803528 2023-01-22 18:31:10.035215: step: 824/459, loss: 0.005710506811738014 2023-01-22 18:31:10.635149: step: 826/459, loss: 0.008832644671201706 2023-01-22 18:31:11.324709: step: 828/459, loss: 0.02503044717013836 2023-01-22 18:31:11.898303: step: 830/459, loss: 0.036701034754514694 2023-01-22 18:31:12.463397: step: 832/459, loss: 0.0005352243897505105 2023-01-22 18:31:13.056123: step: 834/459, loss: 0.02370169758796692 2023-01-22 18:31:13.643931: step: 836/459, loss: 0.0012770730536431074 2023-01-22 18:31:14.284805: step: 838/459, loss: 0.38082122802734375 2023-01-22 18:31:14.895129: step: 840/459, loss: 0.006596234627068043 2023-01-22 18:31:15.466910: step: 842/459, loss: 0.12550343573093414 2023-01-22 18:31:16.016990: step: 844/459, loss: 0.004402824677526951 2023-01-22 18:31:16.641967: step: 846/459, loss: 0.030555786564946175 2023-01-22 18:31:17.352734: step: 848/459, loss: 0.0251186303794384 2023-01-22 18:31:18.003222: step: 850/459, loss: 0.013550657778978348 2023-01-22 18:31:18.623313: step: 852/459, loss: 0.0014126345049589872 2023-01-22 18:31:19.196840: step: 854/459, loss: 0.004328660201281309 2023-01-22 18:31:19.836817: step: 856/459, loss: 0.0015067917993292212 2023-01-22 18:31:20.465192: step: 858/459, loss: 0.0010624155402183533 2023-01-22 18:31:21.164391: step: 860/459, loss: 0.011390852741897106 2023-01-22 18:31:21.751934: step: 862/459, loss: 0.01744629442691803 2023-01-22 18:31:22.487670: step: 864/459, loss: 0.0006727349245920777 2023-01-22 18:31:23.119714: step: 866/459, loss: 0.011185260489583015 2023-01-22 18:31:23.668558: step: 868/459, loss: 0.0008222234901040792 2023-01-22 18:31:24.226611: step: 870/459, loss: 0.011469247750937939 2023-01-22 18:31:24.934637: step: 872/459, loss: 0.012564464472234249 2023-01-22 18:31:25.504712: step: 874/459, loss: 0.0023684115149080753 2023-01-22 18:31:26.107368: step: 876/459, loss: 0.001058204798027873 2023-01-22 18:31:26.644609: step: 878/459, loss: 0.026455730199813843 2023-01-22 18:31:27.145238: step: 880/459, loss: 0.0032328099478036165 2023-01-22 18:31:27.740175: step: 882/459, loss: 0.05481947213411331 2023-01-22 18:31:28.320441: step: 884/459, loss: 0.013298179022967815 2023-01-22 18:31:28.949363: step: 886/459, loss: 0.0006241186638362706 2023-01-22 18:31:29.501922: step: 888/459, loss: 0.009149898774921894 2023-01-22 18:31:30.149562: step: 890/459, loss: 0.031851042062044144 2023-01-22 18:31:30.707384: step: 892/459, loss: 0.005942895542830229 2023-01-22 18:31:31.312242: step: 894/459, loss: 0.12018606066703796 2023-01-22 18:31:31.912617: step: 896/459, loss: 0.004651620052754879 2023-01-22 18:31:32.540672: step: 898/459, loss: 0.06588160246610641 2023-01-22 18:31:33.122713: step: 900/459, loss: 0.021710168570280075 2023-01-22 18:31:33.728987: step: 902/459, loss: 8.695674478076398e-05 2023-01-22 18:31:34.302537: step: 904/459, loss: 0.01098797470331192 2023-01-22 18:31:34.923047: step: 906/459, loss: 7.344595432281494 2023-01-22 18:31:35.470772: step: 908/459, loss: 0.3052416443824768 2023-01-22 18:31:36.153766: step: 910/459, loss: 0.02097228728234768 2023-01-22 18:31:36.706886: step: 912/459, loss: 0.00022844583145342767 2023-01-22 18:31:37.269011: step: 914/459, loss: 0.03864922374486923 2023-01-22 18:31:37.880005: step: 916/459, loss: 0.005172533914446831 2023-01-22 18:31:38.525621: step: 918/459, loss: 0.02516728639602661 2023-01-22 18:31:38.941890: step: 920/459, loss: 8.890528988558799e-06 ================================================== Loss: 0.081 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3026778286189684, 'r': 0.3451790796963947, 'f1': 0.3225343528368795}, 'combined': 0.23765689156401645, 'epoch': 32} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3219983819714017, 'r': 0.3114602531068831, 'f1': 0.31664166212344863}, 'combined': 0.2026506637590071, 'epoch': 32} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29465629657228015, 'r': 0.3410632654821459, 'f1': 0.31616594706964096}, 'combined': 0.23296438205131437, 'epoch': 32} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33046965896974695, 'r': 0.31364574905855985, 'f1': 0.321837988772776}, 'combined': 0.2059763128145766, 'epoch': 32} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31315088147886233, 'r': 0.3452384480820095, 'f1': 0.3284127475437166}, 'combined': 0.24198834029537009, 'epoch': 32} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3387240582733124, 'r': 0.322976631680843, 'f1': 0.33066296309275295}, 'combined': 0.23707910561367196, 'epoch': 32} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23270440251572325, 'r': 0.35238095238095235, 'f1': 0.2803030303030303}, 'combined': 0.18686868686868685, 'epoch': 32} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.20270270270270271, 'r': 0.32608695652173914, 'f1': 0.25}, 'combined': 0.125, 'epoch': 32} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.20689655172413793, 'f1': 0.2553191489361702}, 'combined': 0.17021276595744678, 'epoch': 32} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3066986908783784, 'r': 0.3445268026565465, 'f1': 0.3245140750670242}, 'combined': 0.23911563425991253, 'epoch': 19} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31271473684488904, 'r': 0.30873473110322686, 'f1': 0.31071198921642224}, 'combined': 0.1988556730985102, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2564102564102564, 'r': 0.38095238095238093, 'f1': 0.3065134099616858}, 'combined': 0.20434227330779053, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29418702499903554, 'r': 0.3594998939267151, 'f1': 0.32358060478117656}, 'combined': 0.23842781404928798, 'epoch': 13} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3202762492367604, 'r': 0.31212376289255195, 'f1': 0.31614745781013553}, 'combined': 0.20233437299848672, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2878787878787879, 'r': 0.41304347826086957, 'f1': 0.3392857142857143}, 'combined': 0.16964285714285715, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3189032241859154, 'r': 0.3376622373733222, 'f1': 0.32801474487694154}, 'combined': 0.24169507517248323, 'epoch': 15} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34157316773046453, 'r': 0.3008578947798814, 'f1': 0.3199253278791385}, 'combined': 0.22938042376240123, 'epoch': 15} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35294117647058826, 'r': 0.20689655172413793, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 15} ****************************** Epoch: 33 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 18:34:11.538646: step: 2/459, loss: 0.004234631545841694 2023-01-22 18:34:12.064191: step: 4/459, loss: 0.036366160959005356 2023-01-22 18:34:12.660913: step: 6/459, loss: 0.015352167189121246 2023-01-22 18:34:13.307017: step: 8/459, loss: 0.025266166776418686 2023-01-22 18:34:13.871077: step: 10/459, loss: 0.04391976818442345 2023-01-22 18:34:14.443371: step: 12/459, loss: 0.0036374118644744158 2023-01-22 18:34:14.960697: step: 14/459, loss: 0.0018145801732316613 2023-01-22 18:34:15.598627: step: 16/459, loss: 0.038366541266441345 2023-01-22 18:34:16.181832: step: 18/459, loss: 0.016915515065193176 2023-01-22 18:34:16.736873: step: 20/459, loss: 0.016350751742720604 2023-01-22 18:34:17.329445: step: 22/459, loss: 0.022700296714901924 2023-01-22 18:34:17.925473: step: 24/459, loss: 0.025238431990146637 2023-01-22 18:34:18.519620: step: 26/459, loss: 0.003780787345021963 2023-01-22 18:34:19.097313: step: 28/459, loss: 0.03528671711683273 2023-01-22 18:34:19.731164: step: 30/459, loss: 0.014186060056090355 2023-01-22 18:34:20.395442: step: 32/459, loss: 0.010120083577930927 2023-01-22 18:34:21.056595: step: 34/459, loss: 0.010880585759878159 2023-01-22 18:34:21.646229: step: 36/459, loss: 0.00020664260955527425 2023-01-22 18:34:22.247442: step: 38/459, loss: 0.0010706285247579217 2023-01-22 18:34:22.843566: step: 40/459, loss: 0.02518249675631523 2023-01-22 18:34:23.483651: step: 42/459, loss: 0.007937159389257431 2023-01-22 18:34:24.041978: step: 44/459, loss: 0.049875713884830475 2023-01-22 18:34:24.573767: step: 46/459, loss: 0.02031470276415348 2023-01-22 18:34:25.202511: step: 48/459, loss: 0.15249694883823395 2023-01-22 18:34:25.806079: step: 50/459, loss: 0.022966613993048668 2023-01-22 18:34:26.400036: step: 52/459, loss: 0.0233318954706192 2023-01-22 18:34:26.988332: step: 54/459, loss: 0.0009438458946533501 2023-01-22 18:34:27.629664: step: 56/459, loss: 0.01398018840700388 2023-01-22 18:34:28.205778: step: 58/459, loss: 0.0006099742604419589 2023-01-22 18:34:28.804829: step: 60/459, loss: 0.036208875477313995 2023-01-22 18:34:29.398320: step: 62/459, loss: 0.015104041434824467 2023-01-22 18:34:30.031690: step: 64/459, loss: 0.08306961506605148 2023-01-22 18:34:30.643159: step: 66/459, loss: 0.01637197658419609 2023-01-22 18:34:31.203367: step: 68/459, loss: 0.0057400893419981 2023-01-22 18:34:31.838799: step: 70/459, loss: 0.058853376656770706 2023-01-22 18:34:32.436550: step: 72/459, loss: 0.0010736885014921427 2023-01-22 18:34:33.107214: step: 74/459, loss: 0.019139790907502174 2023-01-22 18:34:33.678494: step: 76/459, loss: 0.01699014939367771 2023-01-22 18:34:34.331303: step: 78/459, loss: 0.0019620945677161217 2023-01-22 18:34:34.920092: step: 80/459, loss: 0.09695494174957275 2023-01-22 18:34:35.508196: step: 82/459, loss: 0.010676287114620209 2023-01-22 18:34:36.081043: step: 84/459, loss: 0.05369144678115845 2023-01-22 18:34:36.691023: step: 86/459, loss: 0.0017050557071343064 2023-01-22 18:34:37.360307: step: 88/459, loss: 1.02634859085083 2023-01-22 18:34:37.985984: step: 90/459, loss: 0.03623795881867409 2023-01-22 18:34:38.594075: step: 92/459, loss: 0.00042456775554455817 2023-01-22 18:34:39.199239: step: 94/459, loss: 0.031176570802927017 2023-01-22 18:34:39.769832: step: 96/459, loss: 0.011980554088950157 2023-01-22 18:34:40.322023: step: 98/459, loss: 0.0007641740958206356 2023-01-22 18:34:40.953235: step: 100/459, loss: 0.017860669642686844 2023-01-22 18:34:41.568347: step: 102/459, loss: 0.0008653693366795778 2023-01-22 18:34:42.129990: step: 104/459, loss: 0.02076854556798935 2023-01-22 18:34:42.784104: step: 106/459, loss: 0.037613753229379654 2023-01-22 18:34:43.483777: step: 108/459, loss: 0.04106057807803154 2023-01-22 18:34:44.212514: step: 110/459, loss: 8.559953130315989e-05 2023-01-22 18:34:44.827208: step: 112/459, loss: 0.004098064266145229 2023-01-22 18:34:45.368429: step: 114/459, loss: 0.003412562655285001 2023-01-22 18:34:45.986701: step: 116/459, loss: 0.01962643302977085 2023-01-22 18:34:46.606199: step: 118/459, loss: 0.03812236711382866 2023-01-22 18:34:47.336358: step: 120/459, loss: 0.01827421598136425 2023-01-22 18:34:48.004550: step: 122/459, loss: 0.009726119227707386 2023-01-22 18:34:48.543055: step: 124/459, loss: 0.006171009503304958 2023-01-22 18:34:49.149530: step: 126/459, loss: 0.001426915405318141 2023-01-22 18:34:49.801936: step: 128/459, loss: 0.028269296512007713 2023-01-22 18:34:50.382418: step: 130/459, loss: 0.009778803214430809 2023-01-22 18:34:50.967406: step: 132/459, loss: 0.015793204307556152 2023-01-22 18:34:51.611603: step: 134/459, loss: 0.0014910063473507762 2023-01-22 18:34:52.149937: step: 136/459, loss: 0.02227836474776268 2023-01-22 18:34:52.817220: step: 138/459, loss: 0.0017387197585776448 2023-01-22 18:34:53.408006: step: 140/459, loss: 0.0033323217649012804 2023-01-22 18:34:53.979369: step: 142/459, loss: 0.010578114539384842 2023-01-22 18:34:54.563451: step: 144/459, loss: 0.019737329334020615 2023-01-22 18:34:55.147852: step: 146/459, loss: 0.003673713654279709 2023-01-22 18:34:55.687011: step: 148/459, loss: 0.02575099654495716 2023-01-22 18:34:56.267832: step: 150/459, loss: 0.036868054419755936 2023-01-22 18:34:56.880726: step: 152/459, loss: 0.032901957631111145 2023-01-22 18:34:57.503346: step: 154/459, loss: 9.943557233782485e-05 2023-01-22 18:34:58.136046: step: 156/459, loss: 0.011235338635742664 2023-01-22 18:34:58.743837: step: 158/459, loss: 0.015066743828356266 2023-01-22 18:34:59.304997: step: 160/459, loss: 0.7496212124824524 2023-01-22 18:34:59.877873: step: 162/459, loss: 0.004793747328221798 2023-01-22 18:35:00.503521: step: 164/459, loss: 0.006832163780927658 2023-01-22 18:35:01.114765: step: 166/459, loss: 0.007423004135489464 2023-01-22 18:35:01.706094: step: 168/459, loss: 0.004643104504793882 2023-01-22 18:35:02.311159: step: 170/459, loss: 0.03806466981768608 2023-01-22 18:35:02.971300: step: 172/459, loss: 0.0023096418008208275 2023-01-22 18:35:03.598379: step: 174/459, loss: 0.04577695205807686 2023-01-22 18:35:04.135587: step: 176/459, loss: 0.002941898535937071 2023-01-22 18:35:04.745495: step: 178/459, loss: 0.021753516048192978 2023-01-22 18:35:05.321557: step: 180/459, loss: 0.0035899702925235033 2023-01-22 18:35:05.906854: step: 182/459, loss: 0.010667935013771057 2023-01-22 18:35:06.476592: step: 184/459, loss: 0.0020398462656885386 2023-01-22 18:35:06.993701: step: 186/459, loss: 0.003117984626442194 2023-01-22 18:35:07.573698: step: 188/459, loss: 0.0942942276597023 2023-01-22 18:35:08.163545: step: 190/459, loss: 0.010487518273293972 2023-01-22 18:35:08.769570: step: 192/459, loss: 0.010035332292318344 2023-01-22 18:35:09.379266: step: 194/459, loss: 0.017397809773683548 2023-01-22 18:35:10.037465: step: 196/459, loss: 0.12400245666503906 2023-01-22 18:35:10.648787: step: 198/459, loss: 0.0043572913855314255 2023-01-22 18:35:11.213752: step: 200/459, loss: 0.01699565164744854 2023-01-22 18:35:11.779417: step: 202/459, loss: 0.006842047907412052 2023-01-22 18:35:12.380077: step: 204/459, loss: 0.01106992643326521 2023-01-22 18:35:13.021532: step: 206/459, loss: 0.04436018317937851 2023-01-22 18:35:13.642735: step: 208/459, loss: 0.005954765249043703 2023-01-22 18:35:14.304331: step: 210/459, loss: 0.07782654464244843 2023-01-22 18:35:14.963586: step: 212/459, loss: 0.01109590008854866 2023-01-22 18:35:15.562116: step: 214/459, loss: 0.014011983759701252 2023-01-22 18:35:16.106792: step: 216/459, loss: 6.737349758623168e-05 2023-01-22 18:35:16.716021: step: 218/459, loss: 0.00207520485855639 2023-01-22 18:35:17.304658: step: 220/459, loss: 0.009610218927264214 2023-01-22 18:35:17.976205: step: 222/459, loss: 0.028649430721998215 2023-01-22 18:35:18.642981: step: 224/459, loss: 0.009082555770874023 2023-01-22 18:35:19.290154: step: 226/459, loss: 0.007120512425899506 2023-01-22 18:35:19.897420: step: 228/459, loss: 0.0805722028017044 2023-01-22 18:35:20.492558: step: 230/459, loss: 0.015902629122138023 2023-01-22 18:35:21.068795: step: 232/459, loss: 0.007649676408618689 2023-01-22 18:35:21.661365: step: 234/459, loss: 0.16558724641799927 2023-01-22 18:35:22.261541: step: 236/459, loss: 0.021013664081692696 2023-01-22 18:35:22.906747: step: 238/459, loss: 0.023082710802555084 2023-01-22 18:35:23.466468: step: 240/459, loss: 0.001825955929234624 2023-01-22 18:35:24.135429: step: 242/459, loss: 0.053991612046957016 2023-01-22 18:35:24.699261: step: 244/459, loss: 0.03778092563152313 2023-01-22 18:35:25.352562: step: 246/459, loss: 0.021420007571578026 2023-01-22 18:35:25.977073: step: 248/459, loss: 0.0281829833984375 2023-01-22 18:35:26.599413: step: 250/459, loss: 0.05765974521636963 2023-01-22 18:35:27.230819: step: 252/459, loss: 0.0030776276253163815 2023-01-22 18:35:27.911781: step: 254/459, loss: 0.006687235087156296 2023-01-22 18:35:28.598616: step: 256/459, loss: 0.0037794180680066347 2023-01-22 18:35:29.214627: step: 258/459, loss: 0.020946770906448364 2023-01-22 18:35:29.835384: step: 260/459, loss: 0.020166192203760147 2023-01-22 18:35:30.453481: step: 262/459, loss: 0.004278910346329212 2023-01-22 18:35:31.047659: step: 264/459, loss: 0.0028301163110882044 2023-01-22 18:35:31.597484: step: 266/459, loss: 0.03766366466879845 2023-01-22 18:35:32.183645: step: 268/459, loss: 0.27914485335350037 2023-01-22 18:35:32.719931: step: 270/459, loss: 0.004007326439023018 2023-01-22 18:35:33.284382: step: 272/459, loss: 0.08231227844953537 2023-01-22 18:35:33.896158: step: 274/459, loss: 0.008809550665318966 2023-01-22 18:35:34.564621: step: 276/459, loss: 0.0034556228201836348 2023-01-22 18:35:35.186151: step: 278/459, loss: 0.03158458322286606 2023-01-22 18:35:35.756304: step: 280/459, loss: 0.04156746715307236 2023-01-22 18:35:36.329328: step: 282/459, loss: 0.0007681819261051714 2023-01-22 18:35:36.892206: step: 284/459, loss: 0.005320281255990267 2023-01-22 18:35:37.455643: step: 286/459, loss: 0.0011180349392816424 2023-01-22 18:35:38.107937: step: 288/459, loss: 0.001892576809041202 2023-01-22 18:35:38.763884: step: 290/459, loss: 0.01142773125320673 2023-01-22 18:35:39.443524: step: 292/459, loss: 0.04678807407617569 2023-01-22 18:35:40.015469: step: 294/459, loss: 0.0025569258723407984 2023-01-22 18:35:40.592826: step: 296/459, loss: 0.013398569077253342 2023-01-22 18:35:41.188970: step: 298/459, loss: 0.015467137098312378 2023-01-22 18:35:41.772110: step: 300/459, loss: 0.006661698687821627 2023-01-22 18:35:42.386805: step: 302/459, loss: 0.004708386491984129 2023-01-22 18:35:42.972545: step: 304/459, loss: 0.008954213932156563 2023-01-22 18:35:43.648825: step: 306/459, loss: 0.02209390327334404 2023-01-22 18:35:44.202441: step: 308/459, loss: 0.0002579634019639343 2023-01-22 18:35:44.867678: step: 310/459, loss: 0.0668187290430069 2023-01-22 18:35:45.451692: step: 312/459, loss: 0.01119961030781269 2023-01-22 18:35:46.042958: step: 314/459, loss: 0.001592157525010407 2023-01-22 18:35:46.687223: step: 316/459, loss: 0.005148693919181824 2023-01-22 18:35:47.298422: step: 318/459, loss: 0.02492441050708294 2023-01-22 18:35:47.866994: step: 320/459, loss: 0.04811137914657593 2023-01-22 18:35:48.532166: step: 322/459, loss: 0.004627626854926348 2023-01-22 18:35:49.140895: step: 324/459, loss: 0.00046955051948316395 2023-01-22 18:35:49.752251: step: 326/459, loss: 0.015105369500815868 2023-01-22 18:35:50.397547: step: 328/459, loss: 0.0022059199400246143 2023-01-22 18:35:51.056796: step: 330/459, loss: 0.05332881212234497 2023-01-22 18:35:51.688882: step: 332/459, loss: 0.02308749035000801 2023-01-22 18:35:52.300132: step: 334/459, loss: 0.01038237102329731 2023-01-22 18:35:52.958955: step: 336/459, loss: 0.00707350205630064 2023-01-22 18:35:53.544755: step: 338/459, loss: 0.011827399954199791 2023-01-22 18:35:54.190151: step: 340/459, loss: 0.03238379582762718 2023-01-22 18:35:54.744212: step: 342/459, loss: 0.010089714080095291 2023-01-22 18:35:55.396635: step: 344/459, loss: 0.012845934368669987 2023-01-22 18:35:55.984029: step: 346/459, loss: 0.014643705449998379 2023-01-22 18:35:56.699217: step: 348/459, loss: 0.0650816261768341 2023-01-22 18:35:57.292901: step: 350/459, loss: 0.00724341394379735 2023-01-22 18:35:57.892624: step: 352/459, loss: 0.009248141199350357 2023-01-22 18:35:58.501752: step: 354/459, loss: 0.02953900769352913 2023-01-22 18:35:59.131738: step: 356/459, loss: 0.003193347714841366 2023-01-22 18:35:59.718082: step: 358/459, loss: 0.03542504832148552 2023-01-22 18:36:00.360431: step: 360/459, loss: 0.0013271799543872476 2023-01-22 18:36:00.919816: step: 362/459, loss: 0.013548187911510468 2023-01-22 18:36:01.569575: step: 364/459, loss: 0.04694579169154167 2023-01-22 18:36:02.169892: step: 366/459, loss: 0.0036059999838471413 2023-01-22 18:36:02.767915: step: 368/459, loss: 0.00107394193764776 2023-01-22 18:36:03.407620: step: 370/459, loss: 0.008569651283323765 2023-01-22 18:36:03.986621: step: 372/459, loss: 0.024501534178853035 2023-01-22 18:36:04.695830: step: 374/459, loss: 0.0009591477573849261 2023-01-22 18:36:05.363429: step: 376/459, loss: 0.0011198509018868208 2023-01-22 18:36:05.963452: step: 378/459, loss: 0.016909318044781685 2023-01-22 18:36:06.526819: step: 380/459, loss: 0.0003109597892034799 2023-01-22 18:36:07.154547: step: 382/459, loss: 0.031063159927725792 2023-01-22 18:36:07.840560: step: 384/459, loss: 0.008393966592848301 2023-01-22 18:36:08.522385: step: 386/459, loss: 0.015922250226140022 2023-01-22 18:36:09.135138: step: 388/459, loss: 0.10777147114276886 2023-01-22 18:36:09.714004: step: 390/459, loss: 0.006030482240021229 2023-01-22 18:36:10.333446: step: 392/459, loss: 0.00347311282530427 2023-01-22 18:36:10.990506: step: 394/459, loss: 0.004361819941550493 2023-01-22 18:36:11.598275: step: 396/459, loss: 0.010031986981630325 2023-01-22 18:36:12.188870: step: 398/459, loss: 0.019009439274668694 2023-01-22 18:36:12.829120: step: 400/459, loss: 0.02485479600727558 2023-01-22 18:36:13.445708: step: 402/459, loss: 0.07552199065685272 2023-01-22 18:36:14.049664: step: 404/459, loss: 0.004236845299601555 2023-01-22 18:36:14.628746: step: 406/459, loss: 0.022268205881118774 2023-01-22 18:36:15.225321: step: 408/459, loss: 0.002381716389209032 2023-01-22 18:36:15.794589: step: 410/459, loss: 0.049845997244119644 2023-01-22 18:36:16.422040: step: 412/459, loss: 0.0725514218211174 2023-01-22 18:36:17.015654: step: 414/459, loss: 0.44998228549957275 2023-01-22 18:36:17.562721: step: 416/459, loss: 0.003950102720409632 2023-01-22 18:36:18.113306: step: 418/459, loss: 0.03569298982620239 2023-01-22 18:36:18.748666: step: 420/459, loss: 0.0075606550090014935 2023-01-22 18:36:19.470888: step: 422/459, loss: 0.026329830288887024 2023-01-22 18:36:20.211224: step: 424/459, loss: 0.07393528521060944 2023-01-22 18:36:20.849542: step: 426/459, loss: 0.010479397140443325 2023-01-22 18:36:21.512293: step: 428/459, loss: 0.03939875215291977 2023-01-22 18:36:22.194172: step: 430/459, loss: 0.000620282837189734 2023-01-22 18:36:22.856448: step: 432/459, loss: 0.015389487147331238 2023-01-22 18:36:23.485063: step: 434/459, loss: 0.010710272006690502 2023-01-22 18:36:24.045528: step: 436/459, loss: 0.006208568345755339 2023-01-22 18:36:24.633189: step: 438/459, loss: 0.00014285289216786623 2023-01-22 18:36:25.206513: step: 440/459, loss: 0.020433543249964714 2023-01-22 18:36:25.801726: step: 442/459, loss: 0.04650651663541794 2023-01-22 18:36:26.449337: step: 444/459, loss: 0.020455252379179 2023-01-22 18:36:27.018679: step: 446/459, loss: 0.0253331009298563 2023-01-22 18:36:27.691159: step: 448/459, loss: 0.018260767683386803 2023-01-22 18:36:28.328503: step: 450/459, loss: 0.0006443065358325839 2023-01-22 18:36:28.881946: step: 452/459, loss: 0.01289509516209364 2023-01-22 18:36:29.476968: step: 454/459, loss: 0.013365423306822777 2023-01-22 18:36:30.044794: step: 456/459, loss: 0.006984825246036053 2023-01-22 18:36:30.653254: step: 458/459, loss: 0.009333848021924496 2023-01-22 18:36:31.191637: step: 460/459, loss: 0.010132447816431522 2023-01-22 18:36:31.807667: step: 462/459, loss: 0.005953726824373007 2023-01-22 18:36:32.347726: step: 464/459, loss: 0.006211122032254934 2023-01-22 18:36:32.931037: step: 466/459, loss: 0.002656649798154831 2023-01-22 18:36:33.522833: step: 468/459, loss: 0.0011374637251719832 2023-01-22 18:36:34.221454: step: 470/459, loss: 0.0001416908053215593 2023-01-22 18:36:34.836043: step: 472/459, loss: 0.0026120590046048164 2023-01-22 18:36:35.430472: step: 474/459, loss: 0.012859786860644817 2023-01-22 18:36:36.110817: step: 476/459, loss: 0.03658873587846756 2023-01-22 18:36:36.745406: step: 478/459, loss: 0.033596452325582504 2023-01-22 18:36:37.350346: step: 480/459, loss: 0.0021088793873786926 2023-01-22 18:36:37.924103: step: 482/459, loss: 0.02850884571671486 2023-01-22 18:36:38.560124: step: 484/459, loss: 0.005606101825833321 2023-01-22 18:36:39.153616: step: 486/459, loss: 0.00044472195440903306 2023-01-22 18:36:39.761662: step: 488/459, loss: 0.0013139352668076754 2023-01-22 18:36:40.383025: step: 490/459, loss: 0.005635205190628767 2023-01-22 18:36:41.008582: step: 492/459, loss: 0.024972543120384216 2023-01-22 18:36:41.635240: step: 494/459, loss: 0.018260590732097626 2023-01-22 18:36:42.179256: step: 496/459, loss: 0.0002623558684717864 2023-01-22 18:36:42.835304: step: 498/459, loss: 0.08461305499076843 2023-01-22 18:36:43.386104: step: 500/459, loss: 0.015456409193575382 2023-01-22 18:36:44.035849: step: 502/459, loss: 0.015775255858898163 2023-01-22 18:36:44.656019: step: 504/459, loss: 0.22462999820709229 2023-01-22 18:36:45.263100: step: 506/459, loss: 0.004544573370367289 2023-01-22 18:36:45.831422: step: 508/459, loss: 0.005923727061599493 2023-01-22 18:36:46.526974: step: 510/459, loss: 0.007012179121375084 2023-01-22 18:36:47.152701: step: 512/459, loss: 0.0021867912728339434 2023-01-22 18:36:47.810585: step: 514/459, loss: 0.027628378942608833 2023-01-22 18:36:48.404021: step: 516/459, loss: 0.0013795216800644994 2023-01-22 18:36:49.009049: step: 518/459, loss: 0.005205707624554634 2023-01-22 18:36:49.591886: step: 520/459, loss: 0.00502393115311861 2023-01-22 18:36:50.233587: step: 522/459, loss: 0.027063686400651932 2023-01-22 18:36:50.871212: step: 524/459, loss: 0.045558515936136246 2023-01-22 18:36:51.483635: step: 526/459, loss: 0.020238002762198448 2023-01-22 18:36:52.017576: step: 528/459, loss: 0.11675018817186356 2023-01-22 18:36:52.618104: step: 530/459, loss: 0.0023321800399571657 2023-01-22 18:36:53.175451: step: 532/459, loss: 0.039880018681287766 2023-01-22 18:36:53.673654: step: 534/459, loss: 1.8137841834686697e-05 2023-01-22 18:36:54.269971: step: 536/459, loss: 0.017451995983719826 2023-01-22 18:36:54.937305: step: 538/459, loss: 0.0037942370399832726 2023-01-22 18:36:55.537520: step: 540/459, loss: 0.002519971923902631 2023-01-22 18:36:56.109882: step: 542/459, loss: 0.0015344256535172462 2023-01-22 18:36:56.680999: step: 544/459, loss: 1.0373015403747559 2023-01-22 18:36:57.330784: step: 546/459, loss: 0.025804419070482254 2023-01-22 18:36:57.975414: step: 548/459, loss: 0.022582191973924637 2023-01-22 18:36:58.566280: step: 550/459, loss: 0.04277986288070679 2023-01-22 18:36:59.174745: step: 552/459, loss: 0.16968181729316711 2023-01-22 18:36:59.760396: step: 554/459, loss: 0.014455163851380348 2023-01-22 18:37:00.418210: step: 556/459, loss: 1.129088044166565 2023-01-22 18:37:00.988501: step: 558/459, loss: 0.023737343028187752 2023-01-22 18:37:01.572909: step: 560/459, loss: 0.025743482634425163 2023-01-22 18:37:02.179504: step: 562/459, loss: 0.006968502886593342 2023-01-22 18:37:02.738802: step: 564/459, loss: 0.03829498961567879 2023-01-22 18:37:03.354267: step: 566/459, loss: 0.06776323169469833 2023-01-22 18:37:03.986329: step: 568/459, loss: 0.006848793942481279 2023-01-22 18:37:04.608921: step: 570/459, loss: 0.006927241571247578 2023-01-22 18:37:05.147710: step: 572/459, loss: 0.005585575010627508 2023-01-22 18:37:05.806693: step: 574/459, loss: 0.0023757785093039274 2023-01-22 18:37:06.472611: step: 576/459, loss: 0.0001345625496469438 2023-01-22 18:37:07.106875: step: 578/459, loss: 0.0094767976552248 2023-01-22 18:37:07.770978: step: 580/459, loss: 0.00042161482269875705 2023-01-22 18:37:08.478444: step: 582/459, loss: 0.039882540702819824 2023-01-22 18:37:09.062684: step: 584/459, loss: 0.0444144532084465 2023-01-22 18:37:09.656446: step: 586/459, loss: 0.0029374947771430016 2023-01-22 18:37:10.253782: step: 588/459, loss: 0.0010461002821102738 2023-01-22 18:37:10.853680: step: 590/459, loss: 0.00026449045981280506 2023-01-22 18:37:11.477949: step: 592/459, loss: 0.0006223280797712505 2023-01-22 18:37:12.094953: step: 594/459, loss: 0.00041545930434949696 2023-01-22 18:37:12.690377: step: 596/459, loss: 0.005061842035502195 2023-01-22 18:37:13.278943: step: 598/459, loss: 0.027740919962525368 2023-01-22 18:37:13.882529: step: 600/459, loss: 0.04955441877245903 2023-01-22 18:37:14.478045: step: 602/459, loss: 0.03359590470790863 2023-01-22 18:37:15.116640: step: 604/459, loss: 0.014398176223039627 2023-01-22 18:37:15.664003: step: 606/459, loss: 0.0035100483801215887 2023-01-22 18:37:16.282627: step: 608/459, loss: 0.0021077662240713835 2023-01-22 18:37:16.883795: step: 610/459, loss: 0.005285394843667746 2023-01-22 18:37:17.384988: step: 612/459, loss: 0.02091139927506447 2023-01-22 18:37:17.995662: step: 614/459, loss: 0.03924482315778732 2023-01-22 18:37:18.700892: step: 616/459, loss: 0.01667371764779091 2023-01-22 18:37:19.295325: step: 618/459, loss: 0.006447154097259045 2023-01-22 18:37:19.846768: step: 620/459, loss: 0.022732317447662354 2023-01-22 18:37:20.427980: step: 622/459, loss: 0.07173370569944382 2023-01-22 18:37:21.054124: step: 624/459, loss: 0.015879454091191292 2023-01-22 18:37:21.609395: step: 626/459, loss: 0.0006428025080822408 2023-01-22 18:37:22.211186: step: 628/459, loss: 0.0036196415312588215 2023-01-22 18:37:22.784620: step: 630/459, loss: 0.001198666519485414 2023-01-22 18:37:23.427357: step: 632/459, loss: 0.004637808073312044 2023-01-22 18:37:24.059215: step: 634/459, loss: 0.009837810881435871 2023-01-22 18:37:24.696314: step: 636/459, loss: 0.011852912604808807 2023-01-22 18:37:25.234031: step: 638/459, loss: 0.021106040105223656 2023-01-22 18:37:25.857437: step: 640/459, loss: 0.014195707626640797 2023-01-22 18:37:26.498310: step: 642/459, loss: 0.05109719932079315 2023-01-22 18:37:27.239815: step: 644/459, loss: 0.008545851334929466 2023-01-22 18:37:27.822608: step: 646/459, loss: 0.0012549592647701502 2023-01-22 18:37:28.432759: step: 648/459, loss: 0.0019871655385941267 2023-01-22 18:37:28.977263: step: 650/459, loss: 0.024556780233979225 2023-01-22 18:37:29.546579: step: 652/459, loss: 0.014512770809233189 2023-01-22 18:37:30.198147: step: 654/459, loss: 0.00016479179612360895 2023-01-22 18:37:30.776623: step: 656/459, loss: 0.040520451962947845 2023-01-22 18:37:31.413951: step: 658/459, loss: 0.000840552500449121 2023-01-22 18:37:32.008210: step: 660/459, loss: 0.00509225856512785 2023-01-22 18:37:32.611866: step: 662/459, loss: 0.03606753796339035 2023-01-22 18:37:33.267888: step: 664/459, loss: 0.02473822608590126 2023-01-22 18:37:33.911639: step: 666/459, loss: 0.000658614095300436 2023-01-22 18:37:34.527817: step: 668/459, loss: 0.003742283908650279 2023-01-22 18:37:35.143394: step: 670/459, loss: 0.04463991895318031 2023-01-22 18:37:35.720586: step: 672/459, loss: 0.011859685182571411 2023-01-22 18:37:36.263658: step: 674/459, loss: 0.0028598180506378412 2023-01-22 18:37:36.860736: step: 676/459, loss: 0.022340530529618263 2023-01-22 18:37:37.476728: step: 678/459, loss: 0.016398873180150986 2023-01-22 18:37:38.066932: step: 680/459, loss: 0.005352445878088474 2023-01-22 18:37:38.612579: step: 682/459, loss: 0.10224176198244095 2023-01-22 18:37:39.199791: step: 684/459, loss: 1.8915355205535889 2023-01-22 18:37:39.743452: step: 686/459, loss: 0.005711458623409271 2023-01-22 18:37:40.316178: step: 688/459, loss: 0.15087153017520905 2023-01-22 18:37:40.925705: step: 690/459, loss: 0.0031019444577395916 2023-01-22 18:37:41.485191: step: 692/459, loss: 0.0007861480698920786 2023-01-22 18:37:42.149450: step: 694/459, loss: 0.09828571230173111 2023-01-22 18:37:42.770956: step: 696/459, loss: 0.0034847725182771683 2023-01-22 18:37:43.363069: step: 698/459, loss: 0.0005898561212234199 2023-01-22 18:37:43.940984: step: 700/459, loss: 0.014461545273661613 2023-01-22 18:37:44.563779: step: 702/459, loss: 0.017788244411349297 2023-01-22 18:37:45.117416: step: 704/459, loss: 0.057517290115356445 2023-01-22 18:37:45.749221: step: 706/459, loss: 0.004432004876434803 2023-01-22 18:37:46.435315: step: 708/459, loss: 0.05694511905312538 2023-01-22 18:37:47.015498: step: 710/459, loss: 0.00022403267212212086 2023-01-22 18:37:47.543459: step: 712/459, loss: 0.0011677086586132646 2023-01-22 18:37:48.096501: step: 714/459, loss: 0.2928163409233093 2023-01-22 18:37:48.706315: step: 716/459, loss: 0.02966177649796009 2023-01-22 18:37:49.358838: step: 718/459, loss: 0.017658567056059837 2023-01-22 18:37:49.950877: step: 720/459, loss: 0.010968558490276337 2023-01-22 18:37:50.558658: step: 722/459, loss: 0.006540338508784771 2023-01-22 18:37:51.140864: step: 724/459, loss: 0.012928251177072525 2023-01-22 18:37:51.746771: step: 726/459, loss: 0.0018430829513818026 2023-01-22 18:37:52.337956: step: 728/459, loss: 0.013066118583083153 2023-01-22 18:37:52.943459: step: 730/459, loss: 0.018062999472022057 2023-01-22 18:37:53.520497: step: 732/459, loss: 0.03178192302584648 2023-01-22 18:37:54.073298: step: 734/459, loss: 0.01616988144814968 2023-01-22 18:37:54.643844: step: 736/459, loss: 0.014060299843549728 2023-01-22 18:37:55.216213: step: 738/459, loss: 0.011985759250819683 2023-01-22 18:37:55.827520: step: 740/459, loss: 0.021273646503686905 2023-01-22 18:37:56.444497: step: 742/459, loss: 0.012861821800470352 2023-01-22 18:37:57.058183: step: 744/459, loss: 0.05313175544142723 2023-01-22 18:37:57.629869: step: 746/459, loss: 0.005414313171058893 2023-01-22 18:37:58.249097: step: 748/459, loss: 0.011800902895629406 2023-01-22 18:37:58.837190: step: 750/459, loss: 0.01608269475400448 2023-01-22 18:37:59.439778: step: 752/459, loss: 0.018279751762747765 2023-01-22 18:38:00.044334: step: 754/459, loss: 0.0009806619491428137 2023-01-22 18:38:00.643034: step: 756/459, loss: 0.0025741097051650286 2023-01-22 18:38:01.232499: step: 758/459, loss: 0.0018920780858024955 2023-01-22 18:38:01.831827: step: 760/459, loss: 0.015338432975113392 2023-01-22 18:38:02.449709: step: 762/459, loss: 0.003202370135113597 2023-01-22 18:38:03.130403: step: 764/459, loss: 0.018084798008203506 2023-01-22 18:38:03.719975: step: 766/459, loss: 0.0023570586927235126 2023-01-22 18:38:04.375634: step: 768/459, loss: 0.17639009654521942 2023-01-22 18:38:04.956686: step: 770/459, loss: 0.002229883335530758 2023-01-22 18:38:05.561236: step: 772/459, loss: 0.0005026318831369281 2023-01-22 18:38:06.116074: step: 774/459, loss: 0.0026842691004276276 2023-01-22 18:38:06.696221: step: 776/459, loss: 0.001169703435152769 2023-01-22 18:38:07.416921: step: 778/459, loss: 0.004916236735880375 2023-01-22 18:38:08.000797: step: 780/459, loss: 0.013509819284081459 2023-01-22 18:38:08.582317: step: 782/459, loss: 0.006406778935343027 2023-01-22 18:38:09.174698: step: 784/459, loss: 0.05795333534479141 2023-01-22 18:38:09.749316: step: 786/459, loss: 0.07152958959341049 2023-01-22 18:38:10.499985: step: 788/459, loss: 0.02616548351943493 2023-01-22 18:38:11.098748: step: 790/459, loss: 0.0034565788228064775 2023-01-22 18:38:11.703971: step: 792/459, loss: 0.028669510036706924 2023-01-22 18:38:12.377531: step: 794/459, loss: 0.0020464910194277763 2023-01-22 18:38:13.010346: step: 796/459, loss: 0.0069786012172698975 2023-01-22 18:38:13.572135: step: 798/459, loss: 0.04433286562561989 2023-01-22 18:38:14.185335: step: 800/459, loss: 0.0124508673325181 2023-01-22 18:38:14.782915: step: 802/459, loss: 0.002782493829727173 2023-01-22 18:38:15.349097: step: 804/459, loss: 0.020404210314154625 2023-01-22 18:38:15.910106: step: 806/459, loss: 0.002759488532319665 2023-01-22 18:38:16.563748: step: 808/459, loss: 0.03284760192036629 2023-01-22 18:38:17.182005: step: 810/459, loss: 0.016350314021110535 2023-01-22 18:38:17.776566: step: 812/459, loss: 0.015374204143881798 2023-01-22 18:38:18.338040: step: 814/459, loss: 0.0266237985342741 2023-01-22 18:38:19.045684: step: 816/459, loss: 0.0002592368400655687 2023-01-22 18:38:19.737869: step: 818/459, loss: 0.013205111026763916 2023-01-22 18:38:20.391610: step: 820/459, loss: 0.045226629823446274 2023-01-22 18:38:21.010406: step: 822/459, loss: 0.05136032775044441 2023-01-22 18:38:21.608626: step: 824/459, loss: 0.02599860168993473 2023-01-22 18:38:22.200723: step: 826/459, loss: 0.0667438805103302 2023-01-22 18:38:22.781449: step: 828/459, loss: 0.014581668190658092 2023-01-22 18:38:23.348338: step: 830/459, loss: 0.05871222913265228 2023-01-22 18:38:23.971275: step: 832/459, loss: 0.9962611794471741 2023-01-22 18:38:24.613011: step: 834/459, loss: 0.045172106474637985 2023-01-22 18:38:25.242401: step: 836/459, loss: 0.017142202705144882 2023-01-22 18:38:25.784920: step: 838/459, loss: 0.024627039209008217 2023-01-22 18:38:26.375063: step: 840/459, loss: 0.099724180996418 2023-01-22 18:38:26.911917: step: 842/459, loss: 0.0031521450728178024 2023-01-22 18:38:27.483939: step: 844/459, loss: 0.011845692992210388 2023-01-22 18:38:28.119634: step: 846/459, loss: 0.03610837459564209 2023-01-22 18:38:28.795966: step: 848/459, loss: 0.045356299728155136 2023-01-22 18:38:29.396049: step: 850/459, loss: 0.1143639013171196 2023-01-22 18:38:30.003848: step: 852/459, loss: 0.005528181791305542 2023-01-22 18:38:30.607323: step: 854/459, loss: 0.10996099561452866 2023-01-22 18:38:31.220779: step: 856/459, loss: 0.0350230410695076 2023-01-22 18:38:31.813173: step: 858/459, loss: 0.027356835082173347 2023-01-22 18:38:32.457453: step: 860/459, loss: 0.0017465929267928004 2023-01-22 18:38:33.037792: step: 862/459, loss: 0.023605795577168465 2023-01-22 18:38:33.643566: step: 864/459, loss: 0.005546466447412968 2023-01-22 18:38:34.205584: step: 866/459, loss: 3.2736032153479755e-05 2023-01-22 18:38:34.831134: step: 868/459, loss: 0.03550826385617256 2023-01-22 18:38:35.518621: step: 870/459, loss: 0.013545245863497257 2023-01-22 18:38:36.231760: step: 872/459, loss: 0.05159607529640198 2023-01-22 18:38:36.943305: step: 874/459, loss: 0.009276279248297215 2023-01-22 18:38:37.557078: step: 876/459, loss: 0.07637448608875275 2023-01-22 18:38:38.118269: step: 878/459, loss: 7.951210136525333e-05 2023-01-22 18:38:38.716270: step: 880/459, loss: 0.0033970624208450317 2023-01-22 18:38:39.316188: step: 882/459, loss: 0.004122481681406498 2023-01-22 18:38:39.947631: step: 884/459, loss: 2.5999224817496724e-05 2023-01-22 18:38:40.510916: step: 886/459, loss: 0.007280586753040552 2023-01-22 18:38:41.106315: step: 888/459, loss: 0.3008875250816345 2023-01-22 18:38:41.699135: step: 890/459, loss: 0.0001942059607245028 2023-01-22 18:38:42.333060: step: 892/459, loss: 0.003154048230499029 2023-01-22 18:38:42.880828: step: 894/459, loss: 0.009456641040742397 2023-01-22 18:38:43.469348: step: 896/459, loss: 0.029686931520700455 2023-01-22 18:38:44.091586: step: 898/459, loss: 0.0027157727163285017 2023-01-22 18:38:44.690985: step: 900/459, loss: 0.00856030359864235 2023-01-22 18:38:45.287872: step: 902/459, loss: 0.1253705620765686 2023-01-22 18:38:45.890777: step: 904/459, loss: 0.02793683484196663 2023-01-22 18:38:46.559673: step: 906/459, loss: 0.012846631929278374 2023-01-22 18:38:47.132209: step: 908/459, loss: 0.002371186390519142 2023-01-22 18:38:47.652433: step: 910/459, loss: 0.0009142153430730104 2023-01-22 18:38:48.234267: step: 912/459, loss: 0.010516193695366383 2023-01-22 18:38:48.870320: step: 914/459, loss: 0.2328685075044632 2023-01-22 18:38:49.503400: step: 916/459, loss: 0.005676266737282276 2023-01-22 18:38:50.107030: step: 918/459, loss: 0.00723200011998415 2023-01-22 18:38:50.543316: step: 920/459, loss: 3.65903542842716e-05 ================================================== Loss: 0.039 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29955281986531984, 'r': 0.33763638519924094, 'f1': 0.3174565120428189}, 'combined': 0.2339153246631297, 'epoch': 33} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33154434748315087, 'r': 0.29926477952983227, 'f1': 0.3145786635134248}, 'combined': 0.20133034464859184, 'epoch': 33} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30610866595841557, 'r': 0.3479299637743661, 'f1': 0.32568222186339424}, 'combined': 0.23997637400460628, 'epoch': 33} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3407906540009529, 'r': 0.30916131213735215, 'f1': 0.3242063759913645}, 'combined': 0.20749208063447325, 'epoch': 33} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31708084437691597, 'r': 0.3435543873609469, 'f1': 0.3297871805814554}, 'combined': 0.24300108042844082, 'epoch': 33} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3320215687907337, 'r': 0.3062951938160643, 'f1': 0.318639950323587}, 'combined': 0.2284588323074775, 'epoch': 33} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2418300653594771, 'r': 0.35238095238095235, 'f1': 0.2868217054263566}, 'combined': 0.19121447028423771, 'epoch': 33} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.22972972972972974, 'r': 0.3695652173913043, 'f1': 0.2833333333333333}, 'combined': 0.14166666666666666, 'epoch': 33} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29411764705882354, 'r': 0.1724137931034483, 'f1': 0.2173913043478261}, 'combined': 0.14492753623188406, 'epoch': 33} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3066986908783784, 'r': 0.3445268026565465, 'f1': 0.3245140750670242}, 'combined': 0.23911563425991253, 'epoch': 19} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31271473684488904, 'r': 0.30873473110322686, 'f1': 0.31071198921642224}, 'combined': 0.1988556730985102, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2564102564102564, 'r': 0.38095238095238093, 'f1': 0.3065134099616858}, 'combined': 0.20434227330779053, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29418702499903554, 'r': 0.3594998939267151, 'f1': 0.32358060478117656}, 'combined': 0.23842781404928798, 'epoch': 13} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3202762492367604, 'r': 0.31212376289255195, 'f1': 0.31614745781013553}, 'combined': 0.20233437299848672, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2878787878787879, 'r': 0.41304347826086957, 'f1': 0.3392857142857143}, 'combined': 0.16964285714285715, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3189032241859154, 'r': 0.3376622373733222, 'f1': 0.32801474487694154}, 'combined': 0.24169507517248323, 'epoch': 15} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34157316773046453, 'r': 0.3008578947798814, 'f1': 0.3199253278791385}, 'combined': 0.22938042376240123, 'epoch': 15} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35294117647058826, 'r': 0.20689655172413793, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 15} ****************************** Epoch: 34 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 18:41:22.941223: step: 2/459, loss: 0.0024583092890679836 2023-01-22 18:41:23.542969: step: 4/459, loss: 0.006580153945833445 2023-01-22 18:41:24.169667: step: 6/459, loss: 0.0047643450088799 2023-01-22 18:41:24.802435: step: 8/459, loss: 0.023812638595700264 2023-01-22 18:41:25.426562: step: 10/459, loss: 0.016239870339632034 2023-01-22 18:41:26.057158: step: 12/459, loss: 0.32735398411750793 2023-01-22 18:41:26.590922: step: 14/459, loss: 0.0005492155905812979 2023-01-22 18:41:27.183408: step: 16/459, loss: 0.0011119524715468287 2023-01-22 18:41:27.754293: step: 18/459, loss: 0.02839900180697441 2023-01-22 18:41:28.424211: step: 20/459, loss: 0.0057955957017838955 2023-01-22 18:41:29.009489: step: 22/459, loss: 0.00933907087892294 2023-01-22 18:41:29.611969: step: 24/459, loss: 0.0004923383239656687 2023-01-22 18:41:30.225457: step: 26/459, loss: 0.006763170938938856 2023-01-22 18:41:30.825127: step: 28/459, loss: 0.010495415888726711 2023-01-22 18:41:31.449459: step: 30/459, loss: 0.0019000312313437462 2023-01-22 18:41:32.119945: step: 32/459, loss: 0.007418585009872913 2023-01-22 18:41:32.746598: step: 34/459, loss: 0.022660031914711 2023-01-22 18:41:33.384696: step: 36/459, loss: 0.011698465794324875 2023-01-22 18:41:33.989098: step: 38/459, loss: 0.0009185700328089297 2023-01-22 18:41:34.541474: step: 40/459, loss: 0.018729615956544876 2023-01-22 18:41:35.166360: step: 42/459, loss: 0.011685275472700596 2023-01-22 18:41:35.763401: step: 44/459, loss: 0.0007904094527475536 2023-01-22 18:41:36.373677: step: 46/459, loss: 0.0021722738165408373 2023-01-22 18:41:37.042380: step: 48/459, loss: 2.740674972301349e-05 2023-01-22 18:41:37.620135: step: 50/459, loss: 0.001124133705161512 2023-01-22 18:41:38.222526: step: 52/459, loss: 0.02189047820866108 2023-01-22 18:41:38.837434: step: 54/459, loss: 0.24611490964889526 2023-01-22 18:41:39.466730: step: 56/459, loss: 0.0006493860273621976 2023-01-22 18:41:40.082634: step: 58/459, loss: 0.021451324224472046 2023-01-22 18:41:40.686526: step: 60/459, loss: 0.013374735601246357 2023-01-22 18:41:41.267591: step: 62/459, loss: 0.0007396662258543074 2023-01-22 18:41:41.859555: step: 64/459, loss: 0.007382345385849476 2023-01-22 18:41:42.415981: step: 66/459, loss: 0.00524678872898221 2023-01-22 18:41:42.996720: step: 68/459, loss: 0.03521258383989334 2023-01-22 18:41:43.590634: step: 70/459, loss: 4.6186567487893626e-05 2023-01-22 18:41:44.260717: step: 72/459, loss: 0.0009647402330301702 2023-01-22 18:41:44.868045: step: 74/459, loss: 0.0020720153115689754 2023-01-22 18:41:45.537272: step: 76/459, loss: 0.03282879665493965 2023-01-22 18:41:46.236406: step: 78/459, loss: 0.0021107897628098726 2023-01-22 18:41:46.790504: step: 80/459, loss: 0.0032492626924067736 2023-01-22 18:41:47.408712: step: 82/459, loss: 0.01618010923266411 2023-01-22 18:41:48.035030: step: 84/459, loss: 0.00507860304787755 2023-01-22 18:41:48.632672: step: 86/459, loss: 0.1293308436870575 2023-01-22 18:41:49.329479: step: 88/459, loss: 0.013149183243513107 2023-01-22 18:41:49.957211: step: 90/459, loss: 0.013106889091432095 2023-01-22 18:41:50.573822: step: 92/459, loss: 0.008200581185519695 2023-01-22 18:41:51.165401: step: 94/459, loss: 0.059887614101171494 2023-01-22 18:41:51.720079: step: 96/459, loss: 0.008717854507267475 2023-01-22 18:41:52.375780: step: 98/459, loss: 1.1641639471054077 2023-01-22 18:41:52.985731: step: 100/459, loss: 0.004299674648791552 2023-01-22 18:41:53.564127: step: 102/459, loss: 0.0017411193111911416 2023-01-22 18:41:54.125049: step: 104/459, loss: 0.0028293596114963293 2023-01-22 18:41:54.709246: step: 106/459, loss: 0.010412012226879597 2023-01-22 18:41:55.311451: step: 108/459, loss: 0.02988106571137905 2023-01-22 18:41:55.925698: step: 110/459, loss: 0.0013223637361079454 2023-01-22 18:41:56.508404: step: 112/459, loss: 0.0008742309873923659 2023-01-22 18:41:57.096428: step: 114/459, loss: 0.037959638983011246 2023-01-22 18:41:57.706394: step: 116/459, loss: 0.007708384655416012 2023-01-22 18:41:58.281134: step: 118/459, loss: 0.022929294034838676 2023-01-22 18:41:58.827168: step: 120/459, loss: 0.01225080993026495 2023-01-22 18:41:59.437433: step: 122/459, loss: 0.015864688903093338 2023-01-22 18:42:00.049429: step: 124/459, loss: 0.00026496301870793104 2023-01-22 18:42:00.667903: step: 126/459, loss: 0.0027475140523165464 2023-01-22 18:42:01.300765: step: 128/459, loss: 2.6971383704221807e-05 2023-01-22 18:42:01.954914: step: 130/459, loss: 0.006406653672456741 2023-01-22 18:42:02.580652: step: 132/459, loss: 0.0003961954789701849 2023-01-22 18:42:03.193096: step: 134/459, loss: 0.003998968750238419 2023-01-22 18:42:03.795007: step: 136/459, loss: 0.07589370757341385 2023-01-22 18:42:04.369804: step: 138/459, loss: 0.05056406930088997 2023-01-22 18:42:04.945593: step: 140/459, loss: 0.029767882078886032 2023-01-22 18:42:05.601084: step: 142/459, loss: 0.001702719135209918 2023-01-22 18:42:06.223057: step: 144/459, loss: 0.0011341796489432454 2023-01-22 18:42:06.841982: step: 146/459, loss: 0.010819158516824245 2023-01-22 18:42:07.413390: step: 148/459, loss: 0.006601519882678986 2023-01-22 18:42:08.009675: step: 150/459, loss: 0.0007630864856764674 2023-01-22 18:42:08.605406: step: 152/459, loss: 0.0004727329360321164 2023-01-22 18:42:09.215256: step: 154/459, loss: 0.0005479459650814533 2023-01-22 18:42:09.811376: step: 156/459, loss: 0.0052567897364497185 2023-01-22 18:42:10.524953: step: 158/459, loss: 0.0005216391873545945 2023-01-22 18:42:11.064949: step: 160/459, loss: 0.0005659894086420536 2023-01-22 18:42:11.622683: step: 162/459, loss: 0.008890370838344097 2023-01-22 18:42:12.261499: step: 164/459, loss: 0.0206079613417387 2023-01-22 18:42:12.919655: step: 166/459, loss: 0.0015512112295255065 2023-01-22 18:42:13.527551: step: 168/459, loss: 0.015211064368486404 2023-01-22 18:42:14.114531: step: 170/459, loss: 0.00637417146936059 2023-01-22 18:42:14.715810: step: 172/459, loss: 0.03838237002491951 2023-01-22 18:42:15.321194: step: 174/459, loss: 0.26566624641418457 2023-01-22 18:42:15.884428: step: 176/459, loss: 0.1176142543554306 2023-01-22 18:42:16.466583: step: 178/459, loss: 0.0165370162576437 2023-01-22 18:42:17.088843: step: 180/459, loss: 3.75843774236273e-05 2023-01-22 18:42:17.738366: step: 182/459, loss: 0.004779667127877474 2023-01-22 18:42:18.327803: step: 184/459, loss: 0.0005916694644838572 2023-01-22 18:42:18.903420: step: 186/459, loss: 0.0010133016621693969 2023-01-22 18:42:19.480052: step: 188/459, loss: 0.7296687364578247 2023-01-22 18:42:20.044430: step: 190/459, loss: 0.0026456783525645733 2023-01-22 18:42:20.699423: step: 192/459, loss: 0.005847548134624958 2023-01-22 18:42:21.283991: step: 194/459, loss: 0.002688198583200574 2023-01-22 18:42:21.912217: step: 196/459, loss: 0.002605362329632044 2023-01-22 18:42:22.508636: step: 198/459, loss: 0.0023131826892495155 2023-01-22 18:42:23.145774: step: 200/459, loss: 0.06916569173336029 2023-01-22 18:42:23.726967: step: 202/459, loss: 0.0025436070282012224 2023-01-22 18:42:24.275069: step: 204/459, loss: 0.017596455290913582 2023-01-22 18:42:24.855142: step: 206/459, loss: 0.08200648427009583 2023-01-22 18:42:25.436789: step: 208/459, loss: 0.002038748934864998 2023-01-22 18:42:25.972756: step: 210/459, loss: 0.6909592747688293 2023-01-22 18:42:26.540394: step: 212/459, loss: 0.00285587296821177 2023-01-22 18:42:27.211497: step: 214/459, loss: 0.224419504404068 2023-01-22 18:42:27.880783: step: 216/459, loss: 0.012140639126300812 2023-01-22 18:42:28.469280: step: 218/459, loss: 0.02650657668709755 2023-01-22 18:42:29.065656: step: 220/459, loss: 0.005767495837062597 2023-01-22 18:42:29.669518: step: 222/459, loss: 0.03266908600926399 2023-01-22 18:42:30.243469: step: 224/459, loss: 0.0006972285336814821 2023-01-22 18:42:30.850894: step: 226/459, loss: 0.0005274121649563313 2023-01-22 18:42:31.487157: step: 228/459, loss: 0.000933705479837954 2023-01-22 18:42:32.077566: step: 230/459, loss: 0.0054528298787772655 2023-01-22 18:42:32.643975: step: 232/459, loss: 0.021361930295825005 2023-01-22 18:42:33.314161: step: 234/459, loss: 0.007423868868499994 2023-01-22 18:42:33.852012: step: 236/459, loss: 0.00038666086038574576 2023-01-22 18:42:34.477987: step: 238/459, loss: 0.048123616725206375 2023-01-22 18:42:35.034454: step: 240/459, loss: 0.0008960350533016026 2023-01-22 18:42:35.602017: step: 242/459, loss: 0.004936893004924059 2023-01-22 18:42:36.174801: step: 244/459, loss: 0.0006819443660788238 2023-01-22 18:42:36.770103: step: 246/459, loss: 0.016907740384340286 2023-01-22 18:42:37.305860: step: 248/459, loss: 0.03346952795982361 2023-01-22 18:42:37.903598: step: 250/459, loss: 0.01698971539735794 2023-01-22 18:42:38.479997: step: 252/459, loss: 0.004138220101594925 2023-01-22 18:42:39.116076: step: 254/459, loss: 0.16324251890182495 2023-01-22 18:42:39.758280: step: 256/459, loss: 0.21180681884288788 2023-01-22 18:42:40.431594: step: 258/459, loss: 0.0037301150150597095 2023-01-22 18:42:40.983958: step: 260/459, loss: 0.009218056686222553 2023-01-22 18:42:41.601928: step: 262/459, loss: 2.3890819647931494e-05 2023-01-22 18:42:42.196060: step: 264/459, loss: 0.05529527738690376 2023-01-22 18:42:42.963093: step: 266/459, loss: 0.4243069589138031 2023-01-22 18:42:43.514762: step: 268/459, loss: 0.010824860073626041 2023-01-22 18:42:44.104800: step: 270/459, loss: 0.003650952596217394 2023-01-22 18:42:44.711468: step: 272/459, loss: 0.0010058231418952346 2023-01-22 18:42:45.357538: step: 274/459, loss: 0.0006724551203660667 2023-01-22 18:42:45.977509: step: 276/459, loss: 0.025177057832479477 2023-01-22 18:42:46.608164: step: 278/459, loss: 0.019508834928274155 2023-01-22 18:42:47.186741: step: 280/459, loss: 0.012520101852715015 2023-01-22 18:42:47.730683: step: 282/459, loss: 0.0001337890134891495 2023-01-22 18:42:48.272755: step: 284/459, loss: 0.002774402964860201 2023-01-22 18:42:48.834113: step: 286/459, loss: 0.006096140947192907 2023-01-22 18:42:49.448269: step: 288/459, loss: 0.0018771502654999495 2023-01-22 18:42:50.013541: step: 290/459, loss: 0.0035598967224359512 2023-01-22 18:42:50.564605: step: 292/459, loss: 0.0005755522288382053 2023-01-22 18:42:51.202503: step: 294/459, loss: 0.043000172823667526 2023-01-22 18:42:51.867847: step: 296/459, loss: 0.07460402697324753 2023-01-22 18:42:52.451384: step: 298/459, loss: 0.0013938996708020568 2023-01-22 18:42:52.986158: step: 300/459, loss: 0.5912879109382629 2023-01-22 18:42:53.617607: step: 302/459, loss: 0.08552318066358566 2023-01-22 18:42:54.206365: step: 304/459, loss: 0.0010543751996010542 2023-01-22 18:42:54.824441: step: 306/459, loss: 0.11570211499929428 2023-01-22 18:42:55.397724: step: 308/459, loss: 0.0031691326294094324 2023-01-22 18:42:55.908379: step: 310/459, loss: 0.0012053331593051553 2023-01-22 18:42:56.471022: step: 312/459, loss: 0.0005418765940703452 2023-01-22 18:42:57.102419: step: 314/459, loss: 9.735229832585901e-05 2023-01-22 18:42:57.735264: step: 316/459, loss: 9.56607109401375e-05 2023-01-22 18:42:58.332253: step: 318/459, loss: 0.0003967846278101206 2023-01-22 18:42:58.974827: step: 320/459, loss: 0.02746180258691311 2023-01-22 18:42:59.549966: step: 322/459, loss: 0.0008831480517983437 2023-01-22 18:43:00.180143: step: 324/459, loss: 0.011692889966070652 2023-01-22 18:43:00.768948: step: 326/459, loss: 0.0006417607073672116 2023-01-22 18:43:01.394572: step: 328/459, loss: 0.017662331461906433 2023-01-22 18:43:02.125310: step: 330/459, loss: 0.02843131497502327 2023-01-22 18:43:02.725677: step: 332/459, loss: 0.005949864629656076 2023-01-22 18:43:03.351043: step: 334/459, loss: 0.012028384953737259 2023-01-22 18:43:03.937759: step: 336/459, loss: 0.0024688676930963993 2023-01-22 18:43:04.541067: step: 338/459, loss: 0.004018816165626049 2023-01-22 18:43:05.154093: step: 340/459, loss: 0.0035536112263798714 2023-01-22 18:43:05.761839: step: 342/459, loss: 0.0019554763566702604 2023-01-22 18:43:06.346349: step: 344/459, loss: 0.009303497150540352 2023-01-22 18:43:06.942373: step: 346/459, loss: 0.01725386455655098 2023-01-22 18:43:07.576979: step: 348/459, loss: 0.03363772854208946 2023-01-22 18:43:08.073906: step: 350/459, loss: 0.0006114169955253601 2023-01-22 18:43:08.733833: step: 352/459, loss: 0.009485512971878052 2023-01-22 18:43:09.286523: step: 354/459, loss: 0.0020637535490095615 2023-01-22 18:43:09.920953: step: 356/459, loss: 0.05390734598040581 2023-01-22 18:43:10.568998: step: 358/459, loss: 0.00039312263834290206 2023-01-22 18:43:11.123940: step: 360/459, loss: 0.0036862145643681288 2023-01-22 18:43:11.725782: step: 362/459, loss: 0.00829172134399414 2023-01-22 18:43:12.325592: step: 364/459, loss: 0.08911331743001938 2023-01-22 18:43:12.925303: step: 366/459, loss: 0.04590543732047081 2023-01-22 18:43:13.526752: step: 368/459, loss: 0.3890998363494873 2023-01-22 18:43:14.149245: step: 370/459, loss: 0.020847609266638756 2023-01-22 18:43:14.754758: step: 372/459, loss: 0.0025556832551956177 2023-01-22 18:43:15.339561: step: 374/459, loss: 0.07325946539640427 2023-01-22 18:43:15.942351: step: 376/459, loss: 0.03190924972295761 2023-01-22 18:43:16.571379: step: 378/459, loss: 0.014481743797659874 2023-01-22 18:43:17.177500: step: 380/459, loss: 0.0017339772311970592 2023-01-22 18:43:17.824772: step: 382/459, loss: 0.00025636254576966166 2023-01-22 18:43:18.477007: step: 384/459, loss: 0.008851136080920696 2023-01-22 18:43:19.174496: step: 386/459, loss: 0.0002555647515691817 2023-01-22 18:43:19.925933: step: 388/459, loss: 0.012265067547559738 2023-01-22 18:43:20.509707: step: 390/459, loss: 0.3344159722328186 2023-01-22 18:43:21.043833: step: 392/459, loss: 0.0027383873239159584 2023-01-22 18:43:21.602162: step: 394/459, loss: 0.0015557720325887203 2023-01-22 18:43:22.189509: step: 396/459, loss: 0.0004844361974392086 2023-01-22 18:43:22.807759: step: 398/459, loss: 0.006327091250568628 2023-01-22 18:43:23.372150: step: 400/459, loss: 0.025968829169869423 2023-01-22 18:43:24.063985: step: 402/459, loss: 0.025250278413295746 2023-01-22 18:43:24.658665: step: 404/459, loss: 0.004502012860029936 2023-01-22 18:43:25.350026: step: 406/459, loss: 0.000573727476876229 2023-01-22 18:43:25.892395: step: 408/459, loss: 0.001575227128341794 2023-01-22 18:43:26.526391: step: 410/459, loss: 0.013496785424649715 2023-01-22 18:43:27.153690: step: 412/459, loss: 0.05979403480887413 2023-01-22 18:43:27.906161: step: 414/459, loss: 0.04038996994495392 2023-01-22 18:43:28.516275: step: 416/459, loss: 0.0004824124334845692 2023-01-22 18:43:29.097863: step: 418/459, loss: 0.017764706164598465 2023-01-22 18:43:29.694428: step: 420/459, loss: 0.002838284242898226 2023-01-22 18:43:30.309215: step: 422/459, loss: 0.0015194871230050921 2023-01-22 18:43:30.954039: step: 424/459, loss: 0.004575615283101797 2023-01-22 18:43:31.623033: step: 426/459, loss: 0.0015451203798875213 2023-01-22 18:43:32.271530: step: 428/459, loss: 0.01328886579722166 2023-01-22 18:43:32.872730: step: 430/459, loss: 0.003569243475794792 2023-01-22 18:43:33.478435: step: 432/459, loss: 0.005865688435733318 2023-01-22 18:43:34.066503: step: 434/459, loss: 0.029187781736254692 2023-01-22 18:43:34.678356: step: 436/459, loss: 0.001062049064785242 2023-01-22 18:43:35.239790: step: 438/459, loss: 0.003476877463981509 2023-01-22 18:43:35.818412: step: 440/459, loss: 0.0041506169363856316 2023-01-22 18:43:36.446746: step: 442/459, loss: 0.0024937032721936703 2023-01-22 18:43:37.162714: step: 444/459, loss: 0.00030292372684925795 2023-01-22 18:43:37.768890: step: 446/459, loss: 0.02529038116335869 2023-01-22 18:43:38.393367: step: 448/459, loss: 0.0012935636332258582 2023-01-22 18:43:38.983672: step: 450/459, loss: 0.043138667941093445 2023-01-22 18:43:39.555522: step: 452/459, loss: 0.017937490716576576 2023-01-22 18:43:40.178037: step: 454/459, loss: 0.005791183561086655 2023-01-22 18:43:40.850494: step: 456/459, loss: 0.014116933569312096 2023-01-22 18:43:41.490872: step: 458/459, loss: 0.00488214660435915 2023-01-22 18:43:42.062239: step: 460/459, loss: 0.012116262689232826 2023-01-22 18:43:42.706481: step: 462/459, loss: 0.007351442240178585 2023-01-22 18:43:43.274549: step: 464/459, loss: 0.013495237566530704 2023-01-22 18:43:43.869523: step: 466/459, loss: 0.004012835677713156 2023-01-22 18:43:44.501965: step: 468/459, loss: 0.005344472825527191 2023-01-22 18:43:45.062249: step: 470/459, loss: 0.005125305149704218 2023-01-22 18:43:45.661739: step: 472/459, loss: 0.0028955803718417883 2023-01-22 18:43:46.212445: step: 474/459, loss: 0.00135697890073061 2023-01-22 18:43:46.868149: step: 476/459, loss: 0.029668789356946945 2023-01-22 18:43:47.499831: step: 478/459, loss: 9.509398660156876e-05 2023-01-22 18:43:48.091454: step: 480/459, loss: 0.04227248579263687 2023-01-22 18:43:48.744770: step: 482/459, loss: 0.0725809708237648 2023-01-22 18:43:49.367341: step: 484/459, loss: 0.006996863055974245 2023-01-22 18:43:50.099697: step: 486/459, loss: 0.003746467176824808 2023-01-22 18:43:50.758523: step: 488/459, loss: 0.00754821440204978 2023-01-22 18:43:51.316722: step: 490/459, loss: 0.0019268274772912264 2023-01-22 18:43:52.009009: step: 492/459, loss: 0.01844080723822117 2023-01-22 18:43:52.653101: step: 494/459, loss: 0.010795260779559612 2023-01-22 18:43:53.228948: step: 496/459, loss: 0.0008722988422960043 2023-01-22 18:43:53.790546: step: 498/459, loss: 0.01763499341905117 2023-01-22 18:43:54.371837: step: 500/459, loss: 0.006166927516460419 2023-01-22 18:43:54.960807: step: 502/459, loss: 0.0007656325469724834 2023-01-22 18:43:55.565009: step: 504/459, loss: 0.0390876829624176 2023-01-22 18:43:56.100680: step: 506/459, loss: 0.06129439175128937 2023-01-22 18:43:56.620294: step: 508/459, loss: 0.0006808853358961642 2023-01-22 18:43:57.270343: step: 510/459, loss: 0.0011321126949042082 2023-01-22 18:43:57.892434: step: 512/459, loss: 0.0037536942400038242 2023-01-22 18:43:58.471161: step: 514/459, loss: 0.0006225109100341797 2023-01-22 18:43:59.116107: step: 516/459, loss: 0.020824255421757698 2023-01-22 18:43:59.692891: step: 518/459, loss: 0.0021292492747306824 2023-01-22 18:44:00.309109: step: 520/459, loss: 0.010326423682272434 2023-01-22 18:44:00.935078: step: 522/459, loss: 0.003610878251492977 2023-01-22 18:44:01.520726: step: 524/459, loss: 0.012599465437233448 2023-01-22 18:44:02.143269: step: 526/459, loss: 0.005229382775723934 2023-01-22 18:44:02.763318: step: 528/459, loss: 0.030323224142193794 2023-01-22 18:44:03.325267: step: 530/459, loss: 3.6528261261992157e-05 2023-01-22 18:44:03.949538: step: 532/459, loss: 0.007435801904648542 2023-01-22 18:44:04.578107: step: 534/459, loss: 0.0008363918168470263 2023-01-22 18:44:05.144295: step: 536/459, loss: 0.023861562833189964 2023-01-22 18:44:05.698217: step: 538/459, loss: 0.09199065715074539 2023-01-22 18:44:06.324714: step: 540/459, loss: 8.054395584622398e-05 2023-01-22 18:44:06.915575: step: 542/459, loss: 0.0004464667581487447 2023-01-22 18:44:07.568059: step: 544/459, loss: 0.005332102533429861 2023-01-22 18:44:08.200700: step: 546/459, loss: 6.708304135827348e-05 2023-01-22 18:44:08.818015: step: 548/459, loss: 0.0011580749414861202 2023-01-22 18:44:09.386276: step: 550/459, loss: 0.03471539169549942 2023-01-22 18:44:09.978352: step: 552/459, loss: 0.020218288525938988 2023-01-22 18:44:10.616356: step: 554/459, loss: 0.01539615634828806 2023-01-22 18:44:11.220850: step: 556/459, loss: 0.012484817765653133 2023-01-22 18:44:11.833627: step: 558/459, loss: 0.12806713581085205 2023-01-22 18:44:12.475453: step: 560/459, loss: 0.0069231134839355946 2023-01-22 18:44:13.073640: step: 562/459, loss: 0.010950378142297268 2023-01-22 18:44:13.636405: step: 564/459, loss: 0.009761380031704903 2023-01-22 18:44:14.248736: step: 566/459, loss: 0.00325956498272717 2023-01-22 18:44:14.882374: step: 568/459, loss: 0.012565786950290203 2023-01-22 18:44:15.483324: step: 570/459, loss: 0.00017255697457585484 2023-01-22 18:44:16.039262: step: 572/459, loss: 0.03693842515349388 2023-01-22 18:44:16.625866: step: 574/459, loss: 0.0023180022835731506 2023-01-22 18:44:17.218338: step: 576/459, loss: 0.0642339289188385 2023-01-22 18:44:17.850253: step: 578/459, loss: 0.004187536891549826 2023-01-22 18:44:18.531719: step: 580/459, loss: 0.0021050923969596624 2023-01-22 18:44:19.130372: step: 582/459, loss: 0.006144403945654631 2023-01-22 18:44:19.790614: step: 584/459, loss: 0.004225844517350197 2023-01-22 18:44:20.374974: step: 586/459, loss: 0.0018546594073995948 2023-01-22 18:44:20.982325: step: 588/459, loss: 0.001658477820456028 2023-01-22 18:44:21.638498: step: 590/459, loss: 0.00014163977175485343 2023-01-22 18:44:22.228173: step: 592/459, loss: 0.03226941451430321 2023-01-22 18:44:22.768391: step: 594/459, loss: 0.0006785910809412599 2023-01-22 18:44:23.335072: step: 596/459, loss: 2.6131046979571693e-05 2023-01-22 18:44:23.844673: step: 598/459, loss: 0.022259891033172607 2023-01-22 18:44:24.464926: step: 600/459, loss: 0.00453035207465291 2023-01-22 18:44:25.155299: step: 602/459, loss: 0.02148372121155262 2023-01-22 18:44:25.779688: step: 604/459, loss: 0.0005622727912850678 2023-01-22 18:44:26.407276: step: 606/459, loss: 0.050805166363716125 2023-01-22 18:44:27.032165: step: 608/459, loss: 0.012645379640161991 2023-01-22 18:44:27.703789: step: 610/459, loss: 0.06259093433618546 2023-01-22 18:44:28.265238: step: 612/459, loss: 0.028506552800536156 2023-01-22 18:44:28.880683: step: 614/459, loss: 0.0218779556453228 2023-01-22 18:44:29.479948: step: 616/459, loss: 0.00025892796111293137 2023-01-22 18:44:30.067356: step: 618/459, loss: 0.00048039137618616223 2023-01-22 18:44:30.679174: step: 620/459, loss: 0.09211982041597366 2023-01-22 18:44:31.300286: step: 622/459, loss: 0.020848605781793594 2023-01-22 18:44:31.957870: step: 624/459, loss: 0.03592122718691826 2023-01-22 18:44:32.593702: step: 626/459, loss: 0.0009048713836818933 2023-01-22 18:44:33.158689: step: 628/459, loss: 0.0021896869875490665 2023-01-22 18:44:33.743228: step: 630/459, loss: 0.005398926325142384 2023-01-22 18:44:34.427208: step: 632/459, loss: 0.006535406690090895 2023-01-22 18:44:35.040645: step: 634/459, loss: 0.0009230409632436931 2023-01-22 18:44:35.640449: step: 636/459, loss: 0.020167360082268715 2023-01-22 18:44:36.239551: step: 638/459, loss: 0.0042074802331626415 2023-01-22 18:44:36.815716: step: 640/459, loss: 0.00035808811662718654 2023-01-22 18:44:37.471186: step: 642/459, loss: 0.014301826246082783 2023-01-22 18:44:38.120490: step: 644/459, loss: 0.03640817850828171 2023-01-22 18:44:38.688441: step: 646/459, loss: 0.015260087326169014 2023-01-22 18:44:39.243817: step: 648/459, loss: 0.0009587914682924747 2023-01-22 18:44:39.836236: step: 650/459, loss: 0.01672711782157421 2023-01-22 18:44:40.427202: step: 652/459, loss: 4.30109394073952e-05 2023-01-22 18:44:41.042697: step: 654/459, loss: 0.0045194244012236595 2023-01-22 18:44:41.631264: step: 656/459, loss: 0.04004370793700218 2023-01-22 18:44:42.234370: step: 658/459, loss: 0.004722509533166885 2023-01-22 18:44:42.787420: step: 660/459, loss: 0.029141562059521675 2023-01-22 18:44:43.400468: step: 662/459, loss: 0.005373390391469002 2023-01-22 18:44:43.997745: step: 664/459, loss: 0.041884008795022964 2023-01-22 18:44:44.606028: step: 666/459, loss: 0.02666822262108326 2023-01-22 18:44:45.169149: step: 668/459, loss: 16.345090866088867 2023-01-22 18:44:45.765329: step: 670/459, loss: 0.02789297141134739 2023-01-22 18:44:46.364875: step: 672/459, loss: 0.027722375467419624 2023-01-22 18:44:47.041823: step: 674/459, loss: 0.3250839114189148 2023-01-22 18:44:47.611272: step: 676/459, loss: 0.001986500108614564 2023-01-22 18:44:48.224167: step: 678/459, loss: 0.0042684548534452915 2023-01-22 18:44:48.797120: step: 680/459, loss: 0.020431874319911003 2023-01-22 18:44:49.413468: step: 682/459, loss: 0.00014531787019222975 2023-01-22 18:44:50.019264: step: 684/459, loss: 0.010616187006235123 2023-01-22 18:44:50.586656: step: 686/459, loss: 0.0006878653657622635 2023-01-22 18:44:51.157453: step: 688/459, loss: 0.006892195902764797 2023-01-22 18:44:51.812579: step: 690/459, loss: 0.0057054623030126095 2023-01-22 18:44:52.425607: step: 692/459, loss: 0.07188019156455994 2023-01-22 18:44:53.035580: step: 694/459, loss: 0.016578758135437965 2023-01-22 18:44:53.634345: step: 696/459, loss: 7.557895878562704e-05 2023-01-22 18:44:54.155330: step: 698/459, loss: 0.009124752134084702 2023-01-22 18:44:54.734771: step: 700/459, loss: 0.03476976603269577 2023-01-22 18:44:55.405105: step: 702/459, loss: 0.053747352212667465 2023-01-22 18:44:56.060553: step: 704/459, loss: 0.08957261592149734 2023-01-22 18:44:56.618233: step: 706/459, loss: 0.006739290896803141 2023-01-22 18:44:57.243756: step: 708/459, loss: 0.0020346154924482107 2023-01-22 18:44:57.825717: step: 710/459, loss: 0.008989005349576473 2023-01-22 18:44:58.458416: step: 712/459, loss: 0.1910293698310852 2023-01-22 18:44:59.059031: step: 714/459, loss: 0.0018364692805334926 2023-01-22 18:44:59.647285: step: 716/459, loss: 0.01338303554803133 2023-01-22 18:45:00.213811: step: 718/459, loss: 0.001966721611097455 2023-01-22 18:45:00.838921: step: 720/459, loss: 0.02695549838244915 2023-01-22 18:45:01.433335: step: 722/459, loss: 0.012957463972270489 2023-01-22 18:45:02.062573: step: 724/459, loss: 0.003729237010702491 2023-01-22 18:45:02.692226: step: 726/459, loss: 0.18476684391498566 2023-01-22 18:45:03.220228: step: 728/459, loss: 0.0036338421050459146 2023-01-22 18:45:03.833110: step: 730/459, loss: 0.0004784781485795975 2023-01-22 18:45:04.404230: step: 732/459, loss: 0.2120058685541153 2023-01-22 18:45:05.134080: step: 734/459, loss: 0.012470971792936325 2023-01-22 18:45:05.709256: step: 736/459, loss: 0.029948070645332336 2023-01-22 18:45:06.329147: step: 738/459, loss: 0.010385624133050442 2023-01-22 18:45:06.903123: step: 740/459, loss: 0.003605615347623825 2023-01-22 18:45:07.507243: step: 742/459, loss: 0.0023071873001754284 2023-01-22 18:45:08.103587: step: 744/459, loss: 0.011693268083035946 2023-01-22 18:45:08.702723: step: 746/459, loss: 0.0012003894662484527 2023-01-22 18:45:09.315593: step: 748/459, loss: 0.013994552195072174 2023-01-22 18:45:09.905298: step: 750/459, loss: 0.009956649504601955 2023-01-22 18:45:10.498088: step: 752/459, loss: 0.005756675265729427 2023-01-22 18:45:11.097544: step: 754/459, loss: 0.17727860808372498 2023-01-22 18:45:11.764306: step: 756/459, loss: 0.07029744237661362 2023-01-22 18:45:12.368293: step: 758/459, loss: 0.0007379419403150678 2023-01-22 18:45:12.921635: step: 760/459, loss: 0.02882036752998829 2023-01-22 18:45:13.601567: step: 762/459, loss: 0.08657561987638474 2023-01-22 18:45:14.142065: step: 764/459, loss: 0.06726551800966263 2023-01-22 18:45:14.811665: step: 766/459, loss: 0.13402123749256134 2023-01-22 18:45:15.415853: step: 768/459, loss: 0.00031902719638310373 2023-01-22 18:45:15.965549: step: 770/459, loss: 0.005244940984994173 2023-01-22 18:45:16.545785: step: 772/459, loss: 0.16740316152572632 2023-01-22 18:45:17.111741: step: 774/459, loss: 0.25343796610832214 2023-01-22 18:45:17.782236: step: 776/459, loss: 0.00262244394980371 2023-01-22 18:45:18.463680: step: 778/459, loss: 0.9804738163948059 2023-01-22 18:45:19.000830: step: 780/459, loss: 0.002340376842767 2023-01-22 18:45:19.671055: step: 782/459, loss: 0.011986064724624157 2023-01-22 18:45:20.303341: step: 784/459, loss: 0.009925748221576214 2023-01-22 18:45:20.938873: step: 786/459, loss: 0.009919237345457077 2023-01-22 18:45:21.564629: step: 788/459, loss: 0.031644511967897415 2023-01-22 18:45:22.187891: step: 790/459, loss: 0.021729620173573494 2023-01-22 18:45:22.819720: step: 792/459, loss: 0.003510986687615514 2023-01-22 18:45:23.429309: step: 794/459, loss: 0.07396011799573898 2023-01-22 18:45:24.006718: step: 796/459, loss: 0.8313906192779541 2023-01-22 18:45:24.610642: step: 798/459, loss: 0.030764242634177208 2023-01-22 18:45:25.259786: step: 800/459, loss: 0.1899876892566681 2023-01-22 18:45:25.881897: step: 802/459, loss: 0.009401453658938408 2023-01-22 18:45:26.453218: step: 804/459, loss: 0.0012860166607424617 2023-01-22 18:45:26.995224: step: 806/459, loss: 0.0015073898248374462 2023-01-22 18:45:27.590225: step: 808/459, loss: 0.0023838067427277565 2023-01-22 18:45:28.154390: step: 810/459, loss: 0.036527104675769806 2023-01-22 18:45:28.723277: step: 812/459, loss: 0.0043732537887990475 2023-01-22 18:45:29.297113: step: 814/459, loss: 0.003633215557783842 2023-01-22 18:45:29.894039: step: 816/459, loss: 0.0009681982919573784 2023-01-22 18:45:30.465540: step: 818/459, loss: 0.0071206409484148026 2023-01-22 18:45:31.048515: step: 820/459, loss: 0.05970104783773422 2023-01-22 18:45:31.653612: step: 822/459, loss: 0.027183666825294495 2023-01-22 18:45:32.233720: step: 824/459, loss: 0.025332432240247726 2023-01-22 18:45:32.817568: step: 826/459, loss: 0.011281566694378853 2023-01-22 18:45:33.393545: step: 828/459, loss: 0.4604855179786682 2023-01-22 18:45:34.054203: step: 830/459, loss: 0.0009632374276407063 2023-01-22 18:45:34.689445: step: 832/459, loss: 0.036148782819509506 2023-01-22 18:45:35.315151: step: 834/459, loss: 0.03239819034934044 2023-01-22 18:45:35.939199: step: 836/459, loss: 0.002022266387939453 2023-01-22 18:45:36.533536: step: 838/459, loss: 0.03391813486814499 2023-01-22 18:45:37.136188: step: 840/459, loss: 0.003939043264836073 2023-01-22 18:45:37.701830: step: 842/459, loss: 0.008826307021081448 2023-01-22 18:45:38.231611: step: 844/459, loss: 0.014437601901590824 2023-01-22 18:45:38.829131: step: 846/459, loss: 0.0063597653061151505 2023-01-22 18:45:39.450836: step: 848/459, loss: 0.01831374317407608 2023-01-22 18:45:40.193849: step: 850/459, loss: 0.011505232192575932 2023-01-22 18:45:40.816254: step: 852/459, loss: 0.03413465619087219 2023-01-22 18:45:41.464157: step: 854/459, loss: 0.037814415991306305 2023-01-22 18:45:42.014115: step: 856/459, loss: 0.0010327694471925497 2023-01-22 18:45:42.564635: step: 858/459, loss: 0.013628397136926651 2023-01-22 18:45:43.166611: step: 860/459, loss: 0.0024954723194241524 2023-01-22 18:45:43.754778: step: 862/459, loss: 9.057804709300399e-05 2023-01-22 18:45:44.368634: step: 864/459, loss: 0.5603397488594055 2023-01-22 18:45:45.046768: step: 866/459, loss: 0.02214735932648182 2023-01-22 18:45:45.643302: step: 868/459, loss: 0.0028728279285132885 2023-01-22 18:45:46.284715: step: 870/459, loss: 0.0033663390204310417 2023-01-22 18:45:46.888510: step: 872/459, loss: 0.012459699995815754 2023-01-22 18:45:47.502692: step: 874/459, loss: 0.006558075547218323 2023-01-22 18:45:48.085314: step: 876/459, loss: 0.012605165131390095 2023-01-22 18:45:48.642966: step: 878/459, loss: 0.002759141381829977 2023-01-22 18:45:49.289939: step: 880/459, loss: 0.0010657565435394645 2023-01-22 18:45:49.918339: step: 882/459, loss: 0.001890946994535625 2023-01-22 18:45:50.490699: step: 884/459, loss: 0.007643773220479488 2023-01-22 18:45:51.224150: step: 886/459, loss: 0.006517336238175631 2023-01-22 18:45:51.807926: step: 888/459, loss: 0.11441607773303986 2023-01-22 18:45:52.418154: step: 890/459, loss: 0.009718586690723896 2023-01-22 18:45:53.112169: step: 892/459, loss: 0.02569263055920601 2023-01-22 18:45:53.729731: step: 894/459, loss: 0.03730001673102379 2023-01-22 18:45:54.337867: step: 896/459, loss: 0.02126590721309185 2023-01-22 18:45:54.930327: step: 898/459, loss: 0.020560549572110176 2023-01-22 18:45:55.512152: step: 900/459, loss: 0.00907465536147356 2023-01-22 18:45:56.122715: step: 902/459, loss: 0.0004850794794037938 2023-01-22 18:45:56.736167: step: 904/459, loss: 0.3131081759929657 2023-01-22 18:45:57.357863: step: 906/459, loss: 0.0014433983014896512 2023-01-22 18:45:57.942292: step: 908/459, loss: 0.0013374601257964969 2023-01-22 18:45:58.516853: step: 910/459, loss: 0.06464771926403046 2023-01-22 18:45:59.105053: step: 912/459, loss: 0.028771625831723213 2023-01-22 18:45:59.690740: step: 914/459, loss: 0.005821524187922478 2023-01-22 18:46:00.348965: step: 916/459, loss: 0.11277162283658981 2023-01-22 18:46:00.969776: step: 918/459, loss: 0.00046081640175543725 2023-01-22 18:46:01.402865: step: 920/459, loss: 0.0005697258748114109 ================================================== Loss: 0.073 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29920405982905984, 'r': 0.33213353889943076, 'f1': 0.31481002697841726}, 'combined': 0.2319652830367285, 'epoch': 34} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.326097829448058, 'r': 0.3154158259356557, 'f1': 0.32066789334253987}, 'combined': 0.2052274517392255, 'epoch': 34} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30040313746355946, 'r': 0.3408749074064678, 'f1': 0.3193619132501485}, 'combined': 0.2353193045001094, 'epoch': 34} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3271801441009591, 'r': 0.31705810142631613, 'f1': 0.32203960579253366}, 'combined': 0.2061053477072215, 'epoch': 34} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31149036487223647, 'r': 0.32744907426796777, 'f1': 0.3192704202390731}, 'combined': 0.23525188859721174, 'epoch': 34} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3225628670860818, 'r': 0.32491519428452176, 'f1': 0.3237347576113718}, 'combined': 0.23211171300437983, 'epoch': 34} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25170068027210885, 'r': 0.35238095238095235, 'f1': 0.29365079365079366}, 'combined': 0.19576719576719576, 'epoch': 34} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.20833333333333334, 'r': 0.32608695652173914, 'f1': 0.25423728813559326}, 'combined': 0.12711864406779663, 'epoch': 34} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29411764705882354, 'r': 0.1724137931034483, 'f1': 0.2173913043478261}, 'combined': 0.14492753623188406, 'epoch': 34} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3066986908783784, 'r': 0.3445268026565465, 'f1': 0.3245140750670242}, 'combined': 0.23911563425991253, 'epoch': 19} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31271473684488904, 'r': 0.30873473110322686, 'f1': 0.31071198921642224}, 'combined': 0.1988556730985102, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2564102564102564, 'r': 0.38095238095238093, 'f1': 0.3065134099616858}, 'combined': 0.20434227330779053, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29418702499903554, 'r': 0.3594998939267151, 'f1': 0.32358060478117656}, 'combined': 0.23842781404928798, 'epoch': 13} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3202762492367604, 'r': 0.31212376289255195, 'f1': 0.31614745781013553}, 'combined': 0.20233437299848672, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2878787878787879, 'r': 0.41304347826086957, 'f1': 0.3392857142857143}, 'combined': 0.16964285714285715, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3189032241859154, 'r': 0.3376622373733222, 'f1': 0.32801474487694154}, 'combined': 0.24169507517248323, 'epoch': 15} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34157316773046453, 'r': 0.3008578947798814, 'f1': 0.3199253278791385}, 'combined': 0.22938042376240123, 'epoch': 15} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35294117647058826, 'r': 0.20689655172413793, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 15} ****************************** Epoch: 35 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 18:48:34.218918: step: 2/459, loss: 0.0012423836160451174 2023-01-22 18:48:34.810667: step: 4/459, loss: 0.002620651852339506 2023-01-22 18:48:35.428333: step: 6/459, loss: 0.0013258722610771656 2023-01-22 18:48:35.947105: step: 8/459, loss: 0.04647454619407654 2023-01-22 18:48:36.524573: step: 10/459, loss: 0.00031760166166350245 2023-01-22 18:48:37.157136: step: 12/459, loss: 0.016491837799549103 2023-01-22 18:48:37.721696: step: 14/459, loss: 0.00745420204475522 2023-01-22 18:48:38.282475: step: 16/459, loss: 0.0008145593455992639 2023-01-22 18:48:38.827647: step: 18/459, loss: 4.949169306200929e-05 2023-01-22 18:48:39.412319: step: 20/459, loss: 0.012735188007354736 2023-01-22 18:48:40.028773: step: 22/459, loss: 0.014565438032150269 2023-01-22 18:48:40.646970: step: 24/459, loss: 0.0007280361605808139 2023-01-22 18:48:41.190112: step: 26/459, loss: 0.03453516960144043 2023-01-22 18:48:41.782670: step: 28/459, loss: 6.57126511214301e-05 2023-01-22 18:48:42.380812: step: 30/459, loss: 2.489109516143799 2023-01-22 18:48:42.951979: step: 32/459, loss: 0.0052749221213161945 2023-01-22 18:48:43.564434: step: 34/459, loss: 0.0036776717752218246 2023-01-22 18:48:44.195949: step: 36/459, loss: 0.008727381005883217 2023-01-22 18:48:44.792257: step: 38/459, loss: 0.006715184077620506 2023-01-22 18:48:45.335345: step: 40/459, loss: 0.0017150420462712646 2023-01-22 18:48:46.001464: step: 42/459, loss: 0.0189694631844759 2023-01-22 18:48:46.559439: step: 44/459, loss: 0.021690497174859047 2023-01-22 18:48:47.096952: step: 46/459, loss: 0.0003741238615475595 2023-01-22 18:48:47.683806: step: 48/459, loss: 0.0666770488023758 2023-01-22 18:48:48.220656: step: 50/459, loss: 0.025837155058979988 2023-01-22 18:48:48.759837: step: 52/459, loss: 0.105820432305336 2023-01-22 18:48:49.359719: step: 54/459, loss: 0.003440660424530506 2023-01-22 18:48:49.938221: step: 56/459, loss: 0.001056927372701466 2023-01-22 18:48:50.597915: step: 58/459, loss: 0.001444103429093957 2023-01-22 18:48:51.208625: step: 60/459, loss: 0.010327134281396866 2023-01-22 18:48:51.784839: step: 62/459, loss: 0.001597592607140541 2023-01-22 18:48:52.369592: step: 64/459, loss: 0.004898462910205126 2023-01-22 18:48:53.125392: step: 66/459, loss: 6.447954365285113e-05 2023-01-22 18:48:53.676446: step: 68/459, loss: 0.0021969967056065798 2023-01-22 18:48:54.353988: step: 70/459, loss: 0.02766595222055912 2023-01-22 18:48:55.012548: step: 72/459, loss: 5.0358648877590895e-05 2023-01-22 18:48:55.582804: step: 74/459, loss: 0.0007716037798672915 2023-01-22 18:48:56.275837: step: 76/459, loss: 0.01923547312617302 2023-01-22 18:48:56.918403: step: 78/459, loss: 0.012469765730202198 2023-01-22 18:48:57.532134: step: 80/459, loss: 0.0161354448646307 2023-01-22 18:48:58.154839: step: 82/459, loss: 0.01704779453575611 2023-01-22 18:48:58.731666: step: 84/459, loss: 0.24805068969726562 2023-01-22 18:48:59.376366: step: 86/459, loss: 0.0001792940020095557 2023-01-22 18:48:59.973338: step: 88/459, loss: 6.51878144708462e-05 2023-01-22 18:49:00.688586: step: 90/459, loss: 0.004044809378683567 2023-01-22 18:49:01.281931: step: 92/459, loss: 0.015932125970721245 2023-01-22 18:49:01.897840: step: 94/459, loss: 0.01174802053719759 2023-01-22 18:49:02.521508: step: 96/459, loss: 0.03457477316260338 2023-01-22 18:49:03.098006: step: 98/459, loss: 0.0011719996109604836 2023-01-22 18:49:03.691429: step: 100/459, loss: 0.01587578095495701 2023-01-22 18:49:04.383270: step: 102/459, loss: 0.0047843316569924355 2023-01-22 18:49:04.936389: step: 104/459, loss: 0.004457125905901194 2023-01-22 18:49:05.541170: step: 106/459, loss: 0.01814371533691883 2023-01-22 18:49:06.090139: step: 108/459, loss: 0.0010491755092516541 2023-01-22 18:49:06.808061: step: 110/459, loss: 0.003320592688396573 2023-01-22 18:49:07.411914: step: 112/459, loss: 0.024496011435985565 2023-01-22 18:49:08.043392: step: 114/459, loss: 0.0029959254898130894 2023-01-22 18:49:08.588963: step: 116/459, loss: 0.02081713080406189 2023-01-22 18:49:09.153173: step: 118/459, loss: 0.0007607058505527675 2023-01-22 18:49:09.789345: step: 120/459, loss: 0.01679937168955803 2023-01-22 18:49:10.378031: step: 122/459, loss: 0.0005059437244199216 2023-01-22 18:49:10.948982: step: 124/459, loss: 8.592662925366312e-05 2023-01-22 18:49:11.519998: step: 126/459, loss: 0.014796567149460316 2023-01-22 18:49:12.122322: step: 128/459, loss: 0.005219867918640375 2023-01-22 18:49:12.685676: step: 130/459, loss: 7.554578041890636e-05 2023-01-22 18:49:13.300424: step: 132/459, loss: 0.0018122289329767227 2023-01-22 18:49:13.875105: step: 134/459, loss: 0.06967709958553314 2023-01-22 18:49:14.486523: step: 136/459, loss: 0.00010596955689834431 2023-01-22 18:49:15.095843: step: 138/459, loss: 0.03573021665215492 2023-01-22 18:49:15.716779: step: 140/459, loss: 0.0009620336932130158 2023-01-22 18:49:16.338907: step: 142/459, loss: 0.0005689216195605695 2023-01-22 18:49:16.955118: step: 144/459, loss: 0.009319552220404148 2023-01-22 18:49:17.547332: step: 146/459, loss: 0.005090351216495037 2023-01-22 18:49:18.184795: step: 148/459, loss: 0.01703721471130848 2023-01-22 18:49:18.822563: step: 150/459, loss: 0.03767284378409386 2023-01-22 18:49:19.457946: step: 152/459, loss: 0.0007746131741441786 2023-01-22 18:49:20.083380: step: 154/459, loss: 0.0001798786106519401 2023-01-22 18:49:20.665014: step: 156/459, loss: 0.05063090845942497 2023-01-22 18:49:21.280199: step: 158/459, loss: 0.01595304161310196 2023-01-22 18:49:21.871232: step: 160/459, loss: 0.3636937439441681 2023-01-22 18:49:22.500770: step: 162/459, loss: 0.18731467425823212 2023-01-22 18:49:23.137773: step: 164/459, loss: 0.44174593687057495 2023-01-22 18:49:23.699523: step: 166/459, loss: 0.09200941026210785 2023-01-22 18:49:24.327834: step: 168/459, loss: 0.021957598626613617 2023-01-22 18:49:24.894661: step: 170/459, loss: 0.01609627716243267 2023-01-22 18:49:25.489748: step: 172/459, loss: 0.00014058481610845774 2023-01-22 18:49:26.003734: step: 174/459, loss: 0.0007692097569815814 2023-01-22 18:49:26.613695: step: 176/459, loss: 0.07667554169893265 2023-01-22 18:49:27.231887: step: 178/459, loss: 0.0033506155014038086 2023-01-22 18:49:27.882675: step: 180/459, loss: 0.0010036592138931155 2023-01-22 18:49:28.521520: step: 182/459, loss: 0.005025631748139858 2023-01-22 18:49:29.233691: step: 184/459, loss: 0.027214419096708298 2023-01-22 18:49:29.892225: step: 186/459, loss: 0.00930073019117117 2023-01-22 18:49:30.516755: step: 188/459, loss: 0.054618533700704575 2023-01-22 18:49:31.130518: step: 190/459, loss: 0.00023515797511208802 2023-01-22 18:49:31.662614: step: 192/459, loss: 0.006277717184275389 2023-01-22 18:49:32.285588: step: 194/459, loss: 0.016913380473852158 2023-01-22 18:49:32.848364: step: 196/459, loss: 0.007010230794548988 2023-01-22 18:49:33.436348: step: 198/459, loss: 0.004677202086895704 2023-01-22 18:49:34.030410: step: 200/459, loss: 7.38484668545425e-05 2023-01-22 18:49:34.671631: step: 202/459, loss: 9.609689004719257e-05 2023-01-22 18:49:35.289712: step: 204/459, loss: 0.07504397630691528 2023-01-22 18:49:35.848175: step: 206/459, loss: 0.0026579590048640966 2023-01-22 18:49:36.497129: step: 208/459, loss: 0.004858457949012518 2023-01-22 18:49:37.066156: step: 210/459, loss: 0.00016412649711128324 2023-01-22 18:49:37.689578: step: 212/459, loss: 0.0022227205336093903 2023-01-22 18:49:38.231203: step: 214/459, loss: 0.0013904239749535918 2023-01-22 18:49:38.795142: step: 216/459, loss: 0.002501655602827668 2023-01-22 18:49:39.409417: step: 218/459, loss: 0.0036807151045650244 2023-01-22 18:49:40.015009: step: 220/459, loss: 0.06241341307759285 2023-01-22 18:49:40.617440: step: 222/459, loss: 0.013100613839924335 2023-01-22 18:49:41.210697: step: 224/459, loss: 0.001904606819152832 2023-01-22 18:49:41.839776: step: 226/459, loss: 0.0408858060836792 2023-01-22 18:49:42.395834: step: 228/459, loss: 0.0006548513774760067 2023-01-22 18:49:43.008540: step: 230/459, loss: 0.0020721396431326866 2023-01-22 18:49:43.629580: step: 232/459, loss: 0.059017397463321686 2023-01-22 18:49:44.236442: step: 234/459, loss: 0.0018018637783825397 2023-01-22 18:49:44.901127: step: 236/459, loss: 0.04805392771959305 2023-01-22 18:49:45.447728: step: 238/459, loss: 0.001921827788464725 2023-01-22 18:49:46.003017: step: 240/459, loss: 0.0017660903977230191 2023-01-22 18:49:46.658043: step: 242/459, loss: 0.0038659384008497 2023-01-22 18:49:47.272192: step: 244/459, loss: 0.0034904431086033583 2023-01-22 18:49:47.865196: step: 246/459, loss: 0.0015959928277879953 2023-01-22 18:49:48.468781: step: 248/459, loss: 0.01237226277589798 2023-01-22 18:49:49.068204: step: 250/459, loss: 0.0019120313227176666 2023-01-22 18:49:49.643115: step: 252/459, loss: 0.05013328418135643 2023-01-22 18:49:50.222438: step: 254/459, loss: 0.009803527034819126 2023-01-22 18:49:50.788289: step: 256/459, loss: 0.009593049064278603 2023-01-22 18:49:51.377666: step: 258/459, loss: 0.029860135167837143 2023-01-22 18:49:51.949134: step: 260/459, loss: 0.006872698664665222 2023-01-22 18:49:52.500986: step: 262/459, loss: 0.0011304543586447835 2023-01-22 18:49:53.022804: step: 264/459, loss: 0.004117137752473354 2023-01-22 18:49:53.531443: step: 266/459, loss: 0.002067325171083212 2023-01-22 18:49:54.095176: step: 268/459, loss: 0.015100291930139065 2023-01-22 18:49:54.694271: step: 270/459, loss: 0.0005733887664973736 2023-01-22 18:49:55.296806: step: 272/459, loss: 0.007912802509963512 2023-01-22 18:49:55.883183: step: 274/459, loss: 0.002140376018360257 2023-01-22 18:49:56.440234: step: 276/459, loss: 0.0034009995870292187 2023-01-22 18:49:57.091605: step: 278/459, loss: 0.01712910830974579 2023-01-22 18:49:57.646361: step: 280/459, loss: 0.003460431471467018 2023-01-22 18:49:58.182789: step: 282/459, loss: 0.005905568599700928 2023-01-22 18:49:58.831173: step: 284/459, loss: 0.006856928113847971 2023-01-22 18:49:59.489167: step: 286/459, loss: 0.0002509381447453052 2023-01-22 18:50:00.110949: step: 288/459, loss: 0.019645823165774345 2023-01-22 18:50:00.729319: step: 290/459, loss: 0.00014627192285843194 2023-01-22 18:50:01.354330: step: 292/459, loss: 0.00617293082177639 2023-01-22 18:50:01.952406: step: 294/459, loss: 0.0014588801423087716 2023-01-22 18:50:02.631032: step: 296/459, loss: 0.0033057485707104206 2023-01-22 18:50:03.248895: step: 298/459, loss: 0.08527544140815735 2023-01-22 18:50:03.857662: step: 300/459, loss: 0.00030394401983357966 2023-01-22 18:50:04.439297: step: 302/459, loss: 0.18286427855491638 2023-01-22 18:50:05.021389: step: 304/459, loss: 0.9518385529518127 2023-01-22 18:50:05.618575: step: 306/459, loss: 0.02664460986852646 2023-01-22 18:50:06.248262: step: 308/459, loss: 0.00031721845152787864 2023-01-22 18:50:06.899186: step: 310/459, loss: 0.0009765956783667207 2023-01-22 18:50:07.507342: step: 312/459, loss: 0.00048382964450865984 2023-01-22 18:50:08.071264: step: 314/459, loss: 0.005042421165853739 2023-01-22 18:50:08.701192: step: 316/459, loss: 0.007140889763832092 2023-01-22 18:50:09.333517: step: 318/459, loss: 0.010789673775434494 2023-01-22 18:50:09.893117: step: 320/459, loss: 0.060460321605205536 2023-01-22 18:50:10.556965: step: 322/459, loss: 0.015583538450300694 2023-01-22 18:50:11.157626: step: 324/459, loss: 0.006228788290172815 2023-01-22 18:50:11.745927: step: 326/459, loss: 0.008966827765107155 2023-01-22 18:50:12.314811: step: 328/459, loss: 1.433190300303977e-05 2023-01-22 18:50:12.907458: step: 330/459, loss: 0.011481096968054771 2023-01-22 18:50:13.520108: step: 332/459, loss: 0.0076582967303693295 2023-01-22 18:50:14.139629: step: 334/459, loss: 0.0016325023025274277 2023-01-22 18:50:14.823783: step: 336/459, loss: 0.009761678986251354 2023-01-22 18:50:15.419382: step: 338/459, loss: 0.01322714053094387 2023-01-22 18:50:16.089979: step: 340/459, loss: 0.001925047836266458 2023-01-22 18:50:16.691726: step: 342/459, loss: 0.00872771441936493 2023-01-22 18:50:17.259495: step: 344/459, loss: 0.0013646844308823347 2023-01-22 18:50:17.845490: step: 346/459, loss: 0.0026225445326417685 2023-01-22 18:50:18.463954: step: 348/459, loss: 0.006605423521250486 2023-01-22 18:50:19.123722: step: 350/459, loss: 0.008495595306158066 2023-01-22 18:50:19.742921: step: 352/459, loss: 0.0003255844349041581 2023-01-22 18:50:20.301655: step: 354/459, loss: 0.015386637300252914 2023-01-22 18:50:21.001288: step: 356/459, loss: 0.13493381440639496 2023-01-22 18:50:21.645180: step: 358/459, loss: 4.268726348876953 2023-01-22 18:50:22.225079: step: 360/459, loss: 0.009113338775932789 2023-01-22 18:50:22.822885: step: 362/459, loss: 0.0016047299141064286 2023-01-22 18:50:23.426554: step: 364/459, loss: 0.024845076724886894 2023-01-22 18:50:24.045518: step: 366/459, loss: 0.005039548967033625 2023-01-22 18:50:24.678195: step: 368/459, loss: 0.03033100999891758 2023-01-22 18:50:25.276335: step: 370/459, loss: 0.00078870594734326 2023-01-22 18:50:25.865318: step: 372/459, loss: 0.0067002237774431705 2023-01-22 18:50:26.501654: step: 374/459, loss: 0.007327121216803789 2023-01-22 18:50:27.105866: step: 376/459, loss: 0.0039058388210833073 2023-01-22 18:50:27.751965: step: 378/459, loss: 0.015310767106711864 2023-01-22 18:50:28.377892: step: 380/459, loss: 0.010465397499501705 2023-01-22 18:50:28.919284: step: 382/459, loss: 0.09005318582057953 2023-01-22 18:50:29.527894: step: 384/459, loss: 0.0001726438495097682 2023-01-22 18:50:30.162680: step: 386/459, loss: 0.005585835315287113 2023-01-22 18:50:30.805151: step: 388/459, loss: 0.010599546134471893 2023-01-22 18:50:31.381895: step: 390/459, loss: 0.005056166090071201 2023-01-22 18:50:32.027877: step: 392/459, loss: 0.01284484751522541 2023-01-22 18:50:32.713248: step: 394/459, loss: 0.024589311331510544 2023-01-22 18:50:33.405133: step: 396/459, loss: 0.02438373677432537 2023-01-22 18:50:34.046628: step: 398/459, loss: 0.008742649108171463 2023-01-22 18:50:34.674383: step: 400/459, loss: 9.510487143415958e-05 2023-01-22 18:50:35.266169: step: 402/459, loss: 0.40432578325271606 2023-01-22 18:50:35.858116: step: 404/459, loss: 8.791349682724103e-05 2023-01-22 18:50:36.476987: step: 406/459, loss: 0.04368646442890167 2023-01-22 18:50:37.152453: step: 408/459, loss: 0.08347027748823166 2023-01-22 18:50:37.757499: step: 410/459, loss: 0.0005805473774671555 2023-01-22 18:50:38.382231: step: 412/459, loss: 0.0011339386692270637 2023-01-22 18:50:39.025927: step: 414/459, loss: 0.00029597964021377265 2023-01-22 18:50:39.693646: step: 416/459, loss: 0.00013077587937004864 2023-01-22 18:50:40.331565: step: 418/459, loss: 0.007376216351985931 2023-01-22 18:50:40.937896: step: 420/459, loss: 0.008727934211492538 2023-01-22 18:50:41.572197: step: 422/459, loss: 0.00014411572192329913 2023-01-22 18:50:42.200184: step: 424/459, loss: 0.00011345432722009718 2023-01-22 18:50:42.847990: step: 426/459, loss: 0.0012389865005388856 2023-01-22 18:50:43.422217: step: 428/459, loss: 0.005017990246415138 2023-01-22 18:50:44.080716: step: 430/459, loss: 0.002782910130918026 2023-01-22 18:50:44.597352: step: 432/459, loss: 0.006840554066002369 2023-01-22 18:50:45.211095: step: 434/459, loss: 0.0020463322289288044 2023-01-22 18:50:45.819627: step: 436/459, loss: 0.010430690832436085 2023-01-22 18:50:46.429539: step: 438/459, loss: 6.768768071196973e-05 2023-01-22 18:50:47.037081: step: 440/459, loss: 0.023811299353837967 2023-01-22 18:50:47.702608: step: 442/459, loss: 0.008041372522711754 2023-01-22 18:50:48.258256: step: 444/459, loss: 0.008363822475075722 2023-01-22 18:50:48.907094: step: 446/459, loss: 0.004450314678251743 2023-01-22 18:50:49.508168: step: 448/459, loss: 0.003951564431190491 2023-01-22 18:50:50.103741: step: 450/459, loss: 0.010307434014976025 2023-01-22 18:50:50.706120: step: 452/459, loss: 0.0009754710481502116 2023-01-22 18:50:51.239299: step: 454/459, loss: 0.050804685801267624 2023-01-22 18:50:51.877396: step: 456/459, loss: 0.020802022889256477 2023-01-22 18:50:52.452245: step: 458/459, loss: 0.03740614652633667 2023-01-22 18:50:53.078776: step: 460/459, loss: 0.013327726162970066 2023-01-22 18:50:53.680113: step: 462/459, loss: 0.00794562790542841 2023-01-22 18:50:54.286285: step: 464/459, loss: 0.0004665796586778015 2023-01-22 18:50:54.958617: step: 466/459, loss: 0.014652355574071407 2023-01-22 18:50:55.508172: step: 468/459, loss: 0.0010529481805860996 2023-01-22 18:50:56.019799: step: 470/459, loss: 0.4690374732017517 2023-01-22 18:50:56.636531: step: 472/459, loss: 0.009591475129127502 2023-01-22 18:50:57.250139: step: 474/459, loss: 0.03173630312085152 2023-01-22 18:50:57.913104: step: 476/459, loss: 0.17004337906837463 2023-01-22 18:50:58.491483: step: 478/459, loss: 0.002253085607662797 2023-01-22 18:50:59.219957: step: 480/459, loss: 0.0015130485408008099 2023-01-22 18:50:59.848641: step: 482/459, loss: 0.037025898694992065 2023-01-22 18:51:00.469545: step: 484/459, loss: 0.030549442395567894 2023-01-22 18:51:01.074793: step: 486/459, loss: 0.025734160095453262 2023-01-22 18:51:01.702741: step: 488/459, loss: 0.008658588863909245 2023-01-22 18:51:02.364490: step: 490/459, loss: 0.012428553774952888 2023-01-22 18:51:03.028940: step: 492/459, loss: 0.016394751146435738 2023-01-22 18:51:03.630310: step: 494/459, loss: 1.2773910760879517 2023-01-22 18:51:04.251832: step: 496/459, loss: 0.015607055276632309 2023-01-22 18:51:04.861480: step: 498/459, loss: 0.001706192851997912 2023-01-22 18:51:05.399151: step: 500/459, loss: 0.0019956587348133326 2023-01-22 18:51:05.968251: step: 502/459, loss: 0.0036083043087273836 2023-01-22 18:51:06.568138: step: 504/459, loss: 0.28843894600868225 2023-01-22 18:51:07.232345: step: 506/459, loss: 0.00059811508981511 2023-01-22 18:51:07.795622: step: 508/459, loss: 0.0007141989772208035 2023-01-22 18:51:08.484449: step: 510/459, loss: 0.0020099086686968803 2023-01-22 18:51:09.050319: step: 512/459, loss: 0.0010586472926661372 2023-01-22 18:51:09.621552: step: 514/459, loss: 0.013290375471115112 2023-01-22 18:51:10.272219: step: 516/459, loss: 0.06412182748317719 2023-01-22 18:51:10.844158: step: 518/459, loss: 0.06791923940181732 2023-01-22 18:51:11.407665: step: 520/459, loss: 0.0011999823618680239 2023-01-22 18:51:12.038813: step: 522/459, loss: 0.8952812552452087 2023-01-22 18:51:12.689737: step: 524/459, loss: 0.01450792234390974 2023-01-22 18:51:13.287892: step: 526/459, loss: 0.0013402617769315839 2023-01-22 18:51:13.923131: step: 528/459, loss: 0.0012488276697695255 2023-01-22 18:51:14.542615: step: 530/459, loss: 0.13525497913360596 2023-01-22 18:51:15.165607: step: 532/459, loss: 0.013475441373884678 2023-01-22 18:51:15.783771: step: 534/459, loss: 0.0012885541655123234 2023-01-22 18:51:16.352617: step: 536/459, loss: 3.060702147195116e-05 2023-01-22 18:51:16.959292: step: 538/459, loss: 0.028864100575447083 2023-01-22 18:51:17.614048: step: 540/459, loss: 6.210755236679688e-05 2023-01-22 18:51:18.244560: step: 542/459, loss: 0.008181704208254814 2023-01-22 18:51:18.859786: step: 544/459, loss: 0.3981287181377411 2023-01-22 18:51:19.478197: step: 546/459, loss: 0.0015044391620904207 2023-01-22 18:51:20.120465: step: 548/459, loss: 0.0015206142561510205 2023-01-22 18:51:20.743855: step: 550/459, loss: 0.04159729182720184 2023-01-22 18:51:21.348802: step: 552/459, loss: 0.0001782789040589705 2023-01-22 18:51:21.914345: step: 554/459, loss: 0.0009526112698949873 2023-01-22 18:51:22.434334: step: 556/459, loss: 0.0006440942524932325 2023-01-22 18:51:23.043902: step: 558/459, loss: 0.0008196951821446419 2023-01-22 18:51:23.619421: step: 560/459, loss: 0.004664139822125435 2023-01-22 18:51:24.229712: step: 562/459, loss: 0.008534098975360394 2023-01-22 18:51:24.856530: step: 564/459, loss: 0.1724986582994461 2023-01-22 18:51:25.481125: step: 566/459, loss: 0.00013186974683776498 2023-01-22 18:51:26.078169: step: 568/459, loss: 0.008056399412453175 2023-01-22 18:51:26.720889: step: 570/459, loss: 0.038979750126600266 2023-01-22 18:51:27.299086: step: 572/459, loss: 8.814448665361851e-05 2023-01-22 18:51:27.939928: step: 574/459, loss: 0.00036685855593532324 2023-01-22 18:51:28.493397: step: 576/459, loss: 0.020510787144303322 2023-01-22 18:51:29.109253: step: 578/459, loss: 0.041284140199422836 2023-01-22 18:51:29.660278: step: 580/459, loss: 0.008122214116156101 2023-01-22 18:51:30.257734: step: 582/459, loss: 8.926483133109286e-05 2023-01-22 18:51:30.760971: step: 584/459, loss: 0.023125387728214264 2023-01-22 18:51:31.388255: step: 586/459, loss: 0.0029982654377818108 2023-01-22 18:51:31.940970: step: 588/459, loss: 0.0014757340541109443 2023-01-22 18:51:32.547808: step: 590/459, loss: 0.008016414009034634 2023-01-22 18:51:33.168433: step: 592/459, loss: 0.0019304461311548948 2023-01-22 18:51:33.848347: step: 594/459, loss: 0.002653672592714429 2023-01-22 18:51:34.413610: step: 596/459, loss: 0.0007477361359633505 2023-01-22 18:51:35.021496: step: 598/459, loss: 0.08604148030281067 2023-01-22 18:51:35.605812: step: 600/459, loss: 0.006233333609998226 2023-01-22 18:51:36.277922: step: 602/459, loss: 0.02497326210141182 2023-01-22 18:51:36.904442: step: 604/459, loss: 0.0021233647130429745 2023-01-22 18:51:37.521355: step: 606/459, loss: 0.07046283781528473 2023-01-22 18:51:38.124834: step: 608/459, loss: 0.0018301125383004546 2023-01-22 18:51:38.865629: step: 610/459, loss: 0.005097071174532175 2023-01-22 18:51:39.457485: step: 612/459, loss: 0.0003284789272584021 2023-01-22 18:51:40.049441: step: 614/459, loss: 0.03557853028178215 2023-01-22 18:51:40.634407: step: 616/459, loss: 6.6514262471173424e-06 2023-01-22 18:51:41.224615: step: 618/459, loss: 0.015991775318980217 2023-01-22 18:51:41.859020: step: 620/459, loss: 0.0001471045397920534 2023-01-22 18:51:42.446508: step: 622/459, loss: 0.0019338650163263083 2023-01-22 18:51:43.090583: step: 624/459, loss: 0.010881644673645496 2023-01-22 18:51:43.687939: step: 626/459, loss: 0.004155208356678486 2023-01-22 18:51:44.287256: step: 628/459, loss: 0.0005721898633055389 2023-01-22 18:51:44.894798: step: 630/459, loss: 0.003906835801899433 2023-01-22 18:51:45.456332: step: 632/459, loss: 0.011225746013224125 2023-01-22 18:51:46.034663: step: 634/459, loss: 0.0020672075916081667 2023-01-22 18:51:46.633974: step: 636/459, loss: 0.012055108323693275 2023-01-22 18:51:47.252214: step: 638/459, loss: 0.004406269174069166 2023-01-22 18:51:47.897375: step: 640/459, loss: 0.026051996275782585 2023-01-22 18:51:48.483159: step: 642/459, loss: 0.000290150084765628 2023-01-22 18:51:49.083495: step: 644/459, loss: 0.005373687949031591 2023-01-22 18:51:49.688673: step: 646/459, loss: 0.009587976150214672 2023-01-22 18:51:50.399601: step: 648/459, loss: 0.0011810804717242718 2023-01-22 18:51:51.051861: step: 650/459, loss: 1.0941236019134521 2023-01-22 18:51:51.723848: step: 652/459, loss: 0.0033543002791702747 2023-01-22 18:51:52.298743: step: 654/459, loss: 0.03892403841018677 2023-01-22 18:51:52.919106: step: 656/459, loss: 0.011304640211164951 2023-01-22 18:51:53.561583: step: 658/459, loss: 0.03899797052145004 2023-01-22 18:51:54.148155: step: 660/459, loss: 0.012699360959231853 2023-01-22 18:51:54.782974: step: 662/459, loss: 0.03363719582557678 2023-01-22 18:51:55.403734: step: 664/459, loss: 0.005926800426095724 2023-01-22 18:51:56.005648: step: 666/459, loss: 0.01783621497452259 2023-01-22 18:51:56.662167: step: 668/459, loss: 0.0005705186631530523 2023-01-22 18:51:57.276929: step: 670/459, loss: 0.006402057595551014 2023-01-22 18:51:57.883106: step: 672/459, loss: 0.5710983872413635 2023-01-22 18:51:58.490605: step: 674/459, loss: 0.019047781825065613 2023-01-22 18:51:59.111591: step: 676/459, loss: 0.00118681148160249 2023-01-22 18:51:59.762073: step: 678/459, loss: 0.002549814758822322 2023-01-22 18:52:00.353537: step: 680/459, loss: 0.015097343362867832 2023-01-22 18:52:00.945763: step: 682/459, loss: 0.00908929854631424 2023-01-22 18:52:01.511406: step: 684/459, loss: 0.006746775936335325 2023-01-22 18:52:02.077128: step: 686/459, loss: 0.05820062756538391 2023-01-22 18:52:02.682844: step: 688/459, loss: 0.01056618057191372 2023-01-22 18:52:03.235864: step: 690/459, loss: 6.156602466944605e-05 2023-01-22 18:52:03.850454: step: 692/459, loss: 0.04306929185986519 2023-01-22 18:52:04.396051: step: 694/459, loss: 0.000246200681431219 2023-01-22 18:52:05.022807: step: 696/459, loss: 0.005442261695861816 2023-01-22 18:52:05.677848: step: 698/459, loss: 0.01794293522834778 2023-01-22 18:52:06.277185: step: 700/459, loss: 0.020490579307079315 2023-01-22 18:52:06.941072: step: 702/459, loss: 0.0002261945337522775 2023-01-22 18:52:07.562477: step: 704/459, loss: 0.0008301330381073058 2023-01-22 18:52:08.184924: step: 706/459, loss: 0.08432815223932266 2023-01-22 18:52:08.744131: step: 708/459, loss: 0.0004764449258800596 2023-01-22 18:52:09.445339: step: 710/459, loss: 0.01918894425034523 2023-01-22 18:52:10.128531: step: 712/459, loss: 0.0026867412962019444 2023-01-22 18:52:10.746133: step: 714/459, loss: 0.00040033107507042587 2023-01-22 18:52:11.328084: step: 716/459, loss: 8.803174569038674e-05 2023-01-22 18:52:11.938020: step: 718/459, loss: 8.993460505735129e-05 2023-01-22 18:52:12.457543: step: 720/459, loss: 0.05014599487185478 2023-01-22 18:52:13.061554: step: 722/459, loss: 0.05105723813176155 2023-01-22 18:52:13.750950: step: 724/459, loss: 0.07031664997339249 2023-01-22 18:52:14.297103: step: 726/459, loss: 0.3945557177066803 2023-01-22 18:52:14.886892: step: 728/459, loss: 0.0002448858867865056 2023-01-22 18:52:15.527664: step: 730/459, loss: 0.1783016324043274 2023-01-22 18:52:16.079172: step: 732/459, loss: 0.13630139827728271 2023-01-22 18:52:16.650059: step: 734/459, loss: 0.0014800657518208027 2023-01-22 18:52:17.342296: step: 736/459, loss: 0.0006168074905872345 2023-01-22 18:52:17.912122: step: 738/459, loss: 0.0027281618677079678 2023-01-22 18:52:18.513888: step: 740/459, loss: 4.302806337364018e-05 2023-01-22 18:52:19.113811: step: 742/459, loss: 0.008935817517340183 2023-01-22 18:52:19.748495: step: 744/459, loss: 0.04514708369970322 2023-01-22 18:52:20.333882: step: 746/459, loss: 0.30270904302597046 2023-01-22 18:52:20.939777: step: 748/459, loss: 0.0011486273724585772 2023-01-22 18:52:21.573262: step: 750/459, loss: 0.0003582634963095188 2023-01-22 18:52:22.183296: step: 752/459, loss: 0.0018670476274564862 2023-01-22 18:52:22.782342: step: 754/459, loss: 0.001576111069880426 2023-01-22 18:52:23.370330: step: 756/459, loss: 0.0347810760140419 2023-01-22 18:52:24.003538: step: 758/459, loss: 0.030040843412280083 2023-01-22 18:52:24.703383: step: 760/459, loss: 0.03218269348144531 2023-01-22 18:52:25.288374: step: 762/459, loss: 0.008497079834342003 2023-01-22 18:52:25.936540: step: 764/459, loss: 0.005122836213558912 2023-01-22 18:52:26.626314: step: 766/459, loss: 0.024918600916862488 2023-01-22 18:52:27.189402: step: 768/459, loss: 0.0032731725368648767 2023-01-22 18:52:27.842173: step: 770/459, loss: 0.0020442025270313025 2023-01-22 18:52:28.393299: step: 772/459, loss: 0.0009415242238901556 2023-01-22 18:52:28.984237: step: 774/459, loss: 0.0004247945616953075 2023-01-22 18:52:29.561981: step: 776/459, loss: 0.0026058161165565252 2023-01-22 18:52:30.104342: step: 778/459, loss: 0.0004967059940099716 2023-01-22 18:52:30.685207: step: 780/459, loss: 0.00227211881428957 2023-01-22 18:52:31.340979: step: 782/459, loss: 0.017256522551178932 2023-01-22 18:52:31.923711: step: 784/459, loss: 0.014382081106305122 2023-01-22 18:52:32.511182: step: 786/459, loss: 0.00032683907193131745 2023-01-22 18:52:33.093936: step: 788/459, loss: 0.01838788390159607 2023-01-22 18:52:33.636541: step: 790/459, loss: 0.010099943727254868 2023-01-22 18:52:34.245724: step: 792/459, loss: 0.05571364983916283 2023-01-22 18:52:34.849415: step: 794/459, loss: 0.015555786900222301 2023-01-22 18:52:35.566033: step: 796/459, loss: 0.0023029325529932976 2023-01-22 18:52:36.163073: step: 798/459, loss: 0.024751149117946625 2023-01-22 18:52:36.802292: step: 800/459, loss: 0.014317325316369534 2023-01-22 18:52:37.362239: step: 802/459, loss: 0.03233841806650162 2023-01-22 18:52:37.988142: step: 804/459, loss: 0.00472696777433157 2023-01-22 18:52:38.593812: step: 806/459, loss: 0.006077863741666079 2023-01-22 18:52:39.175579: step: 808/459, loss: 0.00035048869904130697 2023-01-22 18:52:39.818937: step: 810/459, loss: 0.012094993144273758 2023-01-22 18:52:40.467159: step: 812/459, loss: 1.3886692523956299 2023-01-22 18:52:41.103691: step: 814/459, loss: 0.0006667262059636414 2023-01-22 18:52:41.656958: step: 816/459, loss: 0.0015617815079167485 2023-01-22 18:52:42.209695: step: 818/459, loss: 0.007909959182143211 2023-01-22 18:52:42.757889: step: 820/459, loss: 0.004828309640288353 2023-01-22 18:52:43.306605: step: 822/459, loss: 0.004674515221267939 2023-01-22 18:52:43.977932: step: 824/459, loss: 0.00024581074831075966 2023-01-22 18:52:44.558783: step: 826/459, loss: 0.03855103999376297 2023-01-22 18:52:45.169330: step: 828/459, loss: 0.005930834915488958 2023-01-22 18:52:45.850853: step: 830/459, loss: 0.00048186976346187294 2023-01-22 18:52:46.435437: step: 832/459, loss: 0.12764513492584229 2023-01-22 18:52:47.018009: step: 834/459, loss: 0.025889927521348 2023-01-22 18:52:47.667713: step: 836/459, loss: 0.027031291276216507 2023-01-22 18:52:48.283422: step: 838/459, loss: 0.0061707948334515095 2023-01-22 18:52:48.923295: step: 840/459, loss: 0.005598539486527443 2023-01-22 18:52:49.501247: step: 842/459, loss: 0.00030197520391084254 2023-01-22 18:52:50.083678: step: 844/459, loss: 0.0011728954268619418 2023-01-22 18:52:50.774113: step: 846/459, loss: 0.046855468302965164 2023-01-22 18:52:51.392692: step: 848/459, loss: 0.22644881904125214 2023-01-22 18:52:51.947795: step: 850/459, loss: 0.007666556630283594 2023-01-22 18:52:52.564860: step: 852/459, loss: 0.01662522926926613 2023-01-22 18:52:53.140456: step: 854/459, loss: 0.004937107674777508 2023-01-22 18:52:53.777436: step: 856/459, loss: 0.0008262797491624951 2023-01-22 18:52:54.401275: step: 858/459, loss: 0.01169034093618393 2023-01-22 18:52:54.994322: step: 860/459, loss: 0.00017978086543735117 2023-01-22 18:52:55.609100: step: 862/459, loss: 0.0018310246523469687 2023-01-22 18:52:56.259776: step: 864/459, loss: 0.07137089222669601 2023-01-22 18:52:56.846271: step: 866/459, loss: 0.0010942622320726514 2023-01-22 18:52:57.511569: step: 868/459, loss: 0.006561458110809326 2023-01-22 18:52:58.135296: step: 870/459, loss: 0.008507943712174892 2023-01-22 18:52:58.800775: step: 872/459, loss: 0.0030678475741297007 2023-01-22 18:52:59.402802: step: 874/459, loss: 0.004209999460726976 2023-01-22 18:52:59.990942: step: 876/459, loss: 0.0033423895947635174 2023-01-22 18:53:00.635238: step: 878/459, loss: 0.007051554508507252 2023-01-22 18:53:01.214577: step: 880/459, loss: 2.118631346093025e-05 2023-01-22 18:53:01.857646: step: 882/459, loss: 0.1331174373626709 2023-01-22 18:53:02.456609: step: 884/459, loss: 0.002623064210638404 2023-01-22 18:53:03.104314: step: 886/459, loss: 0.12559521198272705 2023-01-22 18:53:03.748502: step: 888/459, loss: 0.12835697829723358 2023-01-22 18:53:04.293263: step: 890/459, loss: 0.00996769592165947 2023-01-22 18:53:04.932597: step: 892/459, loss: 0.037907641381025314 2023-01-22 18:53:05.528837: step: 894/459, loss: 0.0009856842225417495 2023-01-22 18:53:06.121890: step: 896/459, loss: 0.009995168074965477 2023-01-22 18:53:06.709225: step: 898/459, loss: 0.0015168760437518358 2023-01-22 18:53:07.326087: step: 900/459, loss: 0.001599742448888719 2023-01-22 18:53:07.923980: step: 902/459, loss: 0.009711157530546188 2023-01-22 18:53:08.461941: step: 904/459, loss: 0.0486806221306324 2023-01-22 18:53:09.023864: step: 906/459, loss: 0.004595726262778044 2023-01-22 18:53:09.603547: step: 908/459, loss: 0.015071777626872063 2023-01-22 18:53:10.231450: step: 910/459, loss: 0.023151926696300507 2023-01-22 18:53:10.845319: step: 912/459, loss: 0.004173288121819496 2023-01-22 18:53:11.454561: step: 914/459, loss: 0.026038721203804016 2023-01-22 18:53:12.028149: step: 916/459, loss: 0.006074234377592802 2023-01-22 18:53:12.628171: step: 918/459, loss: 0.004306357819586992 2023-01-22 18:53:13.061004: step: 920/459, loss: 0.014717256650328636 ================================================== Loss: 0.052 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3031091638513514, 'r': 0.3404945445920304, 'f1': 0.3207160411081323}, 'combined': 0.23631708292178166, 'epoch': 35} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31990584033512615, 'r': 0.3082729006865761, 'f1': 0.31398165810669787}, 'combined': 0.2009482611882866, 'epoch': 35} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2958107052521946, 'r': 0.342960798688977, 'f1': 0.3176455903498961}, 'combined': 0.23405464552097605, 'epoch': 35} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3329383757272868, 'r': 0.3202261831995177, 'f1': 0.32645857416076873}, 'combined': 0.20893348746289195, 'epoch': 35} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30968851035367123, 'r': 0.33554485656915806, 'f1': 0.3220986145937091}, 'combined': 0.23733582127957512, 'epoch': 35} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.329600775670264, 'r': 0.32599529772309616, 'f1': 0.3277881224585119}, 'combined': 0.23501789912119725, 'epoch': 35} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.20606060606060603, 'r': 0.32380952380952377, 'f1': 0.25185185185185177}, 'combined': 0.16790123456790118, 'epoch': 35} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.23125, 'r': 0.40217391304347827, 'f1': 0.29365079365079366}, 'combined': 0.14682539682539683, 'epoch': 35} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3, 'r': 0.20689655172413793, 'f1': 0.24489795918367346}, 'combined': 0.16326530612244897, 'epoch': 35} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3066986908783784, 'r': 0.3445268026565465, 'f1': 0.3245140750670242}, 'combined': 0.23911563425991253, 'epoch': 19} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31271473684488904, 'r': 0.30873473110322686, 'f1': 0.31071198921642224}, 'combined': 0.1988556730985102, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2564102564102564, 'r': 0.38095238095238093, 'f1': 0.3065134099616858}, 'combined': 0.20434227330779053, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29418702499903554, 'r': 0.3594998939267151, 'f1': 0.32358060478117656}, 'combined': 0.23842781404928798, 'epoch': 13} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3202762492367604, 'r': 0.31212376289255195, 'f1': 0.31614745781013553}, 'combined': 0.20233437299848672, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2878787878787879, 'r': 0.41304347826086957, 'f1': 0.3392857142857143}, 'combined': 0.16964285714285715, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3189032241859154, 'r': 0.3376622373733222, 'f1': 0.32801474487694154}, 'combined': 0.24169507517248323, 'epoch': 15} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34157316773046453, 'r': 0.3008578947798814, 'f1': 0.3199253278791385}, 'combined': 0.22938042376240123, 'epoch': 15} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35294117647058826, 'r': 0.20689655172413793, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 15} ****************************** Epoch: 36 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 18:55:46.320095: step: 2/459, loss: 0.012336032465100288 2023-01-22 18:55:46.884888: step: 4/459, loss: 0.05542987957596779 2023-01-22 18:55:47.486064: step: 6/459, loss: 0.008785977959632874 2023-01-22 18:55:48.075690: step: 8/459, loss: 0.000804823124781251 2023-01-22 18:55:48.708388: step: 10/459, loss: 0.010614896193146706 2023-01-22 18:55:49.297184: step: 12/459, loss: 0.006036417558789253 2023-01-22 18:55:49.876929: step: 14/459, loss: 0.0010666678426787257 2023-01-22 18:55:50.494759: step: 16/459, loss: 0.01930234208703041 2023-01-22 18:55:51.112106: step: 18/459, loss: 0.0029704703483730555 2023-01-22 18:55:51.687517: step: 20/459, loss: 0.013860042206943035 2023-01-22 18:55:52.337276: step: 22/459, loss: 0.0077528259716928005 2023-01-22 18:55:52.914873: step: 24/459, loss: 0.01643790677189827 2023-01-22 18:55:53.536216: step: 26/459, loss: 0.0027700127102434635 2023-01-22 18:55:54.170190: step: 28/459, loss: 0.00020960158144589514 2023-01-22 18:55:54.733647: step: 30/459, loss: 0.005829653702676296 2023-01-22 18:55:55.380409: step: 32/459, loss: 0.006737781222909689 2023-01-22 18:55:56.003563: step: 34/459, loss: 0.015986133366823196 2023-01-22 18:55:56.609876: step: 36/459, loss: 0.005564133636653423 2023-01-22 18:55:57.210223: step: 38/459, loss: 0.0037983849178999662 2023-01-22 18:55:57.893198: step: 40/459, loss: 0.463188499212265 2023-01-22 18:55:58.546577: step: 42/459, loss: 0.0055553605780005455 2023-01-22 18:55:59.119280: step: 44/459, loss: 0.0003197699843440205 2023-01-22 18:55:59.749817: step: 46/459, loss: 0.057550374418497086 2023-01-22 18:56:00.360379: step: 48/459, loss: 0.009897771291434765 2023-01-22 18:56:00.979894: step: 50/459, loss: 0.011782685294747353 2023-01-22 18:56:01.576260: step: 52/459, loss: 0.07509036362171173 2023-01-22 18:56:02.243685: step: 54/459, loss: 0.030746156349778175 2023-01-22 18:56:02.849715: step: 56/459, loss: 0.0005779852508567274 2023-01-22 18:56:03.456010: step: 58/459, loss: 0.01710657589137554 2023-01-22 18:56:04.074578: step: 60/459, loss: 0.008371301926672459 2023-01-22 18:56:04.675429: step: 62/459, loss: 0.001102784532122314 2023-01-22 18:56:05.190168: step: 64/459, loss: 0.0012096891878172755 2023-01-22 18:56:05.807552: step: 66/459, loss: 0.010059782303869724 2023-01-22 18:56:06.350555: step: 68/459, loss: 0.0007638441165909171 2023-01-22 18:56:06.925911: step: 70/459, loss: 0.026365969330072403 2023-01-22 18:56:07.541707: step: 72/459, loss: 0.0014245775528252125 2023-01-22 18:56:08.144596: step: 74/459, loss: 0.006676665507256985 2023-01-22 18:56:08.805288: step: 76/459, loss: 0.028245145455002785 2023-01-22 18:56:09.404528: step: 78/459, loss: 0.0007604363490827382 2023-01-22 18:56:09.969642: step: 80/459, loss: 0.0010516246547922492 2023-01-22 18:56:10.632405: step: 82/459, loss: 0.0003114467835985124 2023-01-22 18:56:11.311250: step: 84/459, loss: 0.004697097931057215 2023-01-22 18:56:11.963230: step: 86/459, loss: 6.794723049097229e-06 2023-01-22 18:56:12.535247: step: 88/459, loss: 0.0006078635342419147 2023-01-22 18:56:13.098086: step: 90/459, loss: 0.00022669023019261658 2023-01-22 18:56:13.727377: step: 92/459, loss: 0.00022174054174683988 2023-01-22 18:56:14.278574: step: 94/459, loss: 0.014870654791593552 2023-01-22 18:56:14.822556: step: 96/459, loss: 0.24859409034252167 2023-01-22 18:56:15.396470: step: 98/459, loss: 0.0027933756355196238 2023-01-22 18:56:16.019224: step: 100/459, loss: 0.0029999185353517532 2023-01-22 18:56:16.627713: step: 102/459, loss: 0.00047668031766079366 2023-01-22 18:56:17.243472: step: 104/459, loss: 0.0033039639238268137 2023-01-22 18:56:17.901305: step: 106/459, loss: 0.011170150712132454 2023-01-22 18:56:18.582316: step: 108/459, loss: 0.0003264856932219118 2023-01-22 18:56:19.198366: step: 110/459, loss: 0.0042407214641571045 2023-01-22 18:56:19.807701: step: 112/459, loss: 0.005600828677415848 2023-01-22 18:56:20.393917: step: 114/459, loss: 0.028548384085297585 2023-01-22 18:56:20.908511: step: 116/459, loss: 0.001401675515808165 2023-01-22 18:56:21.519556: step: 118/459, loss: 0.23201973736286163 2023-01-22 18:56:22.092699: step: 120/459, loss: 0.0001517455675639212 2023-01-22 18:56:22.681807: step: 122/459, loss: 0.003455345518887043 2023-01-22 18:56:23.230096: step: 124/459, loss: 0.04268626496195793 2023-01-22 18:56:23.777925: step: 126/459, loss: 0.0001660830748733133 2023-01-22 18:56:24.305004: step: 128/459, loss: 0.06838731467723846 2023-01-22 18:56:24.985343: step: 130/459, loss: 0.00029747767257504165 2023-01-22 18:56:25.621143: step: 132/459, loss: 0.05230630561709404 2023-01-22 18:56:26.228078: step: 134/459, loss: 0.006733528338372707 2023-01-22 18:56:26.814536: step: 136/459, loss: 0.04543045535683632 2023-01-22 18:56:27.377658: step: 138/459, loss: 0.0021847898606210947 2023-01-22 18:56:28.035797: step: 140/459, loss: 0.03025820292532444 2023-01-22 18:56:28.655943: step: 142/459, loss: 0.0028352083172649145 2023-01-22 18:56:29.247478: step: 144/459, loss: 0.0004633474163711071 2023-01-22 18:56:29.920137: step: 146/459, loss: 0.0009226284455507994 2023-01-22 18:56:30.592649: step: 148/459, loss: 0.14645321667194366 2023-01-22 18:56:31.223883: step: 150/459, loss: 0.005741264671087265 2023-01-22 18:56:31.856567: step: 152/459, loss: 0.0038201010320335627 2023-01-22 18:56:32.416616: step: 154/459, loss: 5.512372081284411e-05 2023-01-22 18:56:32.934056: step: 156/459, loss: 0.01050160638988018 2023-01-22 18:56:33.472539: step: 158/459, loss: 0.0002367976267123595 2023-01-22 18:56:34.113436: step: 160/459, loss: 0.03599337860941887 2023-01-22 18:56:34.705333: step: 162/459, loss: 0.0007884735241532326 2023-01-22 18:56:35.290819: step: 164/459, loss: 0.08395393192768097 2023-01-22 18:56:35.818068: step: 166/459, loss: 0.010635508224368095 2023-01-22 18:56:36.439329: step: 168/459, loss: 0.0010792871471494436 2023-01-22 18:56:37.083420: step: 170/459, loss: 0.0038864652160555124 2023-01-22 18:56:37.678705: step: 172/459, loss: 0.04190342128276825 2023-01-22 18:56:38.281578: step: 174/459, loss: 0.21638502180576324 2023-01-22 18:56:38.899684: step: 176/459, loss: 0.005192434880882502 2023-01-22 18:56:39.462656: step: 178/459, loss: 0.017397629097104073 2023-01-22 18:56:40.062999: step: 180/459, loss: 0.0005195160047151148 2023-01-22 18:56:40.640345: step: 182/459, loss: 0.0027260815259069204 2023-01-22 18:56:41.290980: step: 184/459, loss: 0.0001826797379180789 2023-01-22 18:56:41.864543: step: 186/459, loss: 0.0014885127311572433 2023-01-22 18:56:42.463449: step: 188/459, loss: 0.001363901887089014 2023-01-22 18:56:43.071439: step: 190/459, loss: 0.10379873961210251 2023-01-22 18:56:43.713237: step: 192/459, loss: 0.0018048398196697235 2023-01-22 18:56:44.356118: step: 194/459, loss: 0.009768676944077015 2023-01-22 18:56:44.919588: step: 196/459, loss: 0.009458419866859913 2023-01-22 18:56:45.558617: step: 198/459, loss: 0.02612553909420967 2023-01-22 18:56:46.225162: step: 200/459, loss: 0.00026723084738478065 2023-01-22 18:56:46.769311: step: 202/459, loss: 0.0002661466132849455 2023-01-22 18:56:47.317241: step: 204/459, loss: 0.05153365805745125 2023-01-22 18:56:47.986271: step: 206/459, loss: 0.016306573525071144 2023-01-22 18:56:48.645914: step: 208/459, loss: 0.19415000081062317 2023-01-22 18:56:49.319773: step: 210/459, loss: 0.001877745264209807 2023-01-22 18:56:49.977549: step: 212/459, loss: 0.006924126762896776 2023-01-22 18:56:50.574516: step: 214/459, loss: 0.007908573374152184 2023-01-22 18:56:51.093053: step: 216/459, loss: 0.011734414845705032 2023-01-22 18:56:51.688431: step: 218/459, loss: 0.0035177445970475674 2023-01-22 18:56:52.349502: step: 220/459, loss: 0.011814429424703121 2023-01-22 18:56:52.993065: step: 222/459, loss: 0.0003552162670530379 2023-01-22 18:56:53.664853: step: 224/459, loss: 0.003047834150493145 2023-01-22 18:56:54.322004: step: 226/459, loss: 0.051892269402742386 2023-01-22 18:56:54.935637: step: 228/459, loss: 0.014718618243932724 2023-01-22 18:56:55.591291: step: 230/459, loss: 0.006830344907939434 2023-01-22 18:56:56.262834: step: 232/459, loss: 0.0016910785343497992 2023-01-22 18:56:56.854094: step: 234/459, loss: 0.009865974076092243 2023-01-22 18:56:57.457595: step: 236/459, loss: 0.046339910477399826 2023-01-22 18:56:58.071001: step: 238/459, loss: 2.3019465515972115e-06 2023-01-22 18:56:58.650321: step: 240/459, loss: 0.0021118337754160166 2023-01-22 18:56:59.197107: step: 242/459, loss: 0.00041936751222237945 2023-01-22 18:56:59.853497: step: 244/459, loss: 0.015312141738831997 2023-01-22 18:57:00.395684: step: 246/459, loss: 0.018727097660303116 2023-01-22 18:57:01.029080: step: 248/459, loss: 0.003584969323128462 2023-01-22 18:57:01.583319: step: 250/459, loss: 0.005572606343775988 2023-01-22 18:57:02.243962: step: 252/459, loss: 0.001620256225578487 2023-01-22 18:57:02.856655: step: 254/459, loss: 0.014076475985348225 2023-01-22 18:57:03.420664: step: 256/459, loss: 0.001544062397442758 2023-01-22 18:57:03.988495: step: 258/459, loss: 0.029778921976685524 2023-01-22 18:57:04.541974: step: 260/459, loss: 0.0010803245240822434 2023-01-22 18:57:05.122668: step: 262/459, loss: 0.0013275971869006753 2023-01-22 18:57:05.789818: step: 264/459, loss: 0.021097633987665176 2023-01-22 18:57:06.375034: step: 266/459, loss: 0.019267890602350235 2023-01-22 18:57:06.971124: step: 268/459, loss: 0.0009586066589690745 2023-01-22 18:57:07.609117: step: 270/459, loss: 0.006646362133324146 2023-01-22 18:57:08.298221: step: 272/459, loss: 0.018112313002347946 2023-01-22 18:57:08.935158: step: 274/459, loss: 0.044563379138708115 2023-01-22 18:57:09.503241: step: 276/459, loss: 0.034980930387973785 2023-01-22 18:57:10.080653: step: 278/459, loss: 0.001473423559218645 2023-01-22 18:57:10.648395: step: 280/459, loss: 0.007846547290682793 2023-01-22 18:57:11.286091: step: 282/459, loss: 0.020547451451420784 2023-01-22 18:57:11.876068: step: 284/459, loss: 0.0018059483263641596 2023-01-22 18:57:12.491112: step: 286/459, loss: 0.004998123738914728 2023-01-22 18:57:13.065905: step: 288/459, loss: 0.0002674443821888417 2023-01-22 18:57:13.649957: step: 290/459, loss: 0.01802315004169941 2023-01-22 18:57:14.225651: step: 292/459, loss: 0.08386961370706558 2023-01-22 18:57:14.876586: step: 294/459, loss: 0.014062258414924145 2023-01-22 18:57:15.504682: step: 296/459, loss: 0.0020053978078067303 2023-01-22 18:57:16.073425: step: 298/459, loss: 0.0007209135219454765 2023-01-22 18:57:16.697177: step: 300/459, loss: 0.10247762501239777 2023-01-22 18:57:17.322182: step: 302/459, loss: 0.00011642702156677842 2023-01-22 18:57:17.966420: step: 304/459, loss: 0.0021253954619169235 2023-01-22 18:57:18.619358: step: 306/459, loss: 0.14886626601219177 2023-01-22 18:57:19.251747: step: 308/459, loss: 0.030726788565516472 2023-01-22 18:57:19.869485: step: 310/459, loss: 0.013219938613474369 2023-01-22 18:57:20.494924: step: 312/459, loss: 0.09249261766672134 2023-01-22 18:57:21.022266: step: 314/459, loss: 0.0014003575779497623 2023-01-22 18:57:21.602482: step: 316/459, loss: 0.001484519336372614 2023-01-22 18:57:22.195415: step: 318/459, loss: 0.0005955088417977095 2023-01-22 18:57:22.778513: step: 320/459, loss: 0.0007052926812320948 2023-01-22 18:57:23.360127: step: 322/459, loss: 2.109529733657837 2023-01-22 18:57:24.036796: step: 324/459, loss: 0.0022370689548552036 2023-01-22 18:57:24.634263: step: 326/459, loss: 0.006994657218456268 2023-01-22 18:57:25.208610: step: 328/459, loss: 0.003391703823581338 2023-01-22 18:57:25.829470: step: 330/459, loss: 0.00897217821329832 2023-01-22 18:57:26.384210: step: 332/459, loss: 8.492426422890276e-05 2023-01-22 18:57:26.920209: step: 334/459, loss: 0.005440766457468271 2023-01-22 18:57:27.495792: step: 336/459, loss: 0.4395683705806732 2023-01-22 18:57:28.105526: step: 338/459, loss: 0.0004898759070783854 2023-01-22 18:57:28.690891: step: 340/459, loss: 0.005661939736455679 2023-01-22 18:57:29.289290: step: 342/459, loss: 0.002810798119753599 2023-01-22 18:57:29.856908: step: 344/459, loss: 0.010737248696386814 2023-01-22 18:57:30.433417: step: 346/459, loss: 0.0175283495336771 2023-01-22 18:57:31.047212: step: 348/459, loss: 0.036723047494888306 2023-01-22 18:57:31.601178: step: 350/459, loss: 0.0010474277660250664 2023-01-22 18:57:32.186129: step: 352/459, loss: 0.007298881188035011 2023-01-22 18:57:32.755083: step: 354/459, loss: 0.022487569600343704 2023-01-22 18:57:33.419697: step: 356/459, loss: 0.0001323282194789499 2023-01-22 18:57:33.983830: step: 358/459, loss: 0.0019557855557650328 2023-01-22 18:57:34.592176: step: 360/459, loss: 0.0007508830167353153 2023-01-22 18:57:35.130021: step: 362/459, loss: 0.005792820360511541 2023-01-22 18:57:35.717948: step: 364/459, loss: 0.006345278583467007 2023-01-22 18:57:36.343788: step: 366/459, loss: 0.01959369145333767 2023-01-22 18:57:37.008608: step: 368/459, loss: 0.006529917009174824 2023-01-22 18:57:37.597555: step: 370/459, loss: 0.011116444133222103 2023-01-22 18:57:38.174970: step: 372/459, loss: 0.09599024802446365 2023-01-22 18:57:38.871994: step: 374/459, loss: 0.1579362154006958 2023-01-22 18:57:39.486828: step: 376/459, loss: 0.0027162216138094664 2023-01-22 18:57:40.120311: step: 378/459, loss: 0.004287910647690296 2023-01-22 18:57:40.867987: step: 380/459, loss: 0.02779889479279518 2023-01-22 18:57:41.427761: step: 382/459, loss: 0.0007382340845651925 2023-01-22 18:57:42.053126: step: 384/459, loss: 0.005654179956763983 2023-01-22 18:57:42.622679: step: 386/459, loss: 0.0007708391058258712 2023-01-22 18:57:43.197520: step: 388/459, loss: 0.00023936691286507994 2023-01-22 18:57:43.741608: step: 390/459, loss: 0.006039737723767757 2023-01-22 18:57:44.350776: step: 392/459, loss: 0.08389925211668015 2023-01-22 18:57:44.959494: step: 394/459, loss: 0.04548410698771477 2023-01-22 18:57:45.549829: step: 396/459, loss: 0.02217145636677742 2023-01-22 18:57:46.131893: step: 398/459, loss: 0.21014828979969025 2023-01-22 18:57:46.745152: step: 400/459, loss: 0.011504396796226501 2023-01-22 18:57:47.343643: step: 402/459, loss: 0.00299580255523324 2023-01-22 18:57:47.969962: step: 404/459, loss: 0.032884400337934494 2023-01-22 18:57:48.580695: step: 406/459, loss: 0.0019891236443072557 2023-01-22 18:57:49.178942: step: 408/459, loss: 0.012345519848167896 2023-01-22 18:57:49.808423: step: 410/459, loss: 0.0013470490230247378 2023-01-22 18:57:50.429496: step: 412/459, loss: 0.020648375153541565 2023-01-22 18:57:51.064511: step: 414/459, loss: 0.00463608605787158 2023-01-22 18:57:51.658934: step: 416/459, loss: 0.00221062870696187 2023-01-22 18:57:52.315182: step: 418/459, loss: 0.030384687706828117 2023-01-22 18:57:52.861262: step: 420/459, loss: 0.02827196568250656 2023-01-22 18:57:53.389032: step: 422/459, loss: 0.001184134860523045 2023-01-22 18:57:54.011036: step: 424/459, loss: 0.004985291510820389 2023-01-22 18:57:54.578337: step: 426/459, loss: 0.00019678777607623488 2023-01-22 18:57:55.141859: step: 428/459, loss: 0.006948582362383604 2023-01-22 18:57:55.805506: step: 430/459, loss: 0.0011640081647783518 2023-01-22 18:57:56.393173: step: 432/459, loss: 0.022255703806877136 2023-01-22 18:57:56.985353: step: 434/459, loss: 0.012956257909536362 2023-01-22 18:57:57.551664: step: 436/459, loss: 0.2043738067150116 2023-01-22 18:57:58.090779: step: 438/459, loss: 0.0001714728568913415 2023-01-22 18:57:58.657756: step: 440/459, loss: 0.00011769444972742349 2023-01-22 18:57:59.286302: step: 442/459, loss: 4.072632691531908e-06 2023-01-22 18:57:59.886076: step: 444/459, loss: 0.02533719688653946 2023-01-22 18:58:00.504500: step: 446/459, loss: 1.6532132625579834 2023-01-22 18:58:01.099400: step: 448/459, loss: 0.005655759479850531 2023-01-22 18:58:01.691134: step: 450/459, loss: 0.006746772211045027 2023-01-22 18:58:02.325653: step: 452/459, loss: 7.885936793172732e-05 2023-01-22 18:58:02.962311: step: 454/459, loss: 0.015435861423611641 2023-01-22 18:58:03.598015: step: 456/459, loss: 0.006196838337928057 2023-01-22 18:58:04.195931: step: 458/459, loss: 0.004106833599507809 2023-01-22 18:58:04.875890: step: 460/459, loss: 0.04261523857712746 2023-01-22 18:58:05.476006: step: 462/459, loss: 0.02409670688211918 2023-01-22 18:58:06.110609: step: 464/459, loss: 0.009259517304599285 2023-01-22 18:58:06.765206: step: 466/459, loss: 0.008641015738248825 2023-01-22 18:58:07.414556: step: 468/459, loss: 0.0028009535744786263 2023-01-22 18:58:08.077092: step: 470/459, loss: 0.00500455079600215 2023-01-22 18:58:08.716942: step: 472/459, loss: 0.08476634323596954 2023-01-22 18:58:09.331317: step: 474/459, loss: 0.04731924459338188 2023-01-22 18:58:09.874346: step: 476/459, loss: 0.0005092010833323002 2023-01-22 18:58:10.480363: step: 478/459, loss: 0.30268681049346924 2023-01-22 18:58:11.126838: step: 480/459, loss: 0.00171946850605309 2023-01-22 18:58:11.708346: step: 482/459, loss: 0.01599387638270855 2023-01-22 18:58:12.314455: step: 484/459, loss: 0.32543498277664185 2023-01-22 18:58:12.916900: step: 486/459, loss: 0.0055351052433252335 2023-01-22 18:58:13.516430: step: 488/459, loss: 0.011890685185790062 2023-01-22 18:58:14.087522: step: 490/459, loss: 0.005521407350897789 2023-01-22 18:58:14.763612: step: 492/459, loss: 0.006573499646037817 2023-01-22 18:58:15.330422: step: 494/459, loss: 0.0002728884282987565 2023-01-22 18:58:15.898357: step: 496/459, loss: 0.0008656036807224154 2023-01-22 18:58:16.460442: step: 498/459, loss: 0.009391429834067822 2023-01-22 18:58:17.058154: step: 500/459, loss: 0.007876125164330006 2023-01-22 18:58:17.681287: step: 502/459, loss: 0.023953400552272797 2023-01-22 18:58:18.332460: step: 504/459, loss: 0.00251001282595098 2023-01-22 18:58:18.938522: step: 506/459, loss: 0.004769584164023399 2023-01-22 18:58:19.570921: step: 508/459, loss: 0.00175184546969831 2023-01-22 18:58:20.164374: step: 510/459, loss: 0.002679515862837434 2023-01-22 18:58:20.752594: step: 512/459, loss: 0.009569961577653885 2023-01-22 18:58:21.377435: step: 514/459, loss: 4.0450850065099075e-05 2023-01-22 18:58:21.966710: step: 516/459, loss: 0.0032633452210575342 2023-01-22 18:58:22.593589: step: 518/459, loss: 0.0021554369013756514 2023-01-22 18:58:23.226669: step: 520/459, loss: 0.008984297513961792 2023-01-22 18:58:23.789480: step: 522/459, loss: 0.0001928472047438845 2023-01-22 18:58:24.316443: step: 524/459, loss: 0.0002908246242441237 2023-01-22 18:58:24.898369: step: 526/459, loss: 0.0013939962955191731 2023-01-22 18:58:25.504127: step: 528/459, loss: 0.028057033196091652 2023-01-22 18:58:26.158522: step: 530/459, loss: 0.02643798664212227 2023-01-22 18:58:26.803410: step: 532/459, loss: 0.059709079563617706 2023-01-22 18:58:27.413299: step: 534/459, loss: 0.016459785401821136 2023-01-22 18:58:28.036892: step: 536/459, loss: 0.022442683577537537 2023-01-22 18:58:28.601477: step: 538/459, loss: 0.026172123849391937 2023-01-22 18:58:29.209189: step: 540/459, loss: 0.0012992434203624725 2023-01-22 18:58:29.856651: step: 542/459, loss: 0.006321974564343691 2023-01-22 18:58:30.504917: step: 544/459, loss: 0.1641024798154831 2023-01-22 18:58:31.128411: step: 546/459, loss: 0.0040565235540270805 2023-01-22 18:58:31.714071: step: 548/459, loss: 0.0003739072708413005 2023-01-22 18:58:32.316477: step: 550/459, loss: 0.01742553897202015 2023-01-22 18:58:33.070039: step: 552/459, loss: 0.013202657923102379 2023-01-22 18:58:33.760052: step: 554/459, loss: 0.1283104568719864 2023-01-22 18:58:34.349193: step: 556/459, loss: 0.05233766883611679 2023-01-22 18:58:34.900556: step: 558/459, loss: 0.00013026311353314668 2023-01-22 18:58:35.497359: step: 560/459, loss: 0.2509117126464844 2023-01-22 18:58:36.052324: step: 562/459, loss: 0.0040754834190011024 2023-01-22 18:58:36.723677: step: 564/459, loss: 0.005850822664797306 2023-01-22 18:58:37.342649: step: 566/459, loss: 0.0034382552839815617 2023-01-22 18:58:37.968361: step: 568/459, loss: 0.05515662208199501 2023-01-22 18:58:38.612909: step: 570/459, loss: 0.01832149177789688 2023-01-22 18:58:39.250815: step: 572/459, loss: 0.005146431270986795 2023-01-22 18:58:39.913483: step: 574/459, loss: 0.00010457808093633503 2023-01-22 18:58:40.532618: step: 576/459, loss: 0.0017492971383035183 2023-01-22 18:58:41.115812: step: 578/459, loss: 0.0027319598011672497 2023-01-22 18:58:41.689616: step: 580/459, loss: 0.0022562069352716208 2023-01-22 18:58:42.317350: step: 582/459, loss: 0.0006985919317230582 2023-01-22 18:58:42.971382: step: 584/459, loss: 0.007851464673876762 2023-01-22 18:58:43.551496: step: 586/459, loss: 0.0056243655271828175 2023-01-22 18:58:44.291541: step: 588/459, loss: 0.0034310659393668175 2023-01-22 18:58:44.811620: step: 590/459, loss: 0.011628594249486923 2023-01-22 18:58:45.369102: step: 592/459, loss: 0.0018461111467331648 2023-01-22 18:58:46.066784: step: 594/459, loss: 0.0077388230711221695 2023-01-22 18:58:46.680097: step: 596/459, loss: 1.863702345872298e-05 2023-01-22 18:58:47.284228: step: 598/459, loss: 0.001161194290034473 2023-01-22 18:58:47.885745: step: 600/459, loss: 0.0009276464115828276 2023-01-22 18:58:48.509069: step: 602/459, loss: 0.0006895385449752212 2023-01-22 18:58:49.143875: step: 604/459, loss: 0.01640535332262516 2023-01-22 18:58:49.876728: step: 606/459, loss: 0.08794116973876953 2023-01-22 18:58:50.545096: step: 608/459, loss: 0.48446783423423767 2023-01-22 18:58:51.181173: step: 610/459, loss: 0.010279154404997826 2023-01-22 18:58:51.727416: step: 612/459, loss: 0.0038739521987736225 2023-01-22 18:58:52.310126: step: 614/459, loss: 0.003095984226092696 2023-01-22 18:58:52.916247: step: 616/459, loss: 0.0004535072948783636 2023-01-22 18:58:53.530171: step: 618/459, loss: 0.006185912527143955 2023-01-22 18:58:54.086981: step: 620/459, loss: 0.020640280097723007 2023-01-22 18:58:54.660705: step: 622/459, loss: 0.0014994689263403416 2023-01-22 18:58:55.264718: step: 624/459, loss: 0.019553063437342644 2023-01-22 18:58:55.795065: step: 626/459, loss: 0.0072066388092935085 2023-01-22 18:58:56.376062: step: 628/459, loss: 0.019646363332867622 2023-01-22 18:58:57.027216: step: 630/459, loss: 0.010284202173352242 2023-01-22 18:58:57.662122: step: 632/459, loss: 0.23039740324020386 2023-01-22 18:58:58.212366: step: 634/459, loss: 0.009568098932504654 2023-01-22 18:58:58.801851: step: 636/459, loss: 0.0031247057486325502 2023-01-22 18:58:59.391044: step: 638/459, loss: 0.027061741799116135 2023-01-22 18:58:59.978905: step: 640/459, loss: 0.00045662239426746964 2023-01-22 18:59:00.634566: step: 642/459, loss: 0.014384963549673557 2023-01-22 18:59:01.360567: step: 644/459, loss: 0.017218708992004395 2023-01-22 18:59:01.874862: step: 646/459, loss: 0.004960222169756889 2023-01-22 18:59:02.486221: step: 648/459, loss: 0.0014191081281751394 2023-01-22 18:59:03.133917: step: 650/459, loss: 0.01039410661906004 2023-01-22 18:59:03.689819: step: 652/459, loss: 0.0025404279585927725 2023-01-22 18:59:04.272779: step: 654/459, loss: 0.0057366034016013145 2023-01-22 18:59:04.883355: step: 656/459, loss: 0.0006540220929309726 2023-01-22 18:59:05.441738: step: 658/459, loss: 0.0011738052126020193 2023-01-22 18:59:06.010097: step: 660/459, loss: 0.011684047058224678 2023-01-22 18:59:06.693296: step: 662/459, loss: 1.1977400390605908e-05 2023-01-22 18:59:07.293309: step: 664/459, loss: 0.20000554621219635 2023-01-22 18:59:07.871163: step: 666/459, loss: 0.000647523847874254 2023-01-22 18:59:08.413807: step: 668/459, loss: 0.00386466970667243 2023-01-22 18:59:09.009532: step: 670/459, loss: 0.0988989844918251 2023-01-22 18:59:09.707606: step: 672/459, loss: 0.20930136740207672 2023-01-22 18:59:10.297332: step: 674/459, loss: 0.0007626981823705137 2023-01-22 18:59:10.900051: step: 676/459, loss: 0.002135002752766013 2023-01-22 18:59:11.490191: step: 678/459, loss: 0.015808792784810066 2023-01-22 18:59:12.062569: step: 680/459, loss: 6.609495903830975e-05 2023-01-22 18:59:12.683831: step: 682/459, loss: 0.036694370210170746 2023-01-22 18:59:13.271289: step: 684/459, loss: 0.007617215160280466 2023-01-22 18:59:13.899703: step: 686/459, loss: 0.012505629099905491 2023-01-22 18:59:14.467670: step: 688/459, loss: 0.00021479300630744547 2023-01-22 18:59:15.072560: step: 690/459, loss: 0.0024425105657428503 2023-01-22 18:59:15.668913: step: 692/459, loss: 0.022554486989974976 2023-01-22 18:59:16.254739: step: 694/459, loss: 7.078263759613037 2023-01-22 18:59:16.844129: step: 696/459, loss: 0.007949194870889187 2023-01-22 18:59:17.452422: step: 698/459, loss: 0.0016955556347966194 2023-01-22 18:59:18.084552: step: 700/459, loss: 0.0012106643989682198 2023-01-22 18:59:18.708450: step: 702/459, loss: 0.005232302937656641 2023-01-22 18:59:19.351944: step: 704/459, loss: 0.0018387401942163706 2023-01-22 18:59:19.968178: step: 706/459, loss: 0.0008397063938900828 2023-01-22 18:59:20.562394: step: 708/459, loss: 0.021649247035384178 2023-01-22 18:59:21.202834: step: 710/459, loss: 0.001535501447506249 2023-01-22 18:59:21.768884: step: 712/459, loss: 0.0021521453745663166 2023-01-22 18:59:22.384684: step: 714/459, loss: 0.000822395842988044 2023-01-22 18:59:23.022258: step: 716/459, loss: 0.0030233620200306177 2023-01-22 18:59:23.608422: step: 718/459, loss: 0.007164048962295055 2023-01-22 18:59:24.207895: step: 720/459, loss: 0.004350057803094387 2023-01-22 18:59:24.856136: step: 722/459, loss: 0.00928383320569992 2023-01-22 18:59:25.447628: step: 724/459, loss: 0.00833914428949356 2023-01-22 18:59:26.137720: step: 726/459, loss: 0.008826087228953838 2023-01-22 18:59:26.789627: step: 728/459, loss: 0.01961175538599491 2023-01-22 18:59:27.456525: step: 730/459, loss: 0.027430254966020584 2023-01-22 18:59:28.004483: step: 732/459, loss: 1.0048873264167923e-05 2023-01-22 18:59:28.594585: step: 734/459, loss: 0.007422564551234245 2023-01-22 18:59:29.140952: step: 736/459, loss: 0.005092230159789324 2023-01-22 18:59:29.764310: step: 738/459, loss: 9.220204810844734e-05 2023-01-22 18:59:30.343756: step: 740/459, loss: 0.01234501414000988 2023-01-22 18:59:30.948638: step: 742/459, loss: 3.1019670132081956e-05 2023-01-22 18:59:31.542798: step: 744/459, loss: 0.04812059551477432 2023-01-22 18:59:32.152192: step: 746/459, loss: 0.020416956394910812 2023-01-22 18:59:32.749700: step: 748/459, loss: 0.007533850148320198 2023-01-22 18:59:33.367305: step: 750/459, loss: 0.05170911177992821 2023-01-22 18:59:33.981596: step: 752/459, loss: 0.00012592376151587814 2023-01-22 18:59:34.552915: step: 754/459, loss: 0.0009537444566376507 2023-01-22 18:59:35.124801: step: 756/459, loss: 0.00029687685309909284 2023-01-22 18:59:35.787949: step: 758/459, loss: 0.0022095334716141224 2023-01-22 18:59:36.359045: step: 760/459, loss: 6.023692185408436e-05 2023-01-22 18:59:36.986709: step: 762/459, loss: 0.001378800836391747 2023-01-22 18:59:37.581462: step: 764/459, loss: 0.0010938206687569618 2023-01-22 18:59:38.148620: step: 766/459, loss: 0.0013576161582022905 2023-01-22 18:59:38.709519: step: 768/459, loss: 0.00018524315964896232 2023-01-22 18:59:39.294835: step: 770/459, loss: 8.077456004684791e-05 2023-01-22 18:59:39.921116: step: 772/459, loss: 0.0004197026137262583 2023-01-22 18:59:40.532635: step: 774/459, loss: 0.0004784521588589996 2023-01-22 18:59:41.144265: step: 776/459, loss: 0.005477396305650473 2023-01-22 18:59:41.780052: step: 778/459, loss: 0.0524415597319603 2023-01-22 18:59:42.383333: step: 780/459, loss: 0.0012861441355198622 2023-01-22 18:59:43.064940: step: 782/459, loss: 0.00328316749073565 2023-01-22 18:59:43.693005: step: 784/459, loss: 0.037710197269916534 2023-01-22 18:59:44.322628: step: 786/459, loss: 0.008453216403722763 2023-01-22 18:59:45.044716: step: 788/459, loss: 0.007729657925665379 2023-01-22 18:59:45.670086: step: 790/459, loss: 0.052602000534534454 2023-01-22 18:59:46.294206: step: 792/459, loss: 0.0009555204887874424 2023-01-22 18:59:46.850189: step: 794/459, loss: 0.24523159861564636 2023-01-22 18:59:47.386042: step: 796/459, loss: 0.015927739441394806 2023-01-22 18:59:48.001143: step: 798/459, loss: 0.027439886704087257 2023-01-22 18:59:48.621783: step: 800/459, loss: 0.0003467523492872715 2023-01-22 18:59:49.248439: step: 802/459, loss: 0.012824086472392082 2023-01-22 18:59:49.802912: step: 804/459, loss: 0.08120985329151154 2023-01-22 18:59:50.421096: step: 806/459, loss: 0.026600569486618042 2023-01-22 18:59:50.991432: step: 808/459, loss: 0.06844249367713928 2023-01-22 18:59:51.566710: step: 810/459, loss: 0.000148155857459642 2023-01-22 18:59:52.189183: step: 812/459, loss: 0.03261023014783859 2023-01-22 18:59:52.801614: step: 814/459, loss: 0.0063739949837327 2023-01-22 18:59:53.326067: step: 816/459, loss: 0.0010005709482356906 2023-01-22 18:59:53.899932: step: 818/459, loss: 0.028723331168293953 2023-01-22 18:59:54.543338: step: 820/459, loss: 0.023854052647948265 2023-01-22 18:59:55.197025: step: 822/459, loss: 0.005133442580699921 2023-01-22 18:59:55.802236: step: 824/459, loss: 0.010812719352543354 2023-01-22 18:59:56.402881: step: 826/459, loss: 0.028064018115401268 2023-01-22 18:59:56.984159: step: 828/459, loss: 0.020584523677825928 2023-01-22 18:59:57.544490: step: 830/459, loss: 0.0017664688639342785 2023-01-22 18:59:58.133805: step: 832/459, loss: 0.007128015626221895 2023-01-22 18:59:58.756798: step: 834/459, loss: 0.00604555569589138 2023-01-22 18:59:59.360501: step: 836/459, loss: 0.002949119545519352 2023-01-22 18:59:59.970823: step: 838/459, loss: 0.008766476996243 2023-01-22 19:00:00.637858: step: 840/459, loss: 0.07124187797307968 2023-01-22 19:00:01.237852: step: 842/459, loss: 6.780996773159131e-05 2023-01-22 19:00:01.845823: step: 844/459, loss: 0.0012211039429530501 2023-01-22 19:00:02.417877: step: 846/459, loss: 0.00017686911451164633 2023-01-22 19:00:03.009480: step: 848/459, loss: 0.013934226706624031 2023-01-22 19:00:03.730845: step: 850/459, loss: 0.037994954735040665 2023-01-22 19:00:04.404954: step: 852/459, loss: 0.006349512375891209 2023-01-22 19:00:05.034283: step: 854/459, loss: 0.02300824411213398 2023-01-22 19:00:05.601487: step: 856/459, loss: 0.0004445640661288053 2023-01-22 19:00:06.247736: step: 858/459, loss: 0.024581076577305794 2023-01-22 19:00:06.859162: step: 860/459, loss: 0.005287088919430971 2023-01-22 19:00:07.486843: step: 862/459, loss: 0.01626964472234249 2023-01-22 19:00:08.107735: step: 864/459, loss: 0.08433536440134048 2023-01-22 19:00:08.752997: step: 866/459, loss: 0.010400758124887943 2023-01-22 19:00:09.419960: step: 868/459, loss: 0.18331864476203918 2023-01-22 19:00:09.990147: step: 870/459, loss: 0.00244311336427927 2023-01-22 19:00:10.556387: step: 872/459, loss: 0.0020521911792457104 2023-01-22 19:00:11.127280: step: 874/459, loss: 0.013464580290019512 2023-01-22 19:00:11.730975: step: 876/459, loss: 6.203623343026266e-05 2023-01-22 19:00:12.316413: step: 878/459, loss: 0.009263000451028347 2023-01-22 19:00:12.936777: step: 880/459, loss: 0.000301990716252476 2023-01-22 19:00:13.511885: step: 882/459, loss: 0.0012923774775117636 2023-01-22 19:00:14.128133: step: 884/459, loss: 0.05961879342794418 2023-01-22 19:00:14.715054: step: 886/459, loss: 0.01381315104663372 2023-01-22 19:00:15.315825: step: 888/459, loss: 0.03881435468792915 2023-01-22 19:00:15.916643: step: 890/459, loss: 0.03959501534700394 2023-01-22 19:00:16.557760: step: 892/459, loss: 0.010878299362957478 2023-01-22 19:00:17.210751: step: 894/459, loss: 0.08217725157737732 2023-01-22 19:00:17.828602: step: 896/459, loss: 0.001766720786690712 2023-01-22 19:00:18.421469: step: 898/459, loss: 0.005945147480815649 2023-01-22 19:00:19.088240: step: 900/459, loss: 0.041338540613651276 2023-01-22 19:00:19.657780: step: 902/459, loss: 0.00017694836424198002 2023-01-22 19:00:20.269351: step: 904/459, loss: 0.008113319985568523 2023-01-22 19:00:20.885044: step: 906/459, loss: 0.01844905875623226 2023-01-22 19:00:21.449043: step: 908/459, loss: 0.005742263048887253 2023-01-22 19:00:22.046909: step: 910/459, loss: 0.01944614201784134 2023-01-22 19:00:22.619432: step: 912/459, loss: 0.0022548269480466843 2023-01-22 19:00:23.351414: step: 914/459, loss: 0.05790354683995247 2023-01-22 19:00:24.013255: step: 916/459, loss: 0.0015368181047961116 2023-01-22 19:00:24.609508: step: 918/459, loss: 0.0676867738366127 2023-01-22 19:00:25.011563: step: 920/459, loss: 8.040957254706882e-06 ================================================== Loss: 0.048 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3087178859387307, 'r': 0.3479666494261974, 'f1': 0.3271693563739626}, 'combined': 0.24107215732818296, 'epoch': 36} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3250231276194629, 'r': 0.304653830793175, 'f1': 0.31450901800589376}, 'combined': 0.20128577152377197, 'epoch': 36} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3053525610567329, 'r': 0.35865889050117206, 'f1': 0.32986603018170624}, 'combined': 0.2430591801338888, 'epoch': 36} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33001601349218923, 'r': 0.31051506724037803, 'f1': 0.31996868755448793}, 'combined': 0.20477996003487225, 'epoch': 36} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31737153472250396, 'r': 0.3432671248421769, 'f1': 0.3298118045429849}, 'combined': 0.24301922440009413, 'epoch': 36} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3357391184332354, 'r': 0.3189062547013959, 'f1': 0.3271062752757655}, 'combined': 0.23452902755620927, 'epoch': 36} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.22839506172839505, 'r': 0.35238095238095235, 'f1': 0.27715355805243447}, 'combined': 0.18476903870162298, 'epoch': 36} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.24342105263157895, 'r': 0.40217391304347827, 'f1': 0.30327868852459017}, 'combined': 0.15163934426229508, 'epoch': 36} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2777777777777778, 'r': 0.1724137931034483, 'f1': 0.21276595744680854}, 'combined': 0.14184397163120568, 'epoch': 36} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3066986908783784, 'r': 0.3445268026565465, 'f1': 0.3245140750670242}, 'combined': 0.23911563425991253, 'epoch': 19} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31271473684488904, 'r': 0.30873473110322686, 'f1': 0.31071198921642224}, 'combined': 0.1988556730985102, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2564102564102564, 'r': 0.38095238095238093, 'f1': 0.3065134099616858}, 'combined': 0.20434227330779053, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29418702499903554, 'r': 0.3594998939267151, 'f1': 0.32358060478117656}, 'combined': 0.23842781404928798, 'epoch': 13} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3202762492367604, 'r': 0.31212376289255195, 'f1': 0.31614745781013553}, 'combined': 0.20233437299848672, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2878787878787879, 'r': 0.41304347826086957, 'f1': 0.3392857142857143}, 'combined': 0.16964285714285715, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3189032241859154, 'r': 0.3376622373733222, 'f1': 0.32801474487694154}, 'combined': 0.24169507517248323, 'epoch': 15} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34157316773046453, 'r': 0.3008578947798814, 'f1': 0.3199253278791385}, 'combined': 0.22938042376240123, 'epoch': 15} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35294117647058826, 'r': 0.20689655172413793, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 15} ****************************** Epoch: 37 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 19:02:58.929668: step: 2/459, loss: 0.04228035733103752 2023-01-22 19:02:59.555561: step: 4/459, loss: 0.0038846053648740053 2023-01-22 19:03:00.156925: step: 6/459, loss: 0.025582972913980484 2023-01-22 19:03:00.715311: step: 8/459, loss: 0.0035098963417112827 2023-01-22 19:03:01.437309: step: 10/459, loss: 0.022690994665026665 2023-01-22 19:03:02.042106: step: 12/459, loss: 0.025223635137081146 2023-01-22 19:03:02.636602: step: 14/459, loss: 0.008831229992210865 2023-01-22 19:03:03.217179: step: 16/459, loss: 0.00040248516597785056 2023-01-22 19:03:03.819129: step: 18/459, loss: 0.0026567894965410233 2023-01-22 19:03:04.400136: step: 20/459, loss: 0.0007844538777135313 2023-01-22 19:03:04.971945: step: 22/459, loss: 0.0014698364539071918 2023-01-22 19:03:05.595355: step: 24/459, loss: 0.009693985804915428 2023-01-22 19:03:06.136492: step: 26/459, loss: 0.0017960654804483056 2023-01-22 19:03:06.772678: step: 28/459, loss: 0.0003610205021686852 2023-01-22 19:03:07.362753: step: 30/459, loss: 0.00419809203594923 2023-01-22 19:03:08.019708: step: 32/459, loss: 0.014579851180315018 2023-01-22 19:03:08.621625: step: 34/459, loss: 0.0010669735493138433 2023-01-22 19:03:09.256919: step: 36/459, loss: 0.1992916315793991 2023-01-22 19:03:09.873605: step: 38/459, loss: 0.0007534954347647727 2023-01-22 19:03:10.486447: step: 40/459, loss: 0.00491641741245985 2023-01-22 19:03:11.091180: step: 42/459, loss: 0.00025541658396832645 2023-01-22 19:03:11.721513: step: 44/459, loss: 0.09365145117044449 2023-01-22 19:03:12.284757: step: 46/459, loss: 9.963619959307835e-05 2023-01-22 19:03:12.914673: step: 48/459, loss: 0.00040259951492771506 2023-01-22 19:03:13.569261: step: 50/459, loss: 0.0004827782395295799 2023-01-22 19:03:14.172883: step: 52/459, loss: 0.018429027870297432 2023-01-22 19:03:14.840549: step: 54/459, loss: 0.003620343515649438 2023-01-22 19:03:15.413289: step: 56/459, loss: 0.01748562790453434 2023-01-22 19:03:16.027612: step: 58/459, loss: 0.03313051164150238 2023-01-22 19:03:16.667786: step: 60/459, loss: 0.00368084036745131 2023-01-22 19:03:17.260940: step: 62/459, loss: 0.003827727632597089 2023-01-22 19:03:17.806214: step: 64/459, loss: 0.004660498816519976 2023-01-22 19:03:18.423397: step: 66/459, loss: 0.0005812778254039586 2023-01-22 19:03:19.068727: step: 68/459, loss: 0.007208652328699827 2023-01-22 19:03:19.662495: step: 70/459, loss: 0.0040422603487968445 2023-01-22 19:03:20.231989: step: 72/459, loss: 0.0036394773051142693 2023-01-22 19:03:20.818063: step: 74/459, loss: 0.024823136627674103 2023-01-22 19:03:21.456358: step: 76/459, loss: 0.0007239109254442155 2023-01-22 19:03:22.044404: step: 78/459, loss: 0.005129151977598667 2023-01-22 19:03:22.644930: step: 80/459, loss: 0.0008204577025026083 2023-01-22 19:03:23.212678: step: 82/459, loss: 0.007780921645462513 2023-01-22 19:03:23.840879: step: 84/459, loss: 0.0897790789604187 2023-01-22 19:03:24.501267: step: 86/459, loss: 0.0015222450019791722 2023-01-22 19:03:25.202764: step: 88/459, loss: 0.003219345584511757 2023-01-22 19:03:25.820708: step: 90/459, loss: 0.010653126053512096 2023-01-22 19:03:26.470878: step: 92/459, loss: 0.022137148305773735 2023-01-22 19:03:27.103498: step: 94/459, loss: 0.016319530084729195 2023-01-22 19:03:27.759510: step: 96/459, loss: 0.0046202801167964935 2023-01-22 19:03:28.370487: step: 98/459, loss: 0.00017464558186475188 2023-01-22 19:03:29.072331: step: 100/459, loss: 0.029964039102196693 2023-01-22 19:03:29.648151: step: 102/459, loss: 0.002702499506995082 2023-01-22 19:03:30.204512: step: 104/459, loss: 0.0005523118888959289 2023-01-22 19:03:30.762513: step: 106/459, loss: 0.014975214377045631 2023-01-22 19:03:31.427774: step: 108/459, loss: 0.04735809192061424 2023-01-22 19:03:32.037522: step: 110/459, loss: 0.0007017138414084911 2023-01-22 19:03:32.625981: step: 112/459, loss: 0.0020540866535156965 2023-01-22 19:03:33.233885: step: 114/459, loss: 0.0025385739281773567 2023-01-22 19:03:33.834523: step: 116/459, loss: 0.00987279787659645 2023-01-22 19:03:34.399180: step: 118/459, loss: 0.0018584128702059388 2023-01-22 19:03:35.018631: step: 120/459, loss: 0.0016877055168151855 2023-01-22 19:03:35.613713: step: 122/459, loss: 0.0008298775064758956 2023-01-22 19:03:36.206304: step: 124/459, loss: 0.0007589836022816598 2023-01-22 19:03:36.936500: step: 126/459, loss: 0.03600279614329338 2023-01-22 19:03:37.539971: step: 128/459, loss: 0.0008409405127167702 2023-01-22 19:03:38.139865: step: 130/459, loss: 0.011020823381841183 2023-01-22 19:03:38.754476: step: 132/459, loss: 0.012343471869826317 2023-01-22 19:03:39.309309: step: 134/459, loss: 0.006897162180393934 2023-01-22 19:03:39.950884: step: 136/459, loss: 0.002032047603279352 2023-01-22 19:03:40.584302: step: 138/459, loss: 0.004747317638248205 2023-01-22 19:03:41.216506: step: 140/459, loss: 0.008500872179865837 2023-01-22 19:03:41.859292: step: 142/459, loss: 0.013406259939074516 2023-01-22 19:03:42.504050: step: 144/459, loss: 0.14109563827514648 2023-01-22 19:03:43.077523: step: 146/459, loss: 0.0017654045950621367 2023-01-22 19:03:43.642210: step: 148/459, loss: 0.0004312744422350079 2023-01-22 19:03:44.231709: step: 150/459, loss: 0.0012573694111779332 2023-01-22 19:03:44.844946: step: 152/459, loss: 0.031973909586668015 2023-01-22 19:03:45.414961: step: 154/459, loss: 0.0005623747711069882 2023-01-22 19:03:46.071672: step: 156/459, loss: 0.00033166224602609873 2023-01-22 19:03:46.688307: step: 158/459, loss: 0.001098228502087295 2023-01-22 19:03:47.248395: step: 160/459, loss: 1.37543429445941e-05 2023-01-22 19:03:47.783177: step: 162/459, loss: 0.080680251121521 2023-01-22 19:03:48.440290: step: 164/459, loss: 0.004193190950900316 2023-01-22 19:03:49.033055: step: 166/459, loss: 0.005388184450566769 2023-01-22 19:03:49.717782: step: 168/459, loss: 0.0010195632930845022 2023-01-22 19:03:50.321188: step: 170/459, loss: 0.05665628984570503 2023-01-22 19:03:50.909927: step: 172/459, loss: 0.016035662963986397 2023-01-22 19:03:51.554673: step: 174/459, loss: 0.001242509693838656 2023-01-22 19:03:52.152501: step: 176/459, loss: 0.0030239985790103674 2023-01-22 19:03:52.722667: step: 178/459, loss: 0.0002450550382491201 2023-01-22 19:03:53.442078: step: 180/459, loss: 0.04243532195687294 2023-01-22 19:03:54.031694: step: 182/459, loss: 0.002694344148039818 2023-01-22 19:03:54.591832: step: 184/459, loss: 0.0026956035289913416 2023-01-22 19:03:55.254152: step: 186/459, loss: 0.0003159334301017225 2023-01-22 19:03:55.885361: step: 188/459, loss: 0.05519768223166466 2023-01-22 19:03:56.447800: step: 190/459, loss: 0.0013561123050749302 2023-01-22 19:03:57.130664: step: 192/459, loss: 0.02277420274913311 2023-01-22 19:03:57.743529: step: 194/459, loss: 0.038476526737213135 2023-01-22 19:03:58.386319: step: 196/459, loss: 0.0031273229978978634 2023-01-22 19:03:58.980372: step: 198/459, loss: 0.005835122894495726 2023-01-22 19:03:59.636550: step: 200/459, loss: 0.0009664655080996454 2023-01-22 19:04:00.295648: step: 202/459, loss: 0.015382972545921803 2023-01-22 19:04:00.973558: step: 204/459, loss: 0.002299908548593521 2023-01-22 19:04:01.546399: step: 206/459, loss: 0.000443994504166767 2023-01-22 19:04:02.050080: step: 208/459, loss: 0.0006173683796077967 2023-01-22 19:04:02.632538: step: 210/459, loss: 0.012751122005283833 2023-01-22 19:04:03.230896: step: 212/459, loss: 0.004468843340873718 2023-01-22 19:04:03.769946: step: 214/459, loss: 0.00316936569288373 2023-01-22 19:04:04.397750: step: 216/459, loss: 0.0029273121617734432 2023-01-22 19:04:05.047086: step: 218/459, loss: 0.012506522238254547 2023-01-22 19:04:05.654937: step: 220/459, loss: 0.00013052343274466693 2023-01-22 19:04:06.322082: step: 222/459, loss: 0.0016035308362916112 2023-01-22 19:04:06.957695: step: 224/459, loss: 0.0023928398732095957 2023-01-22 19:04:07.596035: step: 226/459, loss: 0.04099736362695694 2023-01-22 19:04:08.215313: step: 228/459, loss: 0.06439891457557678 2023-01-22 19:04:08.764330: step: 230/459, loss: 0.011353347450494766 2023-01-22 19:04:09.406740: step: 232/459, loss: 0.0014136817771941423 2023-01-22 19:04:10.026828: step: 234/459, loss: 0.0462564192712307 2023-01-22 19:04:10.653801: step: 236/459, loss: 0.016184546053409576 2023-01-22 19:04:11.250877: step: 238/459, loss: 0.008270993828773499 2023-01-22 19:04:11.833094: step: 240/459, loss: 0.0009063410689122975 2023-01-22 19:04:12.496296: step: 242/459, loss: 0.0013266123132780194 2023-01-22 19:04:13.100568: step: 244/459, loss: 0.010542312636971474 2023-01-22 19:04:13.673777: step: 246/459, loss: 0.0408216156065464 2023-01-22 19:04:14.277034: step: 248/459, loss: 0.0003244449617341161 2023-01-22 19:04:14.853922: step: 250/459, loss: 0.0022815666161477566 2023-01-22 19:04:15.420008: step: 252/459, loss: 5.3955340263200924e-05 2023-01-22 19:04:16.040372: step: 254/459, loss: 0.033060912042856216 2023-01-22 19:04:16.591973: step: 256/459, loss: 0.005527052562683821 2023-01-22 19:04:17.200249: step: 258/459, loss: 0.0009180984343402088 2023-01-22 19:04:17.709635: step: 260/459, loss: 0.11661691218614578 2023-01-22 19:04:18.283417: step: 262/459, loss: 0.05616707354784012 2023-01-22 19:04:18.874710: step: 264/459, loss: 0.004444140940904617 2023-01-22 19:04:19.567471: step: 266/459, loss: 0.0059436010196805 2023-01-22 19:04:20.229661: step: 268/459, loss: 0.014889770187437534 2023-01-22 19:04:20.846519: step: 270/459, loss: 0.000961334619205445 2023-01-22 19:04:21.485256: step: 272/459, loss: 0.02398836798965931 2023-01-22 19:04:22.162695: step: 274/459, loss: 0.0005963720614090562 2023-01-22 19:04:22.762713: step: 276/459, loss: 0.0009983103955164552 2023-01-22 19:04:23.361196: step: 278/459, loss: 0.0006300421664491296 2023-01-22 19:04:23.910406: step: 280/459, loss: 2.5856757164001465 2023-01-22 19:04:24.436866: step: 282/459, loss: 0.0003556391457095742 2023-01-22 19:04:25.119479: step: 284/459, loss: 0.007512074429541826 2023-01-22 19:04:25.679759: step: 286/459, loss: 0.0073141977190971375 2023-01-22 19:04:26.432943: step: 288/459, loss: 1.961495041847229 2023-01-22 19:04:26.999965: step: 290/459, loss: 0.0008016128558665514 2023-01-22 19:04:27.675506: step: 292/459, loss: 0.007843377068638802 2023-01-22 19:04:28.225128: step: 294/459, loss: 0.007307684049010277 2023-01-22 19:04:28.919135: step: 296/459, loss: 0.0015205650124698877 2023-01-22 19:04:29.509481: step: 298/459, loss: 0.009913726709783077 2023-01-22 19:04:30.107369: step: 300/459, loss: 0.00020564318401739 2023-01-22 19:04:30.659321: step: 302/459, loss: 0.0038158027455210686 2023-01-22 19:04:31.299929: step: 304/459, loss: 0.01465790905058384 2023-01-22 19:04:31.972121: step: 306/459, loss: 0.008489926345646381 2023-01-22 19:04:32.569867: step: 308/459, loss: 0.0031451324466615915 2023-01-22 19:04:33.088902: step: 310/459, loss: 0.0013643287820741534 2023-01-22 19:04:33.696374: step: 312/459, loss: 0.00032456335611641407 2023-01-22 19:04:34.330839: step: 314/459, loss: 0.004827022552490234 2023-01-22 19:04:35.016534: step: 316/459, loss: 0.01083114929497242 2023-01-22 19:04:35.580083: step: 318/459, loss: 0.04618504270911217 2023-01-22 19:04:36.185153: step: 320/459, loss: 0.011187588796019554 2023-01-22 19:04:36.834612: step: 322/459, loss: 0.00024233837029896677 2023-01-22 19:04:37.424740: step: 324/459, loss: 0.0012969886884093285 2023-01-22 19:04:37.995547: step: 326/459, loss: 5.861623867531307e-05 2023-01-22 19:04:38.596787: step: 328/459, loss: 0.004257499240338802 2023-01-22 19:04:39.228910: step: 330/459, loss: 0.0032978258095681667 2023-01-22 19:04:39.813284: step: 332/459, loss: 0.0015289618168026209 2023-01-22 19:04:40.407126: step: 334/459, loss: 0.010877659544348717 2023-01-22 19:04:40.941659: step: 336/459, loss: 0.012811452150344849 2023-01-22 19:04:41.553110: step: 338/459, loss: 0.013335178606212139 2023-01-22 19:04:42.218578: step: 340/459, loss: 0.06424684822559357 2023-01-22 19:04:42.826239: step: 342/459, loss: 0.0474402941763401 2023-01-22 19:04:43.411475: step: 344/459, loss: 8.722118218429387e-05 2023-01-22 19:04:44.022914: step: 346/459, loss: 0.09729020297527313 2023-01-22 19:04:44.631828: step: 348/459, loss: 0.001155112055130303 2023-01-22 19:04:45.243034: step: 350/459, loss: 0.0026378307957202196 2023-01-22 19:04:45.923345: step: 352/459, loss: 0.00034116648021154106 2023-01-22 19:04:46.501793: step: 354/459, loss: 0.04743323102593422 2023-01-22 19:04:47.129543: step: 356/459, loss: 0.010709245689213276 2023-01-22 19:04:47.723141: step: 358/459, loss: 0.0012278046924620867 2023-01-22 19:04:48.400535: step: 360/459, loss: 0.0033287033438682556 2023-01-22 19:04:48.981374: step: 362/459, loss: 0.005466781556606293 2023-01-22 19:04:49.614061: step: 364/459, loss: 0.01082158274948597 2023-01-22 19:04:50.260055: step: 366/459, loss: 0.00390727911144495 2023-01-22 19:04:50.836237: step: 368/459, loss: 0.014081177301704884 2023-01-22 19:04:51.446231: step: 370/459, loss: 0.00017232813115697354 2023-01-22 19:04:52.064847: step: 372/459, loss: 0.0027893956284970045 2023-01-22 19:04:52.706962: step: 374/459, loss: 0.0013842687476426363 2023-01-22 19:04:53.329756: step: 376/459, loss: 0.0263416338711977 2023-01-22 19:04:54.002242: step: 378/459, loss: 0.006074142176657915 2023-01-22 19:04:54.629823: step: 380/459, loss: 0.07251002639532089 2023-01-22 19:04:55.315132: step: 382/459, loss: 0.0819995179772377 2023-01-22 19:04:56.003580: step: 384/459, loss: 0.034299157559871674 2023-01-22 19:04:56.600318: step: 386/459, loss: 0.0028713755309581757 2023-01-22 19:04:57.127645: step: 388/459, loss: 0.0005983432638458908 2023-01-22 19:04:57.732513: step: 390/459, loss: 0.01510630827397108 2023-01-22 19:04:58.355644: step: 392/459, loss: 0.001385183772072196 2023-01-22 19:04:58.962679: step: 394/459, loss: 0.00013070066052023321 2023-01-22 19:04:59.561373: step: 396/459, loss: 0.025900863111019135 2023-01-22 19:05:00.202036: step: 398/459, loss: 0.004499162547290325 2023-01-22 19:05:00.826590: step: 400/459, loss: 0.03241917863488197 2023-01-22 19:05:01.444412: step: 402/459, loss: 1.0759610631794203e-05 2023-01-22 19:05:02.101749: step: 404/459, loss: 0.0003774380311369896 2023-01-22 19:05:02.701212: step: 406/459, loss: 0.01627197302877903 2023-01-22 19:05:03.329425: step: 408/459, loss: 0.0005408434080891311 2023-01-22 19:05:03.883161: step: 410/459, loss: 0.0012192107969895005 2023-01-22 19:05:04.531514: step: 412/459, loss: 0.014665736816823483 2023-01-22 19:05:05.108285: step: 414/459, loss: 0.00019689137116074562 2023-01-22 19:05:05.727503: step: 416/459, loss: 0.00754181994125247 2023-01-22 19:05:06.402740: step: 418/459, loss: 0.00010827238293131813 2023-01-22 19:05:07.013825: step: 420/459, loss: 0.021266845986247063 2023-01-22 19:05:07.564784: step: 422/459, loss: 0.010281559079885483 2023-01-22 19:05:08.156054: step: 424/459, loss: 0.004777665715664625 2023-01-22 19:05:08.830517: step: 426/459, loss: 0.0012817323440685868 2023-01-22 19:05:09.406035: step: 428/459, loss: 0.0003662437084130943 2023-01-22 19:05:10.071140: step: 430/459, loss: 0.000614245655015111 2023-01-22 19:05:10.671960: step: 432/459, loss: 9.069981751963496e-05 2023-01-22 19:05:11.229977: step: 434/459, loss: 0.0012541244504973292 2023-01-22 19:05:11.840221: step: 436/459, loss: 0.07137790322303772 2023-01-22 19:05:12.544810: step: 438/459, loss: 0.0015643994556739926 2023-01-22 19:05:13.128655: step: 440/459, loss: 0.00046956015285104513 2023-01-22 19:05:13.748158: step: 442/459, loss: 0.03077620267868042 2023-01-22 19:05:14.374778: step: 444/459, loss: 1.662512840994168e-05 2023-01-22 19:05:14.967905: step: 446/459, loss: 0.009485923685133457 2023-01-22 19:05:15.605581: step: 448/459, loss: 1.3538874554797076e-05 2023-01-22 19:05:16.215311: step: 450/459, loss: 0.009875660762190819 2023-01-22 19:05:16.941047: step: 452/459, loss: 0.010796244256198406 2023-01-22 19:05:17.550816: step: 454/459, loss: 0.00032297481084242463 2023-01-22 19:05:18.173515: step: 456/459, loss: 0.022051509469747543 2023-01-22 19:05:18.746048: step: 458/459, loss: 0.015391801483929157 2023-01-22 19:05:19.343436: step: 460/459, loss: 0.01684381440281868 2023-01-22 19:05:19.912792: step: 462/459, loss: 0.01218040008097887 2023-01-22 19:05:20.530259: step: 464/459, loss: 0.030979299917817116 2023-01-22 19:05:21.114355: step: 466/459, loss: 0.009132026694715023 2023-01-22 19:05:21.658054: step: 468/459, loss: 0.0018816726515069604 2023-01-22 19:05:22.211863: step: 470/459, loss: 0.005460845306515694 2023-01-22 19:05:22.860934: step: 472/459, loss: 0.10582654923200607 2023-01-22 19:05:23.459908: step: 474/459, loss: 0.08576160669326782 2023-01-22 19:05:24.100879: step: 476/459, loss: 0.014389214105904102 2023-01-22 19:05:24.769910: step: 478/459, loss: 0.01141372974961996 2023-01-22 19:05:25.350730: step: 480/459, loss: 0.12831605970859528 2023-01-22 19:05:25.893325: step: 482/459, loss: 0.0076799336820840836 2023-01-22 19:05:26.518242: step: 484/459, loss: 0.6386221051216125 2023-01-22 19:05:27.192826: step: 486/459, loss: 0.008474588394165039 2023-01-22 19:05:27.803001: step: 488/459, loss: 0.004721233155578375 2023-01-22 19:05:28.435235: step: 490/459, loss: 0.02433832176029682 2023-01-22 19:05:29.099555: step: 492/459, loss: 0.025165075436234474 2023-01-22 19:05:29.791072: step: 494/459, loss: 0.006884340196847916 2023-01-22 19:05:30.435685: step: 496/459, loss: 0.016753170639276505 2023-01-22 19:05:30.983506: step: 498/459, loss: 0.0037983597721904516 2023-01-22 19:05:31.545215: step: 500/459, loss: 0.008527682162821293 2023-01-22 19:05:32.152025: step: 502/459, loss: 0.0033143027685582638 2023-01-22 19:05:32.794070: step: 504/459, loss: 0.02521004155278206 2023-01-22 19:05:33.469632: step: 506/459, loss: 0.037102360278367996 2023-01-22 19:05:34.065039: step: 508/459, loss: 0.09980816394090652 2023-01-22 19:05:34.774975: step: 510/459, loss: 0.004870290402323008 2023-01-22 19:05:35.379453: step: 512/459, loss: 1.101069450378418 2023-01-22 19:05:35.944174: step: 514/459, loss: 0.00022536948381457478 2023-01-22 19:05:36.523613: step: 516/459, loss: 0.0011325915111228824 2023-01-22 19:05:37.122789: step: 518/459, loss: 0.000430315121775493 2023-01-22 19:05:37.720302: step: 520/459, loss: 0.00128837744705379 2023-01-22 19:05:38.342768: step: 522/459, loss: 0.012062150053679943 2023-01-22 19:05:39.097807: step: 524/459, loss: 0.10840430110692978 2023-01-22 19:05:39.714683: step: 526/459, loss: 0.0014161410508677363 2023-01-22 19:05:40.351864: step: 528/459, loss: 0.014496558345854282 2023-01-22 19:05:40.965093: step: 530/459, loss: 0.013385863043367863 2023-01-22 19:05:41.650159: step: 532/459, loss: 0.09140781313180923 2023-01-22 19:05:42.324096: step: 534/459, loss: 0.014120402745902538 2023-01-22 19:05:42.920014: step: 536/459, loss: 0.0006189580890350044 2023-01-22 19:05:43.511988: step: 538/459, loss: 0.0049284836277365685 2023-01-22 19:05:44.182382: step: 540/459, loss: 0.0030213568825274706 2023-01-22 19:05:44.782832: step: 542/459, loss: 0.0008537629037164152 2023-01-22 19:05:45.526998: step: 544/459, loss: 0.13404834270477295 2023-01-22 19:05:46.132120: step: 546/459, loss: 0.002738847164437175 2023-01-22 19:05:46.787846: step: 548/459, loss: 0.006735598668456078 2023-01-22 19:05:47.422280: step: 550/459, loss: 0.004204690922051668 2023-01-22 19:05:47.981561: step: 552/459, loss: 0.028851034119725227 2023-01-22 19:05:48.558844: step: 554/459, loss: 0.03417814150452614 2023-01-22 19:05:49.169218: step: 556/459, loss: 0.0015466238837689161 2023-01-22 19:05:49.826080: step: 558/459, loss: 0.0005599496653303504 2023-01-22 19:05:50.409853: step: 560/459, loss: 0.013688911683857441 2023-01-22 19:05:51.086911: step: 562/459, loss: 0.0023527268785983324 2023-01-22 19:05:51.670397: step: 564/459, loss: 0.0028537993784993887 2023-01-22 19:05:52.184318: step: 566/459, loss: 0.0007768544601276517 2023-01-22 19:05:52.797756: step: 568/459, loss: 0.001564518315717578 2023-01-22 19:05:53.411082: step: 570/459, loss: 0.10342691838741302 2023-01-22 19:05:54.032221: step: 572/459, loss: 0.002207499695941806 2023-01-22 19:05:54.666009: step: 574/459, loss: 0.0004960914375260472 2023-01-22 19:05:55.275003: step: 576/459, loss: 0.0018030810169875622 2023-01-22 19:05:55.941011: step: 578/459, loss: 0.001171171898022294 2023-01-22 19:05:56.585769: step: 580/459, loss: 0.00572156859561801 2023-01-22 19:05:57.176499: step: 582/459, loss: 0.017256345599889755 2023-01-22 19:05:57.856579: step: 584/459, loss: 0.0028767301701009274 2023-01-22 19:05:58.503298: step: 586/459, loss: 0.06995204836130142 2023-01-22 19:05:59.104819: step: 588/459, loss: 5.911136759095825e-05 2023-01-22 19:05:59.692633: step: 590/459, loss: 0.05856357142329216 2023-01-22 19:06:00.363260: step: 592/459, loss: 0.0037245971616357565 2023-01-22 19:06:00.977559: step: 594/459, loss: 0.015601453371345997 2023-01-22 19:06:01.597749: step: 596/459, loss: 0.021538879722356796 2023-01-22 19:06:02.200664: step: 598/459, loss: 0.0003193156444467604 2023-01-22 19:06:02.802612: step: 600/459, loss: 0.09341423213481903 2023-01-22 19:06:03.431250: step: 602/459, loss: 0.0036340104416012764 2023-01-22 19:06:04.017529: step: 604/459, loss: 0.001673780265264213 2023-01-22 19:06:04.653298: step: 606/459, loss: 0.03209122642874718 2023-01-22 19:06:05.235879: step: 608/459, loss: 0.09675323218107224 2023-01-22 19:06:05.909072: step: 610/459, loss: 0.0008382440428249538 2023-01-22 19:06:06.488387: step: 612/459, loss: 0.03305443003773689 2023-01-22 19:06:07.092926: step: 614/459, loss: 0.0016479750629514456 2023-01-22 19:06:07.801479: step: 616/459, loss: 0.018671521916985512 2023-01-22 19:06:08.425434: step: 618/459, loss: 0.7842006087303162 2023-01-22 19:06:09.048495: step: 620/459, loss: 0.12838198244571686 2023-01-22 19:06:09.692646: step: 622/459, loss: 0.004105246160179377 2023-01-22 19:06:10.225962: step: 624/459, loss: 0.0071985553950071335 2023-01-22 19:06:10.867158: step: 626/459, loss: 0.0013022252824157476 2023-01-22 19:06:11.523377: step: 628/459, loss: 0.006462998688220978 2023-01-22 19:06:12.132696: step: 630/459, loss: 0.001094833598472178 2023-01-22 19:06:12.747543: step: 632/459, loss: 0.0038997868541628122 2023-01-22 19:06:13.341383: step: 634/459, loss: 0.00496253464370966 2023-01-22 19:06:13.962586: step: 636/459, loss: 0.001217188430018723 2023-01-22 19:06:14.607401: step: 638/459, loss: 0.22141030430793762 2023-01-22 19:06:15.283584: step: 640/459, loss: 0.001613646512851119 2023-01-22 19:06:15.813340: step: 642/459, loss: 0.012854182161390781 2023-01-22 19:06:16.393782: step: 644/459, loss: 0.0009404238080605865 2023-01-22 19:06:17.057934: step: 646/459, loss: 0.001139619736932218 2023-01-22 19:06:17.697343: step: 648/459, loss: 0.009509122930467129 2023-01-22 19:06:18.290762: step: 650/459, loss: 0.0015139939496293664 2023-01-22 19:06:18.917457: step: 652/459, loss: 0.0023873934987932444 2023-01-22 19:06:19.551770: step: 654/459, loss: 0.002622437197715044 2023-01-22 19:06:20.196401: step: 656/459, loss: 0.010121585801243782 2023-01-22 19:06:20.895501: step: 658/459, loss: 0.002710253931581974 2023-01-22 19:06:21.517704: step: 660/459, loss: 0.00877311173826456 2023-01-22 19:06:22.130959: step: 662/459, loss: 0.002121003344655037 2023-01-22 19:06:22.742673: step: 664/459, loss: 0.012838863767683506 2023-01-22 19:06:23.413129: step: 666/459, loss: 0.11685454100370407 2023-01-22 19:06:24.006135: step: 668/459, loss: 0.14088985323905945 2023-01-22 19:06:24.610318: step: 670/459, loss: 0.0006210471619851887 2023-01-22 19:06:25.208702: step: 672/459, loss: 0.08876243233680725 2023-01-22 19:06:25.817659: step: 674/459, loss: 0.013888775371015072 2023-01-22 19:06:26.402042: step: 676/459, loss: 0.0011122222058475018 2023-01-22 19:06:27.055575: step: 678/459, loss: 0.004319930449128151 2023-01-22 19:06:27.623432: step: 680/459, loss: 0.00696491077542305 2023-01-22 19:06:28.257156: step: 682/459, loss: 0.008793797343969345 2023-01-22 19:06:28.843115: step: 684/459, loss: 0.05953267216682434 2023-01-22 19:06:29.456734: step: 686/459, loss: 0.04519350454211235 2023-01-22 19:06:30.081452: step: 688/459, loss: 0.004102278035134077 2023-01-22 19:06:30.677106: step: 690/459, loss: 0.00186854787170887 2023-01-22 19:06:31.284064: step: 692/459, loss: 0.000972102745436132 2023-01-22 19:06:31.945301: step: 694/459, loss: 0.05278734490275383 2023-01-22 19:06:32.604636: step: 696/459, loss: 0.01866420917212963 2023-01-22 19:06:33.227462: step: 698/459, loss: 0.0007973656756803393 2023-01-22 19:06:33.817684: step: 700/459, loss: 0.0052415551617741585 2023-01-22 19:06:34.418026: step: 702/459, loss: 0.0019746755715459585 2023-01-22 19:06:35.035700: step: 704/459, loss: 0.43219122290611267 2023-01-22 19:06:35.591812: step: 706/459, loss: 0.019251108169555664 2023-01-22 19:06:36.157889: step: 708/459, loss: 0.00025726581225171685 2023-01-22 19:06:36.751112: step: 710/459, loss: 0.0005067792371846735 2023-01-22 19:06:37.329705: step: 712/459, loss: 0.0034989500418305397 2023-01-22 19:06:37.956549: step: 714/459, loss: 0.031130459159612656 2023-01-22 19:06:38.575521: step: 716/459, loss: 0.004678159486502409 2023-01-22 19:06:39.136075: step: 718/459, loss: 0.00038936256896704435 2023-01-22 19:06:39.809427: step: 720/459, loss: 0.0035614247899502516 2023-01-22 19:06:40.450631: step: 722/459, loss: 0.00952692236751318 2023-01-22 19:06:41.031246: step: 724/459, loss: 0.00807953905314207 2023-01-22 19:06:41.724335: step: 726/459, loss: 0.03678937256336212 2023-01-22 19:06:42.316451: step: 728/459, loss: 0.0010907596442848444 2023-01-22 19:06:42.981007: step: 730/459, loss: 0.011097487062215805 2023-01-22 19:06:43.581729: step: 732/459, loss: 0.006111897993832827 2023-01-22 19:06:44.216287: step: 734/459, loss: 0.017601946368813515 2023-01-22 19:06:44.760911: step: 736/459, loss: 0.009060525335371494 2023-01-22 19:06:45.357108: step: 738/459, loss: 0.003679279936477542 2023-01-22 19:06:45.971265: step: 740/459, loss: 0.08700217306613922 2023-01-22 19:06:46.629266: step: 742/459, loss: 0.05299575254321098 2023-01-22 19:06:47.235018: step: 744/459, loss: 6.09884737059474e-05 2023-01-22 19:06:47.891872: step: 746/459, loss: 0.00251629832200706 2023-01-22 19:06:48.474476: step: 748/459, loss: 0.00034692330518737435 2023-01-22 19:06:49.036066: step: 750/459, loss: 0.010920765809714794 2023-01-22 19:06:49.626856: step: 752/459, loss: 0.000613560841884464 2023-01-22 19:06:50.242352: step: 754/459, loss: 0.02767922915518284 2023-01-22 19:06:50.870949: step: 756/459, loss: 0.0063728028908371925 2023-01-22 19:06:51.480531: step: 758/459, loss: 0.0566084161400795 2023-01-22 19:06:52.108109: step: 760/459, loss: 2.512948412913829e-05 2023-01-22 19:06:52.716677: step: 762/459, loss: 0.0002769257698673755 2023-01-22 19:06:53.430224: step: 764/459, loss: 0.013835293240845203 2023-01-22 19:06:54.006181: step: 766/459, loss: 0.03561139106750488 2023-01-22 19:06:54.596108: step: 768/459, loss: 0.007573164068162441 2023-01-22 19:06:55.191725: step: 770/459, loss: 0.00046888779615983367 2023-01-22 19:06:55.850325: step: 772/459, loss: 0.000255480787018314 2023-01-22 19:06:56.470914: step: 774/459, loss: 3.163884684909135e-05 2023-01-22 19:06:57.135685: step: 776/459, loss: 0.009488050825893879 2023-01-22 19:06:57.698690: step: 778/459, loss: 0.0008868628065101802 2023-01-22 19:06:58.285661: step: 780/459, loss: 0.2794870436191559 2023-01-22 19:06:58.902843: step: 782/459, loss: 0.011959508061408997 2023-01-22 19:06:59.486131: step: 784/459, loss: 0.002665633335709572 2023-01-22 19:07:00.106230: step: 786/459, loss: 0.4511110782623291 2023-01-22 19:07:00.752999: step: 788/459, loss: 0.10641466826200485 2023-01-22 19:07:01.355356: step: 790/459, loss: 0.00219322694465518 2023-01-22 19:07:01.983401: step: 792/459, loss: 0.02461092919111252 2023-01-22 19:07:02.608176: step: 794/459, loss: 0.02234163135290146 2023-01-22 19:07:03.234580: step: 796/459, loss: 0.5309181213378906 2023-01-22 19:07:03.864216: step: 798/459, loss: 0.03401675075292587 2023-01-22 19:07:04.520058: step: 800/459, loss: 0.0030045723542571068 2023-01-22 19:07:05.170028: step: 802/459, loss: 0.025066280737519264 2023-01-22 19:07:05.789994: step: 804/459, loss: 0.003739378647878766 2023-01-22 19:07:06.377378: step: 806/459, loss: 0.0052855550311505795 2023-01-22 19:07:06.978198: step: 808/459, loss: 0.0024412476923316717 2023-01-22 19:07:07.565278: step: 810/459, loss: 0.024553054943680763 2023-01-22 19:07:08.238774: step: 812/459, loss: 0.0020086735021322966 2023-01-22 19:07:08.877305: step: 814/459, loss: 0.0063652233220636845 2023-01-22 19:07:09.466775: step: 816/459, loss: 0.008967725560069084 2023-01-22 19:07:10.070193: step: 818/459, loss: 0.0038890819996595383 2023-01-22 19:07:10.716087: step: 820/459, loss: 0.011205802671611309 2023-01-22 19:07:11.341119: step: 822/459, loss: 0.007312116678804159 2023-01-22 19:07:12.005249: step: 824/459, loss: 0.0025701974518597126 2023-01-22 19:07:12.585296: step: 826/459, loss: 0.003206731751561165 2023-01-22 19:07:13.184499: step: 828/459, loss: 0.00577958021312952 2023-01-22 19:07:13.774852: step: 830/459, loss: 0.1776501089334488 2023-01-22 19:07:14.397350: step: 832/459, loss: 0.004169206600636244 2023-01-22 19:07:14.993458: step: 834/459, loss: 0.00634086225181818 2023-01-22 19:07:15.663688: step: 836/459, loss: 0.005332375410944223 2023-01-22 19:07:16.284164: step: 838/459, loss: 0.019486382603645325 2023-01-22 19:07:16.915498: step: 840/459, loss: 0.01800631359219551 2023-01-22 19:07:17.553225: step: 842/459, loss: 0.027061285451054573 2023-01-22 19:07:18.156417: step: 844/459, loss: 0.0006169549305923283 2023-01-22 19:07:18.860366: step: 846/459, loss: 0.03413436561822891 2023-01-22 19:07:19.507361: step: 848/459, loss: 0.0005641883471980691 2023-01-22 19:07:20.079756: step: 850/459, loss: 3.902209209627472e-05 2023-01-22 19:07:20.691997: step: 852/459, loss: 0.003940414171665907 2023-01-22 19:07:21.298667: step: 854/459, loss: 0.025054560974240303 2023-01-22 19:07:21.891538: step: 856/459, loss: 0.03733174130320549 2023-01-22 19:07:22.487287: step: 858/459, loss: 2.4503280656062998e-05 2023-01-22 19:07:23.109130: step: 860/459, loss: 0.04270336031913757 2023-01-22 19:07:23.728557: step: 862/459, loss: 0.0002094700321322307 2023-01-22 19:07:24.329275: step: 864/459, loss: 0.05817889794707298 2023-01-22 19:07:25.010620: step: 866/459, loss: 0.004648582078516483 2023-01-22 19:07:25.705853: step: 868/459, loss: 0.03895110264420509 2023-01-22 19:07:26.337918: step: 870/459, loss: 0.00037402965244837105 2023-01-22 19:07:26.950177: step: 872/459, loss: 0.014263679273426533 2023-01-22 19:07:27.680602: step: 874/459, loss: 0.016065938398241997 2023-01-22 19:07:28.320892: step: 876/459, loss: 0.006769351661205292 2023-01-22 19:07:28.972701: step: 878/459, loss: 0.029565487056970596 2023-01-22 19:07:29.570498: step: 880/459, loss: 0.0007578210206702352 2023-01-22 19:07:30.233638: step: 882/459, loss: 0.0009195547318086028 2023-01-22 19:07:30.783283: step: 884/459, loss: 0.04196205362677574 2023-01-22 19:07:31.397562: step: 886/459, loss: 0.00018448826449457556 2023-01-22 19:07:32.039347: step: 888/459, loss: 0.007961432449519634 2023-01-22 19:07:32.637501: step: 890/459, loss: 0.02266845479607582 2023-01-22 19:07:33.207128: step: 892/459, loss: 0.0819881334900856 2023-01-22 19:07:33.864677: step: 894/459, loss: 0.30395326018333435 2023-01-22 19:07:34.445669: step: 896/459, loss: 3.5211316571803764e-05 2023-01-22 19:07:35.095502: step: 898/459, loss: 0.013112819753587246 2023-01-22 19:07:35.741099: step: 900/459, loss: 0.007420291658490896 2023-01-22 19:07:36.335958: step: 902/459, loss: 0.006632167845964432 2023-01-22 19:07:36.928126: step: 904/459, loss: 0.006686086300760508 2023-01-22 19:07:37.547051: step: 906/459, loss: 0.006855909246951342 2023-01-22 19:07:38.170655: step: 908/459, loss: 0.0014080989640206099 2023-01-22 19:07:38.772966: step: 910/459, loss: 0.00444366317242384 2023-01-22 19:07:39.393504: step: 912/459, loss: 0.03740920126438141 2023-01-22 19:07:40.043300: step: 914/459, loss: 0.04136045277118683 2023-01-22 19:07:40.609754: step: 916/459, loss: 0.031850580126047134 2023-01-22 19:07:41.307952: step: 918/459, loss: 6.941803439985961e-05 2023-01-22 19:07:41.756636: step: 920/459, loss: 0.00961019191890955 ================================================== Loss: 0.036 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29620313747550264, 'r': 0.33667111830707036, 'f1': 0.31514330257162715}, 'combined': 0.2322108545264621, 'epoch': 37} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3216087474525372, 'r': 0.31604863261941785, 'f1': 0.31880444905804517}, 'combined': 0.2040348473971489, 'epoch': 37} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29500000000000004, 'r': 0.3442599620493359, 'f1': 0.3177320490367776}, 'combined': 0.2341183519218361, 'epoch': 37} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.32245130552079615, 'r': 0.31863704985949465, 'f1': 0.3205328309341735}, 'combined': 0.205141011797871, 'epoch': 37} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31471798608711327, 'r': 0.33741112360383113, 'f1': 0.3256697108776905}, 'combined': 0.2399671553835614, 'epoch': 37} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3320920411320271, 'r': 0.32694097845023473, 'f1': 0.32949637920134}, 'combined': 0.23624268697454567, 'epoch': 37} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2202380952380952, 'r': 0.35238095238095235, 'f1': 0.271062271062271}, 'combined': 0.18070818070818065, 'epoch': 37} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.20512820512820512, 'r': 0.34782608695652173, 'f1': 0.2580645161290323}, 'combined': 0.12903225806451615, 'epoch': 37} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.20689655172413793, 'f1': 0.2553191489361702}, 'combined': 0.17021276595744678, 'epoch': 37} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3066986908783784, 'r': 0.3445268026565465, 'f1': 0.3245140750670242}, 'combined': 0.23911563425991253, 'epoch': 19} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31271473684488904, 'r': 0.30873473110322686, 'f1': 0.31071198921642224}, 'combined': 0.1988556730985102, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2564102564102564, 'r': 0.38095238095238093, 'f1': 0.3065134099616858}, 'combined': 0.20434227330779053, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29418702499903554, 'r': 0.3594998939267151, 'f1': 0.32358060478117656}, 'combined': 0.23842781404928798, 'epoch': 13} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3202762492367604, 'r': 0.31212376289255195, 'f1': 0.31614745781013553}, 'combined': 0.20233437299848672, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2878787878787879, 'r': 0.41304347826086957, 'f1': 0.3392857142857143}, 'combined': 0.16964285714285715, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3189032241859154, 'r': 0.3376622373733222, 'f1': 0.32801474487694154}, 'combined': 0.24169507517248323, 'epoch': 15} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34157316773046453, 'r': 0.3008578947798814, 'f1': 0.3199253278791385}, 'combined': 0.22938042376240123, 'epoch': 15} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35294117647058826, 'r': 0.20689655172413793, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 15} ****************************** Epoch: 38 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 19:10:17.888420: step: 2/459, loss: 0.012781570665538311 2023-01-22 19:10:18.533116: step: 4/459, loss: 0.012329253368079662 2023-01-22 19:10:19.161854: step: 6/459, loss: 0.0029356987215578556 2023-01-22 19:10:19.815215: step: 8/459, loss: 0.0021542466711252928 2023-01-22 19:10:20.394292: step: 10/459, loss: 0.0070365238934755325 2023-01-22 19:10:20.955274: step: 12/459, loss: 0.02976328693330288 2023-01-22 19:10:21.584835: step: 14/459, loss: 0.039310816675424576 2023-01-22 19:10:22.198612: step: 16/459, loss: 0.053209032863378525 2023-01-22 19:10:22.736263: step: 18/459, loss: 0.002968315966427326 2023-01-22 19:10:23.387685: step: 20/459, loss: 0.15747326612472534 2023-01-22 19:10:23.998303: step: 22/459, loss: 0.006761377677321434 2023-01-22 19:10:24.562121: step: 24/459, loss: 0.00935700535774231 2023-01-22 19:10:25.086873: step: 26/459, loss: 0.008593271486461163 2023-01-22 19:10:25.654044: step: 28/459, loss: 0.0034264815039932728 2023-01-22 19:10:26.232802: step: 30/459, loss: 0.001613634405657649 2023-01-22 19:10:26.934900: step: 32/459, loss: 0.011637525632977486 2023-01-22 19:10:27.597144: step: 34/459, loss: 0.024718012660741806 2023-01-22 19:10:28.215356: step: 36/459, loss: 0.007357648108154535 2023-01-22 19:10:28.849981: step: 38/459, loss: 0.03218184784054756 2023-01-22 19:10:29.449641: step: 40/459, loss: 0.00019996751507278532 2023-01-22 19:10:30.039454: step: 42/459, loss: 0.00026374348090030253 2023-01-22 19:10:30.645067: step: 44/459, loss: 0.001106594456359744 2023-01-22 19:10:31.229585: step: 46/459, loss: 0.048636067658662796 2023-01-22 19:10:31.813770: step: 48/459, loss: 0.0038816221058368683 2023-01-22 19:10:32.447391: step: 50/459, loss: 0.00044379194150678813 2023-01-22 19:10:33.045734: step: 52/459, loss: 0.005659847054630518 2023-01-22 19:10:33.705798: step: 54/459, loss: 0.00028047573869116604 2023-01-22 19:10:34.311802: step: 56/459, loss: 0.006504202727228403 2023-01-22 19:10:34.870917: step: 58/459, loss: 0.00872679241001606 2023-01-22 19:10:35.499872: step: 60/459, loss: 0.0024742847308516502 2023-01-22 19:10:36.115773: step: 62/459, loss: 0.0011853905161842704 2023-01-22 19:10:36.787196: step: 64/459, loss: 0.0012991811381652951 2023-01-22 19:10:37.351786: step: 66/459, loss: 0.00020590655913110822 2023-01-22 19:10:37.993387: step: 68/459, loss: 0.0037427018396556377 2023-01-22 19:10:38.558338: step: 70/459, loss: 4.746123886434361e-05 2023-01-22 19:10:39.278834: step: 72/459, loss: 0.00825604610145092 2023-01-22 19:10:39.883890: step: 74/459, loss: 0.007355828303843737 2023-01-22 19:10:40.495340: step: 76/459, loss: 0.008898487314581871 2023-01-22 19:10:41.124673: step: 78/459, loss: 0.11894124746322632 2023-01-22 19:10:41.707475: step: 80/459, loss: 0.04367423802614212 2023-01-22 19:10:42.331229: step: 82/459, loss: 4.548687684291508e-06 2023-01-22 19:10:42.933266: step: 84/459, loss: 0.0017333694268018007 2023-01-22 19:10:43.582853: step: 86/459, loss: 0.0961955189704895 2023-01-22 19:10:44.157257: step: 88/459, loss: 0.00045317100011743605 2023-01-22 19:10:44.722636: step: 90/459, loss: 0.00012908507778774947 2023-01-22 19:10:45.303096: step: 92/459, loss: 0.02275802195072174 2023-01-22 19:10:45.900911: step: 94/459, loss: 0.003709514392539859 2023-01-22 19:10:46.460517: step: 96/459, loss: 0.00036691714194603264 2023-01-22 19:10:47.072909: step: 98/459, loss: 0.0014341919450089335 2023-01-22 19:10:47.609597: step: 100/459, loss: 0.0002265498915221542 2023-01-22 19:10:48.252653: step: 102/459, loss: 0.46638867259025574 2023-01-22 19:10:48.892966: step: 104/459, loss: 0.0019779042340815067 2023-01-22 19:10:49.508527: step: 106/459, loss: 0.0034624459221959114 2023-01-22 19:10:50.119039: step: 108/459, loss: 0.025867149233818054 2023-01-22 19:10:50.734647: step: 110/459, loss: 0.013524731621146202 2023-01-22 19:10:51.358061: step: 112/459, loss: 0.00015915557742118835 2023-01-22 19:10:51.950504: step: 114/459, loss: 3.714037666213699e-05 2023-01-22 19:10:52.524185: step: 116/459, loss: 0.003186209127306938 2023-01-22 19:10:53.120844: step: 118/459, loss: 6.466474587796256e-05 2023-01-22 19:10:53.720830: step: 120/459, loss: 0.019568083807826042 2023-01-22 19:10:54.312232: step: 122/459, loss: 0.0089617520570755 2023-01-22 19:10:54.904510: step: 124/459, loss: 0.10357744991779327 2023-01-22 19:10:55.527525: step: 126/459, loss: 0.005818710196763277 2023-01-22 19:10:56.208209: step: 128/459, loss: 0.00025019209715537727 2023-01-22 19:10:56.797686: step: 130/459, loss: 0.01792311482131481 2023-01-22 19:10:57.439140: step: 132/459, loss: 0.027146577835083008 2023-01-22 19:10:58.042655: step: 134/459, loss: 0.019155070185661316 2023-01-22 19:10:58.663310: step: 136/459, loss: 0.0018331104656681418 2023-01-22 19:10:59.268352: step: 138/459, loss: 0.006528785917907953 2023-01-22 19:10:59.863057: step: 140/459, loss: 0.002632284304127097 2023-01-22 19:11:00.523541: step: 142/459, loss: 0.15517716109752655 2023-01-22 19:11:01.114940: step: 144/459, loss: 0.0005038364906795323 2023-01-22 19:11:01.727216: step: 146/459, loss: 0.004407123662531376 2023-01-22 19:11:02.291817: step: 148/459, loss: 0.003699215827509761 2023-01-22 19:11:02.900328: step: 150/459, loss: 0.029427099972963333 2023-01-22 19:11:03.433257: step: 152/459, loss: 7.894942973507568e-05 2023-01-22 19:11:04.016593: step: 154/459, loss: 0.002823036629706621 2023-01-22 19:11:04.769636: step: 156/459, loss: 0.007220831234008074 2023-01-22 19:11:05.397087: step: 158/459, loss: 0.0003394763625692576 2023-01-22 19:11:06.022825: step: 160/459, loss: 3.6237615859135985e-06 2023-01-22 19:11:06.709164: step: 162/459, loss: 0.010114790871739388 2023-01-22 19:11:07.293613: step: 164/459, loss: 0.0001283139135921374 2023-01-22 19:11:07.947368: step: 166/459, loss: 0.00017335591837763786 2023-01-22 19:11:08.588460: step: 168/459, loss: 0.01924181543290615 2023-01-22 19:11:09.203227: step: 170/459, loss: 0.011962099000811577 2023-01-22 19:11:09.835036: step: 172/459, loss: 8.623669418739155e-05 2023-01-22 19:11:10.366939: step: 174/459, loss: 0.0025300842244178057 2023-01-22 19:11:10.987468: step: 176/459, loss: 0.00011135532986372709 2023-01-22 19:11:11.554630: step: 178/459, loss: 9.59911722020479e-06 2023-01-22 19:11:12.135297: step: 180/459, loss: 0.10831788182258606 2023-01-22 19:11:12.708651: step: 182/459, loss: 0.0017555132508277893 2023-01-22 19:11:13.308088: step: 184/459, loss: 8.093080396065488e-05 2023-01-22 19:11:14.008845: step: 186/459, loss: 0.09375442564487457 2023-01-22 19:11:14.617646: step: 188/459, loss: 0.0023439419455826283 2023-01-22 19:11:15.181671: step: 190/459, loss: 0.030796291306614876 2023-01-22 19:11:15.772164: step: 192/459, loss: 0.008664173074066639 2023-01-22 19:11:16.354641: step: 194/459, loss: 8.479576536046807e-06 2023-01-22 19:11:16.907880: step: 196/459, loss: 0.0015112506225705147 2023-01-22 19:11:17.486015: step: 198/459, loss: 0.0007030975539237261 2023-01-22 19:11:18.076522: step: 200/459, loss: 0.037386421114206314 2023-01-22 19:11:18.753956: step: 202/459, loss: 0.013781196437776089 2023-01-22 19:11:19.343278: step: 204/459, loss: 0.0013590733287855983 2023-01-22 19:11:19.967606: step: 206/459, loss: 0.0027682199142873287 2023-01-22 19:11:20.612101: step: 208/459, loss: 0.018389105796813965 2023-01-22 19:11:21.281915: step: 210/459, loss: 0.03666301071643829 2023-01-22 19:11:21.879906: step: 212/459, loss: 6.36408876744099e-05 2023-01-22 19:11:22.545199: step: 214/459, loss: 0.003516943659633398 2023-01-22 19:11:23.198881: step: 216/459, loss: 0.020697807893157005 2023-01-22 19:11:23.778385: step: 218/459, loss: 2.633861731737852e-05 2023-01-22 19:11:24.374811: step: 220/459, loss: 0.00015773771156091243 2023-01-22 19:11:24.907412: step: 222/459, loss: 0.01078773196786642 2023-01-22 19:11:25.504633: step: 224/459, loss: 0.0005930644692853093 2023-01-22 19:11:26.171971: step: 226/459, loss: 0.02100580371916294 2023-01-22 19:11:26.803517: step: 228/459, loss: 0.0013088621199131012 2023-01-22 19:11:27.409953: step: 230/459, loss: 0.027295537292957306 2023-01-22 19:11:28.086717: step: 232/459, loss: 0.005453135818243027 2023-01-22 19:11:28.698594: step: 234/459, loss: 0.006881773006170988 2023-01-22 19:11:29.317632: step: 236/459, loss: 0.005848701111972332 2023-01-22 19:11:30.020613: step: 238/459, loss: 0.007982463575899601 2023-01-22 19:11:30.609502: step: 240/459, loss: 0.0006762474658899009 2023-01-22 19:11:31.181016: step: 242/459, loss: 0.021432019770145416 2023-01-22 19:11:31.778320: step: 244/459, loss: 0.0005961218266747892 2023-01-22 19:11:32.475308: step: 246/459, loss: 0.011240050196647644 2023-01-22 19:11:33.069472: step: 248/459, loss: 0.008177036419510841 2023-01-22 19:11:33.668878: step: 250/459, loss: 0.10779447108507156 2023-01-22 19:11:34.244160: step: 252/459, loss: 0.0059477584436535835 2023-01-22 19:11:34.873000: step: 254/459, loss: 0.010359741747379303 2023-01-22 19:11:35.465125: step: 256/459, loss: 0.0004232526698615402 2023-01-22 19:11:36.052042: step: 258/459, loss: 0.0045490749180316925 2023-01-22 19:11:36.703937: step: 260/459, loss: 0.0072285495698452 2023-01-22 19:11:37.292485: step: 262/459, loss: 0.0007659537950530648 2023-01-22 19:11:37.899050: step: 264/459, loss: 0.0004178071394562721 2023-01-22 19:11:38.533879: step: 266/459, loss: 0.16197918355464935 2023-01-22 19:11:39.129596: step: 268/459, loss: 6.075791679904796e-05 2023-01-22 19:11:39.790462: step: 270/459, loss: 0.0002907178131863475 2023-01-22 19:11:40.355600: step: 272/459, loss: 0.0014872346073389053 2023-01-22 19:11:40.966295: step: 274/459, loss: 0.004585022572427988 2023-01-22 19:11:41.553785: step: 276/459, loss: 0.0021707629784941673 2023-01-22 19:11:42.197046: step: 278/459, loss: 0.004255221225321293 2023-01-22 19:11:42.800506: step: 280/459, loss: 0.0018822373822331429 2023-01-22 19:11:43.400421: step: 282/459, loss: 0.00012243307719472796 2023-01-22 19:11:43.982329: step: 284/459, loss: 0.01639745384454727 2023-01-22 19:11:44.686269: step: 286/459, loss: 0.25607094168663025 2023-01-22 19:11:45.325381: step: 288/459, loss: 0.0068993279710412025 2023-01-22 19:11:45.894472: step: 290/459, loss: 0.0005039414972998202 2023-01-22 19:11:46.479094: step: 292/459, loss: 0.008268055506050587 2023-01-22 19:11:47.054829: step: 294/459, loss: 0.0008826226112432778 2023-01-22 19:11:47.685715: step: 296/459, loss: 0.005635819863528013 2023-01-22 19:11:48.292351: step: 298/459, loss: 0.03495287522673607 2023-01-22 19:11:48.874403: step: 300/459, loss: 0.0017060564132407308 2023-01-22 19:11:49.521464: step: 302/459, loss: 0.0003429662901908159 2023-01-22 19:11:50.135688: step: 304/459, loss: 0.001900414121337235 2023-01-22 19:11:50.712642: step: 306/459, loss: 0.0009758347878232598 2023-01-22 19:11:51.363774: step: 308/459, loss: 0.015289434231817722 2023-01-22 19:11:52.039287: step: 310/459, loss: 0.015763944014906883 2023-01-22 19:11:52.700291: step: 312/459, loss: 0.019654104486107826 2023-01-22 19:11:53.240899: step: 314/459, loss: 0.03220130875706673 2023-01-22 19:11:53.821131: step: 316/459, loss: 0.011235286481678486 2023-01-22 19:11:54.426937: step: 318/459, loss: 0.014462069608271122 2023-01-22 19:11:55.023251: step: 320/459, loss: 4.265511961420998e-05 2023-01-22 19:11:55.719840: step: 322/459, loss: 0.003778275568038225 2023-01-22 19:11:56.315209: step: 324/459, loss: 0.016384562477469444 2023-01-22 19:11:56.980464: step: 326/459, loss: 0.011643683537840843 2023-01-22 19:11:57.603529: step: 328/459, loss: 0.001593254623003304 2023-01-22 19:11:58.202789: step: 330/459, loss: 0.01927618682384491 2023-01-22 19:11:58.792553: step: 332/459, loss: 9.466959340898029e-07 2023-01-22 19:11:59.370272: step: 334/459, loss: 0.012157621793448925 2023-01-22 19:11:59.948146: step: 336/459, loss: 0.0014303915668278933 2023-01-22 19:12:00.562782: step: 338/459, loss: 0.0024114218540489674 2023-01-22 19:12:01.238606: step: 340/459, loss: 0.013570021837949753 2023-01-22 19:12:01.844037: step: 342/459, loss: 0.00034906217479147017 2023-01-22 19:12:02.464322: step: 344/459, loss: 0.008751044049859047 2023-01-22 19:12:03.053344: step: 346/459, loss: 0.08008716255426407 2023-01-22 19:12:03.656937: step: 348/459, loss: 0.00040801052819006145 2023-01-22 19:12:04.265782: step: 350/459, loss: 0.020372336730360985 2023-01-22 19:12:04.853692: step: 352/459, loss: 0.001226522377692163 2023-01-22 19:12:05.413892: step: 354/459, loss: 0.001062976778484881 2023-01-22 19:12:06.020830: step: 356/459, loss: 0.020388031378388405 2023-01-22 19:12:06.635832: step: 358/459, loss: 7.122705574147403e-05 2023-01-22 19:12:07.229317: step: 360/459, loss: 0.004690561909228563 2023-01-22 19:12:07.816289: step: 362/459, loss: 0.0007754630642011762 2023-01-22 19:12:08.454191: step: 364/459, loss: 0.025663921609520912 2023-01-22 19:12:09.034027: step: 366/459, loss: 0.01692892238497734 2023-01-22 19:12:09.593970: step: 368/459, loss: 0.00021488749189302325 2023-01-22 19:12:10.141125: step: 370/459, loss: 0.0005336173344403505 2023-01-22 19:12:10.739210: step: 372/459, loss: 0.034645095467567444 2023-01-22 19:12:11.354013: step: 374/459, loss: 0.0010630384786054492 2023-01-22 19:12:11.991711: step: 376/459, loss: 0.02738410420715809 2023-01-22 19:12:12.623138: step: 378/459, loss: 0.011454187333583832 2023-01-22 19:12:13.228308: step: 380/459, loss: 0.1160157322883606 2023-01-22 19:12:13.890038: step: 382/459, loss: 0.003211735049262643 2023-01-22 19:12:14.466772: step: 384/459, loss: 0.034470874816179276 2023-01-22 19:12:15.170486: step: 386/459, loss: 0.010337111540138721 2023-01-22 19:12:15.774616: step: 388/459, loss: 0.029090505093336105 2023-01-22 19:12:16.410029: step: 390/459, loss: 0.0007611605105921626 2023-01-22 19:12:16.960960: step: 392/459, loss: 0.02683228813111782 2023-01-22 19:12:17.582687: step: 394/459, loss: 0.04046817868947983 2023-01-22 19:12:18.215935: step: 396/459, loss: 0.013394040986895561 2023-01-22 19:12:18.832007: step: 398/459, loss: 0.01811261847615242 2023-01-22 19:12:19.437214: step: 400/459, loss: 0.08354783058166504 2023-01-22 19:12:20.073201: step: 402/459, loss: 0.002612548880279064 2023-01-22 19:12:20.666650: step: 404/459, loss: 0.012856760993599892 2023-01-22 19:12:21.349725: step: 406/459, loss: 5.098912515677512e-05 2023-01-22 19:12:22.023933: step: 408/459, loss: 0.009215991012752056 2023-01-22 19:12:22.676996: step: 410/459, loss: 0.0013455512234941125 2023-01-22 19:12:23.274609: step: 412/459, loss: 0.025162270292639732 2023-01-22 19:12:23.934633: step: 414/459, loss: 0.003719278145581484 2023-01-22 19:12:24.518417: step: 416/459, loss: 1.2579496797116008e-05 2023-01-22 19:12:25.060968: step: 418/459, loss: 0.00271070608869195 2023-01-22 19:12:25.630982: step: 420/459, loss: 0.019811363890767097 2023-01-22 19:12:26.268258: step: 422/459, loss: 0.027245912700891495 2023-01-22 19:12:26.978961: step: 424/459, loss: 0.002719248877838254 2023-01-22 19:12:27.572921: step: 426/459, loss: 0.03291086480021477 2023-01-22 19:12:28.199383: step: 428/459, loss: 0.0011206952622160316 2023-01-22 19:12:28.777465: step: 430/459, loss: 0.04765656590461731 2023-01-22 19:12:29.388738: step: 432/459, loss: 0.0011404193937778473 2023-01-22 19:12:29.961345: step: 434/459, loss: 0.0003687937860377133 2023-01-22 19:12:30.618994: step: 436/459, loss: 0.0014551873318850994 2023-01-22 19:12:31.225261: step: 438/459, loss: 0.02300163172185421 2023-01-22 19:12:31.876374: step: 440/459, loss: 0.001987803727388382 2023-01-22 19:12:32.470412: step: 442/459, loss: 0.011005443520843983 2023-01-22 19:12:33.056621: step: 444/459, loss: 0.03987272456288338 2023-01-22 19:12:33.654966: step: 446/459, loss: 0.0005207843496464193 2023-01-22 19:12:34.280503: step: 448/459, loss: 0.0022412845864892006 2023-01-22 19:12:34.900052: step: 450/459, loss: 0.004839078523218632 2023-01-22 19:12:35.496054: step: 452/459, loss: 0.0025366689078509808 2023-01-22 19:12:36.176337: step: 454/459, loss: 0.37815719842910767 2023-01-22 19:12:36.764235: step: 456/459, loss: 0.0009496755665168166 2023-01-22 19:12:37.420860: step: 458/459, loss: 0.05110325291752815 2023-01-22 19:12:37.981323: step: 460/459, loss: 6.529894017148763e-05 2023-01-22 19:12:38.562582: step: 462/459, loss: 0.0026653846725821495 2023-01-22 19:12:39.158008: step: 464/459, loss: 0.008639704436063766 2023-01-22 19:12:39.777985: step: 466/459, loss: 0.001002115081064403 2023-01-22 19:12:40.419400: step: 468/459, loss: 0.006393656600266695 2023-01-22 19:12:41.040902: step: 470/459, loss: 0.0016275510424748063 2023-01-22 19:12:41.569422: step: 472/459, loss: 0.03586231544613838 2023-01-22 19:12:42.191290: step: 474/459, loss: 0.06398405879735947 2023-01-22 19:12:42.762624: step: 476/459, loss: 0.0011268685339018703 2023-01-22 19:12:43.400869: step: 478/459, loss: 0.0011905297869816422 2023-01-22 19:12:43.970199: step: 480/459, loss: 0.00014914489293005317 2023-01-22 19:12:44.664636: step: 482/459, loss: 0.05993957445025444 2023-01-22 19:12:45.249893: step: 484/459, loss: 0.012571249157190323 2023-01-22 19:12:45.853379: step: 486/459, loss: 0.002577803563326597 2023-01-22 19:12:46.469550: step: 488/459, loss: 0.0002601702872198075 2023-01-22 19:12:47.102833: step: 490/459, loss: 0.001235205098055303 2023-01-22 19:12:47.657544: step: 492/459, loss: 0.02994113229215145 2023-01-22 19:12:48.227229: step: 494/459, loss: 0.0036656386218965054 2023-01-22 19:12:48.775935: step: 496/459, loss: 9.281550592277199e-05 2023-01-22 19:12:49.481110: step: 498/459, loss: 0.001852396409958601 2023-01-22 19:12:50.113591: step: 500/459, loss: 5.618742943624966e-05 2023-01-22 19:12:50.786168: step: 502/459, loss: 0.017524823546409607 2023-01-22 19:12:51.360235: step: 504/459, loss: 0.014185325242578983 2023-01-22 19:12:51.959573: step: 506/459, loss: 0.00035912118619307876 2023-01-22 19:12:52.619897: step: 508/459, loss: 0.003040154231712222 2023-01-22 19:12:53.230226: step: 510/459, loss: 0.00033111032098531723 2023-01-22 19:12:53.854061: step: 512/459, loss: 0.0028772728983312845 2023-01-22 19:12:54.490017: step: 514/459, loss: 0.0068571483716368675 2023-01-22 19:12:55.087313: step: 516/459, loss: 0.02742450125515461 2023-01-22 19:12:55.697345: step: 518/459, loss: 0.001856572343967855 2023-01-22 19:12:56.231417: step: 520/459, loss: 0.004631341900676489 2023-01-22 19:12:56.824893: step: 522/459, loss: 0.0015659175114706159 2023-01-22 19:12:57.430418: step: 524/459, loss: 0.00260421191342175 2023-01-22 19:12:57.972139: step: 526/459, loss: 0.05457228049635887 2023-01-22 19:12:58.593665: step: 528/459, loss: 0.00144168920814991 2023-01-22 19:12:59.275754: step: 530/459, loss: 0.09097301214933395 2023-01-22 19:12:59.827822: step: 532/459, loss: 0.00891572330147028 2023-01-22 19:13:00.498317: step: 534/459, loss: 0.010730738751590252 2023-01-22 19:13:01.083635: step: 536/459, loss: 0.0034593124873936176 2023-01-22 19:13:01.727692: step: 538/459, loss: 0.05444441735744476 2023-01-22 19:13:02.326189: step: 540/459, loss: 0.0024101706221699715 2023-01-22 19:13:02.869619: step: 542/459, loss: 6.824840238550678e-05 2023-01-22 19:13:03.385018: step: 544/459, loss: 5.625570338452235e-05 2023-01-22 19:13:03.905854: step: 546/459, loss: 0.02047671377658844 2023-01-22 19:13:04.481630: step: 548/459, loss: 0.006199614144861698 2023-01-22 19:13:05.182834: step: 550/459, loss: 0.0011529317125678062 2023-01-22 19:13:05.797875: step: 552/459, loss: 0.011933153495192528 2023-01-22 19:13:06.401409: step: 554/459, loss: 0.009431042708456516 2023-01-22 19:13:07.009954: step: 556/459, loss: 0.02049216628074646 2023-01-22 19:13:07.590969: step: 558/459, loss: 0.010350491851568222 2023-01-22 19:13:08.167470: step: 560/459, loss: 0.0004656836681533605 2023-01-22 19:13:08.785412: step: 562/459, loss: 0.0035360681358724833 2023-01-22 19:13:09.348171: step: 564/459, loss: 0.008260991424322128 2023-01-22 19:13:09.929929: step: 566/459, loss: 0.025157196447253227 2023-01-22 19:13:10.514922: step: 568/459, loss: 1.275471004191786e-05 2023-01-22 19:13:11.131206: step: 570/459, loss: 0.025495076552033424 2023-01-22 19:13:11.701174: step: 572/459, loss: 0.00011892819020431489 2023-01-22 19:13:12.326669: step: 574/459, loss: 0.003425747388973832 2023-01-22 19:13:12.891828: step: 576/459, loss: 0.004865691997110844 2023-01-22 19:13:13.477524: step: 578/459, loss: 0.08791067451238632 2023-01-22 19:13:14.111967: step: 580/459, loss: 0.0020610648207366467 2023-01-22 19:13:14.804794: step: 582/459, loss: 0.0020229073707014322 2023-01-22 19:13:15.479043: step: 584/459, loss: 0.0023387332912534475 2023-01-22 19:13:16.174623: step: 586/459, loss: 0.030809808522462845 2023-01-22 19:13:16.768821: step: 588/459, loss: 7.360022573266178e-05 2023-01-22 19:13:17.460632: step: 590/459, loss: 0.04503679275512695 2023-01-22 19:13:18.110422: step: 592/459, loss: 0.0002938195248134434 2023-01-22 19:13:18.749654: step: 594/459, loss: 0.04089610278606415 2023-01-22 19:13:19.392084: step: 596/459, loss: 0.01044928003102541 2023-01-22 19:13:20.020533: step: 598/459, loss: 0.0005596872069872916 2023-01-22 19:13:20.708543: step: 600/459, loss: 0.02185036800801754 2023-01-22 19:13:21.280224: step: 602/459, loss: 0.0022277156822383404 2023-01-22 19:13:21.934541: step: 604/459, loss: 0.012905878946185112 2023-01-22 19:13:22.525027: step: 606/459, loss: 0.00025757242110557854 2023-01-22 19:13:23.123926: step: 608/459, loss: 0.0009602593490853906 2023-01-22 19:13:23.683222: step: 610/459, loss: 0.0017295194556936622 2023-01-22 19:13:24.271425: step: 612/459, loss: 0.024534033611416817 2023-01-22 19:13:24.906578: step: 614/459, loss: 0.00040060788160189986 2023-01-22 19:13:25.590440: step: 616/459, loss: 0.017137721180915833 2023-01-22 19:13:26.156670: step: 618/459, loss: 0.008931536227464676 2023-01-22 19:13:26.788989: step: 620/459, loss: 0.0021084363106638193 2023-01-22 19:13:27.393935: step: 622/459, loss: 0.034648455679416656 2023-01-22 19:13:28.007249: step: 624/459, loss: 0.7541772723197937 2023-01-22 19:13:28.658818: step: 626/459, loss: 0.000265563401626423 2023-01-22 19:13:29.304326: step: 628/459, loss: 0.023972325026988983 2023-01-22 19:13:29.961667: step: 630/459, loss: 0.05955645814538002 2023-01-22 19:13:30.566691: step: 632/459, loss: 0.0013985661789774895 2023-01-22 19:13:31.232471: step: 634/459, loss: 0.09330250322818756 2023-01-22 19:13:31.843221: step: 636/459, loss: 0.014810984022915363 2023-01-22 19:13:32.474866: step: 638/459, loss: 0.0004922007792629302 2023-01-22 19:13:33.077172: step: 640/459, loss: 0.08154386281967163 2023-01-22 19:13:33.751056: step: 642/459, loss: 0.008685470558702946 2023-01-22 19:13:34.397849: step: 644/459, loss: 0.00310116121545434 2023-01-22 19:13:35.030175: step: 646/459, loss: 0.0006133219576440752 2023-01-22 19:13:35.649101: step: 648/459, loss: 0.00011989317135885358 2023-01-22 19:13:36.311056: step: 650/459, loss: 0.026301175355911255 2023-01-22 19:13:36.885187: step: 652/459, loss: 0.02024342119693756 2023-01-22 19:13:37.473686: step: 654/459, loss: 0.0002731801650952548 2023-01-22 19:13:38.025397: step: 656/459, loss: 0.0012140782782807946 2023-01-22 19:13:38.585102: step: 658/459, loss: 0.016286687925457954 2023-01-22 19:13:39.204604: step: 660/459, loss: 0.006569011136889458 2023-01-22 19:13:39.887564: step: 662/459, loss: 0.0017129273619502783 2023-01-22 19:13:40.488531: step: 664/459, loss: 0.004905332811176777 2023-01-22 19:13:41.117353: step: 666/459, loss: 0.008937446400523186 2023-01-22 19:13:41.730472: step: 668/459, loss: 0.010578080080449581 2023-01-22 19:13:42.374340: step: 670/459, loss: 0.13776393234729767 2023-01-22 19:13:42.993767: step: 672/459, loss: 0.006201405543833971 2023-01-22 19:13:43.584649: step: 674/459, loss: 0.03636515513062477 2023-01-22 19:13:44.149422: step: 676/459, loss: 0.009548717178404331 2023-01-22 19:13:44.814712: step: 678/459, loss: 0.009049579501152039 2023-01-22 19:13:45.475560: step: 680/459, loss: 0.021459966897964478 2023-01-22 19:13:46.201541: step: 682/459, loss: 0.018932171165943146 2023-01-22 19:13:46.841923: step: 684/459, loss: 0.0173687394708395 2023-01-22 19:13:47.575299: step: 686/459, loss: 0.03434179350733757 2023-01-22 19:13:48.176104: step: 688/459, loss: 0.024805426597595215 2023-01-22 19:13:48.774166: step: 690/459, loss: 0.0012282003881409764 2023-01-22 19:13:49.425350: step: 692/459, loss: 0.0030040296260267496 2023-01-22 19:13:49.986976: step: 694/459, loss: 0.00032702332828193903 2023-01-22 19:13:50.659947: step: 696/459, loss: 0.09606762230396271 2023-01-22 19:13:51.272240: step: 698/459, loss: 0.0017118348041549325 2023-01-22 19:13:51.837462: step: 700/459, loss: 0.003397963475435972 2023-01-22 19:13:52.428110: step: 702/459, loss: 0.019396042451262474 2023-01-22 19:13:53.059663: step: 704/459, loss: 0.012716390192508698 2023-01-22 19:13:53.699534: step: 706/459, loss: 0.0028107124380767345 2023-01-22 19:13:54.292653: step: 708/459, loss: 0.001132724923081696 2023-01-22 19:13:54.884050: step: 710/459, loss: 0.17383654415607452 2023-01-22 19:13:55.571106: step: 712/459, loss: 0.0005768447881564498 2023-01-22 19:13:56.153223: step: 714/459, loss: 0.023314569145441055 2023-01-22 19:13:56.759094: step: 716/459, loss: 0.01854575239121914 2023-01-22 19:13:57.349069: step: 718/459, loss: 0.004390773829072714 2023-01-22 19:13:57.964129: step: 720/459, loss: 0.0082683265209198 2023-01-22 19:13:58.583963: step: 722/459, loss: 0.0025655743665993214 2023-01-22 19:13:59.336578: step: 724/459, loss: 0.003744196379557252 2023-01-22 19:13:59.994847: step: 726/459, loss: 0.00011976621317444369 2023-01-22 19:14:00.617179: step: 728/459, loss: 0.009961050003767014 2023-01-22 19:14:01.232744: step: 730/459, loss: 0.01308141928166151 2023-01-22 19:14:01.895751: step: 732/459, loss: 0.027339814230799675 2023-01-22 19:14:02.548767: step: 734/459, loss: 0.021683447062969208 2023-01-22 19:14:03.144060: step: 736/459, loss: 0.012079430744051933 2023-01-22 19:14:03.702414: step: 738/459, loss: 0.026043415069580078 2023-01-22 19:14:04.344644: step: 740/459, loss: 0.0024703717790544033 2023-01-22 19:14:04.971915: step: 742/459, loss: 0.015335616655647755 2023-01-22 19:14:05.563609: step: 744/459, loss: 0.001847943291068077 2023-01-22 19:14:06.217983: step: 746/459, loss: 0.1671382635831833 2023-01-22 19:14:06.833028: step: 748/459, loss: 0.000885050802025944 2023-01-22 19:14:07.411037: step: 750/459, loss: 0.029000427573919296 2023-01-22 19:14:07.996130: step: 752/459, loss: 0.017432978376746178 2023-01-22 19:14:08.644544: step: 754/459, loss: 0.0025452347472310066 2023-01-22 19:14:09.215322: step: 756/459, loss: 0.003687456948682666 2023-01-22 19:14:09.854745: step: 758/459, loss: 0.03039696253836155 2023-01-22 19:14:10.425010: step: 760/459, loss: 0.001225016312673688 2023-01-22 19:14:11.050612: step: 762/459, loss: 0.03921025991439819 2023-01-22 19:14:11.625550: step: 764/459, loss: 0.00020219004363752902 2023-01-22 19:14:12.200906: step: 766/459, loss: 0.03375029191374779 2023-01-22 19:14:12.775594: step: 768/459, loss: 0.003654028056189418 2023-01-22 19:14:13.435225: step: 770/459, loss: 0.0039037426467984915 2023-01-22 19:14:14.162053: step: 772/459, loss: 0.022842999547719955 2023-01-22 19:14:14.751753: step: 774/459, loss: 0.0011949286563321948 2023-01-22 19:14:15.282419: step: 776/459, loss: 0.0010306513868272305 2023-01-22 19:14:15.902424: step: 778/459, loss: 0.013710375875234604 2023-01-22 19:14:16.502151: step: 780/459, loss: 0.0003295237256679684 2023-01-22 19:14:17.085776: step: 782/459, loss: 0.0021334688644856215 2023-01-22 19:14:17.726861: step: 784/459, loss: 0.010278108529746532 2023-01-22 19:14:18.347348: step: 786/459, loss: 0.0008902062545530498 2023-01-22 19:14:18.939336: step: 788/459, loss: 0.03792185336351395 2023-01-22 19:14:19.598561: step: 790/459, loss: 0.00552498921751976 2023-01-22 19:14:20.253362: step: 792/459, loss: 0.10399787873029709 2023-01-22 19:14:20.878369: step: 794/459, loss: 0.00025935404119081795 2023-01-22 19:14:21.450769: step: 796/459, loss: 0.0023160853888839483 2023-01-22 19:14:22.053894: step: 798/459, loss: 1.641603375901468e-05 2023-01-22 19:14:22.692546: step: 800/459, loss: 0.010787367820739746 2023-01-22 19:14:23.293860: step: 802/459, loss: 0.00877799466252327 2023-01-22 19:14:23.913755: step: 804/459, loss: 0.006426048930734396 2023-01-22 19:14:24.594430: step: 806/459, loss: 0.010080251842737198 2023-01-22 19:14:25.226375: step: 808/459, loss: 0.0016062819631770253 2023-01-22 19:14:25.832436: step: 810/459, loss: 0.0014336182503029704 2023-01-22 19:14:26.456434: step: 812/459, loss: 0.0006746207363903522 2023-01-22 19:14:26.992488: step: 814/459, loss: 0.008706176653504372 2023-01-22 19:14:27.658183: step: 816/459, loss: 0.0006371550844050944 2023-01-22 19:14:28.276204: step: 818/459, loss: 0.004563131835311651 2023-01-22 19:14:28.847005: step: 820/459, loss: 0.005527023691684008 2023-01-22 19:14:29.443257: step: 822/459, loss: 0.11471506953239441 2023-01-22 19:14:30.070392: step: 824/459, loss: 0.0003642875817604363 2023-01-22 19:14:30.683263: step: 826/459, loss: 0.006215618923306465 2023-01-22 19:14:31.283844: step: 828/459, loss: 0.040750645101070404 2023-01-22 19:14:31.946634: step: 830/459, loss: 0.03130358085036278 2023-01-22 19:14:32.528742: step: 832/459, loss: 0.01972520723938942 2023-01-22 19:14:33.140786: step: 834/459, loss: 0.7685978412628174 2023-01-22 19:14:33.731131: step: 836/459, loss: 0.00593677069991827 2023-01-22 19:14:34.412800: step: 838/459, loss: 0.005698087625205517 2023-01-22 19:14:35.037973: step: 840/459, loss: 0.005444688256829977 2023-01-22 19:14:35.583584: step: 842/459, loss: 1.605882243893575e-05 2023-01-22 19:14:36.253748: step: 844/459, loss: 0.0642928034067154 2023-01-22 19:14:36.865188: step: 846/459, loss: 0.09309759736061096 2023-01-22 19:14:37.505372: step: 848/459, loss: 0.016632448881864548 2023-01-22 19:14:38.086920: step: 850/459, loss: 0.00444350391626358 2023-01-22 19:14:38.674413: step: 852/459, loss: 0.0023615530226379633 2023-01-22 19:14:39.310439: step: 854/459, loss: 0.03691762313246727 2023-01-22 19:14:39.932875: step: 856/459, loss: 0.0070602670311927795 2023-01-22 19:14:40.639918: step: 858/459, loss: 0.007347792852669954 2023-01-22 19:14:41.248226: step: 860/459, loss: 0.0016333345556631684 2023-01-22 19:14:41.847023: step: 862/459, loss: 0.025699136778712273 2023-01-22 19:14:42.443437: step: 864/459, loss: 0.0024428211618214846 2023-01-22 19:14:43.057960: step: 866/459, loss: 0.0001375576830469072 2023-01-22 19:14:43.650234: step: 868/459, loss: 0.030234012752771378 2023-01-22 19:14:44.240918: step: 870/459, loss: 0.006866815034300089 2023-01-22 19:14:44.845823: step: 872/459, loss: 0.13505268096923828 2023-01-22 19:14:45.469636: step: 874/459, loss: 0.008035620674490929 2023-01-22 19:14:46.118739: step: 876/459, loss: 0.017099658027291298 2023-01-22 19:14:46.732921: step: 878/459, loss: 0.007608567830175161 2023-01-22 19:14:47.314271: step: 880/459, loss: 0.03425966575741768 2023-01-22 19:14:47.943484: step: 882/459, loss: 0.028672805055975914 2023-01-22 19:14:48.529375: step: 884/459, loss: 0.05764683336019516 2023-01-22 19:14:49.162784: step: 886/459, loss: 0.0021642702631652355 2023-01-22 19:14:49.756286: step: 888/459, loss: 0.0014052583137527108 2023-01-22 19:14:50.375599: step: 890/459, loss: 0.10809032618999481 2023-01-22 19:14:50.986566: step: 892/459, loss: 0.0010390537790954113 2023-01-22 19:14:51.589268: step: 894/459, loss: 0.003641548566520214 2023-01-22 19:14:52.211510: step: 896/459, loss: 0.05360041931271553 2023-01-22 19:14:52.828787: step: 898/459, loss: 2.4487530936312396e-07 2023-01-22 19:14:53.469173: step: 900/459, loss: 1.356717824935913 2023-01-22 19:14:54.086305: step: 902/459, loss: 0.0009602071368135512 2023-01-22 19:14:54.716612: step: 904/459, loss: 0.0771937295794487 2023-01-22 19:14:55.309482: step: 906/459, loss: 0.01959819532930851 2023-01-22 19:14:55.939273: step: 908/459, loss: 2.315142955922056e-05 2023-01-22 19:14:56.492196: step: 910/459, loss: 0.008849194273352623 2023-01-22 19:14:57.102407: step: 912/459, loss: 0.03140265867114067 2023-01-22 19:14:57.767640: step: 914/459, loss: 0.01763157919049263 2023-01-22 19:14:58.407465: step: 916/459, loss: 0.0006331168115139008 2023-01-22 19:14:59.018676: step: 918/459, loss: 0.0021960223093628883 2023-01-22 19:14:59.440248: step: 920/459, loss: 0.001750864670611918 ================================================== Loss: 0.025 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2790537370280415, 'r': 0.34259538492816477, 'f1': 0.30757711730348014}, 'combined': 0.22663577064466955, 'epoch': 38} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3237796379046777, 'r': 0.3181819917534594, 'f1': 0.32095641022216786}, 'combined': 0.2054121025421874, 'epoch': 38} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.277958742594701, 'r': 0.35496439044826145, 'f1': 0.3117770562770563}, 'combined': 0.22973046251993623, 'epoch': 38} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3290221103521684, 'r': 0.3191424655463253, 'f1': 0.3240069927347912}, 'combined': 0.20736447535026634, 'epoch': 38} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2937667560916453, 'r': 0.34282078746937733, 'f1': 0.3164037740741889}, 'combined': 0.23313962300203392, 'epoch': 38} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3325135106428469, 'r': 0.3337270636013974, 'f1': 0.3331191818826335}, 'combined': 0.23884016814226555, 'epoch': 38} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2185792349726776, 'r': 0.38095238095238093, 'f1': 0.27777777777777773}, 'combined': 0.18518518518518515, 'epoch': 38} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.19230769230769232, 'r': 0.32608695652173914, 'f1': 0.24193548387096775}, 'combined': 0.12096774193548387, 'epoch': 38} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.20689655172413793, 'f1': 0.2553191489361702}, 'combined': 0.17021276595744678, 'epoch': 38} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3066986908783784, 'r': 0.3445268026565465, 'f1': 0.3245140750670242}, 'combined': 0.23911563425991253, 'epoch': 19} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31271473684488904, 'r': 0.30873473110322686, 'f1': 0.31071198921642224}, 'combined': 0.1988556730985102, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2564102564102564, 'r': 0.38095238095238093, 'f1': 0.3065134099616858}, 'combined': 0.20434227330779053, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29418702499903554, 'r': 0.3594998939267151, 'f1': 0.32358060478117656}, 'combined': 0.23842781404928798, 'epoch': 13} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3202762492367604, 'r': 0.31212376289255195, 'f1': 0.31614745781013553}, 'combined': 0.20233437299848672, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2878787878787879, 'r': 0.41304347826086957, 'f1': 0.3392857142857143}, 'combined': 0.16964285714285715, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3189032241859154, 'r': 0.3376622373733222, 'f1': 0.32801474487694154}, 'combined': 0.24169507517248323, 'epoch': 15} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34157316773046453, 'r': 0.3008578947798814, 'f1': 0.3199253278791385}, 'combined': 0.22938042376240123, 'epoch': 15} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35294117647058826, 'r': 0.20689655172413793, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 15} ****************************** Epoch: 39 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 19:17:35.615807: step: 2/459, loss: 0.007301489822566509 2023-01-22 19:17:36.189558: step: 4/459, loss: 0.00015940192679408938 2023-01-22 19:17:36.800472: step: 6/459, loss: 0.01295709889382124 2023-01-22 19:17:37.519621: step: 8/459, loss: 0.000521622016094625 2023-01-22 19:17:38.150400: step: 10/459, loss: 0.014655235223472118 2023-01-22 19:17:38.780497: step: 12/459, loss: 0.014525082893669605 2023-01-22 19:17:39.376082: step: 14/459, loss: 0.0017325755907222629 2023-01-22 19:17:40.011771: step: 16/459, loss: 0.04595445096492767 2023-01-22 19:17:40.628863: step: 18/459, loss: 0.0038306049536913633 2023-01-22 19:17:41.175899: step: 20/459, loss: 0.0014991529751569033 2023-01-22 19:17:41.731590: step: 22/459, loss: 0.0008966930327005684 2023-01-22 19:17:42.351423: step: 24/459, loss: 0.2120210826396942 2023-01-22 19:17:42.956952: step: 26/459, loss: 0.0001686650502961129 2023-01-22 19:17:43.605467: step: 28/459, loss: 0.007546348962932825 2023-01-22 19:17:44.224185: step: 30/459, loss: 0.000562879431527108 2023-01-22 19:17:44.810935: step: 32/459, loss: 0.002409936860203743 2023-01-22 19:17:45.463702: step: 34/459, loss: 0.0006999563775025308 2023-01-22 19:17:46.121018: step: 36/459, loss: 0.0007537674391642213 2023-01-22 19:17:46.692058: step: 38/459, loss: 0.0041891057044267654 2023-01-22 19:17:47.266037: step: 40/459, loss: 0.06318341940641403 2023-01-22 19:17:47.868735: step: 42/459, loss: 0.011479665525257587 2023-01-22 19:17:48.474721: step: 44/459, loss: 0.0006509727681986988 2023-01-22 19:17:49.030931: step: 46/459, loss: 0.0006689688307233155 2023-01-22 19:17:49.693597: step: 48/459, loss: 0.005811736453324556 2023-01-22 19:17:50.294607: step: 50/459, loss: 0.0004670723283197731 2023-01-22 19:17:50.919242: step: 52/459, loss: 0.17050127685070038 2023-01-22 19:17:51.508672: step: 54/459, loss: 0.005608849693089724 2023-01-22 19:17:52.112277: step: 56/459, loss: 0.0005789062124677002 2023-01-22 19:17:52.752044: step: 58/459, loss: 0.0032561002299189568 2023-01-22 19:17:53.395745: step: 60/459, loss: 0.00028532702708616853 2023-01-22 19:17:53.937900: step: 62/459, loss: 0.000392756424844265 2023-01-22 19:17:54.496276: step: 64/459, loss: 0.001257691765204072 2023-01-22 19:17:55.079774: step: 66/459, loss: 0.0006795079098083079 2023-01-22 19:17:55.760268: step: 68/459, loss: 0.0004780825402121991 2023-01-22 19:17:56.381117: step: 70/459, loss: 0.006041345652192831 2023-01-22 19:17:56.974889: step: 72/459, loss: 0.011235161684453487 2023-01-22 19:17:57.562787: step: 74/459, loss: 0.001066676457412541 2023-01-22 19:17:58.221552: step: 76/459, loss: 0.002620022976770997 2023-01-22 19:17:58.863098: step: 78/459, loss: 0.008677898906171322 2023-01-22 19:17:59.555680: step: 80/459, loss: 0.0011712387204170227 2023-01-22 19:18:00.204652: step: 82/459, loss: 0.0059568313881754875 2023-01-22 19:18:00.825860: step: 84/459, loss: 0.009516427293419838 2023-01-22 19:18:01.423501: step: 86/459, loss: 0.01827416941523552 2023-01-22 19:18:02.058371: step: 88/459, loss: 0.0008793047745712101 2023-01-22 19:18:02.712364: step: 90/459, loss: 0.0002884698333218694 2023-01-22 19:18:03.333537: step: 92/459, loss: 0.0033862912096083164 2023-01-22 19:18:03.958018: step: 94/459, loss: 0.0005852635949850082 2023-01-22 19:18:04.665334: step: 96/459, loss: 0.0005879533127881587 2023-01-22 19:18:05.260230: step: 98/459, loss: 0.005368327721953392 2023-01-22 19:18:05.863283: step: 100/459, loss: 0.0004207184538245201 2023-01-22 19:18:06.394411: step: 102/459, loss: 0.0056177470833063126 2023-01-22 19:18:07.052592: step: 104/459, loss: 0.0013406620128080249 2023-01-22 19:18:07.672830: step: 106/459, loss: 0.0022170087322592735 2023-01-22 19:18:08.312920: step: 108/459, loss: 0.01584138348698616 2023-01-22 19:18:08.974285: step: 110/459, loss: 0.015838980674743652 2023-01-22 19:18:09.606870: step: 112/459, loss: 3.9941907743923366e-05 2023-01-22 19:18:10.223480: step: 114/459, loss: 1.9424142010393552e-06 2023-01-22 19:18:10.825185: step: 116/459, loss: 0.015470989979803562 2023-01-22 19:18:11.406715: step: 118/459, loss: 0.0011058365926146507 2023-01-22 19:18:11.985684: step: 120/459, loss: 0.0009557371959090233 2023-01-22 19:18:12.593044: step: 122/459, loss: 1.1537063121795654 2023-01-22 19:18:13.237691: step: 124/459, loss: 1.2237632290634792e-05 2023-01-22 19:18:13.798573: step: 126/459, loss: 0.0031172188464552164 2023-01-22 19:18:14.397166: step: 128/459, loss: 1.7047854043994448e-06 2023-01-22 19:18:14.967894: step: 130/459, loss: 0.025969039648771286 2023-01-22 19:18:15.605222: step: 132/459, loss: 0.0008524562581442297 2023-01-22 19:18:16.288029: step: 134/459, loss: 0.00933290459215641 2023-01-22 19:18:16.877097: step: 136/459, loss: 0.0013482649810612202 2023-01-22 19:18:17.471812: step: 138/459, loss: 0.0008390850271098316 2023-01-22 19:18:18.094602: step: 140/459, loss: 0.0010270945494994521 2023-01-22 19:18:18.712545: step: 142/459, loss: 0.0020162644796073437 2023-01-22 19:18:19.291267: step: 144/459, loss: 0.0024447529576718807 2023-01-22 19:18:19.854747: step: 146/459, loss: 0.0008110665949061513 2023-01-22 19:18:20.444765: step: 148/459, loss: 0.0006972748669795692 2023-01-22 19:18:21.064525: step: 150/459, loss: 0.0020280792377889156 2023-01-22 19:18:21.642885: step: 152/459, loss: 0.0009950186358764768 2023-01-22 19:18:22.286833: step: 154/459, loss: 0.07835391163825989 2023-01-22 19:18:22.860011: step: 156/459, loss: 0.002351280301809311 2023-01-22 19:18:23.468792: step: 158/459, loss: 0.0067847673781216145 2023-01-22 19:18:24.110725: step: 160/459, loss: 5.974522355245426e-05 2023-01-22 19:18:24.655527: step: 162/459, loss: 0.001417831052094698 2023-01-22 19:18:25.253014: step: 164/459, loss: 0.006184965372085571 2023-01-22 19:18:25.911707: step: 166/459, loss: 0.004486329387873411 2023-01-22 19:18:26.525208: step: 168/459, loss: 0.00047635953524149954 2023-01-22 19:18:27.154308: step: 170/459, loss: 0.03250766918063164 2023-01-22 19:18:27.764763: step: 172/459, loss: 0.001894989749416709 2023-01-22 19:18:28.350680: step: 174/459, loss: 0.000276221486274153 2023-01-22 19:18:28.957741: step: 176/459, loss: 0.007496872451156378 2023-01-22 19:18:29.529376: step: 178/459, loss: 0.001469940529204905 2023-01-22 19:18:30.127288: step: 180/459, loss: 0.1308003067970276 2023-01-22 19:18:30.746261: step: 182/459, loss: 0.0028868522495031357 2023-01-22 19:18:31.349082: step: 184/459, loss: 0.003835164476186037 2023-01-22 19:18:31.977821: step: 186/459, loss: 0.00023708329536020756 2023-01-22 19:18:32.563643: step: 188/459, loss: 0.01061633974313736 2023-01-22 19:18:33.208437: step: 190/459, loss: 0.00011550681665539742 2023-01-22 19:18:33.822650: step: 192/459, loss: 0.004621786531060934 2023-01-22 19:18:34.426250: step: 194/459, loss: 0.004289960488677025 2023-01-22 19:18:35.113159: step: 196/459, loss: 0.013024888001382351 2023-01-22 19:18:35.658188: step: 198/459, loss: 0.005489485803991556 2023-01-22 19:18:36.257231: step: 200/459, loss: 0.0045728543773293495 2023-01-22 19:18:36.860931: step: 202/459, loss: 6.285426206886768e-05 2023-01-22 19:18:37.448701: step: 204/459, loss: 0.0017393670277670026 2023-01-22 19:18:38.015693: step: 206/459, loss: 0.0536370612680912 2023-01-22 19:18:38.683661: step: 208/459, loss: 0.00865218322724104 2023-01-22 19:18:39.331012: step: 210/459, loss: 0.05663987994194031 2023-01-22 19:18:39.892237: step: 212/459, loss: 0.0002444614947307855 2023-01-22 19:18:40.457787: step: 214/459, loss: 0.013910584151744843 2023-01-22 19:18:41.073512: step: 216/459, loss: 0.0008034930215217173 2023-01-22 19:18:41.647092: step: 218/459, loss: 7.720530993537977e-05 2023-01-22 19:18:42.268724: step: 220/459, loss: 0.009862849488854408 2023-01-22 19:18:42.851886: step: 222/459, loss: 6.547991506522521e-05 2023-01-22 19:18:43.461211: step: 224/459, loss: 0.03685590624809265 2023-01-22 19:18:44.020920: step: 226/459, loss: 0.0132658826187253 2023-01-22 19:18:44.649156: step: 228/459, loss: 0.008628790266811848 2023-01-22 19:18:45.188258: step: 230/459, loss: 0.0008758096373640001 2023-01-22 19:18:45.781038: step: 232/459, loss: 0.010995249263942242 2023-01-22 19:18:46.377427: step: 234/459, loss: 0.0006627593538723886 2023-01-22 19:18:46.958189: step: 236/459, loss: 0.0024987307842820883 2023-01-22 19:18:47.535272: step: 238/459, loss: 0.014565236866474152 2023-01-22 19:18:48.110161: step: 240/459, loss: 0.001858954201452434 2023-01-22 19:18:48.807530: step: 242/459, loss: 0.004902820102870464 2023-01-22 19:18:49.438467: step: 244/459, loss: 0.03427562490105629 2023-01-22 19:18:50.009619: step: 246/459, loss: 0.0003202511288691312 2023-01-22 19:18:50.663192: step: 248/459, loss: 0.013900031335651875 2023-01-22 19:18:51.230097: step: 250/459, loss: 0.0004538094508461654 2023-01-22 19:18:51.894197: step: 252/459, loss: 0.09501088410615921 2023-01-22 19:18:52.498697: step: 254/459, loss: 0.007937388494610786 2023-01-22 19:18:53.095762: step: 256/459, loss: 0.0006762384437024593 2023-01-22 19:18:53.719701: step: 258/459, loss: 0.0002967144828289747 2023-01-22 19:18:54.383086: step: 260/459, loss: 0.0007426970405504107 2023-01-22 19:18:54.989563: step: 262/459, loss: 0.009658308699727058 2023-01-22 19:18:55.544313: step: 264/459, loss: 0.002906407928094268 2023-01-22 19:18:56.176830: step: 266/459, loss: 0.001516128540970385 2023-01-22 19:18:56.762808: step: 268/459, loss: 0.014558368362486362 2023-01-22 19:18:57.428028: step: 270/459, loss: 0.029920583590865135 2023-01-22 19:18:58.038634: step: 272/459, loss: 0.010281060822308064 2023-01-22 19:18:58.625341: step: 274/459, loss: 0.0002331180003238842 2023-01-22 19:18:59.228556: step: 276/459, loss: 0.0026839228812605143 2023-01-22 19:18:59.809691: step: 278/459, loss: 0.0030908002518117428 2023-01-22 19:19:00.441318: step: 280/459, loss: 0.0024856680538505316 2023-01-22 19:19:01.129409: step: 282/459, loss: 6.907279748702422e-05 2023-01-22 19:19:01.767958: step: 284/459, loss: 0.013339214026927948 2023-01-22 19:19:02.388008: step: 286/459, loss: 0.005677344743162394 2023-01-22 19:19:02.942928: step: 288/459, loss: 0.006467596627771854 2023-01-22 19:19:03.614647: step: 290/459, loss: 0.03848528116941452 2023-01-22 19:19:04.236775: step: 292/459, loss: 0.008108670823276043 2023-01-22 19:19:04.818583: step: 294/459, loss: 0.033931832760572433 2023-01-22 19:19:05.403297: step: 296/459, loss: 1.69537597685121e-05 2023-01-22 19:19:06.059404: step: 298/459, loss: 0.0002907203743234277 2023-01-22 19:19:06.669803: step: 300/459, loss: 0.004349041264504194 2023-01-22 19:19:07.226529: step: 302/459, loss: 0.0007815213757567108 2023-01-22 19:19:07.854579: step: 304/459, loss: 3.4278305975021794e-05 2023-01-22 19:19:08.442470: step: 306/459, loss: 0.001242628088220954 2023-01-22 19:19:09.099215: step: 308/459, loss: 0.012683999724686146 2023-01-22 19:19:09.661402: step: 310/459, loss: 0.0005079153925180435 2023-01-22 19:19:10.295778: step: 312/459, loss: 0.09746459126472473 2023-01-22 19:19:10.914907: step: 314/459, loss: 0.00011953496868954971 2023-01-22 19:19:11.450959: step: 316/459, loss: 0.0007622343255206943 2023-01-22 19:19:12.105612: step: 318/459, loss: 0.025204699486494064 2023-01-22 19:19:12.799595: step: 320/459, loss: 0.009929354302585125 2023-01-22 19:19:13.417802: step: 322/459, loss: 0.0009053420508280396 2023-01-22 19:19:14.036175: step: 324/459, loss: 0.003917005844414234 2023-01-22 19:19:14.651525: step: 326/459, loss: 0.0640006884932518 2023-01-22 19:19:15.240603: step: 328/459, loss: 0.009595774114131927 2023-01-22 19:19:15.793391: step: 330/459, loss: 0.03832202032208443 2023-01-22 19:19:16.397197: step: 332/459, loss: 0.00033026470919139683 2023-01-22 19:19:16.914936: step: 334/459, loss: 0.0010530393337830901 2023-01-22 19:19:17.566596: step: 336/459, loss: 0.00022165429254528135 2023-01-22 19:19:18.146948: step: 338/459, loss: 0.0010910318233072758 2023-01-22 19:19:18.718677: step: 340/459, loss: 0.0005458489176817238 2023-01-22 19:19:19.329135: step: 342/459, loss: 0.017693009227514267 2023-01-22 19:19:19.926148: step: 344/459, loss: 0.030153457075357437 2023-01-22 19:19:20.660309: step: 346/459, loss: 0.0022635054774582386 2023-01-22 19:19:21.332444: step: 348/459, loss: 0.015169437974691391 2023-01-22 19:19:21.943941: step: 350/459, loss: 0.011772545985877514 2023-01-22 19:19:22.580537: step: 352/459, loss: 0.06861870735883713 2023-01-22 19:19:23.193707: step: 354/459, loss: 0.005025096703320742 2023-01-22 19:19:23.801865: step: 356/459, loss: 0.10925129801034927 2023-01-22 19:19:24.443502: step: 358/459, loss: 0.0024757797364145517 2023-01-22 19:19:25.045115: step: 360/459, loss: 0.003258957527577877 2023-01-22 19:19:25.637157: step: 362/459, loss: 0.006172328256070614 2023-01-22 19:19:26.318248: step: 364/459, loss: 0.021037200465798378 2023-01-22 19:19:26.842396: step: 366/459, loss: 0.00016891791892703623 2023-01-22 19:19:27.471399: step: 368/459, loss: 0.0009206868126057088 2023-01-22 19:19:28.182064: step: 370/459, loss: 0.012048610486090183 2023-01-22 19:19:28.817800: step: 372/459, loss: 0.002292015589773655 2023-01-22 19:19:29.488331: step: 374/459, loss: 0.008758327923715115 2023-01-22 19:19:30.089992: step: 376/459, loss: 0.0006473036482930183 2023-01-22 19:19:30.772746: step: 378/459, loss: 0.14396916329860687 2023-01-22 19:19:31.381268: step: 380/459, loss: 0.8273727893829346 2023-01-22 19:19:31.988652: step: 382/459, loss: 0.016590429469943047 2023-01-22 19:19:32.553314: step: 384/459, loss: 0.0021926702465862036 2023-01-22 19:19:33.183041: step: 386/459, loss: 0.0005842294194735587 2023-01-22 19:19:33.851113: step: 388/459, loss: 0.019742093980312347 2023-01-22 19:19:34.556315: step: 390/459, loss: 0.009530475363135338 2023-01-22 19:19:35.169110: step: 392/459, loss: 0.010659283958375454 2023-01-22 19:19:35.839459: step: 394/459, loss: 0.003205518703907728 2023-01-22 19:19:36.481398: step: 396/459, loss: 0.0031602566596120596 2023-01-22 19:19:37.043710: step: 398/459, loss: 0.09964288026094437 2023-01-22 19:19:37.656844: step: 400/459, loss: 0.0003283438563812524 2023-01-22 19:19:38.294054: step: 402/459, loss: 0.03000192902982235 2023-01-22 19:19:38.961692: step: 404/459, loss: 0.0044552842155098915 2023-01-22 19:19:39.594451: step: 406/459, loss: 0.031333211809396744 2023-01-22 19:19:40.213243: step: 408/459, loss: 0.001240129815414548 2023-01-22 19:19:40.917711: step: 410/459, loss: 0.004461356438696384 2023-01-22 19:19:41.529744: step: 412/459, loss: 0.011671709828078747 2023-01-22 19:19:42.154482: step: 414/459, loss: 0.0065024965442717075 2023-01-22 19:19:42.724944: step: 416/459, loss: 0.005352785810828209 2023-01-22 19:19:43.368772: step: 418/459, loss: 0.03550714626908302 2023-01-22 19:19:43.966891: step: 420/459, loss: 0.08898473531007767 2023-01-22 19:19:44.582913: step: 422/459, loss: 0.0003013868408743292 2023-01-22 19:19:45.126516: step: 424/459, loss: 0.16281011700630188 2023-01-22 19:19:45.704157: step: 426/459, loss: 7.06224818713963e-05 2023-01-22 19:19:46.257917: step: 428/459, loss: 0.04511960595846176 2023-01-22 19:19:46.849264: step: 430/459, loss: 0.0014231920940801501 2023-01-22 19:19:47.443626: step: 432/459, loss: 0.00668824790045619 2023-01-22 19:19:48.079147: step: 434/459, loss: 0.015284844674170017 2023-01-22 19:19:48.743450: step: 436/459, loss: 0.0009209667914547026 2023-01-22 19:19:49.338425: step: 438/459, loss: 0.009038095362484455 2023-01-22 19:19:49.994332: step: 440/459, loss: 0.10534002631902695 2023-01-22 19:19:50.595885: step: 442/459, loss: 0.012280333787202835 2023-01-22 19:19:51.133644: step: 444/459, loss: 0.0011706982040777802 2023-01-22 19:19:51.734635: step: 446/459, loss: 0.011656678281724453 2023-01-22 19:19:52.400810: step: 448/459, loss: 0.0002552765654399991 2023-01-22 19:19:53.026716: step: 450/459, loss: 3.659675348899327e-05 2023-01-22 19:19:53.657520: step: 452/459, loss: 0.0035348401870578527 2023-01-22 19:19:54.263667: step: 454/459, loss: 8.818191417958587e-05 2023-01-22 19:19:54.840897: step: 456/459, loss: 0.003440374741330743 2023-01-22 19:19:55.402475: step: 458/459, loss: 5.860644159838557e-05 2023-01-22 19:19:55.994453: step: 460/459, loss: 0.014498939737677574 2023-01-22 19:19:56.564315: step: 462/459, loss: 0.4060380458831787 2023-01-22 19:19:57.272763: step: 464/459, loss: 0.02649650163948536 2023-01-22 19:19:57.887399: step: 466/459, loss: 0.012787679210305214 2023-01-22 19:19:58.513807: step: 468/459, loss: 0.014825171791017056 2023-01-22 19:19:59.125926: step: 470/459, loss: 0.005172808654606342 2023-01-22 19:19:59.706852: step: 472/459, loss: 0.0016100323991850019 2023-01-22 19:20:00.416039: step: 474/459, loss: 0.0073200431652367115 2023-01-22 19:20:01.081285: step: 476/459, loss: 0.032878175377845764 2023-01-22 19:20:01.735822: step: 478/459, loss: 0.0783286765217781 2023-01-22 19:20:02.436345: step: 480/459, loss: 0.014577570371329784 2023-01-22 19:20:03.042186: step: 482/459, loss: 0.031108641996979713 2023-01-22 19:20:03.612764: step: 484/459, loss: 0.020595496520400047 2023-01-22 19:20:04.261474: step: 486/459, loss: 0.08638277649879456 2023-01-22 19:20:04.775933: step: 488/459, loss: 0.000876711041200906 2023-01-22 19:20:05.398474: step: 490/459, loss: 0.00770155992358923 2023-01-22 19:20:05.945060: step: 492/459, loss: 0.5839963555335999 2023-01-22 19:20:06.526336: step: 494/459, loss: 0.0005573781090788543 2023-01-22 19:20:07.190374: step: 496/459, loss: 0.0019451279658824205 2023-01-22 19:20:07.885530: step: 498/459, loss: 0.04620463401079178 2023-01-22 19:20:08.498546: step: 500/459, loss: 0.0016123418463394046 2023-01-22 19:20:09.065911: step: 502/459, loss: 0.002842874266207218 2023-01-22 19:20:09.721490: step: 504/459, loss: 0.009150652214884758 2023-01-22 19:20:10.299169: step: 506/459, loss: 0.0046772281639277935 2023-01-22 19:20:11.012584: step: 508/459, loss: 0.0008514671353623271 2023-01-22 19:20:11.621517: step: 510/459, loss: 0.001980863744392991 2023-01-22 19:20:12.275496: step: 512/459, loss: 0.0005660573951900005 2023-01-22 19:20:12.883012: step: 514/459, loss: 0.00039402663242071867 2023-01-22 19:20:13.442025: step: 516/459, loss: 0.015390596352517605 2023-01-22 19:20:14.096636: step: 518/459, loss: 0.00018370266479905695 2023-01-22 19:20:14.701479: step: 520/459, loss: 0.008323798887431622 2023-01-22 19:20:15.261095: step: 522/459, loss: 0.0022099297493696213 2023-01-22 19:20:15.829063: step: 524/459, loss: 0.0030798998195677996 2023-01-22 19:20:16.460152: step: 526/459, loss: 0.0028575460892170668 2023-01-22 19:20:17.082835: step: 528/459, loss: 0.0003580772608984262 2023-01-22 19:20:17.746718: step: 530/459, loss: 0.004742024466395378 2023-01-22 19:20:18.365049: step: 532/459, loss: 0.0012648293050006032 2023-01-22 19:20:18.983551: step: 534/459, loss: 0.0033165388740599155 2023-01-22 19:20:19.577104: step: 536/459, loss: 0.00495228311046958 2023-01-22 19:20:20.188938: step: 538/459, loss: 0.01740996725857258 2023-01-22 19:20:20.863099: step: 540/459, loss: 0.026933908462524414 2023-01-22 19:20:21.506018: step: 542/459, loss: 0.10587113350629807 2023-01-22 19:20:22.123030: step: 544/459, loss: 0.0027195981238037348 2023-01-22 19:20:22.723984: step: 546/459, loss: 0.0011723337229341269 2023-01-22 19:20:23.314508: step: 548/459, loss: 0.0035985473077744246 2023-01-22 19:20:23.941453: step: 550/459, loss: 0.5273443460464478 2023-01-22 19:20:24.506637: step: 552/459, loss: 0.00047122512478381395 2023-01-22 19:20:25.150357: step: 554/459, loss: 0.00011547328176675364 2023-01-22 19:20:25.769094: step: 556/459, loss: 0.008300797082483768 2023-01-22 19:20:26.355838: step: 558/459, loss: 0.0003997618332505226 2023-01-22 19:20:26.955104: step: 560/459, loss: 0.0029881515074521303 2023-01-22 19:20:27.603665: step: 562/459, loss: 0.004744921810925007 2023-01-22 19:20:28.226765: step: 564/459, loss: 0.0004274427192285657 2023-01-22 19:20:28.843887: step: 566/459, loss: 0.04406004771590233 2023-01-22 19:20:29.439921: step: 568/459, loss: 0.03434058651328087 2023-01-22 19:20:30.106012: step: 570/459, loss: 0.0032955422066152096 2023-01-22 19:20:30.732797: step: 572/459, loss: 0.002858051098883152 2023-01-22 19:20:31.345074: step: 574/459, loss: 0.0005602427991107106 2023-01-22 19:20:31.932893: step: 576/459, loss: 0.0012408617185428739 2023-01-22 19:20:32.570547: step: 578/459, loss: 0.011478239670395851 2023-01-22 19:20:33.201599: step: 580/459, loss: 0.00400835694745183 2023-01-22 19:20:33.773745: step: 582/459, loss: 0.061305589973926544 2023-01-22 19:20:34.423028: step: 584/459, loss: 0.07847490906715393 2023-01-22 19:20:35.051107: step: 586/459, loss: 0.00023004700778983533 2023-01-22 19:20:35.638977: step: 588/459, loss: 0.000609296839684248 2023-01-22 19:20:36.281513: step: 590/459, loss: 0.04806961119174957 2023-01-22 19:20:36.881929: step: 592/459, loss: 0.04460865259170532 2023-01-22 19:20:37.501904: step: 594/459, loss: 0.0011691129766404629 2023-01-22 19:20:38.149914: step: 596/459, loss: 0.17956924438476562 2023-01-22 19:20:38.766628: step: 598/459, loss: 0.012191125191748142 2023-01-22 19:20:39.380193: step: 600/459, loss: 0.00017307446978520602 2023-01-22 19:20:39.974638: step: 602/459, loss: 0.0003573465801309794 2023-01-22 19:20:40.572579: step: 604/459, loss: 0.9933748245239258 2023-01-22 19:20:41.181115: step: 606/459, loss: 4.324866313254461e-05 2023-01-22 19:20:41.787813: step: 608/459, loss: 0.006552404724061489 2023-01-22 19:20:42.494691: step: 610/459, loss: 4.833039565710351e-05 2023-01-22 19:20:43.106595: step: 612/459, loss: 0.0021120072342455387 2023-01-22 19:20:43.726572: step: 614/459, loss: 0.005490239709615707 2023-01-22 19:20:44.365251: step: 616/459, loss: 0.27845752239227295 2023-01-22 19:20:44.954905: step: 618/459, loss: 0.005775816738605499 2023-01-22 19:20:45.603344: step: 620/459, loss: 0.0010374662233516574 2023-01-22 19:20:46.192618: step: 622/459, loss: 0.001059019356034696 2023-01-22 19:20:46.766951: step: 624/459, loss: 0.004184341058135033 2023-01-22 19:20:47.386593: step: 626/459, loss: 0.00022050348343327641 2023-01-22 19:20:47.937538: step: 628/459, loss: 5.4070282203610986e-05 2023-01-22 19:20:48.495007: step: 630/459, loss: 0.0006534252315759659 2023-01-22 19:20:49.131088: step: 632/459, loss: 0.022299746051430702 2023-01-22 19:20:49.771057: step: 634/459, loss: 0.009642080403864384 2023-01-22 19:20:50.399855: step: 636/459, loss: 0.00048644133494235575 2023-01-22 19:20:51.040866: step: 638/459, loss: 0.0005062410491518676 2023-01-22 19:20:51.698065: step: 640/459, loss: 0.006695735268294811 2023-01-22 19:20:52.284833: step: 642/459, loss: 8.456659270450473e-05 2023-01-22 19:20:52.876412: step: 644/459, loss: 0.019917253404855728 2023-01-22 19:20:53.467264: step: 646/459, loss: 0.4215138554573059 2023-01-22 19:20:54.027770: step: 648/459, loss: 0.0034047579392790794 2023-01-22 19:20:54.638694: step: 650/459, loss: 0.023329805582761765 2023-01-22 19:20:55.279377: step: 652/459, loss: 0.010901868343353271 2023-01-22 19:20:55.942381: step: 654/459, loss: 0.0032258108258247375 2023-01-22 19:20:56.542953: step: 656/459, loss: 0.000988306594081223 2023-01-22 19:20:57.175714: step: 658/459, loss: 0.0006651802104897797 2023-01-22 19:20:57.767560: step: 660/459, loss: 0.003264207625761628 2023-01-22 19:20:58.322710: step: 662/459, loss: 0.003445685375481844 2023-01-22 19:20:58.990626: step: 664/459, loss: 0.05988955870270729 2023-01-22 19:20:59.570488: step: 666/459, loss: 0.00031321917776949704 2023-01-22 19:21:00.196879: step: 668/459, loss: 0.0757087990641594 2023-01-22 19:21:00.838197: step: 670/459, loss: 0.0010914587182924151 2023-01-22 19:21:01.385582: step: 672/459, loss: 0.13831661641597748 2023-01-22 19:21:02.041876: step: 674/459, loss: 0.05522604659199715 2023-01-22 19:21:02.680526: step: 676/459, loss: 0.004317151382565498 2023-01-22 19:21:03.307608: step: 678/459, loss: 0.0014591302024200559 2023-01-22 19:21:03.882888: step: 680/459, loss: 0.01147803757339716 2023-01-22 19:21:04.516753: step: 682/459, loss: 0.006577746942639351 2023-01-22 19:21:05.137851: step: 684/459, loss: 0.0013405284844338894 2023-01-22 19:21:05.780670: step: 686/459, loss: 0.002778467955067754 2023-01-22 19:21:06.393621: step: 688/459, loss: 0.00837174616754055 2023-01-22 19:21:07.022174: step: 690/459, loss: 0.002426723251119256 2023-01-22 19:21:07.659997: step: 692/459, loss: 0.0015953414840623736 2023-01-22 19:21:08.276509: step: 694/459, loss: 0.002999540651217103 2023-01-22 19:21:08.874816: step: 696/459, loss: 0.004439964424818754 2023-01-22 19:21:09.438347: step: 698/459, loss: 4.081071529071778e-05 2023-01-22 19:21:10.059065: step: 700/459, loss: 0.0026870740111917257 2023-01-22 19:21:10.630257: step: 702/459, loss: 0.0030714329332113266 2023-01-22 19:21:11.262279: step: 704/459, loss: 0.0034012545365840197 2023-01-22 19:21:11.860977: step: 706/459, loss: 0.002030859934166074 2023-01-22 19:21:12.472020: step: 708/459, loss: 0.02282346971333027 2023-01-22 19:21:13.159320: step: 710/459, loss: 0.0010239286348223686 2023-01-22 19:21:13.793656: step: 712/459, loss: 0.0024967966601252556 2023-01-22 19:21:14.419918: step: 714/459, loss: 0.0008362457738257945 2023-01-22 19:21:14.974251: step: 716/459, loss: 0.003690095618367195 2023-01-22 19:21:15.575687: step: 718/459, loss: 0.018830575048923492 2023-01-22 19:21:16.177105: step: 720/459, loss: 0.0013708426849916577 2023-01-22 19:21:16.773584: step: 722/459, loss: 0.003888128325343132 2023-01-22 19:21:17.384201: step: 724/459, loss: 0.044148191809654236 2023-01-22 19:21:17.952414: step: 726/459, loss: 0.013576114550232887 2023-01-22 19:21:18.566706: step: 728/459, loss: 0.0008795576868578792 2023-01-22 19:21:19.150907: step: 730/459, loss: 0.0014861124800518155 2023-01-22 19:21:19.796508: step: 732/459, loss: 0.6209379434585571 2023-01-22 19:21:20.368604: step: 734/459, loss: 0.011750518344342709 2023-01-22 19:21:20.988214: step: 736/459, loss: 0.004871700890362263 2023-01-22 19:21:21.584383: step: 738/459, loss: 0.002737227361649275 2023-01-22 19:21:22.182093: step: 740/459, loss: 0.002994140377268195 2023-01-22 19:21:22.847598: step: 742/459, loss: 0.003438410582020879 2023-01-22 19:21:23.469498: step: 744/459, loss: 0.00023978999524842948 2023-01-22 19:21:24.116171: step: 746/459, loss: 0.007722621783614159 2023-01-22 19:21:24.794491: step: 748/459, loss: 0.020896200090646744 2023-01-22 19:21:25.426283: step: 750/459, loss: 0.0011577218538150191 2023-01-22 19:21:25.985392: step: 752/459, loss: 0.021789534017443657 2023-01-22 19:21:26.565620: step: 754/459, loss: 0.49097755551338196 2023-01-22 19:21:27.134897: step: 756/459, loss: 0.01960299164056778 2023-01-22 19:21:27.758131: step: 758/459, loss: 0.0010462735081091523 2023-01-22 19:21:28.349625: step: 760/459, loss: 0.0009307655273005366 2023-01-22 19:21:28.991197: step: 762/459, loss: 0.0006313839694485068 2023-01-22 19:21:29.637654: step: 764/459, loss: 0.014236507937312126 2023-01-22 19:21:30.334576: step: 766/459, loss: 0.00045275798765942454 2023-01-22 19:21:30.947670: step: 768/459, loss: 0.000474282685900107 2023-01-22 19:21:31.568530: step: 770/459, loss: 0.0014870757004246116 2023-01-22 19:21:32.200361: step: 772/459, loss: 0.00023021911329124123 2023-01-22 19:21:32.759379: step: 774/459, loss: 0.0025714701041579247 2023-01-22 19:21:33.301747: step: 776/459, loss: 0.00016512107686139643 2023-01-22 19:21:33.974685: step: 778/459, loss: 0.038994453847408295 2023-01-22 19:21:34.655875: step: 780/459, loss: 0.0027057407423853874 2023-01-22 19:21:35.226179: step: 782/459, loss: 0.005546972155570984 2023-01-22 19:21:35.830367: step: 784/459, loss: 0.0008355886093340814 2023-01-22 19:21:36.471426: step: 786/459, loss: 0.004035585094243288 2023-01-22 19:21:37.070796: step: 788/459, loss: 0.0017255446873605251 2023-01-22 19:21:37.674601: step: 790/459, loss: 0.0036435811780393124 2023-01-22 19:21:38.260233: step: 792/459, loss: 0.04153135046362877 2023-01-22 19:21:38.924161: step: 794/459, loss: 0.005226816516369581 2023-01-22 19:21:39.496929: step: 796/459, loss: 0.013889150694012642 2023-01-22 19:21:40.068433: step: 798/459, loss: 0.004328093025833368 2023-01-22 19:21:40.636751: step: 800/459, loss: 0.0023282344918698072 2023-01-22 19:21:41.223277: step: 802/459, loss: 0.007033084519207478 2023-01-22 19:21:41.859921: step: 804/459, loss: 0.0042662471532821655 2023-01-22 19:21:42.473463: step: 806/459, loss: 0.10098186880350113 2023-01-22 19:21:43.090600: step: 808/459, loss: 0.0004080800572410226 2023-01-22 19:21:43.667816: step: 810/459, loss: 0.0017826561816036701 2023-01-22 19:21:44.253777: step: 812/459, loss: 0.04767231643199921 2023-01-22 19:21:44.885713: step: 814/459, loss: 0.04413728043437004 2023-01-22 19:21:45.581110: step: 816/459, loss: 0.00013532406592275947 2023-01-22 19:21:46.138666: step: 818/459, loss: 0.015432905405759811 2023-01-22 19:21:46.689597: step: 820/459, loss: 0.0001585751015227288 2023-01-22 19:21:47.276786: step: 822/459, loss: 0.004039656836539507 2023-01-22 19:21:47.951077: step: 824/459, loss: 0.0001975532213691622 2023-01-22 19:21:48.524370: step: 826/459, loss: 0.00011274935241090134 2023-01-22 19:21:49.129665: step: 828/459, loss: 0.05170341953635216 2023-01-22 19:21:49.735222: step: 830/459, loss: 0.14637945592403412 2023-01-22 19:21:50.369792: step: 832/459, loss: 0.00067858025431633 2023-01-22 19:21:50.905062: step: 834/459, loss: 0.0006267894641496241 2023-01-22 19:21:51.489028: step: 836/459, loss: 0.9184783697128296 2023-01-22 19:21:52.108627: step: 838/459, loss: 0.03093664161860943 2023-01-22 19:21:52.724346: step: 840/459, loss: 0.0002656790893524885 2023-01-22 19:21:53.329792: step: 842/459, loss: 0.0001461126666981727 2023-01-22 19:21:53.893504: step: 844/459, loss: 0.034708455204963684 2023-01-22 19:21:54.537482: step: 846/459, loss: 0.022393710911273956 2023-01-22 19:21:55.139293: step: 848/459, loss: 9.34097042772919e-05 2023-01-22 19:21:55.692122: step: 850/459, loss: 0.001001993310637772 2023-01-22 19:21:56.303049: step: 852/459, loss: 0.002569172065705061 2023-01-22 19:21:57.011249: step: 854/459, loss: 0.0003229699213989079 2023-01-22 19:21:57.628712: step: 856/459, loss: 0.00047732796519994736 2023-01-22 19:21:58.234578: step: 858/459, loss: 0.03546785190701485 2023-01-22 19:21:58.944746: step: 860/459, loss: 0.000248892669333145 2023-01-22 19:21:59.523791: step: 862/459, loss: 0.020980138331651688 2023-01-22 19:22:00.182798: step: 864/459, loss: 0.0006982789491303265 2023-01-22 19:22:00.723911: step: 866/459, loss: 0.0029043685644865036 2023-01-22 19:22:01.345095: step: 868/459, loss: 0.003693497506901622 2023-01-22 19:22:01.964482: step: 870/459, loss: 0.033400360494852066 2023-01-22 19:22:02.568778: step: 872/459, loss: 0.001069289050064981 2023-01-22 19:22:03.226923: step: 874/459, loss: 0.0047149378806352615 2023-01-22 19:22:03.809557: step: 876/459, loss: 0.0007566293352283537 2023-01-22 19:22:04.405233: step: 878/459, loss: 0.13952961564064026 2023-01-22 19:22:04.967849: step: 880/459, loss: 0.01368774939328432 2023-01-22 19:22:05.519270: step: 882/459, loss: 0.010220697149634361 2023-01-22 19:22:06.143944: step: 884/459, loss: 0.0007118714856915176 2023-01-22 19:22:06.825748: step: 886/459, loss: 1.1139073371887207 2023-01-22 19:22:07.349765: step: 888/459, loss: 0.0008829772123135626 2023-01-22 19:22:07.962271: step: 890/459, loss: 0.006153562571853399 2023-01-22 19:22:08.525371: step: 892/459, loss: 3.395992825971916e-05 2023-01-22 19:22:09.154508: step: 894/459, loss: 0.0021036399994045496 2023-01-22 19:22:09.860485: step: 896/459, loss: 0.0022422457113862038 2023-01-22 19:22:10.499648: step: 898/459, loss: 0.0032257132697850466 2023-01-22 19:22:11.122740: step: 900/459, loss: 0.005102149210870266 2023-01-22 19:22:11.773300: step: 902/459, loss: 0.00953193474560976 2023-01-22 19:22:12.365633: step: 904/459, loss: 0.14130643010139465 2023-01-22 19:22:13.019734: step: 906/459, loss: 0.07976793497800827 2023-01-22 19:22:13.657781: step: 908/459, loss: 0.0023941872641444206 2023-01-22 19:22:14.247758: step: 910/459, loss: 0.00022712362988386303 2023-01-22 19:22:14.874713: step: 912/459, loss: 0.0015048037748783827 2023-01-22 19:22:15.490649: step: 914/459, loss: 0.0029512629844248295 2023-01-22 19:22:16.029379: step: 916/459, loss: 0.011528683826327324 2023-01-22 19:22:16.588187: step: 918/459, loss: 0.000323440384818241 2023-01-22 19:22:17.073463: step: 920/459, loss: 4.2497900722082704e-05 ================================================== Loss: 0.032 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29800240805604206, 'r': 0.3228830645161291, 'f1': 0.3099442167577414}, 'combined': 0.22837994918991472, 'epoch': 39} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.341165541101385, 'r': 0.30825967453473635, 'f1': 0.32387895058668764}, 'combined': 0.20728252837548006, 'epoch': 39} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29175302245250434, 'r': 0.3205407969639469, 'f1': 0.3054701627486438}, 'combined': 0.22508327781479015, 'epoch': 39} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3454963531393014, 'r': 0.31688837485388155, 'f1': 0.33057458373461396}, 'combined': 0.2115677335901529, 'epoch': 39} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30891124701153805, 'r': 0.3159452792015541, 'f1': 0.31238867193099257}, 'combined': 0.2301811266859945, 'epoch': 39} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.35112206495248516, 'r': 0.3277352850788251, 'f1': 0.33902583524907254}, 'combined': 0.2430751271597124, 'epoch': 39} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.19961240310077516, 'r': 0.2452380952380952, 'f1': 0.22008547008547005}, 'combined': 0.14672364672364668, 'epoch': 39} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.29310344827586204, 'r': 0.3695652173913043, 'f1': 0.3269230769230769}, 'combined': 0.16346153846153846, 'epoch': 39} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35294117647058826, 'r': 0.20689655172413793, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 39} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3066986908783784, 'r': 0.3445268026565465, 'f1': 0.3245140750670242}, 'combined': 0.23911563425991253, 'epoch': 19} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31271473684488904, 'r': 0.30873473110322686, 'f1': 0.31071198921642224}, 'combined': 0.1988556730985102, 'epoch': 19} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2564102564102564, 'r': 0.38095238095238093, 'f1': 0.3065134099616858}, 'combined': 0.20434227330779053, 'epoch': 19} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29418702499903554, 'r': 0.3594998939267151, 'f1': 0.32358060478117656}, 'combined': 0.23842781404928798, 'epoch': 13} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3202762492367604, 'r': 0.31212376289255195, 'f1': 0.31614745781013553}, 'combined': 0.20233437299848672, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2878787878787879, 'r': 0.41304347826086957, 'f1': 0.3392857142857143}, 'combined': 0.16964285714285715, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3189032241859154, 'r': 0.3376622373733222, 'f1': 0.32801474487694154}, 'combined': 0.24169507517248323, 'epoch': 15} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34157316773046453, 'r': 0.3008578947798814, 'f1': 0.3199253278791385}, 'combined': 0.22938042376240123, 'epoch': 15} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35294117647058826, 'r': 0.20689655172413793, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 15}