Command that produces this log: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 ---------------------------------------------------------------------------------------------------- > trainable params: >>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024]) >>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024]) >>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024]) >>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024]) >>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.1.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.1.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.1.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.2.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.2.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.2.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.3.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.3.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.3.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.4.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.4.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.4.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.5.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.5.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.5.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.6.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.6.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.6.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.7.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.7.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.7.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.8.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.8.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.8.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.9.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.9.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.9.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.10.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.10.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.10.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.11.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.11.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.11.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.12.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.12.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.12.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.13.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.13.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.13.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.14.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.14.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.14.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.15.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.15.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.15.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.16.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.16.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.16.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.17.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.17.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.17.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.18.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.18.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.18.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.19.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.19.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.19.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.20.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.20.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.20.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.21.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.21.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.21.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.22.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.22.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.22.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.23.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.23.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.23.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.pooler.dense.weight: torch.Size([1024, 1024]) >>> xlmr.pooler.dense.bias: torch.Size([1024]) >>> basic_gcn.T_T.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.0.bias: torch.Size([1024]) >>> basic_gcn.T_T.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.1.bias: torch.Size([1024]) >>> basic_gcn.T_T.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.2.bias: torch.Size([1024]) >>> basic_gcn.T_E.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.0.bias: torch.Size([1024]) >>> basic_gcn.T_E.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.1.bias: torch.Size([1024]) >>> basic_gcn.T_E.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.2.bias: torch.Size([1024]) >>> basic_gcn.E_T.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.0.bias: torch.Size([1024]) >>> basic_gcn.E_T.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.1.bias: torch.Size([1024]) >>> basic_gcn.E_T.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.2.bias: torch.Size([1024]) >>> basic_gcn.E_E.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.0.bias: torch.Size([1024]) >>> basic_gcn.E_E.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.1.bias: torch.Size([1024]) >>> basic_gcn.E_E.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.2.bias: torch.Size([1024]) >>> basic_gcn.f_t.0.weight: torch.Size([1024, 2048]) >>> basic_gcn.f_t.0.bias: torch.Size([1024]) >>> basic_gcn.f_e.0.weight: torch.Size([1024, 2048]) >>> basic_gcn.f_e.0.bias: torch.Size([1024]) >>> name2classifier.occupy-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.occupy-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.occupy-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.occupy-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.outcome-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.outcome-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.outcome-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.outcome-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.when-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.when-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.when-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.when-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.where-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.where-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.where-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.where-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.who-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.who-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.who-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.who-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-against-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-against-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-against-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-against-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-for-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-for-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-for-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-for-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.organizer-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.organizer-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.organizer-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.organizer-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.wounded-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.wounded-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.wounded-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.wounded-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.arrested-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.arrested-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.arrested-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.arrested-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.imprisoned-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.imprisoned-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.imprisoned-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.imprisoned-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.corrupt-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.corrupt-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.corrupt-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.corrupt-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.judicial-actions-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.judicial-actions-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.judicial-actions-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.judicial-actions-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.charged-with-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.charged-with-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.charged-with-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.charged-with-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.prison-term-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.prison-term-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.prison-term-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.prison-term-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.fine-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.fine-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.fine-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.fine-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.npi-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.npi-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.npi-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.npi-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.disease-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.disease-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.disease-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.disease-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.outbreak-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.outbreak-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.outbreak-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.outbreak-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.blamed-by-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.blamed-by-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.blamed-by-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.blamed-by-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.claimed-by-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.claimed-by-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.claimed-by-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.claimed-by-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.terror-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.terror-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.terror-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.terror-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.kidnapped-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.kidnapped-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.kidnapped-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.kidnapped-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-perp-org-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-perp-org-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-perp-org-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-perp-org-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.target-physical-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.target-physical-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.target-physical-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.target-physical-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-perp-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-perp-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-perp-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-perp-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-killed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-killed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-killed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-killed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.target-human-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.target-human-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.target-human-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.target-human-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-captured-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-captured-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-captured-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-captured-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-objective-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-objective-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-objective-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-objective-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.weapon-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.weapon-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.weapon-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.weapon-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-organizer-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-organizer-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-organizer-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-organizer-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.affected-cumulative-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.affected-cumulative-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.affected-cumulative-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.affected-cumulative-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.damage-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.damage-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.damage-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.damage-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.human-displacement-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.human-displacement-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.human-displacement-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.human-displacement-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.major-disaster-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.major-disaster-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.major-disaster-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.major-disaster-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.related-natural-phenomena-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.related-natural-phenomena-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.related-natural-phenomena-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.related-natural-phenomena-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.responders-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.responders-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.responders-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.responders-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.assistance-provided-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.assistance-provided-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.assistance-provided-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.assistance-provided-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.rescue-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.rescue-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.rescue-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.rescue-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.individuals-affected-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.individuals-affected-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.individuals-affected-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.individuals-affected-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.missing-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.missing-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.missing-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.missing-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.injured-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.injured-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.injured-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.injured-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.assistance-needed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.assistance-needed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.assistance-needed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.assistance-needed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.rescued-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.rescued-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.rescued-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.rescued-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.repair-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.repair-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.repair-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.repair-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.declare-emergency-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.declare-emergency-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.declare-emergency-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.declare-emergency-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.announce-disaster-warnings-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.announce-disaster-warnings-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.announce-disaster-warnings-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.announce-disaster-warnings-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.disease-outbreak-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.disease-outbreak-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.disease-outbreak-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.disease-outbreak-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.current-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.current-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.current-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.current-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.group-identity-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.group-identity-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.group-identity-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.group-identity-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.human-displacement-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.human-displacement-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.human-displacement-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.human-displacement-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.origin-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.origin-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.origin-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.origin-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.total-displaced-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.total-displaced-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.total-displaced-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.total-displaced-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.transitory-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.transitory-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.transitory-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.transitory-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.destination-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.destination-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.destination-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.destination-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.transiting-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.transiting-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.transiting-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.transiting-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.detained-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.detained-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.detained-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.detained-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.blocked-migration-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.blocked-migration-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.blocked-migration-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.blocked-migration-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.cybercrime-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.cybercrime-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.cybercrime-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.cybercrime-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perpetrator-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perpetrator-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perpetrator-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perpetrator-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.victim-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.victim-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.victim-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.victim-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.response-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.response-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.response-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.response-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.information-stolen-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.information-stolen-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.information-stolen-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.information-stolen-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.related-crimes-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.related-crimes-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.related-crimes-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.related-crimes-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.victim-impact-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.victim-impact-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.victim-impact-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.victim-impact-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-amount-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-amount-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-amount-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-amount-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.etip-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.etip-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.etip-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.etip-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.project-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.project-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.project-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.project-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.project-name-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.project-name-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.project-name-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.project-name-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.signatories-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.signatories-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.signatories-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.signatories-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-awardee-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-awardee-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-awardee-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-awardee-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.overall-project-value-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.overall-project-value-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.overall-project-value-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.overall-project-value-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-amount-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-amount-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-amount-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-amount-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-recipient-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-recipient-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-recipient-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-recipient-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-source-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-source-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-source-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-source-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-awarder-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-awarder-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-awarder-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-awarder-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.agreement-length-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.agreement-length-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.agreement-length-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.agreement-length-ffn.layers.1.bias: torch.Size([2]) >>> irrealis_classifier.layers.0.weight: torch.Size([350, 1128]) >>> irrealis_classifier.layers.0.bias: torch.Size([350]) >>> irrealis_classifier.layers.1.weight: torch.Size([7, 350]) >>> irrealis_classifier.layers.1.bias: torch.Size([7]) n_trainable_params: 614103147, n_nontrainable_params: 0 ---------------------------------------------------------------------------------------------------- ****************************** Epoch: 0 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:08:05.710788: step: 2/463, loss: 15.527769088745117 2023-01-24 00:08:06.353562: step: 4/463, loss: 35.84537124633789 2023-01-24 00:08:07.000359: step: 6/463, loss: 25.407726287841797 2023-01-24 00:08:07.585113: step: 8/463, loss: 26.817684173583984 2023-01-24 00:08:08.201421: step: 10/463, loss: 14.446836471557617 2023-01-24 00:08:08.798494: step: 12/463, loss: 12.531373023986816 2023-01-24 00:08:09.474654: step: 14/463, loss: 9.631613731384277 2023-01-24 00:08:10.153947: step: 16/463, loss: 11.01453971862793 2023-01-24 00:08:10.750189: step: 18/463, loss: 9.84468936920166 2023-01-24 00:08:11.471560: step: 20/463, loss: 7.798810958862305 2023-01-24 00:08:12.067443: step: 22/463, loss: 5.931806564331055 2023-01-24 00:08:12.686238: step: 24/463, loss: 10.619784355163574 2023-01-24 00:08:13.307375: step: 26/463, loss: 17.499927520751953 2023-01-24 00:08:13.977665: step: 28/463, loss: 38.443634033203125 2023-01-24 00:08:14.598698: step: 30/463, loss: 10.91313362121582 2023-01-24 00:08:15.215870: step: 32/463, loss: 9.917706489562988 2023-01-24 00:08:15.794215: step: 34/463, loss: 10.056838989257812 2023-01-24 00:08:16.409215: step: 36/463, loss: 5.252358913421631 2023-01-24 00:08:16.993718: step: 38/463, loss: 9.089670181274414 2023-01-24 00:08:17.656541: step: 40/463, loss: 15.586606979370117 2023-01-24 00:08:18.216618: step: 42/463, loss: 7.14359712600708 2023-01-24 00:08:18.809856: step: 44/463, loss: 32.87059020996094 2023-01-24 00:08:19.519319: step: 46/463, loss: 18.354740142822266 2023-01-24 00:08:20.146396: step: 48/463, loss: 9.673382759094238 2023-01-24 00:08:20.708676: step: 50/463, loss: 7.009995460510254 2023-01-24 00:08:21.288128: step: 52/463, loss: 10.952363014221191 2023-01-24 00:08:21.919526: step: 54/463, loss: 10.46281623840332 2023-01-24 00:08:22.572478: step: 56/463, loss: 10.566239356994629 2023-01-24 00:08:23.224049: step: 58/463, loss: 34.29521942138672 2023-01-24 00:08:23.846103: step: 60/463, loss: 23.089927673339844 2023-01-24 00:08:24.476794: step: 62/463, loss: 4.2957048416137695 2023-01-24 00:08:25.068184: step: 64/463, loss: 14.529444694519043 2023-01-24 00:08:25.671511: step: 66/463, loss: 14.4979248046875 2023-01-24 00:08:26.311930: step: 68/463, loss: 15.657442092895508 2023-01-24 00:08:26.913594: step: 70/463, loss: 17.717124938964844 2023-01-24 00:08:27.596112: step: 72/463, loss: 16.660926818847656 2023-01-24 00:08:28.226002: step: 74/463, loss: 14.563990592956543 2023-01-24 00:08:28.850187: step: 76/463, loss: 22.89514923095703 2023-01-24 00:08:29.543533: step: 78/463, loss: 19.75594711303711 2023-01-24 00:08:30.220320: step: 80/463, loss: 14.562483787536621 2023-01-24 00:08:30.862604: step: 82/463, loss: 17.214841842651367 2023-01-24 00:08:31.423929: step: 84/463, loss: 11.61182975769043 2023-01-24 00:08:32.052438: step: 86/463, loss: 25.90787124633789 2023-01-24 00:08:32.672609: step: 88/463, loss: 13.791238784790039 2023-01-24 00:08:33.297944: step: 90/463, loss: 10.36309814453125 2023-01-24 00:08:33.955548: step: 92/463, loss: 24.275922775268555 2023-01-24 00:08:34.559738: step: 94/463, loss: 19.896915435791016 2023-01-24 00:08:35.107852: step: 96/463, loss: 9.671799659729004 2023-01-24 00:08:35.718779: step: 98/463, loss: 6.2263383865356445 2023-01-24 00:08:36.327923: step: 100/463, loss: 4.396616458892822 2023-01-24 00:08:36.925287: step: 102/463, loss: 11.885747909545898 2023-01-24 00:08:37.521150: step: 104/463, loss: 9.516507148742676 2023-01-24 00:08:38.255368: step: 106/463, loss: 25.800642013549805 2023-01-24 00:08:38.955742: step: 108/463, loss: 10.907988548278809 2023-01-24 00:08:39.567523: step: 110/463, loss: 10.283793449401855 2023-01-24 00:08:40.240613: step: 112/463, loss: 15.061211585998535 2023-01-24 00:08:40.830238: step: 114/463, loss: 8.799469947814941 2023-01-24 00:08:41.422529: step: 116/463, loss: 16.10447883605957 2023-01-24 00:08:42.008646: step: 118/463, loss: 10.600774765014648 2023-01-24 00:08:42.624124: step: 120/463, loss: 3.9900472164154053 2023-01-24 00:08:43.258798: step: 122/463, loss: 9.503351211547852 2023-01-24 00:08:43.901510: step: 124/463, loss: 10.451807022094727 2023-01-24 00:08:44.544867: step: 126/463, loss: 10.882719039916992 2023-01-24 00:08:45.180522: step: 128/463, loss: 6.037629127502441 2023-01-24 00:08:45.801877: step: 130/463, loss: 7.3324809074401855 2023-01-24 00:08:46.371874: step: 132/463, loss: 10.906940460205078 2023-01-24 00:08:47.026890: step: 134/463, loss: 6.400213241577148 2023-01-24 00:08:47.682846: step: 136/463, loss: 9.18304443359375 2023-01-24 00:08:48.307410: step: 138/463, loss: 17.782630920410156 2023-01-24 00:08:48.938362: step: 140/463, loss: 4.7924299240112305 2023-01-24 00:08:49.585058: step: 142/463, loss: 4.532928466796875 2023-01-24 00:08:50.197432: step: 144/463, loss: 14.844175338745117 2023-01-24 00:08:50.816260: step: 146/463, loss: 6.041059494018555 2023-01-24 00:08:51.395352: step: 148/463, loss: 6.047374248504639 2023-01-24 00:08:52.003796: step: 150/463, loss: 15.270130157470703 2023-01-24 00:08:52.639235: step: 152/463, loss: 12.252359390258789 2023-01-24 00:08:53.302398: step: 154/463, loss: 13.79488754272461 2023-01-24 00:08:53.903647: step: 156/463, loss: 9.51964282989502 2023-01-24 00:08:54.552440: step: 158/463, loss: 12.256814002990723 2023-01-24 00:08:55.167324: step: 160/463, loss: 3.5211610794067383 2023-01-24 00:08:55.816385: step: 162/463, loss: 4.921966075897217 2023-01-24 00:08:56.427020: step: 164/463, loss: 3.3752942085266113 2023-01-24 00:08:57.079289: step: 166/463, loss: 19.74912452697754 2023-01-24 00:08:57.727512: step: 168/463, loss: 5.562749862670898 2023-01-24 00:08:58.322305: step: 170/463, loss: 13.177501678466797 2023-01-24 00:08:58.986841: step: 172/463, loss: 25.7593994140625 2023-01-24 00:08:59.627438: step: 174/463, loss: 8.650787353515625 2023-01-24 00:09:00.271045: step: 176/463, loss: 2.2988481521606445 2023-01-24 00:09:00.900081: step: 178/463, loss: 6.851744651794434 2023-01-24 00:09:01.547471: step: 180/463, loss: 20.88614845275879 2023-01-24 00:09:02.184543: step: 182/463, loss: 8.837570190429688 2023-01-24 00:09:02.816531: step: 184/463, loss: 4.462589263916016 2023-01-24 00:09:03.403702: step: 186/463, loss: 7.871462345123291 2023-01-24 00:09:04.104373: step: 188/463, loss: 12.794301986694336 2023-01-24 00:09:04.717337: step: 190/463, loss: 12.927425384521484 2023-01-24 00:09:05.295965: step: 192/463, loss: 3.3798985481262207 2023-01-24 00:09:05.902994: step: 194/463, loss: 2.569499969482422 2023-01-24 00:09:06.485326: step: 196/463, loss: 19.009502410888672 2023-01-24 00:09:07.119218: step: 198/463, loss: 6.475244998931885 2023-01-24 00:09:07.658997: step: 200/463, loss: 5.058328628540039 2023-01-24 00:09:08.332595: step: 202/463, loss: 5.227064609527588 2023-01-24 00:09:08.968585: step: 204/463, loss: 3.3872365951538086 2023-01-24 00:09:09.589439: step: 206/463, loss: 6.052916526794434 2023-01-24 00:09:10.301974: step: 208/463, loss: 7.080582618713379 2023-01-24 00:09:10.908466: step: 210/463, loss: 15.21970272064209 2023-01-24 00:09:11.530066: step: 212/463, loss: 8.629252433776855 2023-01-24 00:09:12.155253: step: 214/463, loss: 15.54279613494873 2023-01-24 00:09:12.840551: step: 216/463, loss: 14.683683395385742 2023-01-24 00:09:13.581857: step: 218/463, loss: 7.520158290863037 2023-01-24 00:09:14.224301: step: 220/463, loss: 3.270094871520996 2023-01-24 00:09:14.891490: step: 222/463, loss: 10.755022048950195 2023-01-24 00:09:15.503636: step: 224/463, loss: 7.708808898925781 2023-01-24 00:09:16.091893: step: 226/463, loss: 12.31395149230957 2023-01-24 00:09:16.835506: step: 228/463, loss: 8.528714179992676 2023-01-24 00:09:17.407188: step: 230/463, loss: 6.2049946784973145 2023-01-24 00:09:17.982622: step: 232/463, loss: 8.918888092041016 2023-01-24 00:09:18.564403: step: 234/463, loss: 6.436501502990723 2023-01-24 00:09:19.170563: step: 236/463, loss: 4.677071571350098 2023-01-24 00:09:19.805814: step: 238/463, loss: 2.847156047821045 2023-01-24 00:09:20.477206: step: 240/463, loss: 2.729511260986328 2023-01-24 00:09:21.076195: step: 242/463, loss: 8.17482852935791 2023-01-24 00:09:21.781385: step: 244/463, loss: 2.2812862396240234 2023-01-24 00:09:22.388645: step: 246/463, loss: 1.6848840713500977 2023-01-24 00:09:23.109794: step: 248/463, loss: 6.737085342407227 2023-01-24 00:09:23.758630: step: 250/463, loss: 5.814552307128906 2023-01-24 00:09:24.416250: step: 252/463, loss: 10.852094650268555 2023-01-24 00:09:25.044164: step: 254/463, loss: 2.0893714427948 2023-01-24 00:09:25.707361: step: 256/463, loss: 9.124494552612305 2023-01-24 00:09:26.362700: step: 258/463, loss: 5.709181785583496 2023-01-24 00:09:26.976496: step: 260/463, loss: 7.201657295227051 2023-01-24 00:09:27.538403: step: 262/463, loss: 2.6056108474731445 2023-01-24 00:09:28.162479: step: 264/463, loss: 6.954370498657227 2023-01-24 00:09:28.761468: step: 266/463, loss: 3.8930842876434326 2023-01-24 00:09:29.394607: step: 268/463, loss: 6.021478652954102 2023-01-24 00:09:30.015629: step: 270/463, loss: 6.4816813468933105 2023-01-24 00:09:30.575020: step: 272/463, loss: 7.582217216491699 2023-01-24 00:09:31.233447: step: 274/463, loss: 8.666520118713379 2023-01-24 00:09:31.822289: step: 276/463, loss: 5.154561996459961 2023-01-24 00:09:32.393964: step: 278/463, loss: 6.3566670417785645 2023-01-24 00:09:33.032184: step: 280/463, loss: 9.040838241577148 2023-01-24 00:09:33.631567: step: 282/463, loss: 5.793061256408691 2023-01-24 00:09:34.310664: step: 284/463, loss: 3.8504412174224854 2023-01-24 00:09:34.918495: step: 286/463, loss: 3.097719669342041 2023-01-24 00:09:35.591158: step: 288/463, loss: 11.67608642578125 2023-01-24 00:09:36.211603: step: 290/463, loss: 6.216445446014404 2023-01-24 00:09:36.843097: step: 292/463, loss: 9.175409317016602 2023-01-24 00:09:37.490065: step: 294/463, loss: 4.927739143371582 2023-01-24 00:09:38.095984: step: 296/463, loss: 10.063331604003906 2023-01-24 00:09:38.716249: step: 298/463, loss: 2.426312208175659 2023-01-24 00:09:39.306942: step: 300/463, loss: 15.16614055633545 2023-01-24 00:09:39.884641: step: 302/463, loss: 6.074978351593018 2023-01-24 00:09:40.557159: step: 304/463, loss: 12.848212242126465 2023-01-24 00:09:41.218284: step: 306/463, loss: 10.848212242126465 2023-01-24 00:09:41.976707: step: 308/463, loss: 5.430739879608154 2023-01-24 00:09:42.567894: step: 310/463, loss: 8.028779029846191 2023-01-24 00:09:43.195231: step: 312/463, loss: 2.672910451889038 2023-01-24 00:09:43.748377: step: 314/463, loss: 6.967820167541504 2023-01-24 00:09:44.332564: step: 316/463, loss: 2.14780330657959 2023-01-24 00:09:44.900293: step: 318/463, loss: 2.1438868045806885 2023-01-24 00:09:45.547004: step: 320/463, loss: 5.6445417404174805 2023-01-24 00:09:46.158636: step: 322/463, loss: 6.879384517669678 2023-01-24 00:09:46.758387: step: 324/463, loss: 10.660449981689453 2023-01-24 00:09:47.351214: step: 326/463, loss: 7.729612350463867 2023-01-24 00:09:47.972538: step: 328/463, loss: 1.928985834121704 2023-01-24 00:09:48.694623: step: 330/463, loss: 9.100521087646484 2023-01-24 00:09:49.321561: step: 332/463, loss: 8.423258781433105 2023-01-24 00:09:49.910053: step: 334/463, loss: 6.285608768463135 2023-01-24 00:09:50.547722: step: 336/463, loss: 4.942193031311035 2023-01-24 00:09:51.307879: step: 338/463, loss: 2.132909059524536 2023-01-24 00:09:51.992270: step: 340/463, loss: 3.3644590377807617 2023-01-24 00:09:52.592204: step: 342/463, loss: 5.647908687591553 2023-01-24 00:09:53.246117: step: 344/463, loss: 3.995138645172119 2023-01-24 00:09:53.985790: step: 346/463, loss: 4.053238868713379 2023-01-24 00:09:54.596026: step: 348/463, loss: 9.010847091674805 2023-01-24 00:09:55.208249: step: 350/463, loss: 3.1344430446624756 2023-01-24 00:09:55.842724: step: 352/463, loss: 3.2872653007507324 2023-01-24 00:09:56.462905: step: 354/463, loss: 2.1410598754882812 2023-01-24 00:09:57.033098: step: 356/463, loss: 2.29586124420166 2023-01-24 00:09:57.679634: step: 358/463, loss: 5.389079570770264 2023-01-24 00:09:58.290375: step: 360/463, loss: 2.4734394550323486 2023-01-24 00:09:58.901106: step: 362/463, loss: 3.0355725288391113 2023-01-24 00:09:59.540957: step: 364/463, loss: 7.597052574157715 2023-01-24 00:10:00.210708: step: 366/463, loss: 8.968563079833984 2023-01-24 00:10:00.863342: step: 368/463, loss: 5.926469802856445 2023-01-24 00:10:01.474916: step: 370/463, loss: 3.414600372314453 2023-01-24 00:10:02.074129: step: 372/463, loss: 7.4312849044799805 2023-01-24 00:10:02.793122: step: 374/463, loss: 4.893805503845215 2023-01-24 00:10:03.415267: step: 376/463, loss: 1.2331429719924927 2023-01-24 00:10:04.007439: step: 378/463, loss: 7.7484941482543945 2023-01-24 00:10:04.754953: step: 380/463, loss: 4.199658393859863 2023-01-24 00:10:05.411186: step: 382/463, loss: 9.93343734741211 2023-01-24 00:10:06.035141: step: 384/463, loss: 4.129388809204102 2023-01-24 00:10:06.681431: step: 386/463, loss: 6.334408760070801 2023-01-24 00:10:07.331465: step: 388/463, loss: 5.2793965339660645 2023-01-24 00:10:08.015909: step: 390/463, loss: 1.9536229372024536 2023-01-24 00:10:08.635635: step: 392/463, loss: 4.931037425994873 2023-01-24 00:10:09.276490: step: 394/463, loss: 2.1277451515197754 2023-01-24 00:10:09.935849: step: 396/463, loss: 11.578556060791016 2023-01-24 00:10:10.539077: step: 398/463, loss: 7.336834907531738 2023-01-24 00:10:11.131449: step: 400/463, loss: 1.4803102016448975 2023-01-24 00:10:11.769433: step: 402/463, loss: 1.6281784772872925 2023-01-24 00:10:12.398962: step: 404/463, loss: 1.6638565063476562 2023-01-24 00:10:12.969977: step: 406/463, loss: 0.46641552448272705 2023-01-24 00:10:13.598486: step: 408/463, loss: 1.8198914527893066 2023-01-24 00:10:14.196370: step: 410/463, loss: 3.632513999938965 2023-01-24 00:10:14.792413: step: 412/463, loss: 2.9901673793792725 2023-01-24 00:10:15.426941: step: 414/463, loss: 4.093265533447266 2023-01-24 00:10:16.065100: step: 416/463, loss: 2.8090016841888428 2023-01-24 00:10:16.691514: step: 418/463, loss: 4.752385139465332 2023-01-24 00:10:17.310544: step: 420/463, loss: 3.754805564880371 2023-01-24 00:10:18.024654: step: 422/463, loss: 0.85175621509552 2023-01-24 00:10:18.674697: step: 424/463, loss: 2.4817957878112793 2023-01-24 00:10:19.290527: step: 426/463, loss: 6.131841659545898 2023-01-24 00:10:19.889684: step: 428/463, loss: 1.089640736579895 2023-01-24 00:10:20.528556: step: 430/463, loss: 1.497068166732788 2023-01-24 00:10:21.146227: step: 432/463, loss: 10.586450576782227 2023-01-24 00:10:21.795865: step: 434/463, loss: 3.809283971786499 2023-01-24 00:10:22.441519: step: 436/463, loss: 5.007434844970703 2023-01-24 00:10:22.998190: step: 438/463, loss: 2.7349870204925537 2023-01-24 00:10:23.631598: step: 440/463, loss: 1.001546859741211 2023-01-24 00:10:24.269533: step: 442/463, loss: 3.353437900543213 2023-01-24 00:10:24.905764: step: 444/463, loss: 2.0643296241760254 2023-01-24 00:10:25.505928: step: 446/463, loss: 1.13667631149292 2023-01-24 00:10:26.125814: step: 448/463, loss: 4.531684875488281 2023-01-24 00:10:26.751194: step: 450/463, loss: 2.6654272079467773 2023-01-24 00:10:27.360474: step: 452/463, loss: 0.716002345085144 2023-01-24 00:10:27.990951: step: 454/463, loss: 1.642696499824524 2023-01-24 00:10:28.569605: step: 456/463, loss: 1.0909249782562256 2023-01-24 00:10:29.184197: step: 458/463, loss: 0.9979273676872253 2023-01-24 00:10:29.827622: step: 460/463, loss: 2.156653642654419 2023-01-24 00:10:30.519015: step: 462/463, loss: 1.6092184782028198 2023-01-24 00:10:31.096478: step: 464/463, loss: 7.144876480102539 2023-01-24 00:10:31.644215: step: 466/463, loss: 11.548429489135742 2023-01-24 00:10:32.288862: step: 468/463, loss: 1.762880563735962 2023-01-24 00:10:32.893318: step: 470/463, loss: 1.7343376874923706 2023-01-24 00:10:33.478988: step: 472/463, loss: 2.4140422344207764 2023-01-24 00:10:34.065622: step: 474/463, loss: 1.9792083501815796 2023-01-24 00:10:34.731807: step: 476/463, loss: 2.892806053161621 2023-01-24 00:10:35.325379: step: 478/463, loss: 2.300171375274658 2023-01-24 00:10:35.954602: step: 480/463, loss: 6.467311382293701 2023-01-24 00:10:36.548445: step: 482/463, loss: 1.1639219522476196 2023-01-24 00:10:37.105922: step: 484/463, loss: 4.534389972686768 2023-01-24 00:10:37.684101: step: 486/463, loss: 3.38034987449646 2023-01-24 00:10:38.418608: step: 488/463, loss: 5.718357086181641 2023-01-24 00:10:39.069111: step: 490/463, loss: 2.6815943717956543 2023-01-24 00:10:39.719624: step: 492/463, loss: 2.0928173065185547 2023-01-24 00:10:40.345344: step: 494/463, loss: 2.7798030376434326 2023-01-24 00:10:41.113636: step: 496/463, loss: 6.397806167602539 2023-01-24 00:10:41.755595: step: 498/463, loss: 3.2579054832458496 2023-01-24 00:10:42.442528: step: 500/463, loss: 1.5580883026123047 2023-01-24 00:10:43.040689: step: 502/463, loss: 3.48506498336792 2023-01-24 00:10:43.644199: step: 504/463, loss: 2.3566741943359375 2023-01-24 00:10:44.249890: step: 506/463, loss: 2.3373732566833496 2023-01-24 00:10:44.880476: step: 508/463, loss: 7.1874189376831055 2023-01-24 00:10:45.458802: step: 510/463, loss: 2.062103509902954 2023-01-24 00:10:46.074992: step: 512/463, loss: 1.7131600379943848 2023-01-24 00:10:46.680436: step: 514/463, loss: 0.8329946994781494 2023-01-24 00:10:47.283436: step: 516/463, loss: 0.6702396273612976 2023-01-24 00:10:47.902823: step: 518/463, loss: 2.1458892822265625 2023-01-24 00:10:48.498709: step: 520/463, loss: 4.183389663696289 2023-01-24 00:10:49.157586: step: 522/463, loss: 0.7865909337997437 2023-01-24 00:10:49.781665: step: 524/463, loss: 2.337918996810913 2023-01-24 00:10:50.413630: step: 526/463, loss: 0.9144871830940247 2023-01-24 00:10:51.079619: step: 528/463, loss: 10.358148574829102 2023-01-24 00:10:51.682698: step: 530/463, loss: 7.73471736907959 2023-01-24 00:10:52.372083: step: 532/463, loss: 2.0955002307891846 2023-01-24 00:10:52.990497: step: 534/463, loss: 3.5499162673950195 2023-01-24 00:10:53.630381: step: 536/463, loss: 9.840123176574707 2023-01-24 00:10:54.361803: step: 538/463, loss: 0.565033495426178 2023-01-24 00:10:54.972562: step: 540/463, loss: 2.978297233581543 2023-01-24 00:10:55.617673: step: 542/463, loss: 3.097630739212036 2023-01-24 00:10:56.280678: step: 544/463, loss: 0.6192395091056824 2023-01-24 00:10:56.917773: step: 546/463, loss: 0.6627085208892822 2023-01-24 00:10:57.507616: step: 548/463, loss: 0.5414282083511353 2023-01-24 00:10:58.127266: step: 550/463, loss: 5.869350433349609 2023-01-24 00:10:58.740106: step: 552/463, loss: 0.7944194674491882 2023-01-24 00:10:59.344886: step: 554/463, loss: 0.793233335018158 2023-01-24 00:10:59.974016: step: 556/463, loss: 0.9548848867416382 2023-01-24 00:11:00.527309: step: 558/463, loss: 0.9638885259628296 2023-01-24 00:11:01.075205: step: 560/463, loss: 1.126427173614502 2023-01-24 00:11:01.670496: step: 562/463, loss: 5.521804332733154 2023-01-24 00:11:02.316369: step: 564/463, loss: 4.382323741912842 2023-01-24 00:11:02.922715: step: 566/463, loss: 5.044526100158691 2023-01-24 00:11:03.617487: step: 568/463, loss: 4.004136562347412 2023-01-24 00:11:04.252156: step: 570/463, loss: 8.615403175354004 2023-01-24 00:11:04.853332: step: 572/463, loss: 0.8944520950317383 2023-01-24 00:11:05.460988: step: 574/463, loss: 2.199181318283081 2023-01-24 00:11:06.089456: step: 576/463, loss: 1.2330000400543213 2023-01-24 00:11:06.711717: step: 578/463, loss: 1.7642030715942383 2023-01-24 00:11:07.272522: step: 580/463, loss: 5.611152648925781 2023-01-24 00:11:07.945785: step: 582/463, loss: 1.9489843845367432 2023-01-24 00:11:08.569834: step: 584/463, loss: 4.125765323638916 2023-01-24 00:11:09.221452: step: 586/463, loss: 3.5803723335266113 2023-01-24 00:11:09.786358: step: 588/463, loss: 7.588560581207275 2023-01-24 00:11:10.512594: step: 590/463, loss: 0.6418760418891907 2023-01-24 00:11:11.110202: step: 592/463, loss: 4.374541759490967 2023-01-24 00:11:11.675097: step: 594/463, loss: 3.1244797706604004 2023-01-24 00:11:12.251251: step: 596/463, loss: 2.442072868347168 2023-01-24 00:11:12.967541: step: 598/463, loss: 6.230981349945068 2023-01-24 00:11:13.608686: step: 600/463, loss: 1.6357241868972778 2023-01-24 00:11:14.134530: step: 602/463, loss: 1.0334672927856445 2023-01-24 00:11:14.709663: step: 604/463, loss: 1.1081753969192505 2023-01-24 00:11:15.347220: step: 606/463, loss: 10.999990463256836 2023-01-24 00:11:15.969398: step: 608/463, loss: 6.123876571655273 2023-01-24 00:11:16.631897: step: 610/463, loss: 1.3420135974884033 2023-01-24 00:11:17.246935: step: 612/463, loss: 1.0952017307281494 2023-01-24 00:11:17.852682: step: 614/463, loss: 2.475162982940674 2023-01-24 00:11:18.445728: step: 616/463, loss: 12.448763847351074 2023-01-24 00:11:19.076310: step: 618/463, loss: 6.78243350982666 2023-01-24 00:11:19.805591: step: 620/463, loss: 3.799562692642212 2023-01-24 00:11:20.465498: step: 622/463, loss: 2.270915985107422 2023-01-24 00:11:21.141124: step: 624/463, loss: 2.5509397983551025 2023-01-24 00:11:21.759309: step: 626/463, loss: 0.5134139060974121 2023-01-24 00:11:22.369327: step: 628/463, loss: 1.3278541564941406 2023-01-24 00:11:22.952562: step: 630/463, loss: 1.3045064210891724 2023-01-24 00:11:23.551234: step: 632/463, loss: 2.4306788444519043 2023-01-24 00:11:24.162817: step: 634/463, loss: 2.8294200897216797 2023-01-24 00:11:24.756333: step: 636/463, loss: 0.8141008615493774 2023-01-24 00:11:25.347842: step: 638/463, loss: 2.476283550262451 2023-01-24 00:11:25.989572: step: 640/463, loss: 1.7333377599716187 2023-01-24 00:11:26.638431: step: 642/463, loss: 1.2413058280944824 2023-01-24 00:11:27.347468: step: 644/463, loss: 6.005527496337891 2023-01-24 00:11:27.962704: step: 646/463, loss: 0.4678344130516052 2023-01-24 00:11:28.618132: step: 648/463, loss: 0.9554503560066223 2023-01-24 00:11:29.208236: step: 650/463, loss: 3.2303411960601807 2023-01-24 00:11:29.792516: step: 652/463, loss: 1.3381673097610474 2023-01-24 00:11:30.451239: step: 654/463, loss: 4.682566165924072 2023-01-24 00:11:31.170201: step: 656/463, loss: 2.289367437362671 2023-01-24 00:11:31.776796: step: 658/463, loss: 1.656613826751709 2023-01-24 00:11:32.434461: step: 660/463, loss: 2.2105319499969482 2023-01-24 00:11:33.089954: step: 662/463, loss: 2.8972604274749756 2023-01-24 00:11:33.686042: step: 664/463, loss: 1.0741612911224365 2023-01-24 00:11:34.312449: step: 666/463, loss: 2.0426604747772217 2023-01-24 00:11:35.021323: step: 668/463, loss: 1.7445045709609985 2023-01-24 00:11:35.617883: step: 670/463, loss: 0.9677860140800476 2023-01-24 00:11:36.206945: step: 672/463, loss: 1.3433079719543457 2023-01-24 00:11:36.798244: step: 674/463, loss: 1.376589298248291 2023-01-24 00:11:37.429308: step: 676/463, loss: 7.443898677825928 2023-01-24 00:11:38.080770: step: 678/463, loss: 11.935944557189941 2023-01-24 00:11:38.659743: step: 680/463, loss: 1.2659592628479004 2023-01-24 00:11:39.380786: step: 682/463, loss: 2.4840707778930664 2023-01-24 00:11:39.962159: step: 684/463, loss: 0.634859025478363 2023-01-24 00:11:40.543780: step: 686/463, loss: 3.880099296569824 2023-01-24 00:11:41.144788: step: 688/463, loss: 0.5439204573631287 2023-01-24 00:11:41.714834: step: 690/463, loss: 2.7569563388824463 2023-01-24 00:11:42.397609: step: 692/463, loss: 1.7453328371047974 2023-01-24 00:11:43.023616: step: 694/463, loss: 2.5194528102874756 2023-01-24 00:11:43.623572: step: 696/463, loss: 0.7272177934646606 2023-01-24 00:11:44.260507: step: 698/463, loss: 5.252932548522949 2023-01-24 00:11:44.894141: step: 700/463, loss: 2.1721601486206055 2023-01-24 00:11:45.546623: step: 702/463, loss: 2.898956298828125 2023-01-24 00:11:46.177116: step: 704/463, loss: 7.721700668334961 2023-01-24 00:11:46.760551: step: 706/463, loss: 1.7600351572036743 2023-01-24 00:11:47.395858: step: 708/463, loss: 2.073437452316284 2023-01-24 00:11:47.981494: step: 710/463, loss: 1.2766555547714233 2023-01-24 00:11:48.759008: step: 712/463, loss: 3.5212788581848145 2023-01-24 00:11:49.364509: step: 714/463, loss: 1.4497286081314087 2023-01-24 00:11:50.053587: step: 716/463, loss: 0.9730030298233032 2023-01-24 00:11:50.673420: step: 718/463, loss: 0.9956092238426208 2023-01-24 00:11:51.302161: step: 720/463, loss: 4.657918453216553 2023-01-24 00:11:51.884305: step: 722/463, loss: 2.054473876953125 2023-01-24 00:11:52.541929: step: 724/463, loss: 1.123397707939148 2023-01-24 00:11:53.174435: step: 726/463, loss: 2.100891590118408 2023-01-24 00:11:53.789562: step: 728/463, loss: 2.4325504302978516 2023-01-24 00:11:54.373498: step: 730/463, loss: 1.371701955795288 2023-01-24 00:11:54.971135: step: 732/463, loss: 0.9629403352737427 2023-01-24 00:11:55.613522: step: 734/463, loss: 0.8145780563354492 2023-01-24 00:11:56.226399: step: 736/463, loss: 4.981250762939453 2023-01-24 00:11:56.849207: step: 738/463, loss: 1.6120213270187378 2023-01-24 00:11:57.484057: step: 740/463, loss: 2.0500338077545166 2023-01-24 00:11:58.088469: step: 742/463, loss: 1.8650671243667603 2023-01-24 00:11:58.695064: step: 744/463, loss: 1.8236877918243408 2023-01-24 00:11:59.324993: step: 746/463, loss: 0.7303426265716553 2023-01-24 00:11:59.978763: step: 748/463, loss: 0.9084663391113281 2023-01-24 00:12:00.653657: step: 750/463, loss: 1.3981428146362305 2023-01-24 00:12:01.305470: step: 752/463, loss: 2.2435474395751953 2023-01-24 00:12:01.976533: step: 754/463, loss: 2.9301655292510986 2023-01-24 00:12:02.550666: step: 756/463, loss: 0.7749660015106201 2023-01-24 00:12:03.172810: step: 758/463, loss: 1.1100990772247314 2023-01-24 00:12:03.718311: step: 760/463, loss: 1.2510125637054443 2023-01-24 00:12:04.305142: step: 762/463, loss: 1.8851476907730103 2023-01-24 00:12:04.909520: step: 764/463, loss: 1.8648860454559326 2023-01-24 00:12:05.556113: step: 766/463, loss: 2.2498273849487305 2023-01-24 00:12:06.248650: step: 768/463, loss: 2.031127691268921 2023-01-24 00:12:06.876398: step: 770/463, loss: 8.383289337158203 2023-01-24 00:12:07.517489: step: 772/463, loss: 8.871315956115723 2023-01-24 00:12:08.111703: step: 774/463, loss: 0.9845886826515198 2023-01-24 00:12:08.731830: step: 776/463, loss: 4.885703086853027 2023-01-24 00:12:09.413349: step: 778/463, loss: 1.8353314399719238 2023-01-24 00:12:10.088555: step: 780/463, loss: 2.4183855056762695 2023-01-24 00:12:10.729731: step: 782/463, loss: 0.9191327691078186 2023-01-24 00:12:11.327572: step: 784/463, loss: 0.5569547414779663 2023-01-24 00:12:12.003465: step: 786/463, loss: 2.9969797134399414 2023-01-24 00:12:12.563716: step: 788/463, loss: 2.8174095153808594 2023-01-24 00:12:13.232805: step: 790/463, loss: 1.5557632446289062 2023-01-24 00:12:13.884410: step: 792/463, loss: 10.48116397857666 2023-01-24 00:12:14.582270: step: 794/463, loss: 1.122731328010559 2023-01-24 00:12:15.212557: step: 796/463, loss: 1.3373991250991821 2023-01-24 00:12:15.829705: step: 798/463, loss: 2.2285656929016113 2023-01-24 00:12:16.457244: step: 800/463, loss: 1.8702635765075684 2023-01-24 00:12:17.021429: step: 802/463, loss: 0.6030070781707764 2023-01-24 00:12:17.654969: step: 804/463, loss: 4.200839042663574 2023-01-24 00:12:18.318968: step: 806/463, loss: 6.136408805847168 2023-01-24 00:12:18.992416: step: 808/463, loss: 0.8041371703147888 2023-01-24 00:12:19.608106: step: 810/463, loss: 1.754456639289856 2023-01-24 00:12:20.222549: step: 812/463, loss: 0.6482319235801697 2023-01-24 00:12:20.797820: step: 814/463, loss: 3.498009443283081 2023-01-24 00:12:21.381596: step: 816/463, loss: 10.918752670288086 2023-01-24 00:12:21.993146: step: 818/463, loss: 1.0988856554031372 2023-01-24 00:12:22.653207: step: 820/463, loss: 1.5297107696533203 2023-01-24 00:12:23.272985: step: 822/463, loss: 1.15298593044281 2023-01-24 00:12:23.837946: step: 824/463, loss: 1.5273621082305908 2023-01-24 00:12:24.401123: step: 826/463, loss: 1.9150218963623047 2023-01-24 00:12:24.975761: step: 828/463, loss: 1.6162713766098022 2023-01-24 00:12:25.643618: step: 830/463, loss: 0.7935346961021423 2023-01-24 00:12:26.284524: step: 832/463, loss: 0.8536718487739563 2023-01-24 00:12:26.908944: step: 834/463, loss: 0.5626165270805359 2023-01-24 00:12:27.577221: step: 836/463, loss: 1.8667511940002441 2023-01-24 00:12:28.350351: step: 838/463, loss: 3.566577911376953 2023-01-24 00:12:29.031610: step: 840/463, loss: 2.507504940032959 2023-01-24 00:12:29.676021: step: 842/463, loss: 7.969700813293457 2023-01-24 00:12:30.363485: step: 844/463, loss: 1.135223627090454 2023-01-24 00:12:30.980326: step: 846/463, loss: 6.44521427154541 2023-01-24 00:12:31.569390: step: 848/463, loss: 1.606684923171997 2023-01-24 00:12:32.189574: step: 850/463, loss: 1.409666657447815 2023-01-24 00:12:32.812902: step: 852/463, loss: 5.477226734161377 2023-01-24 00:12:33.407708: step: 854/463, loss: 2.8613264560699463 2023-01-24 00:12:34.067335: step: 856/463, loss: 0.5755441188812256 2023-01-24 00:12:34.675066: step: 858/463, loss: 1.9484812021255493 2023-01-24 00:12:35.331655: step: 860/463, loss: 2.248089075088501 2023-01-24 00:12:35.917055: step: 862/463, loss: 2.7081620693206787 2023-01-24 00:12:36.564909: step: 864/463, loss: 2.2356619834899902 2023-01-24 00:12:37.166051: step: 866/463, loss: 1.3385205268859863 2023-01-24 00:12:37.802855: step: 868/463, loss: 2.083824634552002 2023-01-24 00:12:38.400520: step: 870/463, loss: 1.0530328750610352 2023-01-24 00:12:38.992097: step: 872/463, loss: 4.014076232910156 2023-01-24 00:12:39.601220: step: 874/463, loss: 1.1937698125839233 2023-01-24 00:12:40.241467: step: 876/463, loss: 1.037207841873169 2023-01-24 00:12:40.832509: step: 878/463, loss: 2.081010341644287 2023-01-24 00:12:41.477582: step: 880/463, loss: 1.2557111978530884 2023-01-24 00:12:42.122485: step: 882/463, loss: 1.5222468376159668 2023-01-24 00:12:42.731610: step: 884/463, loss: 1.3831353187561035 2023-01-24 00:12:43.411493: step: 886/463, loss: 1.7854976654052734 2023-01-24 00:12:44.080736: step: 888/463, loss: 1.5007072687149048 2023-01-24 00:12:44.731636: step: 890/463, loss: 10.29051399230957 2023-01-24 00:12:45.398664: step: 892/463, loss: 2.548170804977417 2023-01-24 00:12:46.021697: step: 894/463, loss: 0.8683263063430786 2023-01-24 00:12:46.625216: step: 896/463, loss: 5.462545394897461 2023-01-24 00:12:47.311251: step: 898/463, loss: 3.8422775268554688 2023-01-24 00:12:47.968613: step: 900/463, loss: 3.2294583320617676 2023-01-24 00:12:48.580196: step: 902/463, loss: 1.8086528778076172 2023-01-24 00:12:49.173326: step: 904/463, loss: 2.4665334224700928 2023-01-24 00:12:49.856898: step: 906/463, loss: 2.427791118621826 2023-01-24 00:12:50.482660: step: 908/463, loss: 3.128635883331299 2023-01-24 00:12:51.156230: step: 910/463, loss: 1.2158808708190918 2023-01-24 00:12:51.723939: step: 912/463, loss: 2.347909688949585 2023-01-24 00:12:52.370198: step: 914/463, loss: 5.597985744476318 2023-01-24 00:12:52.950524: step: 916/463, loss: 2.721803903579712 2023-01-24 00:12:53.614487: step: 918/463, loss: 1.774017572402954 2023-01-24 00:12:54.201848: step: 920/463, loss: 2.1494343280792236 2023-01-24 00:12:54.799075: step: 922/463, loss: 1.5704811811447144 2023-01-24 00:12:55.358334: step: 924/463, loss: 2.1089937686920166 2023-01-24 00:12:55.974863: step: 926/463, loss: 2.468524694442749 ================================================== Loss: 5.717 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2576974564926372, 'r': 0.12129804662885947, 'f1': 0.16495287060839758}, 'combined': 0.12154422044829295, 'epoch': 0} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.324502487562189, 'r': 0.056471861471861465, 'f1': 0.09620206489675516}, 'combined': 0.06767984465098353, 'epoch': 0} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.25782312925170064, 'r': 0.11940768746061751, 'f1': 0.1632213608957795}, 'combined': 0.12026837118636383, 'epoch': 0} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.33991935483870966, 'r': 0.05474025974025974, 'f1': 0.09429530201342283}, 'combined': 0.06694966442953021, 'epoch': 0} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.254983267859741, 'r': 0.11423636008082914, 'f1': 0.1577833798505447}, 'combined': 0.11626143778461187, 'epoch': 0} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.34039512400168137, 'r': 0.053932733932733935, 'f1': 0.09311256755202943}, 'combined': 0.06610992296194089, 'epoch': 0} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.14453125, 'r': 0.13214285714285715, 'f1': 0.13805970149253732}, 'combined': 0.09203980099502487, 'epoch': 0} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.55, 'r': 0.09482758620689655, 'f1': 0.16176470588235292}, 'combined': 0.10784313725490194, 'epoch': 0} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2576974564926372, 'r': 0.12129804662885947, 'f1': 0.16495287060839758}, 'combined': 0.12154422044829295, 'epoch': 0} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.324502487562189, 'r': 0.056471861471861465, 'f1': 0.09620206489675516}, 'combined': 0.06767984465098353, 'epoch': 0} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.14453125, 'r': 0.13214285714285715, 'f1': 0.13805970149253732}, 'combined': 0.09203980099502487, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.25782312925170064, 'r': 0.11940768746061751, 'f1': 0.1632213608957795}, 'combined': 0.12026837118636383, 'epoch': 0} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.33991935483870966, 'r': 0.05474025974025974, 'f1': 0.09429530201342283}, 'combined': 0.06694966442953021, 'epoch': 0} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.254983267859741, 'r': 0.11423636008082914, 'f1': 0.1577833798505447}, 'combined': 0.11626143778461187, 'epoch': 0} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.34039512400168137, 'r': 0.053932733932733935, 'f1': 0.09311256755202943}, 'combined': 0.06610992296194089, 'epoch': 0} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.55, 'r': 0.09482758620689655, 'f1': 0.16176470588235292}, 'combined': 0.10784313725490194, 'epoch': 0} ****************************** Epoch: 1 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:15:55.771391: step: 2/463, loss: 0.4699351191520691 2023-01-24 00:15:56.396732: step: 4/463, loss: 2.375518798828125 2023-01-24 00:15:57.025397: step: 6/463, loss: 4.023386478424072 2023-01-24 00:15:57.634705: step: 8/463, loss: 2.4905319213867188 2023-01-24 00:15:58.291676: step: 10/463, loss: 4.645489692687988 2023-01-24 00:15:58.978881: step: 12/463, loss: 5.968050956726074 2023-01-24 00:15:59.553456: step: 14/463, loss: 3.0957016944885254 2023-01-24 00:16:00.203825: step: 16/463, loss: 1.8526201248168945 2023-01-24 00:16:00.891351: step: 18/463, loss: 2.784377336502075 2023-01-24 00:16:01.576288: step: 20/463, loss: 0.6427896618843079 2023-01-24 00:16:02.233015: step: 22/463, loss: 5.198925971984863 2023-01-24 00:16:02.873904: step: 24/463, loss: 1.3730851411819458 2023-01-24 00:16:03.427449: step: 26/463, loss: 1.4949419498443604 2023-01-24 00:16:04.056305: step: 28/463, loss: 4.609375953674316 2023-01-24 00:16:04.685046: step: 30/463, loss: 0.44352948665618896 2023-01-24 00:16:05.305885: step: 32/463, loss: 0.8155045509338379 2023-01-24 00:16:05.944152: step: 34/463, loss: 2.7601890563964844 2023-01-24 00:16:06.541659: step: 36/463, loss: 0.29658952355384827 2023-01-24 00:16:07.165890: step: 38/463, loss: 4.584624767303467 2023-01-24 00:16:07.794983: step: 40/463, loss: 3.0427956581115723 2023-01-24 00:16:08.408843: step: 42/463, loss: 1.0151573419570923 2023-01-24 00:16:09.039804: step: 44/463, loss: 3.3546783924102783 2023-01-24 00:16:09.695289: step: 46/463, loss: 0.9255642294883728 2023-01-24 00:16:10.297862: step: 48/463, loss: 1.005652904510498 2023-01-24 00:16:10.908929: step: 50/463, loss: 5.434584617614746 2023-01-24 00:16:11.535271: step: 52/463, loss: 5.163382530212402 2023-01-24 00:16:12.111216: step: 54/463, loss: 2.1297554969787598 2023-01-24 00:16:12.767730: step: 56/463, loss: 1.8579020500183105 2023-01-24 00:16:13.405297: step: 58/463, loss: 1.1325268745422363 2023-01-24 00:16:14.051512: step: 60/463, loss: 2.7965030670166016 2023-01-24 00:16:14.682612: step: 62/463, loss: 2.9996118545532227 2023-01-24 00:16:15.360775: step: 64/463, loss: 0.6919140219688416 2023-01-24 00:16:15.948270: step: 66/463, loss: 1.535449504852295 2023-01-24 00:16:16.536859: step: 68/463, loss: 1.230058193206787 2023-01-24 00:16:17.157492: step: 70/463, loss: 1.0200023651123047 2023-01-24 00:16:17.789426: step: 72/463, loss: 1.8213123083114624 2023-01-24 00:16:18.386452: step: 74/463, loss: 3.782329559326172 2023-01-24 00:16:18.965544: step: 76/463, loss: 5.164149284362793 2023-01-24 00:16:19.612378: step: 78/463, loss: 0.9681355953216553 2023-01-24 00:16:20.302243: step: 80/463, loss: 0.8423601388931274 2023-01-24 00:16:20.852344: step: 82/463, loss: 1.9487966299057007 2023-01-24 00:16:21.458780: step: 84/463, loss: 1.610843300819397 2023-01-24 00:16:22.070657: step: 86/463, loss: 0.5803954601287842 2023-01-24 00:16:22.672260: step: 88/463, loss: 0.8578608632087708 2023-01-24 00:16:23.283914: step: 90/463, loss: 0.8879852294921875 2023-01-24 00:16:23.841111: step: 92/463, loss: 0.8478673696517944 2023-01-24 00:16:24.444926: step: 94/463, loss: 4.7528486251831055 2023-01-24 00:16:25.145421: step: 96/463, loss: 6.5922932624816895 2023-01-24 00:16:25.745124: step: 98/463, loss: 1.2908775806427002 2023-01-24 00:16:26.333622: step: 100/463, loss: 0.7816176414489746 2023-01-24 00:16:27.012836: step: 102/463, loss: 0.7028681635856628 2023-01-24 00:16:27.708662: step: 104/463, loss: 0.7880913019180298 2023-01-24 00:16:28.292051: step: 106/463, loss: 1.630981683731079 2023-01-24 00:16:28.848975: step: 108/463, loss: 1.0314098596572876 2023-01-24 00:16:29.507239: step: 110/463, loss: 6.898183822631836 2023-01-24 00:16:30.125394: step: 112/463, loss: 1.985626220703125 2023-01-24 00:16:30.681897: step: 114/463, loss: 0.41272327303886414 2023-01-24 00:16:31.364147: step: 116/463, loss: 7.843683242797852 2023-01-24 00:16:31.976314: step: 118/463, loss: 0.8017847537994385 2023-01-24 00:16:32.579329: step: 120/463, loss: 0.9476606249809265 2023-01-24 00:16:33.282626: step: 122/463, loss: 1.3058056831359863 2023-01-24 00:16:33.884267: step: 124/463, loss: 2.2795910835266113 2023-01-24 00:16:34.504892: step: 126/463, loss: 13.127824783325195 2023-01-24 00:16:35.139335: step: 128/463, loss: 1.0921118259429932 2023-01-24 00:16:35.782878: step: 130/463, loss: 0.8384869694709778 2023-01-24 00:16:36.403668: step: 132/463, loss: 0.6046737432479858 2023-01-24 00:16:37.066774: step: 134/463, loss: 2.3284316062927246 2023-01-24 00:16:37.716178: step: 136/463, loss: 0.39184296131134033 2023-01-24 00:16:38.340257: step: 138/463, loss: 1.636389970779419 2023-01-24 00:16:38.976075: step: 140/463, loss: 2.258352518081665 2023-01-24 00:16:39.568460: step: 142/463, loss: 0.6712445020675659 2023-01-24 00:16:40.212258: step: 144/463, loss: 1.0969666242599487 2023-01-24 00:16:40.785933: step: 146/463, loss: 2.8438591957092285 2023-01-24 00:16:41.415187: step: 148/463, loss: 1.82707679271698 2023-01-24 00:16:42.088577: step: 150/463, loss: 1.7692571878433228 2023-01-24 00:16:42.734670: step: 152/463, loss: 1.9900355339050293 2023-01-24 00:16:43.394263: step: 154/463, loss: 3.4256234169006348 2023-01-24 00:16:44.013479: step: 156/463, loss: 2.424293279647827 2023-01-24 00:16:44.668822: step: 158/463, loss: 0.9029894471168518 2023-01-24 00:16:45.291445: step: 160/463, loss: 0.8645190000534058 2023-01-24 00:16:45.889933: step: 162/463, loss: 1.5848461389541626 2023-01-24 00:16:46.544168: step: 164/463, loss: 2.546576976776123 2023-01-24 00:16:47.199572: step: 166/463, loss: 0.3618963360786438 2023-01-24 00:16:47.846764: step: 168/463, loss: 1.1906884908676147 2023-01-24 00:16:48.456693: step: 170/463, loss: 2.1431944370269775 2023-01-24 00:16:49.072398: step: 172/463, loss: 1.368744134902954 2023-01-24 00:16:49.752872: step: 174/463, loss: 11.136245727539062 2023-01-24 00:16:50.385113: step: 176/463, loss: 1.1451706886291504 2023-01-24 00:16:51.001411: step: 178/463, loss: 0.9140709638595581 2023-01-24 00:16:51.636061: step: 180/463, loss: 6.094206809997559 2023-01-24 00:16:52.242789: step: 182/463, loss: 0.3942575454711914 2023-01-24 00:16:52.865397: step: 184/463, loss: 2.325169324874878 2023-01-24 00:16:53.543067: step: 186/463, loss: 1.0606709718704224 2023-01-24 00:16:54.250588: step: 188/463, loss: 1.2331364154815674 2023-01-24 00:16:54.863540: step: 190/463, loss: 2.40804386138916 2023-01-24 00:16:55.531066: step: 192/463, loss: 0.4889433681964874 2023-01-24 00:16:56.304317: step: 194/463, loss: 1.6716848611831665 2023-01-24 00:16:56.920836: step: 196/463, loss: 1.5071678161621094 2023-01-24 00:16:57.540427: step: 198/463, loss: 1.9952338933944702 2023-01-24 00:16:58.191165: step: 200/463, loss: 1.010396957397461 2023-01-24 00:16:58.735551: step: 202/463, loss: 1.538074254989624 2023-01-24 00:16:59.309882: step: 204/463, loss: 1.4458662271499634 2023-01-24 00:16:59.868702: step: 206/463, loss: 1.0505974292755127 2023-01-24 00:17:00.485500: step: 208/463, loss: 4.2932634353637695 2023-01-24 00:17:01.163594: step: 210/463, loss: 4.669669151306152 2023-01-24 00:17:01.818913: step: 212/463, loss: 1.171759009361267 2023-01-24 00:17:02.464074: step: 214/463, loss: 5.861434459686279 2023-01-24 00:17:03.088864: step: 216/463, loss: 1.4091838598251343 2023-01-24 00:17:03.700047: step: 218/463, loss: 0.37472182512283325 2023-01-24 00:17:04.300459: step: 220/463, loss: 2.0146262645721436 2023-01-24 00:17:05.003665: step: 222/463, loss: 0.843210756778717 2023-01-24 00:17:05.723161: step: 224/463, loss: 5.320677757263184 2023-01-24 00:17:06.458177: step: 226/463, loss: 1.0848666429519653 2023-01-24 00:17:07.164633: step: 228/463, loss: 1.5013128519058228 2023-01-24 00:17:07.792998: step: 230/463, loss: 1.77949857711792 2023-01-24 00:17:08.425495: step: 232/463, loss: 1.1891971826553345 2023-01-24 00:17:09.037003: step: 234/463, loss: 1.8524967432022095 2023-01-24 00:17:09.715936: step: 236/463, loss: 4.435482978820801 2023-01-24 00:17:10.337124: step: 238/463, loss: 1.02517831325531 2023-01-24 00:17:10.955190: step: 240/463, loss: 3.2591967582702637 2023-01-24 00:17:11.691636: step: 242/463, loss: 4.620558738708496 2023-01-24 00:17:12.247970: step: 244/463, loss: 1.8480751514434814 2023-01-24 00:17:12.898791: step: 246/463, loss: 3.3978934288024902 2023-01-24 00:17:13.528990: step: 248/463, loss: 0.9420206546783447 2023-01-24 00:17:14.136996: step: 250/463, loss: 0.5677589178085327 2023-01-24 00:17:14.776350: step: 252/463, loss: 3.938260555267334 2023-01-24 00:17:15.357525: step: 254/463, loss: 4.520252227783203 2023-01-24 00:17:15.951705: step: 256/463, loss: 1.6980226039886475 2023-01-24 00:17:16.539619: step: 258/463, loss: 1.1853129863739014 2023-01-24 00:17:17.082531: step: 260/463, loss: 0.8959956169128418 2023-01-24 00:17:17.781014: step: 262/463, loss: 6.061706066131592 2023-01-24 00:17:18.401405: step: 264/463, loss: 0.7431350946426392 2023-01-24 00:17:19.031454: step: 266/463, loss: 1.085697889328003 2023-01-24 00:17:19.694858: step: 268/463, loss: 2.104355573654175 2023-01-24 00:17:20.394262: step: 270/463, loss: 10.675613403320312 2023-01-24 00:17:21.030222: step: 272/463, loss: 7.407793045043945 2023-01-24 00:17:21.626242: step: 274/463, loss: 2.665255069732666 2023-01-24 00:17:22.285157: step: 276/463, loss: 1.66778564453125 2023-01-24 00:17:22.927051: step: 278/463, loss: 1.295359492301941 2023-01-24 00:17:23.456631: step: 280/463, loss: 5.692170143127441 2023-01-24 00:17:24.050334: step: 282/463, loss: 3.706310749053955 2023-01-24 00:17:24.671384: step: 284/463, loss: 3.0633318424224854 2023-01-24 00:17:25.196493: step: 286/463, loss: 2.0898256301879883 2023-01-24 00:17:25.877496: step: 288/463, loss: 0.6114554405212402 2023-01-24 00:17:26.525488: step: 290/463, loss: 1.6118087768554688 2023-01-24 00:17:27.108810: step: 292/463, loss: 0.6630935668945312 2023-01-24 00:17:27.685288: step: 294/463, loss: 1.647323727607727 2023-01-24 00:17:28.333016: step: 296/463, loss: 0.8704060912132263 2023-01-24 00:17:28.961880: step: 298/463, loss: 0.8944316506385803 2023-01-24 00:17:29.564836: step: 300/463, loss: 0.6378783583641052 2023-01-24 00:17:30.180150: step: 302/463, loss: 1.8073627948760986 2023-01-24 00:17:30.791646: step: 304/463, loss: 3.8258180618286133 2023-01-24 00:17:31.455737: step: 306/463, loss: 0.5500155091285706 2023-01-24 00:17:32.031008: step: 308/463, loss: 1.086765170097351 2023-01-24 00:17:32.585438: step: 310/463, loss: 0.3232910633087158 2023-01-24 00:17:33.173880: step: 312/463, loss: 7.9457221031188965 2023-01-24 00:17:33.806567: step: 314/463, loss: 1.1803704500198364 2023-01-24 00:17:34.359866: step: 316/463, loss: 0.6348320245742798 2023-01-24 00:17:34.981331: step: 318/463, loss: 1.7121257781982422 2023-01-24 00:17:35.640988: step: 320/463, loss: 0.897500216960907 2023-01-24 00:17:36.241537: step: 322/463, loss: 0.7974340319633484 2023-01-24 00:17:36.848816: step: 324/463, loss: 1.7553339004516602 2023-01-24 00:17:37.471294: step: 326/463, loss: 1.0383214950561523 2023-01-24 00:17:38.095658: step: 328/463, loss: 0.3046378195285797 2023-01-24 00:17:38.661466: step: 330/463, loss: 1.019732117652893 2023-01-24 00:17:39.241743: step: 332/463, loss: 1.1025385856628418 2023-01-24 00:17:39.844224: step: 334/463, loss: 1.4371451139450073 2023-01-24 00:17:40.444474: step: 336/463, loss: 1.814802646636963 2023-01-24 00:17:41.079976: step: 338/463, loss: 1.368302583694458 2023-01-24 00:17:41.706982: step: 340/463, loss: 2.4710474014282227 2023-01-24 00:17:42.361244: step: 342/463, loss: 1.7271807193756104 2023-01-24 00:17:42.985668: step: 344/463, loss: 2.8216137886047363 2023-01-24 00:17:43.550290: step: 346/463, loss: 5.167456150054932 2023-01-24 00:17:44.259049: step: 348/463, loss: 1.6524909734725952 2023-01-24 00:17:44.882579: step: 350/463, loss: 0.8680915236473083 2023-01-24 00:17:45.544144: step: 352/463, loss: 0.6050135493278503 2023-01-24 00:17:46.213409: step: 354/463, loss: 1.6030353307724 2023-01-24 00:17:46.820848: step: 356/463, loss: 0.8548694849014282 2023-01-24 00:17:47.441374: step: 358/463, loss: 2.1105258464813232 2023-01-24 00:17:48.087419: step: 360/463, loss: 3.0786657333374023 2023-01-24 00:17:48.718061: step: 362/463, loss: 4.957641124725342 2023-01-24 00:17:49.337120: step: 364/463, loss: 1.0194522142410278 2023-01-24 00:17:49.910150: step: 366/463, loss: 3.57954740524292 2023-01-24 00:17:50.458608: step: 368/463, loss: 0.6008582711219788 2023-01-24 00:17:51.031006: step: 370/463, loss: 2.0320205688476562 2023-01-24 00:17:51.626805: step: 372/463, loss: 2.916686534881592 2023-01-24 00:17:52.232961: step: 374/463, loss: 3.1982994079589844 2023-01-24 00:17:52.878601: step: 376/463, loss: 1.6316673755645752 2023-01-24 00:17:53.429824: step: 378/463, loss: 3.027087688446045 2023-01-24 00:17:54.090677: step: 380/463, loss: 1.1006081104278564 2023-01-24 00:17:54.701265: step: 382/463, loss: 1.2252501249313354 2023-01-24 00:17:55.273576: step: 384/463, loss: 1.5559329986572266 2023-01-24 00:17:55.877039: step: 386/463, loss: 1.224137306213379 2023-01-24 00:17:56.475341: step: 388/463, loss: 1.116799235343933 2023-01-24 00:17:57.100339: step: 390/463, loss: 2.363654613494873 2023-01-24 00:17:57.688999: step: 392/463, loss: 2.604508399963379 2023-01-24 00:17:58.246871: step: 394/463, loss: 2.78459095954895 2023-01-24 00:17:58.849540: step: 396/463, loss: 1.326066017150879 2023-01-24 00:17:59.501838: step: 398/463, loss: 0.3609238862991333 2023-01-24 00:18:00.149050: step: 400/463, loss: 1.1231260299682617 2023-01-24 00:18:00.749442: step: 402/463, loss: 3.6832668781280518 2023-01-24 00:18:01.597912: step: 404/463, loss: 2.0984902381896973 2023-01-24 00:18:02.214179: step: 406/463, loss: 1.530979037284851 2023-01-24 00:18:02.849601: step: 408/463, loss: 0.8785796761512756 2023-01-24 00:18:03.444981: step: 410/463, loss: 1.688921570777893 2023-01-24 00:18:04.123716: step: 412/463, loss: 15.834149360656738 2023-01-24 00:18:04.787031: step: 414/463, loss: 1.0040981769561768 2023-01-24 00:18:05.396015: step: 416/463, loss: 3.4200525283813477 2023-01-24 00:18:05.990707: step: 418/463, loss: 0.4762096703052521 2023-01-24 00:18:06.607003: step: 420/463, loss: 7.7542877197265625 2023-01-24 00:18:07.222737: step: 422/463, loss: 0.9857660531997681 2023-01-24 00:18:07.804094: step: 424/463, loss: 2.6752352714538574 2023-01-24 00:18:08.387717: step: 426/463, loss: 2.2038674354553223 2023-01-24 00:18:08.969999: step: 428/463, loss: 1.1377166509628296 2023-01-24 00:18:09.552924: step: 430/463, loss: 0.7741878628730774 2023-01-24 00:18:10.255531: step: 432/463, loss: 1.5173280239105225 2023-01-24 00:18:10.826441: step: 434/463, loss: 0.4295419156551361 2023-01-24 00:18:11.460555: step: 436/463, loss: 0.5969139933586121 2023-01-24 00:18:12.085137: step: 438/463, loss: 2.5548601150512695 2023-01-24 00:18:12.653713: step: 440/463, loss: 1.3626515865325928 2023-01-24 00:18:13.290464: step: 442/463, loss: 0.508644163608551 2023-01-24 00:18:13.875247: step: 444/463, loss: 0.9636958837509155 2023-01-24 00:18:14.587387: step: 446/463, loss: 1.264704704284668 2023-01-24 00:18:15.180678: step: 448/463, loss: 0.4971581697463989 2023-01-24 00:18:15.806980: step: 450/463, loss: 2.3474135398864746 2023-01-24 00:18:16.431410: step: 452/463, loss: 1.7622555494308472 2023-01-24 00:18:17.058141: step: 454/463, loss: 3.1796083450317383 2023-01-24 00:18:17.690626: step: 456/463, loss: 0.3744276165962219 2023-01-24 00:18:18.290349: step: 458/463, loss: 8.813024520874023 2023-01-24 00:18:18.899284: step: 460/463, loss: 0.5032314658164978 2023-01-24 00:18:19.572027: step: 462/463, loss: 2.072547435760498 2023-01-24 00:18:20.233666: step: 464/463, loss: 1.4978246688842773 2023-01-24 00:18:20.818810: step: 466/463, loss: 0.5556873083114624 2023-01-24 00:18:21.499562: step: 468/463, loss: 1.1066582202911377 2023-01-24 00:18:22.062661: step: 470/463, loss: 0.5107494592666626 2023-01-24 00:18:22.663885: step: 472/463, loss: 0.7218137979507446 2023-01-24 00:18:23.349410: step: 474/463, loss: 2.8748292922973633 2023-01-24 00:18:23.955705: step: 476/463, loss: 0.8965948820114136 2023-01-24 00:18:24.545244: step: 478/463, loss: 1.5408424139022827 2023-01-24 00:18:25.177715: step: 480/463, loss: 1.9921756982803345 2023-01-24 00:18:25.821276: step: 482/463, loss: 11.58511734008789 2023-01-24 00:18:26.491065: step: 484/463, loss: 1.0240808725357056 2023-01-24 00:18:27.082010: step: 486/463, loss: 1.72080659866333 2023-01-24 00:18:27.715924: step: 488/463, loss: 1.0303633213043213 2023-01-24 00:18:28.325639: step: 490/463, loss: 6.706460475921631 2023-01-24 00:18:28.957662: step: 492/463, loss: 4.149219036102295 2023-01-24 00:18:29.535112: step: 494/463, loss: 5.897592544555664 2023-01-24 00:18:30.174203: step: 496/463, loss: 5.925877571105957 2023-01-24 00:18:30.804336: step: 498/463, loss: 0.9180973768234253 2023-01-24 00:18:31.418919: step: 500/463, loss: 0.7941448092460632 2023-01-24 00:18:31.992249: step: 502/463, loss: 1.617560863494873 2023-01-24 00:18:32.595794: step: 504/463, loss: 1.1242046356201172 2023-01-24 00:18:33.221330: step: 506/463, loss: 0.39464277029037476 2023-01-24 00:18:33.832978: step: 508/463, loss: 1.6854923963546753 2023-01-24 00:18:34.442851: step: 510/463, loss: 0.8473150730133057 2023-01-24 00:18:35.057938: step: 512/463, loss: 0.8316122889518738 2023-01-24 00:18:35.627319: step: 514/463, loss: 0.42059633135795593 2023-01-24 00:18:36.330460: step: 516/463, loss: 5.968684196472168 2023-01-24 00:18:36.915145: step: 518/463, loss: 5.765023231506348 2023-01-24 00:18:37.530654: step: 520/463, loss: 1.7241390943527222 2023-01-24 00:18:38.123531: step: 522/463, loss: 0.6947194337844849 2023-01-24 00:18:38.779241: step: 524/463, loss: 1.8014427423477173 2023-01-24 00:18:39.364093: step: 526/463, loss: 4.886884689331055 2023-01-24 00:18:39.962445: step: 528/463, loss: 0.376088410615921 2023-01-24 00:18:40.633202: step: 530/463, loss: 0.7358375787734985 2023-01-24 00:18:41.266601: step: 532/463, loss: 0.9285250306129456 2023-01-24 00:18:41.886870: step: 534/463, loss: 2.393914222717285 2023-01-24 00:18:42.492841: step: 536/463, loss: 5.467259883880615 2023-01-24 00:18:43.126237: step: 538/463, loss: 1.023038387298584 2023-01-24 00:18:43.714084: step: 540/463, loss: 1.1552714109420776 2023-01-24 00:18:44.370815: step: 542/463, loss: 2.4227840900421143 2023-01-24 00:18:44.931036: step: 544/463, loss: 1.0433218479156494 2023-01-24 00:18:45.577715: step: 546/463, loss: 0.49584251642227173 2023-01-24 00:18:46.183968: step: 548/463, loss: 0.9385890960693359 2023-01-24 00:18:46.769621: step: 550/463, loss: 2.05942702293396 2023-01-24 00:18:47.383675: step: 552/463, loss: 0.28611278533935547 2023-01-24 00:18:47.991032: step: 554/463, loss: 0.7339053750038147 2023-01-24 00:18:48.556371: step: 556/463, loss: 5.100002288818359 2023-01-24 00:18:49.118277: step: 558/463, loss: 0.85732102394104 2023-01-24 00:18:49.800253: step: 560/463, loss: 7.781703948974609 2023-01-24 00:18:50.436011: step: 562/463, loss: 0.744465708732605 2023-01-24 00:18:51.068442: step: 564/463, loss: 2.656068801879883 2023-01-24 00:18:51.672061: step: 566/463, loss: 2.2977540493011475 2023-01-24 00:18:52.274977: step: 568/463, loss: 1.5928587913513184 2023-01-24 00:18:52.809334: step: 570/463, loss: 1.8760744333267212 2023-01-24 00:18:53.461044: step: 572/463, loss: 2.2433502674102783 2023-01-24 00:18:54.081307: step: 574/463, loss: 0.2144935578107834 2023-01-24 00:18:54.705773: step: 576/463, loss: 2.286482334136963 2023-01-24 00:18:55.277768: step: 578/463, loss: 0.7859871983528137 2023-01-24 00:18:55.848694: step: 580/463, loss: 0.4527796506881714 2023-01-24 00:18:56.479449: step: 582/463, loss: 1.0044068098068237 2023-01-24 00:18:57.094916: step: 584/463, loss: 2.674173593521118 2023-01-24 00:18:57.689270: step: 586/463, loss: 2.1646971702575684 2023-01-24 00:18:58.300750: step: 588/463, loss: 1.0891554355621338 2023-01-24 00:18:58.873924: step: 590/463, loss: 4.383869647979736 2023-01-24 00:18:59.431763: step: 592/463, loss: 4.611885070800781 2023-01-24 00:19:00.197366: step: 594/463, loss: 0.5704087615013123 2023-01-24 00:19:00.806356: step: 596/463, loss: 0.42145514488220215 2023-01-24 00:19:01.451490: step: 598/463, loss: 1.2059749364852905 2023-01-24 00:19:02.035800: step: 600/463, loss: 1.5599048137664795 2023-01-24 00:19:02.673771: step: 602/463, loss: 1.4827492237091064 2023-01-24 00:19:03.334077: step: 604/463, loss: 1.1338186264038086 2023-01-24 00:19:03.983776: step: 606/463, loss: 4.487750053405762 2023-01-24 00:19:04.570449: step: 608/463, loss: 0.5890522003173828 2023-01-24 00:19:05.134029: step: 610/463, loss: 0.57279372215271 2023-01-24 00:19:05.772254: step: 612/463, loss: 1.3416396379470825 2023-01-24 00:19:06.400262: step: 614/463, loss: 0.7399976849555969 2023-01-24 00:19:06.991505: step: 616/463, loss: 1.218562126159668 2023-01-24 00:19:07.570014: step: 618/463, loss: 2.0450758934020996 2023-01-24 00:19:08.114419: step: 620/463, loss: 1.2597293853759766 2023-01-24 00:19:08.758389: step: 622/463, loss: 5.658841133117676 2023-01-24 00:19:09.412994: step: 624/463, loss: 1.3252308368682861 2023-01-24 00:19:10.059312: step: 626/463, loss: 0.6772922277450562 2023-01-24 00:19:10.685559: step: 628/463, loss: 0.4827549457550049 2023-01-24 00:19:11.308818: step: 630/463, loss: 0.4760152995586395 2023-01-24 00:19:12.031697: step: 632/463, loss: 10.677329063415527 2023-01-24 00:19:12.741763: step: 634/463, loss: 0.8051328659057617 2023-01-24 00:19:13.326553: step: 636/463, loss: 0.7462083697319031 2023-01-24 00:19:13.994635: step: 638/463, loss: 1.583361268043518 2023-01-24 00:19:14.710250: step: 640/463, loss: 0.6152667999267578 2023-01-24 00:19:15.339370: step: 642/463, loss: 1.5198769569396973 2023-01-24 00:19:15.931560: step: 644/463, loss: 6.654841423034668 2023-01-24 00:19:16.610998: step: 646/463, loss: 4.302089691162109 2023-01-24 00:19:17.213177: step: 648/463, loss: 3.101635217666626 2023-01-24 00:19:17.785991: step: 650/463, loss: 2.7661993503570557 2023-01-24 00:19:18.402520: step: 652/463, loss: 1.6187717914581299 2023-01-24 00:19:19.017712: step: 654/463, loss: 1.3367908000946045 2023-01-24 00:19:19.581836: step: 656/463, loss: 1.0152181386947632 2023-01-24 00:19:20.256550: step: 658/463, loss: 0.8695769309997559 2023-01-24 00:19:20.986154: step: 660/463, loss: 0.9498647451400757 2023-01-24 00:19:21.566132: step: 662/463, loss: 7.022472381591797 2023-01-24 00:19:22.173298: step: 664/463, loss: 1.2537897825241089 2023-01-24 00:19:22.830177: step: 666/463, loss: 0.5233864188194275 2023-01-24 00:19:23.516473: step: 668/463, loss: 0.8162819147109985 2023-01-24 00:19:24.158391: step: 670/463, loss: 0.880567729473114 2023-01-24 00:19:24.740161: step: 672/463, loss: 10.166847229003906 2023-01-24 00:19:25.356297: step: 674/463, loss: 0.779448390007019 2023-01-24 00:19:25.980934: step: 676/463, loss: 2.913146495819092 2023-01-24 00:19:26.556368: step: 678/463, loss: 0.9089398384094238 2023-01-24 00:19:27.207854: step: 680/463, loss: 2.1726222038269043 2023-01-24 00:19:27.901411: step: 682/463, loss: 0.8791589140892029 2023-01-24 00:19:28.553380: step: 684/463, loss: 2.5875072479248047 2023-01-24 00:19:29.164609: step: 686/463, loss: 2.4053802490234375 2023-01-24 00:19:29.892689: step: 688/463, loss: 1.7778115272521973 2023-01-24 00:19:30.484202: step: 690/463, loss: 1.9519290924072266 2023-01-24 00:19:31.176212: step: 692/463, loss: 0.1798943281173706 2023-01-24 00:19:31.805632: step: 694/463, loss: 2.8078808784484863 2023-01-24 00:19:32.418819: step: 696/463, loss: 1.3408759832382202 2023-01-24 00:19:32.989504: step: 698/463, loss: 2.110194444656372 2023-01-24 00:19:33.616546: step: 700/463, loss: 7.268546104431152 2023-01-24 00:19:34.321148: step: 702/463, loss: 0.8053166270256042 2023-01-24 00:19:35.072365: step: 704/463, loss: 1.6108207702636719 2023-01-24 00:19:35.759027: step: 706/463, loss: 3.5655264854431152 2023-01-24 00:19:36.391971: step: 708/463, loss: 0.7549291253089905 2023-01-24 00:19:36.958989: step: 710/463, loss: 0.6753939390182495 2023-01-24 00:19:37.645658: step: 712/463, loss: 0.9439899325370789 2023-01-24 00:19:38.294105: step: 714/463, loss: 1.3559280633926392 2023-01-24 00:19:38.858494: step: 716/463, loss: 0.8545700907707214 2023-01-24 00:19:39.456807: step: 718/463, loss: 0.2907158434391022 2023-01-24 00:19:40.031119: step: 720/463, loss: 0.8471012115478516 2023-01-24 00:19:40.636476: step: 722/463, loss: 1.1871973276138306 2023-01-24 00:19:41.160053: step: 724/463, loss: 1.6353073120117188 2023-01-24 00:19:41.757358: step: 726/463, loss: 0.5061972141265869 2023-01-24 00:19:42.404585: step: 728/463, loss: 4.718185901641846 2023-01-24 00:19:43.008574: step: 730/463, loss: 0.32988643646240234 2023-01-24 00:19:43.701543: step: 732/463, loss: 1.4975134134292603 2023-01-24 00:19:44.376497: step: 734/463, loss: 2.751270055770874 2023-01-24 00:19:44.997823: step: 736/463, loss: 0.60331791639328 2023-01-24 00:19:45.616343: step: 738/463, loss: 1.1508113145828247 2023-01-24 00:19:46.232197: step: 740/463, loss: 0.9011660218238831 2023-01-24 00:19:46.878914: step: 742/463, loss: 0.6524103283882141 2023-01-24 00:19:47.538529: step: 744/463, loss: 2.0102248191833496 2023-01-24 00:19:48.178786: step: 746/463, loss: 0.2802926301956177 2023-01-24 00:19:48.772550: step: 748/463, loss: 0.7090492844581604 2023-01-24 00:19:49.431640: step: 750/463, loss: 0.3491131365299225 2023-01-24 00:19:50.016256: step: 752/463, loss: 0.28213489055633545 2023-01-24 00:19:50.571792: step: 754/463, loss: 1.4888509511947632 2023-01-24 00:19:51.186300: step: 756/463, loss: 5.215658664703369 2023-01-24 00:19:51.908976: step: 758/463, loss: 1.1845890283584595 2023-01-24 00:19:52.510578: step: 760/463, loss: 1.2456873655319214 2023-01-24 00:19:53.110202: step: 762/463, loss: 1.7081714868545532 2023-01-24 00:19:53.762705: step: 764/463, loss: 1.9328604936599731 2023-01-24 00:19:54.515910: step: 766/463, loss: 2.175710916519165 2023-01-24 00:19:55.193944: step: 768/463, loss: 1.0704314708709717 2023-01-24 00:19:55.800336: step: 770/463, loss: 0.2749355733394623 2023-01-24 00:19:56.425134: step: 772/463, loss: 1.2669671773910522 2023-01-24 00:19:57.015351: step: 774/463, loss: 0.7744167447090149 2023-01-24 00:19:57.652172: step: 776/463, loss: 4.097024917602539 2023-01-24 00:19:58.337921: step: 778/463, loss: 1.246978998184204 2023-01-24 00:19:58.968030: step: 780/463, loss: 0.4272772967815399 2023-01-24 00:19:59.549397: step: 782/463, loss: 0.8533040881156921 2023-01-24 00:20:00.128360: step: 784/463, loss: 0.8158129453659058 2023-01-24 00:20:00.773032: step: 786/463, loss: 0.3391382396221161 2023-01-24 00:20:01.441570: step: 788/463, loss: 0.49406400322914124 2023-01-24 00:20:02.038462: step: 790/463, loss: 1.9806959629058838 2023-01-24 00:20:02.655485: step: 792/463, loss: 0.20058730244636536 2023-01-24 00:20:03.238324: step: 794/463, loss: 1.7735226154327393 2023-01-24 00:20:03.813816: step: 796/463, loss: 1.942405104637146 2023-01-24 00:20:04.451070: step: 798/463, loss: 2.2006466388702393 2023-01-24 00:20:05.003475: step: 800/463, loss: 0.759432315826416 2023-01-24 00:20:05.599342: step: 802/463, loss: 1.5341131687164307 2023-01-24 00:20:06.191979: step: 804/463, loss: 0.26032164692878723 2023-01-24 00:20:06.836048: step: 806/463, loss: 0.5562014579772949 2023-01-24 00:20:07.528285: step: 808/463, loss: 3.8895342350006104 2023-01-24 00:20:08.135183: step: 810/463, loss: 1.5426108837127686 2023-01-24 00:20:08.779187: step: 812/463, loss: 1.3962609767913818 2023-01-24 00:20:09.387584: step: 814/463, loss: 2.5775716304779053 2023-01-24 00:20:10.005054: step: 816/463, loss: 4.908992767333984 2023-01-24 00:20:10.762887: step: 818/463, loss: 1.0185517072677612 2023-01-24 00:20:11.417379: step: 820/463, loss: 1.1766185760498047 2023-01-24 00:20:12.025503: step: 822/463, loss: 1.4200077056884766 2023-01-24 00:20:12.644252: step: 824/463, loss: 2.038999080657959 2023-01-24 00:20:13.326552: step: 826/463, loss: 1.2661778926849365 2023-01-24 00:20:14.020623: step: 828/463, loss: 8.435696601867676 2023-01-24 00:20:14.750867: step: 830/463, loss: 2.330742597579956 2023-01-24 00:20:15.334374: step: 832/463, loss: 0.3773508369922638 2023-01-24 00:20:15.941214: step: 834/463, loss: 0.6363090872764587 2023-01-24 00:20:16.603299: step: 836/463, loss: 0.8447015881538391 2023-01-24 00:20:17.202889: step: 838/463, loss: 2.4865756034851074 2023-01-24 00:20:17.814172: step: 840/463, loss: 0.3584865927696228 2023-01-24 00:20:18.439051: step: 842/463, loss: 2.4265570640563965 2023-01-24 00:20:19.009050: step: 844/463, loss: 0.3773811459541321 2023-01-24 00:20:19.698712: step: 846/463, loss: 1.4548594951629639 2023-01-24 00:20:20.317537: step: 848/463, loss: 1.7747187614440918 2023-01-24 00:20:20.966742: step: 850/463, loss: 1.1436755657196045 2023-01-24 00:20:21.522749: step: 852/463, loss: 1.6866071224212646 2023-01-24 00:20:22.160855: step: 854/463, loss: 0.6596888303756714 2023-01-24 00:20:22.843281: step: 856/463, loss: 0.7502093315124512 2023-01-24 00:20:23.512906: step: 858/463, loss: 0.746474027633667 2023-01-24 00:20:24.132330: step: 860/463, loss: 2.7668204307556152 2023-01-24 00:20:24.754060: step: 862/463, loss: 1.996107816696167 2023-01-24 00:20:25.363738: step: 864/463, loss: 0.7641968727111816 2023-01-24 00:20:25.982795: step: 866/463, loss: 0.35953930020332336 2023-01-24 00:20:26.652388: step: 868/463, loss: 19.84459686279297 2023-01-24 00:20:27.217829: step: 870/463, loss: 2.129950523376465 2023-01-24 00:20:27.831130: step: 872/463, loss: 0.7402244210243225 2023-01-24 00:20:28.464972: step: 874/463, loss: 2.562405824661255 2023-01-24 00:20:29.051479: step: 876/463, loss: 3.5468077659606934 2023-01-24 00:20:29.766544: step: 878/463, loss: 5.784448623657227 2023-01-24 00:20:30.394662: step: 880/463, loss: 0.8023273348808289 2023-01-24 00:20:31.003068: step: 882/463, loss: 2.4333505630493164 2023-01-24 00:20:31.608071: step: 884/463, loss: 5.7280097007751465 2023-01-24 00:20:32.260641: step: 886/463, loss: 0.48148149251937866 2023-01-24 00:20:32.900225: step: 888/463, loss: 2.1315040588378906 2023-01-24 00:20:33.566215: step: 890/463, loss: 3.3278262615203857 2023-01-24 00:20:34.194338: step: 892/463, loss: 1.4306143522262573 2023-01-24 00:20:34.782328: step: 894/463, loss: 0.7392702698707581 2023-01-24 00:20:35.411682: step: 896/463, loss: 1.2738280296325684 2023-01-24 00:20:36.007226: step: 898/463, loss: 4.200432777404785 2023-01-24 00:20:36.553354: step: 900/463, loss: 0.8969659209251404 2023-01-24 00:20:37.190389: step: 902/463, loss: 0.49081388115882874 2023-01-24 00:20:37.798631: step: 904/463, loss: 2.9365320205688477 2023-01-24 00:20:38.405573: step: 906/463, loss: 2.1249797344207764 2023-01-24 00:20:39.126431: step: 908/463, loss: 0.5331587791442871 2023-01-24 00:20:39.767485: step: 910/463, loss: 0.6702202558517456 2023-01-24 00:20:40.448096: step: 912/463, loss: 0.8810736536979675 2023-01-24 00:20:41.057169: step: 914/463, loss: 6.82108211517334 2023-01-24 00:20:41.697661: step: 916/463, loss: 1.0248939990997314 2023-01-24 00:20:42.383401: step: 918/463, loss: 4.763139247894287 2023-01-24 00:20:42.991757: step: 920/463, loss: 0.1915578544139862 2023-01-24 00:20:43.582456: step: 922/463, loss: 3.551762580871582 2023-01-24 00:20:44.146501: step: 924/463, loss: 0.15049009025096893 2023-01-24 00:20:44.725811: step: 926/463, loss: 0.3977356255054474 ================================================== Loss: 2.130 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3628329918032787, 'r': 0.16735586011342155, 'f1': 0.2290588615782665}, 'combined': 0.16878021379451214, 'epoch': 1} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.4075817582361348, 'r': 0.1477616434889023, 'f1': 0.21689264792196605}, 'combined': 0.15258779250791582, 'epoch': 1} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3651107594936709, 'r': 0.1635751417769376, 'f1': 0.22593015665796345}, 'combined': 0.16647485227428885, 'epoch': 1} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.41578303242891773, 'r': 0.14460450650823592, 'f1': 0.21458043243757532}, 'combined': 0.15235210703067847, 'epoch': 1} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3594258130081301, 'r': 0.16714319470699435, 'f1': 0.22817741935483873}, 'combined': 0.1681307300509338, 'epoch': 1} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.4093269631223721, 'r': 0.1407061435733154, 'f1': 0.20942309741144619}, 'combined': 0.14869039916212679, 'epoch': 1} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3666666666666667, 'r': 0.10476190476190476, 'f1': 0.16296296296296298}, 'combined': 0.10864197530864197, 'epoch': 1} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.06896551724137931, 'f1': 0.1176470588235294}, 'combined': 0.07843137254901959, 'epoch': 1} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3628329918032787, 'r': 0.16735586011342155, 'f1': 0.2290588615782665}, 'combined': 0.16878021379451214, 'epoch': 1} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.4075817582361348, 'r': 0.1477616434889023, 'f1': 0.21689264792196605}, 'combined': 0.15258779250791582, 'epoch': 1} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3666666666666667, 'r': 0.10476190476190476, 'f1': 0.16296296296296298}, 'combined': 0.10864197530864197, 'epoch': 1} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3651107594936709, 'r': 0.1635751417769376, 'f1': 0.22593015665796345}, 'combined': 0.16647485227428885, 'epoch': 1} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.41578303242891773, 'r': 0.14460450650823592, 'f1': 0.21458043243757532}, 'combined': 0.15235210703067847, 'epoch': 1} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3594258130081301, 'r': 0.16714319470699435, 'f1': 0.22817741935483873}, 'combined': 0.1681307300509338, 'epoch': 1} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.4093269631223721, 'r': 0.1407061435733154, 'f1': 0.20942309741144619}, 'combined': 0.14869039916212679, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.06896551724137931, 'f1': 0.1176470588235294}, 'combined': 0.07843137254901959, 'epoch': 1} ****************************** Epoch: 2 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:23:49.451989: step: 2/463, loss: 0.9894496202468872 2023-01-24 00:23:50.045754: step: 4/463, loss: 0.45465102791786194 2023-01-24 00:23:50.648488: step: 6/463, loss: 2.798133373260498 2023-01-24 00:23:51.264941: step: 8/463, loss: 0.8138078451156616 2023-01-24 00:23:51.981154: step: 10/463, loss: 1.2127692699432373 2023-01-24 00:23:52.613680: step: 12/463, loss: 1.0266129970550537 2023-01-24 00:23:53.288915: step: 14/463, loss: 1.0878266096115112 2023-01-24 00:23:53.902102: step: 16/463, loss: 2.4722635746002197 2023-01-24 00:23:54.542122: step: 18/463, loss: 2.5403664112091064 2023-01-24 00:23:55.170385: step: 20/463, loss: 0.9423449039459229 2023-01-24 00:23:55.790797: step: 22/463, loss: 0.6210274696350098 2023-01-24 00:23:56.351313: step: 24/463, loss: 8.77476692199707 2023-01-24 00:23:57.001073: step: 26/463, loss: 0.5426028966903687 2023-01-24 00:23:57.594740: step: 28/463, loss: 1.578312873840332 2023-01-24 00:23:58.211948: step: 30/463, loss: 1.9129862785339355 2023-01-24 00:23:58.895326: step: 32/463, loss: 1.1937426328659058 2023-01-24 00:23:59.501134: step: 34/463, loss: 0.6984506249427795 2023-01-24 00:24:00.124534: step: 36/463, loss: 0.957370400428772 2023-01-24 00:24:00.738330: step: 38/463, loss: 1.071913480758667 2023-01-24 00:24:01.346140: step: 40/463, loss: 3.399691104888916 2023-01-24 00:24:01.949338: step: 42/463, loss: 1.4550176858901978 2023-01-24 00:24:02.546852: step: 44/463, loss: 0.3390703499317169 2023-01-24 00:24:03.144943: step: 46/463, loss: 2.048489809036255 2023-01-24 00:24:03.759708: step: 48/463, loss: 1.198237657546997 2023-01-24 00:24:04.371475: step: 50/463, loss: 1.753849744796753 2023-01-24 00:24:05.011288: step: 52/463, loss: 1.4532337188720703 2023-01-24 00:24:05.592234: step: 54/463, loss: 0.5274620056152344 2023-01-24 00:24:06.263119: step: 56/463, loss: 1.1470394134521484 2023-01-24 00:24:06.864367: step: 58/463, loss: 1.5308152437210083 2023-01-24 00:24:07.434011: step: 60/463, loss: 0.39461639523506165 2023-01-24 00:24:08.020439: step: 62/463, loss: 0.9009755253791809 2023-01-24 00:24:08.613417: step: 64/463, loss: 3.662085771560669 2023-01-24 00:24:09.250399: step: 66/463, loss: 4.629024505615234 2023-01-24 00:24:09.847336: step: 68/463, loss: 8.116731643676758 2023-01-24 00:24:10.458973: step: 70/463, loss: 0.9731239080429077 2023-01-24 00:24:11.040182: step: 72/463, loss: 0.3047223687171936 2023-01-24 00:24:11.706290: step: 74/463, loss: 1.4033842086791992 2023-01-24 00:24:12.307081: step: 76/463, loss: 3.2886178493499756 2023-01-24 00:24:12.967627: step: 78/463, loss: 0.5910338163375854 2023-01-24 00:24:13.620661: step: 80/463, loss: 1.046976089477539 2023-01-24 00:24:14.200975: step: 82/463, loss: 2.0072383880615234 2023-01-24 00:24:14.827289: step: 84/463, loss: 0.7566812038421631 2023-01-24 00:24:15.409321: step: 86/463, loss: 0.746380090713501 2023-01-24 00:24:15.985272: step: 88/463, loss: 9.141138076782227 2023-01-24 00:24:16.732183: step: 90/463, loss: 1.823565125465393 2023-01-24 00:24:17.409702: step: 92/463, loss: 1.2571300268173218 2023-01-24 00:24:18.068520: step: 94/463, loss: 1.2493375539779663 2023-01-24 00:24:18.708566: step: 96/463, loss: 2.1169896125793457 2023-01-24 00:24:19.364886: step: 98/463, loss: 1.185326337814331 2023-01-24 00:24:20.000206: step: 100/463, loss: 0.5350555777549744 2023-01-24 00:24:20.590619: step: 102/463, loss: 3.9592957496643066 2023-01-24 00:24:21.179708: step: 104/463, loss: 1.7487577199935913 2023-01-24 00:24:21.841255: step: 106/463, loss: 1.0984340906143188 2023-01-24 00:24:22.433937: step: 108/463, loss: 0.6043806076049805 2023-01-24 00:24:23.069170: step: 110/463, loss: 3.031501293182373 2023-01-24 00:24:23.714832: step: 112/463, loss: 0.668498158454895 2023-01-24 00:24:24.327724: step: 114/463, loss: 0.29584604501724243 2023-01-24 00:24:24.948720: step: 116/463, loss: 0.9585784673690796 2023-01-24 00:24:25.597846: step: 118/463, loss: 1.2937836647033691 2023-01-24 00:24:26.200479: step: 120/463, loss: 3.4346203804016113 2023-01-24 00:24:26.785489: step: 122/463, loss: 3.9960038661956787 2023-01-24 00:24:27.371037: step: 124/463, loss: 0.532163679599762 2023-01-24 00:24:28.036330: step: 126/463, loss: 1.9848299026489258 2023-01-24 00:24:28.594712: step: 128/463, loss: 1.2614847421646118 2023-01-24 00:24:29.191575: step: 130/463, loss: 0.8085165023803711 2023-01-24 00:24:29.827001: step: 132/463, loss: 1.0154560804367065 2023-01-24 00:24:30.520692: step: 134/463, loss: 1.2225639820098877 2023-01-24 00:24:31.115648: step: 136/463, loss: 1.1401582956314087 2023-01-24 00:24:31.797438: step: 138/463, loss: 1.3652644157409668 2023-01-24 00:24:32.418572: step: 140/463, loss: 0.3359295725822449 2023-01-24 00:24:33.069057: step: 142/463, loss: 1.4944748878479004 2023-01-24 00:24:33.646965: step: 144/463, loss: 0.9680233001708984 2023-01-24 00:24:34.246749: step: 146/463, loss: 0.27856338024139404 2023-01-24 00:24:34.897438: step: 148/463, loss: 1.2067229747772217 2023-01-24 00:24:35.529073: step: 150/463, loss: 2.9133286476135254 2023-01-24 00:24:36.142568: step: 152/463, loss: 1.8904049396514893 2023-01-24 00:24:36.715622: step: 154/463, loss: 0.7690768241882324 2023-01-24 00:24:37.409575: step: 156/463, loss: 0.984764575958252 2023-01-24 00:24:38.086538: step: 158/463, loss: 0.9009560346603394 2023-01-24 00:24:38.695988: step: 160/463, loss: 0.8590795397758484 2023-01-24 00:24:39.405815: step: 162/463, loss: 2.0233514308929443 2023-01-24 00:24:39.994672: step: 164/463, loss: 1.7032970190048218 2023-01-24 00:24:40.586004: step: 166/463, loss: 0.523679256439209 2023-01-24 00:24:41.207049: step: 168/463, loss: 0.6783276200294495 2023-01-24 00:24:41.853352: step: 170/463, loss: 0.9665436148643494 2023-01-24 00:24:42.520187: step: 172/463, loss: 1.4590559005737305 2023-01-24 00:24:43.205724: step: 174/463, loss: 0.5568957924842834 2023-01-24 00:24:43.816602: step: 176/463, loss: 4.356968879699707 2023-01-24 00:24:44.390134: step: 178/463, loss: 2.6532323360443115 2023-01-24 00:24:45.042650: step: 180/463, loss: 0.28315022587776184 2023-01-24 00:24:45.654808: step: 182/463, loss: 1.5206120014190674 2023-01-24 00:24:46.306913: step: 184/463, loss: 1.0600918531417847 2023-01-24 00:24:46.918814: step: 186/463, loss: 0.8189708590507507 2023-01-24 00:24:47.626942: step: 188/463, loss: 2.103588581085205 2023-01-24 00:24:48.241263: step: 190/463, loss: 1.4584503173828125 2023-01-24 00:24:48.869104: step: 192/463, loss: 3.3904099464416504 2023-01-24 00:24:49.516004: step: 194/463, loss: 1.362410545349121 2023-01-24 00:24:50.178878: step: 196/463, loss: 2.0639400482177734 2023-01-24 00:24:50.773508: step: 198/463, loss: 0.7193225622177124 2023-01-24 00:24:51.341383: step: 200/463, loss: 0.5780397653579712 2023-01-24 00:24:51.981848: step: 202/463, loss: 0.6100775599479675 2023-01-24 00:24:52.599285: step: 204/463, loss: 0.7696682214736938 2023-01-24 00:24:53.245321: step: 206/463, loss: 1.430206060409546 2023-01-24 00:24:53.907075: step: 208/463, loss: 0.7321590185165405 2023-01-24 00:24:54.534502: step: 210/463, loss: 6.696055889129639 2023-01-24 00:24:55.111602: step: 212/463, loss: 5.673430919647217 2023-01-24 00:24:55.720944: step: 214/463, loss: 1.3207730054855347 2023-01-24 00:24:56.359934: step: 216/463, loss: 0.2534089684486389 2023-01-24 00:24:56.965355: step: 218/463, loss: 0.769867479801178 2023-01-24 00:24:57.586548: step: 220/463, loss: 0.7346243858337402 2023-01-24 00:24:58.310470: step: 222/463, loss: 0.9753665328025818 2023-01-24 00:24:59.031270: step: 224/463, loss: 0.2083137333393097 2023-01-24 00:24:59.634609: step: 226/463, loss: 9.185884475708008 2023-01-24 00:25:00.216079: step: 228/463, loss: 1.214653491973877 2023-01-24 00:25:00.823773: step: 230/463, loss: 0.5423922538757324 2023-01-24 00:25:01.465875: step: 232/463, loss: 1.7326282262802124 2023-01-24 00:25:02.071335: step: 234/463, loss: 0.6510726809501648 2023-01-24 00:25:02.722645: step: 236/463, loss: 0.28267186880111694 2023-01-24 00:25:03.345228: step: 238/463, loss: 1.7215492725372314 2023-01-24 00:25:03.969956: step: 240/463, loss: 1.45225989818573 2023-01-24 00:25:04.660925: step: 242/463, loss: 1.423581838607788 2023-01-24 00:25:05.307530: step: 244/463, loss: 0.5053572058677673 2023-01-24 00:25:05.884840: step: 246/463, loss: 0.7975178360939026 2023-01-24 00:25:06.524250: step: 248/463, loss: 0.2178017944097519 2023-01-24 00:25:07.114759: step: 250/463, loss: 1.4246304035186768 2023-01-24 00:25:07.815019: step: 252/463, loss: 0.25860071182250977 2023-01-24 00:25:08.415880: step: 254/463, loss: 4.413857460021973 2023-01-24 00:25:09.010042: step: 256/463, loss: 0.19995911419391632 2023-01-24 00:25:09.670447: step: 258/463, loss: 0.8768954277038574 2023-01-24 00:25:10.302295: step: 260/463, loss: 1.3025327920913696 2023-01-24 00:25:10.894047: step: 262/463, loss: 1.4638234376907349 2023-01-24 00:25:11.625728: step: 264/463, loss: 1.931910514831543 2023-01-24 00:25:12.192201: step: 266/463, loss: 3.1289453506469727 2023-01-24 00:25:12.788349: step: 268/463, loss: 0.6263246536254883 2023-01-24 00:25:13.448266: step: 270/463, loss: 0.43547502160072327 2023-01-24 00:25:14.044612: step: 272/463, loss: 0.38607877492904663 2023-01-24 00:25:14.639493: step: 274/463, loss: 0.5074111819267273 2023-01-24 00:25:15.241805: step: 276/463, loss: 2.655639171600342 2023-01-24 00:25:15.856552: step: 278/463, loss: 1.486943244934082 2023-01-24 00:25:16.452369: step: 280/463, loss: 1.6381365060806274 2023-01-24 00:25:17.067288: step: 282/463, loss: 0.7234300971031189 2023-01-24 00:25:17.729802: step: 284/463, loss: 3.6691396236419678 2023-01-24 00:25:18.322934: step: 286/463, loss: 1.1668576002120972 2023-01-24 00:25:18.980241: step: 288/463, loss: 0.8059089183807373 2023-01-24 00:25:19.590808: step: 290/463, loss: 0.3327215611934662 2023-01-24 00:25:20.278272: step: 292/463, loss: 2.0790326595306396 2023-01-24 00:25:20.897542: step: 294/463, loss: 0.8459987044334412 2023-01-24 00:25:21.544527: step: 296/463, loss: 1.0282001495361328 2023-01-24 00:25:22.191749: step: 298/463, loss: 1.7880882024765015 2023-01-24 00:25:22.805867: step: 300/463, loss: 0.4497677981853485 2023-01-24 00:25:23.424655: step: 302/463, loss: 2.4812240600585938 2023-01-24 00:25:24.027548: step: 304/463, loss: 1.0214134454727173 2023-01-24 00:25:24.587343: step: 306/463, loss: 1.1175379753112793 2023-01-24 00:25:25.264106: step: 308/463, loss: 1.0720598697662354 2023-01-24 00:25:25.873283: step: 310/463, loss: 1.0185978412628174 2023-01-24 00:25:26.457245: step: 312/463, loss: 0.29205477237701416 2023-01-24 00:25:27.106383: step: 314/463, loss: 1.3354535102844238 2023-01-24 00:25:27.795465: step: 316/463, loss: 0.4895039200782776 2023-01-24 00:25:28.443109: step: 318/463, loss: 0.835686445236206 2023-01-24 00:25:29.087043: step: 320/463, loss: 0.6792929172515869 2023-01-24 00:25:29.695826: step: 322/463, loss: 0.3367239534854889 2023-01-24 00:25:30.378874: step: 324/463, loss: 0.918974757194519 2023-01-24 00:25:31.032927: step: 326/463, loss: 0.6140539646148682 2023-01-24 00:25:31.748397: step: 328/463, loss: 2.310220241546631 2023-01-24 00:25:32.379813: step: 330/463, loss: 2.6445541381835938 2023-01-24 00:25:32.960930: step: 332/463, loss: 3.4335575103759766 2023-01-24 00:25:33.602493: step: 334/463, loss: 1.2598353624343872 2023-01-24 00:25:34.248758: step: 336/463, loss: 1.4418747425079346 2023-01-24 00:25:34.862636: step: 338/463, loss: 0.4796026349067688 2023-01-24 00:25:35.560025: step: 340/463, loss: 0.9120638966560364 2023-01-24 00:25:36.172638: step: 342/463, loss: 1.0964151620864868 2023-01-24 00:25:36.784592: step: 344/463, loss: 0.5410758852958679 2023-01-24 00:25:37.394958: step: 346/463, loss: 2.025989532470703 2023-01-24 00:25:38.062197: step: 348/463, loss: 0.8962111473083496 2023-01-24 00:25:38.674654: step: 350/463, loss: 0.35790562629699707 2023-01-24 00:25:39.375721: step: 352/463, loss: 0.4415629804134369 2023-01-24 00:25:40.003098: step: 354/463, loss: 0.3281746506690979 2023-01-24 00:25:40.609170: step: 356/463, loss: 1.1492011547088623 2023-01-24 00:25:41.253229: step: 358/463, loss: 0.8363897204399109 2023-01-24 00:25:41.891222: step: 360/463, loss: 8.211515426635742 2023-01-24 00:25:42.564679: step: 362/463, loss: 0.5745518207550049 2023-01-24 00:25:43.147703: step: 364/463, loss: 2.05226469039917 2023-01-24 00:25:43.714717: step: 366/463, loss: 3.104363203048706 2023-01-24 00:25:44.300134: step: 368/463, loss: 0.5608854293823242 2023-01-24 00:25:45.034832: step: 370/463, loss: 1.777147889137268 2023-01-24 00:25:45.629280: step: 372/463, loss: 2.157261610031128 2023-01-24 00:25:46.267006: step: 374/463, loss: 5.277553081512451 2023-01-24 00:25:46.903438: step: 376/463, loss: 2.3521714210510254 2023-01-24 00:25:47.500381: step: 378/463, loss: 0.4879208505153656 2023-01-24 00:25:48.086573: step: 380/463, loss: 0.8589454889297485 2023-01-24 00:25:48.729699: step: 382/463, loss: 0.2890065312385559 2023-01-24 00:25:49.436931: step: 384/463, loss: 2.1020936965942383 2023-01-24 00:25:50.089698: step: 386/463, loss: 0.9060550928115845 2023-01-24 00:25:50.685448: step: 388/463, loss: 1.0928341150283813 2023-01-24 00:25:51.258424: step: 390/463, loss: 0.7427297234535217 2023-01-24 00:25:51.894271: step: 392/463, loss: 2.1845855712890625 2023-01-24 00:25:52.517994: step: 394/463, loss: 0.5345891118049622 2023-01-24 00:25:53.110159: step: 396/463, loss: 2.4184165000915527 2023-01-24 00:25:53.757356: step: 398/463, loss: 1.054634690284729 2023-01-24 00:25:54.390607: step: 400/463, loss: 0.9807265400886536 2023-01-24 00:25:54.980379: step: 402/463, loss: 1.649864673614502 2023-01-24 00:25:55.614920: step: 404/463, loss: 0.6580896973609924 2023-01-24 00:25:56.250666: step: 406/463, loss: 0.24018622934818268 2023-01-24 00:25:56.918930: step: 408/463, loss: 0.8678486347198486 2023-01-24 00:25:57.584048: step: 410/463, loss: 0.8708839416503906 2023-01-24 00:25:58.202918: step: 412/463, loss: 2.3523831367492676 2023-01-24 00:25:58.762723: step: 414/463, loss: 0.5760695338249207 2023-01-24 00:25:59.404656: step: 416/463, loss: 9.316370010375977 2023-01-24 00:26:00.006861: step: 418/463, loss: 7.471907138824463 2023-01-24 00:26:00.563151: step: 420/463, loss: 0.3236074447631836 2023-01-24 00:26:01.204859: step: 422/463, loss: 1.886680245399475 2023-01-24 00:26:01.916637: step: 424/463, loss: 1.2870137691497803 2023-01-24 00:26:02.452156: step: 426/463, loss: 1.1143025159835815 2023-01-24 00:26:03.027551: step: 428/463, loss: 1.4919289350509644 2023-01-24 00:26:03.646854: step: 430/463, loss: 8.144288063049316 2023-01-24 00:26:04.302577: step: 432/463, loss: 8.805116653442383 2023-01-24 00:26:04.908603: step: 434/463, loss: 2.0070152282714844 2023-01-24 00:26:05.522033: step: 436/463, loss: 0.4333421289920807 2023-01-24 00:26:06.136442: step: 438/463, loss: 0.8502134084701538 2023-01-24 00:26:06.736682: step: 440/463, loss: 2.7435638904571533 2023-01-24 00:26:07.284834: step: 442/463, loss: 0.38558271527290344 2023-01-24 00:26:07.861525: step: 444/463, loss: 1.2557249069213867 2023-01-24 00:26:08.434807: step: 446/463, loss: 0.4668707847595215 2023-01-24 00:26:09.098204: step: 448/463, loss: 0.47489798069000244 2023-01-24 00:26:09.716415: step: 450/463, loss: 0.474469393491745 2023-01-24 00:26:10.350892: step: 452/463, loss: 0.9313415288925171 2023-01-24 00:26:10.954054: step: 454/463, loss: 3.299705982208252 2023-01-24 00:26:11.614528: step: 456/463, loss: 3.5572657585144043 2023-01-24 00:26:12.211898: step: 458/463, loss: 0.6326867938041687 2023-01-24 00:26:12.786126: step: 460/463, loss: 1.3534287214279175 2023-01-24 00:26:13.463248: step: 462/463, loss: 7.05568790435791 2023-01-24 00:26:14.039568: step: 464/463, loss: 0.4520980715751648 2023-01-24 00:26:14.658169: step: 466/463, loss: 0.7559211850166321 2023-01-24 00:26:15.251581: step: 468/463, loss: 2.4350976943969727 2023-01-24 00:26:15.975547: step: 470/463, loss: 0.483468234539032 2023-01-24 00:26:16.610815: step: 472/463, loss: 2.465373992919922 2023-01-24 00:26:17.214324: step: 474/463, loss: 0.9175227880477905 2023-01-24 00:26:17.838873: step: 476/463, loss: 1.258840799331665 2023-01-24 00:26:18.500295: step: 478/463, loss: 0.5492610335350037 2023-01-24 00:26:19.162589: step: 480/463, loss: 1.9110479354858398 2023-01-24 00:26:19.789606: step: 482/463, loss: 0.36270177364349365 2023-01-24 00:26:20.459900: step: 484/463, loss: 2.4305403232574463 2023-01-24 00:26:21.139872: step: 486/463, loss: 0.7894012331962585 2023-01-24 00:26:21.733175: step: 488/463, loss: 0.3941482901573181 2023-01-24 00:26:22.313050: step: 490/463, loss: 4.020229816436768 2023-01-24 00:26:22.909785: step: 492/463, loss: 0.9312267303466797 2023-01-24 00:26:23.525342: step: 494/463, loss: 0.5217389464378357 2023-01-24 00:26:24.103898: step: 496/463, loss: 0.8230952620506287 2023-01-24 00:26:24.707698: step: 498/463, loss: 0.31653013825416565 2023-01-24 00:26:25.404253: step: 500/463, loss: 0.9851170182228088 2023-01-24 00:26:26.152925: step: 502/463, loss: 0.5415799617767334 2023-01-24 00:26:26.763175: step: 504/463, loss: 1.740500569343567 2023-01-24 00:26:27.362742: step: 506/463, loss: 0.3798368275165558 2023-01-24 00:26:27.918746: step: 508/463, loss: 0.20772476494312286 2023-01-24 00:26:28.616328: step: 510/463, loss: 1.3534529209136963 2023-01-24 00:26:29.246920: step: 512/463, loss: 1.939197063446045 2023-01-24 00:26:29.875697: step: 514/463, loss: 1.2899365425109863 2023-01-24 00:26:30.474375: step: 516/463, loss: 0.357483834028244 2023-01-24 00:26:31.066401: step: 518/463, loss: 5.634459495544434 2023-01-24 00:26:31.642759: step: 520/463, loss: 1.0966293811798096 2023-01-24 00:26:32.250656: step: 522/463, loss: 1.047799825668335 2023-01-24 00:26:32.906654: step: 524/463, loss: 0.9033491015434265 2023-01-24 00:26:33.509529: step: 526/463, loss: 4.523362159729004 2023-01-24 00:26:34.229855: step: 528/463, loss: 0.883913516998291 2023-01-24 00:26:34.856399: step: 530/463, loss: 0.1642293930053711 2023-01-24 00:26:35.492911: step: 532/463, loss: 0.9187859892845154 2023-01-24 00:26:36.084234: step: 534/463, loss: 0.7170809507369995 2023-01-24 00:26:36.716476: step: 536/463, loss: 1.107334852218628 2023-01-24 00:26:37.370161: step: 538/463, loss: 1.0632717609405518 2023-01-24 00:26:38.055098: step: 540/463, loss: 1.013468623161316 2023-01-24 00:26:38.705035: step: 542/463, loss: 0.9506543278694153 2023-01-24 00:26:39.360048: step: 544/463, loss: 1.0769635438919067 2023-01-24 00:26:39.948503: step: 546/463, loss: 3.354572057723999 2023-01-24 00:26:40.537628: step: 548/463, loss: 1.234755039215088 2023-01-24 00:26:41.116540: step: 550/463, loss: 1.0811183452606201 2023-01-24 00:26:41.725336: step: 552/463, loss: 1.278045892715454 2023-01-24 00:26:42.347601: step: 554/463, loss: 1.124143362045288 2023-01-24 00:26:42.989730: step: 556/463, loss: 0.22297504544258118 2023-01-24 00:26:43.642431: step: 558/463, loss: 2.164219379425049 2023-01-24 00:26:44.305296: step: 560/463, loss: 2.4813928604125977 2023-01-24 00:26:44.926193: step: 562/463, loss: 1.9802374839782715 2023-01-24 00:26:45.571761: step: 564/463, loss: 0.9128883481025696 2023-01-24 00:26:46.187612: step: 566/463, loss: 1.4127860069274902 2023-01-24 00:26:46.828760: step: 568/463, loss: 2.2670340538024902 2023-01-24 00:26:47.542311: step: 570/463, loss: 0.5039073824882507 2023-01-24 00:26:48.140600: step: 572/463, loss: 0.3273859918117523 2023-01-24 00:26:48.781691: step: 574/463, loss: 1.2070035934448242 2023-01-24 00:26:49.362679: step: 576/463, loss: 5.308228969573975 2023-01-24 00:26:49.970650: step: 578/463, loss: 0.3560783565044403 2023-01-24 00:26:50.537577: step: 580/463, loss: 0.6547425985336304 2023-01-24 00:26:51.153630: step: 582/463, loss: 4.00238037109375 2023-01-24 00:26:51.775254: step: 584/463, loss: 0.9966640472412109 2023-01-24 00:26:52.404953: step: 586/463, loss: 0.7293791174888611 2023-01-24 00:26:53.000073: step: 588/463, loss: 1.5459048748016357 2023-01-24 00:26:53.581026: step: 590/463, loss: 0.5778902173042297 2023-01-24 00:26:54.194902: step: 592/463, loss: 1.4275087118148804 2023-01-24 00:26:54.809491: step: 594/463, loss: 3.2873973846435547 2023-01-24 00:26:55.458629: step: 596/463, loss: 1.1217232942581177 2023-01-24 00:26:56.154651: step: 598/463, loss: 1.198265790939331 2023-01-24 00:26:56.720174: step: 600/463, loss: 1.5561188459396362 2023-01-24 00:26:57.316952: step: 602/463, loss: 0.8235931396484375 2023-01-24 00:26:57.911230: step: 604/463, loss: 1.3168529272079468 2023-01-24 00:26:58.546565: step: 606/463, loss: 0.921160101890564 2023-01-24 00:26:59.217112: step: 608/463, loss: 0.28695330023765564 2023-01-24 00:26:59.857850: step: 610/463, loss: 0.46275076270103455 2023-01-24 00:27:00.435284: step: 612/463, loss: 6.218353271484375 2023-01-24 00:27:01.034033: step: 614/463, loss: 0.7837473154067993 2023-01-24 00:27:01.693246: step: 616/463, loss: 0.6660981178283691 2023-01-24 00:27:02.264525: step: 618/463, loss: 0.9069439172744751 2023-01-24 00:27:02.955449: step: 620/463, loss: 0.9275250434875488 2023-01-24 00:27:03.578190: step: 622/463, loss: 0.7706800103187561 2023-01-24 00:27:04.240134: step: 624/463, loss: 0.749581515789032 2023-01-24 00:27:04.928173: step: 626/463, loss: 0.32496774196624756 2023-01-24 00:27:05.485695: step: 628/463, loss: 1.476307988166809 2023-01-24 00:27:06.068475: step: 630/463, loss: 0.3072581887245178 2023-01-24 00:27:06.689200: step: 632/463, loss: 0.3495277762413025 2023-01-24 00:27:07.297952: step: 634/463, loss: 1.194953441619873 2023-01-24 00:27:07.873751: step: 636/463, loss: 0.4293559193611145 2023-01-24 00:27:08.521319: step: 638/463, loss: 0.21525681018829346 2023-01-24 00:27:09.178477: step: 640/463, loss: 0.5561047196388245 2023-01-24 00:27:09.850301: step: 642/463, loss: 1.3673030138015747 2023-01-24 00:27:10.408614: step: 644/463, loss: 3.6644043922424316 2023-01-24 00:27:11.021829: step: 646/463, loss: 0.4359138309955597 2023-01-24 00:27:11.633149: step: 648/463, loss: 6.949115753173828 2023-01-24 00:27:12.317977: step: 650/463, loss: 10.399660110473633 2023-01-24 00:27:12.908012: step: 652/463, loss: 0.44728463888168335 2023-01-24 00:27:13.497731: step: 654/463, loss: 0.6902983784675598 2023-01-24 00:27:14.171028: step: 656/463, loss: 0.3838913142681122 2023-01-24 00:27:14.855947: step: 658/463, loss: 1.2425439357757568 2023-01-24 00:27:15.483134: step: 660/463, loss: 2.1563799381256104 2023-01-24 00:27:16.097174: step: 662/463, loss: 0.6916783452033997 2023-01-24 00:27:16.698906: step: 664/463, loss: 0.9129911065101624 2023-01-24 00:27:17.298655: step: 666/463, loss: 8.048491477966309 2023-01-24 00:27:17.973611: step: 668/463, loss: 0.5652790665626526 2023-01-24 00:27:18.579191: step: 670/463, loss: 0.553842306137085 2023-01-24 00:27:19.253168: step: 672/463, loss: 1.1391154527664185 2023-01-24 00:27:19.842818: step: 674/463, loss: 3.352278709411621 2023-01-24 00:27:20.547874: step: 676/463, loss: 1.7200900316238403 2023-01-24 00:27:21.144566: step: 678/463, loss: 0.7151714563369751 2023-01-24 00:27:21.771426: step: 680/463, loss: 1.0860754251480103 2023-01-24 00:27:22.429086: step: 682/463, loss: 1.88545560836792 2023-01-24 00:27:23.079338: step: 684/463, loss: 1.268572449684143 2023-01-24 00:27:23.640325: step: 686/463, loss: 1.75947904586792 2023-01-24 00:27:24.246681: step: 688/463, loss: 1.295336127281189 2023-01-24 00:27:24.798442: step: 690/463, loss: 1.4038575887680054 2023-01-24 00:27:25.427964: step: 692/463, loss: 0.4189010262489319 2023-01-24 00:27:26.084683: step: 694/463, loss: 2.4310216903686523 2023-01-24 00:27:26.776784: step: 696/463, loss: 0.6463856101036072 2023-01-24 00:27:27.395443: step: 698/463, loss: 3.049435615539551 2023-01-24 00:27:28.060895: step: 700/463, loss: 0.9061576128005981 2023-01-24 00:27:28.698055: step: 702/463, loss: 0.6232960224151611 2023-01-24 00:27:29.349406: step: 704/463, loss: 0.6476078629493713 2023-01-24 00:27:29.935795: step: 706/463, loss: 0.31566017866134644 2023-01-24 00:27:30.661612: step: 708/463, loss: 2.5200462341308594 2023-01-24 00:27:31.296674: step: 710/463, loss: 1.9629522562026978 2023-01-24 00:27:31.971850: step: 712/463, loss: 6.286133766174316 2023-01-24 00:27:32.516921: step: 714/463, loss: 0.6906088590621948 2023-01-24 00:27:33.106990: step: 716/463, loss: 0.5868412256240845 2023-01-24 00:27:33.904512: step: 718/463, loss: 0.7171429395675659 2023-01-24 00:27:34.502266: step: 720/463, loss: 2.81927490234375 2023-01-24 00:27:35.099404: step: 722/463, loss: 0.9084492325782776 2023-01-24 00:27:35.803435: step: 724/463, loss: 2.517371654510498 2023-01-24 00:27:36.413403: step: 726/463, loss: 2.4285521507263184 2023-01-24 00:27:37.012502: step: 728/463, loss: 1.7607901096343994 2023-01-24 00:27:37.556720: step: 730/463, loss: 1.5865906476974487 2023-01-24 00:27:38.181700: step: 732/463, loss: 1.5720276832580566 2023-01-24 00:27:38.766753: step: 734/463, loss: 0.756534218788147 2023-01-24 00:27:39.402144: step: 736/463, loss: 0.3365505337715149 2023-01-24 00:27:40.071083: step: 738/463, loss: 2.3311614990234375 2023-01-24 00:27:40.695578: step: 740/463, loss: 1.0682337284088135 2023-01-24 00:27:41.270847: step: 742/463, loss: 0.9786279797554016 2023-01-24 00:27:41.925605: step: 744/463, loss: 4.865859031677246 2023-01-24 00:27:42.600303: step: 746/463, loss: 0.4941819906234741 2023-01-24 00:27:43.296085: step: 748/463, loss: 5.528733253479004 2023-01-24 00:27:43.908815: step: 750/463, loss: 0.8379408121109009 2023-01-24 00:27:44.598753: step: 752/463, loss: 1.5344420671463013 2023-01-24 00:27:45.219019: step: 754/463, loss: 2.0737733840942383 2023-01-24 00:27:45.881384: step: 756/463, loss: 1.3739871978759766 2023-01-24 00:27:46.592644: step: 758/463, loss: 3.3510522842407227 2023-01-24 00:27:47.183679: step: 760/463, loss: 2.1462883949279785 2023-01-24 00:27:47.845850: step: 762/463, loss: 0.6280003190040588 2023-01-24 00:27:48.498961: step: 764/463, loss: 1.7479057312011719 2023-01-24 00:27:49.082545: step: 766/463, loss: 2.78000545501709 2023-01-24 00:27:49.728364: step: 768/463, loss: 1.1239213943481445 2023-01-24 00:27:50.326017: step: 770/463, loss: 3.3071258068084717 2023-01-24 00:27:50.956639: step: 772/463, loss: 4.388950824737549 2023-01-24 00:27:51.562029: step: 774/463, loss: 4.353854656219482 2023-01-24 00:27:52.170372: step: 776/463, loss: 0.5010625123977661 2023-01-24 00:27:52.792634: step: 778/463, loss: 4.836090564727783 2023-01-24 00:27:53.401787: step: 780/463, loss: 0.608014702796936 2023-01-24 00:27:53.995177: step: 782/463, loss: 1.0015054941177368 2023-01-24 00:27:54.680482: step: 784/463, loss: 0.7252769470214844 2023-01-24 00:27:55.319355: step: 786/463, loss: 0.6398787498474121 2023-01-24 00:27:55.963040: step: 788/463, loss: 0.6458868980407715 2023-01-24 00:27:56.606113: step: 790/463, loss: 0.8810621500015259 2023-01-24 00:27:57.168321: step: 792/463, loss: 1.3582408428192139 2023-01-24 00:27:57.802581: step: 794/463, loss: 1.4753882884979248 2023-01-24 00:27:58.399233: step: 796/463, loss: 0.6381165385246277 2023-01-24 00:27:59.091814: step: 798/463, loss: 4.46254301071167 2023-01-24 00:27:59.710436: step: 800/463, loss: 1.0283323526382446 2023-01-24 00:28:00.325241: step: 802/463, loss: 1.2114746570587158 2023-01-24 00:28:00.901165: step: 804/463, loss: 0.43088680505752563 2023-01-24 00:28:01.583080: step: 806/463, loss: 1.3423254489898682 2023-01-24 00:28:02.230836: step: 808/463, loss: 3.007571220397949 2023-01-24 00:28:02.836456: step: 810/463, loss: 1.1263375282287598 2023-01-24 00:28:03.500068: step: 812/463, loss: 2.006821870803833 2023-01-24 00:28:04.108123: step: 814/463, loss: 1.7718194723129272 2023-01-24 00:28:04.724392: step: 816/463, loss: 1.7917163372039795 2023-01-24 00:28:05.353616: step: 818/463, loss: 0.42824414372444153 2023-01-24 00:28:06.065161: step: 820/463, loss: 0.9464489221572876 2023-01-24 00:28:06.726984: step: 822/463, loss: 1.3767268657684326 2023-01-24 00:28:07.303903: step: 824/463, loss: 1.622650146484375 2023-01-24 00:28:07.907743: step: 826/463, loss: 0.3880457878112793 2023-01-24 00:28:08.487579: step: 828/463, loss: 0.6651276350021362 2023-01-24 00:28:09.109506: step: 830/463, loss: 0.8124265670776367 2023-01-24 00:28:09.777075: step: 832/463, loss: 0.44176140427589417 2023-01-24 00:28:10.403412: step: 834/463, loss: 0.3317285478115082 2023-01-24 00:28:11.048640: step: 836/463, loss: 0.5124133229255676 2023-01-24 00:28:11.782729: step: 838/463, loss: 4.755606651306152 2023-01-24 00:28:12.438452: step: 840/463, loss: 0.2805412709712982 2023-01-24 00:28:13.110050: step: 842/463, loss: 0.48736289143562317 2023-01-24 00:28:13.717570: step: 844/463, loss: 0.7905008792877197 2023-01-24 00:28:14.440020: step: 846/463, loss: 0.2746060788631439 2023-01-24 00:28:15.050884: step: 848/463, loss: 0.3225979208946228 2023-01-24 00:28:15.640722: step: 850/463, loss: 1.3473353385925293 2023-01-24 00:28:16.190509: step: 852/463, loss: 1.595343828201294 2023-01-24 00:28:16.807286: step: 854/463, loss: 1.3866872787475586 2023-01-24 00:28:17.438895: step: 856/463, loss: 2.678562879562378 2023-01-24 00:28:18.065678: step: 858/463, loss: 1.3150672912597656 2023-01-24 00:28:18.649270: step: 860/463, loss: 0.4935355484485626 2023-01-24 00:28:19.272666: step: 862/463, loss: 0.37159791588783264 2023-01-24 00:28:19.875147: step: 864/463, loss: 1.1581287384033203 2023-01-24 00:28:20.517653: step: 866/463, loss: 0.7029290795326233 2023-01-24 00:28:21.088469: step: 868/463, loss: 1.3479571342468262 2023-01-24 00:28:21.717872: step: 870/463, loss: 0.631375253200531 2023-01-24 00:28:22.313158: step: 872/463, loss: 0.7809395790100098 2023-01-24 00:28:22.909138: step: 874/463, loss: 1.217526912689209 2023-01-24 00:28:23.582105: step: 876/463, loss: 0.7933959364891052 2023-01-24 00:28:24.200553: step: 878/463, loss: 0.96867436170578 2023-01-24 00:28:24.816028: step: 880/463, loss: 3.857832431793213 2023-01-24 00:28:25.502796: step: 882/463, loss: 9.899333953857422 2023-01-24 00:28:26.111820: step: 884/463, loss: 0.8342276811599731 2023-01-24 00:28:26.706888: step: 886/463, loss: 0.48514920473098755 2023-01-24 00:28:27.304384: step: 888/463, loss: 1.6072485446929932 2023-01-24 00:28:27.947292: step: 890/463, loss: 0.44526222348213196 2023-01-24 00:28:28.597233: step: 892/463, loss: 1.2081446647644043 2023-01-24 00:28:29.205142: step: 894/463, loss: 0.6090028285980225 2023-01-24 00:28:29.759734: step: 896/463, loss: 0.6402276158332825 2023-01-24 00:28:30.356285: step: 898/463, loss: 1.281656265258789 2023-01-24 00:28:31.025038: step: 900/463, loss: 2.849609375 2023-01-24 00:28:31.667269: step: 902/463, loss: 0.9052131175994873 2023-01-24 00:28:32.300216: step: 904/463, loss: 1.2643420696258545 2023-01-24 00:28:32.956465: step: 906/463, loss: 0.9379635453224182 2023-01-24 00:28:33.592249: step: 908/463, loss: 0.5241138339042664 2023-01-24 00:28:34.371477: step: 910/463, loss: 1.2361594438552856 2023-01-24 00:28:34.953579: step: 912/463, loss: 1.4825359582901 2023-01-24 00:28:35.582642: step: 914/463, loss: 0.9015185832977295 2023-01-24 00:28:36.247764: step: 916/463, loss: 0.5484318733215332 2023-01-24 00:28:36.865196: step: 918/463, loss: 1.6188864707946777 2023-01-24 00:28:37.556829: step: 920/463, loss: 3.3230466842651367 2023-01-24 00:28:38.122275: step: 922/463, loss: 0.4092645049095154 2023-01-24 00:28:38.742308: step: 924/463, loss: 1.1637914180755615 2023-01-24 00:28:39.382744: step: 926/463, loss: 1.2394627332687378 ================================================== Loss: 1.598 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3359078590785908, 'r': 0.2347537878787879, 'f1': 0.27636566332218504}, 'combined': 0.2036378571847679, 'epoch': 2} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.37459689049024225, 'r': 0.22280371573506583, 'f1': 0.2794157830919582}, 'combined': 0.19657391775313643, 'epoch': 2} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3348558726516564, 'r': 0.23275019936204144, 'f1': 0.2746192296383416}, 'combined': 0.20235101131246225, 'epoch': 2} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3734992811743959, 'r': 0.22006876706272066, 'f1': 0.2769540124370931}, 'combined': 0.1966373488303361, 'epoch': 2} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34451426630434784, 'r': 0.24011600378787878, 'f1': 0.2829938616071429}, 'combined': 0.2085217927631579, 'epoch': 2} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37802751332839785, 'r': 0.21629748153920503, 'f1': 0.2751571944359356}, 'combined': 0.19536160804951427, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3657407407407407, 'r': 0.1880952380952381, 'f1': 0.24842767295597484}, 'combined': 0.16561844863731656, 'epoch': 2} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.358695652173913, 'r': 0.358695652173913, 'f1': 0.358695652173913}, 'combined': 0.1793478260869565, 'epoch': 2} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.46875, 'r': 0.12931034482758622, 'f1': 0.20270270270270271}, 'combined': 0.13513513513513514, 'epoch': 2} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3359078590785908, 'r': 0.2347537878787879, 'f1': 0.27636566332218504}, 'combined': 0.2036378571847679, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.37459689049024225, 'r': 0.22280371573506583, 'f1': 0.2794157830919582}, 'combined': 0.19657391775313643, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3657407407407407, 'r': 0.1880952380952381, 'f1': 0.24842767295597484}, 'combined': 0.16561844863731656, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3348558726516564, 'r': 0.23275019936204144, 'f1': 0.2746192296383416}, 'combined': 0.20235101131246225, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3734992811743959, 'r': 0.22006876706272066, 'f1': 0.2769540124370931}, 'combined': 0.1966373488303361, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.358695652173913, 'r': 0.358695652173913, 'f1': 0.358695652173913}, 'combined': 0.1793478260869565, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34451426630434784, 'r': 0.24011600378787878, 'f1': 0.2829938616071429}, 'combined': 0.2085217927631579, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37802751332839785, 'r': 0.21629748153920503, 'f1': 0.2751571944359356}, 'combined': 0.19536160804951427, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.46875, 'r': 0.12931034482758622, 'f1': 0.20270270270270271}, 'combined': 0.13513513513513514, 'epoch': 2} ****************************** Epoch: 3 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:31:45.766859: step: 2/463, loss: 1.9631409645080566 2023-01-24 00:31:46.373536: step: 4/463, loss: 0.3295742869377136 2023-01-24 00:31:47.013900: step: 6/463, loss: 5.400371074676514 2023-01-24 00:31:47.626352: step: 8/463, loss: 2.165060520172119 2023-01-24 00:31:48.249983: step: 10/463, loss: 0.7333164215087891 2023-01-24 00:31:48.855233: step: 12/463, loss: 0.3906591236591339 2023-01-24 00:31:49.523448: step: 14/463, loss: 1.0042941570281982 2023-01-24 00:31:50.204402: step: 16/463, loss: 2.549551010131836 2023-01-24 00:31:50.853734: step: 18/463, loss: 0.3142782747745514 2023-01-24 00:31:51.424229: step: 20/463, loss: 1.570871114730835 2023-01-24 00:31:52.038285: step: 22/463, loss: 1.670881986618042 2023-01-24 00:31:52.624246: step: 24/463, loss: 0.4239998757839203 2023-01-24 00:31:53.238061: step: 26/463, loss: 0.9970145225524902 2023-01-24 00:31:53.867792: step: 28/463, loss: 0.38401028513908386 2023-01-24 00:31:54.543563: step: 30/463, loss: 0.8737073540687561 2023-01-24 00:31:55.156296: step: 32/463, loss: 0.28797993063926697 2023-01-24 00:31:55.883155: step: 34/463, loss: 0.31003451347351074 2023-01-24 00:31:56.581386: step: 36/463, loss: 0.1685328334569931 2023-01-24 00:31:57.197581: step: 38/463, loss: 0.5837168097496033 2023-01-24 00:31:57.847958: step: 40/463, loss: 0.11931995302438736 2023-01-24 00:31:58.545184: step: 42/463, loss: 0.5261853337287903 2023-01-24 00:31:59.196525: step: 44/463, loss: 1.7068061828613281 2023-01-24 00:31:59.791304: step: 46/463, loss: 3.0730509757995605 2023-01-24 00:32:00.405914: step: 48/463, loss: 0.25946733355522156 2023-01-24 00:32:01.011197: step: 50/463, loss: 0.4657612442970276 2023-01-24 00:32:01.593522: step: 52/463, loss: 0.3661884665489197 2023-01-24 00:32:02.221434: step: 54/463, loss: 0.9090256690979004 2023-01-24 00:32:02.922926: step: 56/463, loss: 0.9568942189216614 2023-01-24 00:32:03.529490: step: 58/463, loss: 3.9637742042541504 2023-01-24 00:32:04.179331: step: 60/463, loss: 0.6480165719985962 2023-01-24 00:32:04.814717: step: 62/463, loss: 4.02121114730835 2023-01-24 00:32:05.466907: step: 64/463, loss: 0.259114533662796 2023-01-24 00:32:06.087329: step: 66/463, loss: 2.003004789352417 2023-01-24 00:32:06.686136: step: 68/463, loss: 1.2677558660507202 2023-01-24 00:32:07.283675: step: 70/463, loss: 0.35733819007873535 2023-01-24 00:32:07.914176: step: 72/463, loss: 2.159743070602417 2023-01-24 00:32:08.569448: step: 74/463, loss: 0.49539950489997864 2023-01-24 00:32:09.138209: step: 76/463, loss: 0.8129820227622986 2023-01-24 00:32:09.864663: step: 78/463, loss: 6.430912017822266 2023-01-24 00:32:10.512113: step: 80/463, loss: 0.9140894412994385 2023-01-24 00:32:11.264576: step: 82/463, loss: 0.29985129833221436 2023-01-24 00:32:11.908654: step: 84/463, loss: 0.3343806862831116 2023-01-24 00:32:12.539584: step: 86/463, loss: 1.0786560773849487 2023-01-24 00:32:13.161712: step: 88/463, loss: 0.7417458295822144 2023-01-24 00:32:13.752539: step: 90/463, loss: 1.8798578977584839 2023-01-24 00:32:14.368370: step: 92/463, loss: 0.6896175146102905 2023-01-24 00:32:14.991837: step: 94/463, loss: 0.23675788938999176 2023-01-24 00:32:15.644050: step: 96/463, loss: 7.074843406677246 2023-01-24 00:32:16.258328: step: 98/463, loss: 0.41692909598350525 2023-01-24 00:32:16.951346: step: 100/463, loss: 2.5718042850494385 2023-01-24 00:32:17.590841: step: 102/463, loss: 0.7475202083587646 2023-01-24 00:32:18.189967: step: 104/463, loss: 0.5104691386222839 2023-01-24 00:32:18.812460: step: 106/463, loss: 0.5841565132141113 2023-01-24 00:32:19.496868: step: 108/463, loss: 0.5135908126831055 2023-01-24 00:32:20.065424: step: 110/463, loss: 0.510888397693634 2023-01-24 00:32:20.737378: step: 112/463, loss: 0.241728276014328 2023-01-24 00:32:21.380420: step: 114/463, loss: 0.8761977553367615 2023-01-24 00:32:22.010706: step: 116/463, loss: 3.308727264404297 2023-01-24 00:32:22.672950: step: 118/463, loss: 1.5532370805740356 2023-01-24 00:32:23.326498: step: 120/463, loss: 0.5126050710678101 2023-01-24 00:32:23.965828: step: 122/463, loss: 14.654118537902832 2023-01-24 00:32:24.581964: step: 124/463, loss: 0.16141122579574585 2023-01-24 00:32:25.215568: step: 126/463, loss: 1.0366311073303223 2023-01-24 00:32:25.787478: step: 128/463, loss: 0.20946122705936432 2023-01-24 00:32:26.421622: step: 130/463, loss: 1.0647927522659302 2023-01-24 00:32:26.960776: step: 132/463, loss: 1.441382884979248 2023-01-24 00:32:27.553105: step: 134/463, loss: 0.11995767056941986 2023-01-24 00:32:28.152380: step: 136/463, loss: 6.160243511199951 2023-01-24 00:32:28.730985: step: 138/463, loss: 0.4830733835697174 2023-01-24 00:32:29.333092: step: 140/463, loss: 0.2555239200592041 2023-01-24 00:32:30.012532: step: 142/463, loss: 1.0444223880767822 2023-01-24 00:32:30.618358: step: 144/463, loss: 0.7386916279792786 2023-01-24 00:32:31.278470: step: 146/463, loss: 0.4643843472003937 2023-01-24 00:32:31.917745: step: 148/463, loss: 0.9403019547462463 2023-01-24 00:32:32.520295: step: 150/463, loss: 0.15822499990463257 2023-01-24 00:32:33.127586: step: 152/463, loss: 0.3805690407752991 2023-01-24 00:32:33.722954: step: 154/463, loss: 0.5390866994857788 2023-01-24 00:32:34.302836: step: 156/463, loss: 0.330912709236145 2023-01-24 00:32:34.851478: step: 158/463, loss: 1.8018302917480469 2023-01-24 00:32:35.586847: step: 160/463, loss: 3.447479724884033 2023-01-24 00:32:36.228556: step: 162/463, loss: 0.510549008846283 2023-01-24 00:32:36.836877: step: 164/463, loss: 4.088376998901367 2023-01-24 00:32:37.485219: step: 166/463, loss: 1.1485602855682373 2023-01-24 00:32:38.039910: step: 168/463, loss: 4.875633239746094 2023-01-24 00:32:38.693488: step: 170/463, loss: 0.5968398451805115 2023-01-24 00:32:39.359074: step: 172/463, loss: 0.7899906635284424 2023-01-24 00:32:40.076418: step: 174/463, loss: 0.9362796545028687 2023-01-24 00:32:40.691649: step: 176/463, loss: 0.5119436383247375 2023-01-24 00:32:41.381113: step: 178/463, loss: 0.6226829290390015 2023-01-24 00:32:42.073659: step: 180/463, loss: 0.9002023935317993 2023-01-24 00:32:42.715144: step: 182/463, loss: 0.5335453748703003 2023-01-24 00:32:43.338055: step: 184/463, loss: 0.4864686131477356 2023-01-24 00:32:44.052511: step: 186/463, loss: 0.8112006783485413 2023-01-24 00:32:44.634918: step: 188/463, loss: 0.23099157214164734 2023-01-24 00:32:45.235240: step: 190/463, loss: 0.3035227656364441 2023-01-24 00:32:45.850508: step: 192/463, loss: 0.6683353185653687 2023-01-24 00:32:46.454512: step: 194/463, loss: 0.9597965478897095 2023-01-24 00:32:47.127251: step: 196/463, loss: 0.8424886465072632 2023-01-24 00:32:47.713477: step: 198/463, loss: 0.8805145025253296 2023-01-24 00:32:48.327913: step: 200/463, loss: 0.8179623484611511 2023-01-24 00:32:48.994747: step: 202/463, loss: 2.8997373580932617 2023-01-24 00:32:49.645406: step: 204/463, loss: 1.7417500019073486 2023-01-24 00:32:50.332386: step: 206/463, loss: 1.435079574584961 2023-01-24 00:32:51.009473: step: 208/463, loss: 0.9599257707595825 2023-01-24 00:32:51.629529: step: 210/463, loss: 2.5940370559692383 2023-01-24 00:32:52.261075: step: 212/463, loss: 2.9812111854553223 2023-01-24 00:32:52.873203: step: 214/463, loss: 1.592813491821289 2023-01-24 00:32:53.407638: step: 216/463, loss: 0.3834039866924286 2023-01-24 00:32:54.037071: step: 218/463, loss: 1.1784158945083618 2023-01-24 00:32:54.677066: step: 220/463, loss: 1.1251037120819092 2023-01-24 00:32:55.319424: step: 222/463, loss: 2.6616623401641846 2023-01-24 00:32:55.994674: step: 224/463, loss: 0.8062566518783569 2023-01-24 00:32:56.622030: step: 226/463, loss: 0.9484009146690369 2023-01-24 00:32:57.310723: step: 228/463, loss: 0.6097075343132019 2023-01-24 00:32:57.962758: step: 230/463, loss: 0.43448060750961304 2023-01-24 00:32:58.615554: step: 232/463, loss: 1.5545936822891235 2023-01-24 00:32:59.220028: step: 234/463, loss: 0.2882930040359497 2023-01-24 00:32:59.752099: step: 236/463, loss: 0.9684540629386902 2023-01-24 00:33:00.384435: step: 238/463, loss: 0.8174282312393188 2023-01-24 00:33:00.990226: step: 240/463, loss: 2.4796576499938965 2023-01-24 00:33:01.633523: step: 242/463, loss: 0.3335955739021301 2023-01-24 00:33:02.239451: step: 244/463, loss: 0.7860901355743408 2023-01-24 00:33:02.848503: step: 246/463, loss: 0.8534994125366211 2023-01-24 00:33:03.548131: step: 248/463, loss: 1.5766079425811768 2023-01-24 00:33:04.126869: step: 250/463, loss: 0.6798403859138489 2023-01-24 00:33:04.783211: step: 252/463, loss: 1.0301579236984253 2023-01-24 00:33:05.412257: step: 254/463, loss: 4.389966011047363 2023-01-24 00:33:06.049870: step: 256/463, loss: 0.9535230398178101 2023-01-24 00:33:06.626968: step: 258/463, loss: 0.8579078912734985 2023-01-24 00:33:07.228175: step: 260/463, loss: 0.23244629800319672 2023-01-24 00:33:07.808094: step: 262/463, loss: 1.9337005615234375 2023-01-24 00:33:08.415192: step: 264/463, loss: 0.5071777701377869 2023-01-24 00:33:09.064170: step: 266/463, loss: 0.6461679935455322 2023-01-24 00:33:09.674547: step: 268/463, loss: 1.151232361793518 2023-01-24 00:33:10.366462: step: 270/463, loss: 1.8946025371551514 2023-01-24 00:33:11.000320: step: 272/463, loss: 3.2115299701690674 2023-01-24 00:33:11.616768: step: 274/463, loss: 0.5689046382904053 2023-01-24 00:33:12.215391: step: 276/463, loss: 0.31607815623283386 2023-01-24 00:33:12.837573: step: 278/463, loss: 0.5945724844932556 2023-01-24 00:33:13.470851: step: 280/463, loss: 3.64784574508667 2023-01-24 00:33:14.079503: step: 282/463, loss: 3.565765619277954 2023-01-24 00:33:14.707735: step: 284/463, loss: 0.500117301940918 2023-01-24 00:33:15.384235: step: 286/463, loss: 0.37881290912628174 2023-01-24 00:33:16.015555: step: 288/463, loss: 2.3957204818725586 2023-01-24 00:33:16.584588: step: 290/463, loss: 0.879256010055542 2023-01-24 00:33:17.138033: step: 292/463, loss: 2.1206486225128174 2023-01-24 00:33:17.795141: step: 294/463, loss: 1.466248869895935 2023-01-24 00:33:18.461881: step: 296/463, loss: 3.4259541034698486 2023-01-24 00:33:19.086857: step: 298/463, loss: 0.4141486585140228 2023-01-24 00:33:19.714900: step: 300/463, loss: 1.6834783554077148 2023-01-24 00:33:20.329537: step: 302/463, loss: 0.3619813919067383 2023-01-24 00:33:20.982612: step: 304/463, loss: 1.5470068454742432 2023-01-24 00:33:21.629473: step: 306/463, loss: 0.7013152837753296 2023-01-24 00:33:22.211957: step: 308/463, loss: 0.911643922328949 2023-01-24 00:33:22.831827: step: 310/463, loss: 1.921042561531067 2023-01-24 00:33:23.451093: step: 312/463, loss: 0.7804310321807861 2023-01-24 00:33:24.100970: step: 314/463, loss: 1.6041079759597778 2023-01-24 00:33:24.704883: step: 316/463, loss: 0.22782060503959656 2023-01-24 00:33:25.310920: step: 318/463, loss: 0.6577022671699524 2023-01-24 00:33:25.962481: step: 320/463, loss: 0.6868587136268616 2023-01-24 00:33:26.543996: step: 322/463, loss: 0.9585215449333191 2023-01-24 00:33:27.144235: step: 324/463, loss: 0.7922383546829224 2023-01-24 00:33:27.822597: step: 326/463, loss: 0.6419047713279724 2023-01-24 00:33:28.519618: step: 328/463, loss: 4.990467071533203 2023-01-24 00:33:29.181932: step: 330/463, loss: 1.1048173904418945 2023-01-24 00:33:29.747085: step: 332/463, loss: 2.630852699279785 2023-01-24 00:33:30.369655: step: 334/463, loss: 0.9806223511695862 2023-01-24 00:33:30.973924: step: 336/463, loss: 1.1511784791946411 2023-01-24 00:33:31.625831: step: 338/463, loss: 1.0340145826339722 2023-01-24 00:33:32.229222: step: 340/463, loss: 0.3171960711479187 2023-01-24 00:33:32.799534: step: 342/463, loss: 1.1889283657073975 2023-01-24 00:33:33.438655: step: 344/463, loss: 3.024854898452759 2023-01-24 00:33:34.112144: step: 346/463, loss: 1.23025381565094 2023-01-24 00:33:34.748072: step: 348/463, loss: 0.9623467922210693 2023-01-24 00:33:35.490012: step: 350/463, loss: 1.387969970703125 2023-01-24 00:33:36.118348: step: 352/463, loss: 0.8465249538421631 2023-01-24 00:33:36.765388: step: 354/463, loss: 0.3635883927345276 2023-01-24 00:33:37.427418: step: 356/463, loss: 0.9950834512710571 2023-01-24 00:33:38.057047: step: 358/463, loss: 0.5810906887054443 2023-01-24 00:33:38.678082: step: 360/463, loss: 2.273775815963745 2023-01-24 00:33:39.382424: step: 362/463, loss: 0.8270940780639648 2023-01-24 00:33:39.972205: step: 364/463, loss: 1.6942641735076904 2023-01-24 00:33:40.657437: step: 366/463, loss: 1.3049671649932861 2023-01-24 00:33:41.261999: step: 368/463, loss: 0.5454331040382385 2023-01-24 00:33:41.926497: step: 370/463, loss: 0.610421895980835 2023-01-24 00:33:42.470749: step: 372/463, loss: 1.0948559045791626 2023-01-24 00:33:43.119841: step: 374/463, loss: 6.674142360687256 2023-01-24 00:33:43.781393: step: 376/463, loss: 2.31650447845459 2023-01-24 00:33:44.355556: step: 378/463, loss: 9.842432022094727 2023-01-24 00:33:44.966512: step: 380/463, loss: 0.3015255928039551 2023-01-24 00:33:45.574162: step: 382/463, loss: 0.5648266077041626 2023-01-24 00:33:46.208853: step: 384/463, loss: 0.40498024225234985 2023-01-24 00:33:46.820954: step: 386/463, loss: 0.2953656315803528 2023-01-24 00:33:47.438531: step: 388/463, loss: 1.337213158607483 2023-01-24 00:33:48.032693: step: 390/463, loss: 0.41959941387176514 2023-01-24 00:33:48.654732: step: 392/463, loss: 1.4711488485336304 2023-01-24 00:33:49.270727: step: 394/463, loss: 0.3753756284713745 2023-01-24 00:33:49.912112: step: 396/463, loss: 0.7303197979927063 2023-01-24 00:33:50.488188: step: 398/463, loss: 1.4755606651306152 2023-01-24 00:33:51.127809: step: 400/463, loss: 2.029747247695923 2023-01-24 00:33:51.735608: step: 402/463, loss: 1.6652833223342896 2023-01-24 00:33:52.375938: step: 404/463, loss: 1.1327297687530518 2023-01-24 00:33:52.910170: step: 406/463, loss: 1.1665009260177612 2023-01-24 00:33:53.541513: step: 408/463, loss: 0.2519243061542511 2023-01-24 00:33:54.225009: step: 410/463, loss: 1.0021592378616333 2023-01-24 00:33:54.855379: step: 412/463, loss: 1.3879551887512207 2023-01-24 00:33:55.491302: step: 414/463, loss: 5.923034191131592 2023-01-24 00:33:56.152929: step: 416/463, loss: 0.7921345233917236 2023-01-24 00:33:56.809310: step: 418/463, loss: 0.4115428924560547 2023-01-24 00:33:57.425174: step: 420/463, loss: 0.41447991132736206 2023-01-24 00:33:58.105061: step: 422/463, loss: 1.258296251296997 2023-01-24 00:33:58.662113: step: 424/463, loss: 3.083317756652832 2023-01-24 00:33:59.295813: step: 426/463, loss: 0.9887552857398987 2023-01-24 00:33:59.904407: step: 428/463, loss: 0.42590761184692383 2023-01-24 00:34:00.582008: step: 430/463, loss: 0.621262788772583 2023-01-24 00:34:01.209143: step: 432/463, loss: 0.14389126002788544 2023-01-24 00:34:01.800979: step: 434/463, loss: 0.4204776883125305 2023-01-24 00:34:02.413978: step: 436/463, loss: 0.8170091509819031 2023-01-24 00:34:03.071435: step: 438/463, loss: 0.5282797813415527 2023-01-24 00:34:03.665378: step: 440/463, loss: 0.8529421091079712 2023-01-24 00:34:04.267864: step: 442/463, loss: 0.6759949326515198 2023-01-24 00:34:04.901316: step: 444/463, loss: 1.154392957687378 2023-01-24 00:34:05.595009: step: 446/463, loss: 1.7369529008865356 2023-01-24 00:34:06.245464: step: 448/463, loss: 1.9464983940124512 2023-01-24 00:34:06.942432: step: 450/463, loss: 0.8790971636772156 2023-01-24 00:34:07.544362: step: 452/463, loss: 0.27071911096572876 2023-01-24 00:34:08.178235: step: 454/463, loss: 1.0688369274139404 2023-01-24 00:34:08.821475: step: 456/463, loss: 1.0795540809631348 2023-01-24 00:34:09.400802: step: 458/463, loss: 3.497401237487793 2023-01-24 00:34:09.996415: step: 460/463, loss: 0.42303401231765747 2023-01-24 00:34:10.649380: step: 462/463, loss: 0.3534432053565979 2023-01-24 00:34:11.224603: step: 464/463, loss: 0.5937984585762024 2023-01-24 00:34:11.871684: step: 466/463, loss: 1.2703676223754883 2023-01-24 00:34:12.472554: step: 468/463, loss: 0.6497911810874939 2023-01-24 00:34:13.127217: step: 470/463, loss: 0.9235516786575317 2023-01-24 00:34:13.733279: step: 472/463, loss: 0.3466959595680237 2023-01-24 00:34:14.275903: step: 474/463, loss: 2.1102547645568848 2023-01-24 00:34:14.931916: step: 476/463, loss: 0.48518437147140503 2023-01-24 00:34:15.609842: step: 478/463, loss: 3.5039477348327637 2023-01-24 00:34:16.327466: step: 480/463, loss: 0.33502376079559326 2023-01-24 00:34:16.920682: step: 482/463, loss: 0.7092198729515076 2023-01-24 00:34:17.596627: step: 484/463, loss: 1.361219882965088 2023-01-24 00:34:18.215743: step: 486/463, loss: 1.331363558769226 2023-01-24 00:34:18.778981: step: 488/463, loss: 0.5899779796600342 2023-01-24 00:34:19.450481: step: 490/463, loss: 0.5424063205718994 2023-01-24 00:34:20.085905: step: 492/463, loss: 0.5190826654434204 2023-01-24 00:34:20.619559: step: 494/463, loss: 1.2172343730926514 2023-01-24 00:34:21.238368: step: 496/463, loss: 0.7246772646903992 2023-01-24 00:34:21.813972: step: 498/463, loss: 2.6958041191101074 2023-01-24 00:34:22.450132: step: 500/463, loss: 2.423814296722412 2023-01-24 00:34:23.028852: step: 502/463, loss: 3.8397650718688965 2023-01-24 00:34:23.714137: step: 504/463, loss: 2.174011707305908 2023-01-24 00:34:24.288829: step: 506/463, loss: 0.6858105063438416 2023-01-24 00:34:24.966520: step: 508/463, loss: 0.962867259979248 2023-01-24 00:34:25.565201: step: 510/463, loss: 2.7409560680389404 2023-01-24 00:34:26.246204: step: 512/463, loss: 1.3292902708053589 2023-01-24 00:34:26.928388: step: 514/463, loss: 2.5477705001831055 2023-01-24 00:34:27.530881: step: 516/463, loss: 1.2355120182037354 2023-01-24 00:34:28.170707: step: 518/463, loss: 1.7557098865509033 2023-01-24 00:34:28.843853: step: 520/463, loss: 0.4313976466655731 2023-01-24 00:34:29.588419: step: 522/463, loss: 0.4068562984466553 2023-01-24 00:34:30.217059: step: 524/463, loss: 0.2683221995830536 2023-01-24 00:34:30.799149: step: 526/463, loss: 1.1908127069473267 2023-01-24 00:34:31.483651: step: 528/463, loss: 1.7183837890625 2023-01-24 00:34:32.051792: step: 530/463, loss: 2.447889804840088 2023-01-24 00:34:32.743648: step: 532/463, loss: 0.3424261510372162 2023-01-24 00:34:33.432593: step: 534/463, loss: 0.4946627616882324 2023-01-24 00:34:34.121535: step: 536/463, loss: 0.32918205857276917 2023-01-24 00:34:34.780142: step: 538/463, loss: 2.637058734893799 2023-01-24 00:34:35.399631: step: 540/463, loss: 0.21026721596717834 2023-01-24 00:34:36.037130: step: 542/463, loss: 0.5172099471092224 2023-01-24 00:34:36.631385: step: 544/463, loss: 1.433451533317566 2023-01-24 00:34:37.265511: step: 546/463, loss: 6.453167915344238 2023-01-24 00:34:37.908072: step: 548/463, loss: 0.44483011960983276 2023-01-24 00:34:38.532419: step: 550/463, loss: 2.07494854927063 2023-01-24 00:34:39.163416: step: 552/463, loss: 0.5158511996269226 2023-01-24 00:34:39.818675: step: 554/463, loss: 0.44654688239097595 2023-01-24 00:34:40.408613: step: 556/463, loss: 1.0115303993225098 2023-01-24 00:34:41.039620: step: 558/463, loss: 0.20662128925323486 2023-01-24 00:34:41.650819: step: 560/463, loss: 1.005355715751648 2023-01-24 00:34:42.270339: step: 562/463, loss: 0.8261318206787109 2023-01-24 00:34:42.933097: step: 564/463, loss: 0.9945001006126404 2023-01-24 00:34:43.564642: step: 566/463, loss: 0.6689165830612183 2023-01-24 00:34:44.196371: step: 568/463, loss: 0.4809578061103821 2023-01-24 00:34:44.847221: step: 570/463, loss: 0.4491552412509918 2023-01-24 00:34:45.478585: step: 572/463, loss: 0.49372655153274536 2023-01-24 00:34:46.047417: step: 574/463, loss: 2.4488420486450195 2023-01-24 00:34:46.630535: step: 576/463, loss: 0.1824321150779724 2023-01-24 00:34:47.185409: step: 578/463, loss: 0.3488035500049591 2023-01-24 00:34:47.844979: step: 580/463, loss: 2.2504191398620605 2023-01-24 00:34:48.549159: step: 582/463, loss: 1.5180344581604004 2023-01-24 00:34:49.148648: step: 584/463, loss: 11.3477144241333 2023-01-24 00:34:49.794938: step: 586/463, loss: 1.6393275260925293 2023-01-24 00:34:50.417862: step: 588/463, loss: 0.2820075452327728 2023-01-24 00:34:50.991407: step: 590/463, loss: 0.8712867498397827 2023-01-24 00:34:51.617316: step: 592/463, loss: 0.7512973546981812 2023-01-24 00:34:52.286733: step: 594/463, loss: 0.28509292006492615 2023-01-24 00:34:52.908857: step: 596/463, loss: 1.4684151411056519 2023-01-24 00:34:53.522451: step: 598/463, loss: 0.28816452622413635 2023-01-24 00:34:54.130757: step: 600/463, loss: 1.2358410358428955 2023-01-24 00:34:54.719602: step: 602/463, loss: 0.25881049036979675 2023-01-24 00:34:55.344925: step: 604/463, loss: 1.8399579524993896 2023-01-24 00:34:55.893559: step: 606/463, loss: 0.27672016620635986 2023-01-24 00:34:56.543476: step: 608/463, loss: 0.6745149493217468 2023-01-24 00:34:57.140568: step: 610/463, loss: 2.90478253364563 2023-01-24 00:34:57.736088: step: 612/463, loss: 1.676227331161499 2023-01-24 00:34:58.311007: step: 614/463, loss: 0.6498035192489624 2023-01-24 00:34:58.969924: step: 616/463, loss: 1.0641303062438965 2023-01-24 00:34:59.670226: step: 618/463, loss: 0.48125964403152466 2023-01-24 00:35:00.301808: step: 620/463, loss: 0.7160807847976685 2023-01-24 00:35:00.948471: step: 622/463, loss: 0.7379382848739624 2023-01-24 00:35:01.487985: step: 624/463, loss: 1.1830506324768066 2023-01-24 00:35:02.117104: step: 626/463, loss: 1.2899017333984375 2023-01-24 00:35:02.727458: step: 628/463, loss: 0.4629679322242737 2023-01-24 00:35:03.383357: step: 630/463, loss: 1.2517423629760742 2023-01-24 00:35:04.021876: step: 632/463, loss: 2.39970326423645 2023-01-24 00:35:04.791526: step: 634/463, loss: 1.5096125602722168 2023-01-24 00:35:05.508881: step: 636/463, loss: 4.045668601989746 2023-01-24 00:35:06.190432: step: 638/463, loss: 0.33265841007232666 2023-01-24 00:35:06.833137: step: 640/463, loss: 0.7800288200378418 2023-01-24 00:35:07.426973: step: 642/463, loss: 0.7697612643241882 2023-01-24 00:35:08.016850: step: 644/463, loss: 1.1078002452850342 2023-01-24 00:35:08.635756: step: 646/463, loss: 0.2708837687969208 2023-01-24 00:35:09.254382: step: 648/463, loss: 0.910312294960022 2023-01-24 00:35:09.917320: step: 650/463, loss: 0.8091008067131042 2023-01-24 00:35:10.540316: step: 652/463, loss: 0.9438118934631348 2023-01-24 00:35:11.178522: step: 654/463, loss: 0.5321412086486816 2023-01-24 00:35:11.907222: step: 656/463, loss: 1.130190372467041 2023-01-24 00:35:12.568707: step: 658/463, loss: 0.6020475029945374 2023-01-24 00:35:13.239804: step: 660/463, loss: 0.8208694458007812 2023-01-24 00:35:13.847418: step: 662/463, loss: 0.4887430667877197 2023-01-24 00:35:14.485712: step: 664/463, loss: 0.7658129334449768 2023-01-24 00:35:15.097517: step: 666/463, loss: 1.3788002729415894 2023-01-24 00:35:15.679648: step: 668/463, loss: 1.990587830543518 2023-01-24 00:35:16.308133: step: 670/463, loss: 5.476766586303711 2023-01-24 00:35:16.984669: step: 672/463, loss: 0.6488998532295227 2023-01-24 00:35:17.663330: step: 674/463, loss: 0.502023458480835 2023-01-24 00:35:18.326407: step: 676/463, loss: 0.7002149820327759 2023-01-24 00:35:18.947435: step: 678/463, loss: 1.4208626747131348 2023-01-24 00:35:19.580164: step: 680/463, loss: 4.006251335144043 2023-01-24 00:35:20.172727: step: 682/463, loss: 1.7344279289245605 2023-01-24 00:35:20.780519: step: 684/463, loss: 0.28021082282066345 2023-01-24 00:35:21.384575: step: 686/463, loss: 2.0944736003875732 2023-01-24 00:35:22.045320: step: 688/463, loss: 0.751337468624115 2023-01-24 00:35:22.692747: step: 690/463, loss: 0.46314340829849243 2023-01-24 00:35:23.278032: step: 692/463, loss: 0.3087157905101776 2023-01-24 00:35:23.930592: step: 694/463, loss: 2.819667100906372 2023-01-24 00:35:24.497400: step: 696/463, loss: 2.325143814086914 2023-01-24 00:35:25.159655: step: 698/463, loss: 2.1220245361328125 2023-01-24 00:35:25.803025: step: 700/463, loss: 0.5378586053848267 2023-01-24 00:35:26.429318: step: 702/463, loss: 0.7357038259506226 2023-01-24 00:35:27.142107: step: 704/463, loss: 0.3467526137828827 2023-01-24 00:35:27.773685: step: 706/463, loss: 1.3888535499572754 2023-01-24 00:35:28.372964: step: 708/463, loss: 2.6169872283935547 2023-01-24 00:35:28.979669: step: 710/463, loss: 0.5280007123947144 2023-01-24 00:35:29.612255: step: 712/463, loss: 1.324952483177185 2023-01-24 00:35:30.303534: step: 714/463, loss: 1.1339224576950073 2023-01-24 00:35:30.877060: step: 716/463, loss: 0.6904457807540894 2023-01-24 00:35:31.472171: step: 718/463, loss: 1.1276763677597046 2023-01-24 00:35:32.100376: step: 720/463, loss: 0.9646050333976746 2023-01-24 00:35:32.758941: step: 722/463, loss: 0.30169767141342163 2023-01-24 00:35:33.390576: step: 724/463, loss: 1.4208829402923584 2023-01-24 00:35:34.025682: step: 726/463, loss: 0.5536409616470337 2023-01-24 00:35:34.650360: step: 728/463, loss: 0.9701492190361023 2023-01-24 00:35:35.308737: step: 730/463, loss: 1.8942193984985352 2023-01-24 00:35:35.992050: step: 732/463, loss: 0.8635294437408447 2023-01-24 00:35:36.622311: step: 734/463, loss: 4.132852554321289 2023-01-24 00:35:37.248632: step: 736/463, loss: 2.1820449829101562 2023-01-24 00:35:37.879063: step: 738/463, loss: 0.37622594833374023 2023-01-24 00:35:38.469235: step: 740/463, loss: 1.6622025966644287 2023-01-24 00:35:39.220192: step: 742/463, loss: 1.0265120267868042 2023-01-24 00:35:39.845867: step: 744/463, loss: 0.6414118409156799 2023-01-24 00:35:40.472403: step: 746/463, loss: 0.38249847292900085 2023-01-24 00:35:41.024940: step: 748/463, loss: 1.1120786666870117 2023-01-24 00:35:41.752549: step: 750/463, loss: 0.3488244414329529 2023-01-24 00:35:42.354815: step: 752/463, loss: 0.4003273844718933 2023-01-24 00:35:42.917126: step: 754/463, loss: 0.7610275149345398 2023-01-24 00:35:43.550116: step: 756/463, loss: 1.2281427383422852 2023-01-24 00:35:44.155788: step: 758/463, loss: 6.144506931304932 2023-01-24 00:35:44.739980: step: 760/463, loss: 2.4287023544311523 2023-01-24 00:35:45.384101: step: 762/463, loss: 0.7552431225776672 2023-01-24 00:35:45.998632: step: 764/463, loss: 1.0743494033813477 2023-01-24 00:35:46.566939: step: 766/463, loss: 1.2670271396636963 2023-01-24 00:35:47.174511: step: 768/463, loss: 0.43791210651397705 2023-01-24 00:35:47.751556: step: 770/463, loss: 1.9041593074798584 2023-01-24 00:35:48.399085: step: 772/463, loss: 1.292412519454956 2023-01-24 00:35:49.018677: step: 774/463, loss: 0.4081590175628662 2023-01-24 00:35:49.596810: step: 776/463, loss: 0.7381172180175781 2023-01-24 00:35:50.257731: step: 778/463, loss: 0.9095269441604614 2023-01-24 00:35:50.841166: step: 780/463, loss: 1.2024753093719482 2023-01-24 00:35:51.451602: step: 782/463, loss: 7.8511881828308105 2023-01-24 00:35:51.991414: step: 784/463, loss: 0.5342065691947937 2023-01-24 00:35:52.613978: step: 786/463, loss: 0.5282962918281555 2023-01-24 00:35:53.201340: step: 788/463, loss: 2.4384546279907227 2023-01-24 00:35:53.784949: step: 790/463, loss: 2.0045552253723145 2023-01-24 00:35:54.432941: step: 792/463, loss: 2.0778610706329346 2023-01-24 00:35:55.193221: step: 794/463, loss: 0.873043954372406 2023-01-24 00:35:55.823750: step: 796/463, loss: 0.9244177341461182 2023-01-24 00:35:56.421707: step: 798/463, loss: 1.3505010604858398 2023-01-24 00:35:57.097265: step: 800/463, loss: 4.569206714630127 2023-01-24 00:35:57.702210: step: 802/463, loss: 0.41953688859939575 2023-01-24 00:35:58.349267: step: 804/463, loss: 1.3496654033660889 2023-01-24 00:35:59.028173: step: 806/463, loss: 0.450666606426239 2023-01-24 00:35:59.653373: step: 808/463, loss: 0.4474203586578369 2023-01-24 00:36:00.239659: step: 810/463, loss: 0.3004681468009949 2023-01-24 00:36:00.834750: step: 812/463, loss: 1.2261439561843872 2023-01-24 00:36:01.440255: step: 814/463, loss: 0.8634434342384338 2023-01-24 00:36:02.105283: step: 816/463, loss: 0.31812822818756104 2023-01-24 00:36:02.654190: step: 818/463, loss: 0.3466211259365082 2023-01-24 00:36:03.231513: step: 820/463, loss: 0.38399791717529297 2023-01-24 00:36:03.831527: step: 822/463, loss: 4.588316440582275 2023-01-24 00:36:04.379945: step: 824/463, loss: 0.49231356382369995 2023-01-24 00:36:05.021806: step: 826/463, loss: 2.1821179389953613 2023-01-24 00:36:05.639084: step: 828/463, loss: 0.9604631662368774 2023-01-24 00:36:06.252278: step: 830/463, loss: 1.1713541746139526 2023-01-24 00:36:06.835795: step: 832/463, loss: 0.7424074411392212 2023-01-24 00:36:07.371066: step: 834/463, loss: 2.473926305770874 2023-01-24 00:36:07.940193: step: 836/463, loss: 1.1334962844848633 2023-01-24 00:36:08.677637: step: 838/463, loss: 1.5403788089752197 2023-01-24 00:36:09.298527: step: 840/463, loss: 5.8639750480651855 2023-01-24 00:36:09.916172: step: 842/463, loss: 0.6067245006561279 2023-01-24 00:36:10.517320: step: 844/463, loss: 0.5638745427131653 2023-01-24 00:36:11.133052: step: 846/463, loss: 1.1359305381774902 2023-01-24 00:36:11.663593: step: 848/463, loss: 0.5983893275260925 2023-01-24 00:36:12.288295: step: 850/463, loss: 1.5063880681991577 2023-01-24 00:36:12.851956: step: 852/463, loss: 0.4436891973018646 2023-01-24 00:36:13.497897: step: 854/463, loss: 1.8063496351242065 2023-01-24 00:36:14.130504: step: 856/463, loss: 0.7595394849777222 2023-01-24 00:36:14.685280: step: 858/463, loss: 0.6355447769165039 2023-01-24 00:36:15.317816: step: 860/463, loss: 0.474848210811615 2023-01-24 00:36:15.928254: step: 862/463, loss: 5.994952201843262 2023-01-24 00:36:16.559046: step: 864/463, loss: 1.419187068939209 2023-01-24 00:36:17.198100: step: 866/463, loss: 1.4884190559387207 2023-01-24 00:36:17.795658: step: 868/463, loss: 0.7256003618240356 2023-01-24 00:36:18.456723: step: 870/463, loss: 0.47975561022758484 2023-01-24 00:36:19.163003: step: 872/463, loss: 1.6436200141906738 2023-01-24 00:36:19.790533: step: 874/463, loss: 0.5869379043579102 2023-01-24 00:36:20.380216: step: 876/463, loss: 1.4649345874786377 2023-01-24 00:36:21.018715: step: 878/463, loss: 0.986615002155304 2023-01-24 00:36:21.640422: step: 880/463, loss: 0.7092854380607605 2023-01-24 00:36:22.271142: step: 882/463, loss: 1.0044018030166626 2023-01-24 00:36:22.891548: step: 884/463, loss: 3.209611654281616 2023-01-24 00:36:23.491645: step: 886/463, loss: 0.4413859248161316 2023-01-24 00:36:24.067033: step: 888/463, loss: 0.5557605028152466 2023-01-24 00:36:24.687253: step: 890/463, loss: 1.525524616241455 2023-01-24 00:36:25.332269: step: 892/463, loss: 1.7087215185165405 2023-01-24 00:36:25.975042: step: 894/463, loss: 0.8093967437744141 2023-01-24 00:36:26.614776: step: 896/463, loss: 0.46335291862487793 2023-01-24 00:36:27.220412: step: 898/463, loss: 0.6929244995117188 2023-01-24 00:36:27.825834: step: 900/463, loss: 3.2834620475769043 2023-01-24 00:36:28.384169: step: 902/463, loss: 0.8197821378707886 2023-01-24 00:36:29.000165: step: 904/463, loss: 0.4407203197479248 2023-01-24 00:36:29.596282: step: 906/463, loss: 0.9018424153327942 2023-01-24 00:36:30.237241: step: 908/463, loss: 1.4900413751602173 2023-01-24 00:36:30.926484: step: 910/463, loss: 0.6545317769050598 2023-01-24 00:36:31.537276: step: 912/463, loss: 4.649897575378418 2023-01-24 00:36:32.152242: step: 914/463, loss: 0.7948824763298035 2023-01-24 00:36:32.835223: step: 916/463, loss: 0.35423460602760315 2023-01-24 00:36:33.524995: step: 918/463, loss: 1.4610161781311035 2023-01-24 00:36:34.165934: step: 920/463, loss: 1.1138560771942139 2023-01-24 00:36:34.802975: step: 922/463, loss: 1.3463464975357056 2023-01-24 00:36:35.411484: step: 924/463, loss: 0.7795146703720093 2023-01-24 00:36:36.057321: step: 926/463, loss: 0.6993968486785889 ================================================== Loss: 1.348 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3175134350298824, 'r': 0.28196639012141356, 'f1': 0.2986860052140401}, 'combined': 0.22008442489455585, 'epoch': 3} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3511287434492222, 'r': 0.2585334351244926, 'f1': 0.2977994154967839}, 'combined': 0.2095071264801495, 'epoch': 3} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31673369470187096, 'r': 0.284279008717239, 'f1': 0.2996300751879699}, 'combined': 0.22078005540166204, 'epoch': 3} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.349019943962768, 'r': 0.256373222545323, 'f1': 0.2956074588104126}, 'combined': 0.20988129575539294, 'epoch': 3} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33208239277652374, 'r': 0.2791508538899431, 'f1': 0.3033247422680413}, 'combined': 0.2235024416711883, 'epoch': 3} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3535359508496168, 'r': 0.24328693481811609, 'f1': 0.28822848418080216}, 'combined': 0.20464222376836952, 'epoch': 3} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.21726190476190477, 'r': 0.19863945578231293, 'f1': 0.20753375977256575}, 'combined': 0.13835583984837715, 'epoch': 3} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.33653846153846156, 'r': 0.3804347826086957, 'f1': 0.35714285714285715}, 'combined': 0.17857142857142858, 'epoch': 3} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3066801619433198, 'r': 0.13747731397459165, 'f1': 0.18984962406015038}, 'combined': 0.12656641604010024, 'epoch': 3} New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3359078590785908, 'r': 0.2347537878787879, 'f1': 0.27636566332218504}, 'combined': 0.2036378571847679, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.37459689049024225, 'r': 0.22280371573506583, 'f1': 0.2794157830919582}, 'combined': 0.19657391775313643, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3657407407407407, 'r': 0.1880952380952381, 'f1': 0.24842767295597484}, 'combined': 0.16561844863731656, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31673369470187096, 'r': 0.284279008717239, 'f1': 0.2996300751879699}, 'combined': 0.22078005540166204, 'epoch': 3} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.349019943962768, 'r': 0.256373222545323, 'f1': 0.2956074588104126}, 'combined': 0.20988129575539294, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.33653846153846156, 'r': 0.3804347826086957, 'f1': 0.35714285714285715}, 'combined': 0.17857142857142858, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33208239277652374, 'r': 0.2791508538899431, 'f1': 0.3033247422680413}, 'combined': 0.2235024416711883, 'epoch': 3} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3535359508496168, 'r': 0.24328693481811609, 'f1': 0.28822848418080216}, 'combined': 0.20464222376836952, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3066801619433198, 'r': 0.13747731397459165, 'f1': 0.18984962406015038}, 'combined': 0.12656641604010024, 'epoch': 3} ****************************** Epoch: 4 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:39:31.646965: step: 2/463, loss: 1.7340394258499146 2023-01-24 00:39:32.353411: step: 4/463, loss: 0.3484487235546112 2023-01-24 00:39:32.923431: step: 6/463, loss: 1.1999965906143188 2023-01-24 00:39:33.548141: step: 8/463, loss: 0.7209756374359131 2023-01-24 00:39:34.223669: step: 10/463, loss: 0.8530198335647583 2023-01-24 00:39:34.904572: step: 12/463, loss: 0.25077882409095764 2023-01-24 00:39:35.521616: step: 14/463, loss: 1.3281463384628296 2023-01-24 00:39:36.095663: step: 16/463, loss: 0.47897109389305115 2023-01-24 00:39:36.732178: step: 18/463, loss: 0.5446123480796814 2023-01-24 00:39:37.277159: step: 20/463, loss: 0.9556565284729004 2023-01-24 00:39:37.896925: step: 22/463, loss: 0.5899953842163086 2023-01-24 00:39:38.568096: step: 24/463, loss: 0.6913174986839294 2023-01-24 00:39:39.169038: step: 26/463, loss: 0.6677520871162415 2023-01-24 00:39:39.758946: step: 28/463, loss: 1.688493013381958 2023-01-24 00:39:40.378641: step: 30/463, loss: 0.36871588230133057 2023-01-24 00:39:41.118535: step: 32/463, loss: 1.0641725063323975 2023-01-24 00:39:41.693552: step: 34/463, loss: 0.3463834524154663 2023-01-24 00:39:42.282196: step: 36/463, loss: 0.18104930222034454 2023-01-24 00:39:42.946603: step: 38/463, loss: 0.5030932426452637 2023-01-24 00:39:43.552623: step: 40/463, loss: 0.3648448586463928 2023-01-24 00:39:44.205288: step: 42/463, loss: 0.9708308577537537 2023-01-24 00:39:44.858724: step: 44/463, loss: 0.6289823055267334 2023-01-24 00:39:45.432764: step: 46/463, loss: 1.724927306175232 2023-01-24 00:39:46.059021: step: 48/463, loss: 2.506322145462036 2023-01-24 00:39:46.659222: step: 50/463, loss: 0.6962991952896118 2023-01-24 00:39:47.380199: step: 52/463, loss: 1.0259342193603516 2023-01-24 00:39:47.993585: step: 54/463, loss: 0.16692006587982178 2023-01-24 00:39:48.631984: step: 56/463, loss: 0.6931552290916443 2023-01-24 00:39:49.260951: step: 58/463, loss: 0.5792266130447388 2023-01-24 00:39:49.868963: step: 60/463, loss: 0.5275489091873169 2023-01-24 00:39:50.458178: step: 62/463, loss: 3.058749198913574 2023-01-24 00:39:51.083284: step: 64/463, loss: 0.41755664348602295 2023-01-24 00:39:51.764150: step: 66/463, loss: 0.7674996256828308 2023-01-24 00:39:52.360714: step: 68/463, loss: 0.9862866997718811 2023-01-24 00:39:52.975434: step: 70/463, loss: 1.50117826461792 2023-01-24 00:39:53.561936: step: 72/463, loss: 0.36125442385673523 2023-01-24 00:39:54.233049: step: 74/463, loss: 0.89136803150177 2023-01-24 00:39:54.922721: step: 76/463, loss: 3.733886241912842 2023-01-24 00:39:55.580512: step: 78/463, loss: 0.2654401659965515 2023-01-24 00:39:56.208930: step: 80/463, loss: 0.35165825486183167 2023-01-24 00:39:56.834583: step: 82/463, loss: 0.6585902571678162 2023-01-24 00:39:57.405788: step: 84/463, loss: 0.26866763830184937 2023-01-24 00:39:57.988890: step: 86/463, loss: 1.008736491203308 2023-01-24 00:39:58.633299: step: 88/463, loss: 0.5184181928634644 2023-01-24 00:39:59.231992: step: 90/463, loss: 0.40585097670555115 2023-01-24 00:39:59.817999: step: 92/463, loss: 7.72049617767334 2023-01-24 00:40:00.509538: step: 94/463, loss: 0.723196268081665 2023-01-24 00:40:01.114762: step: 96/463, loss: 1.0398081541061401 2023-01-24 00:40:01.687984: step: 98/463, loss: 0.8135270476341248 2023-01-24 00:40:02.322130: step: 100/463, loss: 1.2396117448806763 2023-01-24 00:40:02.875701: step: 102/463, loss: 0.5848272442817688 2023-01-24 00:40:03.479993: step: 104/463, loss: 0.6993635296821594 2023-01-24 00:40:04.141328: step: 106/463, loss: 0.3921169638633728 2023-01-24 00:40:04.728970: step: 108/463, loss: 1.3639081716537476 2023-01-24 00:40:05.335004: step: 110/463, loss: 3.877030372619629 2023-01-24 00:40:05.941708: step: 112/463, loss: 0.8264673352241516 2023-01-24 00:40:06.552394: step: 114/463, loss: 0.981216311454773 2023-01-24 00:40:07.191264: step: 116/463, loss: 0.6100276708602905 2023-01-24 00:40:07.883456: step: 118/463, loss: 0.5381000638008118 2023-01-24 00:40:08.546116: step: 120/463, loss: 0.457328736782074 2023-01-24 00:40:09.289703: step: 122/463, loss: 0.6687302589416504 2023-01-24 00:40:09.913343: step: 124/463, loss: 1.290010929107666 2023-01-24 00:40:10.508837: step: 126/463, loss: 0.9974846839904785 2023-01-24 00:40:11.239305: step: 128/463, loss: 0.3432944715023041 2023-01-24 00:40:11.912644: step: 130/463, loss: 4.994852066040039 2023-01-24 00:40:12.548030: step: 132/463, loss: 1.4381446838378906 2023-01-24 00:40:13.190999: step: 134/463, loss: 0.8872867822647095 2023-01-24 00:40:13.892593: step: 136/463, loss: 0.7015304565429688 2023-01-24 00:40:14.595760: step: 138/463, loss: 0.3191309869289398 2023-01-24 00:40:15.202785: step: 140/463, loss: 0.6768501400947571 2023-01-24 00:40:15.841495: step: 142/463, loss: 0.6274449825286865 2023-01-24 00:40:16.451619: step: 144/463, loss: 0.5197428464889526 2023-01-24 00:40:17.116265: step: 146/463, loss: 1.0605945587158203 2023-01-24 00:40:17.714460: step: 148/463, loss: 0.8910852670669556 2023-01-24 00:40:18.378231: step: 150/463, loss: 0.4079822897911072 2023-01-24 00:40:18.956231: step: 152/463, loss: 1.0618999004364014 2023-01-24 00:40:19.565804: step: 154/463, loss: 2.1478447914123535 2023-01-24 00:40:20.246937: step: 156/463, loss: 0.32651808857917786 2023-01-24 00:40:20.871235: step: 158/463, loss: 2.371861457824707 2023-01-24 00:40:21.515829: step: 160/463, loss: 0.9332466721534729 2023-01-24 00:40:22.125600: step: 162/463, loss: 0.8531134724617004 2023-01-24 00:40:22.789222: step: 164/463, loss: 2.2777204513549805 2023-01-24 00:40:23.396517: step: 166/463, loss: 1.8359529972076416 2023-01-24 00:40:23.978415: step: 168/463, loss: 1.1433210372924805 2023-01-24 00:40:24.597831: step: 170/463, loss: 0.3733145296573639 2023-01-24 00:40:25.229961: step: 172/463, loss: 1.7406060695648193 2023-01-24 00:40:25.880829: step: 174/463, loss: 0.23000362515449524 2023-01-24 00:40:26.523320: step: 176/463, loss: 0.5388394594192505 2023-01-24 00:40:27.147149: step: 178/463, loss: 1.1436280012130737 2023-01-24 00:40:27.779672: step: 180/463, loss: 0.6854603886604309 2023-01-24 00:40:28.390363: step: 182/463, loss: 2.2480931282043457 2023-01-24 00:40:29.052579: step: 184/463, loss: 0.6098314523696899 2023-01-24 00:40:29.636637: step: 186/463, loss: 0.4662625789642334 2023-01-24 00:40:30.272633: step: 188/463, loss: 3.0178093910217285 2023-01-24 00:40:30.872833: step: 190/463, loss: 0.956200361251831 2023-01-24 00:40:31.442325: step: 192/463, loss: 1.3292350769042969 2023-01-24 00:40:32.046459: step: 194/463, loss: 1.040854573249817 2023-01-24 00:40:32.718557: step: 196/463, loss: 0.6944494843482971 2023-01-24 00:40:33.361875: step: 198/463, loss: 3.2885217666625977 2023-01-24 00:40:33.983885: step: 200/463, loss: 1.1005222797393799 2023-01-24 00:40:34.631809: step: 202/463, loss: 0.8343765139579773 2023-01-24 00:40:35.311020: step: 204/463, loss: 4.929426193237305 2023-01-24 00:40:35.919123: step: 206/463, loss: 1.335435390472412 2023-01-24 00:40:36.520672: step: 208/463, loss: 0.316899836063385 2023-01-24 00:40:37.133808: step: 210/463, loss: 0.7850183844566345 2023-01-24 00:40:37.756366: step: 212/463, loss: 0.33058756589889526 2023-01-24 00:40:38.348492: step: 214/463, loss: 1.2808432579040527 2023-01-24 00:40:38.915357: step: 216/463, loss: 0.5489875674247742 2023-01-24 00:40:39.632446: step: 218/463, loss: 2.7180280685424805 2023-01-24 00:40:40.266843: step: 220/463, loss: 1.2448838949203491 2023-01-24 00:40:40.925266: step: 222/463, loss: 0.5225666165351868 2023-01-24 00:40:41.565451: step: 224/463, loss: 1.488265037536621 2023-01-24 00:40:42.274201: step: 226/463, loss: 2.8476967811584473 2023-01-24 00:40:42.853578: step: 228/463, loss: 0.6160991191864014 2023-01-24 00:40:43.450006: step: 230/463, loss: 1.1265666484832764 2023-01-24 00:40:44.049475: step: 232/463, loss: 2.0740318298339844 2023-01-24 00:40:44.607838: step: 234/463, loss: 0.370876282453537 2023-01-24 00:40:45.187885: step: 236/463, loss: 2.541010856628418 2023-01-24 00:40:45.765870: step: 238/463, loss: 0.7710763216018677 2023-01-24 00:40:46.468604: step: 240/463, loss: 0.8468317985534668 2023-01-24 00:40:47.192014: step: 242/463, loss: 0.5124418139457703 2023-01-24 00:40:47.855858: step: 244/463, loss: 0.5773882269859314 2023-01-24 00:40:48.413466: step: 246/463, loss: 1.1756136417388916 2023-01-24 00:40:49.109882: step: 248/463, loss: 3.647606372833252 2023-01-24 00:40:49.747363: step: 250/463, loss: 1.8153356313705444 2023-01-24 00:40:50.371424: step: 252/463, loss: 1.154940128326416 2023-01-24 00:40:51.021960: step: 254/463, loss: 0.4554460048675537 2023-01-24 00:40:51.701316: step: 256/463, loss: 0.5910336971282959 2023-01-24 00:40:52.356788: step: 258/463, loss: 0.14907124638557434 2023-01-24 00:40:52.935307: step: 260/463, loss: 1.6999313831329346 2023-01-24 00:40:53.532288: step: 262/463, loss: 0.630199670791626 2023-01-24 00:40:54.175597: step: 264/463, loss: 0.25744709372520447 2023-01-24 00:40:54.840374: step: 266/463, loss: 1.4644908905029297 2023-01-24 00:40:55.472059: step: 268/463, loss: 0.40976426005363464 2023-01-24 00:40:56.133745: step: 270/463, loss: 0.40035900473594666 2023-01-24 00:40:56.704237: step: 272/463, loss: 0.8513896465301514 2023-01-24 00:40:57.309620: step: 274/463, loss: 0.8250728845596313 2023-01-24 00:40:57.936742: step: 276/463, loss: 1.0996809005737305 2023-01-24 00:40:58.589140: step: 278/463, loss: 0.43936383724212646 2023-01-24 00:40:59.171799: step: 280/463, loss: 0.1905534416437149 2023-01-24 00:40:59.776617: step: 282/463, loss: 1.4509196281433105 2023-01-24 00:41:00.385819: step: 284/463, loss: 0.4119699001312256 2023-01-24 00:41:00.973630: step: 286/463, loss: 0.6485965847969055 2023-01-24 00:41:01.646598: step: 288/463, loss: 0.5153582096099854 2023-01-24 00:41:02.323484: step: 290/463, loss: 7.6968913078308105 2023-01-24 00:41:02.921955: step: 292/463, loss: 0.4355889856815338 2023-01-24 00:41:03.506319: step: 294/463, loss: 0.4394805133342743 2023-01-24 00:41:04.162889: step: 296/463, loss: 0.4392803907394409 2023-01-24 00:41:04.781526: step: 298/463, loss: 1.4043011665344238 2023-01-24 00:41:05.347366: step: 300/463, loss: 0.2856404483318329 2023-01-24 00:41:05.980101: step: 302/463, loss: 0.777006208896637 2023-01-24 00:41:06.591531: step: 304/463, loss: 0.7258918881416321 2023-01-24 00:41:07.251370: step: 306/463, loss: 0.6098880171775818 2023-01-24 00:41:07.838074: step: 308/463, loss: 1.2667661905288696 2023-01-24 00:41:08.470512: step: 310/463, loss: 2.737389326095581 2023-01-24 00:41:09.059105: step: 312/463, loss: 0.8679634928703308 2023-01-24 00:41:09.716457: step: 314/463, loss: 0.23747125267982483 2023-01-24 00:41:10.346898: step: 316/463, loss: 0.21397043764591217 2023-01-24 00:41:10.940510: step: 318/463, loss: 0.5615314245223999 2023-01-24 00:41:11.551131: step: 320/463, loss: 0.5066286325454712 2023-01-24 00:41:12.108202: step: 322/463, loss: 4.825674057006836 2023-01-24 00:41:12.743455: step: 324/463, loss: 0.20308728516101837 2023-01-24 00:41:13.352070: step: 326/463, loss: 0.5660277009010315 2023-01-24 00:41:14.028587: step: 328/463, loss: 0.8103926181793213 2023-01-24 00:41:14.653395: step: 330/463, loss: 0.8824500441551208 2023-01-24 00:41:15.255179: step: 332/463, loss: 0.176014244556427 2023-01-24 00:41:15.872818: step: 334/463, loss: 0.8181882500648499 2023-01-24 00:41:16.536527: step: 336/463, loss: 2.5450048446655273 2023-01-24 00:41:17.117978: step: 338/463, loss: 0.440561980009079 2023-01-24 00:41:17.800677: step: 340/463, loss: 0.6606693863868713 2023-01-24 00:41:18.351048: step: 342/463, loss: 1.3179476261138916 2023-01-24 00:41:18.967290: step: 344/463, loss: 0.8395357728004456 2023-01-24 00:41:19.617147: step: 346/463, loss: 0.6118634343147278 2023-01-24 00:41:20.290019: step: 348/463, loss: 1.053419828414917 2023-01-24 00:41:20.890207: step: 350/463, loss: 0.336164653301239 2023-01-24 00:41:21.508461: step: 352/463, loss: 0.6671659350395203 2023-01-24 00:41:22.192873: step: 354/463, loss: 0.3024587035179138 2023-01-24 00:41:22.802332: step: 356/463, loss: 0.728671669960022 2023-01-24 00:41:23.383112: step: 358/463, loss: 0.8876861929893494 2023-01-24 00:41:23.993507: step: 360/463, loss: 0.5911165475845337 2023-01-24 00:41:24.600506: step: 362/463, loss: 0.615291178226471 2023-01-24 00:41:25.198282: step: 364/463, loss: 1.1990472078323364 2023-01-24 00:41:25.816781: step: 366/463, loss: 0.8627260327339172 2023-01-24 00:41:26.412105: step: 368/463, loss: 0.4350024461746216 2023-01-24 00:41:27.190942: step: 370/463, loss: 0.7196427583694458 2023-01-24 00:41:27.838404: step: 372/463, loss: 0.8385066390037537 2023-01-24 00:41:28.492412: step: 374/463, loss: 2.4092013835906982 2023-01-24 00:41:29.130792: step: 376/463, loss: 0.635285496711731 2023-01-24 00:41:29.732250: step: 378/463, loss: 0.4487810730934143 2023-01-24 00:41:30.348770: step: 380/463, loss: 0.3436954617500305 2023-01-24 00:41:30.941824: step: 382/463, loss: 0.3639468550682068 2023-01-24 00:41:31.593571: step: 384/463, loss: 1.0329570770263672 2023-01-24 00:41:32.234811: step: 386/463, loss: 0.7797186970710754 2023-01-24 00:41:32.882686: step: 388/463, loss: 0.5842095017433167 2023-01-24 00:41:33.528811: step: 390/463, loss: 0.30774515867233276 2023-01-24 00:41:34.147444: step: 392/463, loss: 1.6820347309112549 2023-01-24 00:41:34.782970: step: 394/463, loss: 0.6768879890441895 2023-01-24 00:41:35.409865: step: 396/463, loss: 0.3983825147151947 2023-01-24 00:41:36.120615: step: 398/463, loss: 0.9445382356643677 2023-01-24 00:41:36.774866: step: 400/463, loss: 0.6782453060150146 2023-01-24 00:41:37.441761: step: 402/463, loss: 0.7141657471656799 2023-01-24 00:41:38.128124: step: 404/463, loss: 0.772838830947876 2023-01-24 00:41:38.762579: step: 406/463, loss: 0.6980345249176025 2023-01-24 00:41:39.361667: step: 408/463, loss: 0.8490490913391113 2023-01-24 00:41:39.992132: step: 410/463, loss: 0.8972468972206116 2023-01-24 00:41:40.584831: step: 412/463, loss: 4.472133636474609 2023-01-24 00:41:41.331756: step: 414/463, loss: 0.2878112494945526 2023-01-24 00:41:41.922670: step: 416/463, loss: 0.6879253387451172 2023-01-24 00:41:42.546361: step: 418/463, loss: 0.927514374256134 2023-01-24 00:41:43.125417: step: 420/463, loss: 0.5415744781494141 2023-01-24 00:41:43.645464: step: 422/463, loss: 0.5059882402420044 2023-01-24 00:41:44.272326: step: 424/463, loss: 0.3384130597114563 2023-01-24 00:41:44.950232: step: 426/463, loss: 4.936415672302246 2023-01-24 00:41:45.511200: step: 428/463, loss: 0.18496522307395935 2023-01-24 00:41:46.132704: step: 430/463, loss: 6.866271495819092 2023-01-24 00:41:46.734925: step: 432/463, loss: 0.2181302011013031 2023-01-24 00:41:47.364434: step: 434/463, loss: 0.38944119215011597 2023-01-24 00:41:48.006397: step: 436/463, loss: 1.0429494380950928 2023-01-24 00:41:48.660356: step: 438/463, loss: 0.8404353857040405 2023-01-24 00:41:49.290412: step: 440/463, loss: 1.1820025444030762 2023-01-24 00:41:49.933088: step: 442/463, loss: 1.4014828205108643 2023-01-24 00:41:50.567354: step: 444/463, loss: 2.487783193588257 2023-01-24 00:41:51.249541: step: 446/463, loss: 0.41984283924102783 2023-01-24 00:41:51.877368: step: 448/463, loss: 0.8152846693992615 2023-01-24 00:41:52.475670: step: 450/463, loss: 0.41035568714141846 2023-01-24 00:41:53.117328: step: 452/463, loss: 0.8781241178512573 2023-01-24 00:41:53.792566: step: 454/463, loss: 0.5124644637107849 2023-01-24 00:41:54.387079: step: 456/463, loss: 0.3099278509616852 2023-01-24 00:41:55.052563: step: 458/463, loss: 0.7695379257202148 2023-01-24 00:41:55.705377: step: 460/463, loss: 0.25489291548728943 2023-01-24 00:41:56.397158: step: 462/463, loss: 0.5928194522857666 2023-01-24 00:41:56.971344: step: 464/463, loss: 0.511032223701477 2023-01-24 00:41:57.652527: step: 466/463, loss: 0.40350183844566345 2023-01-24 00:41:58.215389: step: 468/463, loss: 0.14483462274074554 2023-01-24 00:41:58.776694: step: 470/463, loss: 0.2926200032234192 2023-01-24 00:41:59.399847: step: 472/463, loss: 0.4645593464374542 2023-01-24 00:42:00.042654: step: 474/463, loss: 0.40883779525756836 2023-01-24 00:42:00.671839: step: 476/463, loss: 1.0026620626449585 2023-01-24 00:42:01.382488: step: 478/463, loss: 1.5711491107940674 2023-01-24 00:42:02.041695: step: 480/463, loss: 0.8904794454574585 2023-01-24 00:42:02.626558: step: 482/463, loss: 0.6814005970954895 2023-01-24 00:42:03.206341: step: 484/463, loss: 0.5940240621566772 2023-01-24 00:42:03.852124: step: 486/463, loss: 1.36725652217865 2023-01-24 00:42:04.430212: step: 488/463, loss: 0.33825019001960754 2023-01-24 00:42:05.061787: step: 490/463, loss: 10.001785278320312 2023-01-24 00:42:05.701778: step: 492/463, loss: 0.23194283246994019 2023-01-24 00:42:06.276523: step: 494/463, loss: 1.2877411842346191 2023-01-24 00:42:06.859645: step: 496/463, loss: 0.37199661135673523 2023-01-24 00:42:07.499351: step: 498/463, loss: 1.051352858543396 2023-01-24 00:42:08.197961: step: 500/463, loss: 0.8722004890441895 2023-01-24 00:42:08.782292: step: 502/463, loss: 2.108574867248535 2023-01-24 00:42:09.389936: step: 504/463, loss: 1.8526277542114258 2023-01-24 00:42:10.076212: step: 506/463, loss: 1.7124611139297485 2023-01-24 00:42:10.641132: step: 508/463, loss: 0.5519593358039856 2023-01-24 00:42:11.206751: step: 510/463, loss: 0.7325560450553894 2023-01-24 00:42:11.814579: step: 512/463, loss: 1.5091421604156494 2023-01-24 00:42:12.415849: step: 514/463, loss: 1.156466007232666 2023-01-24 00:42:12.994096: step: 516/463, loss: 2.7727928161621094 2023-01-24 00:42:13.628846: step: 518/463, loss: 1.3215842247009277 2023-01-24 00:42:14.275248: step: 520/463, loss: 1.4164048433303833 2023-01-24 00:42:14.942049: step: 522/463, loss: 0.8687294721603394 2023-01-24 00:42:15.613342: step: 524/463, loss: 2.139601230621338 2023-01-24 00:42:16.249080: step: 526/463, loss: 0.672106146812439 2023-01-24 00:42:16.880162: step: 528/463, loss: 0.7225949764251709 2023-01-24 00:42:17.505250: step: 530/463, loss: 0.6531769633293152 2023-01-24 00:42:18.077059: step: 532/463, loss: 0.399929940700531 2023-01-24 00:42:18.693263: step: 534/463, loss: 0.4162968695163727 2023-01-24 00:42:19.336761: step: 536/463, loss: 0.2510896623134613 2023-01-24 00:42:19.969836: step: 538/463, loss: 0.3391830325126648 2023-01-24 00:42:20.597775: step: 540/463, loss: 0.2564721703529358 2023-01-24 00:42:21.288155: step: 542/463, loss: 1.8930720090866089 2023-01-24 00:42:21.893323: step: 544/463, loss: 0.6609528064727783 2023-01-24 00:42:22.522679: step: 546/463, loss: 0.3965955972671509 2023-01-24 00:42:23.099672: step: 548/463, loss: 0.1840658336877823 2023-01-24 00:42:23.735403: step: 550/463, loss: 0.2971612811088562 2023-01-24 00:42:24.322527: step: 552/463, loss: 0.5386552810668945 2023-01-24 00:42:24.923557: step: 554/463, loss: 0.9415072202682495 2023-01-24 00:42:25.493947: step: 556/463, loss: 1.3735125064849854 2023-01-24 00:42:26.097992: step: 558/463, loss: 1.4645370244979858 2023-01-24 00:42:26.752327: step: 560/463, loss: 0.9576241970062256 2023-01-24 00:42:27.409213: step: 562/463, loss: 1.184206485748291 2023-01-24 00:42:28.042485: step: 564/463, loss: 1.1412538290023804 2023-01-24 00:42:28.617335: step: 566/463, loss: 0.4779122769832611 2023-01-24 00:42:29.248433: step: 568/463, loss: 0.4947929382324219 2023-01-24 00:42:29.851198: step: 570/463, loss: 0.47703859210014343 2023-01-24 00:42:30.503602: step: 572/463, loss: 1.0094386339187622 2023-01-24 00:42:31.158001: step: 574/463, loss: 1.605323076248169 2023-01-24 00:42:31.783296: step: 576/463, loss: 1.0267601013183594 2023-01-24 00:42:32.440202: step: 578/463, loss: 1.1662286520004272 2023-01-24 00:42:33.057510: step: 580/463, loss: 1.5782971382141113 2023-01-24 00:42:33.740157: step: 582/463, loss: 1.510416865348816 2023-01-24 00:42:34.390426: step: 584/463, loss: 0.8945964574813843 2023-01-24 00:42:34.987754: step: 586/463, loss: 1.401969313621521 2023-01-24 00:42:35.577427: step: 588/463, loss: 0.6456435322761536 2023-01-24 00:42:36.176282: step: 590/463, loss: 0.3835964798927307 2023-01-24 00:42:36.772065: step: 592/463, loss: 0.6404721736907959 2023-01-24 00:42:37.393810: step: 594/463, loss: 0.3161999583244324 2023-01-24 00:42:37.960698: step: 596/463, loss: 1.2476121187210083 2023-01-24 00:42:38.522354: step: 598/463, loss: 1.115811824798584 2023-01-24 00:42:39.137961: step: 600/463, loss: 1.7108622789382935 2023-01-24 00:42:39.771304: step: 602/463, loss: 0.42225927114486694 2023-01-24 00:42:40.398643: step: 604/463, loss: 1.2379530668258667 2023-01-24 00:42:41.017366: step: 606/463, loss: 2.732727527618408 2023-01-24 00:42:41.733047: step: 608/463, loss: 1.1247632503509521 2023-01-24 00:42:42.292816: step: 610/463, loss: 2.344862699508667 2023-01-24 00:42:42.877689: step: 612/463, loss: 0.4471376836299896 2023-01-24 00:42:43.503131: step: 614/463, loss: 0.6649593114852905 2023-01-24 00:42:44.138054: step: 616/463, loss: 1.1859807968139648 2023-01-24 00:42:44.731427: step: 618/463, loss: 0.39125779271125793 2023-01-24 00:42:45.346220: step: 620/463, loss: 0.5141332745552063 2023-01-24 00:42:45.967935: step: 622/463, loss: 1.8105394840240479 2023-01-24 00:42:46.728445: step: 624/463, loss: 0.6822348237037659 2023-01-24 00:42:47.386544: step: 626/463, loss: 0.3842020630836487 2023-01-24 00:42:47.974831: step: 628/463, loss: 0.5940059423446655 2023-01-24 00:42:48.600252: step: 630/463, loss: 1.414339303970337 2023-01-24 00:42:49.240140: step: 632/463, loss: 0.988012433052063 2023-01-24 00:42:50.007554: step: 634/463, loss: 0.40161997079849243 2023-01-24 00:42:50.584996: step: 636/463, loss: 1.0795056819915771 2023-01-24 00:42:51.293055: step: 638/463, loss: 2.230233669281006 2023-01-24 00:42:51.887558: step: 640/463, loss: 0.6327934265136719 2023-01-24 00:42:52.508708: step: 642/463, loss: 2.2763237953186035 2023-01-24 00:42:53.074884: step: 644/463, loss: 4.373472213745117 2023-01-24 00:42:53.654254: step: 646/463, loss: 1.569833517074585 2023-01-24 00:42:54.295210: step: 648/463, loss: 1.4271926879882812 2023-01-24 00:42:54.838896: step: 650/463, loss: 7.3317036628723145 2023-01-24 00:42:55.431266: step: 652/463, loss: 0.2863078713417053 2023-01-24 00:42:56.047500: step: 654/463, loss: 1.2165193557739258 2023-01-24 00:42:56.656097: step: 656/463, loss: 0.4245227873325348 2023-01-24 00:42:57.323698: step: 658/463, loss: 0.9705733060836792 2023-01-24 00:42:57.909137: step: 660/463, loss: 0.43640100955963135 2023-01-24 00:42:58.508901: step: 662/463, loss: 1.0869696140289307 2023-01-24 00:42:59.129158: step: 664/463, loss: 0.5820673704147339 2023-01-24 00:42:59.760095: step: 666/463, loss: 0.4147489070892334 2023-01-24 00:43:00.359491: step: 668/463, loss: 1.4860161542892456 2023-01-24 00:43:00.942660: step: 670/463, loss: 0.45984312891960144 2023-01-24 00:43:01.665452: step: 672/463, loss: 0.48793429136276245 2023-01-24 00:43:02.287302: step: 674/463, loss: 0.33919471502304077 2023-01-24 00:43:02.943730: step: 676/463, loss: 1.3046720027923584 2023-01-24 00:43:03.591036: step: 678/463, loss: 0.5462546944618225 2023-01-24 00:43:04.285754: step: 680/463, loss: 0.40901878476142883 2023-01-24 00:43:04.897200: step: 682/463, loss: 0.21928246319293976 2023-01-24 00:43:05.622706: step: 684/463, loss: 0.7274168729782104 2023-01-24 00:43:06.229233: step: 686/463, loss: 1.2753839492797852 2023-01-24 00:43:06.875662: step: 688/463, loss: 3.791166305541992 2023-01-24 00:43:07.596276: step: 690/463, loss: 0.9322711825370789 2023-01-24 00:43:08.219692: step: 692/463, loss: 0.7225704789161682 2023-01-24 00:43:08.867501: step: 694/463, loss: 0.25140637159347534 2023-01-24 00:43:09.393592: step: 696/463, loss: 0.17500878870487213 2023-01-24 00:43:10.071703: step: 698/463, loss: 1.256880760192871 2023-01-24 00:43:10.646347: step: 700/463, loss: 0.7585497498512268 2023-01-24 00:43:11.260226: step: 702/463, loss: 0.3612159192562103 2023-01-24 00:43:11.909848: step: 704/463, loss: 0.877914547920227 2023-01-24 00:43:12.552065: step: 706/463, loss: 0.6788970232009888 2023-01-24 00:43:13.158869: step: 708/463, loss: 1.1258625984191895 2023-01-24 00:43:13.812402: step: 710/463, loss: 0.7332192063331604 2023-01-24 00:43:14.460454: step: 712/463, loss: 0.983177900314331 2023-01-24 00:43:15.144955: step: 714/463, loss: 1.0414884090423584 2023-01-24 00:43:15.748570: step: 716/463, loss: 0.39036551117897034 2023-01-24 00:43:16.323681: step: 718/463, loss: 2.8005099296569824 2023-01-24 00:43:16.982686: step: 720/463, loss: 0.6953034400939941 2023-01-24 00:43:17.567625: step: 722/463, loss: 0.6762683391571045 2023-01-24 00:43:18.207545: step: 724/463, loss: 0.47068125009536743 2023-01-24 00:43:18.919486: step: 726/463, loss: 1.6731644868850708 2023-01-24 00:43:19.597277: step: 728/463, loss: 0.534496545791626 2023-01-24 00:43:20.155391: step: 730/463, loss: 0.7140836119651794 2023-01-24 00:43:20.779837: step: 732/463, loss: 0.7032024264335632 2023-01-24 00:43:21.417350: step: 734/463, loss: 0.8038856983184814 2023-01-24 00:43:22.021940: step: 736/463, loss: 1.3651949167251587 2023-01-24 00:43:22.619170: step: 738/463, loss: 0.8286643028259277 2023-01-24 00:43:23.240687: step: 740/463, loss: 0.6162997484207153 2023-01-24 00:43:23.879376: step: 742/463, loss: 0.43109551072120667 2023-01-24 00:43:24.539823: step: 744/463, loss: 2.508298873901367 2023-01-24 00:43:25.207137: step: 746/463, loss: 0.8179532885551453 2023-01-24 00:43:25.909321: step: 748/463, loss: 0.742874026298523 2023-01-24 00:43:26.612881: step: 750/463, loss: 0.6353887319564819 2023-01-24 00:43:27.260083: step: 752/463, loss: 0.662401556968689 2023-01-24 00:43:27.932503: step: 754/463, loss: 0.9113625884056091 2023-01-24 00:43:28.524255: step: 756/463, loss: 1.2150079011917114 2023-01-24 00:43:29.134454: step: 758/463, loss: 2.5635287761688232 2023-01-24 00:43:29.814992: step: 760/463, loss: 1.4296231269836426 2023-01-24 00:43:30.402629: step: 762/463, loss: 0.9079815149307251 2023-01-24 00:43:31.051173: step: 764/463, loss: 2.120422601699829 2023-01-24 00:43:31.583435: step: 766/463, loss: 1.8938711881637573 2023-01-24 00:43:32.215890: step: 768/463, loss: 0.5766568183898926 2023-01-24 00:43:32.895324: step: 770/463, loss: 1.7700132131576538 2023-01-24 00:43:33.514146: step: 772/463, loss: 1.748260259628296 2023-01-24 00:43:34.140338: step: 774/463, loss: 0.4422953128814697 2023-01-24 00:43:34.719294: step: 776/463, loss: 0.24052318930625916 2023-01-24 00:43:35.300392: step: 778/463, loss: 0.31818318367004395 2023-01-24 00:43:35.873825: step: 780/463, loss: 0.7843020558357239 2023-01-24 00:43:36.492437: step: 782/463, loss: 0.7102949023246765 2023-01-24 00:43:37.098155: step: 784/463, loss: 1.0865919589996338 2023-01-24 00:43:37.758540: step: 786/463, loss: 1.4511466026306152 2023-01-24 00:43:38.399943: step: 788/463, loss: 0.8674811720848083 2023-01-24 00:43:39.094463: step: 790/463, loss: 3.590701103210449 2023-01-24 00:43:39.763051: step: 792/463, loss: 0.9477757811546326 2023-01-24 00:43:40.423527: step: 794/463, loss: 0.5989382863044739 2023-01-24 00:43:40.988986: step: 796/463, loss: 1.5985252857208252 2023-01-24 00:43:41.612035: step: 798/463, loss: 1.8965002298355103 2023-01-24 00:43:42.377685: step: 800/463, loss: 1.0696918964385986 2023-01-24 00:43:43.023509: step: 802/463, loss: 0.7908170223236084 2023-01-24 00:43:43.699502: step: 804/463, loss: 0.37102848291397095 2023-01-24 00:43:44.304643: step: 806/463, loss: 0.8724681735038757 2023-01-24 00:43:44.926872: step: 808/463, loss: 0.39117011427879333 2023-01-24 00:43:45.540595: step: 810/463, loss: 4.087313652038574 2023-01-24 00:43:46.209912: step: 812/463, loss: 2.742398500442505 2023-01-24 00:43:46.850307: step: 814/463, loss: 1.2095057964324951 2023-01-24 00:43:47.436566: step: 816/463, loss: 2.0081231594085693 2023-01-24 00:43:48.032458: step: 818/463, loss: 0.5464215874671936 2023-01-24 00:43:48.627344: step: 820/463, loss: 0.2684078514575958 2023-01-24 00:43:49.186069: step: 822/463, loss: 1.005362868309021 2023-01-24 00:43:49.754434: step: 824/463, loss: 0.4200587272644043 2023-01-24 00:43:50.441150: step: 826/463, loss: 1.5029656887054443 2023-01-24 00:43:51.066136: step: 828/463, loss: 0.8657869696617126 2023-01-24 00:43:51.689126: step: 830/463, loss: 0.6040905714035034 2023-01-24 00:43:52.295559: step: 832/463, loss: 1.0720486640930176 2023-01-24 00:43:52.941535: step: 834/463, loss: 0.7822814583778381 2023-01-24 00:43:53.652739: step: 836/463, loss: 1.2791012525558472 2023-01-24 00:43:54.275243: step: 838/463, loss: 0.4298741817474365 2023-01-24 00:43:54.855118: step: 840/463, loss: 0.7364590764045715 2023-01-24 00:43:55.541261: step: 842/463, loss: 0.360119491815567 2023-01-24 00:43:56.243401: step: 844/463, loss: 0.8521668314933777 2023-01-24 00:43:56.844082: step: 846/463, loss: 1.3991374969482422 2023-01-24 00:43:57.396216: step: 848/463, loss: 1.010469675064087 2023-01-24 00:43:58.019260: step: 850/463, loss: 0.3396068811416626 2023-01-24 00:43:58.642456: step: 852/463, loss: 2.724222183227539 2023-01-24 00:43:59.200614: step: 854/463, loss: 0.3038453161716461 2023-01-24 00:43:59.796823: step: 856/463, loss: 0.2597924470901489 2023-01-24 00:44:00.403342: step: 858/463, loss: 0.5692875385284424 2023-01-24 00:44:01.009710: step: 860/463, loss: 3.0865492820739746 2023-01-24 00:44:01.692515: step: 862/463, loss: 3.1895039081573486 2023-01-24 00:44:02.295282: step: 864/463, loss: 0.8428069353103638 2023-01-24 00:44:02.901363: step: 866/463, loss: 0.7019940614700317 2023-01-24 00:44:03.531614: step: 868/463, loss: 2.2596797943115234 2023-01-24 00:44:04.148614: step: 870/463, loss: 4.213397026062012 2023-01-24 00:44:04.787656: step: 872/463, loss: 0.5768600702285767 2023-01-24 00:44:05.419073: step: 874/463, loss: 0.5022801756858826 2023-01-24 00:44:06.079100: step: 876/463, loss: 2.6310482025146484 2023-01-24 00:44:06.714127: step: 878/463, loss: 0.3212132453918457 2023-01-24 00:44:07.361412: step: 880/463, loss: 1.1684412956237793 2023-01-24 00:44:07.960190: step: 882/463, loss: 0.993817925453186 2023-01-24 00:44:08.625059: step: 884/463, loss: 0.6590524911880493 2023-01-24 00:44:09.229476: step: 886/463, loss: 0.24868057668209076 2023-01-24 00:44:09.865000: step: 888/463, loss: 2.462275743484497 2023-01-24 00:44:10.478902: step: 890/463, loss: 0.4116349220275879 2023-01-24 00:44:11.128797: step: 892/463, loss: 0.9475703835487366 2023-01-24 00:44:11.758504: step: 894/463, loss: 0.7511650919914246 2023-01-24 00:44:12.431969: step: 896/463, loss: 0.8268802165985107 2023-01-24 00:44:13.127286: step: 898/463, loss: 0.23336485028266907 2023-01-24 00:44:13.745850: step: 900/463, loss: 1.0560531616210938 2023-01-24 00:44:14.368948: step: 902/463, loss: 1.9181888103485107 2023-01-24 00:44:15.023002: step: 904/463, loss: 0.4635281562805176 2023-01-24 00:44:15.660490: step: 906/463, loss: 2.1090750694274902 2023-01-24 00:44:16.258260: step: 908/463, loss: 0.8077011704444885 2023-01-24 00:44:16.911394: step: 910/463, loss: 1.2597877979278564 2023-01-24 00:44:17.575973: step: 912/463, loss: 0.7707855105400085 2023-01-24 00:44:18.123415: step: 914/463, loss: 0.8095113039016724 2023-01-24 00:44:18.778370: step: 916/463, loss: 0.5734654068946838 2023-01-24 00:44:19.409190: step: 918/463, loss: 0.4746544361114502 2023-01-24 00:44:20.055127: step: 920/463, loss: 1.6225924491882324 2023-01-24 00:44:20.702935: step: 922/463, loss: 0.1303655058145523 2023-01-24 00:44:21.325720: step: 924/463, loss: 0.6425697803497314 2023-01-24 00:44:21.890165: step: 926/463, loss: 0.5694750547409058 ================================================== Loss: 1.092 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3120013432513432, 'r': 0.2847678294191577, 'f1': 0.2977631867140796}, 'combined': 0.21940445336826916, 'epoch': 4} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.35991473270676144, 'r': 0.2476563582135144, 'f1': 0.29341478981343805}, 'combined': 0.20642246519538357, 'epoch': 4} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3064311346548189, 'r': 0.27619504546686713, 'f1': 0.2905285208803173}, 'combined': 0.2140736469644443, 'epoch': 4} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.35963387992050416, 'r': 0.24527593026724218, 'f1': 0.29164524353057414}, 'combined': 0.20706812290670762, 'epoch': 4} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3256379261901891, 'r': 0.2867096731541703, 'f1': 0.3049364233143244}, 'combined': 0.2246899961263443, 'epoch': 4} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3684535250456828, 'r': 0.23496514976848928, 'f1': 0.28694417759525853}, 'combined': 0.20373036609263354, 'epoch': 4} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.28098290598290593, 'r': 0.31309523809523804, 'f1': 0.2961711711711711}, 'combined': 0.19744744744744738, 'epoch': 4} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.34615384615384615, 'r': 0.391304347826087, 'f1': 0.36734693877551017}, 'combined': 0.18367346938775508, 'epoch': 4} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5608552631578947, 'r': 0.15471869328493645, 'f1': 0.24253200568990038}, 'combined': 0.16168800379326692, 'epoch': 4} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3120013432513432, 'r': 0.2847678294191577, 'f1': 0.2977631867140796}, 'combined': 0.21940445336826916, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.35991473270676144, 'r': 0.2476563582135144, 'f1': 0.29341478981343805}, 'combined': 0.20642246519538357, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.28098290598290593, 'r': 0.31309523809523804, 'f1': 0.2961711711711711}, 'combined': 0.19744744744744738, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3064311346548189, 'r': 0.27619504546686713, 'f1': 0.2905285208803173}, 'combined': 0.2140736469644443, 'epoch': 4} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.35963387992050416, 'r': 0.24527593026724218, 'f1': 0.29164524353057414}, 'combined': 0.20706812290670762, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.34615384615384615, 'r': 0.391304347826087, 'f1': 0.36734693877551017}, 'combined': 0.18367346938775508, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3256379261901891, 'r': 0.2867096731541703, 'f1': 0.3049364233143244}, 'combined': 0.2246899961263443, 'epoch': 4} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3684535250456828, 'r': 0.23496514976848928, 'f1': 0.28694417759525853}, 'combined': 0.20373036609263354, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5608552631578947, 'r': 0.15471869328493645, 'f1': 0.24253200568990038}, 'combined': 0.16168800379326692, 'epoch': 4} ****************************** Epoch: 5 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:47:20.343397: step: 2/463, loss: 0.3863893747329712 2023-01-24 00:47:21.029123: step: 4/463, loss: 0.3575735092163086 2023-01-24 00:47:21.674703: step: 6/463, loss: 0.5962569713592529 2023-01-24 00:47:22.308628: step: 8/463, loss: 0.990675687789917 2023-01-24 00:47:22.967111: step: 10/463, loss: 2.215738296508789 2023-01-24 00:47:23.592591: step: 12/463, loss: 1.1157889366149902 2023-01-24 00:47:24.271176: step: 14/463, loss: 1.4605787992477417 2023-01-24 00:47:24.948156: step: 16/463, loss: 1.4854283332824707 2023-01-24 00:47:25.573084: step: 18/463, loss: 2.1818079948425293 2023-01-24 00:47:26.182068: step: 20/463, loss: 0.03924290090799332 2023-01-24 00:47:26.819496: step: 22/463, loss: 0.47864559292793274 2023-01-24 00:47:27.456434: step: 24/463, loss: 0.7163918614387512 2023-01-24 00:47:28.109248: step: 26/463, loss: 0.4209963083267212 2023-01-24 00:47:28.682415: step: 28/463, loss: 0.3823988139629364 2023-01-24 00:47:29.337609: step: 30/463, loss: 0.9405417442321777 2023-01-24 00:47:29.932479: step: 32/463, loss: 1.0441409349441528 2023-01-24 00:47:30.511684: step: 34/463, loss: 0.21052347123622894 2023-01-24 00:47:31.093374: step: 36/463, loss: 1.3335448503494263 2023-01-24 00:47:31.756188: step: 38/463, loss: 1.0264016389846802 2023-01-24 00:47:32.382098: step: 40/463, loss: 0.3724258542060852 2023-01-24 00:47:32.974073: step: 42/463, loss: 0.44833123683929443 2023-01-24 00:47:33.582448: step: 44/463, loss: 0.5111900568008423 2023-01-24 00:47:34.207614: step: 46/463, loss: 0.4780184328556061 2023-01-24 00:47:34.804672: step: 48/463, loss: 0.5115633010864258 2023-01-24 00:47:35.409924: step: 50/463, loss: 0.8130191564559937 2023-01-24 00:47:36.069800: step: 52/463, loss: 0.6649256944656372 2023-01-24 00:47:36.713619: step: 54/463, loss: 1.2249921560287476 2023-01-24 00:47:37.333585: step: 56/463, loss: 1.139028549194336 2023-01-24 00:47:37.919229: step: 58/463, loss: 0.8956627249717712 2023-01-24 00:47:38.558549: step: 60/463, loss: 1.716473937034607 2023-01-24 00:47:39.189043: step: 62/463, loss: 0.4602845311164856 2023-01-24 00:47:39.829291: step: 64/463, loss: 1.0957664251327515 2023-01-24 00:47:40.425057: step: 66/463, loss: 0.5952927470207214 2023-01-24 00:47:41.132478: step: 68/463, loss: 1.0120139122009277 2023-01-24 00:47:41.765745: step: 70/463, loss: 0.6634990572929382 2023-01-24 00:47:42.407324: step: 72/463, loss: 1.0147596597671509 2023-01-24 00:47:43.019320: step: 74/463, loss: 1.0784529447555542 2023-01-24 00:47:43.673453: step: 76/463, loss: 0.584472119808197 2023-01-24 00:47:44.223422: step: 78/463, loss: 0.5715597867965698 2023-01-24 00:47:44.847019: step: 80/463, loss: 0.3707471489906311 2023-01-24 00:47:45.430787: step: 82/463, loss: 0.4204487204551697 2023-01-24 00:47:46.027032: step: 84/463, loss: 0.5014489889144897 2023-01-24 00:47:46.674841: step: 86/463, loss: 1.4428887367248535 2023-01-24 00:47:47.234412: step: 88/463, loss: 0.20053860545158386 2023-01-24 00:47:47.891761: step: 90/463, loss: 0.28778016567230225 2023-01-24 00:47:48.505327: step: 92/463, loss: 0.5913171768188477 2023-01-24 00:47:49.064838: step: 94/463, loss: 1.3022630214691162 2023-01-24 00:47:49.643652: step: 96/463, loss: 0.6402971148490906 2023-01-24 00:47:50.190822: step: 98/463, loss: 0.6445887088775635 2023-01-24 00:47:50.746485: step: 100/463, loss: 0.9618914127349854 2023-01-24 00:47:51.355993: step: 102/463, loss: 0.8997213244438171 2023-01-24 00:47:51.987066: step: 104/463, loss: 0.6963939666748047 2023-01-24 00:47:52.623584: step: 106/463, loss: 1.0011179447174072 2023-01-24 00:47:53.248347: step: 108/463, loss: 0.5869520902633667 2023-01-24 00:47:53.916889: step: 110/463, loss: 2.320164918899536 2023-01-24 00:47:54.520157: step: 112/463, loss: 0.31800299882888794 2023-01-24 00:47:55.147375: step: 114/463, loss: 4.031039237976074 2023-01-24 00:47:55.755137: step: 116/463, loss: 0.9492335915565491 2023-01-24 00:47:56.398698: step: 118/463, loss: 0.7619549632072449 2023-01-24 00:47:57.026676: step: 120/463, loss: 0.6897922158241272 2023-01-24 00:47:57.725702: step: 122/463, loss: 0.5323767066001892 2023-01-24 00:47:58.310076: step: 124/463, loss: 0.6168705224990845 2023-01-24 00:47:58.901908: step: 126/463, loss: 1.9434114694595337 2023-01-24 00:47:59.563525: step: 128/463, loss: 0.773533284664154 2023-01-24 00:48:00.236083: step: 130/463, loss: 0.5842805504798889 2023-01-24 00:48:00.910254: step: 132/463, loss: 0.8484742045402527 2023-01-24 00:48:01.511350: step: 134/463, loss: 0.3523135185241699 2023-01-24 00:48:02.196713: step: 136/463, loss: 0.21999648213386536 2023-01-24 00:48:02.820527: step: 138/463, loss: 1.3752838373184204 2023-01-24 00:48:03.458622: step: 140/463, loss: 0.4521942138671875 2023-01-24 00:48:04.079940: step: 142/463, loss: 0.11112946271896362 2023-01-24 00:48:04.710854: step: 144/463, loss: 0.9559813737869263 2023-01-24 00:48:05.356056: step: 146/463, loss: 0.8978605270385742 2023-01-24 00:48:06.003508: step: 148/463, loss: 0.6189537644386292 2023-01-24 00:48:06.761462: step: 150/463, loss: 1.2362291812896729 2023-01-24 00:48:07.310178: step: 152/463, loss: 0.44211694598197937 2023-01-24 00:48:07.842954: step: 154/463, loss: 1.1691150665283203 2023-01-24 00:48:08.496049: step: 156/463, loss: 0.8341092467308044 2023-01-24 00:48:09.099503: step: 158/463, loss: 1.1253403425216675 2023-01-24 00:48:09.680666: step: 160/463, loss: 0.5984249711036682 2023-01-24 00:48:10.307840: step: 162/463, loss: 1.8284051418304443 2023-01-24 00:48:10.891703: step: 164/463, loss: 0.4714689254760742 2023-01-24 00:48:11.530774: step: 166/463, loss: 1.2042419910430908 2023-01-24 00:48:12.138953: step: 168/463, loss: 0.22368749976158142 2023-01-24 00:48:12.794830: step: 170/463, loss: 0.5675397515296936 2023-01-24 00:48:13.393991: step: 172/463, loss: 1.3883880376815796 2023-01-24 00:48:14.059204: step: 174/463, loss: 0.4046820104122162 2023-01-24 00:48:14.662571: step: 176/463, loss: 0.13495518267154694 2023-01-24 00:48:15.306776: step: 178/463, loss: 0.228426992893219 2023-01-24 00:48:15.948839: step: 180/463, loss: 0.3184826076030731 2023-01-24 00:48:16.510423: step: 182/463, loss: 0.2725116014480591 2023-01-24 00:48:17.106468: step: 184/463, loss: 3.4982118606567383 2023-01-24 00:48:17.757521: step: 186/463, loss: 0.647801399230957 2023-01-24 00:48:18.364135: step: 188/463, loss: 0.8511626124382019 2023-01-24 00:48:19.003395: step: 190/463, loss: 0.274105429649353 2023-01-24 00:48:19.701678: step: 192/463, loss: 0.4097806215286255 2023-01-24 00:48:20.346402: step: 194/463, loss: 0.1748201698064804 2023-01-24 00:48:20.920781: step: 196/463, loss: 1.467149019241333 2023-01-24 00:48:21.493423: step: 198/463, loss: 0.5210391879081726 2023-01-24 00:48:22.246541: step: 200/463, loss: 0.7888125777244568 2023-01-24 00:48:22.934365: step: 202/463, loss: 0.3475758731365204 2023-01-24 00:48:23.592021: step: 204/463, loss: 0.5458464622497559 2023-01-24 00:48:24.208741: step: 206/463, loss: 0.20726001262664795 2023-01-24 00:48:24.840751: step: 208/463, loss: 0.5466622710227966 2023-01-24 00:48:25.489453: step: 210/463, loss: 0.47893470525741577 2023-01-24 00:48:26.207010: step: 212/463, loss: 6.054546356201172 2023-01-24 00:48:26.836823: step: 214/463, loss: 1.2159669399261475 2023-01-24 00:48:27.516747: step: 216/463, loss: 1.8807878494262695 2023-01-24 00:48:28.146606: step: 218/463, loss: 1.988013505935669 2023-01-24 00:48:28.782431: step: 220/463, loss: 2.6662492752075195 2023-01-24 00:48:29.411229: step: 222/463, loss: 0.2902255058288574 2023-01-24 00:48:30.006650: step: 224/463, loss: 0.9989038109779358 2023-01-24 00:48:30.529336: step: 226/463, loss: 0.7414028644561768 2023-01-24 00:48:31.153014: step: 228/463, loss: 1.2251312732696533 2023-01-24 00:48:31.753246: step: 230/463, loss: 0.2905961573123932 2023-01-24 00:48:32.318354: step: 232/463, loss: 0.3173767924308777 2023-01-24 00:48:32.906965: step: 234/463, loss: 0.32189154624938965 2023-01-24 00:48:33.564486: step: 236/463, loss: 0.8532045483589172 2023-01-24 00:48:34.182347: step: 238/463, loss: 0.3094828426837921 2023-01-24 00:48:34.823836: step: 240/463, loss: 0.6980500221252441 2023-01-24 00:48:35.412283: step: 242/463, loss: 0.5545303821563721 2023-01-24 00:48:36.113358: step: 244/463, loss: 1.0029618740081787 2023-01-24 00:48:36.790733: step: 246/463, loss: 0.749351441860199 2023-01-24 00:48:37.380697: step: 248/463, loss: 0.23448096215724945 2023-01-24 00:48:38.039989: step: 250/463, loss: 0.6367695927619934 2023-01-24 00:48:38.662761: step: 252/463, loss: 1.3449070453643799 2023-01-24 00:48:39.289732: step: 254/463, loss: 1.880623698234558 2023-01-24 00:48:39.901431: step: 256/463, loss: 1.1597869396209717 2023-01-24 00:48:40.465159: step: 258/463, loss: 1.1038767099380493 2023-01-24 00:48:41.089980: step: 260/463, loss: 0.32010960578918457 2023-01-24 00:48:41.700282: step: 262/463, loss: 0.2911424934864044 2023-01-24 00:48:42.346166: step: 264/463, loss: 0.34550392627716064 2023-01-24 00:48:42.983976: step: 266/463, loss: 0.2702982425689697 2023-01-24 00:48:43.632855: step: 268/463, loss: 0.6505122184753418 2023-01-24 00:48:44.292913: step: 270/463, loss: 0.64354407787323 2023-01-24 00:48:44.944388: step: 272/463, loss: 0.2653587758541107 2023-01-24 00:48:45.558674: step: 274/463, loss: 0.5060144066810608 2023-01-24 00:48:46.187343: step: 276/463, loss: 0.26335543394088745 2023-01-24 00:48:46.835085: step: 278/463, loss: 0.5408301949501038 2023-01-24 00:48:47.443444: step: 280/463, loss: 2.705864667892456 2023-01-24 00:48:48.138469: step: 282/463, loss: 1.0373209714889526 2023-01-24 00:48:48.728589: step: 284/463, loss: 0.9243409037590027 2023-01-24 00:48:49.450000: step: 286/463, loss: 5.223256587982178 2023-01-24 00:48:50.097892: step: 288/463, loss: 0.20810578763484955 2023-01-24 00:48:50.705166: step: 290/463, loss: 0.9639858603477478 2023-01-24 00:48:51.342405: step: 292/463, loss: 4.869887351989746 2023-01-24 00:48:52.004800: step: 294/463, loss: 1.241434931755066 2023-01-24 00:48:52.664162: step: 296/463, loss: 0.8284125328063965 2023-01-24 00:48:53.286228: step: 298/463, loss: 2.8179001808166504 2023-01-24 00:48:53.953085: step: 300/463, loss: 0.9180371165275574 2023-01-24 00:48:54.560780: step: 302/463, loss: 0.3724747896194458 2023-01-24 00:48:55.162431: step: 304/463, loss: 1.7364217042922974 2023-01-24 00:48:55.781144: step: 306/463, loss: 0.7017210125923157 2023-01-24 00:48:56.428353: step: 308/463, loss: 0.36338651180267334 2023-01-24 00:48:56.980200: step: 310/463, loss: 0.6489390730857849 2023-01-24 00:48:57.515333: step: 312/463, loss: 0.2740022540092468 2023-01-24 00:48:58.214301: step: 314/463, loss: 0.6766409873962402 2023-01-24 00:48:58.887597: step: 316/463, loss: 2.06605863571167 2023-01-24 00:48:59.504523: step: 318/463, loss: 2.4657034873962402 2023-01-24 00:49:00.096464: step: 320/463, loss: 0.5121650695800781 2023-01-24 00:49:00.713931: step: 322/463, loss: 0.32155340909957886 2023-01-24 00:49:01.306567: step: 324/463, loss: 0.6549612879753113 2023-01-24 00:49:01.872688: step: 326/463, loss: 0.11490531265735626 2023-01-24 00:49:02.466015: step: 328/463, loss: 0.9924050569534302 2023-01-24 00:49:03.162122: step: 330/463, loss: 0.6189658045768738 2023-01-24 00:49:03.759586: step: 332/463, loss: 0.6732014417648315 2023-01-24 00:49:04.431490: step: 334/463, loss: 0.22540482878684998 2023-01-24 00:49:05.018313: step: 336/463, loss: 0.7905368208885193 2023-01-24 00:49:05.796406: step: 338/463, loss: 1.0343519449234009 2023-01-24 00:49:06.417040: step: 340/463, loss: 0.7904757261276245 2023-01-24 00:49:07.052417: step: 342/463, loss: 0.23388974368572235 2023-01-24 00:49:07.686561: step: 344/463, loss: 0.8804963827133179 2023-01-24 00:49:08.340720: step: 346/463, loss: 0.47160154581069946 2023-01-24 00:49:09.031303: step: 348/463, loss: 0.26067930459976196 2023-01-24 00:49:09.600934: step: 350/463, loss: 0.17925947904586792 2023-01-24 00:49:10.228704: step: 352/463, loss: 1.664016604423523 2023-01-24 00:49:10.869242: step: 354/463, loss: 1.2224078178405762 2023-01-24 00:49:11.519242: step: 356/463, loss: 1.1642916202545166 2023-01-24 00:49:12.178149: step: 358/463, loss: 0.48592278361320496 2023-01-24 00:49:12.753078: step: 360/463, loss: 0.6521704792976379 2023-01-24 00:49:13.391746: step: 362/463, loss: 1.6079185009002686 2023-01-24 00:49:14.014192: step: 364/463, loss: 0.714464545249939 2023-01-24 00:49:14.598068: step: 366/463, loss: 1.676059365272522 2023-01-24 00:49:15.288559: step: 368/463, loss: 0.36790716648101807 2023-01-24 00:49:15.969496: step: 370/463, loss: 1.1975822448730469 2023-01-24 00:49:16.590545: step: 372/463, loss: 0.31047096848487854 2023-01-24 00:49:17.236066: step: 374/463, loss: 1.584760069847107 2023-01-24 00:49:17.848277: step: 376/463, loss: 3.1279797554016113 2023-01-24 00:49:18.439690: step: 378/463, loss: 0.6355035305023193 2023-01-24 00:49:19.081966: step: 380/463, loss: 0.8016023635864258 2023-01-24 00:49:19.736020: step: 382/463, loss: 1.8463962078094482 2023-01-24 00:49:20.304409: step: 384/463, loss: 0.736260175704956 2023-01-24 00:49:20.940756: step: 386/463, loss: 0.45459505915641785 2023-01-24 00:49:21.550196: step: 388/463, loss: 1.3248239755630493 2023-01-24 00:49:22.162538: step: 390/463, loss: 1.2044389247894287 2023-01-24 00:49:22.793534: step: 392/463, loss: 0.9222238659858704 2023-01-24 00:49:23.447748: step: 394/463, loss: 0.8253852128982544 2023-01-24 00:49:24.087418: step: 396/463, loss: 0.4306389093399048 2023-01-24 00:49:24.686072: step: 398/463, loss: 1.731351613998413 2023-01-24 00:49:25.381922: step: 400/463, loss: 1.2463810443878174 2023-01-24 00:49:25.993156: step: 402/463, loss: 0.9045649170875549 2023-01-24 00:49:26.644956: step: 404/463, loss: 0.7475670576095581 2023-01-24 00:49:27.247636: step: 406/463, loss: 3.3206732273101807 2023-01-24 00:49:27.873510: step: 408/463, loss: 0.3650139272212982 2023-01-24 00:49:28.547683: step: 410/463, loss: 0.3127685785293579 2023-01-24 00:49:29.139462: step: 412/463, loss: 0.6187971234321594 2023-01-24 00:49:29.792586: step: 414/463, loss: 0.6707149147987366 2023-01-24 00:49:30.449921: step: 416/463, loss: 0.9502821564674377 2023-01-24 00:49:31.054369: step: 418/463, loss: 3.609078884124756 2023-01-24 00:49:31.673309: step: 420/463, loss: 0.39151954650878906 2023-01-24 00:49:32.400545: step: 422/463, loss: 0.7408649921417236 2023-01-24 00:49:33.022132: step: 424/463, loss: 1.4824883937835693 2023-01-24 00:49:33.703568: step: 426/463, loss: 0.619574248790741 2023-01-24 00:49:34.374328: step: 428/463, loss: 1.531851887702942 2023-01-24 00:49:35.028501: step: 430/463, loss: 0.6259394288063049 2023-01-24 00:49:35.629877: step: 432/463, loss: 0.3961343467235565 2023-01-24 00:49:36.253849: step: 434/463, loss: 0.40586528182029724 2023-01-24 00:49:36.900280: step: 436/463, loss: 0.8019479513168335 2023-01-24 00:49:37.545799: step: 438/463, loss: 0.8769626021385193 2023-01-24 00:49:38.227912: step: 440/463, loss: 1.0228432416915894 2023-01-24 00:49:38.819219: step: 442/463, loss: 2.8285961151123047 2023-01-24 00:49:39.461888: step: 444/463, loss: 0.715281069278717 2023-01-24 00:49:40.036506: step: 446/463, loss: 0.9363575577735901 2023-01-24 00:49:40.615845: step: 448/463, loss: 0.5015245676040649 2023-01-24 00:49:41.254770: step: 450/463, loss: 1.4283026456832886 2023-01-24 00:49:41.928382: step: 452/463, loss: 0.4573126435279846 2023-01-24 00:49:42.558422: step: 454/463, loss: 0.9487614035606384 2023-01-24 00:49:43.190134: step: 456/463, loss: 0.5394518375396729 2023-01-24 00:49:43.880045: step: 458/463, loss: 1.664815068244934 2023-01-24 00:49:44.487281: step: 460/463, loss: 0.953886091709137 2023-01-24 00:49:45.099158: step: 462/463, loss: 0.5573630928993225 2023-01-24 00:49:45.748528: step: 464/463, loss: 1.1987260580062866 2023-01-24 00:49:46.350318: step: 466/463, loss: 0.5955697298049927 2023-01-24 00:49:46.960132: step: 468/463, loss: 0.25831711292266846 2023-01-24 00:49:47.541687: step: 470/463, loss: 0.8151804208755493 2023-01-24 00:49:48.153771: step: 472/463, loss: 0.28837278485298157 2023-01-24 00:49:48.753710: step: 474/463, loss: 0.5213176012039185 2023-01-24 00:49:49.369071: step: 476/463, loss: 0.7198722958564758 2023-01-24 00:49:49.995500: step: 478/463, loss: 0.22036226093769073 2023-01-24 00:49:50.630037: step: 480/463, loss: 0.6231991052627563 2023-01-24 00:49:51.205864: step: 482/463, loss: 1.1199618577957153 2023-01-24 00:49:51.882742: step: 484/463, loss: 0.4148159921169281 2023-01-24 00:49:52.550729: step: 486/463, loss: 0.6651439666748047 2023-01-24 00:49:53.157639: step: 488/463, loss: 2.4023184776306152 2023-01-24 00:49:53.772440: step: 490/463, loss: 0.8105908632278442 2023-01-24 00:49:54.471006: step: 492/463, loss: 1.7775897979736328 2023-01-24 00:49:55.040690: step: 494/463, loss: 1.1228872537612915 2023-01-24 00:49:55.791428: step: 496/463, loss: 1.0675991773605347 2023-01-24 00:49:56.423709: step: 498/463, loss: 0.28961098194122314 2023-01-24 00:49:57.029119: step: 500/463, loss: 0.212953120470047 2023-01-24 00:49:57.716407: step: 502/463, loss: 0.18485136330127716 2023-01-24 00:49:58.349244: step: 504/463, loss: 0.861315131187439 2023-01-24 00:49:58.912260: step: 506/463, loss: 0.3703833222389221 2023-01-24 00:49:59.554280: step: 508/463, loss: 0.27352622151374817 2023-01-24 00:50:00.161900: step: 510/463, loss: 0.5304149389266968 2023-01-24 00:50:00.863981: step: 512/463, loss: 2.1279239654541016 2023-01-24 00:50:01.482388: step: 514/463, loss: 0.8828540444374084 2023-01-24 00:50:02.174493: step: 516/463, loss: 0.22110137343406677 2023-01-24 00:50:02.886216: step: 518/463, loss: 2.6386022567749023 2023-01-24 00:50:03.505775: step: 520/463, loss: 0.5568625926971436 2023-01-24 00:50:04.164567: step: 522/463, loss: 0.1527269184589386 2023-01-24 00:50:04.804629: step: 524/463, loss: 0.21827933192253113 2023-01-24 00:50:05.454028: step: 526/463, loss: 0.14788824319839478 2023-01-24 00:50:06.072535: step: 528/463, loss: 0.4158935546875 2023-01-24 00:50:06.692233: step: 530/463, loss: 0.25702232122421265 2023-01-24 00:50:07.269372: step: 532/463, loss: 0.37308844923973083 2023-01-24 00:50:07.893710: step: 534/463, loss: 0.8332465291023254 2023-01-24 00:50:08.469748: step: 536/463, loss: 2.704768419265747 2023-01-24 00:50:09.105557: step: 538/463, loss: 0.33523029088974 2023-01-24 00:50:09.722166: step: 540/463, loss: 0.3258781433105469 2023-01-24 00:50:10.357171: step: 542/463, loss: 0.6666723489761353 2023-01-24 00:50:11.006127: step: 544/463, loss: 0.64485764503479 2023-01-24 00:50:11.622527: step: 546/463, loss: 0.21757450699806213 2023-01-24 00:50:12.280080: step: 548/463, loss: 0.2513701021671295 2023-01-24 00:50:12.888766: step: 550/463, loss: 0.16574302315711975 2023-01-24 00:50:13.506275: step: 552/463, loss: 0.4610222280025482 2023-01-24 00:50:14.126138: step: 554/463, loss: 0.8860491514205933 2023-01-24 00:50:14.734852: step: 556/463, loss: 0.6384575963020325 2023-01-24 00:50:15.350491: step: 558/463, loss: 0.7664865851402283 2023-01-24 00:50:15.987277: step: 560/463, loss: 0.788520336151123 2023-01-24 00:50:16.635349: step: 562/463, loss: 0.3473874032497406 2023-01-24 00:50:17.284113: step: 564/463, loss: 0.4502032697200775 2023-01-24 00:50:17.865322: step: 566/463, loss: 1.3270275592803955 2023-01-24 00:50:18.503826: step: 568/463, loss: 1.0448793172836304 2023-01-24 00:50:19.113390: step: 570/463, loss: 0.6215246915817261 2023-01-24 00:50:19.747795: step: 572/463, loss: 0.7589542269706726 2023-01-24 00:50:20.463957: step: 574/463, loss: 0.8095200061798096 2023-01-24 00:50:21.064494: step: 576/463, loss: 1.8550550937652588 2023-01-24 00:50:21.694280: step: 578/463, loss: 0.43327796459198 2023-01-24 00:50:22.375370: step: 580/463, loss: 0.40627580881118774 2023-01-24 00:50:22.959991: step: 582/463, loss: 1.1020116806030273 2023-01-24 00:50:23.681648: step: 584/463, loss: 3.2649688720703125 2023-01-24 00:50:24.243042: step: 586/463, loss: 0.5661211013793945 2023-01-24 00:50:24.826236: step: 588/463, loss: 0.5098762512207031 2023-01-24 00:50:25.434997: step: 590/463, loss: 0.5354594588279724 2023-01-24 00:50:26.047746: step: 592/463, loss: 0.9716578722000122 2023-01-24 00:50:26.647404: step: 594/463, loss: 1.1682329177856445 2023-01-24 00:50:27.203860: step: 596/463, loss: 0.4516056776046753 2023-01-24 00:50:27.795644: step: 598/463, loss: 8.725214004516602 2023-01-24 00:50:28.428522: step: 600/463, loss: 0.46109700202941895 2023-01-24 00:50:28.987349: step: 602/463, loss: 0.45094096660614014 2023-01-24 00:50:29.598769: step: 604/463, loss: 0.5413081645965576 2023-01-24 00:50:30.272626: step: 606/463, loss: 1.5756171941757202 2023-01-24 00:50:30.978430: step: 608/463, loss: 1.2951186895370483 2023-01-24 00:50:31.526635: step: 610/463, loss: 1.0512694120407104 2023-01-24 00:50:32.137404: step: 612/463, loss: 1.204910397529602 2023-01-24 00:50:32.754059: step: 614/463, loss: 0.799831211566925 2023-01-24 00:50:33.368664: step: 616/463, loss: 0.7554730772972107 2023-01-24 00:50:33.937691: step: 618/463, loss: 0.6233709454536438 2023-01-24 00:50:34.635553: step: 620/463, loss: 0.3549632728099823 2023-01-24 00:50:35.241201: step: 622/463, loss: 0.9039913415908813 2023-01-24 00:50:35.834116: step: 624/463, loss: 0.48011353611946106 2023-01-24 00:50:36.498178: step: 626/463, loss: 1.6705304384231567 2023-01-24 00:50:37.064492: step: 628/463, loss: 0.6088965535163879 2023-01-24 00:50:37.697922: step: 630/463, loss: 0.7361432313919067 2023-01-24 00:50:38.274206: step: 632/463, loss: 2.712071418762207 2023-01-24 00:50:38.895101: step: 634/463, loss: 1.0086297988891602 2023-01-24 00:50:39.514505: step: 636/463, loss: 1.0237398147583008 2023-01-24 00:50:40.250151: step: 638/463, loss: 5.167460918426514 2023-01-24 00:50:40.927344: step: 640/463, loss: 0.8206397891044617 2023-01-24 00:50:41.509657: step: 642/463, loss: 0.6851760149002075 2023-01-24 00:50:42.170666: step: 644/463, loss: 0.6574079394340515 2023-01-24 00:50:42.725558: step: 646/463, loss: 0.23733748495578766 2023-01-24 00:50:43.425991: step: 648/463, loss: 0.4620678722858429 2023-01-24 00:50:44.187383: step: 650/463, loss: 0.5614731907844543 2023-01-24 00:50:44.798329: step: 652/463, loss: 1.0454063415527344 2023-01-24 00:50:45.410891: step: 654/463, loss: 0.2254156470298767 2023-01-24 00:50:45.961016: step: 656/463, loss: 0.47361481189727783 2023-01-24 00:50:46.600258: step: 658/463, loss: 1.363447904586792 2023-01-24 00:50:47.144044: step: 660/463, loss: 0.6232132911682129 2023-01-24 00:50:47.767487: step: 662/463, loss: 0.6385989785194397 2023-01-24 00:50:48.391597: step: 664/463, loss: 0.6713772416114807 2023-01-24 00:50:49.052804: step: 666/463, loss: 0.2960265576839447 2023-01-24 00:50:49.628911: step: 668/463, loss: 1.056592583656311 2023-01-24 00:50:50.240031: step: 670/463, loss: 0.6035277843475342 2023-01-24 00:50:50.845132: step: 672/463, loss: 0.9271546602249146 2023-01-24 00:50:51.480871: step: 674/463, loss: 0.7067851424217224 2023-01-24 00:50:52.073133: step: 676/463, loss: 0.195792093873024 2023-01-24 00:50:52.680655: step: 678/463, loss: 0.3384869396686554 2023-01-24 00:50:53.375407: step: 680/463, loss: 7.769752025604248 2023-01-24 00:50:53.972802: step: 682/463, loss: 0.9718935489654541 2023-01-24 00:50:54.560221: step: 684/463, loss: 0.16098447144031525 2023-01-24 00:50:55.159320: step: 686/463, loss: 0.2969854772090912 2023-01-24 00:50:55.779360: step: 688/463, loss: 1.5008783340454102 2023-01-24 00:50:56.402187: step: 690/463, loss: 0.7111888527870178 2023-01-24 00:50:56.988767: step: 692/463, loss: 1.0745080709457397 2023-01-24 00:50:57.724588: step: 694/463, loss: 0.7414842844009399 2023-01-24 00:50:58.454900: step: 696/463, loss: 1.5490267276763916 2023-01-24 00:50:59.171995: step: 698/463, loss: 0.25031721591949463 2023-01-24 00:50:59.801485: step: 700/463, loss: 0.5157595276832581 2023-01-24 00:51:00.429995: step: 702/463, loss: 1.186463475227356 2023-01-24 00:51:01.082976: step: 704/463, loss: 1.1104087829589844 2023-01-24 00:51:01.723477: step: 706/463, loss: 0.963397741317749 2023-01-24 00:51:02.378371: step: 708/463, loss: 0.5998778343200684 2023-01-24 00:51:03.027862: step: 710/463, loss: 0.647660493850708 2023-01-24 00:51:03.617644: step: 712/463, loss: 0.4265337884426117 2023-01-24 00:51:04.217815: step: 714/463, loss: 0.41238299012184143 2023-01-24 00:51:04.814059: step: 716/463, loss: 1.5473061800003052 2023-01-24 00:51:05.402030: step: 718/463, loss: 0.6642049551010132 2023-01-24 00:51:05.994323: step: 720/463, loss: 0.33834731578826904 2023-01-24 00:51:06.619312: step: 722/463, loss: 2.062776803970337 2023-01-24 00:51:07.193840: step: 724/463, loss: 1.3618738651275635 2023-01-24 00:51:07.831891: step: 726/463, loss: 2.342925786972046 2023-01-24 00:51:08.491774: step: 728/463, loss: 0.9797955751419067 2023-01-24 00:51:09.113543: step: 730/463, loss: 1.335374116897583 2023-01-24 00:51:09.754649: step: 732/463, loss: 0.6890373826026917 2023-01-24 00:51:10.436270: step: 734/463, loss: 0.30695655941963196 2023-01-24 00:51:11.031775: step: 736/463, loss: 0.9697650671005249 2023-01-24 00:51:11.603331: step: 738/463, loss: 0.6149583458900452 2023-01-24 00:51:12.224360: step: 740/463, loss: 1.1344510316848755 2023-01-24 00:51:12.846477: step: 742/463, loss: 1.3802719116210938 2023-01-24 00:51:13.480688: step: 744/463, loss: 1.6088602542877197 2023-01-24 00:51:14.056709: step: 746/463, loss: 1.177931547164917 2023-01-24 00:51:14.797387: step: 748/463, loss: 2.1819450855255127 2023-01-24 00:51:15.517796: step: 750/463, loss: 0.8506755232810974 2023-01-24 00:51:16.140964: step: 752/463, loss: 0.5677505731582642 2023-01-24 00:51:16.808783: step: 754/463, loss: 0.9900178909301758 2023-01-24 00:51:17.473938: step: 756/463, loss: 0.5651147961616516 2023-01-24 00:51:18.174743: step: 758/463, loss: 0.9057740569114685 2023-01-24 00:51:18.820201: step: 760/463, loss: 0.6004732847213745 2023-01-24 00:51:19.527432: step: 762/463, loss: 0.5382717847824097 2023-01-24 00:51:20.154081: step: 764/463, loss: 0.8502494692802429 2023-01-24 00:51:20.686314: step: 766/463, loss: 0.7398142218589783 2023-01-24 00:51:21.295956: step: 768/463, loss: 0.18497681617736816 2023-01-24 00:51:21.887338: step: 770/463, loss: 0.2375188022851944 2023-01-24 00:51:22.480835: step: 772/463, loss: 0.29508715867996216 2023-01-24 00:51:23.108003: step: 774/463, loss: 0.8006939888000488 2023-01-24 00:51:23.790828: step: 776/463, loss: 0.36238810420036316 2023-01-24 00:51:24.396157: step: 778/463, loss: 0.4047558605670929 2023-01-24 00:51:24.975640: step: 780/463, loss: 0.22463026642799377 2023-01-24 00:51:25.607532: step: 782/463, loss: 1.0848793983459473 2023-01-24 00:51:26.170600: step: 784/463, loss: 0.2838694453239441 2023-01-24 00:51:26.795311: step: 786/463, loss: 0.8061056733131409 2023-01-24 00:51:27.455003: step: 788/463, loss: 2.629643678665161 2023-01-24 00:51:28.081741: step: 790/463, loss: 0.38516557216644287 2023-01-24 00:51:28.689249: step: 792/463, loss: 0.544502854347229 2023-01-24 00:51:29.329108: step: 794/463, loss: 0.30546462535858154 2023-01-24 00:51:29.918758: step: 796/463, loss: 1.0400903224945068 2023-01-24 00:51:30.585611: step: 798/463, loss: 1.4803717136383057 2023-01-24 00:51:31.194793: step: 800/463, loss: 0.8701518774032593 2023-01-24 00:51:31.784617: step: 802/463, loss: 0.1340070366859436 2023-01-24 00:51:32.445454: step: 804/463, loss: 0.11923053115606308 2023-01-24 00:51:33.094438: step: 806/463, loss: 2.8427577018737793 2023-01-24 00:51:33.707502: step: 808/463, loss: 1.1224031448364258 2023-01-24 00:51:34.326945: step: 810/463, loss: 14.18635368347168 2023-01-24 00:51:35.007960: step: 812/463, loss: 1.4548835754394531 2023-01-24 00:51:35.593349: step: 814/463, loss: 1.617120385169983 2023-01-24 00:51:36.257016: step: 816/463, loss: 2.059990406036377 2023-01-24 00:51:36.832130: step: 818/463, loss: 0.8708318471908569 2023-01-24 00:51:37.468746: step: 820/463, loss: 1.3516734838485718 2023-01-24 00:51:38.059580: step: 822/463, loss: 1.3701775074005127 2023-01-24 00:51:38.681297: step: 824/463, loss: 1.0117342472076416 2023-01-24 00:51:39.259038: step: 826/463, loss: 3.3929145336151123 2023-01-24 00:51:39.841683: step: 828/463, loss: 0.8460104465484619 2023-01-24 00:51:40.441531: step: 830/463, loss: 0.7734691500663757 2023-01-24 00:51:41.065545: step: 832/463, loss: 0.4168839454650879 2023-01-24 00:51:41.633680: step: 834/463, loss: 0.4539085626602173 2023-01-24 00:51:42.186103: step: 836/463, loss: 0.7054557800292969 2023-01-24 00:51:42.760686: step: 838/463, loss: 0.6881542801856995 2023-01-24 00:51:43.436499: step: 840/463, loss: 0.7078602910041809 2023-01-24 00:51:44.036834: step: 842/463, loss: 0.7923325896263123 2023-01-24 00:51:44.729239: step: 844/463, loss: 0.8482879996299744 2023-01-24 00:51:45.282468: step: 846/463, loss: 1.0969957113265991 2023-01-24 00:51:45.852816: step: 848/463, loss: 0.21337513625621796 2023-01-24 00:51:46.528006: step: 850/463, loss: 1.688162088394165 2023-01-24 00:51:47.087346: step: 852/463, loss: 0.2040848433971405 2023-01-24 00:51:47.665296: step: 854/463, loss: 0.7207590341567993 2023-01-24 00:51:48.354406: step: 856/463, loss: 0.251441091299057 2023-01-24 00:51:48.949133: step: 858/463, loss: 0.8844625949859619 2023-01-24 00:51:49.597246: step: 860/463, loss: 0.737562358379364 2023-01-24 00:51:50.251834: step: 862/463, loss: 0.5874814391136169 2023-01-24 00:51:50.854664: step: 864/463, loss: 0.19034340977668762 2023-01-24 00:51:51.542661: step: 866/463, loss: 0.1941864937543869 2023-01-24 00:51:52.240943: step: 868/463, loss: 0.9642801880836487 2023-01-24 00:51:52.859528: step: 870/463, loss: 3.889802932739258 2023-01-24 00:51:53.439498: step: 872/463, loss: 0.9385435581207275 2023-01-24 00:51:54.084576: step: 874/463, loss: 1.0203657150268555 2023-01-24 00:51:54.690098: step: 876/463, loss: 0.23191100358963013 2023-01-24 00:51:55.334753: step: 878/463, loss: 1.2247843742370605 2023-01-24 00:51:55.963044: step: 880/463, loss: 1.143066644668579 2023-01-24 00:51:56.606497: step: 882/463, loss: 0.8988180160522461 2023-01-24 00:51:57.234912: step: 884/463, loss: 0.9694375395774841 2023-01-24 00:51:57.824958: step: 886/463, loss: 1.17799973487854 2023-01-24 00:51:58.411249: step: 888/463, loss: 0.4216225743293762 2023-01-24 00:51:59.016706: step: 890/463, loss: 0.7219412922859192 2023-01-24 00:51:59.664097: step: 892/463, loss: 0.45210739970207214 2023-01-24 00:52:00.256154: step: 894/463, loss: 0.5940302610397339 2023-01-24 00:52:00.821213: step: 896/463, loss: 0.3613348603248596 2023-01-24 00:52:01.464594: step: 898/463, loss: 0.9214510917663574 2023-01-24 00:52:02.072914: step: 900/463, loss: 0.704590380191803 2023-01-24 00:52:02.803437: step: 902/463, loss: 0.5595338344573975 2023-01-24 00:52:03.447971: step: 904/463, loss: 0.8539330959320068 2023-01-24 00:52:03.998141: step: 906/463, loss: 0.15769706666469574 2023-01-24 00:52:04.634710: step: 908/463, loss: 1.3529388904571533 2023-01-24 00:52:05.258079: step: 910/463, loss: 1.0600653886795044 2023-01-24 00:52:05.851775: step: 912/463, loss: 0.5684487223625183 2023-01-24 00:52:06.467357: step: 914/463, loss: 0.8429340124130249 2023-01-24 00:52:07.068156: step: 916/463, loss: 1.2097488641738892 2023-01-24 00:52:07.634714: step: 918/463, loss: 0.7614465951919556 2023-01-24 00:52:08.244337: step: 920/463, loss: 0.5025354027748108 2023-01-24 00:52:08.963756: step: 922/463, loss: 0.668211042881012 2023-01-24 00:52:09.536939: step: 924/463, loss: 0.5085679888725281 2023-01-24 00:52:10.171573: step: 926/463, loss: 0.4125468134880066 ================================================== Loss: 0.963 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3296252358752359, 'r': 0.29272222085315064, 'f1': 0.3100796188735887}, 'combined': 0.22847971917001272, 'epoch': 5} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.36010427991126526, 'r': 0.2770515086523572, 'f1': 0.3131649615755617}, 'combined': 0.22031705839486754, 'epoch': 5} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3437430026869682, 'r': 0.3026503856674635, 'f1': 0.321890521184164}, 'combined': 0.2371824892935945, 'epoch': 5} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.35470985116912407, 'r': 0.2740658867482692, 'f1': 0.30921635182998825}, 'combined': 0.21954360979929163, 'epoch': 5} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34970503352856297, 'r': 0.30400494391971666, 'f1': 0.3252575691785418}, 'combined': 0.23966347202629396, 'epoch': 5} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3732078615749239, 'r': 0.2694124754873906, 'f1': 0.31292770570535394}, 'combined': 0.22217867105080127, 'epoch': 5} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36858974358974356, 'r': 0.2738095238095238, 'f1': 0.314207650273224}, 'combined': 0.209471766848816, 'epoch': 5} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.25, 'f1': 0.25}, 'combined': 0.125, 'epoch': 5} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25, 'r': 0.10344827586206896, 'f1': 0.14634146341463414}, 'combined': 0.09756097560975609, 'epoch': 5} New best chinese model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3296252358752359, 'r': 0.29272222085315064, 'f1': 0.3100796188735887}, 'combined': 0.22847971917001272, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.36010427991126526, 'r': 0.2770515086523572, 'f1': 0.3131649615755617}, 'combined': 0.22031705839486754, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36858974358974356, 'r': 0.2738095238095238, 'f1': 0.314207650273224}, 'combined': 0.209471766848816, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3064311346548189, 'r': 0.27619504546686713, 'f1': 0.2905285208803173}, 'combined': 0.2140736469644443, 'epoch': 4} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.35963387992050416, 'r': 0.24527593026724218, 'f1': 0.29164524353057414}, 'combined': 0.20706812290670762, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.34615384615384615, 'r': 0.391304347826087, 'f1': 0.36734693877551017}, 'combined': 0.18367346938775508, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3256379261901891, 'r': 0.2867096731541703, 'f1': 0.3049364233143244}, 'combined': 0.2246899961263443, 'epoch': 4} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3684535250456828, 'r': 0.23496514976848928, 'f1': 0.28694417759525853}, 'combined': 0.20373036609263354, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5608552631578947, 'r': 0.15471869328493645, 'f1': 0.24253200568990038}, 'combined': 0.16168800379326692, 'epoch': 4} ****************************** Epoch: 6 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:54:54.051251: step: 2/463, loss: 0.1371469348669052 2023-01-24 00:54:54.768037: step: 4/463, loss: 0.4760722517967224 2023-01-24 00:54:55.424771: step: 6/463, loss: 0.4385990798473358 2023-01-24 00:54:55.978073: step: 8/463, loss: 0.5278136134147644 2023-01-24 00:54:56.597762: step: 10/463, loss: 0.32076188921928406 2023-01-24 00:54:57.259164: step: 12/463, loss: 0.5934610366821289 2023-01-24 00:54:57.896687: step: 14/463, loss: 0.6407036781311035 2023-01-24 00:54:58.492087: step: 16/463, loss: 0.23554666340351105 2023-01-24 00:54:59.226421: step: 18/463, loss: 0.44900044798851013 2023-01-24 00:54:59.756753: step: 20/463, loss: 0.877699077129364 2023-01-24 00:55:00.371718: step: 22/463, loss: 0.5613518357276917 2023-01-24 00:55:00.984374: step: 24/463, loss: 0.36877211928367615 2023-01-24 00:55:01.615068: step: 26/463, loss: 1.189070701599121 2023-01-24 00:55:02.220315: step: 28/463, loss: 0.6898945569992065 2023-01-24 00:55:02.879261: step: 30/463, loss: 0.11483632773160934 2023-01-24 00:55:03.452888: step: 32/463, loss: 0.2049659788608551 2023-01-24 00:55:04.078196: step: 34/463, loss: 0.3397113084793091 2023-01-24 00:55:04.735011: step: 36/463, loss: 0.7735847234725952 2023-01-24 00:55:05.373767: step: 38/463, loss: 0.5879465341567993 2023-01-24 00:55:06.128937: step: 40/463, loss: 0.9714674949645996 2023-01-24 00:55:06.707318: step: 42/463, loss: 1.058514952659607 2023-01-24 00:55:07.321198: step: 44/463, loss: 0.5902323126792908 2023-01-24 00:55:07.984204: step: 46/463, loss: 0.3542936146259308 2023-01-24 00:55:08.537910: step: 48/463, loss: 1.0961813926696777 2023-01-24 00:55:09.164088: step: 50/463, loss: 0.4449099898338318 2023-01-24 00:55:09.720630: step: 52/463, loss: 0.9456583857536316 2023-01-24 00:55:10.359031: step: 54/463, loss: 0.32163742184638977 2023-01-24 00:55:10.999690: step: 56/463, loss: 0.2400655448436737 2023-01-24 00:55:11.573323: step: 58/463, loss: 0.5468078255653381 2023-01-24 00:55:12.191959: step: 60/463, loss: 0.39666521549224854 2023-01-24 00:55:12.807346: step: 62/463, loss: 0.6493630409240723 2023-01-24 00:55:13.508998: step: 64/463, loss: 0.47781896591186523 2023-01-24 00:55:14.186611: step: 66/463, loss: 1.0075100660324097 2023-01-24 00:55:14.795362: step: 68/463, loss: 0.7276087999343872 2023-01-24 00:55:15.371461: step: 70/463, loss: 0.056442249566316605 2023-01-24 00:55:16.005566: step: 72/463, loss: 0.9523353576660156 2023-01-24 00:55:16.675248: step: 74/463, loss: 0.3175801932811737 2023-01-24 00:55:17.299165: step: 76/463, loss: 0.7422631978988647 2023-01-24 00:55:17.873863: step: 78/463, loss: 0.6364451050758362 2023-01-24 00:55:18.434221: step: 80/463, loss: 0.6671873331069946 2023-01-24 00:55:19.093654: step: 82/463, loss: 0.22614818811416626 2023-01-24 00:55:19.696102: step: 84/463, loss: 0.2294299602508545 2023-01-24 00:55:20.306822: step: 86/463, loss: 0.3948286473751068 2023-01-24 00:55:20.962073: step: 88/463, loss: 0.9551804661750793 2023-01-24 00:55:21.591356: step: 90/463, loss: 0.28109946846961975 2023-01-24 00:55:22.216740: step: 92/463, loss: 1.105984091758728 2023-01-24 00:55:22.836162: step: 94/463, loss: 0.495746910572052 2023-01-24 00:55:23.520126: step: 96/463, loss: 0.28173354268074036 2023-01-24 00:55:24.172929: step: 98/463, loss: 0.46409115195274353 2023-01-24 00:55:24.882374: step: 100/463, loss: 0.7424864172935486 2023-01-24 00:55:25.572407: step: 102/463, loss: 0.2601187825202942 2023-01-24 00:55:26.101907: step: 104/463, loss: 0.2718942165374756 2023-01-24 00:55:26.655382: step: 106/463, loss: 0.122797392308712 2023-01-24 00:55:27.311268: step: 108/463, loss: 1.1949496269226074 2023-01-24 00:55:27.936607: step: 110/463, loss: 0.6528382301330566 2023-01-24 00:55:28.552569: step: 112/463, loss: 1.2756760120391846 2023-01-24 00:55:29.154911: step: 114/463, loss: 0.47296807169914246 2023-01-24 00:55:29.883107: step: 116/463, loss: 0.47234785556793213 2023-01-24 00:55:30.503622: step: 118/463, loss: 0.5145313739776611 2023-01-24 00:55:31.122511: step: 120/463, loss: 0.2069784253835678 2023-01-24 00:55:31.727208: step: 122/463, loss: 0.498482882976532 2023-01-24 00:55:32.361637: step: 124/463, loss: 0.48430323600769043 2023-01-24 00:55:32.939369: step: 126/463, loss: 0.3850591480731964 2023-01-24 00:55:33.683271: step: 128/463, loss: 0.5562359094619751 2023-01-24 00:55:34.223940: step: 130/463, loss: 0.4546586573123932 2023-01-24 00:55:34.798675: step: 132/463, loss: 0.34977957606315613 2023-01-24 00:55:35.421661: step: 134/463, loss: 0.6470716595649719 2023-01-24 00:55:36.061703: step: 136/463, loss: 0.46885278820991516 2023-01-24 00:55:36.657037: step: 138/463, loss: 0.18356569111347198 2023-01-24 00:55:37.262999: step: 140/463, loss: 0.26443538069725037 2023-01-24 00:55:37.812726: step: 142/463, loss: 0.23886185884475708 2023-01-24 00:55:38.445033: step: 144/463, loss: 0.7617937326431274 2023-01-24 00:55:39.056646: step: 146/463, loss: 1.9936541318893433 2023-01-24 00:55:39.673454: step: 148/463, loss: 0.8742160201072693 2023-01-24 00:55:40.252604: step: 150/463, loss: 0.44535163044929504 2023-01-24 00:55:40.862067: step: 152/463, loss: 0.6916799545288086 2023-01-24 00:55:41.515393: step: 154/463, loss: 1.2999006509780884 2023-01-24 00:55:42.132504: step: 156/463, loss: 0.2688369154930115 2023-01-24 00:55:42.732470: step: 158/463, loss: 0.37048834562301636 2023-01-24 00:55:43.352922: step: 160/463, loss: 0.23033808171749115 2023-01-24 00:55:43.929834: step: 162/463, loss: 1.5172080993652344 2023-01-24 00:55:44.585471: step: 164/463, loss: 1.2180461883544922 2023-01-24 00:55:45.254357: step: 166/463, loss: 1.1592462062835693 2023-01-24 00:55:45.757788: step: 168/463, loss: 0.13061420619487762 2023-01-24 00:55:46.357462: step: 170/463, loss: 0.9254062175750732 2023-01-24 00:55:46.947159: step: 172/463, loss: 0.35999149084091187 2023-01-24 00:55:47.561325: step: 174/463, loss: 0.41956210136413574 2023-01-24 00:55:48.245183: step: 176/463, loss: 0.3925466239452362 2023-01-24 00:55:49.078678: step: 178/463, loss: 0.32362014055252075 2023-01-24 00:55:49.728462: step: 180/463, loss: 0.9651039242744446 2023-01-24 00:55:50.356394: step: 182/463, loss: 0.24384376406669617 2023-01-24 00:55:50.997298: step: 184/463, loss: 0.5158557891845703 2023-01-24 00:55:51.557746: step: 186/463, loss: 0.2707631289958954 2023-01-24 00:55:52.296659: step: 188/463, loss: 0.7794051766395569 2023-01-24 00:55:52.912522: step: 190/463, loss: 0.6750908493995667 2023-01-24 00:55:53.580784: step: 192/463, loss: 0.11341684311628342 2023-01-24 00:55:54.189670: step: 194/463, loss: 0.2782857120037079 2023-01-24 00:55:54.794409: step: 196/463, loss: 0.27160173654556274 2023-01-24 00:55:55.384073: step: 198/463, loss: 0.27452272176742554 2023-01-24 00:55:55.998252: step: 200/463, loss: 2.01969051361084 2023-01-24 00:55:56.593387: step: 202/463, loss: 0.9027014970779419 2023-01-24 00:55:57.226873: step: 204/463, loss: 0.34169432520866394 2023-01-24 00:55:57.822211: step: 206/463, loss: 2.2250516414642334 2023-01-24 00:55:58.457615: step: 208/463, loss: 0.9440475106239319 2023-01-24 00:55:59.102517: step: 210/463, loss: 1.1313639879226685 2023-01-24 00:55:59.749195: step: 212/463, loss: 0.6175208687782288 2023-01-24 00:56:00.364358: step: 214/463, loss: 1.2457197904586792 2023-01-24 00:56:01.015025: step: 216/463, loss: 0.24323886632919312 2023-01-24 00:56:01.640603: step: 218/463, loss: 0.6350215673446655 2023-01-24 00:56:02.251821: step: 220/463, loss: 1.2311522960662842 2023-01-24 00:56:02.867764: step: 222/463, loss: 0.803783118724823 2023-01-24 00:56:03.495583: step: 224/463, loss: 1.0965855121612549 2023-01-24 00:56:04.076715: step: 226/463, loss: 0.19014286994934082 2023-01-24 00:56:04.667389: step: 228/463, loss: 0.5811508297920227 2023-01-24 00:56:05.321593: step: 230/463, loss: 0.5326465964317322 2023-01-24 00:56:05.982814: step: 232/463, loss: 0.28501200675964355 2023-01-24 00:56:06.543032: step: 234/463, loss: 0.42416003346443176 2023-01-24 00:56:07.213423: step: 236/463, loss: 1.0808448791503906 2023-01-24 00:56:07.803644: step: 238/463, loss: 0.6103437542915344 2023-01-24 00:56:08.444107: step: 240/463, loss: 0.3864894509315491 2023-01-24 00:56:09.092603: step: 242/463, loss: 2.4782729148864746 2023-01-24 00:56:09.692085: step: 244/463, loss: 0.5968711376190186 2023-01-24 00:56:10.277316: step: 246/463, loss: 0.20735357701778412 2023-01-24 00:56:10.912973: step: 248/463, loss: 0.8448105454444885 2023-01-24 00:56:11.590377: step: 250/463, loss: 0.664246678352356 2023-01-24 00:56:12.236349: step: 252/463, loss: 0.8777984380722046 2023-01-24 00:56:12.827729: step: 254/463, loss: 0.9847599864006042 2023-01-24 00:56:13.486736: step: 256/463, loss: 0.380230188369751 2023-01-24 00:56:14.080934: step: 258/463, loss: 0.5467402338981628 2023-01-24 00:56:14.700149: step: 260/463, loss: 1.7148702144622803 2023-01-24 00:56:15.307891: step: 262/463, loss: 0.3121128976345062 2023-01-24 00:56:15.930356: step: 264/463, loss: 0.3450508117675781 2023-01-24 00:56:16.609159: step: 266/463, loss: 0.5480004549026489 2023-01-24 00:56:17.142338: step: 268/463, loss: 0.624117910861969 2023-01-24 00:56:17.721092: step: 270/463, loss: 1.108161211013794 2023-01-24 00:56:18.383252: step: 272/463, loss: 1.180019497871399 2023-01-24 00:56:18.974615: step: 274/463, loss: 0.22811348736286163 2023-01-24 00:56:19.562365: step: 276/463, loss: 0.36148181557655334 2023-01-24 00:56:20.146103: step: 278/463, loss: 0.416695773601532 2023-01-24 00:56:20.891374: step: 280/463, loss: 0.29900628328323364 2023-01-24 00:56:21.573914: step: 282/463, loss: 0.7144445180892944 2023-01-24 00:56:22.189271: step: 284/463, loss: 0.35956665873527527 2023-01-24 00:56:22.884044: step: 286/463, loss: 0.7016297578811646 2023-01-24 00:56:23.535555: step: 288/463, loss: 0.8132688999176025 2023-01-24 00:56:24.165857: step: 290/463, loss: 0.4136676490306854 2023-01-24 00:56:24.878477: step: 292/463, loss: 0.5401250123977661 2023-01-24 00:56:25.474878: step: 294/463, loss: 0.4200729429721832 2023-01-24 00:56:26.114498: step: 296/463, loss: 0.7275921106338501 2023-01-24 00:56:26.744927: step: 298/463, loss: 0.9649881720542908 2023-01-24 00:56:27.384389: step: 300/463, loss: 0.6399365067481995 2023-01-24 00:56:28.009986: step: 302/463, loss: 0.40402311086654663 2023-01-24 00:56:28.668273: step: 304/463, loss: 0.5088135600090027 2023-01-24 00:56:29.273540: step: 306/463, loss: 0.20988470315933228 2023-01-24 00:56:29.875764: step: 308/463, loss: 0.7641887068748474 2023-01-24 00:56:30.565858: step: 310/463, loss: 0.22165605425834656 2023-01-24 00:56:31.220466: step: 312/463, loss: 0.49857211112976074 2023-01-24 00:56:31.875818: step: 314/463, loss: 0.6122472286224365 2023-01-24 00:56:32.486622: step: 316/463, loss: 0.17926014959812164 2023-01-24 00:56:33.107135: step: 318/463, loss: 0.33741241693496704 2023-01-24 00:56:33.810145: step: 320/463, loss: 0.3826013207435608 2023-01-24 00:56:34.477983: step: 322/463, loss: 0.3800063133239746 2023-01-24 00:56:35.151549: step: 324/463, loss: 0.9585512280464172 2023-01-24 00:56:35.765659: step: 326/463, loss: 0.7558473348617554 2023-01-24 00:56:36.419603: step: 328/463, loss: 0.634215235710144 2023-01-24 00:56:37.033378: step: 330/463, loss: 0.32856863737106323 2023-01-24 00:56:37.662421: step: 332/463, loss: 0.7975993752479553 2023-01-24 00:56:38.275197: step: 334/463, loss: 0.4296196401119232 2023-01-24 00:56:38.894881: step: 336/463, loss: 0.7635879516601562 2023-01-24 00:56:39.496542: step: 338/463, loss: 0.6164973974227905 2023-01-24 00:56:40.079344: step: 340/463, loss: 0.8857709169387817 2023-01-24 00:56:40.646356: step: 342/463, loss: 0.3926467001438141 2023-01-24 00:56:41.280776: step: 344/463, loss: 0.7101549506187439 2023-01-24 00:56:41.909150: step: 346/463, loss: 0.46895211935043335 2023-01-24 00:56:42.563352: step: 348/463, loss: 0.11919423192739487 2023-01-24 00:56:43.281681: step: 350/463, loss: 1.9104876518249512 2023-01-24 00:56:44.005956: step: 352/463, loss: 1.044557809829712 2023-01-24 00:56:44.629014: step: 354/463, loss: 0.2168923020362854 2023-01-24 00:56:45.210117: step: 356/463, loss: 0.24679605662822723 2023-01-24 00:56:45.925545: step: 358/463, loss: 0.14614443480968475 2023-01-24 00:56:46.487368: step: 360/463, loss: 0.4004991054534912 2023-01-24 00:56:47.099773: step: 362/463, loss: 1.5253599882125854 2023-01-24 00:56:47.639088: step: 364/463, loss: 1.3745557069778442 2023-01-24 00:56:48.299281: step: 366/463, loss: 0.42011359333992004 2023-01-24 00:56:48.908672: step: 368/463, loss: 0.2747398912906647 2023-01-24 00:56:49.551421: step: 370/463, loss: 0.39870011806488037 2023-01-24 00:56:50.130655: step: 372/463, loss: 1.0600340366363525 2023-01-24 00:56:50.735143: step: 374/463, loss: 0.1595984846353531 2023-01-24 00:56:51.323060: step: 376/463, loss: 1.8068654537200928 2023-01-24 00:56:51.845298: step: 378/463, loss: 0.5117876529693604 2023-01-24 00:56:52.482834: step: 380/463, loss: 1.6031677722930908 2023-01-24 00:56:53.080287: step: 382/463, loss: 0.6070787906646729 2023-01-24 00:56:53.673551: step: 384/463, loss: 0.6294779777526855 2023-01-24 00:56:54.325488: step: 386/463, loss: 0.9038749933242798 2023-01-24 00:56:54.990185: step: 388/463, loss: 0.3743821084499359 2023-01-24 00:56:55.593334: step: 390/463, loss: 0.12668579816818237 2023-01-24 00:56:56.253843: step: 392/463, loss: 0.9490168690681458 2023-01-24 00:56:56.833595: step: 394/463, loss: 0.35142582654953003 2023-01-24 00:56:57.410334: step: 396/463, loss: 0.22487671673297882 2023-01-24 00:56:58.060989: step: 398/463, loss: 0.4317098557949066 2023-01-24 00:56:58.652256: step: 400/463, loss: 1.8146450519561768 2023-01-24 00:56:59.233757: step: 402/463, loss: 0.9872174263000488 2023-01-24 00:56:59.893809: step: 404/463, loss: 0.7323670387268066 2023-01-24 00:57:00.513963: step: 406/463, loss: 6.090478897094727 2023-01-24 00:57:01.215324: step: 408/463, loss: 0.7719764709472656 2023-01-24 00:57:01.814133: step: 410/463, loss: 0.7804189920425415 2023-01-24 00:57:02.382669: step: 412/463, loss: 0.7377474904060364 2023-01-24 00:57:02.977747: step: 414/463, loss: 0.21471774578094482 2023-01-24 00:57:03.617248: step: 416/463, loss: 0.848105788230896 2023-01-24 00:57:04.254832: step: 418/463, loss: 3.317871570587158 2023-01-24 00:57:04.885253: step: 420/463, loss: 0.1999308168888092 2023-01-24 00:57:05.557505: step: 422/463, loss: 0.824564516544342 2023-01-24 00:57:06.209604: step: 424/463, loss: 1.0665363073349 2023-01-24 00:57:06.843907: step: 426/463, loss: 0.14650654792785645 2023-01-24 00:57:07.518081: step: 428/463, loss: 0.21111030876636505 2023-01-24 00:57:08.156968: step: 430/463, loss: 2.903256416320801 2023-01-24 00:57:08.794968: step: 432/463, loss: 0.9318215250968933 2023-01-24 00:57:09.406772: step: 434/463, loss: 1.2392979860305786 2023-01-24 00:57:10.123524: step: 436/463, loss: 0.7258490324020386 2023-01-24 00:57:10.747837: step: 438/463, loss: 0.2884543538093567 2023-01-24 00:57:11.342268: step: 440/463, loss: 0.43020322918891907 2023-01-24 00:57:11.962226: step: 442/463, loss: 1.462837815284729 2023-01-24 00:57:12.583523: step: 444/463, loss: 0.6640911102294922 2023-01-24 00:57:13.240042: step: 446/463, loss: 0.6311531066894531 2023-01-24 00:57:13.821145: step: 448/463, loss: 0.193573459982872 2023-01-24 00:57:14.450975: step: 450/463, loss: 1.1072381734848022 2023-01-24 00:57:15.053870: step: 452/463, loss: 0.290145605802536 2023-01-24 00:57:15.654852: step: 454/463, loss: 1.526018500328064 2023-01-24 00:57:16.278642: step: 456/463, loss: 0.20943063497543335 2023-01-24 00:57:16.889380: step: 458/463, loss: 0.27499303221702576 2023-01-24 00:57:17.510756: step: 460/463, loss: 0.22853600978851318 2023-01-24 00:57:18.086333: step: 462/463, loss: 0.639344334602356 2023-01-24 00:57:18.698505: step: 464/463, loss: 1.0266096591949463 2023-01-24 00:57:19.286992: step: 466/463, loss: 0.19507017731666565 2023-01-24 00:57:19.969417: step: 468/463, loss: 2.4745426177978516 2023-01-24 00:57:20.528587: step: 470/463, loss: 0.6674564480781555 2023-01-24 00:57:21.177288: step: 472/463, loss: 1.1224582195281982 2023-01-24 00:57:21.842768: step: 474/463, loss: 0.7425593733787537 2023-01-24 00:57:22.434926: step: 476/463, loss: 0.6810380816459656 2023-01-24 00:57:23.071002: step: 478/463, loss: 0.4650612473487854 2023-01-24 00:57:23.619167: step: 480/463, loss: 0.6863660216331482 2023-01-24 00:57:24.206902: step: 482/463, loss: 2.5799264907836914 2023-01-24 00:57:24.817004: step: 484/463, loss: 0.9625426530838013 2023-01-24 00:57:25.471083: step: 486/463, loss: 0.688912570476532 2023-01-24 00:57:26.067019: step: 488/463, loss: 1.9928020238876343 2023-01-24 00:57:26.679988: step: 490/463, loss: 1.1922327280044556 2023-01-24 00:57:27.288796: step: 492/463, loss: 0.25166401267051697 2023-01-24 00:57:27.905489: step: 494/463, loss: 1.137066125869751 2023-01-24 00:57:28.564773: step: 496/463, loss: 0.8249053955078125 2023-01-24 00:57:29.234710: step: 498/463, loss: 0.8874479532241821 2023-01-24 00:57:29.866532: step: 500/463, loss: 2.1045594215393066 2023-01-24 00:57:30.527526: step: 502/463, loss: 0.6758328080177307 2023-01-24 00:57:31.141736: step: 504/463, loss: 0.20468348264694214 2023-01-24 00:57:31.745376: step: 506/463, loss: 0.28430312871932983 2023-01-24 00:57:32.381926: step: 508/463, loss: 0.7090197801589966 2023-01-24 00:57:33.133084: step: 510/463, loss: 0.36517858505249023 2023-01-24 00:57:33.722673: step: 512/463, loss: 0.5751896500587463 2023-01-24 00:57:34.435014: step: 514/463, loss: 0.4090133607387543 2023-01-24 00:57:35.064971: step: 516/463, loss: 2.220255136489868 2023-01-24 00:57:35.767547: step: 518/463, loss: 1.2079318761825562 2023-01-24 00:57:36.379212: step: 520/463, loss: 0.23231106996536255 2023-01-24 00:57:36.985003: step: 522/463, loss: 0.25605931878089905 2023-01-24 00:57:37.694053: step: 524/463, loss: 0.45129382610321045 2023-01-24 00:57:38.387288: step: 526/463, loss: 0.5611875653266907 2023-01-24 00:57:39.034008: step: 528/463, loss: 0.38233932852745056 2023-01-24 00:57:39.643345: step: 530/463, loss: 0.25171276926994324 2023-01-24 00:57:40.253519: step: 532/463, loss: 0.30157628655433655 2023-01-24 00:57:40.874696: step: 534/463, loss: 0.8644418120384216 2023-01-24 00:57:41.487055: step: 536/463, loss: 4.413722991943359 2023-01-24 00:57:42.067203: step: 538/463, loss: 1.0219889879226685 2023-01-24 00:57:42.694563: step: 540/463, loss: 0.2469603419303894 2023-01-24 00:57:43.301217: step: 542/463, loss: 1.5006029605865479 2023-01-24 00:57:43.960502: step: 544/463, loss: 0.5080916285514832 2023-01-24 00:57:44.542226: step: 546/463, loss: 0.5418081879615784 2023-01-24 00:57:45.167014: step: 548/463, loss: 0.19782106578350067 2023-01-24 00:57:45.880247: step: 550/463, loss: 0.2924264967441559 2023-01-24 00:57:46.494642: step: 552/463, loss: 0.10219843685626984 2023-01-24 00:57:47.151784: step: 554/463, loss: 0.3024296760559082 2023-01-24 00:57:47.693747: step: 556/463, loss: 0.7373350262641907 2023-01-24 00:57:48.245753: step: 558/463, loss: 0.4936484396457672 2023-01-24 00:57:48.936913: step: 560/463, loss: 1.1446311473846436 2023-01-24 00:57:49.627439: step: 562/463, loss: 1.0378072261810303 2023-01-24 00:57:50.249648: step: 564/463, loss: 0.3615554869174957 2023-01-24 00:57:50.888211: step: 566/463, loss: 0.6427013278007507 2023-01-24 00:57:51.525767: step: 568/463, loss: 0.5062682628631592 2023-01-24 00:57:52.108429: step: 570/463, loss: 1.3549234867095947 2023-01-24 00:57:52.725494: step: 572/463, loss: 1.0127463340759277 2023-01-24 00:57:53.296857: step: 574/463, loss: 1.4146630764007568 2023-01-24 00:57:53.923892: step: 576/463, loss: 1.0919172763824463 2023-01-24 00:57:54.556894: step: 578/463, loss: 0.2527444362640381 2023-01-24 00:57:55.152854: step: 580/463, loss: 1.1812858581542969 2023-01-24 00:57:55.731572: step: 582/463, loss: 1.206566572189331 2023-01-24 00:57:56.379050: step: 584/463, loss: 0.5365237593650818 2023-01-24 00:57:57.075098: step: 586/463, loss: 1.8441133499145508 2023-01-24 00:57:57.679940: step: 588/463, loss: 0.7697260975837708 2023-01-24 00:57:58.279304: step: 590/463, loss: 0.5550885200500488 2023-01-24 00:57:58.923869: step: 592/463, loss: 0.28043198585510254 2023-01-24 00:57:59.536978: step: 594/463, loss: 0.8145703077316284 2023-01-24 00:58:00.205139: step: 596/463, loss: 1.731743574142456 2023-01-24 00:58:00.887050: step: 598/463, loss: 0.29779335856437683 2023-01-24 00:58:01.432333: step: 600/463, loss: 0.8340661525726318 2023-01-24 00:58:02.100240: step: 602/463, loss: 0.5743610858917236 2023-01-24 00:58:02.758401: step: 604/463, loss: 0.8369799256324768 2023-01-24 00:58:03.293102: step: 606/463, loss: 1.1558184623718262 2023-01-24 00:58:03.923843: step: 608/463, loss: 1.0623866319656372 2023-01-24 00:58:04.573078: step: 610/463, loss: 0.2867727279663086 2023-01-24 00:58:05.202270: step: 612/463, loss: 0.45477595925331116 2023-01-24 00:58:05.763805: step: 614/463, loss: 0.4281424582004547 2023-01-24 00:58:06.457319: step: 616/463, loss: 0.7719258666038513 2023-01-24 00:58:07.113004: step: 618/463, loss: 0.3491917550563812 2023-01-24 00:58:07.719505: step: 620/463, loss: 1.0642414093017578 2023-01-24 00:58:08.359877: step: 622/463, loss: 0.4791565537452698 2023-01-24 00:58:08.963117: step: 624/463, loss: 0.6672351956367493 2023-01-24 00:58:09.558508: step: 626/463, loss: 0.7634481191635132 2023-01-24 00:58:10.155141: step: 628/463, loss: 0.7113901376724243 2023-01-24 00:58:10.778996: step: 630/463, loss: 0.681832492351532 2023-01-24 00:58:11.464505: step: 632/463, loss: 0.7508941292762756 2023-01-24 00:58:12.066525: step: 634/463, loss: 0.2895006239414215 2023-01-24 00:58:12.706347: step: 636/463, loss: 1.1713721752166748 2023-01-24 00:58:13.353489: step: 638/463, loss: 0.4624900817871094 2023-01-24 00:58:13.988943: step: 640/463, loss: 1.8934876918792725 2023-01-24 00:58:14.608477: step: 642/463, loss: 1.1910455226898193 2023-01-24 00:58:15.308475: step: 644/463, loss: 1.3675522804260254 2023-01-24 00:58:15.898761: step: 646/463, loss: 1.548194169998169 2023-01-24 00:58:16.447985: step: 648/463, loss: 0.8450793623924255 2023-01-24 00:58:17.078280: step: 650/463, loss: 0.25081393122673035 2023-01-24 00:58:17.696899: step: 652/463, loss: 0.955899178981781 2023-01-24 00:58:18.277046: step: 654/463, loss: 0.3193843364715576 2023-01-24 00:58:18.817145: step: 656/463, loss: 0.5525912046432495 2023-01-24 00:58:19.435496: step: 658/463, loss: 0.24562543630599976 2023-01-24 00:58:20.065464: step: 660/463, loss: 0.27373450994491577 2023-01-24 00:58:20.634707: step: 662/463, loss: 0.8397605419158936 2023-01-24 00:58:21.291344: step: 664/463, loss: 0.7866261005401611 2023-01-24 00:58:21.971112: step: 666/463, loss: 1.2448163032531738 2023-01-24 00:58:22.591744: step: 668/463, loss: 0.3449970781803131 2023-01-24 00:58:23.218546: step: 670/463, loss: 0.33828234672546387 2023-01-24 00:58:23.834568: step: 672/463, loss: 1.2453911304473877 2023-01-24 00:58:24.484125: step: 674/463, loss: 0.3203742504119873 2023-01-24 00:58:25.128765: step: 676/463, loss: 1.2022063732147217 2023-01-24 00:58:25.792216: step: 678/463, loss: 1.241389513015747 2023-01-24 00:58:26.452671: step: 680/463, loss: 0.9188660383224487 2023-01-24 00:58:27.124264: step: 682/463, loss: 0.8007543087005615 2023-01-24 00:58:27.772753: step: 684/463, loss: 0.22888749837875366 2023-01-24 00:58:28.335721: step: 686/463, loss: 0.21273814141750336 2023-01-24 00:58:28.931702: step: 688/463, loss: 0.23064613342285156 2023-01-24 00:58:29.542679: step: 690/463, loss: 0.28576409816741943 2023-01-24 00:58:30.222168: step: 692/463, loss: 0.4080398678779602 2023-01-24 00:58:30.821199: step: 694/463, loss: 0.7413073778152466 2023-01-24 00:58:31.470579: step: 696/463, loss: 1.1279393434524536 2023-01-24 00:58:32.119023: step: 698/463, loss: 1.735126256942749 2023-01-24 00:58:32.731219: step: 700/463, loss: 0.7060929536819458 2023-01-24 00:58:33.356977: step: 702/463, loss: 0.30307111144065857 2023-01-24 00:58:33.929559: step: 704/463, loss: 0.21976308524608612 2023-01-24 00:58:34.538838: step: 706/463, loss: 0.18529047071933746 2023-01-24 00:58:35.193890: step: 708/463, loss: 2.033637523651123 2023-01-24 00:58:35.885800: step: 710/463, loss: 2.0886316299438477 2023-01-24 00:58:36.489570: step: 712/463, loss: 0.553626298904419 2023-01-24 00:58:37.075273: step: 714/463, loss: 0.3453107178211212 2023-01-24 00:58:37.683185: step: 716/463, loss: 1.1461572647094727 2023-01-24 00:58:38.352702: step: 718/463, loss: 0.5042234659194946 2023-01-24 00:58:38.951069: step: 720/463, loss: 0.46316826343536377 2023-01-24 00:58:39.578920: step: 722/463, loss: 0.5107334852218628 2023-01-24 00:58:40.195195: step: 724/463, loss: 1.0452358722686768 2023-01-24 00:58:40.811973: step: 726/463, loss: 1.10407292842865 2023-01-24 00:58:41.508281: step: 728/463, loss: 0.6831986904144287 2023-01-24 00:58:42.151016: step: 730/463, loss: 0.2722459137439728 2023-01-24 00:58:42.881497: step: 732/463, loss: 1.53357994556427 2023-01-24 00:58:43.482344: step: 734/463, loss: 0.6353361010551453 2023-01-24 00:58:44.085356: step: 736/463, loss: 0.25523871183395386 2023-01-24 00:58:44.715662: step: 738/463, loss: 6.125033855438232 2023-01-24 00:58:45.338650: step: 740/463, loss: 0.3599884510040283 2023-01-24 00:58:45.970506: step: 742/463, loss: 1.2175943851470947 2023-01-24 00:58:46.620300: step: 744/463, loss: 0.14675307273864746 2023-01-24 00:58:47.247665: step: 746/463, loss: 0.393962562084198 2023-01-24 00:58:47.882272: step: 748/463, loss: 1.2193959951400757 2023-01-24 00:58:48.571280: step: 750/463, loss: 0.8211663961410522 2023-01-24 00:58:49.182631: step: 752/463, loss: 1.9203747510910034 2023-01-24 00:58:49.816555: step: 754/463, loss: 1.1069111824035645 2023-01-24 00:58:50.400537: step: 756/463, loss: 0.5836355686187744 2023-01-24 00:58:51.105202: step: 758/463, loss: 1.9634008407592773 2023-01-24 00:58:51.704364: step: 760/463, loss: 0.6439905166625977 2023-01-24 00:58:52.366750: step: 762/463, loss: 0.9541369676589966 2023-01-24 00:58:52.987245: step: 764/463, loss: 0.3488662540912628 2023-01-24 00:58:53.669805: step: 766/463, loss: 0.5070546269416809 2023-01-24 00:58:54.242963: step: 768/463, loss: 1.103134036064148 2023-01-24 00:58:54.859051: step: 770/463, loss: 1.179733157157898 2023-01-24 00:58:55.472711: step: 772/463, loss: 0.7735854387283325 2023-01-24 00:58:56.079884: step: 774/463, loss: 0.7130111455917358 2023-01-24 00:58:56.737608: step: 776/463, loss: 0.6790408492088318 2023-01-24 00:58:57.398403: step: 778/463, loss: 1.2636682987213135 2023-01-24 00:58:58.051164: step: 780/463, loss: 4.13569974899292 2023-01-24 00:58:58.704521: step: 782/463, loss: 0.5796583890914917 2023-01-24 00:58:59.321846: step: 784/463, loss: 0.31979137659072876 2023-01-24 00:58:59.988319: step: 786/463, loss: 3.4319112300872803 2023-01-24 00:59:00.543541: step: 788/463, loss: 1.533059000968933 2023-01-24 00:59:01.137639: step: 790/463, loss: 0.20900431275367737 2023-01-24 00:59:01.763113: step: 792/463, loss: 0.38969290256500244 2023-01-24 00:59:02.308165: step: 794/463, loss: 0.29645660519599915 2023-01-24 00:59:02.856267: step: 796/463, loss: 0.5455203056335449 2023-01-24 00:59:03.566305: step: 798/463, loss: 0.7012280821800232 2023-01-24 00:59:04.238288: step: 800/463, loss: 0.26013681292533875 2023-01-24 00:59:04.891422: step: 802/463, loss: 0.6615736484527588 2023-01-24 00:59:05.453149: step: 804/463, loss: 0.3303930163383484 2023-01-24 00:59:06.025699: step: 806/463, loss: 4.188092231750488 2023-01-24 00:59:06.621852: step: 808/463, loss: 0.9565754532814026 2023-01-24 00:59:07.238721: step: 810/463, loss: 0.5150042772293091 2023-01-24 00:59:07.894302: step: 812/463, loss: 0.6861979365348816 2023-01-24 00:59:08.533881: step: 814/463, loss: 2.7507095336914062 2023-01-24 00:59:09.266517: step: 816/463, loss: 0.6653714179992676 2023-01-24 00:59:09.864244: step: 818/463, loss: 0.8517414927482605 2023-01-24 00:59:10.521732: step: 820/463, loss: 0.7769732475280762 2023-01-24 00:59:11.132643: step: 822/463, loss: 0.9031229019165039 2023-01-24 00:59:11.766466: step: 824/463, loss: 0.7348657846450806 2023-01-24 00:59:12.371265: step: 826/463, loss: 0.6733543872833252 2023-01-24 00:59:12.933844: step: 828/463, loss: 1.0673044919967651 2023-01-24 00:59:13.538660: step: 830/463, loss: 0.6395003199577332 2023-01-24 00:59:14.118505: step: 832/463, loss: 0.4970768094062805 2023-01-24 00:59:14.772931: step: 834/463, loss: 0.8308888673782349 2023-01-24 00:59:15.428414: step: 836/463, loss: 0.692698061466217 2023-01-24 00:59:16.081632: step: 838/463, loss: 2.8874306678771973 2023-01-24 00:59:16.817731: step: 840/463, loss: 0.8500898480415344 2023-01-24 00:59:17.463797: step: 842/463, loss: 0.9425470232963562 2023-01-24 00:59:18.089415: step: 844/463, loss: 0.969414234161377 2023-01-24 00:59:18.687081: step: 846/463, loss: 0.3752686083316803 2023-01-24 00:59:19.290819: step: 848/463, loss: 0.12484701722860336 2023-01-24 00:59:20.003018: step: 850/463, loss: 1.2174444198608398 2023-01-24 00:59:20.643452: step: 852/463, loss: 0.17923931777477264 2023-01-24 00:59:21.260905: step: 854/463, loss: 0.9467581510543823 2023-01-24 00:59:21.842100: step: 856/463, loss: 0.23574116826057434 2023-01-24 00:59:22.467054: step: 858/463, loss: 0.2232736200094223 2023-01-24 00:59:23.046647: step: 860/463, loss: 0.3482277989387512 2023-01-24 00:59:23.608885: step: 862/463, loss: 0.41503259539604187 2023-01-24 00:59:24.221553: step: 864/463, loss: 1.008038878440857 2023-01-24 00:59:24.854298: step: 866/463, loss: 0.36273622512817383 2023-01-24 00:59:25.419990: step: 868/463, loss: 0.32585325837135315 2023-01-24 00:59:26.024493: step: 870/463, loss: 0.16357232630252838 2023-01-24 00:59:26.628998: step: 872/463, loss: 2.131190061569214 2023-01-24 00:59:27.316152: step: 874/463, loss: 0.18750214576721191 2023-01-24 00:59:27.968550: step: 876/463, loss: 0.31763455271720886 2023-01-24 00:59:28.580000: step: 878/463, loss: 0.5719870924949646 2023-01-24 00:59:29.217188: step: 880/463, loss: 0.6472684741020203 2023-01-24 00:59:29.870604: step: 882/463, loss: 0.4398764371871948 2023-01-24 00:59:30.531177: step: 884/463, loss: 0.8721925616264343 2023-01-24 00:59:31.231182: step: 886/463, loss: 1.2925094366073608 2023-01-24 00:59:31.821392: step: 888/463, loss: 0.8126130700111389 2023-01-24 00:59:32.464017: step: 890/463, loss: 1.2156544923782349 2023-01-24 00:59:33.077027: step: 892/463, loss: 0.3512120842933655 2023-01-24 00:59:33.739049: step: 894/463, loss: 0.27437350153923035 2023-01-24 00:59:34.348712: step: 896/463, loss: 1.5544650554656982 2023-01-24 00:59:35.075932: step: 898/463, loss: 0.762451708316803 2023-01-24 00:59:35.704676: step: 900/463, loss: 11.541376113891602 2023-01-24 00:59:36.369040: step: 902/463, loss: 0.3377987742424011 2023-01-24 00:59:37.045410: step: 904/463, loss: 0.49180614948272705 2023-01-24 00:59:37.733320: step: 906/463, loss: 0.9053537249565125 2023-01-24 00:59:38.373704: step: 908/463, loss: 0.40473267436027527 2023-01-24 00:59:38.990822: step: 910/463, loss: 1.5902875661849976 2023-01-24 00:59:39.597023: step: 912/463, loss: 1.7497566938400269 2023-01-24 00:59:40.215877: step: 914/463, loss: 1.186607003211975 2023-01-24 00:59:40.846284: step: 916/463, loss: 2.520381212234497 2023-01-24 00:59:41.447472: step: 918/463, loss: 1.9548581838607788 2023-01-24 00:59:42.143932: step: 920/463, loss: 0.9713904857635498 2023-01-24 00:59:42.713746: step: 922/463, loss: 0.6692941188812256 2023-01-24 00:59:43.314640: step: 924/463, loss: 0.3704962730407715 2023-01-24 00:59:43.918891: step: 926/463, loss: 0.5253973603248596 ================================================== Loss: 0.801 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3213281368676895, 'r': 0.27629351162486937, 'f1': 0.29711400028091856}, 'combined': 0.2189261054701505, 'epoch': 6} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3571573951544391, 'r': 0.3027126702833356, 'f1': 0.32768896321100355}, 'combined': 0.23053494899266583, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34103525591464356, 'r': 0.28807144723093, 'f1': 0.3123238688663882}, 'combined': 0.23013337705944392, 'epoch': 6} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3529448169755959, 'r': 0.29699015086970876, 'f1': 0.32255883935518037}, 'combined': 0.22901677594217806, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3343606651376147, 'r': 0.2761008522727273, 'f1': 0.3024507261410788}, 'combined': 0.22285842978816334, 'epoch': 6} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.35923661079685915, 'r': 0.2875145673160564, 'f1': 0.3193987493574011}, 'combined': 0.22677311204375478, 'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29059829059829057, 'r': 0.32380952380952377, 'f1': 0.3063063063063063}, 'combined': 0.20420420420420418, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2265625, 'r': 0.31521739130434784, 'f1': 0.2636363636363636}, 'combined': 0.1318181818181818, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6388888888888888, 'r': 0.19827586206896552, 'f1': 0.3026315789473684}, 'combined': 0.20175438596491227, 'epoch': 6} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3296252358752359, 'r': 0.29272222085315064, 'f1': 0.3100796188735887}, 'combined': 0.22847971917001272, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.36010427991126526, 'r': 0.2770515086523572, 'f1': 0.3131649615755617}, 'combined': 0.22031705839486754, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36858974358974356, 'r': 0.2738095238095238, 'f1': 0.314207650273224}, 'combined': 0.209471766848816, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3064311346548189, 'r': 0.27619504546686713, 'f1': 0.2905285208803173}, 'combined': 0.2140736469644443, 'epoch': 4} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.35963387992050416, 'r': 0.24527593026724218, 'f1': 0.29164524353057414}, 'combined': 0.20706812290670762, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.34615384615384615, 'r': 0.391304347826087, 'f1': 0.36734693877551017}, 'combined': 0.18367346938775508, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3343606651376147, 'r': 0.2761008522727273, 'f1': 0.3024507261410788}, 'combined': 0.22285842978816334, 'epoch': 6} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.35923661079685915, 'r': 0.2875145673160564, 'f1': 0.3193987493574011}, 'combined': 0.22677311204375478, 'epoch': 6} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6388888888888888, 'r': 0.19827586206896552, 'f1': 0.3026315789473684}, 'combined': 0.20175438596491227, 'epoch': 6} ****************************** Epoch: 7 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:02:26.810042: step: 2/463, loss: 0.2924317717552185 2023-01-24 01:02:27.465353: step: 4/463, loss: 0.6459670662879944 2023-01-24 01:02:28.142816: step: 6/463, loss: 1.7969746589660645 2023-01-24 01:02:28.809791: step: 8/463, loss: 0.17514309287071228 2023-01-24 01:02:29.392449: step: 10/463, loss: 0.15155097842216492 2023-01-24 01:02:29.986670: step: 12/463, loss: 0.2682904005050659 2023-01-24 01:02:30.653549: step: 14/463, loss: 0.514197587966919 2023-01-24 01:02:31.416658: step: 16/463, loss: 0.3550119400024414 2023-01-24 01:02:32.010367: step: 18/463, loss: 0.21410077810287476 2023-01-24 01:02:32.718269: step: 20/463, loss: 1.5201659202575684 2023-01-24 01:02:33.401716: step: 22/463, loss: 1.6097393035888672 2023-01-24 01:02:34.077807: step: 24/463, loss: 0.6049212217330933 2023-01-24 01:02:34.706424: step: 26/463, loss: 0.45338720083236694 2023-01-24 01:02:35.351743: step: 28/463, loss: 0.5114819407463074 2023-01-24 01:02:35.990498: step: 30/463, loss: 0.8023658990859985 2023-01-24 01:02:36.626216: step: 32/463, loss: 0.6443910598754883 2023-01-24 01:02:37.283713: step: 34/463, loss: 0.30315569043159485 2023-01-24 01:02:37.879880: step: 36/463, loss: 0.40705516934394836 2023-01-24 01:02:38.473233: step: 38/463, loss: 0.3118268847465515 2023-01-24 01:02:39.097284: step: 40/463, loss: 0.2434554100036621 2023-01-24 01:02:39.713203: step: 42/463, loss: 0.0942670926451683 2023-01-24 01:02:40.287207: step: 44/463, loss: 0.23523245751857758 2023-01-24 01:02:40.889270: step: 46/463, loss: 0.30537816882133484 2023-01-24 01:02:41.558707: step: 48/463, loss: 1.551579236984253 2023-01-24 01:02:42.155151: step: 50/463, loss: 0.24812689423561096 2023-01-24 01:02:42.821150: step: 52/463, loss: 0.39288198947906494 2023-01-24 01:02:43.445609: step: 54/463, loss: 1.0936028957366943 2023-01-24 01:02:44.058472: step: 56/463, loss: 0.16987797617912292 2023-01-24 01:02:44.677290: step: 58/463, loss: 0.12082823365926743 2023-01-24 01:02:45.286181: step: 60/463, loss: 0.13755959272384644 2023-01-24 01:02:45.865636: step: 62/463, loss: 0.9608087539672852 2023-01-24 01:02:46.493648: step: 64/463, loss: 0.2369552105665207 2023-01-24 01:02:47.033341: step: 66/463, loss: 0.25196751952171326 2023-01-24 01:02:47.650503: step: 68/463, loss: 0.5326698422431946 2023-01-24 01:02:48.250873: step: 70/463, loss: 0.6153982877731323 2023-01-24 01:02:48.915304: step: 72/463, loss: 0.3046908974647522 2023-01-24 01:02:49.591120: step: 74/463, loss: 0.5395478010177612 2023-01-24 01:02:50.214308: step: 76/463, loss: 0.2049635499715805 2023-01-24 01:02:50.874562: step: 78/463, loss: 0.131704181432724 2023-01-24 01:02:51.500642: step: 80/463, loss: 0.32136693596839905 2023-01-24 01:02:52.091200: step: 82/463, loss: 1.7959011793136597 2023-01-24 01:02:52.709021: step: 84/463, loss: 0.38815051317214966 2023-01-24 01:02:53.309778: step: 86/463, loss: 0.24130097031593323 2023-01-24 01:02:53.965893: step: 88/463, loss: 0.11695665121078491 2023-01-24 01:02:54.615778: step: 90/463, loss: 0.9458560347557068 2023-01-24 01:02:55.257846: step: 92/463, loss: 0.2658766806125641 2023-01-24 01:02:55.944951: step: 94/463, loss: 0.1312265396118164 2023-01-24 01:02:56.594395: step: 96/463, loss: 0.40745365619659424 2023-01-24 01:02:57.170555: step: 98/463, loss: 0.8665324449539185 2023-01-24 01:02:57.797394: step: 100/463, loss: 0.7897643446922302 2023-01-24 01:02:58.413599: step: 102/463, loss: 0.2308054119348526 2023-01-24 01:02:59.015768: step: 104/463, loss: 1.6778600215911865 2023-01-24 01:02:59.636632: step: 106/463, loss: 0.43516474962234497 2023-01-24 01:03:00.239405: step: 108/463, loss: 0.09825699776411057 2023-01-24 01:03:00.870402: step: 110/463, loss: 0.21485744416713715 2023-01-24 01:03:01.511194: step: 112/463, loss: 0.5308113694190979 2023-01-24 01:03:02.133969: step: 114/463, loss: 0.27296721935272217 2023-01-24 01:03:02.741103: step: 116/463, loss: 0.19981759786605835 2023-01-24 01:03:03.330116: step: 118/463, loss: 0.6389927268028259 2023-01-24 01:03:04.012455: step: 120/463, loss: 0.2578306198120117 2023-01-24 01:03:04.681784: step: 122/463, loss: 0.1808868795633316 2023-01-24 01:03:05.360033: step: 124/463, loss: 0.21032492816448212 2023-01-24 01:03:05.910976: step: 126/463, loss: 0.48566532135009766 2023-01-24 01:03:06.638849: step: 128/463, loss: 0.5067209601402283 2023-01-24 01:03:07.346198: step: 130/463, loss: 0.521309494972229 2023-01-24 01:03:07.951029: step: 132/463, loss: 0.28699249029159546 2023-01-24 01:03:08.558321: step: 134/463, loss: 0.4059025049209595 2023-01-24 01:03:09.164802: step: 136/463, loss: 0.6112493276596069 2023-01-24 01:03:09.832171: step: 138/463, loss: 6.905817985534668 2023-01-24 01:03:10.441857: step: 140/463, loss: 0.8701781034469604 2023-01-24 01:03:11.095544: step: 142/463, loss: 0.5060449242591858 2023-01-24 01:03:11.743822: step: 144/463, loss: 1.37553870677948 2023-01-24 01:03:12.428658: step: 146/463, loss: 0.23579971492290497 2023-01-24 01:03:13.004257: step: 148/463, loss: 0.6357467770576477 2023-01-24 01:03:13.632743: step: 150/463, loss: 0.4640912413597107 2023-01-24 01:03:14.235363: step: 152/463, loss: 1.3747117519378662 2023-01-24 01:03:14.856892: step: 154/463, loss: 0.5280831456184387 2023-01-24 01:03:15.528629: step: 156/463, loss: 0.7836452126502991 2023-01-24 01:03:16.158792: step: 158/463, loss: 1.5843793153762817 2023-01-24 01:03:16.765850: step: 160/463, loss: 1.2575896978378296 2023-01-24 01:03:17.408615: step: 162/463, loss: 0.198529452085495 2023-01-24 01:03:18.099541: step: 164/463, loss: 0.7544883489608765 2023-01-24 01:03:18.709302: step: 166/463, loss: 0.5000490546226501 2023-01-24 01:03:19.341984: step: 168/463, loss: 0.16949398815631866 2023-01-24 01:03:19.944791: step: 170/463, loss: 0.31470897793769836 2023-01-24 01:03:20.567013: step: 172/463, loss: 0.2585620880126953 2023-01-24 01:03:21.153224: step: 174/463, loss: 0.44241607189178467 2023-01-24 01:03:21.744437: step: 176/463, loss: 0.6171706914901733 2023-01-24 01:03:22.481041: step: 178/463, loss: 0.5622004270553589 2023-01-24 01:03:23.158153: step: 180/463, loss: 1.2424858808517456 2023-01-24 01:03:23.826265: step: 182/463, loss: 0.4132775068283081 2023-01-24 01:03:24.623223: step: 184/463, loss: 0.4425542652606964 2023-01-24 01:03:25.233055: step: 186/463, loss: 0.12215128540992737 2023-01-24 01:03:25.865502: step: 188/463, loss: 0.41826942563056946 2023-01-24 01:03:26.464201: step: 190/463, loss: 0.24470974504947662 2023-01-24 01:03:27.134801: step: 192/463, loss: 0.5538733601570129 2023-01-24 01:03:27.782427: step: 194/463, loss: 0.11127350479364395 2023-01-24 01:03:28.361352: step: 196/463, loss: 1.4958598613739014 2023-01-24 01:03:28.918281: step: 198/463, loss: 0.5070638060569763 2023-01-24 01:03:29.497728: step: 200/463, loss: 0.271684467792511 2023-01-24 01:03:30.129673: step: 202/463, loss: 1.3053030967712402 2023-01-24 01:03:30.756929: step: 204/463, loss: 1.3056875467300415 2023-01-24 01:03:31.464214: step: 206/463, loss: 0.5693117380142212 2023-01-24 01:03:32.107309: step: 208/463, loss: 0.6355133652687073 2023-01-24 01:03:32.824687: step: 210/463, loss: 0.3644798696041107 2023-01-24 01:03:33.384830: step: 212/463, loss: 0.1446819007396698 2023-01-24 01:03:34.042039: step: 214/463, loss: 0.39397895336151123 2023-01-24 01:03:34.647165: step: 216/463, loss: 0.26857396960258484 2023-01-24 01:03:35.238153: step: 218/463, loss: 0.6043521165847778 2023-01-24 01:03:35.853525: step: 220/463, loss: 0.5219537019729614 2023-01-24 01:03:36.478298: step: 222/463, loss: 0.20940008759498596 2023-01-24 01:03:37.050791: step: 224/463, loss: 0.18791015446186066 2023-01-24 01:03:37.736971: step: 226/463, loss: 0.6779966354370117 2023-01-24 01:03:38.348263: step: 228/463, loss: 0.4429236054420471 2023-01-24 01:03:38.963437: step: 230/463, loss: 0.4453403949737549 2023-01-24 01:03:39.613378: step: 232/463, loss: 0.21273258328437805 2023-01-24 01:03:40.235159: step: 234/463, loss: 0.37537869811058044 2023-01-24 01:03:40.926636: step: 236/463, loss: 0.566279947757721 2023-01-24 01:03:41.552240: step: 238/463, loss: 0.8993304967880249 2023-01-24 01:03:42.134350: step: 240/463, loss: 0.36255574226379395 2023-01-24 01:03:42.732852: step: 242/463, loss: 0.26984500885009766 2023-01-24 01:03:43.348374: step: 244/463, loss: 0.23397760093212128 2023-01-24 01:03:43.966276: step: 246/463, loss: 0.2572099268436432 2023-01-24 01:03:44.561867: step: 248/463, loss: 0.19606970250606537 2023-01-24 01:03:45.218192: step: 250/463, loss: 0.7257263660430908 2023-01-24 01:03:45.858082: step: 252/463, loss: 0.40418314933776855 2023-01-24 01:03:46.495879: step: 254/463, loss: 0.3790585994720459 2023-01-24 01:03:47.182711: step: 256/463, loss: 0.6089288592338562 2023-01-24 01:03:47.790577: step: 258/463, loss: 0.16544397175312042 2023-01-24 01:03:48.393206: step: 260/463, loss: 0.1775173544883728 2023-01-24 01:03:48.990849: step: 262/463, loss: 0.5019344687461853 2023-01-24 01:03:49.635144: step: 264/463, loss: 0.6287235021591187 2023-01-24 01:03:50.279624: step: 266/463, loss: 0.3955256938934326 2023-01-24 01:03:50.922633: step: 268/463, loss: 0.3569977879524231 2023-01-24 01:03:51.496805: step: 270/463, loss: 0.7792713046073914 2023-01-24 01:03:52.133271: step: 272/463, loss: 0.3760615289211273 2023-01-24 01:03:52.760661: step: 274/463, loss: 0.48680803179740906 2023-01-24 01:03:53.331026: step: 276/463, loss: 0.7342326045036316 2023-01-24 01:03:53.957490: step: 278/463, loss: 0.7495352625846863 2023-01-24 01:03:54.529381: step: 280/463, loss: 0.1290467083454132 2023-01-24 01:03:55.171642: step: 282/463, loss: 0.7610254287719727 2023-01-24 01:03:55.738770: step: 284/463, loss: 0.814067542552948 2023-01-24 01:03:56.336185: step: 286/463, loss: 1.5646833181381226 2023-01-24 01:03:57.023025: step: 288/463, loss: 0.17038848996162415 2023-01-24 01:03:57.647854: step: 290/463, loss: 1.4968148469924927 2023-01-24 01:03:58.254126: step: 292/463, loss: 0.30045151710510254 2023-01-24 01:03:58.856535: step: 294/463, loss: 0.21755480766296387 2023-01-24 01:03:59.456947: step: 296/463, loss: 0.17588627338409424 2023-01-24 01:04:00.095571: step: 298/463, loss: 0.12127363681793213 2023-01-24 01:04:00.716811: step: 300/463, loss: 1.118198275566101 2023-01-24 01:04:01.371166: step: 302/463, loss: 0.5179328322410583 2023-01-24 01:04:02.034305: step: 304/463, loss: 1.349529504776001 2023-01-24 01:04:02.662892: step: 306/463, loss: 0.3123844861984253 2023-01-24 01:04:03.241021: step: 308/463, loss: 0.5265710353851318 2023-01-24 01:04:03.882530: step: 310/463, loss: 0.869347333908081 2023-01-24 01:04:04.490953: step: 312/463, loss: 0.13021226227283478 2023-01-24 01:04:05.145248: step: 314/463, loss: 0.15256482362747192 2023-01-24 01:04:05.786875: step: 316/463, loss: 0.22558824717998505 2023-01-24 01:04:06.460342: step: 318/463, loss: 0.9738587141036987 2023-01-24 01:04:07.141104: step: 320/463, loss: 0.26306387782096863 2023-01-24 01:04:07.788454: step: 322/463, loss: 0.6062888503074646 2023-01-24 01:04:08.370684: step: 324/463, loss: 0.818843424320221 2023-01-24 01:04:09.002150: step: 326/463, loss: 0.46620500087738037 2023-01-24 01:04:09.616635: step: 328/463, loss: 0.2878604829311371 2023-01-24 01:04:10.271218: step: 330/463, loss: 0.22390830516815186 2023-01-24 01:04:10.881539: step: 332/463, loss: 0.2887449860572815 2023-01-24 01:04:11.441021: step: 334/463, loss: 0.2752339839935303 2023-01-24 01:04:12.107676: step: 336/463, loss: 0.08501852303743362 2023-01-24 01:04:12.821228: step: 338/463, loss: 0.6073396801948547 2023-01-24 01:04:13.559170: step: 340/463, loss: 0.3282373249530792 2023-01-24 01:04:14.175818: step: 342/463, loss: 0.3153262138366699 2023-01-24 01:04:14.747824: step: 344/463, loss: 0.8225773572921753 2023-01-24 01:04:15.329437: step: 346/463, loss: 0.11703789979219437 2023-01-24 01:04:15.980272: step: 348/463, loss: 1.5473849773406982 2023-01-24 01:04:16.607632: step: 350/463, loss: 0.2892407476902008 2023-01-24 01:04:17.242552: step: 352/463, loss: 0.2536793053150177 2023-01-24 01:04:17.844694: step: 354/463, loss: 0.27161040902137756 2023-01-24 01:04:18.521783: step: 356/463, loss: 0.6725258827209473 2023-01-24 01:04:19.124079: step: 358/463, loss: 0.6325103044509888 2023-01-24 01:04:19.777350: step: 360/463, loss: 0.24178291857242584 2023-01-24 01:04:20.372488: step: 362/463, loss: 0.8023243546485901 2023-01-24 01:04:21.049235: step: 364/463, loss: 0.705376923084259 2023-01-24 01:04:21.667678: step: 366/463, loss: 0.2957385182380676 2023-01-24 01:04:22.248850: step: 368/463, loss: 0.21079199016094208 2023-01-24 01:04:22.901046: step: 370/463, loss: 0.22580979764461517 2023-01-24 01:04:23.592040: step: 372/463, loss: 0.36290353536605835 2023-01-24 01:04:24.170887: step: 374/463, loss: 1.0268250703811646 2023-01-24 01:04:24.769588: step: 376/463, loss: 1.7339035272598267 2023-01-24 01:04:25.367016: step: 378/463, loss: 10.988343238830566 2023-01-24 01:04:25.975939: step: 380/463, loss: 0.6798520684242249 2023-01-24 01:04:26.567595: step: 382/463, loss: 0.5074886679649353 2023-01-24 01:04:27.250695: step: 384/463, loss: 0.584663987159729 2023-01-24 01:04:27.903645: step: 386/463, loss: 0.3682064116001129 2023-01-24 01:04:28.515223: step: 388/463, loss: 0.35181182622909546 2023-01-24 01:04:29.135038: step: 390/463, loss: 0.3159389793872833 2023-01-24 01:04:29.745973: step: 392/463, loss: 0.2983010709285736 2023-01-24 01:04:30.419221: step: 394/463, loss: 0.22417858242988586 2023-01-24 01:04:31.016813: step: 396/463, loss: 0.3424162268638611 2023-01-24 01:04:31.585993: step: 398/463, loss: 0.7964850664138794 2023-01-24 01:04:32.190224: step: 400/463, loss: 0.32179611921310425 2023-01-24 01:04:32.817271: step: 402/463, loss: 0.19760094583034515 2023-01-24 01:04:33.445811: step: 404/463, loss: 0.8311609625816345 2023-01-24 01:04:34.126403: step: 406/463, loss: 0.32505759596824646 2023-01-24 01:04:34.743825: step: 408/463, loss: 0.2526588439941406 2023-01-24 01:04:35.374982: step: 410/463, loss: 0.20183967053890228 2023-01-24 01:04:35.942774: step: 412/463, loss: 0.1331614851951599 2023-01-24 01:04:36.596342: step: 414/463, loss: 0.5594495534896851 2023-01-24 01:04:37.178193: step: 416/463, loss: 0.43715915083885193 2023-01-24 01:04:37.795920: step: 418/463, loss: 0.9685855507850647 2023-01-24 01:04:38.422232: step: 420/463, loss: 0.3982907831668854 2023-01-24 01:04:39.128176: step: 422/463, loss: 0.8895206451416016 2023-01-24 01:04:39.768895: step: 424/463, loss: 0.2528805732727051 2023-01-24 01:04:40.411842: step: 426/463, loss: 0.34469565749168396 2023-01-24 01:04:41.050416: step: 428/463, loss: 0.6004028916358948 2023-01-24 01:04:41.676794: step: 430/463, loss: 0.5974717140197754 2023-01-24 01:04:42.279129: step: 432/463, loss: 0.22086842358112335 2023-01-24 01:04:42.959114: step: 434/463, loss: 0.5064923167228699 2023-01-24 01:04:43.677221: step: 436/463, loss: 0.25773563981056213 2023-01-24 01:04:44.263344: step: 438/463, loss: 0.20917242765426636 2023-01-24 01:04:44.873182: step: 440/463, loss: 0.12695471942424774 2023-01-24 01:04:45.506387: step: 442/463, loss: 1.3899328708648682 2023-01-24 01:04:46.109419: step: 444/463, loss: 0.2484578639268875 2023-01-24 01:04:46.752144: step: 446/463, loss: 0.801271915435791 2023-01-24 01:04:47.318651: step: 448/463, loss: 0.7357480525970459 2023-01-24 01:04:48.031568: step: 450/463, loss: 0.25009143352508545 2023-01-24 01:04:48.610208: step: 452/463, loss: 2.0035817623138428 2023-01-24 01:04:49.249092: step: 454/463, loss: 0.15412165224552155 2023-01-24 01:04:49.974223: step: 456/463, loss: 0.37381523847579956 2023-01-24 01:04:50.626564: step: 458/463, loss: 0.7186870574951172 2023-01-24 01:04:51.305492: step: 460/463, loss: 0.49584388732910156 2023-01-24 01:04:51.962093: step: 462/463, loss: 0.5489187836647034 2023-01-24 01:04:52.555062: step: 464/463, loss: 0.7308542728424072 2023-01-24 01:04:53.125553: step: 466/463, loss: 0.3339444100856781 2023-01-24 01:04:53.712211: step: 468/463, loss: 0.32205259799957275 2023-01-24 01:04:54.441713: step: 470/463, loss: 1.525435209274292 2023-01-24 01:04:55.122479: step: 472/463, loss: 0.23388491570949554 2023-01-24 01:04:55.784768: step: 474/463, loss: 0.9795718193054199 2023-01-24 01:04:56.437741: step: 476/463, loss: 0.45122674107551575 2023-01-24 01:04:57.064269: step: 478/463, loss: 0.2656644880771637 2023-01-24 01:04:57.704861: step: 480/463, loss: 0.19978493452072144 2023-01-24 01:04:58.337722: step: 482/463, loss: 0.2845817506313324 2023-01-24 01:04:59.011571: step: 484/463, loss: 0.37539079785346985 2023-01-24 01:04:59.688214: step: 486/463, loss: 0.5412411093711853 2023-01-24 01:05:00.310224: step: 488/463, loss: 0.5723088383674622 2023-01-24 01:05:01.034284: step: 490/463, loss: 2.0728869438171387 2023-01-24 01:05:01.716791: step: 492/463, loss: 1.3226650953292847 2023-01-24 01:05:02.389303: step: 494/463, loss: 0.8394266366958618 2023-01-24 01:05:03.019215: step: 496/463, loss: 0.32500067353248596 2023-01-24 01:05:03.644577: step: 498/463, loss: 0.27593696117401123 2023-01-24 01:05:04.255986: step: 500/463, loss: 0.38193148374557495 2023-01-24 01:05:04.858323: step: 502/463, loss: 0.1281970888376236 2023-01-24 01:05:05.492435: step: 504/463, loss: 0.3873727321624756 2023-01-24 01:05:06.049734: step: 506/463, loss: 0.7752090692520142 2023-01-24 01:05:06.719337: step: 508/463, loss: 0.19130638241767883 2023-01-24 01:05:07.359413: step: 510/463, loss: 0.41665419936180115 2023-01-24 01:05:08.010590: step: 512/463, loss: 0.205368772149086 2023-01-24 01:05:08.676173: step: 514/463, loss: 0.23581798374652863 2023-01-24 01:05:09.302236: step: 516/463, loss: 0.3139932155609131 2023-01-24 01:05:09.936294: step: 518/463, loss: 0.5283504724502563 2023-01-24 01:05:10.587230: step: 520/463, loss: 0.39107745885849 2023-01-24 01:05:11.169782: step: 522/463, loss: 0.6221797466278076 2023-01-24 01:05:11.775291: step: 524/463, loss: 0.5362050533294678 2023-01-24 01:05:12.396268: step: 526/463, loss: 1.0764684677124023 2023-01-24 01:05:12.970220: step: 528/463, loss: 0.31952986121177673 2023-01-24 01:05:13.567498: step: 530/463, loss: 0.6441364288330078 2023-01-24 01:05:14.177244: step: 532/463, loss: 0.19354405999183655 2023-01-24 01:05:14.844628: step: 534/463, loss: 0.8123067021369934 2023-01-24 01:05:15.454013: step: 536/463, loss: 0.29804176092147827 2023-01-24 01:05:16.101404: step: 538/463, loss: 0.16331526637077332 2023-01-24 01:05:16.714858: step: 540/463, loss: 0.40207067131996155 2023-01-24 01:05:17.293416: step: 542/463, loss: 0.38540440797805786 2023-01-24 01:05:17.870824: step: 544/463, loss: 0.12684573233127594 2023-01-24 01:05:18.586763: step: 546/463, loss: 0.3104609549045563 2023-01-24 01:05:19.196391: step: 548/463, loss: 0.49482518434524536 2023-01-24 01:05:19.784187: step: 550/463, loss: 0.12039569020271301 2023-01-24 01:05:20.343668: step: 552/463, loss: 0.2101477086544037 2023-01-24 01:05:20.970771: step: 554/463, loss: 0.1725948303937912 2023-01-24 01:05:21.527425: step: 556/463, loss: 1.1571440696716309 2023-01-24 01:05:22.192415: step: 558/463, loss: 0.19257591664791107 2023-01-24 01:05:22.843498: step: 560/463, loss: 0.29185354709625244 2023-01-24 01:05:23.466073: step: 562/463, loss: 0.44687265157699585 2023-01-24 01:05:24.079956: step: 564/463, loss: 0.16196037828922272 2023-01-24 01:05:24.711199: step: 566/463, loss: 0.9087367057800293 2023-01-24 01:05:25.280457: step: 568/463, loss: 0.7685391306877136 2023-01-24 01:05:25.929121: step: 570/463, loss: 0.2829102873802185 2023-01-24 01:05:26.657489: step: 572/463, loss: 1.2263611555099487 2023-01-24 01:05:27.261284: step: 574/463, loss: 1.2089903354644775 2023-01-24 01:05:27.877707: step: 576/463, loss: 0.23912407457828522 2023-01-24 01:05:28.454399: step: 578/463, loss: 0.1289568394422531 2023-01-24 01:05:29.110791: step: 580/463, loss: 0.7650595307350159 2023-01-24 01:05:29.728210: step: 582/463, loss: 0.1878809779882431 2023-01-24 01:05:30.343122: step: 584/463, loss: 0.21184967458248138 2023-01-24 01:05:30.895841: step: 586/463, loss: 0.4684463143348694 2023-01-24 01:05:31.488506: step: 588/463, loss: 0.28665074706077576 2023-01-24 01:05:32.085057: step: 590/463, loss: 0.2618367075920105 2023-01-24 01:05:32.717278: step: 592/463, loss: 0.8079449534416199 2023-01-24 01:05:33.319753: step: 594/463, loss: 1.2516543865203857 2023-01-24 01:05:33.921654: step: 596/463, loss: 0.9878435134887695 2023-01-24 01:05:34.557094: step: 598/463, loss: 1.0013201236724854 2023-01-24 01:05:35.219734: step: 600/463, loss: 0.4189344346523285 2023-01-24 01:05:35.818216: step: 602/463, loss: 0.6035969257354736 2023-01-24 01:05:36.429273: step: 604/463, loss: 0.9859971404075623 2023-01-24 01:05:37.005207: step: 606/463, loss: 0.9016733169555664 2023-01-24 01:05:37.610137: step: 608/463, loss: 0.3164084255695343 2023-01-24 01:05:38.196904: step: 610/463, loss: 1.075129747390747 2023-01-24 01:05:38.828948: step: 612/463, loss: 0.48143842816352844 2023-01-24 01:05:39.392024: step: 614/463, loss: 0.20436955988407135 2023-01-24 01:05:40.048753: step: 616/463, loss: 0.25873205065727234 2023-01-24 01:05:40.657125: step: 618/463, loss: 0.1502208709716797 2023-01-24 01:05:41.200198: step: 620/463, loss: 0.3765846788883209 2023-01-24 01:05:41.740471: step: 622/463, loss: 0.5470739006996155 2023-01-24 01:05:42.372004: step: 624/463, loss: 0.18770313262939453 2023-01-24 01:05:43.020152: step: 626/463, loss: 0.5667265057563782 2023-01-24 01:05:43.702136: step: 628/463, loss: 0.2066912055015564 2023-01-24 01:05:44.393233: step: 630/463, loss: 0.26877740025520325 2023-01-24 01:05:45.064201: step: 632/463, loss: 0.8836117386817932 2023-01-24 01:05:45.672741: step: 634/463, loss: 0.16277842223644257 2023-01-24 01:05:46.290059: step: 636/463, loss: 0.2794587016105652 2023-01-24 01:05:46.930392: step: 638/463, loss: 0.22553445398807526 2023-01-24 01:05:47.611902: step: 640/463, loss: 0.7869256734848022 2023-01-24 01:05:48.248029: step: 642/463, loss: 0.6434556841850281 2023-01-24 01:05:48.911977: step: 644/463, loss: 0.5300359725952148 2023-01-24 01:05:49.560540: step: 646/463, loss: 0.42739999294281006 2023-01-24 01:05:50.181210: step: 648/463, loss: 0.33280813694000244 2023-01-24 01:05:50.827674: step: 650/463, loss: 0.8890302181243896 2023-01-24 01:05:51.438036: step: 652/463, loss: 0.22244475781917572 2023-01-24 01:05:52.056733: step: 654/463, loss: 1.3736999034881592 2023-01-24 01:05:52.703531: step: 656/463, loss: 0.6121673583984375 2023-01-24 01:05:53.366026: step: 658/463, loss: 0.6737586259841919 2023-01-24 01:05:53.955072: step: 660/463, loss: 0.30787405371665955 2023-01-24 01:05:54.644049: step: 662/463, loss: 2.0757267475128174 2023-01-24 01:05:55.284679: step: 664/463, loss: 0.34689632058143616 2023-01-24 01:05:55.926213: step: 666/463, loss: 0.24379436671733856 2023-01-24 01:05:56.620851: step: 668/463, loss: 2.1758999824523926 2023-01-24 01:05:57.240511: step: 670/463, loss: 4.9169440269470215 2023-01-24 01:05:57.914929: step: 672/463, loss: 0.2017715573310852 2023-01-24 01:05:58.524081: step: 674/463, loss: 0.11330913007259369 2023-01-24 01:05:59.155213: step: 676/463, loss: 0.16683141887187958 2023-01-24 01:05:59.769542: step: 678/463, loss: 0.815362274646759 2023-01-24 01:06:00.327986: step: 680/463, loss: 0.3539970815181732 2023-01-24 01:06:00.982313: step: 682/463, loss: 2.1730191707611084 2023-01-24 01:06:01.592637: step: 684/463, loss: 0.27092108130455017 2023-01-24 01:06:02.289729: step: 686/463, loss: 0.19393417239189148 2023-01-24 01:06:02.909868: step: 688/463, loss: 0.5028522610664368 2023-01-24 01:06:03.582737: step: 690/463, loss: 0.526164710521698 2023-01-24 01:06:04.255933: step: 692/463, loss: 0.7807934880256653 2023-01-24 01:06:04.852483: step: 694/463, loss: 0.29466983675956726 2023-01-24 01:06:05.517443: step: 696/463, loss: 0.39204248785972595 2023-01-24 01:06:06.122200: step: 698/463, loss: 0.08186376839876175 2023-01-24 01:06:06.715749: step: 700/463, loss: 0.47767120599746704 2023-01-24 01:06:07.302687: step: 702/463, loss: 0.9724576473236084 2023-01-24 01:06:07.969064: step: 704/463, loss: 0.26902055740356445 2023-01-24 01:06:08.626054: step: 706/463, loss: 1.9026131629943848 2023-01-24 01:06:09.283816: step: 708/463, loss: 1.287264108657837 2023-01-24 01:06:09.866369: step: 710/463, loss: 0.30372345447540283 2023-01-24 01:06:10.487042: step: 712/463, loss: 0.1897369921207428 2023-01-24 01:06:11.176479: step: 714/463, loss: 0.719849705696106 2023-01-24 01:06:11.824282: step: 716/463, loss: 0.9432751536369324 2023-01-24 01:06:12.466545: step: 718/463, loss: 0.3186300992965698 2023-01-24 01:06:13.125295: step: 720/463, loss: 1.056477665901184 2023-01-24 01:06:13.737192: step: 722/463, loss: 0.6201511025428772 2023-01-24 01:06:14.374445: step: 724/463, loss: 0.689306914806366 2023-01-24 01:06:14.977530: step: 726/463, loss: 0.10952012240886688 2023-01-24 01:06:15.632786: step: 728/463, loss: 0.7241306900978088 2023-01-24 01:06:16.223849: step: 730/463, loss: 2.0394368171691895 2023-01-24 01:06:16.836158: step: 732/463, loss: 1.0719571113586426 2023-01-24 01:06:17.351072: step: 734/463, loss: 0.23701752722263336 2023-01-24 01:06:18.017310: step: 736/463, loss: 0.2271384298801422 2023-01-24 01:06:18.671981: step: 738/463, loss: 0.7731890678405762 2023-01-24 01:06:19.298701: step: 740/463, loss: 0.2568628489971161 2023-01-24 01:06:19.900768: step: 742/463, loss: 0.2536655366420746 2023-01-24 01:06:20.480847: step: 744/463, loss: 0.34534215927124023 2023-01-24 01:06:21.092484: step: 746/463, loss: 1.7968966960906982 2023-01-24 01:06:21.710456: step: 748/463, loss: 0.3700242042541504 2023-01-24 01:06:22.261738: step: 750/463, loss: 0.26883116364479065 2023-01-24 01:06:22.880386: step: 752/463, loss: 0.6381238102912903 2023-01-24 01:06:23.556693: step: 754/463, loss: 0.3252519965171814 2023-01-24 01:06:24.090531: step: 756/463, loss: 0.904443085193634 2023-01-24 01:06:24.692435: step: 758/463, loss: 0.7774116396903992 2023-01-24 01:06:25.222800: step: 760/463, loss: 0.6394470930099487 2023-01-24 01:06:25.852218: step: 762/463, loss: 0.9578912854194641 2023-01-24 01:06:26.494689: step: 764/463, loss: 0.1748456209897995 2023-01-24 01:06:27.067995: step: 766/463, loss: 0.48684728145599365 2023-01-24 01:06:27.689139: step: 768/463, loss: 0.2203337699174881 2023-01-24 01:06:28.229024: step: 770/463, loss: 0.3098141849040985 2023-01-24 01:06:28.891587: step: 772/463, loss: 0.10141605138778687 2023-01-24 01:06:29.476809: step: 774/463, loss: 1.0393526554107666 2023-01-24 01:06:30.082673: step: 776/463, loss: 0.3754832446575165 2023-01-24 01:06:30.683898: step: 778/463, loss: 0.20177383720874786 2023-01-24 01:06:31.250768: step: 780/463, loss: 0.2653917372226715 2023-01-24 01:06:31.891593: step: 782/463, loss: 2.7043190002441406 2023-01-24 01:06:32.496375: step: 784/463, loss: 0.36081433296203613 2023-01-24 01:06:33.113152: step: 786/463, loss: 0.36651062965393066 2023-01-24 01:06:33.740850: step: 788/463, loss: 2.0124497413635254 2023-01-24 01:06:34.307552: step: 790/463, loss: 0.2784193754196167 2023-01-24 01:06:34.918665: step: 792/463, loss: 1.0397369861602783 2023-01-24 01:06:35.601340: step: 794/463, loss: 0.43702030181884766 2023-01-24 01:06:36.190163: step: 796/463, loss: 0.3848629295825958 2023-01-24 01:06:36.840335: step: 798/463, loss: 0.9617966413497925 2023-01-24 01:06:37.446067: step: 800/463, loss: 1.1139748096466064 2023-01-24 01:06:38.025340: step: 802/463, loss: 0.1123814806342125 2023-01-24 01:06:38.625822: step: 804/463, loss: 0.473356693983078 2023-01-24 01:06:39.271637: step: 806/463, loss: 1.5484853982925415 2023-01-24 01:06:39.997674: step: 808/463, loss: 0.6806536912918091 2023-01-24 01:06:40.617836: step: 810/463, loss: 0.8491877913475037 2023-01-24 01:06:41.244704: step: 812/463, loss: 0.13020454347133636 2023-01-24 01:06:41.873040: step: 814/463, loss: 0.5292938947677612 2023-01-24 01:06:42.470809: step: 816/463, loss: 3.117556095123291 2023-01-24 01:06:43.094447: step: 818/463, loss: 0.41445407271385193 2023-01-24 01:06:43.713588: step: 820/463, loss: 0.09199021756649017 2023-01-24 01:06:44.340925: step: 822/463, loss: 0.42910289764404297 2023-01-24 01:06:44.956015: step: 824/463, loss: 0.6269572377204895 2023-01-24 01:06:45.559303: step: 826/463, loss: 0.15939131379127502 2023-01-24 01:06:46.207697: step: 828/463, loss: 0.5565905570983887 2023-01-24 01:06:46.879078: step: 830/463, loss: 0.9332891702651978 2023-01-24 01:06:47.517745: step: 832/463, loss: 0.291536808013916 2023-01-24 01:06:48.211332: step: 834/463, loss: 0.6959925889968872 2023-01-24 01:06:48.804138: step: 836/463, loss: 0.9747889041900635 2023-01-24 01:06:49.482644: step: 838/463, loss: 0.1822262853384018 2023-01-24 01:06:50.132795: step: 840/463, loss: 0.3691006600856781 2023-01-24 01:06:50.742736: step: 842/463, loss: 0.5506543517112732 2023-01-24 01:06:51.357172: step: 844/463, loss: 0.43808749318122864 2023-01-24 01:06:52.011168: step: 846/463, loss: 0.32877227663993835 2023-01-24 01:06:52.626317: step: 848/463, loss: 0.5414854288101196 2023-01-24 01:06:53.265655: step: 850/463, loss: 0.17924955487251282 2023-01-24 01:06:53.929154: step: 852/463, loss: 1.8871729373931885 2023-01-24 01:06:54.489285: step: 854/463, loss: 0.3657010793685913 2023-01-24 01:06:55.215754: step: 856/463, loss: 0.2917175590991974 2023-01-24 01:06:55.838738: step: 858/463, loss: 0.23123741149902344 2023-01-24 01:06:56.468587: step: 860/463, loss: 0.26885032653808594 2023-01-24 01:06:57.088135: step: 862/463, loss: 0.40323007106781006 2023-01-24 01:06:57.758832: step: 864/463, loss: 0.7605690360069275 2023-01-24 01:06:58.391420: step: 866/463, loss: 0.6159927845001221 2023-01-24 01:06:58.992131: step: 868/463, loss: 0.4428705871105194 2023-01-24 01:06:59.596271: step: 870/463, loss: 0.16092491149902344 2023-01-24 01:07:00.245482: step: 872/463, loss: 0.3546864092350006 2023-01-24 01:07:00.890923: step: 874/463, loss: 0.5645516514778137 2023-01-24 01:07:01.533394: step: 876/463, loss: 1.8069472312927246 2023-01-24 01:07:02.179473: step: 878/463, loss: 0.2768940031528473 2023-01-24 01:07:02.791265: step: 880/463, loss: 0.17256468534469604 2023-01-24 01:07:03.404787: step: 882/463, loss: 0.27570846676826477 2023-01-24 01:07:04.026382: step: 884/463, loss: 1.5930858850479126 2023-01-24 01:07:04.729206: step: 886/463, loss: 0.14186421036720276 2023-01-24 01:07:05.380057: step: 888/463, loss: 0.7413537502288818 2023-01-24 01:07:06.032742: step: 890/463, loss: 0.23236803710460663 2023-01-24 01:07:06.669812: step: 892/463, loss: 0.264069527387619 2023-01-24 01:07:07.313618: step: 894/463, loss: 0.9383943676948547 2023-01-24 01:07:07.902565: step: 896/463, loss: 6.721160888671875 2023-01-24 01:07:08.495483: step: 898/463, loss: 1.8199433088302612 2023-01-24 01:07:09.135626: step: 900/463, loss: 1.2914268970489502 2023-01-24 01:07:09.784075: step: 902/463, loss: 0.5187832713127136 2023-01-24 01:07:10.392869: step: 904/463, loss: 0.5670135617256165 2023-01-24 01:07:10.999490: step: 906/463, loss: 1.0960602760314941 2023-01-24 01:07:11.597865: step: 908/463, loss: 0.34083202481269836 2023-01-24 01:07:12.195693: step: 910/463, loss: 0.2917945384979248 2023-01-24 01:07:12.797732: step: 912/463, loss: 3.9993436336517334 2023-01-24 01:07:13.516433: step: 914/463, loss: 0.7767676115036011 2023-01-24 01:07:14.229780: step: 916/463, loss: 1.1644188165664673 2023-01-24 01:07:14.867618: step: 918/463, loss: 0.7658258080482483 2023-01-24 01:07:15.419624: step: 920/463, loss: 0.7191877961158752 2023-01-24 01:07:16.127821: step: 922/463, loss: 0.38791587948799133 2023-01-24 01:07:16.750534: step: 924/463, loss: 0.281393826007843 2023-01-24 01:07:17.365470: step: 926/463, loss: 0.2897534966468811 ================================================== Loss: 0.627 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3317110655737705, 'r': 0.3065814393939394, 'f1': 0.3186515748031496}, 'combined': 0.23479589722337338, 'epoch': 7} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3575246673508706, 'r': 0.2782135184363543, 'f1': 0.3129218846852904}, 'combined': 0.22014604952734002, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33089770354906056, 'r': 0.3001893939393939, 'f1': 0.3147964250248262}, 'combined': 0.23195526054460874, 'epoch': 7} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.35413706138734513, 'r': 0.2724844987617913, 'f1': 0.30799086977517376}, 'combined': 0.21867351754037337, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3394620582120582, 'r': 0.3092447916666667, 'f1': 0.32364965312190286}, 'combined': 0.23847869177403366, 'epoch': 7} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36468931207169214, 'r': 0.2587193810770381, 'f1': 0.30269771384817323}, 'combined': 0.214915376832203, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24709302325581395, 'r': 0.30357142857142855, 'f1': 0.27243589743589736}, 'combined': 0.18162393162393156, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.28703703703703703, 'r': 0.33695652173913043, 'f1': 0.31}, 'combined': 0.155, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 7} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3296252358752359, 'r': 0.29272222085315064, 'f1': 0.3100796188735887}, 'combined': 0.22847971917001272, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.36010427991126526, 'r': 0.2770515086523572, 'f1': 0.3131649615755617}, 'combined': 0.22031705839486754, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36858974358974356, 'r': 0.2738095238095238, 'f1': 0.314207650273224}, 'combined': 0.209471766848816, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3064311346548189, 'r': 0.27619504546686713, 'f1': 0.2905285208803173}, 'combined': 0.2140736469644443, 'epoch': 4} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.35963387992050416, 'r': 0.24527593026724218, 'f1': 0.29164524353057414}, 'combined': 0.20706812290670762, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.34615384615384615, 'r': 0.391304347826087, 'f1': 0.36734693877551017}, 'combined': 0.18367346938775508, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3394620582120582, 'r': 0.3092447916666667, 'f1': 0.32364965312190286}, 'combined': 0.23847869177403366, 'epoch': 7} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36468931207169214, 'r': 0.2587193810770381, 'f1': 0.30269771384817323}, 'combined': 0.214915376832203, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 7} ****************************** Epoch: 8 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:10:02.309289: step: 2/463, loss: 0.16605666279792786 2023-01-24 01:10:02.944585: step: 4/463, loss: 0.38503795862197876 2023-01-24 01:10:03.530883: step: 6/463, loss: 0.27171841263771057 2023-01-24 01:10:04.161227: step: 8/463, loss: 0.8257753849029541 2023-01-24 01:10:04.783180: step: 10/463, loss: 0.6764319539070129 2023-01-24 01:10:05.404218: step: 12/463, loss: 0.36713922023773193 2023-01-24 01:10:06.045380: step: 14/463, loss: 0.35808345675468445 2023-01-24 01:10:06.645777: step: 16/463, loss: 0.19340191781520844 2023-01-24 01:10:07.312349: step: 18/463, loss: 0.956613302230835 2023-01-24 01:10:07.942940: step: 20/463, loss: 0.2382398545742035 2023-01-24 01:10:08.589598: step: 22/463, loss: 1.2971596717834473 2023-01-24 01:10:09.156719: step: 24/463, loss: 0.22699479758739471 2023-01-24 01:10:09.815005: step: 26/463, loss: 0.4809612035751343 2023-01-24 01:10:10.506213: step: 28/463, loss: 0.4105561375617981 2023-01-24 01:10:11.152352: step: 30/463, loss: 0.4868346154689789 2023-01-24 01:10:11.823654: step: 32/463, loss: 0.1574963480234146 2023-01-24 01:10:12.450565: step: 34/463, loss: 0.1449679732322693 2023-01-24 01:10:13.007170: step: 36/463, loss: 0.5476135611534119 2023-01-24 01:10:13.677507: step: 38/463, loss: 0.3754819929599762 2023-01-24 01:10:14.304899: step: 40/463, loss: 1.022408366203308 2023-01-24 01:10:14.944438: step: 42/463, loss: 4.200562477111816 2023-01-24 01:10:15.575866: step: 44/463, loss: 0.7335466146469116 2023-01-24 01:10:16.262534: step: 46/463, loss: 0.08522189408540726 2023-01-24 01:10:16.885801: step: 48/463, loss: 0.31444239616394043 2023-01-24 01:10:17.492857: step: 50/463, loss: 0.4450739920139313 2023-01-24 01:10:18.154898: step: 52/463, loss: 0.5992401838302612 2023-01-24 01:10:18.749326: step: 54/463, loss: 0.5345771908760071 2023-01-24 01:10:19.363281: step: 56/463, loss: 0.4356203079223633 2023-01-24 01:10:20.021950: step: 58/463, loss: 0.1747041642665863 2023-01-24 01:10:20.685887: step: 60/463, loss: 0.34538328647613525 2023-01-24 01:10:21.354447: step: 62/463, loss: 1.1009329557418823 2023-01-24 01:10:21.945712: step: 64/463, loss: 0.34781578183174133 2023-01-24 01:10:22.560211: step: 66/463, loss: 0.18515340983867645 2023-01-24 01:10:23.219599: step: 68/463, loss: 0.20897725224494934 2023-01-24 01:10:23.858725: step: 70/463, loss: 0.21665160357952118 2023-01-24 01:10:24.467160: step: 72/463, loss: 0.4176701605319977 2023-01-24 01:10:25.101044: step: 74/463, loss: 0.3244898021221161 2023-01-24 01:10:25.757831: step: 76/463, loss: 0.18227700889110565 2023-01-24 01:10:26.377026: step: 78/463, loss: 0.19006197154521942 2023-01-24 01:10:26.997032: step: 80/463, loss: 0.454755038022995 2023-01-24 01:10:27.536511: step: 82/463, loss: 0.5575661063194275 2023-01-24 01:10:28.108010: step: 84/463, loss: 0.13021796941757202 2023-01-24 01:10:28.677217: step: 86/463, loss: 0.16196787357330322 2023-01-24 01:10:29.320129: step: 88/463, loss: 0.5640078783035278 2023-01-24 01:10:29.921481: step: 90/463, loss: 0.27958258986473083 2023-01-24 01:10:30.622583: step: 92/463, loss: 0.16173551976680756 2023-01-24 01:10:31.206108: step: 94/463, loss: 0.674604058265686 2023-01-24 01:10:31.818989: step: 96/463, loss: 0.5134526491165161 2023-01-24 01:10:32.474271: step: 98/463, loss: 0.2657164931297302 2023-01-24 01:10:33.101515: step: 100/463, loss: 1.119792103767395 2023-01-24 01:10:33.648590: step: 102/463, loss: 0.20650717616081238 2023-01-24 01:10:34.195633: step: 104/463, loss: 0.2864614427089691 2023-01-24 01:10:34.836210: step: 106/463, loss: 0.45595303177833557 2023-01-24 01:10:35.438816: step: 108/463, loss: 0.15068478882312775 2023-01-24 01:10:36.092201: step: 110/463, loss: 0.516718327999115 2023-01-24 01:10:36.689434: step: 112/463, loss: 0.32369163632392883 2023-01-24 01:10:37.263846: step: 114/463, loss: 0.14688003063201904 2023-01-24 01:10:37.937299: step: 116/463, loss: 0.5222381949424744 2023-01-24 01:10:38.615890: step: 118/463, loss: 0.29266080260276794 2023-01-24 01:10:39.277931: step: 120/463, loss: 0.31856074929237366 2023-01-24 01:10:39.934402: step: 122/463, loss: 0.6885375380516052 2023-01-24 01:10:40.524483: step: 124/463, loss: 0.5786613821983337 2023-01-24 01:10:41.137677: step: 126/463, loss: 0.5416733622550964 2023-01-24 01:10:41.737902: step: 128/463, loss: 0.2725480794906616 2023-01-24 01:10:42.382690: step: 130/463, loss: 0.13606895506381989 2023-01-24 01:10:43.039838: step: 132/463, loss: 1.5169601440429688 2023-01-24 01:10:43.645122: step: 134/463, loss: 0.2975769639015198 2023-01-24 01:10:44.283918: step: 136/463, loss: 0.2525339126586914 2023-01-24 01:10:44.892435: step: 138/463, loss: 0.5193480253219604 2023-01-24 01:10:45.503963: step: 140/463, loss: 0.42469823360443115 2023-01-24 01:10:46.111126: step: 142/463, loss: 0.3142640292644501 2023-01-24 01:10:46.770771: step: 144/463, loss: 0.16297177970409393 2023-01-24 01:10:47.400285: step: 146/463, loss: 0.12845709919929504 2023-01-24 01:10:48.052767: step: 148/463, loss: 0.23619522154331207 2023-01-24 01:10:48.723353: step: 150/463, loss: 0.658191442489624 2023-01-24 01:10:49.384391: step: 152/463, loss: 0.9733262658119202 2023-01-24 01:10:49.971596: step: 154/463, loss: 0.3638192415237427 2023-01-24 01:10:50.552176: step: 156/463, loss: 0.06956522911787033 2023-01-24 01:10:51.139972: step: 158/463, loss: 0.10947830229997635 2023-01-24 01:10:51.717834: step: 160/463, loss: 0.12222448736429214 2023-01-24 01:10:52.303362: step: 162/463, loss: 0.15499521791934967 2023-01-24 01:10:52.929264: step: 164/463, loss: 1.1875330209732056 2023-01-24 01:10:53.655943: step: 166/463, loss: 0.20230290293693542 2023-01-24 01:10:54.405879: step: 168/463, loss: 0.16144074499607086 2023-01-24 01:10:55.020534: step: 170/463, loss: 0.3539266884326935 2023-01-24 01:10:55.647001: step: 172/463, loss: 0.3606109917163849 2023-01-24 01:10:56.303113: step: 174/463, loss: 0.40831878781318665 2023-01-24 01:10:56.970549: step: 176/463, loss: 1.2973953485488892 2023-01-24 01:10:57.572220: step: 178/463, loss: 0.21367326378822327 2023-01-24 01:10:58.228895: step: 180/463, loss: 0.20832814276218414 2023-01-24 01:10:58.831973: step: 182/463, loss: 0.36238086223602295 2023-01-24 01:10:59.410428: step: 184/463, loss: 1.5830647945404053 2023-01-24 01:11:00.099024: step: 186/463, loss: 1.4087945222854614 2023-01-24 01:11:00.722238: step: 188/463, loss: 0.4729519784450531 2023-01-24 01:11:01.349420: step: 190/463, loss: 0.2737700045108795 2023-01-24 01:11:01.973635: step: 192/463, loss: 0.9331732392311096 2023-01-24 01:11:02.560725: step: 194/463, loss: 0.21332256495952606 2023-01-24 01:11:03.180513: step: 196/463, loss: 0.503099799156189 2023-01-24 01:11:03.809616: step: 198/463, loss: 0.2785847783088684 2023-01-24 01:11:04.408454: step: 200/463, loss: 0.36131852865219116 2023-01-24 01:11:05.021181: step: 202/463, loss: 0.6547910571098328 2023-01-24 01:11:05.634494: step: 204/463, loss: 0.4358242452144623 2023-01-24 01:11:06.286240: step: 206/463, loss: 0.4440295398235321 2023-01-24 01:11:06.880445: step: 208/463, loss: 0.7967302203178406 2023-01-24 01:11:07.452635: step: 210/463, loss: 0.3737398386001587 2023-01-24 01:11:08.108080: step: 212/463, loss: 0.21163801848888397 2023-01-24 01:11:08.779922: step: 214/463, loss: 0.2476838231086731 2023-01-24 01:11:09.436551: step: 216/463, loss: 0.7783970832824707 2023-01-24 01:11:09.994746: step: 218/463, loss: 0.23048819601535797 2023-01-24 01:11:10.607012: step: 220/463, loss: 0.6242182850837708 2023-01-24 01:11:11.331726: step: 222/463, loss: 0.6001423001289368 2023-01-24 01:11:12.000311: step: 224/463, loss: 0.33959248661994934 2023-01-24 01:11:12.603178: step: 226/463, loss: 0.2712199091911316 2023-01-24 01:11:13.193819: step: 228/463, loss: 0.131121426820755 2023-01-24 01:11:13.855228: step: 230/463, loss: 0.8217250108718872 2023-01-24 01:11:14.452050: step: 232/463, loss: 0.4797295331954956 2023-01-24 01:11:15.138176: step: 234/463, loss: 0.1514052301645279 2023-01-24 01:11:15.814857: step: 236/463, loss: 0.39388927817344666 2023-01-24 01:11:16.406344: step: 238/463, loss: 0.43603402376174927 2023-01-24 01:11:17.003157: step: 240/463, loss: 0.4434458315372467 2023-01-24 01:11:17.638580: step: 242/463, loss: 0.4920247197151184 2023-01-24 01:11:18.259429: step: 244/463, loss: 0.22592106461524963 2023-01-24 01:11:18.864194: step: 246/463, loss: 0.2814777195453644 2023-01-24 01:11:19.505036: step: 248/463, loss: 0.16360117495059967 2023-01-24 01:11:20.192628: step: 250/463, loss: 0.2970806956291199 2023-01-24 01:11:20.882240: step: 252/463, loss: 0.18687830865383148 2023-01-24 01:11:21.622304: step: 254/463, loss: 0.575664758682251 2023-01-24 01:11:22.275785: step: 256/463, loss: 0.2237425297498703 2023-01-24 01:11:22.873249: step: 258/463, loss: 0.17019006609916687 2023-01-24 01:11:23.462778: step: 260/463, loss: 0.8056997656822205 2023-01-24 01:11:24.078623: step: 262/463, loss: 0.38630932569503784 2023-01-24 01:11:24.693438: step: 264/463, loss: 0.22960993647575378 2023-01-24 01:11:25.266063: step: 266/463, loss: 0.15392890572547913 2023-01-24 01:11:25.814723: step: 268/463, loss: 0.5244348049163818 2023-01-24 01:11:26.444549: step: 270/463, loss: 0.8930394649505615 2023-01-24 01:11:27.081933: step: 272/463, loss: 0.11313141137361526 2023-01-24 01:11:27.739754: step: 274/463, loss: 0.7167916893959045 2023-01-24 01:11:28.358720: step: 276/463, loss: 0.2995671331882477 2023-01-24 01:11:28.973522: step: 278/463, loss: 0.1194152906537056 2023-01-24 01:11:29.610134: step: 280/463, loss: 0.2433032989501953 2023-01-24 01:11:30.321468: step: 282/463, loss: 0.8974010944366455 2023-01-24 01:11:30.995910: step: 284/463, loss: 0.4042874872684479 2023-01-24 01:11:31.612401: step: 286/463, loss: 0.8136952519416809 2023-01-24 01:11:32.210118: step: 288/463, loss: 0.21810388565063477 2023-01-24 01:11:32.838877: step: 290/463, loss: 0.1440027952194214 2023-01-24 01:11:33.438208: step: 292/463, loss: 0.21192555129528046 2023-01-24 01:11:34.075008: step: 294/463, loss: 0.49177196621894836 2023-01-24 01:11:34.725029: step: 296/463, loss: 0.5422770977020264 2023-01-24 01:11:35.358016: step: 298/463, loss: 2.0303211212158203 2023-01-24 01:11:35.937778: step: 300/463, loss: 0.2693426012992859 2023-01-24 01:11:36.548310: step: 302/463, loss: 0.30332493782043457 2023-01-24 01:11:37.147956: step: 304/463, loss: 0.22875037789344788 2023-01-24 01:11:37.744888: step: 306/463, loss: 0.9787039756774902 2023-01-24 01:11:38.438743: step: 308/463, loss: 0.4407539963722229 2023-01-24 01:11:39.079092: step: 310/463, loss: 0.7326889038085938 2023-01-24 01:11:39.690804: step: 312/463, loss: 1.3565117120742798 2023-01-24 01:11:40.343494: step: 314/463, loss: 0.4017893075942993 2023-01-24 01:11:41.021059: step: 316/463, loss: 0.7146849036216736 2023-01-24 01:11:41.585555: step: 318/463, loss: 0.5877069234848022 2023-01-24 01:11:42.162651: step: 320/463, loss: 0.21019087731838226 2023-01-24 01:11:42.731276: step: 322/463, loss: 0.3129178285598755 2023-01-24 01:11:43.316105: step: 324/463, loss: 0.44565463066101074 2023-01-24 01:11:43.932865: step: 326/463, loss: 0.22542966902256012 2023-01-24 01:11:44.620126: step: 328/463, loss: 0.22374482452869415 2023-01-24 01:11:45.158320: step: 330/463, loss: 1.4278644323349 2023-01-24 01:11:45.867772: step: 332/463, loss: 0.6530548334121704 2023-01-24 01:11:46.559804: step: 334/463, loss: 0.5928555130958557 2023-01-24 01:11:47.216933: step: 336/463, loss: 0.17140734195709229 2023-01-24 01:11:47.846731: step: 338/463, loss: 0.26012611389160156 2023-01-24 01:11:48.429382: step: 340/463, loss: 0.15077431499958038 2023-01-24 01:11:49.066314: step: 342/463, loss: 0.357882022857666 2023-01-24 01:11:49.665952: step: 344/463, loss: 0.2551735043525696 2023-01-24 01:11:50.285131: step: 346/463, loss: 0.09058777987957001 2023-01-24 01:11:50.870248: step: 348/463, loss: 1.1877142190933228 2023-01-24 01:11:51.550536: step: 350/463, loss: 0.29360121488571167 2023-01-24 01:11:52.165344: step: 352/463, loss: 0.18265113234519958 2023-01-24 01:11:52.780717: step: 354/463, loss: 0.37458887696266174 2023-01-24 01:11:53.388386: step: 356/463, loss: 0.18946047127246857 2023-01-24 01:11:54.145525: step: 358/463, loss: 0.19492414593696594 2023-01-24 01:11:54.811185: step: 360/463, loss: 0.38568946719169617 2023-01-24 01:11:55.405328: step: 362/463, loss: 0.12099465727806091 2023-01-24 01:11:56.021894: step: 364/463, loss: 0.6806142926216125 2023-01-24 01:11:56.649755: step: 366/463, loss: 0.12497488409280777 2023-01-24 01:11:57.249237: step: 368/463, loss: 0.7256534695625305 2023-01-24 01:11:57.932097: step: 370/463, loss: 0.39734387397766113 2023-01-24 01:11:58.524818: step: 372/463, loss: 0.26140114665031433 2023-01-24 01:11:59.188013: step: 374/463, loss: 1.122131109237671 2023-01-24 01:11:59.794214: step: 376/463, loss: 0.13975252211093903 2023-01-24 01:12:00.470495: step: 378/463, loss: 0.3936381936073303 2023-01-24 01:12:01.124447: step: 380/463, loss: 0.2021259069442749 2023-01-24 01:12:01.752208: step: 382/463, loss: 0.7655689716339111 2023-01-24 01:12:02.337131: step: 384/463, loss: 0.2888185977935791 2023-01-24 01:12:03.016140: step: 386/463, loss: 0.1998376101255417 2023-01-24 01:12:03.667096: step: 388/463, loss: 0.22097688913345337 2023-01-24 01:12:04.255317: step: 390/463, loss: 0.45952108502388 2023-01-24 01:12:04.868340: step: 392/463, loss: 0.7932746410369873 2023-01-24 01:12:05.483933: step: 394/463, loss: 0.28967711329460144 2023-01-24 01:12:06.069249: step: 396/463, loss: 0.21227812767028809 2023-01-24 01:12:06.699941: step: 398/463, loss: 0.17929302155971527 2023-01-24 01:12:07.393854: step: 400/463, loss: 0.48571062088012695 2023-01-24 01:12:08.110658: step: 402/463, loss: 0.4316459596157074 2023-01-24 01:12:08.713348: step: 404/463, loss: 0.177780881524086 2023-01-24 01:12:09.330186: step: 406/463, loss: 1.1432218551635742 2023-01-24 01:12:09.940670: step: 408/463, loss: 0.2557734251022339 2023-01-24 01:12:10.483532: step: 410/463, loss: 0.1307230442762375 2023-01-24 01:12:11.098219: step: 412/463, loss: 0.39132463932037354 2023-01-24 01:12:11.829651: step: 414/463, loss: 0.39097917079925537 2023-01-24 01:12:12.472464: step: 416/463, loss: 0.7414718270301819 2023-01-24 01:12:13.030849: step: 418/463, loss: 0.304313600063324 2023-01-24 01:12:13.624892: step: 420/463, loss: 0.15967613458633423 2023-01-24 01:12:14.280701: step: 422/463, loss: 0.5787858366966248 2023-01-24 01:12:15.026265: step: 424/463, loss: 0.6100149154663086 2023-01-24 01:12:15.622078: step: 426/463, loss: 0.7994292974472046 2023-01-24 01:12:16.180970: step: 428/463, loss: 0.28759124875068665 2023-01-24 01:12:16.909295: step: 430/463, loss: 0.8480523824691772 2023-01-24 01:12:17.504279: step: 432/463, loss: 0.6292333006858826 2023-01-24 01:12:18.148954: step: 434/463, loss: 0.4309714734554291 2023-01-24 01:12:18.785630: step: 436/463, loss: 0.21160286664962769 2023-01-24 01:12:19.364969: step: 438/463, loss: 0.3637198805809021 2023-01-24 01:12:19.900958: step: 440/463, loss: 0.13538244366645813 2023-01-24 01:12:20.511562: step: 442/463, loss: 0.07438520342111588 2023-01-24 01:12:21.062699: step: 444/463, loss: 0.2561189532279968 2023-01-24 01:12:21.689928: step: 446/463, loss: 3.0319862365722656 2023-01-24 01:12:22.313679: step: 448/463, loss: 0.4105055332183838 2023-01-24 01:12:22.926893: step: 450/463, loss: 0.18965764343738556 2023-01-24 01:12:23.571207: step: 452/463, loss: 0.6696047186851501 2023-01-24 01:12:24.223316: step: 454/463, loss: 0.5856834053993225 2023-01-24 01:12:24.809152: step: 456/463, loss: 0.3066183924674988 2023-01-24 01:12:25.375528: step: 458/463, loss: 0.42356032133102417 2023-01-24 01:12:26.012002: step: 460/463, loss: 0.4760408401489258 2023-01-24 01:12:26.608252: step: 462/463, loss: 0.7641807794570923 2023-01-24 01:12:27.287297: step: 464/463, loss: 0.5540781617164612 2023-01-24 01:12:27.926832: step: 466/463, loss: 0.4628383219242096 2023-01-24 01:12:28.522933: step: 468/463, loss: 0.406451553106308 2023-01-24 01:12:29.133211: step: 470/463, loss: 0.45568588376045227 2023-01-24 01:12:29.743198: step: 472/463, loss: 0.3280514180660248 2023-01-24 01:12:30.421336: step: 474/463, loss: 0.08937735110521317 2023-01-24 01:12:31.031442: step: 476/463, loss: 0.23692870140075684 2023-01-24 01:12:31.626279: step: 478/463, loss: 0.3718286156654358 2023-01-24 01:12:32.221379: step: 480/463, loss: 0.2191193401813507 2023-01-24 01:12:32.783160: step: 482/463, loss: 0.13248218595981598 2023-01-24 01:12:33.422709: step: 484/463, loss: 0.27719196677207947 2023-01-24 01:12:34.102551: step: 486/463, loss: 0.37342092394828796 2023-01-24 01:12:34.727859: step: 488/463, loss: 0.13320937752723694 2023-01-24 01:12:35.336750: step: 490/463, loss: 0.40785008668899536 2023-01-24 01:12:36.009996: step: 492/463, loss: 0.24955704808235168 2023-01-24 01:12:36.672730: step: 494/463, loss: 0.6893935799598694 2023-01-24 01:12:37.242991: step: 496/463, loss: 0.2156197875738144 2023-01-24 01:12:37.868626: step: 498/463, loss: 0.26435020565986633 2023-01-24 01:12:38.470078: step: 500/463, loss: 0.7202986478805542 2023-01-24 01:12:39.087652: step: 502/463, loss: 0.388916015625 2023-01-24 01:12:39.670480: step: 504/463, loss: 0.23769189417362213 2023-01-24 01:12:40.264770: step: 506/463, loss: 0.24594120681285858 2023-01-24 01:12:40.872632: step: 508/463, loss: 0.3116559386253357 2023-01-24 01:12:41.486156: step: 510/463, loss: 0.6120627522468567 2023-01-24 01:12:42.075244: step: 512/463, loss: 0.13350899517536163 2023-01-24 01:12:42.700666: step: 514/463, loss: 0.5492000579833984 2023-01-24 01:12:43.263325: step: 516/463, loss: 0.2865014374256134 2023-01-24 01:12:43.829085: step: 518/463, loss: 0.24633845686912537 2023-01-24 01:12:44.525126: step: 520/463, loss: 0.3166522681713104 2023-01-24 01:12:45.136977: step: 522/463, loss: 0.4371377229690552 2023-01-24 01:12:45.823448: step: 524/463, loss: 0.40074044466018677 2023-01-24 01:12:46.454362: step: 526/463, loss: 0.46855199337005615 2023-01-24 01:12:47.099943: step: 528/463, loss: 3.86938214302063 2023-01-24 01:12:47.737001: step: 530/463, loss: 0.19978971779346466 2023-01-24 01:12:48.365562: step: 532/463, loss: 0.4813389480113983 2023-01-24 01:12:48.973258: step: 534/463, loss: 0.6236270070075989 2023-01-24 01:12:49.641364: step: 536/463, loss: 0.16415177285671234 2023-01-24 01:12:50.296681: step: 538/463, loss: 0.1210830807685852 2023-01-24 01:12:50.898157: step: 540/463, loss: 0.980380117893219 2023-01-24 01:12:51.569907: step: 542/463, loss: 0.20416581630706787 2023-01-24 01:12:52.198892: step: 544/463, loss: 0.6928606033325195 2023-01-24 01:12:52.907557: step: 546/463, loss: 0.23601214587688446 2023-01-24 01:12:53.531510: step: 548/463, loss: 0.8512402176856995 2023-01-24 01:12:54.187151: step: 550/463, loss: 0.2258833348751068 2023-01-24 01:12:54.844092: step: 552/463, loss: 0.8820619583129883 2023-01-24 01:12:55.461450: step: 554/463, loss: 0.34149032831192017 2023-01-24 01:12:56.134776: step: 556/463, loss: 0.6171802282333374 2023-01-24 01:12:56.760266: step: 558/463, loss: 0.36615365743637085 2023-01-24 01:12:57.327015: step: 560/463, loss: 0.14724135398864746 2023-01-24 01:12:57.916920: step: 562/463, loss: 4.725128173828125 2023-01-24 01:12:58.550188: step: 564/463, loss: 0.20555773377418518 2023-01-24 01:12:59.209129: step: 566/463, loss: 0.31372949481010437 2023-01-24 01:12:59.837681: step: 568/463, loss: 0.2661688029766083 2023-01-24 01:13:00.465120: step: 570/463, loss: 0.49571532011032104 2023-01-24 01:13:01.072737: step: 572/463, loss: 0.7472081184387207 2023-01-24 01:13:01.742384: step: 574/463, loss: 0.14258597791194916 2023-01-24 01:13:02.347901: step: 576/463, loss: 0.06764918565750122 2023-01-24 01:13:03.024651: step: 578/463, loss: 0.618348240852356 2023-01-24 01:13:03.626075: step: 580/463, loss: 0.5996707677841187 2023-01-24 01:13:04.276422: step: 582/463, loss: 0.25724029541015625 2023-01-24 01:13:04.910276: step: 584/463, loss: 0.26397761702537537 2023-01-24 01:13:05.534898: step: 586/463, loss: 0.2698228359222412 2023-01-24 01:13:06.094269: step: 588/463, loss: 0.44326046109199524 2023-01-24 01:13:06.698424: step: 590/463, loss: 0.40149739384651184 2023-01-24 01:13:07.267885: step: 592/463, loss: 0.08317933976650238 2023-01-24 01:13:07.916355: step: 594/463, loss: 0.19514963030815125 2023-01-24 01:13:08.506494: step: 596/463, loss: 0.08358833938837051 2023-01-24 01:13:09.297245: step: 598/463, loss: 0.18622425198554993 2023-01-24 01:13:09.915622: step: 600/463, loss: 0.6685605645179749 2023-01-24 01:13:10.609380: step: 602/463, loss: 0.2858685255050659 2023-01-24 01:13:11.237706: step: 604/463, loss: 1.4778695106506348 2023-01-24 01:13:11.885460: step: 606/463, loss: 0.21214744448661804 2023-01-24 01:13:12.571584: step: 608/463, loss: 0.5130025148391724 2023-01-24 01:13:13.240501: step: 610/463, loss: 0.29039013385772705 2023-01-24 01:13:13.867732: step: 612/463, loss: 0.18156082928180695 2023-01-24 01:13:14.481610: step: 614/463, loss: 2.660917043685913 2023-01-24 01:13:15.120020: step: 616/463, loss: 0.6312980055809021 2023-01-24 01:13:15.733007: step: 618/463, loss: 0.5098439455032349 2023-01-24 01:13:16.316587: step: 620/463, loss: 0.3204038441181183 2023-01-24 01:13:16.885030: step: 622/463, loss: 0.5286929607391357 2023-01-24 01:13:17.498554: step: 624/463, loss: 0.2748023271560669 2023-01-24 01:13:18.129620: step: 626/463, loss: 0.7639045119285583 2023-01-24 01:13:18.695177: step: 628/463, loss: 0.6020656824111938 2023-01-24 01:13:19.338867: step: 630/463, loss: 1.1166795492172241 2023-01-24 01:13:19.975015: step: 632/463, loss: 0.2037794440984726 2023-01-24 01:13:20.535077: step: 634/463, loss: 0.1382436454296112 2023-01-24 01:13:21.098969: step: 636/463, loss: 0.1174374520778656 2023-01-24 01:13:21.667762: step: 638/463, loss: 0.38640671968460083 2023-01-24 01:13:22.325305: step: 640/463, loss: 0.6320656538009644 2023-01-24 01:13:22.948188: step: 642/463, loss: 0.4496777653694153 2023-01-24 01:13:23.502955: step: 644/463, loss: 0.17578867077827454 2023-01-24 01:13:24.136944: step: 646/463, loss: 0.9669739007949829 2023-01-24 01:13:24.728756: step: 648/463, loss: 0.5131873488426208 2023-01-24 01:13:25.375006: step: 650/463, loss: 0.1610749065876007 2023-01-24 01:13:26.013382: step: 652/463, loss: 1.2972561120986938 2023-01-24 01:13:26.633189: step: 654/463, loss: 0.13698109984397888 2023-01-24 01:13:27.262002: step: 656/463, loss: 0.21357330679893494 2023-01-24 01:13:27.862574: step: 658/463, loss: 0.24299253523349762 2023-01-24 01:13:28.477500: step: 660/463, loss: 0.245240718126297 2023-01-24 01:13:29.083130: step: 662/463, loss: 0.39379072189331055 2023-01-24 01:13:29.693859: step: 664/463, loss: 1.096177339553833 2023-01-24 01:13:30.333709: step: 666/463, loss: 1.2380033731460571 2023-01-24 01:13:30.915924: step: 668/463, loss: 0.40676596760749817 2023-01-24 01:13:31.546550: step: 670/463, loss: 2.1788182258605957 2023-01-24 01:13:32.151212: step: 672/463, loss: 0.161956325173378 2023-01-24 01:13:32.776772: step: 674/463, loss: 0.2692037522792816 2023-01-24 01:13:33.358037: step: 676/463, loss: 0.3344259560108185 2023-01-24 01:13:34.008679: step: 678/463, loss: 0.4161405563354492 2023-01-24 01:13:34.665875: step: 680/463, loss: 1.0605838298797607 2023-01-24 01:13:35.220194: step: 682/463, loss: 0.35379353165626526 2023-01-24 01:13:35.861590: step: 684/463, loss: 0.5773361325263977 2023-01-24 01:13:36.527226: step: 686/463, loss: 0.6317059397697449 2023-01-24 01:13:37.163807: step: 688/463, loss: 0.5077909231185913 2023-01-24 01:13:37.819939: step: 690/463, loss: 0.653782308101654 2023-01-24 01:13:38.448867: step: 692/463, loss: 0.34692201018333435 2023-01-24 01:13:39.051205: step: 694/463, loss: 0.14859025180339813 2023-01-24 01:13:39.670341: step: 696/463, loss: 0.3697377145290375 2023-01-24 01:13:40.401276: step: 698/463, loss: 0.19919483363628387 2023-01-24 01:13:41.092719: step: 700/463, loss: 0.1356484293937683 2023-01-24 01:13:41.745652: step: 702/463, loss: 0.34258559346199036 2023-01-24 01:13:42.402028: step: 704/463, loss: 0.2163543403148651 2023-01-24 01:13:43.008790: step: 706/463, loss: 0.18497994542121887 2023-01-24 01:13:43.691140: step: 708/463, loss: 0.17582876980304718 2023-01-24 01:13:44.354544: step: 710/463, loss: 0.42442822456359863 2023-01-24 01:13:44.993791: step: 712/463, loss: 0.1025143638253212 2023-01-24 01:13:45.571454: step: 714/463, loss: 0.2363549917936325 2023-01-24 01:13:46.219084: step: 716/463, loss: 0.2981453835964203 2023-01-24 01:13:46.846499: step: 718/463, loss: 0.120298370718956 2023-01-24 01:13:47.449150: step: 720/463, loss: 0.8736783862113953 2023-01-24 01:13:48.046967: step: 722/463, loss: 1.307024359703064 2023-01-24 01:13:48.689009: step: 724/463, loss: 0.44082021713256836 2023-01-24 01:13:49.351105: step: 726/463, loss: 0.3198314607143402 2023-01-24 01:13:49.985875: step: 728/463, loss: 0.5363556146621704 2023-01-24 01:13:50.649835: step: 730/463, loss: 0.7009714841842651 2023-01-24 01:13:51.305664: step: 732/463, loss: 0.46512967348098755 2023-01-24 01:13:51.952027: step: 734/463, loss: 0.8982319831848145 2023-01-24 01:13:52.529386: step: 736/463, loss: 0.4872097074985504 2023-01-24 01:13:53.120625: step: 738/463, loss: 0.3129192590713501 2023-01-24 01:13:53.766029: step: 740/463, loss: 0.3380332887172699 2023-01-24 01:13:54.374895: step: 742/463, loss: 0.37212640047073364 2023-01-24 01:13:55.044250: step: 744/463, loss: 0.36790990829467773 2023-01-24 01:13:55.686768: step: 746/463, loss: 0.5156781673431396 2023-01-24 01:13:56.382348: step: 748/463, loss: 0.16620518267154694 2023-01-24 01:13:56.970973: step: 750/463, loss: 0.3536047339439392 2023-01-24 01:13:57.664115: step: 752/463, loss: 0.3581531047821045 2023-01-24 01:13:58.233065: step: 754/463, loss: 0.29618600010871887 2023-01-24 01:13:58.879602: step: 756/463, loss: 0.41566455364227295 2023-01-24 01:13:59.526371: step: 758/463, loss: 0.1523197591304779 2023-01-24 01:14:00.155525: step: 760/463, loss: 0.24861012399196625 2023-01-24 01:14:00.770076: step: 762/463, loss: 0.14960232377052307 2023-01-24 01:14:01.379339: step: 764/463, loss: 0.37326762080192566 2023-01-24 01:14:01.984243: step: 766/463, loss: 1.032442569732666 2023-01-24 01:14:02.574737: step: 768/463, loss: 0.24615058302879333 2023-01-24 01:14:03.209971: step: 770/463, loss: 0.379844605922699 2023-01-24 01:14:03.876189: step: 772/463, loss: 0.3463161587715149 2023-01-24 01:14:04.587068: step: 774/463, loss: 0.12175416946411133 2023-01-24 01:14:05.290804: step: 776/463, loss: 0.34933358430862427 2023-01-24 01:14:05.931259: step: 778/463, loss: 0.6857178807258606 2023-01-24 01:14:06.507344: step: 780/463, loss: 0.10660197585821152 2023-01-24 01:14:07.106939: step: 782/463, loss: 0.140287846326828 2023-01-24 01:14:07.752021: step: 784/463, loss: 0.9389126300811768 2023-01-24 01:14:08.337255: step: 786/463, loss: 0.3757321536540985 2023-01-24 01:14:09.083665: step: 788/463, loss: 0.26747259497642517 2023-01-24 01:14:09.696370: step: 790/463, loss: 1.1871981620788574 2023-01-24 01:14:10.348198: step: 792/463, loss: 0.16555778682231903 2023-01-24 01:14:10.894194: step: 794/463, loss: 0.4520220160484314 2023-01-24 01:14:11.466069: step: 796/463, loss: 0.28253278136253357 2023-01-24 01:14:12.178738: step: 798/463, loss: 0.16808001697063446 2023-01-24 01:14:12.774038: step: 800/463, loss: 1.061667799949646 2023-01-24 01:14:13.438219: step: 802/463, loss: 0.5352565050125122 2023-01-24 01:14:14.146029: step: 804/463, loss: 1.347078561782837 2023-01-24 01:14:14.764658: step: 806/463, loss: 1.5225313901901245 2023-01-24 01:14:15.388333: step: 808/463, loss: 0.6356695890426636 2023-01-24 01:14:15.993693: step: 810/463, loss: 0.1352871209383011 2023-01-24 01:14:16.656359: step: 812/463, loss: 0.3311280608177185 2023-01-24 01:14:17.319334: step: 814/463, loss: 0.36845797300338745 2023-01-24 01:14:17.942711: step: 816/463, loss: 0.22509220242500305 2023-01-24 01:14:18.609092: step: 818/463, loss: 0.6035776138305664 2023-01-24 01:14:19.249899: step: 820/463, loss: 0.17270228266716003 2023-01-24 01:14:19.857376: step: 822/463, loss: 0.1980770230293274 2023-01-24 01:14:20.484174: step: 824/463, loss: 0.9872917532920837 2023-01-24 01:14:21.086842: step: 826/463, loss: 0.24498605728149414 2023-01-24 01:14:21.721736: step: 828/463, loss: 0.15664802491664886 2023-01-24 01:14:22.270870: step: 830/463, loss: 0.7998246550559998 2023-01-24 01:14:22.907119: step: 832/463, loss: 0.21807464957237244 2023-01-24 01:14:23.483492: step: 834/463, loss: 0.13048501312732697 2023-01-24 01:14:24.128390: step: 836/463, loss: 0.24473729729652405 2023-01-24 01:14:24.720447: step: 838/463, loss: 0.33657675981521606 2023-01-24 01:14:25.241042: step: 840/463, loss: 0.1961754411458969 2023-01-24 01:14:25.864076: step: 842/463, loss: 1.0098419189453125 2023-01-24 01:14:26.584598: step: 844/463, loss: 1.0318102836608887 2023-01-24 01:14:27.294301: step: 846/463, loss: 0.14499063789844513 2023-01-24 01:14:27.899083: step: 848/463, loss: 0.7830240726470947 2023-01-24 01:14:28.465346: step: 850/463, loss: 1.417130470275879 2023-01-24 01:14:29.102578: step: 852/463, loss: 0.18718506395816803 2023-01-24 01:14:29.718521: step: 854/463, loss: 0.15253271162509918 2023-01-24 01:14:30.369380: step: 856/463, loss: 0.29637497663497925 2023-01-24 01:14:30.935668: step: 858/463, loss: 0.40681684017181396 2023-01-24 01:14:31.472484: step: 860/463, loss: 0.26153290271759033 2023-01-24 01:14:32.121955: step: 862/463, loss: 0.3760908544063568 2023-01-24 01:14:32.717843: step: 864/463, loss: 0.29465341567993164 2023-01-24 01:14:33.335503: step: 866/463, loss: 0.394317090511322 2023-01-24 01:14:33.940776: step: 868/463, loss: 0.23566144704818726 2023-01-24 01:14:34.556253: step: 870/463, loss: 0.4286607503890991 2023-01-24 01:14:35.196451: step: 872/463, loss: 0.3694833815097809 2023-01-24 01:14:35.832804: step: 874/463, loss: 0.16438595950603485 2023-01-24 01:14:36.479490: step: 876/463, loss: 0.43546831607818604 2023-01-24 01:14:37.123094: step: 878/463, loss: 0.6398623585700989 2023-01-24 01:14:37.768594: step: 880/463, loss: 0.48116743564605713 2023-01-24 01:14:38.440576: step: 882/463, loss: 0.40169209241867065 2023-01-24 01:14:39.030854: step: 884/463, loss: 1.7779492139816284 2023-01-24 01:14:39.732121: step: 886/463, loss: 0.6085631847381592 2023-01-24 01:14:40.383393: step: 888/463, loss: 0.23559829592704773 2023-01-24 01:14:41.044021: step: 890/463, loss: 0.512502908706665 2023-01-24 01:14:41.692459: step: 892/463, loss: 0.43436652421951294 2023-01-24 01:14:42.297738: step: 894/463, loss: 0.2656591236591339 2023-01-24 01:14:42.927548: step: 896/463, loss: 0.5442951917648315 2023-01-24 01:14:43.510769: step: 898/463, loss: 0.10734881460666656 2023-01-24 01:14:44.122502: step: 900/463, loss: 1.857844352722168 2023-01-24 01:14:44.696168: step: 902/463, loss: 0.4055730700492859 2023-01-24 01:14:45.346673: step: 904/463, loss: 0.27678850293159485 2023-01-24 01:14:45.939725: step: 906/463, loss: 0.20790454745292664 2023-01-24 01:14:46.560141: step: 908/463, loss: 0.3623642325401306 2023-01-24 01:14:47.221068: step: 910/463, loss: 0.9539350271224976 2023-01-24 01:14:47.855524: step: 912/463, loss: 0.4681026041507721 2023-01-24 01:14:48.447653: step: 914/463, loss: 0.09791918098926544 2023-01-24 01:14:49.067643: step: 916/463, loss: 0.5666313171386719 2023-01-24 01:14:49.693504: step: 918/463, loss: 0.48329466581344604 2023-01-24 01:14:50.313711: step: 920/463, loss: 0.6062929630279541 2023-01-24 01:14:50.927395: step: 922/463, loss: 0.23412644863128662 2023-01-24 01:14:51.607971: step: 924/463, loss: 0.2843016982078552 2023-01-24 01:14:52.332161: step: 926/463, loss: 0.19190752506256104 ================================================== Loss: 0.477 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.305868477710233, 'r': 0.3273431146652209, 'f1': 0.3162416524813408}, 'combined': 0.23302016498625108, 'epoch': 8} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3440663534487102, 'r': 0.30203381463298645, 'f1': 0.3216828546184038}, 'combined': 0.22839482677906667, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31310003840245776, 'r': 0.3315176877202494, 'f1': 0.32204575378538514}, 'combined': 0.23729687121028378, 'epoch': 8} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36057209418915465, 'r': 0.28914987308885964, 'f1': 0.32093535550589164}, 'combined': 0.22786410240918306, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.23333333333333334, 'r': 0.30434782608695654, 'f1': 0.26415094339622636}, 'combined': 0.13207547169811318, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3076923076923077, 'r': 0.13793103448275862, 'f1': 0.1904761904761905}, 'combined': 0.12698412698412698, 'epoch': 8} New best chinese model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3064311346548189, 'r': 0.27619504546686713, 'f1': 0.2905285208803173}, 'combined': 0.2140736469644443, 'epoch': 4} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.35963387992050416, 'r': 0.24527593026724218, 'f1': 0.29164524353057414}, 'combined': 0.20706812290670762, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.34615384615384615, 'r': 0.391304347826087, 'f1': 0.36734693877551017}, 'combined': 0.18367346938775508, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3394620582120582, 'r': 0.3092447916666667, 'f1': 0.32364965312190286}, 'combined': 0.23847869177403366, 'epoch': 7} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36468931207169214, 'r': 0.2587193810770381, 'f1': 0.30269771384817323}, 'combined': 0.214915376832203, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 7} ****************************** Epoch: 9 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:17:37.802436: step: 2/463, loss: 0.2228008657693863 2023-01-24 01:17:38.458299: step: 4/463, loss: 0.42538008093833923 2023-01-24 01:17:39.043683: step: 6/463, loss: 0.15729910135269165 2023-01-24 01:17:39.592207: step: 8/463, loss: 0.14772729575634003 2023-01-24 01:17:40.299599: step: 10/463, loss: 0.31918972730636597 2023-01-24 01:17:40.888421: step: 12/463, loss: 0.15498241782188416 2023-01-24 01:17:41.493784: step: 14/463, loss: 0.19670912623405457 2023-01-24 01:17:42.172257: step: 16/463, loss: 0.22724421322345734 2023-01-24 01:17:42.799700: step: 18/463, loss: 0.41790473461151123 2023-01-24 01:17:43.446227: step: 20/463, loss: 0.19328801333904266 2023-01-24 01:17:44.046848: step: 22/463, loss: 0.20779888331890106 2023-01-24 01:17:44.749442: step: 24/463, loss: 0.6936038732528687 2023-01-24 01:17:45.428728: step: 26/463, loss: 0.7225140929222107 2023-01-24 01:17:46.133802: step: 28/463, loss: 0.24356356263160706 2023-01-24 01:17:46.712859: step: 30/463, loss: 0.25411301851272583 2023-01-24 01:17:47.355595: step: 32/463, loss: 0.11846842616796494 2023-01-24 01:17:47.972499: step: 34/463, loss: 0.25899538397789 2023-01-24 01:17:48.606050: step: 36/463, loss: 0.20825208723545074 2023-01-24 01:17:49.182285: step: 38/463, loss: 0.22589340806007385 2023-01-24 01:17:49.845162: step: 40/463, loss: 0.12709973752498627 2023-01-24 01:17:50.516871: step: 42/463, loss: 0.21221649646759033 2023-01-24 01:17:51.120294: step: 44/463, loss: 0.1629675328731537 2023-01-24 01:17:51.701671: step: 46/463, loss: 0.38636088371276855 2023-01-24 01:17:52.378276: step: 48/463, loss: 0.31967154145240784 2023-01-24 01:17:52.991954: step: 50/463, loss: 5.306634902954102 2023-01-24 01:17:53.558405: step: 52/463, loss: 0.33300015330314636 2023-01-24 01:17:54.204795: step: 54/463, loss: 0.2448233664035797 2023-01-24 01:17:54.829727: step: 56/463, loss: 0.12510181963443756 2023-01-24 01:17:55.478320: step: 58/463, loss: 0.18100319802761078 2023-01-24 01:17:56.052716: step: 60/463, loss: 0.7753440141677856 2023-01-24 01:17:56.725480: step: 62/463, loss: 0.11995119601488113 2023-01-24 01:17:57.362657: step: 64/463, loss: 0.15060070157051086 2023-01-24 01:17:57.959570: step: 66/463, loss: 0.26481321454048157 2023-01-24 01:17:58.623648: step: 68/463, loss: 0.14438419044017792 2023-01-24 01:17:59.251562: step: 70/463, loss: 0.7643477320671082 2023-01-24 01:17:59.854133: step: 72/463, loss: 0.19118031859397888 2023-01-24 01:18:00.464214: step: 74/463, loss: 0.18241563439369202 2023-01-24 01:18:01.063451: step: 76/463, loss: 0.293626993894577 2023-01-24 01:18:01.622167: step: 78/463, loss: 0.1509246528148651 2023-01-24 01:18:02.162025: step: 80/463, loss: 0.5545043349266052 2023-01-24 01:18:02.708173: step: 82/463, loss: 0.21407639980316162 2023-01-24 01:18:03.308396: step: 84/463, loss: 0.5662803649902344 2023-01-24 01:18:03.946536: step: 86/463, loss: 0.1961895376443863 2023-01-24 01:18:04.544475: step: 88/463, loss: 0.13479551672935486 2023-01-24 01:18:05.190474: step: 90/463, loss: 0.1574918031692505 2023-01-24 01:18:05.769571: step: 92/463, loss: 0.26465293765068054 2023-01-24 01:18:06.385638: step: 94/463, loss: 0.5740188360214233 2023-01-24 01:18:06.989608: step: 96/463, loss: 1.684244990348816 2023-01-24 01:18:07.589591: step: 98/463, loss: 0.3046642541885376 2023-01-24 01:18:08.219748: step: 100/463, loss: 0.26488715410232544 2023-01-24 01:18:08.865900: step: 102/463, loss: 0.2747839093208313 2023-01-24 01:18:09.589675: step: 104/463, loss: 0.4031190872192383 2023-01-24 01:18:10.151565: step: 106/463, loss: 0.3369276523590088 2023-01-24 01:18:10.728100: step: 108/463, loss: 0.23848536610603333 2023-01-24 01:18:11.328691: step: 110/463, loss: 0.5717326402664185 2023-01-24 01:18:11.908611: step: 112/463, loss: 0.1948515772819519 2023-01-24 01:18:12.505246: step: 114/463, loss: 0.1334758996963501 2023-01-24 01:18:13.188205: step: 116/463, loss: 0.15821677446365356 2023-01-24 01:18:13.800840: step: 118/463, loss: 0.2576925754547119 2023-01-24 01:18:14.417885: step: 120/463, loss: 0.3984506130218506 2023-01-24 01:18:15.157191: step: 122/463, loss: 0.4669053554534912 2023-01-24 01:18:15.848950: step: 124/463, loss: 0.6709992289543152 2023-01-24 01:18:16.431892: step: 126/463, loss: 1.191402792930603 2023-01-24 01:18:17.077510: step: 128/463, loss: 0.18675921857357025 2023-01-24 01:18:17.736611: step: 130/463, loss: 0.3111220896244049 2023-01-24 01:18:18.396890: step: 132/463, loss: 0.2078704982995987 2023-01-24 01:18:19.057018: step: 134/463, loss: 0.3205476403236389 2023-01-24 01:18:19.734493: step: 136/463, loss: 0.1422797441482544 2023-01-24 01:18:20.275688: step: 138/463, loss: 0.281350314617157 2023-01-24 01:18:20.898283: step: 140/463, loss: 0.750588595867157 2023-01-24 01:18:21.499770: step: 142/463, loss: 0.4838573932647705 2023-01-24 01:18:22.088401: step: 144/463, loss: 0.7459380030632019 2023-01-24 01:18:22.723269: step: 146/463, loss: 0.2160051465034485 2023-01-24 01:18:23.381603: step: 148/463, loss: 0.2352786511182785 2023-01-24 01:18:23.928808: step: 150/463, loss: 0.2472989410161972 2023-01-24 01:18:24.471407: step: 152/463, loss: 0.28077736496925354 2023-01-24 01:18:25.202636: step: 154/463, loss: 0.615119993686676 2023-01-24 01:18:25.793776: step: 156/463, loss: 0.3916129469871521 2023-01-24 01:18:26.397231: step: 158/463, loss: 0.334790974855423 2023-01-24 01:18:27.057245: step: 160/463, loss: 0.16241268813610077 2023-01-24 01:18:27.703135: step: 162/463, loss: 0.4995151460170746 2023-01-24 01:18:28.349913: step: 164/463, loss: 0.1919601410627365 2023-01-24 01:18:28.940341: step: 166/463, loss: 0.17928315699100494 2023-01-24 01:18:29.569748: step: 168/463, loss: 0.035166069865226746 2023-01-24 01:18:30.200285: step: 170/463, loss: 0.3764524757862091 2023-01-24 01:18:30.840032: step: 172/463, loss: 2.431833267211914 2023-01-24 01:18:31.439399: step: 174/463, loss: 0.255334734916687 2023-01-24 01:18:32.029795: step: 176/463, loss: 0.390529602766037 2023-01-24 01:18:32.622323: step: 178/463, loss: 0.22276625037193298 2023-01-24 01:18:33.346401: step: 180/463, loss: 0.2155236005783081 2023-01-24 01:18:33.920827: step: 182/463, loss: 0.18070781230926514 2023-01-24 01:18:34.585139: step: 184/463, loss: 0.12680552899837494 2023-01-24 01:18:35.156654: step: 186/463, loss: 0.166420117020607 2023-01-24 01:18:35.752832: step: 188/463, loss: 1.2608935832977295 2023-01-24 01:18:36.318333: step: 190/463, loss: 0.21768110990524292 2023-01-24 01:18:37.052382: step: 192/463, loss: 0.3063470423221588 2023-01-24 01:18:37.644977: step: 194/463, loss: 0.08596242964267731 2023-01-24 01:18:38.321689: step: 196/463, loss: 0.17139321565628052 2023-01-24 01:18:38.933331: step: 198/463, loss: 0.1535470336675644 2023-01-24 01:18:39.606724: step: 200/463, loss: 0.24380038678646088 2023-01-24 01:18:40.208346: step: 202/463, loss: 0.12761950492858887 2023-01-24 01:18:40.779820: step: 204/463, loss: 0.05873498320579529 2023-01-24 01:18:41.415220: step: 206/463, loss: 0.7502644062042236 2023-01-24 01:18:42.030555: step: 208/463, loss: 0.11838825792074203 2023-01-24 01:18:42.685124: step: 210/463, loss: 0.4137934148311615 2023-01-24 01:18:43.417832: step: 212/463, loss: 0.25922486186027527 2023-01-24 01:18:44.095751: step: 214/463, loss: 0.2043389081954956 2023-01-24 01:18:44.701501: step: 216/463, loss: 0.2906085252761841 2023-01-24 01:18:45.452136: step: 218/463, loss: 0.3389670252799988 2023-01-24 01:18:46.061264: step: 220/463, loss: 0.13579672574996948 2023-01-24 01:18:46.747272: step: 222/463, loss: 0.3806457221508026 2023-01-24 01:18:47.359561: step: 224/463, loss: 0.1581679880619049 2023-01-24 01:18:48.007854: step: 226/463, loss: 0.26383718848228455 2023-01-24 01:18:48.692043: step: 228/463, loss: 0.14241524040699005 2023-01-24 01:18:49.351710: step: 230/463, loss: 0.6941490173339844 2023-01-24 01:18:49.994275: step: 232/463, loss: 0.24720457196235657 2023-01-24 01:18:50.536507: step: 234/463, loss: 0.40441545844078064 2023-01-24 01:18:51.190960: step: 236/463, loss: 0.27150121331214905 2023-01-24 01:18:51.812614: step: 238/463, loss: 0.46524500846862793 2023-01-24 01:18:52.400476: step: 240/463, loss: 0.3357931971549988 2023-01-24 01:18:53.003535: step: 242/463, loss: 0.10618637502193451 2023-01-24 01:18:53.663857: step: 244/463, loss: 1.0408345460891724 2023-01-24 01:18:54.262824: step: 246/463, loss: 0.6330245137214661 2023-01-24 01:18:54.963505: step: 248/463, loss: 0.2870582938194275 2023-01-24 01:18:55.619093: step: 250/463, loss: 0.6080569624900818 2023-01-24 01:18:56.263395: step: 252/463, loss: 0.16866326332092285 2023-01-24 01:18:56.861654: step: 254/463, loss: 0.6184661388397217 2023-01-24 01:18:57.480280: step: 256/463, loss: 0.3337157070636749 2023-01-24 01:18:58.107975: step: 258/463, loss: 0.3145408034324646 2023-01-24 01:18:58.733725: step: 260/463, loss: 0.24604161083698273 2023-01-24 01:18:59.388705: step: 262/463, loss: 0.45525941252708435 2023-01-24 01:18:59.994462: step: 264/463, loss: 0.12283444404602051 2023-01-24 01:19:00.609367: step: 266/463, loss: 0.6228953003883362 2023-01-24 01:19:01.207651: step: 268/463, loss: 0.22398529946804047 2023-01-24 01:19:01.845289: step: 270/463, loss: 0.388765811920166 2023-01-24 01:19:02.425174: step: 272/463, loss: 0.20893023908138275 2023-01-24 01:19:02.996784: step: 274/463, loss: 0.24452008306980133 2023-01-24 01:19:03.645139: step: 276/463, loss: 0.2382676899433136 2023-01-24 01:19:04.285493: step: 278/463, loss: 0.1275937855243683 2023-01-24 01:19:04.916121: step: 280/463, loss: 0.3993491530418396 2023-01-24 01:19:05.525722: step: 282/463, loss: 0.11395271867513657 2023-01-24 01:19:06.182717: step: 284/463, loss: 3.4581658840179443 2023-01-24 01:19:06.857143: step: 286/463, loss: 0.21429359912872314 2023-01-24 01:19:07.569634: step: 288/463, loss: 0.3442201316356659 2023-01-24 01:19:08.262686: step: 290/463, loss: 0.309764564037323 2023-01-24 01:19:08.886327: step: 292/463, loss: 0.2859235405921936 2023-01-24 01:19:09.453419: step: 294/463, loss: 0.4181315302848816 2023-01-24 01:19:10.120344: step: 296/463, loss: 0.7402014136314392 2023-01-24 01:19:10.688116: step: 298/463, loss: 0.30835407972335815 2023-01-24 01:19:11.304337: step: 300/463, loss: 0.1339174509048462 2023-01-24 01:19:11.972313: step: 302/463, loss: 0.1674123853445053 2023-01-24 01:19:12.642216: step: 304/463, loss: 0.4925304055213928 2023-01-24 01:19:13.245970: step: 306/463, loss: 0.3685706853866577 2023-01-24 01:19:13.871250: step: 308/463, loss: 0.3533685505390167 2023-01-24 01:19:14.525702: step: 310/463, loss: 2.284857749938965 2023-01-24 01:19:15.138859: step: 312/463, loss: 0.13572295010089874 2023-01-24 01:19:15.745504: step: 314/463, loss: 0.1681004911661148 2023-01-24 01:19:16.378379: step: 316/463, loss: 0.36551886796951294 2023-01-24 01:19:16.929095: step: 318/463, loss: 0.0928819477558136 2023-01-24 01:19:17.509932: step: 320/463, loss: 0.13877809047698975 2023-01-24 01:19:18.165085: step: 322/463, loss: 0.8390374779701233 2023-01-24 01:19:18.847279: step: 324/463, loss: 0.16404572129249573 2023-01-24 01:19:19.536911: step: 326/463, loss: 0.2808404862880707 2023-01-24 01:19:20.269067: step: 328/463, loss: 0.2824220657348633 2023-01-24 01:19:20.864667: step: 330/463, loss: 0.2887516915798187 2023-01-24 01:19:21.458799: step: 332/463, loss: 0.10095532238483429 2023-01-24 01:19:22.080522: step: 334/463, loss: 1.3969674110412598 2023-01-24 01:19:22.752630: step: 336/463, loss: 0.3673721253871918 2023-01-24 01:19:23.367379: step: 338/463, loss: 0.1307014673948288 2023-01-24 01:19:24.016337: step: 340/463, loss: 0.31993213295936584 2023-01-24 01:19:24.581215: step: 342/463, loss: 0.30238622426986694 2023-01-24 01:19:25.188162: step: 344/463, loss: 0.08877012878656387 2023-01-24 01:19:25.854266: step: 346/463, loss: 0.9750210642814636 2023-01-24 01:19:26.454071: step: 348/463, loss: 5.506454944610596 2023-01-24 01:19:27.117898: step: 350/463, loss: 0.21171796321868896 2023-01-24 01:19:27.820482: step: 352/463, loss: 0.29846805334091187 2023-01-24 01:19:28.413774: step: 354/463, loss: 0.10833857953548431 2023-01-24 01:19:29.022072: step: 356/463, loss: 0.2038392722606659 2023-01-24 01:19:29.651772: step: 358/463, loss: 0.3675570487976074 2023-01-24 01:19:30.288204: step: 360/463, loss: 0.3241778016090393 2023-01-24 01:19:30.844035: step: 362/463, loss: 0.2552442252635956 2023-01-24 01:19:31.509026: step: 364/463, loss: 0.5118953585624695 2023-01-24 01:19:32.171009: step: 366/463, loss: 0.7427288889884949 2023-01-24 01:19:32.748555: step: 368/463, loss: 0.28763559460639954 2023-01-24 01:19:33.367601: step: 370/463, loss: 0.11767619848251343 2023-01-24 01:19:33.951907: step: 372/463, loss: 0.4620572328567505 2023-01-24 01:19:34.653121: step: 374/463, loss: 0.46878889203071594 2023-01-24 01:19:35.259093: step: 376/463, loss: 0.05252925679087639 2023-01-24 01:19:35.876068: step: 378/463, loss: 0.1564519852399826 2023-01-24 01:19:36.495889: step: 380/463, loss: 0.5178438425064087 2023-01-24 01:19:37.136710: step: 382/463, loss: 0.38523590564727783 2023-01-24 01:19:37.729216: step: 384/463, loss: 0.6638131141662598 2023-01-24 01:19:38.333336: step: 386/463, loss: 0.09200496226549149 2023-01-24 01:19:38.933876: step: 388/463, loss: 0.35649675130844116 2023-01-24 01:19:39.567157: step: 390/463, loss: 0.14091196656227112 2023-01-24 01:19:40.304102: step: 392/463, loss: 0.3735799193382263 2023-01-24 01:19:41.033418: step: 394/463, loss: 0.7927460074424744 2023-01-24 01:19:41.680294: step: 396/463, loss: 0.05448054522275925 2023-01-24 01:19:42.356926: step: 398/463, loss: 1.024483561515808 2023-01-24 01:19:42.997302: step: 400/463, loss: 0.14631520211696625 2023-01-24 01:19:43.588203: step: 402/463, loss: 0.28533440828323364 2023-01-24 01:19:44.138684: step: 404/463, loss: 0.16602928936481476 2023-01-24 01:19:44.763115: step: 406/463, loss: 0.327955037355423 2023-01-24 01:19:45.352465: step: 408/463, loss: 0.6565306186676025 2023-01-24 01:19:46.037940: step: 410/463, loss: 0.10340748727321625 2023-01-24 01:19:46.638743: step: 412/463, loss: 0.09518487006425858 2023-01-24 01:19:47.182105: step: 414/463, loss: 0.2090694010257721 2023-01-24 01:19:47.792377: step: 416/463, loss: 0.18851354718208313 2023-01-24 01:19:48.405562: step: 418/463, loss: 0.4341897964477539 2023-01-24 01:19:49.044600: step: 420/463, loss: 0.1682562530040741 2023-01-24 01:19:49.703288: step: 422/463, loss: 0.5487556457519531 2023-01-24 01:19:50.322351: step: 424/463, loss: 0.15600907802581787 2023-01-24 01:19:50.990712: step: 426/463, loss: 0.30814123153686523 2023-01-24 01:19:51.616290: step: 428/463, loss: 0.3562421500682831 2023-01-24 01:19:52.475176: step: 430/463, loss: 0.2553286552429199 2023-01-24 01:19:53.015619: step: 432/463, loss: 0.9165219068527222 2023-01-24 01:19:53.693000: step: 434/463, loss: 0.7144289016723633 2023-01-24 01:19:54.359097: step: 436/463, loss: 0.2043074071407318 2023-01-24 01:19:54.907332: step: 438/463, loss: 0.0899614542722702 2023-01-24 01:19:55.620966: step: 440/463, loss: 0.9613052606582642 2023-01-24 01:19:56.253759: step: 442/463, loss: 0.3111993968486786 2023-01-24 01:19:56.914185: step: 444/463, loss: 0.23450905084609985 2023-01-24 01:19:57.554406: step: 446/463, loss: 0.3186935484409332 2023-01-24 01:19:58.174170: step: 448/463, loss: 0.21560165286064148 2023-01-24 01:19:58.773569: step: 450/463, loss: 0.08718789368867874 2023-01-24 01:19:59.340437: step: 452/463, loss: 0.29815348982810974 2023-01-24 01:19:59.925929: step: 454/463, loss: 0.13146169483661652 2023-01-24 01:20:00.564161: step: 456/463, loss: 0.26177603006362915 2023-01-24 01:20:01.281327: step: 458/463, loss: 0.722028374671936 2023-01-24 01:20:01.882700: step: 460/463, loss: 0.2517569065093994 2023-01-24 01:20:02.472486: step: 462/463, loss: 0.24647130072116852 2023-01-24 01:20:03.051810: step: 464/463, loss: 0.6798186898231506 2023-01-24 01:20:03.668338: step: 466/463, loss: 1.5788246393203735 2023-01-24 01:20:04.235476: step: 468/463, loss: 0.33885258436203003 2023-01-24 01:20:04.855823: step: 470/463, loss: 0.1758035272359848 2023-01-24 01:20:05.475913: step: 472/463, loss: 0.18635722994804382 2023-01-24 01:20:06.096668: step: 474/463, loss: 0.6038302779197693 2023-01-24 01:20:06.753761: step: 476/463, loss: 0.25968557596206665 2023-01-24 01:20:07.379600: step: 478/463, loss: 0.115684874355793 2023-01-24 01:20:07.995765: step: 480/463, loss: 0.32118338346481323 2023-01-24 01:20:08.640814: step: 482/463, loss: 0.752265453338623 2023-01-24 01:20:09.294553: step: 484/463, loss: 0.1738254874944687 2023-01-24 01:20:09.933338: step: 486/463, loss: 1.9769984483718872 2023-01-24 01:20:10.486408: step: 488/463, loss: 0.2456904649734497 2023-01-24 01:20:11.117008: step: 490/463, loss: 0.5687512159347534 2023-01-24 01:20:11.773360: step: 492/463, loss: 0.2267220914363861 2023-01-24 01:20:12.313577: step: 494/463, loss: 1.7658839225769043 2023-01-24 01:20:12.861690: step: 496/463, loss: 0.196818009018898 2023-01-24 01:20:13.462355: step: 498/463, loss: 0.21181552112102509 2023-01-24 01:20:14.066101: step: 500/463, loss: 0.43559393286705017 2023-01-24 01:20:14.741121: step: 502/463, loss: 0.2659333348274231 2023-01-24 01:20:15.342751: step: 504/463, loss: 0.22960197925567627 2023-01-24 01:20:15.968037: step: 506/463, loss: 0.32936570048332214 2023-01-24 01:20:16.586222: step: 508/463, loss: 0.9430136680603027 2023-01-24 01:20:17.209522: step: 510/463, loss: 0.35591650009155273 2023-01-24 01:20:17.815498: step: 512/463, loss: 0.16233564913272858 2023-01-24 01:20:18.394920: step: 514/463, loss: 0.7070266604423523 2023-01-24 01:20:18.960258: step: 516/463, loss: 0.19441160559654236 2023-01-24 01:20:19.572262: step: 518/463, loss: 0.41760697960853577 2023-01-24 01:20:20.181582: step: 520/463, loss: 0.1596154272556305 2023-01-24 01:20:20.865344: step: 522/463, loss: 0.4977467358112335 2023-01-24 01:20:21.512265: step: 524/463, loss: 0.15036801993846893 2023-01-24 01:20:22.101464: step: 526/463, loss: 0.27847564220428467 2023-01-24 01:20:22.700904: step: 528/463, loss: 0.38705310225486755 2023-01-24 01:20:23.353145: step: 530/463, loss: 0.5457172393798828 2023-01-24 01:20:24.002420: step: 532/463, loss: 0.3418586552143097 2023-01-24 01:20:24.565808: step: 534/463, loss: 0.16629938781261444 2023-01-24 01:20:25.177766: step: 536/463, loss: 0.07164991647005081 2023-01-24 01:20:25.738763: step: 538/463, loss: 0.19592878222465515 2023-01-24 01:20:26.323271: step: 540/463, loss: 0.26360878348350525 2023-01-24 01:20:26.972677: step: 542/463, loss: 0.3603450059890747 2023-01-24 01:20:27.590557: step: 544/463, loss: 0.3240545690059662 2023-01-24 01:20:28.182616: step: 546/463, loss: 0.4782842993736267 2023-01-24 01:20:28.861813: step: 548/463, loss: 0.13311883807182312 2023-01-24 01:20:29.544449: step: 550/463, loss: 0.18598465621471405 2023-01-24 01:20:30.215702: step: 552/463, loss: 0.22276514768600464 2023-01-24 01:20:30.854523: step: 554/463, loss: 0.5088359713554382 2023-01-24 01:20:31.482318: step: 556/463, loss: 0.2266351282596588 2023-01-24 01:20:32.111710: step: 558/463, loss: 1.7039029598236084 2023-01-24 01:20:32.773542: step: 560/463, loss: 0.45954686403274536 2023-01-24 01:20:33.397219: step: 562/463, loss: 0.0674513578414917 2023-01-24 01:20:34.089023: step: 564/463, loss: 0.2890896797180176 2023-01-24 01:20:34.672177: step: 566/463, loss: 0.3025360107421875 2023-01-24 01:20:35.241283: step: 568/463, loss: 0.20712748169898987 2023-01-24 01:20:35.820022: step: 570/463, loss: 0.16903318464756012 2023-01-24 01:20:36.458267: step: 572/463, loss: 0.294373482465744 2023-01-24 01:20:37.132055: step: 574/463, loss: 0.3773675262928009 2023-01-24 01:20:37.770929: step: 576/463, loss: 0.26128533482551575 2023-01-24 01:20:38.405822: step: 578/463, loss: 0.2308340221643448 2023-01-24 01:20:39.037550: step: 580/463, loss: 0.2821766436100006 2023-01-24 01:20:39.564137: step: 582/463, loss: 0.1615988314151764 2023-01-24 01:20:40.138299: step: 584/463, loss: 0.3454597592353821 2023-01-24 01:20:40.787553: step: 586/463, loss: 1.0423604249954224 2023-01-24 01:20:41.395688: step: 588/463, loss: 0.19406263530254364 2023-01-24 01:20:41.993888: step: 590/463, loss: 0.47391000390052795 2023-01-24 01:20:42.614063: step: 592/463, loss: 0.30231600999832153 2023-01-24 01:20:43.272494: step: 594/463, loss: 0.18090927600860596 2023-01-24 01:20:43.851151: step: 596/463, loss: 0.08973708003759384 2023-01-24 01:20:44.499821: step: 598/463, loss: 0.37598443031311035 2023-01-24 01:20:45.135271: step: 600/463, loss: 0.9699556231498718 2023-01-24 01:20:45.720129: step: 602/463, loss: 0.7103447914123535 2023-01-24 01:20:46.253653: step: 604/463, loss: 0.4810757040977478 2023-01-24 01:20:46.953818: step: 606/463, loss: 0.2388215959072113 2023-01-24 01:20:47.607365: step: 608/463, loss: 0.28800544142723083 2023-01-24 01:20:48.290423: step: 610/463, loss: 0.7408730387687683 2023-01-24 01:20:48.931360: step: 612/463, loss: 0.16970224678516388 2023-01-24 01:20:49.599282: step: 614/463, loss: 0.23399102687835693 2023-01-24 01:20:50.274361: step: 616/463, loss: 0.9716830849647522 2023-01-24 01:20:50.890280: step: 618/463, loss: 0.21102190017700195 2023-01-24 01:20:51.501595: step: 620/463, loss: 0.30438852310180664 2023-01-24 01:20:52.133179: step: 622/463, loss: 0.3083685636520386 2023-01-24 01:20:52.743524: step: 624/463, loss: 1.4786274433135986 2023-01-24 01:20:53.380957: step: 626/463, loss: 0.7403117418289185 2023-01-24 01:20:53.994691: step: 628/463, loss: 0.2730647325515747 2023-01-24 01:20:54.593408: step: 630/463, loss: 0.5078891515731812 2023-01-24 01:20:55.229461: step: 632/463, loss: 0.3561764657497406 2023-01-24 01:20:55.895950: step: 634/463, loss: 0.17674383521080017 2023-01-24 01:20:56.482172: step: 636/463, loss: 0.3796806335449219 2023-01-24 01:20:57.119694: step: 638/463, loss: 0.494425892829895 2023-01-24 01:20:57.748971: step: 640/463, loss: 0.3575546443462372 2023-01-24 01:20:58.407259: step: 642/463, loss: 0.27355965971946716 2023-01-24 01:20:59.097378: step: 644/463, loss: 0.3931627869606018 2023-01-24 01:20:59.749848: step: 646/463, loss: 0.16954456269741058 2023-01-24 01:21:00.463849: step: 648/463, loss: 0.11751600354909897 2023-01-24 01:21:01.101998: step: 650/463, loss: 0.1768917441368103 2023-01-24 01:21:01.721577: step: 652/463, loss: 0.39474573731422424 2023-01-24 01:21:02.303175: step: 654/463, loss: 0.26157045364379883 2023-01-24 01:21:02.903261: step: 656/463, loss: 0.3054327070713043 2023-01-24 01:21:03.487466: step: 658/463, loss: 1.2896463871002197 2023-01-24 01:21:04.134898: step: 660/463, loss: 1.2262147665023804 2023-01-24 01:21:04.779572: step: 662/463, loss: 0.29632094502449036 2023-01-24 01:21:05.360358: step: 664/463, loss: 0.10360486060380936 2023-01-24 01:21:05.972838: step: 666/463, loss: 0.03450671583414078 2023-01-24 01:21:06.633317: step: 668/463, loss: 0.21669511497020721 2023-01-24 01:21:07.260299: step: 670/463, loss: 0.4017583727836609 2023-01-24 01:21:07.855194: step: 672/463, loss: 0.36537325382232666 2023-01-24 01:21:08.453906: step: 674/463, loss: 0.5199413299560547 2023-01-24 01:21:09.091558: step: 676/463, loss: 0.1591717153787613 2023-01-24 01:21:09.680286: step: 678/463, loss: 0.2848286032676697 2023-01-24 01:21:10.288277: step: 680/463, loss: 0.18013739585876465 2023-01-24 01:21:10.974815: step: 682/463, loss: 0.4330521821975708 2023-01-24 01:21:11.598051: step: 684/463, loss: 0.19010809063911438 2023-01-24 01:21:12.260634: step: 686/463, loss: 0.3683933615684509 2023-01-24 01:21:12.921650: step: 688/463, loss: 0.1285184770822525 2023-01-24 01:21:13.617348: step: 690/463, loss: 0.4473402798175812 2023-01-24 01:21:14.246743: step: 692/463, loss: 0.20139290392398834 2023-01-24 01:21:14.914994: step: 694/463, loss: 0.08718008548021317 2023-01-24 01:21:15.504075: step: 696/463, loss: 0.22788263857364655 2023-01-24 01:21:16.167096: step: 698/463, loss: 0.704470157623291 2023-01-24 01:21:16.779876: step: 700/463, loss: 0.7730032205581665 2023-01-24 01:21:17.428931: step: 702/463, loss: 0.2804076075553894 2023-01-24 01:21:18.103230: step: 704/463, loss: 0.4671097993850708 2023-01-24 01:21:18.739680: step: 706/463, loss: 0.15912914276123047 2023-01-24 01:21:19.345729: step: 708/463, loss: 0.2885390520095825 2023-01-24 01:21:19.978285: step: 710/463, loss: 0.24276964366436005 2023-01-24 01:21:20.637320: step: 712/463, loss: 0.11997894942760468 2023-01-24 01:21:21.251723: step: 714/463, loss: 0.5876578092575073 2023-01-24 01:21:21.873467: step: 716/463, loss: 0.2529209852218628 2023-01-24 01:21:22.489279: step: 718/463, loss: 0.21366465091705322 2023-01-24 01:21:23.114498: step: 720/463, loss: 0.9887641668319702 2023-01-24 01:21:23.732622: step: 722/463, loss: 0.4889693260192871 2023-01-24 01:21:24.342481: step: 724/463, loss: 0.1924746334552765 2023-01-24 01:21:24.911098: step: 726/463, loss: 0.19602714478969574 2023-01-24 01:21:25.462754: step: 728/463, loss: 0.27322423458099365 2023-01-24 01:21:26.161585: step: 730/463, loss: 0.3773791193962097 2023-01-24 01:21:26.763744: step: 732/463, loss: 0.5458710789680481 2023-01-24 01:21:27.421068: step: 734/463, loss: 0.07524027675390244 2023-01-24 01:21:28.113639: step: 736/463, loss: 0.6881182193756104 2023-01-24 01:21:28.730557: step: 738/463, loss: 0.30595526099205017 2023-01-24 01:21:29.330991: step: 740/463, loss: 0.5792680382728577 2023-01-24 01:21:29.973548: step: 742/463, loss: 0.18030396103858948 2023-01-24 01:21:30.580422: step: 744/463, loss: 0.1500871479511261 2023-01-24 01:21:31.223189: step: 746/463, loss: 0.0964006558060646 2023-01-24 01:21:31.869897: step: 748/463, loss: 0.1836720108985901 2023-01-24 01:21:32.507850: step: 750/463, loss: 0.21397441625595093 2023-01-24 01:21:33.114985: step: 752/463, loss: 0.21302129328250885 2023-01-24 01:21:33.746327: step: 754/463, loss: 0.12123412638902664 2023-01-24 01:21:34.414471: step: 756/463, loss: 0.2178911715745926 2023-01-24 01:21:35.063183: step: 758/463, loss: 1.2009069919586182 2023-01-24 01:21:35.644699: step: 760/463, loss: 0.1653495728969574 2023-01-24 01:21:36.260185: step: 762/463, loss: 1.3999863862991333 2023-01-24 01:21:36.895277: step: 764/463, loss: 0.2686472237110138 2023-01-24 01:21:37.567707: step: 766/463, loss: 1.8620402812957764 2023-01-24 01:21:38.125658: step: 768/463, loss: 0.0567685104906559 2023-01-24 01:21:38.666288: step: 770/463, loss: 0.1241387128829956 2023-01-24 01:21:39.298689: step: 772/463, loss: 1.2940545082092285 2023-01-24 01:21:39.930902: step: 774/463, loss: 0.1675059199333191 2023-01-24 01:21:40.525637: step: 776/463, loss: 0.1985366940498352 2023-01-24 01:21:41.197988: step: 778/463, loss: 0.2432357668876648 2023-01-24 01:21:41.878393: step: 780/463, loss: 0.26957112550735474 2023-01-24 01:21:42.579720: step: 782/463, loss: 0.344143807888031 2023-01-24 01:21:43.194609: step: 784/463, loss: 0.2490839809179306 2023-01-24 01:21:43.781206: step: 786/463, loss: 0.10575327277183533 2023-01-24 01:21:44.469822: step: 788/463, loss: 0.22406642138957977 2023-01-24 01:21:45.090345: step: 790/463, loss: 0.17756953835487366 2023-01-24 01:21:45.841513: step: 792/463, loss: 0.46453601121902466 2023-01-24 01:21:46.442498: step: 794/463, loss: 1.518202543258667 2023-01-24 01:21:47.067902: step: 796/463, loss: 1.04442298412323 2023-01-24 01:21:47.778568: step: 798/463, loss: 1.1884628534317017 2023-01-24 01:21:48.388811: step: 800/463, loss: 0.351652592420578 2023-01-24 01:21:48.946992: step: 802/463, loss: 0.07654285430908203 2023-01-24 01:21:49.649590: step: 804/463, loss: 0.1954786479473114 2023-01-24 01:21:50.249797: step: 806/463, loss: 0.2121914178133011 2023-01-24 01:21:50.915258: step: 808/463, loss: 0.2721375823020935 2023-01-24 01:21:51.593148: step: 810/463, loss: 0.469184547662735 2023-01-24 01:21:52.215453: step: 812/463, loss: 0.3151530623435974 2023-01-24 01:21:52.877219: step: 814/463, loss: 0.24219289422035217 2023-01-24 01:21:53.492067: step: 816/463, loss: 0.10834243148565292 2023-01-24 01:21:54.070664: step: 818/463, loss: 0.477215051651001 2023-01-24 01:21:54.693234: step: 820/463, loss: 0.15848544239997864 2023-01-24 01:21:55.275145: step: 822/463, loss: 0.20052915811538696 2023-01-24 01:21:55.839656: step: 824/463, loss: 0.21196505427360535 2023-01-24 01:21:56.449239: step: 826/463, loss: 0.19432342052459717 2023-01-24 01:21:57.047172: step: 828/463, loss: 0.31151828169822693 2023-01-24 01:21:57.666270: step: 830/463, loss: 0.4891687333583832 2023-01-24 01:21:58.265274: step: 832/463, loss: 0.20313924551010132 2023-01-24 01:21:58.840114: step: 834/463, loss: 0.4191374182701111 2023-01-24 01:21:59.492678: step: 836/463, loss: 0.3220886290073395 2023-01-24 01:22:00.088142: step: 838/463, loss: 0.468591570854187 2023-01-24 01:22:00.727565: step: 840/463, loss: 0.16392499208450317 2023-01-24 01:22:01.464976: step: 842/463, loss: 0.1577317714691162 2023-01-24 01:22:02.086012: step: 844/463, loss: 0.3383632004261017 2023-01-24 01:22:02.714136: step: 846/463, loss: 0.2543044090270996 2023-01-24 01:22:03.293742: step: 848/463, loss: 0.19166195392608643 2023-01-24 01:22:04.080171: step: 850/463, loss: 0.12888097763061523 2023-01-24 01:22:04.698633: step: 852/463, loss: 0.10550086945295334 2023-01-24 01:22:05.308932: step: 854/463, loss: 0.2819836139678955 2023-01-24 01:22:05.941699: step: 856/463, loss: 0.1762540638446808 2023-01-24 01:22:06.558015: step: 858/463, loss: 0.42769697308540344 2023-01-24 01:22:07.207076: step: 860/463, loss: 0.29915520548820496 2023-01-24 01:22:07.855719: step: 862/463, loss: 0.22789283096790314 2023-01-24 01:22:08.457550: step: 864/463, loss: 0.39145123958587646 2023-01-24 01:22:09.095814: step: 866/463, loss: 0.5866736173629761 2023-01-24 01:22:09.772088: step: 868/463, loss: 1.1536961793899536 2023-01-24 01:22:10.374310: step: 870/463, loss: 0.3032526969909668 2023-01-24 01:22:11.075311: step: 872/463, loss: 0.9425399899482727 2023-01-24 01:22:11.697920: step: 874/463, loss: 0.3136405348777771 2023-01-24 01:22:12.321716: step: 876/463, loss: 0.943146824836731 2023-01-24 01:22:13.003061: step: 878/463, loss: 0.41317957639694214 2023-01-24 01:22:13.637943: step: 880/463, loss: 0.2071446031332016 2023-01-24 01:22:14.286405: step: 882/463, loss: 0.2551629841327667 2023-01-24 01:22:14.946868: step: 884/463, loss: 0.26185524463653564 2023-01-24 01:22:15.612043: step: 886/463, loss: 0.1880650818347931 2023-01-24 01:22:16.294733: step: 888/463, loss: 0.42936015129089355 2023-01-24 01:22:17.023742: step: 890/463, loss: 0.22071272134780884 2023-01-24 01:22:17.563658: step: 892/463, loss: 0.051134686917066574 2023-01-24 01:22:18.228684: step: 894/463, loss: 0.14463946223258972 2023-01-24 01:22:18.920591: step: 896/463, loss: 0.376079797744751 2023-01-24 01:22:19.562299: step: 898/463, loss: 0.6788853406906128 2023-01-24 01:22:20.169696: step: 900/463, loss: 0.09051565080881119 2023-01-24 01:22:20.831154: step: 902/463, loss: 0.15574021637439728 2023-01-24 01:22:21.470367: step: 904/463, loss: 0.2696878910064697 2023-01-24 01:22:22.097106: step: 906/463, loss: 0.3230466842651367 2023-01-24 01:22:22.692191: step: 908/463, loss: 0.158351331949234 2023-01-24 01:22:23.335683: step: 910/463, loss: 0.1112188771367073 2023-01-24 01:22:23.950828: step: 912/463, loss: 0.5698286890983582 2023-01-24 01:22:24.569641: step: 914/463, loss: 0.44627395272254944 2023-01-24 01:22:25.142252: step: 916/463, loss: 0.19077302515506744 2023-01-24 01:22:25.742743: step: 918/463, loss: 0.07962384074926376 2023-01-24 01:22:26.372478: step: 920/463, loss: 0.20242834091186523 2023-01-24 01:22:27.017249: step: 922/463, loss: 0.5022553205490112 2023-01-24 01:22:27.659681: step: 924/463, loss: 0.16418512165546417 2023-01-24 01:22:28.249000: step: 926/463, loss: 0.06647752970457077 ================================================== Loss: 0.398 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30636883541295307, 'r': 0.32551688762626263, 'f1': 0.3156527395163759}, 'combined': 0.2325862291173296, 'epoch': 9} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3672544966932755, 'r': 0.2884197618010017, 'f1': 0.32309779767736846}, 'combined': 0.227304983290611, 'epoch': 9} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3132335148215366, 'r': 0.32687815656565655, 'f1': 0.3199104108742663}, 'combined': 0.23572346064419622, 'epoch': 9} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.366263484909441, 'r': 0.28444546384764613, 'f1': 0.32021070881080793}, 'combined': 0.22734960325567363, 'epoch': 9} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31871870057986296, 'r': 0.32716957521645024, 'f1': 0.3228888518024033}, 'combined': 0.2379181013280866, 'epoch': 9} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37985434297753, 'r': 0.2827362605234146, 'f1': 0.32417784348157397}, 'combined': 0.23016626887191752, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25, 'r': 0.22142857142857142, 'f1': 0.23484848484848483}, 'combined': 0.15656565656565655, 'epoch': 9} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2916666666666667, 'r': 0.3804347826086957, 'f1': 0.3301886792452831}, 'combined': 0.16509433962264156, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3125, 'r': 0.12931034482758622, 'f1': 0.1829268292682927}, 'combined': 0.12195121951219513, 'epoch': 9} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3064311346548189, 'r': 0.27619504546686713, 'f1': 0.2905285208803173}, 'combined': 0.2140736469644443, 'epoch': 4} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.35963387992050416, 'r': 0.24527593026724218, 'f1': 0.29164524353057414}, 'combined': 0.20706812290670762, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.34615384615384615, 'r': 0.391304347826087, 'f1': 0.36734693877551017}, 'combined': 0.18367346938775508, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3394620582120582, 'r': 0.3092447916666667, 'f1': 0.32364965312190286}, 'combined': 0.23847869177403366, 'epoch': 7} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36468931207169214, 'r': 0.2587193810770381, 'f1': 0.30269771384817323}, 'combined': 0.214915376832203, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 7} ****************************** Epoch: 10 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:25:06.511807: step: 2/463, loss: 0.4241265058517456 2023-01-24 01:25:07.155057: step: 4/463, loss: 0.26619458198547363 2023-01-24 01:25:07.747688: step: 6/463, loss: 0.17707359790802002 2023-01-24 01:25:08.372611: step: 8/463, loss: 0.09891939908266068 2023-01-24 01:25:08.955248: step: 10/463, loss: 0.3878695070743561 2023-01-24 01:25:09.583213: step: 12/463, loss: 0.30637240409851074 2023-01-24 01:25:10.230918: step: 14/463, loss: 0.19770881533622742 2023-01-24 01:25:10.795898: step: 16/463, loss: 0.13057823479175568 2023-01-24 01:25:11.400609: step: 18/463, loss: 0.19703514873981476 2023-01-24 01:25:12.049785: step: 20/463, loss: 0.11027371138334274 2023-01-24 01:25:12.688767: step: 22/463, loss: 0.08701611310243607 2023-01-24 01:25:13.346967: step: 24/463, loss: 0.21756665408611298 2023-01-24 01:25:13.973532: step: 26/463, loss: 0.24298976361751556 2023-01-24 01:25:14.573852: step: 28/463, loss: 0.09162376075983047 2023-01-24 01:25:15.215062: step: 30/463, loss: 0.22040577232837677 2023-01-24 01:25:15.930565: step: 32/463, loss: 0.10033777356147766 2023-01-24 01:25:16.542213: step: 34/463, loss: 0.2234550267457962 2023-01-24 01:25:17.180583: step: 36/463, loss: 0.055313754826784134 2023-01-24 01:25:17.785668: step: 38/463, loss: 0.10465532541275024 2023-01-24 01:25:18.332949: step: 40/463, loss: 0.035702455788850784 2023-01-24 01:25:18.849176: step: 42/463, loss: 0.1500108391046524 2023-01-24 01:25:19.563989: step: 44/463, loss: 0.07030950486660004 2023-01-24 01:25:20.184650: step: 46/463, loss: 0.061292968690395355 2023-01-24 01:25:20.882323: step: 48/463, loss: 0.31412267684936523 2023-01-24 01:25:21.479440: step: 50/463, loss: 1.1013761758804321 2023-01-24 01:25:22.036063: step: 52/463, loss: 0.2434743493795395 2023-01-24 01:25:22.648920: step: 54/463, loss: 0.11900374293327332 2023-01-24 01:25:23.217242: step: 56/463, loss: 0.5787142515182495 2023-01-24 01:25:23.837712: step: 58/463, loss: 0.14953824877738953 2023-01-24 01:25:24.497619: step: 60/463, loss: 0.3202550709247589 2023-01-24 01:25:25.157850: step: 62/463, loss: 0.15321289002895355 2023-01-24 01:25:25.828976: step: 64/463, loss: 0.2626948952674866 2023-01-24 01:25:26.468026: step: 66/463, loss: 0.20559559762477875 2023-01-24 01:25:27.186941: step: 68/463, loss: 0.11478876322507858 2023-01-24 01:25:27.812807: step: 70/463, loss: 0.2612624764442444 2023-01-24 01:25:28.372574: step: 72/463, loss: 0.05089710280299187 2023-01-24 01:25:28.983552: step: 74/463, loss: 0.7583202719688416 2023-01-24 01:25:29.623240: step: 76/463, loss: 0.19888728857040405 2023-01-24 01:25:30.209280: step: 78/463, loss: 0.15234889090061188 2023-01-24 01:25:30.908107: step: 80/463, loss: 0.2985592186450958 2023-01-24 01:25:31.543969: step: 82/463, loss: 1.4265671968460083 2023-01-24 01:25:32.213201: step: 84/463, loss: 1.0064750909805298 2023-01-24 01:25:32.884088: step: 86/463, loss: 0.40094542503356934 2023-01-24 01:25:33.513727: step: 88/463, loss: 1.3028095960617065 2023-01-24 01:25:34.067248: step: 90/463, loss: 0.026631955057382584 2023-01-24 01:25:35.433250: step: 92/463, loss: 0.28732308745384216 2023-01-24 01:25:36.045947: step: 94/463, loss: 0.12200113385915756 2023-01-24 01:25:36.611098: step: 96/463, loss: 0.25750893354415894 2023-01-24 01:25:37.289140: step: 98/463, loss: 0.1828591525554657 2023-01-24 01:25:37.909428: step: 100/463, loss: 0.2760144770145416 2023-01-24 01:25:38.534637: step: 102/463, loss: 0.42419159412384033 2023-01-24 01:25:39.118738: step: 104/463, loss: 0.12669357657432556 2023-01-24 01:25:39.795784: step: 106/463, loss: 0.19266223907470703 2023-01-24 01:25:40.435653: step: 108/463, loss: 0.31891366839408875 2023-01-24 01:25:41.041550: step: 110/463, loss: 0.17168541252613068 2023-01-24 01:25:41.659015: step: 112/463, loss: 0.49303901195526123 2023-01-24 01:25:42.343465: step: 114/463, loss: 0.4556392729282379 2023-01-24 01:25:42.984460: step: 116/463, loss: 0.10191883146762848 2023-01-24 01:25:43.557544: step: 118/463, loss: 0.11718977987766266 2023-01-24 01:25:44.171228: step: 120/463, loss: 0.1481093168258667 2023-01-24 01:25:44.742723: step: 122/463, loss: 0.32999187707901 2023-01-24 01:25:45.326384: step: 124/463, loss: 0.1415906846523285 2023-01-24 01:25:46.003190: step: 126/463, loss: 0.382484495639801 2023-01-24 01:25:46.682267: step: 128/463, loss: 0.1729118674993515 2023-01-24 01:25:47.276778: step: 130/463, loss: 0.06293108314275742 2023-01-24 01:25:47.825585: step: 132/463, loss: 0.10343655943870544 2023-01-24 01:25:48.440323: step: 134/463, loss: 0.040937941521406174 2023-01-24 01:25:49.090357: step: 136/463, loss: 0.14443926513195038 2023-01-24 01:25:49.733739: step: 138/463, loss: 0.27807390689849854 2023-01-24 01:25:50.341004: step: 140/463, loss: 0.08962972462177277 2023-01-24 01:25:50.960841: step: 142/463, loss: 0.7843950986862183 2023-01-24 01:25:51.648307: step: 144/463, loss: 0.21415314078330994 2023-01-24 01:25:52.268419: step: 146/463, loss: 0.24731050431728363 2023-01-24 01:25:52.879723: step: 148/463, loss: 0.1980876475572586 2023-01-24 01:25:53.473593: step: 150/463, loss: 0.15468904376029968 2023-01-24 01:25:54.066054: step: 152/463, loss: 0.10555817931890488 2023-01-24 01:25:54.709690: step: 154/463, loss: 0.3590904474258423 2023-01-24 01:25:55.356755: step: 156/463, loss: 0.043715983629226685 2023-01-24 01:25:56.050030: step: 158/463, loss: 0.7096344828605652 2023-01-24 01:25:56.669515: step: 160/463, loss: 0.10776230692863464 2023-01-24 01:25:57.241462: step: 162/463, loss: 0.05576489865779877 2023-01-24 01:25:57.861494: step: 164/463, loss: 0.4126170873641968 2023-01-24 01:25:58.446714: step: 166/463, loss: 0.33625614643096924 2023-01-24 01:25:59.024268: step: 168/463, loss: 0.5817965865135193 2023-01-24 01:25:59.672796: step: 170/463, loss: 1.11186683177948 2023-01-24 01:26:00.330924: step: 172/463, loss: 0.11138477921485901 2023-01-24 01:26:01.096000: step: 174/463, loss: 0.2599419057369232 2023-01-24 01:26:01.752942: step: 176/463, loss: 0.3679421544075012 2023-01-24 01:26:02.355142: step: 178/463, loss: 0.7429520487785339 2023-01-24 01:26:03.024455: step: 180/463, loss: 0.15189163386821747 2023-01-24 01:26:03.673001: step: 182/463, loss: 0.1175861656665802 2023-01-24 01:26:04.269640: step: 184/463, loss: 0.10712771117687225 2023-01-24 01:26:04.806637: step: 186/463, loss: 0.19148828089237213 2023-01-24 01:26:05.369802: step: 188/463, loss: 0.14443768560886383 2023-01-24 01:26:05.936229: step: 190/463, loss: 0.22416876256465912 2023-01-24 01:26:06.545662: step: 192/463, loss: 0.16049931943416595 2023-01-24 01:26:07.154910: step: 194/463, loss: 0.5730118751525879 2023-01-24 01:26:07.811625: step: 196/463, loss: 0.1047125980257988 2023-01-24 01:26:08.478758: step: 198/463, loss: 0.2474374771118164 2023-01-24 01:26:09.130980: step: 200/463, loss: 0.22841763496398926 2023-01-24 01:26:09.731403: step: 202/463, loss: 0.38318756222724915 2023-01-24 01:26:10.354353: step: 204/463, loss: 0.13350670039653778 2023-01-24 01:26:10.948623: step: 206/463, loss: 0.1695595681667328 2023-01-24 01:26:11.531852: step: 208/463, loss: 0.26435554027557373 2023-01-24 01:26:12.170532: step: 210/463, loss: 0.14682213962078094 2023-01-24 01:26:12.782279: step: 212/463, loss: 2.736466407775879 2023-01-24 01:26:13.386621: step: 214/463, loss: 0.9620687365531921 2023-01-24 01:26:14.001257: step: 216/463, loss: 0.5500484108924866 2023-01-24 01:26:14.607242: step: 218/463, loss: 0.07786887884140015 2023-01-24 01:26:15.157269: step: 220/463, loss: 1.4060120582580566 2023-01-24 01:26:15.789468: step: 222/463, loss: 0.22933408617973328 2023-01-24 01:26:16.366380: step: 224/463, loss: 0.6487823724746704 2023-01-24 01:26:17.038887: step: 226/463, loss: 0.25622957944869995 2023-01-24 01:26:17.644556: step: 228/463, loss: 0.1111135184764862 2023-01-24 01:26:18.272438: step: 230/463, loss: 0.2876119911670685 2023-01-24 01:26:18.838256: step: 232/463, loss: 0.6933346390724182 2023-01-24 01:26:19.502382: step: 234/463, loss: 0.19928805530071259 2023-01-24 01:26:20.110120: step: 236/463, loss: 0.37963709235191345 2023-01-24 01:26:20.774295: step: 238/463, loss: 0.16789104044437408 2023-01-24 01:26:21.426929: step: 240/463, loss: 0.27272364497184753 2023-01-24 01:26:22.048930: step: 242/463, loss: 0.21558840572834015 2023-01-24 01:26:22.688465: step: 244/463, loss: 0.4203178882598877 2023-01-24 01:26:23.307665: step: 246/463, loss: 0.5448747277259827 2023-01-24 01:26:23.934653: step: 248/463, loss: 0.13140209019184113 2023-01-24 01:26:24.544249: step: 250/463, loss: 0.20725847780704498 2023-01-24 01:26:25.172602: step: 252/463, loss: 0.3917500078678131 2023-01-24 01:26:25.801314: step: 254/463, loss: 0.5829880833625793 2023-01-24 01:26:26.541480: step: 256/463, loss: 0.43690741062164307 2023-01-24 01:26:27.202386: step: 258/463, loss: 0.18546342849731445 2023-01-24 01:26:27.845442: step: 260/463, loss: 0.4483679234981537 2023-01-24 01:26:28.440312: step: 262/463, loss: 0.48405349254608154 2023-01-24 01:26:29.032129: step: 264/463, loss: 0.1684543937444687 2023-01-24 01:26:29.643054: step: 266/463, loss: 0.22877441346645355 2023-01-24 01:26:30.264764: step: 268/463, loss: 0.19972260296344757 2023-01-24 01:26:30.890425: step: 270/463, loss: 0.1380741447210312 2023-01-24 01:26:31.524756: step: 272/463, loss: 0.11327855288982391 2023-01-24 01:26:32.184932: step: 274/463, loss: 1.7222435474395752 2023-01-24 01:26:32.845141: step: 276/463, loss: 0.2449798285961151 2023-01-24 01:26:33.498960: step: 278/463, loss: 0.19586515426635742 2023-01-24 01:26:34.187216: step: 280/463, loss: 0.1370493322610855 2023-01-24 01:26:34.791480: step: 282/463, loss: 0.16208390891551971 2023-01-24 01:26:35.403622: step: 284/463, loss: 0.3291768729686737 2023-01-24 01:26:35.958642: step: 286/463, loss: 0.1444743424654007 2023-01-24 01:26:36.579474: step: 288/463, loss: 0.19859126210212708 2023-01-24 01:26:37.204554: step: 290/463, loss: 0.27081921696662903 2023-01-24 01:26:37.742554: step: 292/463, loss: 0.07210173457860947 2023-01-24 01:26:38.390117: step: 294/463, loss: 0.256599485874176 2023-01-24 01:26:39.067223: step: 296/463, loss: 0.49550575017929077 2023-01-24 01:26:39.656704: step: 298/463, loss: 0.18268398940563202 2023-01-24 01:26:40.249899: step: 300/463, loss: 0.18767701089382172 2023-01-24 01:26:40.868773: step: 302/463, loss: 0.09204762428998947 2023-01-24 01:26:41.523710: step: 304/463, loss: 0.35264891386032104 2023-01-24 01:26:42.163455: step: 306/463, loss: 0.17153751850128174 2023-01-24 01:26:42.821373: step: 308/463, loss: 0.17490185797214508 2023-01-24 01:26:43.418453: step: 310/463, loss: 0.17932447791099548 2023-01-24 01:26:44.073102: step: 312/463, loss: 0.21114033460617065 2023-01-24 01:26:44.727599: step: 314/463, loss: 0.23593908548355103 2023-01-24 01:26:45.362656: step: 316/463, loss: 0.10831187665462494 2023-01-24 01:26:46.007126: step: 318/463, loss: 0.14376197755336761 2023-01-24 01:26:46.612905: step: 320/463, loss: 0.06846840679645538 2023-01-24 01:26:47.292336: step: 322/463, loss: 0.9931140542030334 2023-01-24 01:26:47.868251: step: 324/463, loss: 0.16432538628578186 2023-01-24 01:26:48.495694: step: 326/463, loss: 0.2666153609752655 2023-01-24 01:26:49.089857: step: 328/463, loss: 0.3661361038684845 2023-01-24 01:26:49.724172: step: 330/463, loss: 1.046091914176941 2023-01-24 01:26:50.314794: step: 332/463, loss: 0.22565847635269165 2023-01-24 01:26:51.024116: step: 334/463, loss: 0.20623044669628143 2023-01-24 01:26:51.628373: step: 336/463, loss: 0.634175717830658 2023-01-24 01:26:52.216693: step: 338/463, loss: 0.47141656279563904 2023-01-24 01:26:52.787633: step: 340/463, loss: 0.6007882952690125 2023-01-24 01:26:53.377499: step: 342/463, loss: 0.07624448090791702 2023-01-24 01:26:53.991541: step: 344/463, loss: 0.4304564297199249 2023-01-24 01:26:54.590428: step: 346/463, loss: 0.6646032929420471 2023-01-24 01:26:55.211934: step: 348/463, loss: 0.46830084919929504 2023-01-24 01:26:55.792893: step: 350/463, loss: 0.04707396775484085 2023-01-24 01:26:56.410353: step: 352/463, loss: 0.18056663870811462 2023-01-24 01:26:57.025606: step: 354/463, loss: 0.10965027660131454 2023-01-24 01:26:57.744548: step: 356/463, loss: 0.12750652432441711 2023-01-24 01:26:58.379456: step: 358/463, loss: 0.4046700596809387 2023-01-24 01:26:59.000720: step: 360/463, loss: 0.0656653344631195 2023-01-24 01:26:59.636476: step: 362/463, loss: 0.12228541821241379 2023-01-24 01:27:00.278009: step: 364/463, loss: 0.09335733950138092 2023-01-24 01:27:00.903323: step: 366/463, loss: 0.09806755185127258 2023-01-24 01:27:01.539469: step: 368/463, loss: 0.16745002567768097 2023-01-24 01:27:02.134998: step: 370/463, loss: 0.21159182488918304 2023-01-24 01:27:02.748298: step: 372/463, loss: 0.17830699682235718 2023-01-24 01:27:03.319026: step: 374/463, loss: 0.14341558516025543 2023-01-24 01:27:03.951191: step: 376/463, loss: 0.11200292408466339 2023-01-24 01:27:04.622187: step: 378/463, loss: 0.11204990744590759 2023-01-24 01:27:05.304118: step: 380/463, loss: 0.1678876131772995 2023-01-24 01:27:05.883581: step: 382/463, loss: 0.25400614738464355 2023-01-24 01:27:06.529169: step: 384/463, loss: 0.12198774516582489 2023-01-24 01:27:07.189349: step: 386/463, loss: 0.20294222235679626 2023-01-24 01:27:07.782443: step: 388/463, loss: 0.2115267813205719 2023-01-24 01:27:08.431104: step: 390/463, loss: 0.068730428814888 2023-01-24 01:27:09.029309: step: 392/463, loss: 0.2959420084953308 2023-01-24 01:27:09.621037: step: 394/463, loss: 0.20840223133563995 2023-01-24 01:27:10.257142: step: 396/463, loss: 0.0838068425655365 2023-01-24 01:27:10.945555: step: 398/463, loss: 0.19911953806877136 2023-01-24 01:27:11.507707: step: 400/463, loss: 0.15147481858730316 2023-01-24 01:27:12.097198: step: 402/463, loss: 0.2681199312210083 2023-01-24 01:27:12.742672: step: 404/463, loss: 0.25698310136795044 2023-01-24 01:27:13.309035: step: 406/463, loss: 0.2611204981803894 2023-01-24 01:27:13.871021: step: 408/463, loss: 0.22646421194076538 2023-01-24 01:27:14.456250: step: 410/463, loss: 0.07868530601263046 2023-01-24 01:27:15.065671: step: 412/463, loss: 0.16657701134681702 2023-01-24 01:27:15.721078: step: 414/463, loss: 0.3124934732913971 2023-01-24 01:27:16.278205: step: 416/463, loss: 0.15809299051761627 2023-01-24 01:27:16.961284: step: 418/463, loss: 0.8763929009437561 2023-01-24 01:27:17.575727: step: 420/463, loss: 0.2339327037334442 2023-01-24 01:27:18.346135: step: 422/463, loss: 0.43484196066856384 2023-01-24 01:27:18.982361: step: 424/463, loss: 0.28402402997016907 2023-01-24 01:27:19.593354: step: 426/463, loss: 0.5299597382545471 2023-01-24 01:27:20.233575: step: 428/463, loss: 0.3564468026161194 2023-01-24 01:27:20.904984: step: 430/463, loss: 0.12499785423278809 2023-01-24 01:27:21.565551: step: 432/463, loss: 0.7001398205757141 2023-01-24 01:27:22.233624: step: 434/463, loss: 0.36984509229660034 2023-01-24 01:27:22.820061: step: 436/463, loss: 0.3832896947860718 2023-01-24 01:27:23.484583: step: 438/463, loss: 0.19821679592132568 2023-01-24 01:27:24.146974: step: 440/463, loss: 0.4628855288028717 2023-01-24 01:27:24.744857: step: 442/463, loss: 0.15107575058937073 2023-01-24 01:27:25.384834: step: 444/463, loss: 0.1646573543548584 2023-01-24 01:27:26.122511: step: 446/463, loss: 8.372335433959961 2023-01-24 01:27:26.760199: step: 448/463, loss: 0.3198821246623993 2023-01-24 01:27:27.381193: step: 450/463, loss: 0.2865915596485138 2023-01-24 01:27:28.119359: step: 452/463, loss: 0.3769145905971527 2023-01-24 01:27:28.761315: step: 454/463, loss: 0.11401497572660446 2023-01-24 01:27:29.407590: step: 456/463, loss: 1.2974541187286377 2023-01-24 01:27:30.024914: step: 458/463, loss: 0.2050916850566864 2023-01-24 01:27:30.638383: step: 460/463, loss: 0.3453603982925415 2023-01-24 01:27:31.277862: step: 462/463, loss: 0.19660183787345886 2023-01-24 01:27:31.939817: step: 464/463, loss: 0.9102486968040466 2023-01-24 01:27:32.595777: step: 466/463, loss: 0.2695101499557495 2023-01-24 01:27:33.290379: step: 468/463, loss: 0.12088160216808319 2023-01-24 01:27:33.951899: step: 470/463, loss: 0.2791857421398163 2023-01-24 01:27:34.557979: step: 472/463, loss: 0.09417106211185455 2023-01-24 01:27:35.186214: step: 474/463, loss: 0.18710176646709442 2023-01-24 01:27:35.803718: step: 476/463, loss: 0.16343703866004944 2023-01-24 01:27:36.443264: step: 478/463, loss: 0.19765669107437134 2023-01-24 01:27:37.139316: step: 480/463, loss: 0.2597247362136841 2023-01-24 01:27:37.732153: step: 482/463, loss: 0.11481492966413498 2023-01-24 01:27:38.297547: step: 484/463, loss: 1.0396926403045654 2023-01-24 01:27:38.969840: step: 486/463, loss: 0.4746440052986145 2023-01-24 01:27:39.511928: step: 488/463, loss: 0.7210532426834106 2023-01-24 01:27:40.148381: step: 490/463, loss: 0.14356116950511932 2023-01-24 01:27:40.792997: step: 492/463, loss: 0.250260591506958 2023-01-24 01:27:41.409620: step: 494/463, loss: 0.3653924763202667 2023-01-24 01:27:42.133997: step: 496/463, loss: 0.15914107859134674 2023-01-24 01:27:42.776022: step: 498/463, loss: 0.5501121282577515 2023-01-24 01:27:43.426282: step: 500/463, loss: 0.2607405483722687 2023-01-24 01:27:44.097341: step: 502/463, loss: 1.0505927801132202 2023-01-24 01:27:44.716171: step: 504/463, loss: 0.22030946612358093 2023-01-24 01:27:45.305579: step: 506/463, loss: 0.24808676540851593 2023-01-24 01:27:45.930655: step: 508/463, loss: 0.5522169470787048 2023-01-24 01:27:46.534924: step: 510/463, loss: 1.1414976119995117 2023-01-24 01:27:47.139353: step: 512/463, loss: 0.23810553550720215 2023-01-24 01:27:47.737016: step: 514/463, loss: 0.09768594056367874 2023-01-24 01:27:48.329383: step: 516/463, loss: 0.3345191776752472 2023-01-24 01:27:49.014075: step: 518/463, loss: 0.10159920901060104 2023-01-24 01:27:49.592159: step: 520/463, loss: 0.119684137403965 2023-01-24 01:27:50.208948: step: 522/463, loss: 0.28392139077186584 2023-01-24 01:27:50.806761: step: 524/463, loss: 0.3994729518890381 2023-01-24 01:27:51.412556: step: 526/463, loss: 0.18633146584033966 2023-01-24 01:27:52.002110: step: 528/463, loss: 0.09377001225948334 2023-01-24 01:27:52.604168: step: 530/463, loss: 0.48783770203590393 2023-01-24 01:27:53.250363: step: 532/463, loss: 0.09127616882324219 2023-01-24 01:27:53.873895: step: 534/463, loss: 0.24466851353645325 2023-01-24 01:27:54.481063: step: 536/463, loss: 0.14128364622592926 2023-01-24 01:27:55.115436: step: 538/463, loss: 0.11648022383451462 2023-01-24 01:27:55.816686: step: 540/463, loss: 0.2794290781021118 2023-01-24 01:27:56.394196: step: 542/463, loss: 0.08328187465667725 2023-01-24 01:27:57.020767: step: 544/463, loss: 0.09336328506469727 2023-01-24 01:27:57.534172: step: 546/463, loss: 0.10264899581670761 2023-01-24 01:27:58.158803: step: 548/463, loss: 0.2721741497516632 2023-01-24 01:27:58.780015: step: 550/463, loss: 0.26606637239456177 2023-01-24 01:27:59.350813: step: 552/463, loss: 0.0865168496966362 2023-01-24 01:27:59.987338: step: 554/463, loss: 0.5307626128196716 2023-01-24 01:28:00.656466: step: 556/463, loss: 0.13459518551826477 2023-01-24 01:28:01.222125: step: 558/463, loss: 0.29977402091026306 2023-01-24 01:28:01.861028: step: 560/463, loss: 0.6622909307479858 2023-01-24 01:28:02.486735: step: 562/463, loss: 0.14811523258686066 2023-01-24 01:28:03.095447: step: 564/463, loss: 0.15946295857429504 2023-01-24 01:28:03.698945: step: 566/463, loss: 0.651989221572876 2023-01-24 01:28:04.402426: step: 568/463, loss: 0.24959461390972137 2023-01-24 01:28:05.019703: step: 570/463, loss: 0.3264499306678772 2023-01-24 01:28:05.667911: step: 572/463, loss: 0.13238640129566193 2023-01-24 01:28:06.283033: step: 574/463, loss: 0.2001570761203766 2023-01-24 01:28:06.932718: step: 576/463, loss: 0.19359090924263 2023-01-24 01:28:07.552462: step: 578/463, loss: 2.4906442165374756 2023-01-24 01:28:08.190554: step: 580/463, loss: 0.13954022526741028 2023-01-24 01:28:08.821155: step: 582/463, loss: 0.27772724628448486 2023-01-24 01:28:09.363602: step: 584/463, loss: 0.41966521739959717 2023-01-24 01:28:10.006757: step: 586/463, loss: 0.07242762297391891 2023-01-24 01:28:10.610683: step: 588/463, loss: 0.27733081579208374 2023-01-24 01:28:11.273813: step: 590/463, loss: 0.23530533909797668 2023-01-24 01:28:11.931589: step: 592/463, loss: 0.6201727986335754 2023-01-24 01:28:12.567779: step: 594/463, loss: 0.5709316730499268 2023-01-24 01:28:13.132602: step: 596/463, loss: 0.07146921008825302 2023-01-24 01:28:13.776098: step: 598/463, loss: 0.1410476118326187 2023-01-24 01:28:14.323129: step: 600/463, loss: 0.27253013849258423 2023-01-24 01:28:14.920275: step: 602/463, loss: 0.12643295526504517 2023-01-24 01:28:15.512860: step: 604/463, loss: 0.14017175137996674 2023-01-24 01:28:16.172843: step: 606/463, loss: 0.06530436873435974 2023-01-24 01:28:16.799944: step: 608/463, loss: 0.1642066091299057 2023-01-24 01:28:17.413619: step: 610/463, loss: 0.15415795147418976 2023-01-24 01:28:18.065934: step: 612/463, loss: 0.5681823492050171 2023-01-24 01:28:18.710963: step: 614/463, loss: 0.20369680225849152 2023-01-24 01:28:19.290165: step: 616/463, loss: 0.14326678216457367 2023-01-24 01:28:19.914010: step: 618/463, loss: 0.25101086497306824 2023-01-24 01:28:20.515254: step: 620/463, loss: 0.3720051050186157 2023-01-24 01:28:21.200313: step: 622/463, loss: 0.08952219039201736 2023-01-24 01:28:21.868484: step: 624/463, loss: 0.5833289623260498 2023-01-24 01:28:22.580421: step: 626/463, loss: 0.1006213054060936 2023-01-24 01:28:23.190465: step: 628/463, loss: 0.19025026261806488 2023-01-24 01:28:23.812249: step: 630/463, loss: 0.0334579236805439 2023-01-24 01:28:24.560567: step: 632/463, loss: 0.1071770116686821 2023-01-24 01:28:25.227775: step: 634/463, loss: 0.12439550459384918 2023-01-24 01:28:25.876543: step: 636/463, loss: 0.11561362445354462 2023-01-24 01:28:26.508255: step: 638/463, loss: 0.46325919032096863 2023-01-24 01:28:27.112297: step: 640/463, loss: 0.21409446001052856 2023-01-24 01:28:27.708221: step: 642/463, loss: 0.11711053550243378 2023-01-24 01:28:28.323883: step: 644/463, loss: 0.9880533218383789 2023-01-24 01:28:29.016786: step: 646/463, loss: 0.3509558141231537 2023-01-24 01:28:29.622066: step: 648/463, loss: 0.4058859944343567 2023-01-24 01:28:30.273507: step: 650/463, loss: 0.4098854959011078 2023-01-24 01:28:30.874039: step: 652/463, loss: 0.1194213405251503 2023-01-24 01:28:31.535403: step: 654/463, loss: 0.09912446141242981 2023-01-24 01:28:32.131032: step: 656/463, loss: 0.35023802518844604 2023-01-24 01:28:32.810229: step: 658/463, loss: 0.2197221964597702 2023-01-24 01:28:33.427318: step: 660/463, loss: 0.14884886145591736 2023-01-24 01:28:34.026005: step: 662/463, loss: 0.39105820655822754 2023-01-24 01:28:34.619255: step: 664/463, loss: 0.06723938882350922 2023-01-24 01:28:35.197252: step: 666/463, loss: 0.6749042868614197 2023-01-24 01:28:35.787992: step: 668/463, loss: 0.18190982937812805 2023-01-24 01:28:36.356810: step: 670/463, loss: 0.26240724325180054 2023-01-24 01:28:36.977434: step: 672/463, loss: 0.2644597589969635 2023-01-24 01:28:37.607365: step: 674/463, loss: 0.6042328476905823 2023-01-24 01:28:38.278367: step: 676/463, loss: 0.1970590353012085 2023-01-24 01:28:38.875636: step: 678/463, loss: 0.3828853964805603 2023-01-24 01:28:39.495532: step: 680/463, loss: 0.20319879055023193 2023-01-24 01:28:40.122107: step: 682/463, loss: 0.1828479915857315 2023-01-24 01:28:40.764198: step: 684/463, loss: 0.08427514880895615 2023-01-24 01:28:41.521212: step: 686/463, loss: 0.1247759759426117 2023-01-24 01:28:42.224009: step: 688/463, loss: 0.6700718998908997 2023-01-24 01:28:42.884809: step: 690/463, loss: 0.614714503288269 2023-01-24 01:28:43.522119: step: 692/463, loss: 0.24023066461086273 2023-01-24 01:28:44.161888: step: 694/463, loss: 0.10985132306814194 2023-01-24 01:28:44.783033: step: 696/463, loss: 0.48690077662467957 2023-01-24 01:28:45.392098: step: 698/463, loss: 0.1326299011707306 2023-01-24 01:28:46.040158: step: 700/463, loss: 0.11016003042459488 2023-01-24 01:28:46.688023: step: 702/463, loss: 0.30969738960266113 2023-01-24 01:28:47.332459: step: 704/463, loss: 0.16951261460781097 2023-01-24 01:28:47.976877: step: 706/463, loss: 0.1861243098974228 2023-01-24 01:28:48.620924: step: 708/463, loss: 0.13849462568759918 2023-01-24 01:28:49.320320: step: 710/463, loss: 0.5747129917144775 2023-01-24 01:28:49.967422: step: 712/463, loss: 0.2757572531700134 2023-01-24 01:28:50.618645: step: 714/463, loss: 0.17006336152553558 2023-01-24 01:28:51.206836: step: 716/463, loss: 0.48959383368492126 2023-01-24 01:28:51.818381: step: 718/463, loss: 0.12063183635473251 2023-01-24 01:28:52.448457: step: 720/463, loss: 0.1398492455482483 2023-01-24 01:28:53.060971: step: 722/463, loss: 0.19601020216941833 2023-01-24 01:28:53.643478: step: 724/463, loss: 0.0626579150557518 2023-01-24 01:28:54.253082: step: 726/463, loss: 7.160354137420654 2023-01-24 01:28:54.880972: step: 728/463, loss: 0.13046561181545258 2023-01-24 01:28:55.473921: step: 730/463, loss: 0.2812177240848541 2023-01-24 01:28:56.100506: step: 732/463, loss: 1.5743343830108643 2023-01-24 01:28:56.721151: step: 734/463, loss: 0.13764502108097076 2023-01-24 01:28:57.378240: step: 736/463, loss: 0.5620519518852234 2023-01-24 01:28:57.997414: step: 738/463, loss: 3.3639867305755615 2023-01-24 01:28:58.619227: step: 740/463, loss: 0.3245205581188202 2023-01-24 01:28:59.200583: step: 742/463, loss: 0.06337276101112366 2023-01-24 01:28:59.866020: step: 744/463, loss: 0.2872947156429291 2023-01-24 01:29:00.462683: step: 746/463, loss: 0.37575235962867737 2023-01-24 01:29:01.076849: step: 748/463, loss: 5.097278594970703 2023-01-24 01:29:01.670635: step: 750/463, loss: 0.22502657771110535 2023-01-24 01:29:02.322957: step: 752/463, loss: 0.34942761063575745 2023-01-24 01:29:02.874228: step: 754/463, loss: 0.48635581135749817 2023-01-24 01:29:03.548319: step: 756/463, loss: 0.4082602560520172 2023-01-24 01:29:04.224299: step: 758/463, loss: 0.10675328224897385 2023-01-24 01:29:04.842794: step: 760/463, loss: 0.8068703413009644 2023-01-24 01:29:05.470444: step: 762/463, loss: 0.3108627498149872 2023-01-24 01:29:06.122847: step: 764/463, loss: 0.7893519997596741 2023-01-24 01:29:06.773021: step: 766/463, loss: 1.5653387308120728 2023-01-24 01:29:07.413049: step: 768/463, loss: 0.2262437492609024 2023-01-24 01:29:08.019935: step: 770/463, loss: 0.23338159918785095 2023-01-24 01:29:08.675235: step: 772/463, loss: 0.14889773726463318 2023-01-24 01:29:09.274550: step: 774/463, loss: 0.15522754192352295 2023-01-24 01:29:09.854520: step: 776/463, loss: 0.1642398238182068 2023-01-24 01:29:10.495473: step: 778/463, loss: 0.1955113559961319 2023-01-24 01:29:11.140831: step: 780/463, loss: 0.13207551836967468 2023-01-24 01:29:11.802462: step: 782/463, loss: 0.09018883854150772 2023-01-24 01:29:12.394912: step: 784/463, loss: 0.4083217978477478 2023-01-24 01:29:13.053170: step: 786/463, loss: 0.0720791295170784 2023-01-24 01:29:13.710686: step: 788/463, loss: 0.13399308919906616 2023-01-24 01:29:14.349353: step: 790/463, loss: 0.2942740023136139 2023-01-24 01:29:15.002625: step: 792/463, loss: 1.3912181854248047 2023-01-24 01:29:15.592823: step: 794/463, loss: 0.11958467215299606 2023-01-24 01:29:16.256857: step: 796/463, loss: 0.11377686262130737 2023-01-24 01:29:16.916294: step: 798/463, loss: 0.11199328303337097 2023-01-24 01:29:17.523188: step: 800/463, loss: 0.09284019470214844 2023-01-24 01:29:18.137380: step: 802/463, loss: 0.15194310247898102 2023-01-24 01:29:18.795399: step: 804/463, loss: 0.6586217880249023 2023-01-24 01:29:19.515922: step: 806/463, loss: 0.2193405032157898 2023-01-24 01:29:20.184857: step: 808/463, loss: 1.0213263034820557 2023-01-24 01:29:20.834199: step: 810/463, loss: 0.4759097993373871 2023-01-24 01:29:21.457439: step: 812/463, loss: 1.1603127717971802 2023-01-24 01:29:22.071050: step: 814/463, loss: 1.418661117553711 2023-01-24 01:29:22.703338: step: 816/463, loss: 0.16498184204101562 2023-01-24 01:29:23.246521: step: 818/463, loss: 0.17249858379364014 2023-01-24 01:29:23.901517: step: 820/463, loss: 0.8015976548194885 2023-01-24 01:29:24.481585: step: 822/463, loss: 0.1316993087530136 2023-01-24 01:29:25.153888: step: 824/463, loss: 0.3313693106174469 2023-01-24 01:29:25.759236: step: 826/463, loss: 0.13438278436660767 2023-01-24 01:29:26.343599: step: 828/463, loss: 0.26280784606933594 2023-01-24 01:29:26.953736: step: 830/463, loss: 0.43109071254730225 2023-01-24 01:29:27.554312: step: 832/463, loss: 0.23146310448646545 2023-01-24 01:29:28.140588: step: 834/463, loss: 0.21439597010612488 2023-01-24 01:29:28.785408: step: 836/463, loss: 0.10331112146377563 2023-01-24 01:29:29.379869: step: 838/463, loss: 0.12465700507164001 2023-01-24 01:29:30.010227: step: 840/463, loss: 0.6542497277259827 2023-01-24 01:29:30.654448: step: 842/463, loss: 0.7847336530685425 2023-01-24 01:29:31.277375: step: 844/463, loss: 0.17413102090358734 2023-01-24 01:29:31.870827: step: 846/463, loss: 0.2655140459537506 2023-01-24 01:29:32.593276: step: 848/463, loss: 0.2812594473361969 2023-01-24 01:29:33.237183: step: 850/463, loss: 0.1744619905948639 2023-01-24 01:29:33.880253: step: 852/463, loss: 0.8941884636878967 2023-01-24 01:29:34.505006: step: 854/463, loss: 0.2859010398387909 2023-01-24 01:29:35.147260: step: 856/463, loss: 0.15807992219924927 2023-01-24 01:29:35.771560: step: 858/463, loss: 0.04032767936587334 2023-01-24 01:29:36.374956: step: 860/463, loss: 0.16435253620147705 2023-01-24 01:29:36.976713: step: 862/463, loss: 0.4138454794883728 2023-01-24 01:29:37.617521: step: 864/463, loss: 0.4070228636264801 2023-01-24 01:29:38.271144: step: 866/463, loss: 0.22488842904567719 2023-01-24 01:29:38.948014: step: 868/463, loss: 0.1210099384188652 2023-01-24 01:29:39.563902: step: 870/463, loss: 0.13129571080207825 2023-01-24 01:29:40.114818: step: 872/463, loss: 0.3064727485179901 2023-01-24 01:29:40.797111: step: 874/463, loss: 0.3433142900466919 2023-01-24 01:29:41.471972: step: 876/463, loss: 0.5327534675598145 2023-01-24 01:29:42.111117: step: 878/463, loss: 0.3669295012950897 2023-01-24 01:29:42.701660: step: 880/463, loss: 0.07462699711322784 2023-01-24 01:29:43.291233: step: 882/463, loss: 0.2050832211971283 2023-01-24 01:29:43.874860: step: 884/463, loss: 0.2422201931476593 2023-01-24 01:29:44.515428: step: 886/463, loss: 0.49755483865737915 2023-01-24 01:29:45.274895: step: 888/463, loss: 0.37691059708595276 2023-01-24 01:29:45.877176: step: 890/463, loss: 0.12227554619312286 2023-01-24 01:29:46.494378: step: 892/463, loss: 0.31827932596206665 2023-01-24 01:29:47.114596: step: 894/463, loss: 0.2925332188606262 2023-01-24 01:29:47.724578: step: 896/463, loss: 0.1712029129266739 2023-01-24 01:29:48.371765: step: 898/463, loss: 0.1715616136789322 2023-01-24 01:29:49.001429: step: 900/463, loss: 0.22472627460956573 2023-01-24 01:29:49.605755: step: 902/463, loss: 0.15998475253582 2023-01-24 01:29:50.183052: step: 904/463, loss: 4.375208377838135 2023-01-24 01:29:50.763548: step: 906/463, loss: 0.10431958734989166 2023-01-24 01:29:51.451459: step: 908/463, loss: 1.4105165004730225 2023-01-24 01:29:52.003059: step: 910/463, loss: 0.07492836564779282 2023-01-24 01:29:52.661058: step: 912/463, loss: 0.2832067012786865 2023-01-24 01:29:53.299506: step: 914/463, loss: 0.08408641070127487 2023-01-24 01:29:53.945071: step: 916/463, loss: 0.17427518963813782 2023-01-24 01:29:54.545112: step: 918/463, loss: 0.08751285076141357 2023-01-24 01:29:55.175824: step: 920/463, loss: 1.1811261177062988 2023-01-24 01:29:55.841983: step: 922/463, loss: 0.5310910940170288 2023-01-24 01:29:56.536315: step: 924/463, loss: 0.7723174095153809 2023-01-24 01:29:57.129004: step: 926/463, loss: 0.1820191591978073 ================================================== Loss: 0.375 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3338471246219534, 'r': 0.3230982588670799, 'f1': 0.3283847558841543}, 'combined': 0.2419677148620084, 'epoch': 10} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3877113096690912, 'r': 0.3065152238745171, 'f1': 0.3423649576610103}, 'combined': 0.240859769208751, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32931238967343335, 'r': 0.3212043194731405, 'f1': 0.3252078248932276}, 'combined': 0.2396268183423782, 'epoch': 10} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38073307869771683, 'r': 0.30299176943483225, 'f1': 0.3374427286417082}, 'combined': 0.23958433733561282, 'epoch': 10} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3369043163672655, 'r': 0.3196762547348485, 'f1': 0.32806426141885325}, 'combined': 0.2417315610454708, 'epoch': 10} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.39242638806342167, 'r': 0.29234909084550104, 'f1': 0.33507478380190064}, 'combined': 0.23790309649934946, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36875, 'r': 0.21071428571428572, 'f1': 0.2681818181818182}, 'combined': 0.17878787878787877, 'epoch': 10} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31666666666666665, 'r': 0.41304347826086957, 'f1': 0.3584905660377358}, 'combined': 0.1792452830188679, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.13793103448275862, 'f1': 0.20512820512820515}, 'combined': 0.13675213675213677, 'epoch': 10} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32931238967343335, 'r': 0.3212043194731405, 'f1': 0.3252078248932276}, 'combined': 0.2396268183423782, 'epoch': 10} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38073307869771683, 'r': 0.30299176943483225, 'f1': 0.3374427286417082}, 'combined': 0.23958433733561282, 'epoch': 10} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31666666666666665, 'r': 0.41304347826086957, 'f1': 0.3584905660377358}, 'combined': 0.1792452830188679, 'epoch': 10} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3394620582120582, 'r': 0.3092447916666667, 'f1': 0.32364965312190286}, 'combined': 0.23847869177403366, 'epoch': 7} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36468931207169214, 'r': 0.2587193810770381, 'f1': 0.30269771384817323}, 'combined': 0.214915376832203, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 7} ****************************** Epoch: 11 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:32:41.979861: step: 2/463, loss: 0.1557893007993698 2023-01-24 01:32:42.538563: step: 4/463, loss: 0.09774535894393921 2023-01-24 01:32:43.154522: step: 6/463, loss: 0.35459160804748535 2023-01-24 01:32:43.761600: step: 8/463, loss: 0.4779803156852722 2023-01-24 01:32:44.382672: step: 10/463, loss: 0.18447989225387573 2023-01-24 01:32:44.951668: step: 12/463, loss: 0.0690571740269661 2023-01-24 01:32:45.671204: step: 14/463, loss: 0.06573549658060074 2023-01-24 01:32:46.246755: step: 16/463, loss: 0.33801355957984924 2023-01-24 01:32:46.859870: step: 18/463, loss: 0.3420027792453766 2023-01-24 01:32:47.461016: step: 20/463, loss: 0.09694930911064148 2023-01-24 01:32:48.186594: step: 22/463, loss: 0.06692788749933243 2023-01-24 01:32:48.778367: step: 24/463, loss: 0.12547041475772858 2023-01-24 01:32:49.407224: step: 26/463, loss: 0.2752712070941925 2023-01-24 01:32:50.054444: step: 28/463, loss: 0.1981227546930313 2023-01-24 01:32:50.648771: step: 30/463, loss: 0.05699937045574188 2023-01-24 01:32:51.251635: step: 32/463, loss: 0.07129170745611191 2023-01-24 01:32:51.853498: step: 34/463, loss: 1.1676236391067505 2023-01-24 01:32:52.502139: step: 36/463, loss: 0.260797381401062 2023-01-24 01:32:53.137116: step: 38/463, loss: 0.10853022336959839 2023-01-24 01:32:53.718033: step: 40/463, loss: 0.24779656529426575 2023-01-24 01:32:54.384440: step: 42/463, loss: 0.17829185724258423 2023-01-24 01:32:54.995843: step: 44/463, loss: 0.20599225163459778 2023-01-24 01:32:55.630743: step: 46/463, loss: 1.2065032720565796 2023-01-24 01:32:56.185588: step: 48/463, loss: 7.832232475280762 2023-01-24 01:32:56.759355: step: 50/463, loss: 0.3363843560218811 2023-01-24 01:32:57.379270: step: 52/463, loss: 0.170681893825531 2023-01-24 01:32:57.926849: step: 54/463, loss: 0.29162225127220154 2023-01-24 01:32:58.524721: step: 56/463, loss: 0.7374223470687866 2023-01-24 01:32:59.186832: step: 58/463, loss: 0.11654240638017654 2023-01-24 01:32:59.879788: step: 60/463, loss: 0.4363039433956146 2023-01-24 01:33:00.465016: step: 62/463, loss: 0.23605430126190186 2023-01-24 01:33:01.054301: step: 64/463, loss: 0.12272446602582932 2023-01-24 01:33:01.685805: step: 66/463, loss: 0.11844140291213989 2023-01-24 01:33:02.311475: step: 68/463, loss: 0.07841938734054565 2023-01-24 01:33:02.979127: step: 70/463, loss: 0.37415292859077454 2023-01-24 01:33:03.624663: step: 72/463, loss: 0.08683978021144867 2023-01-24 01:33:04.279215: step: 74/463, loss: 0.10190006345510483 2023-01-24 01:33:04.872109: step: 76/463, loss: 0.8439013361930847 2023-01-24 01:33:05.475912: step: 78/463, loss: 0.20373421907424927 2023-01-24 01:33:06.070506: step: 80/463, loss: 0.16913099586963654 2023-01-24 01:33:06.712723: step: 82/463, loss: 0.12669765949249268 2023-01-24 01:33:07.292233: step: 84/463, loss: 0.2533620297908783 2023-01-24 01:33:07.913332: step: 86/463, loss: 0.22011765837669373 2023-01-24 01:33:08.527450: step: 88/463, loss: 0.12503284215927124 2023-01-24 01:33:09.134931: step: 90/463, loss: 0.1320025622844696 2023-01-24 01:33:09.748360: step: 92/463, loss: 0.14141732454299927 2023-01-24 01:33:10.361754: step: 94/463, loss: 0.22314311563968658 2023-01-24 01:33:10.952613: step: 96/463, loss: 0.8474243879318237 2023-01-24 01:33:11.568257: step: 98/463, loss: 0.1384890377521515 2023-01-24 01:33:12.218916: step: 100/463, loss: 0.18315958976745605 2023-01-24 01:33:12.838665: step: 102/463, loss: 1.4914090633392334 2023-01-24 01:33:13.433803: step: 104/463, loss: 0.12334001064300537 2023-01-24 01:33:14.079581: step: 106/463, loss: 0.4787156283855438 2023-01-24 01:33:14.669832: step: 108/463, loss: 0.14360813796520233 2023-01-24 01:33:15.266187: step: 110/463, loss: 0.21620948612689972 2023-01-24 01:33:15.846821: step: 112/463, loss: 0.06922974437475204 2023-01-24 01:33:16.478041: step: 114/463, loss: 0.2128233164548874 2023-01-24 01:33:17.088633: step: 116/463, loss: 0.23051264882087708 2023-01-24 01:33:17.697415: step: 118/463, loss: 0.2628442943096161 2023-01-24 01:33:18.334656: step: 120/463, loss: 0.18493597209453583 2023-01-24 01:33:18.901021: step: 122/463, loss: 0.14753176271915436 2023-01-24 01:33:19.517375: step: 124/463, loss: 0.10964064300060272 2023-01-24 01:33:20.161655: step: 126/463, loss: 0.7467198967933655 2023-01-24 01:33:20.820673: step: 128/463, loss: 0.22547577321529388 2023-01-24 01:33:21.428821: step: 130/463, loss: 0.3346400260925293 2023-01-24 01:33:22.158076: step: 132/463, loss: 0.1302620768547058 2023-01-24 01:33:22.786881: step: 134/463, loss: 0.11260505765676498 2023-01-24 01:33:23.391213: step: 136/463, loss: 0.6980242729187012 2023-01-24 01:33:24.047970: step: 138/463, loss: 0.15692003071308136 2023-01-24 01:33:24.643017: step: 140/463, loss: 0.21904289722442627 2023-01-24 01:33:25.216260: step: 142/463, loss: 0.17542700469493866 2023-01-24 01:33:25.841948: step: 144/463, loss: 0.08986686170101166 2023-01-24 01:33:26.440642: step: 146/463, loss: 0.44691795110702515 2023-01-24 01:33:27.026676: step: 148/463, loss: 0.10077380388975143 2023-01-24 01:33:27.603982: step: 150/463, loss: 0.17615720629692078 2023-01-24 01:33:28.223014: step: 152/463, loss: 0.27842947840690613 2023-01-24 01:33:28.817177: step: 154/463, loss: 0.7097365856170654 2023-01-24 01:33:29.421472: step: 156/463, loss: 0.15558592975139618 2023-01-24 01:33:30.033999: step: 158/463, loss: 0.1143701821565628 2023-01-24 01:33:30.708155: step: 160/463, loss: 0.1519109457731247 2023-01-24 01:33:31.297436: step: 162/463, loss: 0.06617710739374161 2023-01-24 01:33:31.906903: step: 164/463, loss: 0.26744601130485535 2023-01-24 01:33:32.505813: step: 166/463, loss: 0.13036678731441498 2023-01-24 01:33:33.131988: step: 168/463, loss: 0.34556275606155396 2023-01-24 01:33:33.745773: step: 170/463, loss: 0.0910409614443779 2023-01-24 01:33:34.402175: step: 172/463, loss: 0.21420818567276 2023-01-24 01:33:35.028564: step: 174/463, loss: 0.17701290547847748 2023-01-24 01:33:35.666092: step: 176/463, loss: 0.14173251390457153 2023-01-24 01:33:36.313744: step: 178/463, loss: 0.15513356029987335 2023-01-24 01:33:36.947187: step: 180/463, loss: 0.8054499626159668 2023-01-24 01:33:37.633020: step: 182/463, loss: 0.30540597438812256 2023-01-24 01:33:38.285251: step: 184/463, loss: 0.2150721251964569 2023-01-24 01:33:39.061381: step: 186/463, loss: 0.9401265978813171 2023-01-24 01:33:39.659063: step: 188/463, loss: 0.18503764271736145 2023-01-24 01:33:40.266401: step: 190/463, loss: 0.4754886031150818 2023-01-24 01:33:40.875068: step: 192/463, loss: 0.17268887162208557 2023-01-24 01:33:41.495918: step: 194/463, loss: 0.2025170475244522 2023-01-24 01:33:42.139194: step: 196/463, loss: 0.25502920150756836 2023-01-24 01:33:42.756610: step: 198/463, loss: 0.49979367852211 2023-01-24 01:33:43.339239: step: 200/463, loss: 0.5329362750053406 2023-01-24 01:33:43.973891: step: 202/463, loss: 0.18929441273212433 2023-01-24 01:33:44.622249: step: 204/463, loss: 0.1777879148721695 2023-01-24 01:33:45.254707: step: 206/463, loss: 0.18135026097297668 2023-01-24 01:33:45.914095: step: 208/463, loss: 0.2549498677253723 2023-01-24 01:33:46.496074: step: 210/463, loss: 0.08592674136161804 2023-01-24 01:33:47.111416: step: 212/463, loss: 0.10911613702774048 2023-01-24 01:33:47.639530: step: 214/463, loss: 0.23726686835289001 2023-01-24 01:33:48.340988: step: 216/463, loss: 0.19466833770275116 2023-01-24 01:33:48.946642: step: 218/463, loss: 1.0695412158966064 2023-01-24 01:33:49.600740: step: 220/463, loss: 0.17979295551776886 2023-01-24 01:33:50.244816: step: 222/463, loss: 0.1768440157175064 2023-01-24 01:33:50.905781: step: 224/463, loss: 0.19502119719982147 2023-01-24 01:33:51.476715: step: 226/463, loss: 0.0430450439453125 2023-01-24 01:33:52.100899: step: 228/463, loss: 0.8410242199897766 2023-01-24 01:33:52.683513: step: 230/463, loss: 0.2063332200050354 2023-01-24 01:33:53.355860: step: 232/463, loss: 0.11113949865102768 2023-01-24 01:33:54.010397: step: 234/463, loss: 0.3075471818447113 2023-01-24 01:33:54.668460: step: 236/463, loss: 0.3676910996437073 2023-01-24 01:33:55.260677: step: 238/463, loss: 0.26097801327705383 2023-01-24 01:33:55.865560: step: 240/463, loss: 0.23086030781269073 2023-01-24 01:33:56.454462: step: 242/463, loss: 0.4896787405014038 2023-01-24 01:33:57.080967: step: 244/463, loss: 0.17657600343227386 2023-01-24 01:33:57.705208: step: 246/463, loss: 0.22201332449913025 2023-01-24 01:33:58.364631: step: 248/463, loss: 0.0837511196732521 2023-01-24 01:33:59.020417: step: 250/463, loss: 0.09013333916664124 2023-01-24 01:33:59.669890: step: 252/463, loss: 0.21067605912685394 2023-01-24 01:34:00.289860: step: 254/463, loss: 0.6506162881851196 2023-01-24 01:34:00.888148: step: 256/463, loss: 0.12501423060894012 2023-01-24 01:34:01.494764: step: 258/463, loss: 0.12942153215408325 2023-01-24 01:34:02.155069: step: 260/463, loss: 0.30929699540138245 2023-01-24 01:34:02.790532: step: 262/463, loss: 0.1980181485414505 2023-01-24 01:34:03.380922: step: 264/463, loss: 0.08108502626419067 2023-01-24 01:34:03.988267: step: 266/463, loss: 0.12446433305740356 2023-01-24 01:34:04.655573: step: 268/463, loss: 0.44919291138648987 2023-01-24 01:34:05.302336: step: 270/463, loss: 0.04071928188204765 2023-01-24 01:34:05.951180: step: 272/463, loss: 0.16166871786117554 2023-01-24 01:34:06.513229: step: 274/463, loss: 0.20751738548278809 2023-01-24 01:34:07.195782: step: 276/463, loss: 0.13233205676078796 2023-01-24 01:34:07.853332: step: 278/463, loss: 0.15093468129634857 2023-01-24 01:34:08.484101: step: 280/463, loss: 0.4615356922149658 2023-01-24 01:34:09.103394: step: 282/463, loss: 0.3105548918247223 2023-01-24 01:34:09.729358: step: 284/463, loss: 0.7079939842224121 2023-01-24 01:34:10.473494: step: 286/463, loss: 0.4655807614326477 2023-01-24 01:34:11.083872: step: 288/463, loss: 0.11099115014076233 2023-01-24 01:34:11.753048: step: 290/463, loss: 0.7836319804191589 2023-01-24 01:34:12.311114: step: 292/463, loss: 0.09697830677032471 2023-01-24 01:34:12.906239: step: 294/463, loss: 0.5950723886489868 2023-01-24 01:34:13.582665: step: 296/463, loss: 0.5558491945266724 2023-01-24 01:34:14.188164: step: 298/463, loss: 0.09996949881315231 2023-01-24 01:34:14.757981: step: 300/463, loss: 0.19306735694408417 2023-01-24 01:34:15.413399: step: 302/463, loss: 0.08822362124919891 2023-01-24 01:34:16.058581: step: 304/463, loss: 0.1660662740468979 2023-01-24 01:34:16.709053: step: 306/463, loss: 0.3569842576980591 2023-01-24 01:34:17.329150: step: 308/463, loss: 0.21397262811660767 2023-01-24 01:34:17.949412: step: 310/463, loss: 0.10757021605968475 2023-01-24 01:34:18.555553: step: 312/463, loss: 0.30004051327705383 2023-01-24 01:34:19.131098: step: 314/463, loss: 0.08736706525087357 2023-01-24 01:34:19.800737: step: 316/463, loss: 0.1873794049024582 2023-01-24 01:34:20.427044: step: 318/463, loss: 0.4362778961658478 2023-01-24 01:34:21.085096: step: 320/463, loss: 2.6224780082702637 2023-01-24 01:34:21.701551: step: 322/463, loss: 0.29017484188079834 2023-01-24 01:34:22.303735: step: 324/463, loss: 0.07917462289333344 2023-01-24 01:34:22.875036: step: 326/463, loss: 0.18867968022823334 2023-01-24 01:34:23.451019: step: 328/463, loss: 0.10311330109834671 2023-01-24 01:34:24.076497: step: 330/463, loss: 0.09096816927194595 2023-01-24 01:34:24.751258: step: 332/463, loss: 0.18914978206157684 2023-01-24 01:34:25.361425: step: 334/463, loss: 0.5593217611312866 2023-01-24 01:34:26.047429: step: 336/463, loss: 0.2107006311416626 2023-01-24 01:34:26.652256: step: 338/463, loss: 0.12867611646652222 2023-01-24 01:34:27.239241: step: 340/463, loss: 0.24063469469547272 2023-01-24 01:34:27.850817: step: 342/463, loss: 0.19312211871147156 2023-01-24 01:34:28.521994: step: 344/463, loss: 0.144821897149086 2023-01-24 01:34:29.117640: step: 346/463, loss: 0.8984965085983276 2023-01-24 01:34:29.828759: step: 348/463, loss: 0.126140296459198 2023-01-24 01:34:30.478267: step: 350/463, loss: 0.2617723345756531 2023-01-24 01:34:31.171313: step: 352/463, loss: 0.08560075610876083 2023-01-24 01:34:31.863155: step: 354/463, loss: 0.22636078298091888 2023-01-24 01:34:32.406266: step: 356/463, loss: 0.22749504446983337 2023-01-24 01:34:33.116667: step: 358/463, loss: 0.03907255083322525 2023-01-24 01:34:33.763093: step: 360/463, loss: 0.2906891703605652 2023-01-24 01:34:34.390400: step: 362/463, loss: 0.07086695730686188 2023-01-24 01:34:34.965153: step: 364/463, loss: 0.07967054843902588 2023-01-24 01:34:35.612168: step: 366/463, loss: 0.08674082159996033 2023-01-24 01:34:36.203648: step: 368/463, loss: 0.2185366004705429 2023-01-24 01:34:36.816147: step: 370/463, loss: 1.2157434225082397 2023-01-24 01:34:37.497078: step: 372/463, loss: 0.5268072485923767 2023-01-24 01:34:38.151502: step: 374/463, loss: 0.3283002972602844 2023-01-24 01:34:38.767212: step: 376/463, loss: 0.1777026653289795 2023-01-24 01:34:39.376762: step: 378/463, loss: 0.18592430651187897 2023-01-24 01:34:40.106570: step: 380/463, loss: 0.14597758650779724 2023-01-24 01:34:40.694338: step: 382/463, loss: 0.20625685155391693 2023-01-24 01:34:41.328066: step: 384/463, loss: 0.14868006110191345 2023-01-24 01:34:41.978776: step: 386/463, loss: 0.3777749836444855 2023-01-24 01:34:42.632456: step: 388/463, loss: 0.07952199131250381 2023-01-24 01:34:43.261641: step: 390/463, loss: 0.1866619884967804 2023-01-24 01:34:43.880868: step: 392/463, loss: 0.1291872262954712 2023-01-24 01:34:44.542297: step: 394/463, loss: 0.1945926994085312 2023-01-24 01:34:45.162127: step: 396/463, loss: 0.41470867395401 2023-01-24 01:34:45.850659: step: 398/463, loss: 0.08947636187076569 2023-01-24 01:34:46.576252: step: 400/463, loss: 9.032873153686523 2023-01-24 01:34:47.181697: step: 402/463, loss: 0.09846684336662292 2023-01-24 01:34:47.839847: step: 404/463, loss: 0.12085556983947754 2023-01-24 01:34:48.450579: step: 406/463, loss: 0.19592908024787903 2023-01-24 01:34:49.062707: step: 408/463, loss: 0.5958520770072937 2023-01-24 01:34:49.752760: step: 410/463, loss: 0.7844327092170715 2023-01-24 01:34:50.356886: step: 412/463, loss: 0.475271612405777 2023-01-24 01:34:51.002468: step: 414/463, loss: 0.24934270977973938 2023-01-24 01:34:51.668198: step: 416/463, loss: 0.2430102825164795 2023-01-24 01:34:52.333309: step: 418/463, loss: 0.13732518255710602 2023-01-24 01:34:52.922011: step: 420/463, loss: 0.12763077020645142 2023-01-24 01:34:53.550374: step: 422/463, loss: 0.05732860416173935 2023-01-24 01:34:54.121248: step: 424/463, loss: 0.17467300593852997 2023-01-24 01:34:54.732862: step: 426/463, loss: 0.3161345422267914 2023-01-24 01:34:55.387022: step: 428/463, loss: 0.1301148235797882 2023-01-24 01:34:55.998787: step: 430/463, loss: 0.2718263566493988 2023-01-24 01:34:56.679192: step: 432/463, loss: 0.37929511070251465 2023-01-24 01:34:57.286214: step: 434/463, loss: 0.5115262269973755 2023-01-24 01:34:57.930540: step: 436/463, loss: 0.37935271859169006 2023-01-24 01:34:58.584683: step: 438/463, loss: 0.08431170135736465 2023-01-24 01:34:59.194984: step: 440/463, loss: 0.14119510352611542 2023-01-24 01:34:59.874827: step: 442/463, loss: 0.3240480422973633 2023-01-24 01:35:00.451037: step: 444/463, loss: 0.04110392555594444 2023-01-24 01:35:01.014008: step: 446/463, loss: 0.12335552275180817 2023-01-24 01:35:01.654357: step: 448/463, loss: 0.13534028828144073 2023-01-24 01:35:02.254847: step: 450/463, loss: 0.06503622233867645 2023-01-24 01:35:02.855338: step: 452/463, loss: 0.14944955706596375 2023-01-24 01:35:03.482410: step: 454/463, loss: 0.14979034662246704 2023-01-24 01:35:04.104043: step: 456/463, loss: 0.3039358854293823 2023-01-24 01:35:04.695264: step: 458/463, loss: 0.21860185265541077 2023-01-24 01:35:05.287308: step: 460/463, loss: 0.18035124242305756 2023-01-24 01:35:05.873339: step: 462/463, loss: 0.6826392412185669 2023-01-24 01:35:06.457235: step: 464/463, loss: 0.2668013572692871 2023-01-24 01:35:07.132581: step: 466/463, loss: 4.867799758911133 2023-01-24 01:35:07.704134: step: 468/463, loss: 0.133463054895401 2023-01-24 01:35:08.393886: step: 470/463, loss: 0.1825987994670868 2023-01-24 01:35:09.061148: step: 472/463, loss: 0.2127266377210617 2023-01-24 01:35:09.698117: step: 474/463, loss: 0.582396388053894 2023-01-24 01:35:10.288617: step: 476/463, loss: 0.42772936820983887 2023-01-24 01:35:10.888385: step: 478/463, loss: 0.05908830463886261 2023-01-24 01:35:11.529579: step: 480/463, loss: 0.1736270934343338 2023-01-24 01:35:12.143103: step: 482/463, loss: 0.13673225045204163 2023-01-24 01:35:12.833662: step: 484/463, loss: 0.6216259598731995 2023-01-24 01:35:13.538695: step: 486/463, loss: 0.19415000081062317 2023-01-24 01:35:14.191889: step: 488/463, loss: 0.169523224234581 2023-01-24 01:35:14.825602: step: 490/463, loss: 0.1809360235929489 2023-01-24 01:35:15.404675: step: 492/463, loss: 0.17560748755931854 2023-01-24 01:35:15.966609: step: 494/463, loss: 0.14007432758808136 2023-01-24 01:35:16.559933: step: 496/463, loss: 0.35668808221817017 2023-01-24 01:35:17.211726: step: 498/463, loss: 0.06099766120314598 2023-01-24 01:35:17.847286: step: 500/463, loss: 0.09579218178987503 2023-01-24 01:35:18.513055: step: 502/463, loss: 0.15736544132232666 2023-01-24 01:35:19.123383: step: 504/463, loss: 0.09517703205347061 2023-01-24 01:35:19.739459: step: 506/463, loss: 0.05293704569339752 2023-01-24 01:35:20.437499: step: 508/463, loss: 0.3975214958190918 2023-01-24 01:35:21.107201: step: 510/463, loss: 0.1858622133731842 2023-01-24 01:35:21.788801: step: 512/463, loss: 0.4202653169631958 2023-01-24 01:35:22.391599: step: 514/463, loss: 0.5181907415390015 2023-01-24 01:35:23.040279: step: 516/463, loss: 0.19430164992809296 2023-01-24 01:35:23.667738: step: 518/463, loss: 0.05712796747684479 2023-01-24 01:35:24.322921: step: 520/463, loss: 0.5765514969825745 2023-01-24 01:35:24.972607: step: 522/463, loss: 0.14650504291057587 2023-01-24 01:35:25.606104: step: 524/463, loss: 0.23798295855522156 2023-01-24 01:35:26.277948: step: 526/463, loss: 0.10417530685663223 2023-01-24 01:35:26.910608: step: 528/463, loss: 0.1547343134880066 2023-01-24 01:35:27.600920: step: 530/463, loss: 0.30425867438316345 2023-01-24 01:35:28.245628: step: 532/463, loss: 0.0832023099064827 2023-01-24 01:35:28.841667: step: 534/463, loss: 0.11194518953561783 2023-01-24 01:35:29.502325: step: 536/463, loss: 0.2956486940383911 2023-01-24 01:35:30.132715: step: 538/463, loss: 0.2627185583114624 2023-01-24 01:35:30.732550: step: 540/463, loss: 0.11740078777074814 2023-01-24 01:35:31.374665: step: 542/463, loss: 0.2042766511440277 2023-01-24 01:35:32.017262: step: 544/463, loss: 0.11591403931379318 2023-01-24 01:35:32.627717: step: 546/463, loss: 0.14974018931388855 2023-01-24 01:35:33.254605: step: 548/463, loss: 0.3203867971897125 2023-01-24 01:35:33.944098: step: 550/463, loss: 0.17124783992767334 2023-01-24 01:35:34.588367: step: 552/463, loss: 0.495147705078125 2023-01-24 01:35:35.233375: step: 554/463, loss: 0.1449156403541565 2023-01-24 01:35:35.916685: step: 556/463, loss: 0.10526233911514282 2023-01-24 01:35:36.484520: step: 558/463, loss: 0.15353164076805115 2023-01-24 01:35:37.144153: step: 560/463, loss: 0.2102232724428177 2023-01-24 01:35:37.827345: step: 562/463, loss: 0.15538381040096283 2023-01-24 01:35:38.483119: step: 564/463, loss: 1.3404343128204346 2023-01-24 01:35:39.078866: step: 566/463, loss: 0.09365210682153702 2023-01-24 01:35:39.669180: step: 568/463, loss: 0.6984868049621582 2023-01-24 01:35:40.256401: step: 570/463, loss: 0.6858066916465759 2023-01-24 01:35:40.807915: step: 572/463, loss: 0.22985543310642242 2023-01-24 01:35:41.390852: step: 574/463, loss: 0.17677029967308044 2023-01-24 01:35:42.055298: step: 576/463, loss: 0.061814770102500916 2023-01-24 01:35:42.700890: step: 578/463, loss: 0.137438103556633 2023-01-24 01:35:43.333242: step: 580/463, loss: 0.16579896211624146 2023-01-24 01:35:43.975239: step: 582/463, loss: 0.8470388054847717 2023-01-24 01:35:44.571903: step: 584/463, loss: 0.3566119372844696 2023-01-24 01:35:45.236513: step: 586/463, loss: 0.11920452117919922 2023-01-24 01:35:45.839161: step: 588/463, loss: 0.3137648105621338 2023-01-24 01:35:46.473315: step: 590/463, loss: 0.2560758590698242 2023-01-24 01:35:47.101111: step: 592/463, loss: 0.0872649997472763 2023-01-24 01:35:47.690602: step: 594/463, loss: 0.6230356693267822 2023-01-24 01:35:48.326312: step: 596/463, loss: 0.22033986449241638 2023-01-24 01:35:49.000395: step: 598/463, loss: 0.187452495098114 2023-01-24 01:35:49.636475: step: 600/463, loss: 0.028370320796966553 2023-01-24 01:35:50.263234: step: 602/463, loss: 0.19138792157173157 2023-01-24 01:35:50.920878: step: 604/463, loss: 0.19379457831382751 2023-01-24 01:35:51.505481: step: 606/463, loss: 0.1803487241268158 2023-01-24 01:35:52.127979: step: 608/463, loss: 0.19775021076202393 2023-01-24 01:35:52.804319: step: 610/463, loss: 1.1458492279052734 2023-01-24 01:35:53.429841: step: 612/463, loss: 0.15410952270030975 2023-01-24 01:35:54.034612: step: 614/463, loss: 0.41053423285484314 2023-01-24 01:35:54.740301: step: 616/463, loss: 0.2990211546421051 2023-01-24 01:35:55.381642: step: 618/463, loss: 0.8369203805923462 2023-01-24 01:35:56.039734: step: 620/463, loss: 0.3215756118297577 2023-01-24 01:35:56.676289: step: 622/463, loss: 0.27169251441955566 2023-01-24 01:35:57.371297: step: 624/463, loss: 1.0006605386734009 2023-01-24 01:35:58.053895: step: 626/463, loss: 0.18092471361160278 2023-01-24 01:35:58.782775: step: 628/463, loss: 0.4520927667617798 2023-01-24 01:35:59.395276: step: 630/463, loss: 0.39694926142692566 2023-01-24 01:36:00.016998: step: 632/463, loss: 0.18954797089099884 2023-01-24 01:36:00.670588: step: 634/463, loss: 0.16187815368175507 2023-01-24 01:36:01.379135: step: 636/463, loss: 0.0561809316277504 2023-01-24 01:36:02.016418: step: 638/463, loss: 0.2044248878955841 2023-01-24 01:36:02.645288: step: 640/463, loss: 0.08575595915317535 2023-01-24 01:36:03.271950: step: 642/463, loss: 0.16775164008140564 2023-01-24 01:36:03.858740: step: 644/463, loss: 0.45177769660949707 2023-01-24 01:36:04.456217: step: 646/463, loss: 0.3119373023509979 2023-01-24 01:36:05.040542: step: 648/463, loss: 0.11421696096658707 2023-01-24 01:36:05.656137: step: 650/463, loss: 0.4472675919532776 2023-01-24 01:36:06.277601: step: 652/463, loss: 0.17896349728107452 2023-01-24 01:36:06.887454: step: 654/463, loss: 0.2625711262226105 2023-01-24 01:36:07.487769: step: 656/463, loss: 0.14007100462913513 2023-01-24 01:36:08.113335: step: 658/463, loss: 0.24776354432106018 2023-01-24 01:36:08.797488: step: 660/463, loss: 0.7137978672981262 2023-01-24 01:36:09.409876: step: 662/463, loss: 0.10590436309576035 2023-01-24 01:36:10.027259: step: 664/463, loss: 0.13663947582244873 2023-01-24 01:36:10.701015: step: 666/463, loss: 0.14541323482990265 2023-01-24 01:36:11.285816: step: 668/463, loss: 0.10218999534845352 2023-01-24 01:36:11.906110: step: 670/463, loss: 0.5882960557937622 2023-01-24 01:36:12.477801: step: 672/463, loss: 0.27695170044898987 2023-01-24 01:36:13.119503: step: 674/463, loss: 0.1596584916114807 2023-01-24 01:36:13.745570: step: 676/463, loss: 0.29103177785873413 2023-01-24 01:36:14.343777: step: 678/463, loss: 0.10411626100540161 2023-01-24 01:36:14.898903: step: 680/463, loss: 0.18873050808906555 2023-01-24 01:36:15.554301: step: 682/463, loss: 0.7171761989593506 2023-01-24 01:36:16.244076: step: 684/463, loss: 0.12166047096252441 2023-01-24 01:36:16.979406: step: 686/463, loss: 0.07058124244213104 2023-01-24 01:36:17.745197: step: 688/463, loss: 0.4099206328392029 2023-01-24 01:36:18.387229: step: 690/463, loss: 0.04027649387717247 2023-01-24 01:36:19.012363: step: 692/463, loss: 0.32075297832489014 2023-01-24 01:36:19.639973: step: 694/463, loss: 0.09883144497871399 2023-01-24 01:36:20.271930: step: 696/463, loss: 0.0920349732041359 2023-01-24 01:36:20.876260: step: 698/463, loss: 0.3034406900405884 2023-01-24 01:36:21.519358: step: 700/463, loss: 0.07000040262937546 2023-01-24 01:36:22.189717: step: 702/463, loss: 0.14461810886859894 2023-01-24 01:36:22.801976: step: 704/463, loss: 0.16660451889038086 2023-01-24 01:36:23.406420: step: 706/463, loss: 0.08364560455083847 2023-01-24 01:36:24.003910: step: 708/463, loss: 0.08456961810588837 2023-01-24 01:36:24.620448: step: 710/463, loss: 0.145141139626503 2023-01-24 01:36:25.268679: step: 712/463, loss: 1.0982517004013062 2023-01-24 01:36:25.895520: step: 714/463, loss: 1.2770613431930542 2023-01-24 01:36:26.491298: step: 716/463, loss: 0.20429536700248718 2023-01-24 01:36:27.113238: step: 718/463, loss: 0.25250017642974854 2023-01-24 01:36:27.702956: step: 720/463, loss: 0.1827576458454132 2023-01-24 01:36:28.320648: step: 722/463, loss: 0.11606207489967346 2023-01-24 01:36:28.921278: step: 724/463, loss: 0.23147019743919373 2023-01-24 01:36:29.568363: step: 726/463, loss: 0.7661488056182861 2023-01-24 01:36:30.168153: step: 728/463, loss: 0.07237375527620316 2023-01-24 01:36:30.802284: step: 730/463, loss: 0.22994258999824524 2023-01-24 01:36:31.351455: step: 732/463, loss: 0.11551771312952042 2023-01-24 01:36:31.922880: step: 734/463, loss: 0.26309841871261597 2023-01-24 01:36:32.559389: step: 736/463, loss: 0.07779472321271896 2023-01-24 01:36:33.220689: step: 738/463, loss: 0.3910158574581146 2023-01-24 01:36:33.884426: step: 740/463, loss: 0.22673861682415009 2023-01-24 01:36:34.497065: step: 742/463, loss: 0.30319470167160034 2023-01-24 01:36:35.127297: step: 744/463, loss: 0.10611624270677567 2023-01-24 01:36:35.734555: step: 746/463, loss: 0.23621544241905212 2023-01-24 01:36:36.359030: step: 748/463, loss: 0.5247398018836975 2023-01-24 01:36:36.956505: step: 750/463, loss: 0.18795108795166016 2023-01-24 01:36:37.616524: step: 752/463, loss: 0.1384354829788208 2023-01-24 01:36:38.306744: step: 754/463, loss: 0.06952177733182907 2023-01-24 01:36:38.958217: step: 756/463, loss: 0.48432061076164246 2023-01-24 01:36:39.581409: step: 758/463, loss: 0.16933438181877136 2023-01-24 01:36:40.173579: step: 760/463, loss: 0.33416056632995605 2023-01-24 01:36:40.787398: step: 762/463, loss: 0.17422151565551758 2023-01-24 01:36:41.435486: step: 764/463, loss: 0.1311761885881424 2023-01-24 01:36:41.994822: step: 766/463, loss: 0.16927458345890045 2023-01-24 01:36:42.603763: step: 768/463, loss: 0.22852876782417297 2023-01-24 01:36:43.425660: step: 770/463, loss: 0.06046190857887268 2023-01-24 01:36:44.030341: step: 772/463, loss: 0.2630150020122528 2023-01-24 01:36:44.662796: step: 774/463, loss: 0.26570048928260803 2023-01-24 01:36:45.252164: step: 776/463, loss: 0.0807984247803688 2023-01-24 01:36:45.891951: step: 778/463, loss: 0.08698214590549469 2023-01-24 01:36:46.652394: step: 780/463, loss: 0.6925492882728577 2023-01-24 01:36:47.396555: step: 782/463, loss: 0.1728895753622055 2023-01-24 01:36:48.037322: step: 784/463, loss: 0.15599322319030762 2023-01-24 01:36:48.682262: step: 786/463, loss: 0.207138329744339 2023-01-24 01:36:49.337251: step: 788/463, loss: 0.4441010355949402 2023-01-24 01:36:49.950625: step: 790/463, loss: 0.2658955752849579 2023-01-24 01:36:50.568529: step: 792/463, loss: 0.2987833023071289 2023-01-24 01:36:51.249774: step: 794/463, loss: 0.2642737329006195 2023-01-24 01:36:51.867672: step: 796/463, loss: 0.12995290756225586 2023-01-24 01:36:52.480923: step: 798/463, loss: 0.15699848532676697 2023-01-24 01:36:53.197905: step: 800/463, loss: 0.5807077288627625 2023-01-24 01:36:53.787984: step: 802/463, loss: 0.19632214307785034 2023-01-24 01:36:54.432312: step: 804/463, loss: 0.3042409420013428 2023-01-24 01:36:55.049230: step: 806/463, loss: 0.5361500978469849 2023-01-24 01:36:55.688882: step: 808/463, loss: 0.3842576742172241 2023-01-24 01:36:56.299798: step: 810/463, loss: 0.341902494430542 2023-01-24 01:36:56.872575: step: 812/463, loss: 0.2372943013906479 2023-01-24 01:36:57.493746: step: 814/463, loss: 0.1166607066988945 2023-01-24 01:36:58.001530: step: 816/463, loss: 0.12735337018966675 2023-01-24 01:36:58.556241: step: 818/463, loss: 0.5126550197601318 2023-01-24 01:36:59.176612: step: 820/463, loss: 0.15051767230033875 2023-01-24 01:36:59.857905: step: 822/463, loss: 0.19102871417999268 2023-01-24 01:37:00.416895: step: 824/463, loss: 0.12987522780895233 2023-01-24 01:37:01.053256: step: 826/463, loss: 0.23374895751476288 2023-01-24 01:37:01.617102: step: 828/463, loss: 0.30088508129119873 2023-01-24 01:37:02.214234: step: 830/463, loss: 0.2914123833179474 2023-01-24 01:37:02.790750: step: 832/463, loss: 0.13469421863555908 2023-01-24 01:37:03.363117: step: 834/463, loss: 0.14167901873588562 2023-01-24 01:37:03.982249: step: 836/463, loss: 0.11122599244117737 2023-01-24 01:37:04.586364: step: 838/463, loss: 0.3805815875530243 2023-01-24 01:37:05.237053: step: 840/463, loss: 0.13114216923713684 2023-01-24 01:37:05.803482: step: 842/463, loss: 0.10758160054683685 2023-01-24 01:37:06.458669: step: 844/463, loss: 0.562036395072937 2023-01-24 01:37:07.050872: step: 846/463, loss: 0.20366312563419342 2023-01-24 01:37:07.689436: step: 848/463, loss: 0.21946363151073456 2023-01-24 01:37:08.337836: step: 850/463, loss: 0.17439350485801697 2023-01-24 01:37:08.978634: step: 852/463, loss: 0.39914271235466003 2023-01-24 01:37:09.635853: step: 854/463, loss: 0.2576218545436859 2023-01-24 01:37:10.261042: step: 856/463, loss: 0.18035078048706055 2023-01-24 01:37:10.801921: step: 858/463, loss: 0.8589784502983093 2023-01-24 01:37:11.421084: step: 860/463, loss: 0.15273889899253845 2023-01-24 01:37:12.126482: step: 862/463, loss: 0.1505916863679886 2023-01-24 01:37:12.683403: step: 864/463, loss: 0.04896983504295349 2023-01-24 01:37:13.359456: step: 866/463, loss: 0.2881368398666382 2023-01-24 01:37:14.070291: step: 868/463, loss: 0.5736387372016907 2023-01-24 01:37:14.700393: step: 870/463, loss: 0.2225956916809082 2023-01-24 01:37:15.321778: step: 872/463, loss: 0.25594228506088257 2023-01-24 01:37:16.079129: step: 874/463, loss: 0.07375379651784897 2023-01-24 01:37:16.723479: step: 876/463, loss: 0.3371123969554901 2023-01-24 01:37:17.256365: step: 878/463, loss: 0.32306715846061707 2023-01-24 01:37:17.869372: step: 880/463, loss: 0.15837273001670837 2023-01-24 01:37:18.536880: step: 882/463, loss: 0.17974665760993958 2023-01-24 01:37:19.154042: step: 884/463, loss: 0.13219226896762848 2023-01-24 01:37:19.796062: step: 886/463, loss: 0.3048762083053589 2023-01-24 01:37:20.354690: step: 888/463, loss: 0.11188378930091858 2023-01-24 01:37:20.950354: step: 890/463, loss: 0.05948547646403313 2023-01-24 01:37:21.560336: step: 892/463, loss: 0.07781964540481567 2023-01-24 01:37:22.162749: step: 894/463, loss: 0.2466231733560562 2023-01-24 01:37:22.769162: step: 896/463, loss: 0.07881219685077667 2023-01-24 01:37:23.381819: step: 898/463, loss: 0.23536062240600586 2023-01-24 01:37:23.977421: step: 900/463, loss: 0.15793681144714355 2023-01-24 01:37:24.579585: step: 902/463, loss: 0.12293320149183273 2023-01-24 01:37:25.247256: step: 904/463, loss: 0.5695751905441284 2023-01-24 01:37:25.877910: step: 906/463, loss: 0.44951313734054565 2023-01-24 01:37:26.478375: step: 908/463, loss: 0.1546756625175476 2023-01-24 01:37:27.115678: step: 910/463, loss: 0.04586353152990341 2023-01-24 01:37:27.703466: step: 912/463, loss: 1.069309949874878 2023-01-24 01:37:28.345315: step: 914/463, loss: 0.10028410702943802 2023-01-24 01:37:28.939693: step: 916/463, loss: 0.6493948101997375 2023-01-24 01:37:29.553629: step: 918/463, loss: 0.20680983364582062 2023-01-24 01:37:30.193183: step: 920/463, loss: 0.2526209354400635 2023-01-24 01:37:30.852978: step: 922/463, loss: 0.13542540371418 2023-01-24 01:37:31.529400: step: 924/463, loss: 0.18513165414333344 2023-01-24 01:37:32.137011: step: 926/463, loss: 0.07138831913471222 ================================================== Loss: 0.317 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33153578900179315, 'r': 0.3007098807264841, 'f1': 0.3153713574982231}, 'combined': 0.2323788949986907, 'epoch': 11} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3499997745310245, 'r': 0.30812207225089316, 'f1': 0.3277285394586834}, 'combined': 0.23056279157897328, 'epoch': 11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32717161016949153, 'r': 0.2924715909090909, 'f1': 0.30885}, 'combined': 0.22757368421052632, 'epoch': 11} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3521318797918948, 'r': 0.306308604605002, 'f1': 0.3276257377606046}, 'combined': 0.23261427381002925, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35263949671772427, 'r': 0.3057993358633776, 'f1': 0.32755335365853655}, 'combined': 0.24135510269576377, 'epoch': 11} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36041256486568984, 'r': 0.2946254678727386, 'f1': 0.32421543557355664}, 'combined': 0.23019295925722522, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.266025641025641, 'r': 0.29642857142857143, 'f1': 0.2804054054054054}, 'combined': 0.18693693693693691, 'epoch': 11} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.29347826086956524, 'r': 0.29347826086956524, 'f1': 0.29347826086956524}, 'combined': 0.14673913043478262, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.13793103448275862, 'f1': 0.20512820512820515}, 'combined': 0.13675213675213677, 'epoch': 11} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32931238967343335, 'r': 0.3212043194731405, 'f1': 0.3252078248932276}, 'combined': 0.2396268183423782, 'epoch': 10} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38073307869771683, 'r': 0.30299176943483225, 'f1': 0.3374427286417082}, 'combined': 0.23958433733561282, 'epoch': 10} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31666666666666665, 'r': 0.41304347826086957, 'f1': 0.3584905660377358}, 'combined': 0.1792452830188679, 'epoch': 10} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3394620582120582, 'r': 0.3092447916666667, 'f1': 0.32364965312190286}, 'combined': 0.23847869177403366, 'epoch': 7} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36468931207169214, 'r': 0.2587193810770381, 'f1': 0.30269771384817323}, 'combined': 0.214915376832203, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 7} ****************************** Epoch: 12 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:40:10.736058: step: 2/463, loss: 0.34197765588760376 2023-01-24 01:40:11.358748: step: 4/463, loss: 0.1533501297235489 2023-01-24 01:40:11.922836: step: 6/463, loss: 0.10390349477529526 2023-01-24 01:40:12.604401: step: 8/463, loss: 0.17737986147403717 2023-01-24 01:40:13.250933: step: 10/463, loss: 0.09041804820299149 2023-01-24 01:40:13.908800: step: 12/463, loss: 0.09724597632884979 2023-01-24 01:40:14.514729: step: 14/463, loss: 0.30833110213279724 2023-01-24 01:40:15.107858: step: 16/463, loss: 0.029852263629436493 2023-01-24 01:40:15.695064: step: 18/463, loss: 0.18121203780174255 2023-01-24 01:40:16.314373: step: 20/463, loss: 0.11334497481584549 2023-01-24 01:40:16.956239: step: 22/463, loss: 0.22209399938583374 2023-01-24 01:40:17.581792: step: 24/463, loss: 0.08219563215970993 2023-01-24 01:40:18.169635: step: 26/463, loss: 0.06467556953430176 2023-01-24 01:40:18.848201: step: 28/463, loss: 0.11557453870773315 2023-01-24 01:40:19.415492: step: 30/463, loss: 0.14014387130737305 2023-01-24 01:40:20.060029: step: 32/463, loss: 0.1500731259584427 2023-01-24 01:40:20.687282: step: 34/463, loss: 0.05879034474492073 2023-01-24 01:40:21.276284: step: 36/463, loss: 0.048753973096609116 2023-01-24 01:40:21.889290: step: 38/463, loss: 0.09840979427099228 2023-01-24 01:40:22.474788: step: 40/463, loss: 0.2541049122810364 2023-01-24 01:40:23.123445: step: 42/463, loss: 0.1798819899559021 2023-01-24 01:40:23.744441: step: 44/463, loss: 0.09386767446994781 2023-01-24 01:40:24.379382: step: 46/463, loss: 0.35540565848350525 2023-01-24 01:40:25.008683: step: 48/463, loss: 0.12210364639759064 2023-01-24 01:40:25.655396: step: 50/463, loss: 0.19718556106090546 2023-01-24 01:40:26.351656: step: 52/463, loss: 0.09740059077739716 2023-01-24 01:40:26.959055: step: 54/463, loss: 0.21934853494167328 2023-01-24 01:40:27.572988: step: 56/463, loss: 0.18214760720729828 2023-01-24 01:40:28.167818: step: 58/463, loss: 0.13398641347885132 2023-01-24 01:40:28.781585: step: 60/463, loss: 0.14503853023052216 2023-01-24 01:40:29.407150: step: 62/463, loss: 0.11281847208738327 2023-01-24 01:40:30.046364: step: 64/463, loss: 0.09909256547689438 2023-01-24 01:40:30.704874: step: 66/463, loss: 0.07760182023048401 2023-01-24 01:40:31.323302: step: 68/463, loss: 0.1103949248790741 2023-01-24 01:40:31.985609: step: 70/463, loss: 0.10749391466379166 2023-01-24 01:40:32.654313: step: 72/463, loss: 0.357159823179245 2023-01-24 01:40:33.253342: step: 74/463, loss: 0.15469011664390564 2023-01-24 01:40:33.928928: step: 76/463, loss: 0.2622683346271515 2023-01-24 01:40:34.702995: step: 78/463, loss: 0.11571220308542252 2023-01-24 01:40:35.241898: step: 80/463, loss: 0.013331087306141853 2023-01-24 01:40:35.880047: step: 82/463, loss: 0.2537185549736023 2023-01-24 01:40:36.500910: step: 84/463, loss: 0.5151477456092834 2023-01-24 01:40:37.098852: step: 86/463, loss: 0.029469283297657967 2023-01-24 01:40:37.742097: step: 88/463, loss: 0.17114776372909546 2023-01-24 01:40:38.348288: step: 90/463, loss: 0.11693064868450165 2023-01-24 01:40:38.952057: step: 92/463, loss: 0.03392370045185089 2023-01-24 01:40:39.568344: step: 94/463, loss: 0.06575566530227661 2023-01-24 01:40:40.254447: step: 96/463, loss: 0.07638482004404068 2023-01-24 01:40:40.921846: step: 98/463, loss: 0.5633997917175293 2023-01-24 01:40:41.564978: step: 100/463, loss: 0.1883103847503662 2023-01-24 01:40:42.250140: step: 102/463, loss: 0.05599706619977951 2023-01-24 01:40:42.849191: step: 104/463, loss: 0.1711796671152115 2023-01-24 01:40:43.468890: step: 106/463, loss: 0.09866970777511597 2023-01-24 01:40:44.083782: step: 108/463, loss: 0.26045605540275574 2023-01-24 01:40:44.775522: step: 110/463, loss: 0.7119902968406677 2023-01-24 01:40:45.444907: step: 112/463, loss: 0.1435137689113617 2023-01-24 01:40:46.114626: step: 114/463, loss: 1.2725743055343628 2023-01-24 01:40:46.786683: step: 116/463, loss: 0.15724846720695496 2023-01-24 01:40:47.429003: step: 118/463, loss: 0.1532386839389801 2023-01-24 01:40:48.034714: step: 120/463, loss: 0.07204615324735641 2023-01-24 01:40:48.653405: step: 122/463, loss: 0.15915443003177643 2023-01-24 01:40:49.285229: step: 124/463, loss: 0.05263229459524155 2023-01-24 01:40:49.928747: step: 126/463, loss: 0.10201231390237808 2023-01-24 01:40:50.522701: step: 128/463, loss: 0.054472025483846664 2023-01-24 01:40:51.138999: step: 130/463, loss: 0.2844374179840088 2023-01-24 01:40:51.742956: step: 132/463, loss: 0.07035879790782928 2023-01-24 01:40:52.358222: step: 134/463, loss: 0.08179888129234314 2023-01-24 01:40:53.036854: step: 136/463, loss: 0.06709815561771393 2023-01-24 01:40:53.707893: step: 138/463, loss: 0.8633741736412048 2023-01-24 01:40:54.362246: step: 140/463, loss: 0.12531118094921112 2023-01-24 01:40:54.979019: step: 142/463, loss: 0.12658275663852692 2023-01-24 01:40:55.602936: step: 144/463, loss: 0.20998382568359375 2023-01-24 01:40:56.259844: step: 146/463, loss: 0.7228164672851562 2023-01-24 01:40:56.839259: step: 148/463, loss: 0.1573444902896881 2023-01-24 01:40:57.512724: step: 150/463, loss: 0.2776161730289459 2023-01-24 01:40:58.147870: step: 152/463, loss: 0.17548565566539764 2023-01-24 01:40:58.765967: step: 154/463, loss: 0.19316193461418152 2023-01-24 01:40:59.400836: step: 156/463, loss: 0.21854916214942932 2023-01-24 01:41:00.022803: step: 158/463, loss: 0.4416029751300812 2023-01-24 01:41:00.678856: step: 160/463, loss: 0.5560047030448914 2023-01-24 01:41:01.351131: step: 162/463, loss: 0.2570066750049591 2023-01-24 01:41:01.961884: step: 164/463, loss: 0.07668226212263107 2023-01-24 01:41:02.590641: step: 166/463, loss: 0.11153680831193924 2023-01-24 01:41:03.227000: step: 168/463, loss: 0.26291853189468384 2023-01-24 01:41:03.872125: step: 170/463, loss: 0.1521974503993988 2023-01-24 01:41:04.554856: step: 172/463, loss: 0.11137960106134415 2023-01-24 01:41:05.156782: step: 174/463, loss: 0.10303952544927597 2023-01-24 01:41:05.721762: step: 176/463, loss: 0.03495294228196144 2023-01-24 01:41:06.377431: step: 178/463, loss: 1.496561050415039 2023-01-24 01:41:07.014949: step: 180/463, loss: 0.15912805497646332 2023-01-24 01:41:07.633444: step: 182/463, loss: 0.26345524191856384 2023-01-24 01:41:08.247263: step: 184/463, loss: 0.08099766820669174 2023-01-24 01:41:08.848240: step: 186/463, loss: 0.05535869300365448 2023-01-24 01:41:09.508214: step: 188/463, loss: 0.13859950006008148 2023-01-24 01:41:10.097517: step: 190/463, loss: 0.0898204892873764 2023-01-24 01:41:10.710265: step: 192/463, loss: 0.4156183898448944 2023-01-24 01:41:11.419082: step: 194/463, loss: 0.10934385657310486 2023-01-24 01:41:12.074343: step: 196/463, loss: 0.16713257133960724 2023-01-24 01:41:12.667734: step: 198/463, loss: 0.04035454988479614 2023-01-24 01:41:13.292237: step: 200/463, loss: 0.253432035446167 2023-01-24 01:41:13.835285: step: 202/463, loss: 0.09646738320589066 2023-01-24 01:41:14.494663: step: 204/463, loss: 0.33751392364501953 2023-01-24 01:41:15.063335: step: 206/463, loss: 0.10355725139379501 2023-01-24 01:41:15.739995: step: 208/463, loss: 0.3154269754886627 2023-01-24 01:41:16.373939: step: 210/463, loss: 0.15998752415180206 2023-01-24 01:41:17.035282: step: 212/463, loss: 0.0676063820719719 2023-01-24 01:41:17.709834: step: 214/463, loss: 0.08083686977624893 2023-01-24 01:41:18.395718: step: 216/463, loss: 0.19557450711727142 2023-01-24 01:41:19.051026: step: 218/463, loss: 0.24356113374233246 2023-01-24 01:41:19.696608: step: 220/463, loss: 0.031922560185194016 2023-01-24 01:41:20.326523: step: 222/463, loss: 0.16581040620803833 2023-01-24 01:41:20.944612: step: 224/463, loss: 0.3467949628829956 2023-01-24 01:41:21.590247: step: 226/463, loss: 0.11666920781135559 2023-01-24 01:41:22.177103: step: 228/463, loss: 0.11344558745622635 2023-01-24 01:41:22.827721: step: 230/463, loss: 0.08254016190767288 2023-01-24 01:41:23.434414: step: 232/463, loss: 0.27026641368865967 2023-01-24 01:41:24.062925: step: 234/463, loss: 0.32602378726005554 2023-01-24 01:41:24.643374: step: 236/463, loss: 0.13874435424804688 2023-01-24 01:41:25.308646: step: 238/463, loss: 0.21382077038288116 2023-01-24 01:41:25.882347: step: 240/463, loss: 0.346485435962677 2023-01-24 01:41:26.468171: step: 242/463, loss: 0.10498189181089401 2023-01-24 01:41:27.072867: step: 244/463, loss: 0.15121930837631226 2023-01-24 01:41:27.691059: step: 246/463, loss: 0.8480350971221924 2023-01-24 01:41:28.296696: step: 248/463, loss: 0.0786808580160141 2023-01-24 01:41:28.860898: step: 250/463, loss: 0.12562072277069092 2023-01-24 01:41:29.464545: step: 252/463, loss: 0.1990298628807068 2023-01-24 01:41:30.114118: step: 254/463, loss: 0.32258129119873047 2023-01-24 01:41:30.741700: step: 256/463, loss: 0.31003543734550476 2023-01-24 01:41:31.377724: step: 258/463, loss: 0.11853492259979248 2023-01-24 01:41:32.015591: step: 260/463, loss: 0.14540258049964905 2023-01-24 01:41:32.655816: step: 262/463, loss: 0.09037131816148758 2023-01-24 01:41:33.282807: step: 264/463, loss: 0.21991512179374695 2023-01-24 01:41:33.853713: step: 266/463, loss: 0.13925594091415405 2023-01-24 01:41:34.466161: step: 268/463, loss: 0.1391882598400116 2023-01-24 01:41:35.025164: step: 270/463, loss: 0.16400665044784546 2023-01-24 01:41:35.751370: step: 272/463, loss: 0.20954331755638123 2023-01-24 01:41:36.371069: step: 274/463, loss: 0.10966300964355469 2023-01-24 01:41:37.051530: step: 276/463, loss: 0.30083832144737244 2023-01-24 01:41:37.651803: step: 278/463, loss: 0.2734852433204651 2023-01-24 01:41:38.330517: step: 280/463, loss: 0.39561375975608826 2023-01-24 01:41:39.006424: step: 282/463, loss: 2.0574252605438232 2023-01-24 01:41:39.629939: step: 284/463, loss: 0.10327503830194473 2023-01-24 01:41:40.255266: step: 286/463, loss: 0.14578378200531006 2023-01-24 01:41:40.818784: step: 288/463, loss: 0.0432501845061779 2023-01-24 01:41:41.443212: step: 290/463, loss: 0.09280187636613846 2023-01-24 01:41:42.002605: step: 292/463, loss: 0.22353342175483704 2023-01-24 01:41:42.641656: step: 294/463, loss: 0.6797869205474854 2023-01-24 01:41:43.213832: step: 296/463, loss: 0.43066519498825073 2023-01-24 01:41:43.860730: step: 298/463, loss: 0.04075444117188454 2023-01-24 01:41:44.508877: step: 300/463, loss: 0.09970996528863907 2023-01-24 01:41:45.150000: step: 302/463, loss: 0.05279269069433212 2023-01-24 01:41:45.726647: step: 304/463, loss: 0.6067184209823608 2023-01-24 01:41:46.359680: step: 306/463, loss: 0.09084320068359375 2023-01-24 01:41:47.040676: step: 308/463, loss: 0.24699880182743073 2023-01-24 01:41:47.665762: step: 310/463, loss: 0.2382107675075531 2023-01-24 01:41:48.311571: step: 312/463, loss: 0.616605818271637 2023-01-24 01:41:48.958047: step: 314/463, loss: 0.16448034346103668 2023-01-24 01:41:49.523810: step: 316/463, loss: 0.1294807344675064 2023-01-24 01:41:50.151824: step: 318/463, loss: 0.11885892599821091 2023-01-24 01:41:50.824180: step: 320/463, loss: 0.3176543712615967 2023-01-24 01:41:51.441779: step: 322/463, loss: 0.13634058833122253 2023-01-24 01:41:52.013394: step: 324/463, loss: 0.610985517501831 2023-01-24 01:41:52.641460: step: 326/463, loss: 0.10349148511886597 2023-01-24 01:41:53.247340: step: 328/463, loss: 0.1882731169462204 2023-01-24 01:41:53.818640: step: 330/463, loss: 0.1491113305091858 2023-01-24 01:41:54.497614: step: 332/463, loss: 0.16080190241336823 2023-01-24 01:41:55.107468: step: 334/463, loss: 0.11535745114088058 2023-01-24 01:41:55.819710: step: 336/463, loss: 0.20681701600551605 2023-01-24 01:41:56.447068: step: 338/463, loss: 0.1957293152809143 2023-01-24 01:41:57.128467: step: 340/463, loss: 0.08251967281103134 2023-01-24 01:41:57.796564: step: 342/463, loss: 0.0850360095500946 2023-01-24 01:41:58.435598: step: 344/463, loss: 1.3521161079406738 2023-01-24 01:41:59.057264: step: 346/463, loss: 0.14489100873470306 2023-01-24 01:41:59.810531: step: 348/463, loss: 0.5899819135665894 2023-01-24 01:42:00.397776: step: 350/463, loss: 0.036885347217321396 2023-01-24 01:42:01.018636: step: 352/463, loss: 0.4175459146499634 2023-01-24 01:42:01.642284: step: 354/463, loss: 0.2108113020658493 2023-01-24 01:42:02.265189: step: 356/463, loss: 0.15853843092918396 2023-01-24 01:42:02.907099: step: 358/463, loss: 0.31495052576065063 2023-01-24 01:42:03.576036: step: 360/463, loss: 0.44005003571510315 2023-01-24 01:42:04.197847: step: 362/463, loss: 0.14134806394577026 2023-01-24 01:42:04.817275: step: 364/463, loss: 0.04661629721522331 2023-01-24 01:42:05.437211: step: 366/463, loss: 0.47832930088043213 2023-01-24 01:42:06.050584: step: 368/463, loss: 0.13232572376728058 2023-01-24 01:42:06.653606: step: 370/463, loss: 0.23933523893356323 2023-01-24 01:42:07.208416: step: 372/463, loss: 0.1300169974565506 2023-01-24 01:42:07.860400: step: 374/463, loss: 0.2780449390411377 2023-01-24 01:42:08.485980: step: 376/463, loss: 0.2232193797826767 2023-01-24 01:42:09.200776: step: 378/463, loss: 0.10482568293809891 2023-01-24 01:42:09.813150: step: 380/463, loss: 0.029409250244498253 2023-01-24 01:42:10.363165: step: 382/463, loss: 0.20836682617664337 2023-01-24 01:42:10.980196: step: 384/463, loss: 0.08482760190963745 2023-01-24 01:42:11.635883: step: 386/463, loss: 0.21365922689437866 2023-01-24 01:42:12.329982: step: 388/463, loss: 0.10487748682498932 2023-01-24 01:42:12.911795: step: 390/463, loss: 0.32354769110679626 2023-01-24 01:42:13.536598: step: 392/463, loss: 0.2740028202533722 2023-01-24 01:42:14.124563: step: 394/463, loss: 0.24762241542339325 2023-01-24 01:42:14.760331: step: 396/463, loss: 0.16800114512443542 2023-01-24 01:42:15.352039: step: 398/463, loss: 0.3699037432670593 2023-01-24 01:42:16.011680: step: 400/463, loss: 0.9338496327400208 2023-01-24 01:42:16.697877: step: 402/463, loss: 0.17732645571231842 2023-01-24 01:42:17.289408: step: 404/463, loss: 0.18352626264095306 2023-01-24 01:42:17.886877: step: 406/463, loss: 0.08913999050855637 2023-01-24 01:42:18.540073: step: 408/463, loss: 0.158122718334198 2023-01-24 01:42:19.184293: step: 410/463, loss: 0.5490902066230774 2023-01-24 01:42:19.885889: step: 412/463, loss: 0.03415511175990105 2023-01-24 01:42:20.549973: step: 414/463, loss: 6.505986213684082 2023-01-24 01:42:21.140074: step: 416/463, loss: 0.22885532677173615 2023-01-24 01:42:21.757114: step: 418/463, loss: 0.4499245882034302 2023-01-24 01:42:22.482208: step: 420/463, loss: 0.2058737576007843 2023-01-24 01:42:23.148505: step: 422/463, loss: 0.26190832257270813 2023-01-24 01:42:23.790753: step: 424/463, loss: 0.2119787633419037 2023-01-24 01:42:24.382957: step: 426/463, loss: 0.07810167223215103 2023-01-24 01:42:25.005148: step: 428/463, loss: 0.08695562928915024 2023-01-24 01:42:25.583480: step: 430/463, loss: 0.02590763382613659 2023-01-24 01:42:26.238967: step: 432/463, loss: 0.3067600429058075 2023-01-24 01:42:26.910615: step: 434/463, loss: 0.11293908953666687 2023-01-24 01:42:27.555220: step: 436/463, loss: 0.2035467028617859 2023-01-24 01:42:28.210157: step: 438/463, loss: 0.7628684639930725 2023-01-24 01:42:28.810584: step: 440/463, loss: 0.5099074244499207 2023-01-24 01:42:29.437892: step: 442/463, loss: 0.10515103489160538 2023-01-24 01:42:30.008837: step: 444/463, loss: 0.3770093023777008 2023-01-24 01:42:30.617387: step: 446/463, loss: 0.5468544960021973 2023-01-24 01:42:31.187988: step: 448/463, loss: 0.1259739100933075 2023-01-24 01:42:31.826745: step: 450/463, loss: 0.12694409489631653 2023-01-24 01:42:32.443749: step: 452/463, loss: 0.1850188672542572 2023-01-24 01:42:33.030379: step: 454/463, loss: 0.15091940760612488 2023-01-24 01:42:33.599539: step: 456/463, loss: 0.1814865916967392 2023-01-24 01:42:34.263021: step: 458/463, loss: 0.05245333909988403 2023-01-24 01:42:34.862049: step: 460/463, loss: 0.15327942371368408 2023-01-24 01:42:35.478700: step: 462/463, loss: 0.20254862308502197 2023-01-24 01:42:36.060471: step: 464/463, loss: 2.921994209289551 2023-01-24 01:42:36.684965: step: 466/463, loss: 0.13665466010570526 2023-01-24 01:42:37.268021: step: 468/463, loss: 0.3945065438747406 2023-01-24 01:42:37.892895: step: 470/463, loss: 0.057316139340400696 2023-01-24 01:42:38.489285: step: 472/463, loss: 0.1911284327507019 2023-01-24 01:42:39.094913: step: 474/463, loss: 0.542300283908844 2023-01-24 01:42:39.703960: step: 476/463, loss: 0.07461020350456238 2023-01-24 01:42:40.383651: step: 478/463, loss: 0.1451764702796936 2023-01-24 01:42:40.996701: step: 480/463, loss: 0.11218343675136566 2023-01-24 01:42:41.560161: step: 482/463, loss: 0.3255275785923004 2023-01-24 01:42:42.212508: step: 484/463, loss: 0.18552474677562714 2023-01-24 01:42:42.834656: step: 486/463, loss: 0.5213183164596558 2023-01-24 01:42:43.431747: step: 488/463, loss: 0.12484761327505112 2023-01-24 01:42:44.012114: step: 490/463, loss: 0.09700513631105423 2023-01-24 01:42:44.584600: step: 492/463, loss: 0.10551420599222183 2023-01-24 01:42:45.152800: step: 494/463, loss: 0.3804340064525604 2023-01-24 01:42:45.875778: step: 496/463, loss: 0.18107792735099792 2023-01-24 01:42:46.515520: step: 498/463, loss: 0.17650939524173737 2023-01-24 01:42:47.149427: step: 500/463, loss: 0.04682953655719757 2023-01-24 01:42:47.797998: step: 502/463, loss: 0.21133457124233246 2023-01-24 01:42:48.397740: step: 504/463, loss: 0.21115027368068695 2023-01-24 01:42:49.067753: step: 506/463, loss: 0.07995347678661346 2023-01-24 01:42:49.691425: step: 508/463, loss: 0.09850002080202103 2023-01-24 01:42:50.273127: step: 510/463, loss: 0.3223021626472473 2023-01-24 01:42:51.051977: step: 512/463, loss: 0.12096260488033295 2023-01-24 01:42:51.670188: step: 514/463, loss: 0.11630367487668991 2023-01-24 01:42:52.260936: step: 516/463, loss: 1.0893641710281372 2023-01-24 01:42:52.879891: step: 518/463, loss: 0.06874978542327881 2023-01-24 01:42:53.548349: step: 520/463, loss: 0.10637974739074707 2023-01-24 01:42:54.205223: step: 522/463, loss: 0.24100622534751892 2023-01-24 01:42:54.801250: step: 524/463, loss: 0.11954198777675629 2023-01-24 01:42:55.414186: step: 526/463, loss: 0.29922863841056824 2023-01-24 01:42:56.032278: step: 528/463, loss: 0.30005699396133423 2023-01-24 01:42:56.705251: step: 530/463, loss: 0.16480515897274017 2023-01-24 01:42:57.336915: step: 532/463, loss: 0.09650325775146484 2023-01-24 01:42:57.921904: step: 534/463, loss: 0.11097333580255508 2023-01-24 01:42:58.519962: step: 536/463, loss: 0.22590099275112152 2023-01-24 01:42:59.133492: step: 538/463, loss: 0.1011693924665451 2023-01-24 01:42:59.806760: step: 540/463, loss: 0.6133821606636047 2023-01-24 01:43:00.456990: step: 542/463, loss: 0.2232075035572052 2023-01-24 01:43:01.092983: step: 544/463, loss: 0.10726714134216309 2023-01-24 01:43:01.714876: step: 546/463, loss: 0.2507132887840271 2023-01-24 01:43:02.269310: step: 548/463, loss: 0.20572566986083984 2023-01-24 01:43:02.922043: step: 550/463, loss: 1.2036218643188477 2023-01-24 01:43:03.555920: step: 552/463, loss: 0.043905001133680344 2023-01-24 01:43:04.186583: step: 554/463, loss: 0.177282452583313 2023-01-24 01:43:04.767517: step: 556/463, loss: 0.39414525032043457 2023-01-24 01:43:05.398293: step: 558/463, loss: 0.10579732060432434 2023-01-24 01:43:06.123226: step: 560/463, loss: 0.22690117359161377 2023-01-24 01:43:06.864958: step: 562/463, loss: 0.17361216247081757 2023-01-24 01:43:07.476631: step: 564/463, loss: 0.16745789349079132 2023-01-24 01:43:08.088137: step: 566/463, loss: 0.08048545569181442 2023-01-24 01:43:08.662222: step: 568/463, loss: 0.2532932758331299 2023-01-24 01:43:09.258586: step: 570/463, loss: 0.18356938660144806 2023-01-24 01:43:09.833727: step: 572/463, loss: 0.11372794955968857 2023-01-24 01:43:10.474438: step: 574/463, loss: 0.0735929012298584 2023-01-24 01:43:11.163041: step: 576/463, loss: 0.2808501124382019 2023-01-24 01:43:11.784196: step: 578/463, loss: 0.21926674246788025 2023-01-24 01:43:12.434585: step: 580/463, loss: 0.11593205481767654 2023-01-24 01:43:13.007468: step: 582/463, loss: 0.07882106304168701 2023-01-24 01:43:13.633965: step: 584/463, loss: 0.6042473912239075 2023-01-24 01:43:14.295287: step: 586/463, loss: 0.11208947002887726 2023-01-24 01:43:14.969656: step: 588/463, loss: 0.09803960472345352 2023-01-24 01:43:15.582854: step: 590/463, loss: 0.14788559079170227 2023-01-24 01:43:16.386124: step: 592/463, loss: 0.43674588203430176 2023-01-24 01:43:17.048543: step: 594/463, loss: 0.29116687178611755 2023-01-24 01:43:17.696383: step: 596/463, loss: 0.20990018546581268 2023-01-24 01:43:18.334418: step: 598/463, loss: 0.09308087825775146 2023-01-24 01:43:18.888712: step: 600/463, loss: 0.09691055119037628 2023-01-24 01:43:19.529156: step: 602/463, loss: 0.18259133398532867 2023-01-24 01:43:20.181131: step: 604/463, loss: 0.16762174665927887 2023-01-24 01:43:20.748364: step: 606/463, loss: 1.2885576486587524 2023-01-24 01:43:21.342288: step: 608/463, loss: 0.11559124290943146 2023-01-24 01:43:21.965683: step: 610/463, loss: 0.20327241718769073 2023-01-24 01:43:22.609693: step: 612/463, loss: 0.7383559942245483 2023-01-24 01:43:23.238405: step: 614/463, loss: 0.20116941630840302 2023-01-24 01:43:23.880899: step: 616/463, loss: 0.08069182932376862 2023-01-24 01:43:24.491109: step: 618/463, loss: 0.06375639140605927 2023-01-24 01:43:25.091582: step: 620/463, loss: 0.17031016945838928 2023-01-24 01:43:25.667316: step: 622/463, loss: 0.24734219908714294 2023-01-24 01:43:26.348708: step: 624/463, loss: 0.34365737438201904 2023-01-24 01:43:26.943024: step: 626/463, loss: 0.0944933071732521 2023-01-24 01:43:27.576550: step: 628/463, loss: 0.21452611684799194 2023-01-24 01:43:28.215003: step: 630/463, loss: 0.18214578926563263 2023-01-24 01:43:28.868709: step: 632/463, loss: 0.1485147774219513 2023-01-24 01:43:29.500437: step: 634/463, loss: 0.10581442713737488 2023-01-24 01:43:30.167993: step: 636/463, loss: 0.19922447204589844 2023-01-24 01:43:30.747491: step: 638/463, loss: 0.13740555942058563 2023-01-24 01:43:31.450978: step: 640/463, loss: 0.27541083097457886 2023-01-24 01:43:32.088163: step: 642/463, loss: 0.2650885581970215 2023-01-24 01:43:32.686012: step: 644/463, loss: 0.13503605127334595 2023-01-24 01:43:33.261106: step: 646/463, loss: 0.2300872504711151 2023-01-24 01:43:33.966347: step: 648/463, loss: 0.1925562471151352 2023-01-24 01:43:34.536375: step: 650/463, loss: 0.44086819887161255 2023-01-24 01:43:35.176764: step: 652/463, loss: 0.5781541466712952 2023-01-24 01:43:35.786073: step: 654/463, loss: 0.12554456293582916 2023-01-24 01:43:36.461287: step: 656/463, loss: 0.09108728915452957 2023-01-24 01:43:36.997177: step: 658/463, loss: 0.15431836247444153 2023-01-24 01:43:37.618747: step: 660/463, loss: 0.6667349338531494 2023-01-24 01:43:38.179723: step: 662/463, loss: 0.2485034167766571 2023-01-24 01:43:38.786319: step: 664/463, loss: 0.3360239863395691 2023-01-24 01:43:39.423044: step: 666/463, loss: 0.19277577102184296 2023-01-24 01:43:40.094581: step: 668/463, loss: 0.08894162625074387 2023-01-24 01:43:40.700801: step: 670/463, loss: 0.12091633677482605 2023-01-24 01:43:41.310019: step: 672/463, loss: 0.08755961060523987 2023-01-24 01:43:42.001135: step: 674/463, loss: 0.0740816667675972 2023-01-24 01:43:42.674857: step: 676/463, loss: 0.522407591342926 2023-01-24 01:43:43.241887: step: 678/463, loss: 0.06385527551174164 2023-01-24 01:43:43.792141: step: 680/463, loss: 0.09522353857755661 2023-01-24 01:43:44.429734: step: 682/463, loss: 0.07894842326641083 2023-01-24 01:43:45.102189: step: 684/463, loss: 1.1298984289169312 2023-01-24 01:43:45.686066: step: 686/463, loss: 0.08782923221588135 2023-01-24 01:43:46.285602: step: 688/463, loss: 0.39315420389175415 2023-01-24 01:43:47.010836: step: 690/463, loss: 0.058390017598867416 2023-01-24 01:43:47.651004: step: 692/463, loss: 0.093948133289814 2023-01-24 01:43:48.217787: step: 694/463, loss: 0.7958551645278931 2023-01-24 01:43:48.883229: step: 696/463, loss: 0.12597478926181793 2023-01-24 01:43:49.510684: step: 698/463, loss: 0.12066067010164261 2023-01-24 01:43:50.173126: step: 700/463, loss: 0.1732708215713501 2023-01-24 01:43:50.866509: step: 702/463, loss: 0.15589715540409088 2023-01-24 01:43:51.435478: step: 704/463, loss: 0.12504659593105316 2023-01-24 01:43:52.054838: step: 706/463, loss: 0.08416897058486938 2023-01-24 01:43:52.699545: step: 708/463, loss: 0.25884726643562317 2023-01-24 01:43:53.326053: step: 710/463, loss: 0.07357209175825119 2023-01-24 01:43:53.950775: step: 712/463, loss: 1.8720377683639526 2023-01-24 01:43:54.667833: step: 714/463, loss: 0.23082883656024933 2023-01-24 01:43:55.300069: step: 716/463, loss: 0.16086243093013763 2023-01-24 01:43:55.917580: step: 718/463, loss: 0.06895843893289566 2023-01-24 01:43:56.534187: step: 720/463, loss: 0.15479770302772522 2023-01-24 01:43:57.121583: step: 722/463, loss: 0.7274724245071411 2023-01-24 01:43:57.832476: step: 724/463, loss: 0.5339646935462952 2023-01-24 01:43:58.402165: step: 726/463, loss: 0.03434988856315613 2023-01-24 01:43:59.045805: step: 728/463, loss: 0.43183112144470215 2023-01-24 01:43:59.670364: step: 730/463, loss: 0.14177823066711426 2023-01-24 01:44:00.334818: step: 732/463, loss: 0.05052487552165985 2023-01-24 01:44:00.980478: step: 734/463, loss: 0.30410340428352356 2023-01-24 01:44:01.523434: step: 736/463, loss: 0.11337306350469589 2023-01-24 01:44:02.147534: step: 738/463, loss: 0.07960277795791626 2023-01-24 01:44:02.777468: step: 740/463, loss: 0.1879837065935135 2023-01-24 01:44:03.512452: step: 742/463, loss: 0.10435333102941513 2023-01-24 01:44:04.147791: step: 744/463, loss: 0.7943699359893799 2023-01-24 01:44:04.797950: step: 746/463, loss: 0.1815556287765503 2023-01-24 01:44:05.400908: step: 748/463, loss: 6.026325225830078 2023-01-24 01:44:05.964495: step: 750/463, loss: 0.06954225897789001 2023-01-24 01:44:06.614739: step: 752/463, loss: 0.33254802227020264 2023-01-24 01:44:07.177710: step: 754/463, loss: 0.14527590572834015 2023-01-24 01:44:07.768329: step: 756/463, loss: 0.12501434981822968 2023-01-24 01:44:08.387163: step: 758/463, loss: 0.3037288188934326 2023-01-24 01:44:08.986129: step: 760/463, loss: 0.17949213087558746 2023-01-24 01:44:09.588600: step: 762/463, loss: 0.11923478543758392 2023-01-24 01:44:10.231998: step: 764/463, loss: 0.1872435212135315 2023-01-24 01:44:10.810005: step: 766/463, loss: 0.42351803183555603 2023-01-24 01:44:11.421968: step: 768/463, loss: 0.14755286276340485 2023-01-24 01:44:12.112505: step: 770/463, loss: 0.30463990569114685 2023-01-24 01:44:12.793562: step: 772/463, loss: 0.05567483231425285 2023-01-24 01:44:13.432737: step: 774/463, loss: 0.6206821203231812 2023-01-24 01:44:14.044637: step: 776/463, loss: 0.08761653304100037 2023-01-24 01:44:14.643991: step: 778/463, loss: 0.4029819965362549 2023-01-24 01:44:15.245415: step: 780/463, loss: 0.23647277057170868 2023-01-24 01:44:15.910127: step: 782/463, loss: 0.19018125534057617 2023-01-24 01:44:16.496489: step: 784/463, loss: 0.4268651008605957 2023-01-24 01:44:17.104841: step: 786/463, loss: 0.26236891746520996 2023-01-24 01:44:17.700037: step: 788/463, loss: 0.15035861730575562 2023-01-24 01:44:18.282320: step: 790/463, loss: 0.2712308466434479 2023-01-24 01:44:18.888080: step: 792/463, loss: 0.28279125690460205 2023-01-24 01:44:19.592179: step: 794/463, loss: 0.313493937253952 2023-01-24 01:44:20.227146: step: 796/463, loss: 0.17327666282653809 2023-01-24 01:44:20.915763: step: 798/463, loss: 0.11048874258995056 2023-01-24 01:44:21.511026: step: 800/463, loss: 3.0018017292022705 2023-01-24 01:44:22.142393: step: 802/463, loss: 0.5758911371231079 2023-01-24 01:44:22.783108: step: 804/463, loss: 0.14720159769058228 2023-01-24 01:44:23.458601: step: 806/463, loss: 0.09125930815935135 2023-01-24 01:44:24.090127: step: 808/463, loss: 0.10682923346757889 2023-01-24 01:44:24.707272: step: 810/463, loss: 0.5281385779380798 2023-01-24 01:44:25.358321: step: 812/463, loss: 0.17217954993247986 2023-01-24 01:44:25.955971: step: 814/463, loss: 0.11705406755208969 2023-01-24 01:44:26.586384: step: 816/463, loss: 0.24329277873039246 2023-01-24 01:44:27.118303: step: 818/463, loss: 0.09143783897161484 2023-01-24 01:44:27.714291: step: 820/463, loss: 0.05377238988876343 2023-01-24 01:44:28.355999: step: 822/463, loss: 0.1849215179681778 2023-01-24 01:44:28.972219: step: 824/463, loss: 2.818272590637207 2023-01-24 01:44:29.597703: step: 826/463, loss: 0.1024990901350975 2023-01-24 01:44:30.248868: step: 828/463, loss: 0.10002215206623077 2023-01-24 01:44:30.891650: step: 830/463, loss: 0.19257207214832306 2023-01-24 01:44:31.525595: step: 832/463, loss: 0.09470025449991226 2023-01-24 01:44:32.197141: step: 834/463, loss: 0.18126580119132996 2023-01-24 01:44:32.832943: step: 836/463, loss: 0.18168815970420837 2023-01-24 01:44:33.472985: step: 838/463, loss: 0.09921485185623169 2023-01-24 01:44:34.129898: step: 840/463, loss: 0.039065852761268616 2023-01-24 01:44:34.722379: step: 842/463, loss: 0.18291246891021729 2023-01-24 01:44:35.316379: step: 844/463, loss: 0.09681746363639832 2023-01-24 01:44:35.921834: step: 846/463, loss: 0.34084174036979675 2023-01-24 01:44:36.492295: step: 848/463, loss: 0.10119979828596115 2023-01-24 01:44:37.191233: step: 850/463, loss: 0.08702563494443893 2023-01-24 01:44:37.801867: step: 852/463, loss: 0.8252895474433899 2023-01-24 01:44:38.370054: step: 854/463, loss: 0.05417141690850258 2023-01-24 01:44:39.040455: step: 856/463, loss: 0.32311388850212097 2023-01-24 01:44:39.635966: step: 858/463, loss: 0.2278389036655426 2023-01-24 01:44:40.327299: step: 860/463, loss: 0.17602664232254028 2023-01-24 01:44:40.958020: step: 862/463, loss: 1.5238887071609497 2023-01-24 01:44:41.534931: step: 864/463, loss: 0.9015859961509705 2023-01-24 01:44:42.143561: step: 866/463, loss: 0.18757116794586182 2023-01-24 01:44:42.766404: step: 868/463, loss: 0.06808634102344513 2023-01-24 01:44:43.401749: step: 870/463, loss: 0.1751035451889038 2023-01-24 01:44:44.045076: step: 872/463, loss: 0.8377909660339355 2023-01-24 01:44:44.661649: step: 874/463, loss: 0.46844834089279175 2023-01-24 01:44:45.290841: step: 876/463, loss: 0.21880125999450684 2023-01-24 01:44:45.921165: step: 878/463, loss: 0.08367852121591568 2023-01-24 01:44:46.595096: step: 880/463, loss: 0.1920214146375656 2023-01-24 01:44:47.283464: step: 882/463, loss: 0.11448926478624344 2023-01-24 01:44:47.935901: step: 884/463, loss: 0.11607978492975235 2023-01-24 01:44:48.575850: step: 886/463, loss: 0.6288167834281921 2023-01-24 01:44:49.136068: step: 888/463, loss: 0.11310462653636932 2023-01-24 01:44:49.780476: step: 890/463, loss: 0.14403803646564484 2023-01-24 01:44:50.478766: step: 892/463, loss: 0.13629819452762604 2023-01-24 01:44:51.091116: step: 894/463, loss: 0.1250964105129242 2023-01-24 01:44:51.665662: step: 896/463, loss: 0.19234906136989594 2023-01-24 01:44:52.280848: step: 898/463, loss: 0.42414477467536926 2023-01-24 01:44:52.872688: step: 900/463, loss: 0.06574060767889023 2023-01-24 01:44:53.457010: step: 902/463, loss: 0.09011495113372803 2023-01-24 01:44:54.179871: step: 904/463, loss: 0.606460452079773 2023-01-24 01:44:54.852172: step: 906/463, loss: 0.8385716080665588 2023-01-24 01:44:55.456985: step: 908/463, loss: 0.14117975533008575 2023-01-24 01:44:56.070693: step: 910/463, loss: 0.21920403838157654 2023-01-24 01:44:56.750920: step: 912/463, loss: 0.04663897678256035 2023-01-24 01:44:57.394210: step: 914/463, loss: 0.18976335227489471 2023-01-24 01:44:58.013941: step: 916/463, loss: 0.08910458534955978 2023-01-24 01:44:58.627560: step: 918/463, loss: 0.15939036011695862 2023-01-24 01:44:59.252999: step: 920/463, loss: 0.251723051071167 2023-01-24 01:44:59.877019: step: 922/463, loss: 0.030758770182728767 2023-01-24 01:45:00.588455: step: 924/463, loss: 0.33404502272605896 2023-01-24 01:45:01.207825: step: 926/463, loss: 0.2339412271976471 ================================================== Loss: 0.281 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3014310948988971, 'r': 0.2922968192959002, 'f1': 0.29679369343891404}, 'combined': 0.2186900899023577, 'epoch': 12} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3646512769512629, 'r': 0.29655758300050356, 'f1': 0.3270981618080626}, 'combined': 0.23011930981471743, 'epoch': 12} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29660087719298245, 'r': 0.2881747159090909, 'f1': 0.29232708933717577}, 'combined': 0.21539890793265581, 'epoch': 12} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36975379881647924, 'r': 0.30032404620028447, 'f1': 0.3314419594210368}, 'combined': 0.23532379118893612, 'epoch': 12} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31444919517102615, 'r': 0.2959872159090909, 'f1': 0.3049390243902439}, 'combined': 0.22469191270860076, 'epoch': 12} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37687140630343324, 'r': 0.2806769564427667, 'f1': 0.3217379139985221}, 'combined': 0.22843391893895068, 'epoch': 12} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3229166666666667, 'r': 0.22142857142857142, 'f1': 0.2627118644067797}, 'combined': 0.1751412429378531, 'epoch': 12} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3055555555555556, 'r': 0.358695652173913, 'f1': 0.32999999999999996}, 'combined': 0.16499999999999998, 'epoch': 12} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3076923076923077, 'r': 0.13793103448275862, 'f1': 0.1904761904761905}, 'combined': 0.12698412698412698, 'epoch': 12} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32931238967343335, 'r': 0.3212043194731405, 'f1': 0.3252078248932276}, 'combined': 0.2396268183423782, 'epoch': 10} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38073307869771683, 'r': 0.30299176943483225, 'f1': 0.3374427286417082}, 'combined': 0.23958433733561282, 'epoch': 10} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31666666666666665, 'r': 0.41304347826086957, 'f1': 0.3584905660377358}, 'combined': 0.1792452830188679, 'epoch': 10} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3394620582120582, 'r': 0.3092447916666667, 'f1': 0.32364965312190286}, 'combined': 0.23847869177403366, 'epoch': 7} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36468931207169214, 'r': 0.2587193810770381, 'f1': 0.30269771384817323}, 'combined': 0.214915376832203, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 7} ****************************** Epoch: 13 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:47:39.319038: step: 2/463, loss: 0.07715045660734177 2023-01-24 01:47:39.902377: step: 4/463, loss: 0.17626816034317017 2023-01-24 01:47:40.439944: step: 6/463, loss: 0.09047288447618484 2023-01-24 01:47:41.093633: step: 8/463, loss: 0.10111837834119797 2023-01-24 01:47:41.705966: step: 10/463, loss: 0.09663622826337814 2023-01-24 01:47:42.353943: step: 12/463, loss: 0.048565465956926346 2023-01-24 01:47:42.992569: step: 14/463, loss: 0.2606889009475708 2023-01-24 01:47:43.620857: step: 16/463, loss: 0.7640372514724731 2023-01-24 01:47:44.258885: step: 18/463, loss: 0.13398267328739166 2023-01-24 01:47:44.899793: step: 20/463, loss: 0.09927663207054138 2023-01-24 01:47:45.506117: step: 22/463, loss: 0.09460526704788208 2023-01-24 01:47:46.108391: step: 24/463, loss: 0.2822362184524536 2023-01-24 01:47:46.713452: step: 26/463, loss: 0.1689906269311905 2023-01-24 01:47:47.329373: step: 28/463, loss: 0.242484450340271 2023-01-24 01:47:47.961877: step: 30/463, loss: 0.08186733722686768 2023-01-24 01:47:48.613732: step: 32/463, loss: 0.3471973240375519 2023-01-24 01:47:49.267292: step: 34/463, loss: 1.8456997871398926 2023-01-24 01:47:49.924124: step: 36/463, loss: 0.03691093251109123 2023-01-24 01:47:50.564328: step: 38/463, loss: 0.21354997158050537 2023-01-24 01:47:51.160280: step: 40/463, loss: 0.21336926519870758 2023-01-24 01:47:51.819571: step: 42/463, loss: 2.050889015197754 2023-01-24 01:47:52.406700: step: 44/463, loss: 0.06599429994821548 2023-01-24 01:47:53.021559: step: 46/463, loss: 0.1654549241065979 2023-01-24 01:47:53.666884: step: 48/463, loss: 0.11499933153390884 2023-01-24 01:47:54.273869: step: 50/463, loss: 0.2938278317451477 2023-01-24 01:47:54.920593: step: 52/463, loss: 0.9715784788131714 2023-01-24 01:47:55.571721: step: 54/463, loss: 0.14135387539863586 2023-01-24 01:47:56.257124: step: 56/463, loss: 0.15973469614982605 2023-01-24 01:47:56.869961: step: 58/463, loss: 0.1307053416967392 2023-01-24 01:47:57.530378: step: 60/463, loss: 0.0721079632639885 2023-01-24 01:47:58.199561: step: 62/463, loss: 0.2540896236896515 2023-01-24 01:47:58.800752: step: 64/463, loss: 0.1040586605668068 2023-01-24 01:47:59.481224: step: 66/463, loss: 2.51369047164917 2023-01-24 01:48:00.109762: step: 68/463, loss: 0.12186824530363083 2023-01-24 01:48:00.772191: step: 70/463, loss: 0.08692692220211029 2023-01-24 01:48:01.352742: step: 72/463, loss: 0.11394850164651871 2023-01-24 01:48:01.956299: step: 74/463, loss: 0.18425855040550232 2023-01-24 01:48:02.579701: step: 76/463, loss: 0.2875189185142517 2023-01-24 01:48:03.310736: step: 78/463, loss: 0.06512501090765 2023-01-24 01:48:03.908396: step: 80/463, loss: 0.10049843788146973 2023-01-24 01:48:04.539171: step: 82/463, loss: 0.12004828453063965 2023-01-24 01:48:05.174724: step: 84/463, loss: 0.6750378012657166 2023-01-24 01:48:05.771106: step: 86/463, loss: 0.09382369369268417 2023-01-24 01:48:06.363765: step: 88/463, loss: 0.34363341331481934 2023-01-24 01:48:06.954340: step: 90/463, loss: 0.0973958745598793 2023-01-24 01:48:07.529815: step: 92/463, loss: 0.1947895884513855 2023-01-24 01:48:08.158656: step: 94/463, loss: 0.08603136241436005 2023-01-24 01:48:08.770329: step: 96/463, loss: 0.13107547163963318 2023-01-24 01:48:09.325991: step: 98/463, loss: 0.09782901406288147 2023-01-24 01:48:09.944909: step: 100/463, loss: 0.0840311124920845 2023-01-24 01:48:10.535204: step: 102/463, loss: 0.18765923380851746 2023-01-24 01:48:11.137876: step: 104/463, loss: 0.1287512630224228 2023-01-24 01:48:11.824303: step: 106/463, loss: 0.10870682448148727 2023-01-24 01:48:12.481009: step: 108/463, loss: 0.051599256694316864 2023-01-24 01:48:13.108586: step: 110/463, loss: 0.09492052346467972 2023-01-24 01:48:13.767601: step: 112/463, loss: 0.10218461602926254 2023-01-24 01:48:14.441793: step: 114/463, loss: 0.8727315068244934 2023-01-24 01:48:15.064005: step: 116/463, loss: 0.07555047422647476 2023-01-24 01:48:15.687638: step: 118/463, loss: 0.04539705067873001 2023-01-24 01:48:16.366774: step: 120/463, loss: 0.13439643383026123 2023-01-24 01:48:17.019861: step: 122/463, loss: 0.13678649067878723 2023-01-24 01:48:17.716844: step: 124/463, loss: 0.06640574336051941 2023-01-24 01:48:18.312388: step: 126/463, loss: 0.30759304761886597 2023-01-24 01:48:19.009088: step: 128/463, loss: 0.07167794555425644 2023-01-24 01:48:19.622264: step: 130/463, loss: 0.33377605676651 2023-01-24 01:48:20.228423: step: 132/463, loss: 0.3663799464702606 2023-01-24 01:48:20.908574: step: 134/463, loss: 0.13543781638145447 2023-01-24 01:48:21.538068: step: 136/463, loss: 0.22562988102436066 2023-01-24 01:48:22.154215: step: 138/463, loss: 0.5998987555503845 2023-01-24 01:48:22.787594: step: 140/463, loss: 0.12311425060033798 2023-01-24 01:48:23.422867: step: 142/463, loss: 0.15009371936321259 2023-01-24 01:48:24.013204: step: 144/463, loss: 0.10054293274879456 2023-01-24 01:48:24.617357: step: 146/463, loss: 0.16743461787700653 2023-01-24 01:48:25.269901: step: 148/463, loss: 0.13950590789318085 2023-01-24 01:48:25.902709: step: 150/463, loss: 0.12653060257434845 2023-01-24 01:48:26.586348: step: 152/463, loss: 0.33352425694465637 2023-01-24 01:48:27.220942: step: 154/463, loss: 0.04626433923840523 2023-01-24 01:48:27.851989: step: 156/463, loss: 0.15094798803329468 2023-01-24 01:48:28.462721: step: 158/463, loss: 0.3544413447380066 2023-01-24 01:48:29.045244: step: 160/463, loss: 0.0764884278178215 2023-01-24 01:48:29.656410: step: 162/463, loss: 0.14624816179275513 2023-01-24 01:48:30.398137: step: 164/463, loss: 0.20005744695663452 2023-01-24 01:48:31.062257: step: 166/463, loss: 0.9576687812805176 2023-01-24 01:48:31.682649: step: 168/463, loss: 0.048742443323135376 2023-01-24 01:48:32.342307: step: 170/463, loss: 0.20183056592941284 2023-01-24 01:48:32.978078: step: 172/463, loss: 0.13144376873970032 2023-01-24 01:48:33.672895: step: 174/463, loss: 0.09268185496330261 2023-01-24 01:48:34.247673: step: 176/463, loss: 0.08770421147346497 2023-01-24 01:48:34.928062: step: 178/463, loss: 0.07421009242534637 2023-01-24 01:48:35.552583: step: 180/463, loss: 0.09843643009662628 2023-01-24 01:48:36.195802: step: 182/463, loss: 0.15081526339054108 2023-01-24 01:48:36.836904: step: 184/463, loss: 0.04560175538063049 2023-01-24 01:48:37.463491: step: 186/463, loss: 0.5173816680908203 2023-01-24 01:48:38.070525: step: 188/463, loss: 0.07922437787055969 2023-01-24 01:48:38.683044: step: 190/463, loss: 0.04861622303724289 2023-01-24 01:48:39.244022: step: 192/463, loss: 0.19472859799861908 2023-01-24 01:48:39.918136: step: 194/463, loss: 0.2374739646911621 2023-01-24 01:48:40.536625: step: 196/463, loss: 0.05982042849063873 2023-01-24 01:48:41.192322: step: 198/463, loss: 0.08126036077737808 2023-01-24 01:48:41.839192: step: 200/463, loss: 0.07501547038555145 2023-01-24 01:48:42.437984: step: 202/463, loss: 0.24157409369945526 2023-01-24 01:48:43.078632: step: 204/463, loss: 0.11012373864650726 2023-01-24 01:48:43.769662: step: 206/463, loss: 0.0658709779381752 2023-01-24 01:48:44.463072: step: 208/463, loss: 0.16326116025447845 2023-01-24 01:48:45.089750: step: 210/463, loss: 0.0833217054605484 2023-01-24 01:48:45.707845: step: 212/463, loss: 0.17286795377731323 2023-01-24 01:48:46.356804: step: 214/463, loss: 0.1687588095664978 2023-01-24 01:48:46.990609: step: 216/463, loss: 0.03524814173579216 2023-01-24 01:48:47.639078: step: 218/463, loss: 0.14507904648780823 2023-01-24 01:48:48.242073: step: 220/463, loss: 0.08137906342744827 2023-01-24 01:48:48.829977: step: 222/463, loss: 0.055628810077905655 2023-01-24 01:48:49.450118: step: 224/463, loss: 0.12875062227249146 2023-01-24 01:48:50.042159: step: 226/463, loss: 0.2636549472808838 2023-01-24 01:48:50.718554: step: 228/463, loss: 0.05585656687617302 2023-01-24 01:48:51.422064: step: 230/463, loss: 0.12462087720632553 2023-01-24 01:48:52.005320: step: 232/463, loss: 0.1585254669189453 2023-01-24 01:48:52.597889: step: 234/463, loss: 0.23692777752876282 2023-01-24 01:48:53.233712: step: 236/463, loss: 0.12348209321498871 2023-01-24 01:48:53.839125: step: 238/463, loss: 0.1301085352897644 2023-01-24 01:48:54.424588: step: 240/463, loss: 0.13280557096004486 2023-01-24 01:48:55.040329: step: 242/463, loss: 0.20724812150001526 2023-01-24 01:48:55.724234: step: 244/463, loss: 0.9284301996231079 2023-01-24 01:48:56.306358: step: 246/463, loss: 0.11805537343025208 2023-01-24 01:48:56.874733: step: 248/463, loss: 0.1631237268447876 2023-01-24 01:48:57.553524: step: 250/463, loss: 0.09987673908472061 2023-01-24 01:48:58.202668: step: 252/463, loss: 0.5556800961494446 2023-01-24 01:48:58.840606: step: 254/463, loss: 0.19669793546199799 2023-01-24 01:48:59.439412: step: 256/463, loss: 0.7739332914352417 2023-01-24 01:49:00.054085: step: 258/463, loss: 0.18807286024093628 2023-01-24 01:49:00.660378: step: 260/463, loss: 0.08698822557926178 2023-01-24 01:49:01.218223: step: 262/463, loss: 0.08953163027763367 2023-01-24 01:49:01.809689: step: 264/463, loss: 0.11042303591966629 2023-01-24 01:49:02.404779: step: 266/463, loss: 0.07850117236375809 2023-01-24 01:49:03.013320: step: 268/463, loss: 0.08845565468072891 2023-01-24 01:49:03.614034: step: 270/463, loss: 0.12781357765197754 2023-01-24 01:49:04.177372: step: 272/463, loss: 0.22111476957798004 2023-01-24 01:49:04.763545: step: 274/463, loss: 0.09485345333814621 2023-01-24 01:49:05.421005: step: 276/463, loss: 0.323935866355896 2023-01-24 01:49:06.043607: step: 278/463, loss: 0.08854546397924423 2023-01-24 01:49:06.708857: step: 280/463, loss: 0.06878294795751572 2023-01-24 01:49:07.261404: step: 282/463, loss: 0.15798306465148926 2023-01-24 01:49:07.845306: step: 284/463, loss: 0.0872483029961586 2023-01-24 01:49:08.502246: step: 286/463, loss: 0.17523923516273499 2023-01-24 01:49:09.140196: step: 288/463, loss: 0.1494188755750656 2023-01-24 01:49:09.767657: step: 290/463, loss: 0.11170358955860138 2023-01-24 01:49:10.473533: step: 292/463, loss: 0.13842348754405975 2023-01-24 01:49:11.053042: step: 294/463, loss: 0.07883201539516449 2023-01-24 01:49:11.653351: step: 296/463, loss: 1.0998618602752686 2023-01-24 01:49:12.302318: step: 298/463, loss: 0.12263484299182892 2023-01-24 01:49:12.958650: step: 300/463, loss: 0.18602775037288666 2023-01-24 01:49:13.617438: step: 302/463, loss: 0.7644849419593811 2023-01-24 01:49:14.258514: step: 304/463, loss: 0.2587268650531769 2023-01-24 01:49:14.814477: step: 306/463, loss: 0.848766565322876 2023-01-24 01:49:15.503092: step: 308/463, loss: 0.2991057336330414 2023-01-24 01:49:16.105837: step: 310/463, loss: 0.1259213387966156 2023-01-24 01:49:16.699872: step: 312/463, loss: 0.030424101278185844 2023-01-24 01:49:17.265026: step: 314/463, loss: 0.05634269490838051 2023-01-24 01:49:17.875366: step: 316/463, loss: 0.14841006696224213 2023-01-24 01:49:18.433890: step: 318/463, loss: 0.10852553695440292 2023-01-24 01:49:19.031660: step: 320/463, loss: 0.07619000971317291 2023-01-24 01:49:19.723620: step: 322/463, loss: 0.0630456730723381 2023-01-24 01:49:20.345333: step: 324/463, loss: 0.1681043654680252 2023-01-24 01:49:20.956279: step: 326/463, loss: 0.12949876487255096 2023-01-24 01:49:21.553875: step: 328/463, loss: 0.24695871770381927 2023-01-24 01:49:22.239922: step: 330/463, loss: 0.07368968427181244 2023-01-24 01:49:22.825489: step: 332/463, loss: 0.1901990920305252 2023-01-24 01:49:23.449215: step: 334/463, loss: 0.1145344078540802 2023-01-24 01:49:24.046316: step: 336/463, loss: 0.592545211315155 2023-01-24 01:49:24.692621: step: 338/463, loss: 0.07059413194656372 2023-01-24 01:49:25.337182: step: 340/463, loss: 0.37856224179267883 2023-01-24 01:49:25.921532: step: 342/463, loss: 0.1419743001461029 2023-01-24 01:49:26.617014: step: 344/463, loss: 0.10860243439674377 2023-01-24 01:49:27.181829: step: 346/463, loss: 0.15411370992660522 2023-01-24 01:49:27.746980: step: 348/463, loss: 0.048836492002010345 2023-01-24 01:49:28.279541: step: 350/463, loss: 0.2796850800514221 2023-01-24 01:49:28.972333: step: 352/463, loss: 1.5251041650772095 2023-01-24 01:49:29.634237: step: 354/463, loss: 0.1807471364736557 2023-01-24 01:49:30.245254: step: 356/463, loss: 0.1309095025062561 2023-01-24 01:49:30.959999: step: 358/463, loss: 0.5338518023490906 2023-01-24 01:49:31.639839: step: 360/463, loss: 0.5842429995536804 2023-01-24 01:49:32.290783: step: 362/463, loss: 0.4199536442756653 2023-01-24 01:49:32.906715: step: 364/463, loss: 0.3594641387462616 2023-01-24 01:49:33.496004: step: 366/463, loss: 0.45880165696144104 2023-01-24 01:49:34.107881: step: 368/463, loss: 0.2952485680580139 2023-01-24 01:49:34.724334: step: 370/463, loss: 0.25958818197250366 2023-01-24 01:49:35.365010: step: 372/463, loss: 0.13238611817359924 2023-01-24 01:49:35.976798: step: 374/463, loss: 0.10453855246305466 2023-01-24 01:49:36.664879: step: 376/463, loss: 1.0471060276031494 2023-01-24 01:49:37.274848: step: 378/463, loss: 0.1188134029507637 2023-01-24 01:49:37.912444: step: 380/463, loss: 0.035368937999010086 2023-01-24 01:49:38.512849: step: 382/463, loss: 1.0965557098388672 2023-01-24 01:49:39.090938: step: 384/463, loss: 0.13825328648090363 2023-01-24 01:49:39.692972: step: 386/463, loss: 0.22155530750751495 2023-01-24 01:49:40.348810: step: 388/463, loss: 0.06577132642269135 2023-01-24 01:49:41.004960: step: 390/463, loss: 0.1691228151321411 2023-01-24 01:49:41.745174: step: 392/463, loss: 0.5017653107643127 2023-01-24 01:49:42.390647: step: 394/463, loss: 0.19190388917922974 2023-01-24 01:49:43.038130: step: 396/463, loss: 0.3000826835632324 2023-01-24 01:49:43.708198: step: 398/463, loss: 0.0748780220746994 2023-01-24 01:49:44.321177: step: 400/463, loss: 0.3376230001449585 2023-01-24 01:49:44.966859: step: 402/463, loss: 0.17965638637542725 2023-01-24 01:49:45.578425: step: 404/463, loss: 0.2102227360010147 2023-01-24 01:49:46.245706: step: 406/463, loss: 0.09339188039302826 2023-01-24 01:49:46.851783: step: 408/463, loss: 0.5088179111480713 2023-01-24 01:49:47.501050: step: 410/463, loss: 0.16513828933238983 2023-01-24 01:49:48.079085: step: 412/463, loss: 0.7525275349617004 2023-01-24 01:49:48.727596: step: 414/463, loss: 0.23754090070724487 2023-01-24 01:49:49.331757: step: 416/463, loss: 0.09586963057518005 2023-01-24 01:49:49.961343: step: 418/463, loss: 0.09333712607622147 2023-01-24 01:49:50.618546: step: 420/463, loss: 0.06053384393453598 2023-01-24 01:49:51.180749: step: 422/463, loss: 0.13985615968704224 2023-01-24 01:49:51.882277: step: 424/463, loss: 3.033512592315674 2023-01-24 01:49:52.459164: step: 426/463, loss: 0.07044391334056854 2023-01-24 01:49:53.066222: step: 428/463, loss: 0.27722087502479553 2023-01-24 01:49:53.722382: step: 430/463, loss: 0.10515755414962769 2023-01-24 01:49:54.337399: step: 432/463, loss: 0.15704762935638428 2023-01-24 01:49:55.007554: step: 434/463, loss: 0.1223125085234642 2023-01-24 01:49:55.637044: step: 436/463, loss: 0.07780426740646362 2023-01-24 01:49:56.265027: step: 438/463, loss: 0.17271827161312103 2023-01-24 01:49:56.860233: step: 440/463, loss: 0.27617233991622925 2023-01-24 01:49:57.532985: step: 442/463, loss: 0.1036338284611702 2023-01-24 01:49:58.147280: step: 444/463, loss: 0.11270903795957565 2023-01-24 01:49:58.777209: step: 446/463, loss: 0.08207792788743973 2023-01-24 01:49:59.433601: step: 448/463, loss: 0.07351411879062653 2023-01-24 01:50:00.035226: step: 450/463, loss: 0.12301245331764221 2023-01-24 01:50:00.658175: step: 452/463, loss: 0.28248003125190735 2023-01-24 01:50:01.311663: step: 454/463, loss: 0.04307496175169945 2023-01-24 01:50:01.930336: step: 456/463, loss: 0.15754453837871552 2023-01-24 01:50:02.595136: step: 458/463, loss: 0.10318835824728012 2023-01-24 01:50:03.247979: step: 460/463, loss: 0.18405689299106598 2023-01-24 01:50:03.927486: step: 462/463, loss: 0.14340944588184357 2023-01-24 01:50:04.488920: step: 464/463, loss: 0.24692212045192719 2023-01-24 01:50:05.057285: step: 466/463, loss: 0.1584959179162979 2023-01-24 01:50:05.742556: step: 468/463, loss: 0.25680702924728394 2023-01-24 01:50:06.361907: step: 470/463, loss: 0.7350091934204102 2023-01-24 01:50:06.962254: step: 472/463, loss: 0.08311799168586731 2023-01-24 01:50:07.629160: step: 474/463, loss: 4.053161144256592 2023-01-24 01:50:08.219182: step: 476/463, loss: 0.014629831537604332 2023-01-24 01:50:08.806315: step: 478/463, loss: 0.11329811066389084 2023-01-24 01:50:09.448913: step: 480/463, loss: 0.5797063112258911 2023-01-24 01:50:10.027190: step: 482/463, loss: 0.18678393959999084 2023-01-24 01:50:10.651620: step: 484/463, loss: 0.12744112312793732 2023-01-24 01:50:11.355481: step: 486/463, loss: 0.39340704679489136 2023-01-24 01:50:11.945684: step: 488/463, loss: 0.11956602334976196 2023-01-24 01:50:12.489880: step: 490/463, loss: 0.06216290965676308 2023-01-24 01:50:13.203320: step: 492/463, loss: 0.0504252165555954 2023-01-24 01:50:13.840887: step: 494/463, loss: 0.1524675488471985 2023-01-24 01:50:14.485994: step: 496/463, loss: 0.024282824248075485 2023-01-24 01:50:15.113614: step: 498/463, loss: 0.11945275217294693 2023-01-24 01:50:15.723776: step: 500/463, loss: 0.19682088494300842 2023-01-24 01:50:16.318081: step: 502/463, loss: 0.21585847437381744 2023-01-24 01:50:16.934440: step: 504/463, loss: 0.19128264486789703 2023-01-24 01:50:17.623248: step: 506/463, loss: 0.15279895067214966 2023-01-24 01:50:18.311736: step: 508/463, loss: 0.09002558141946793 2023-01-24 01:50:18.911974: step: 510/463, loss: 0.09578165411949158 2023-01-24 01:50:19.593763: step: 512/463, loss: 0.030026594176888466 2023-01-24 01:50:20.228376: step: 514/463, loss: 0.1714993417263031 2023-01-24 01:50:20.840997: step: 516/463, loss: 0.09291969239711761 2023-01-24 01:50:21.462929: step: 518/463, loss: 0.04177824780344963 2023-01-24 01:50:22.097702: step: 520/463, loss: 0.060084983706474304 2023-01-24 01:50:22.775620: step: 522/463, loss: 0.11740418523550034 2023-01-24 01:50:23.405288: step: 524/463, loss: 0.15426595509052277 2023-01-24 01:50:23.998158: step: 526/463, loss: 0.21642716228961945 2023-01-24 01:50:24.595581: step: 528/463, loss: 0.2500775456428528 2023-01-24 01:50:25.161349: step: 530/463, loss: 0.10034143924713135 2023-01-24 01:50:25.800679: step: 532/463, loss: 0.11458557099103928 2023-01-24 01:50:26.410174: step: 534/463, loss: 0.0942230299115181 2023-01-24 01:50:27.066637: step: 536/463, loss: 0.2646922469139099 2023-01-24 01:50:27.733715: step: 538/463, loss: 0.34488171339035034 2023-01-24 01:50:28.375252: step: 540/463, loss: 0.13709674775600433 2023-01-24 01:50:28.969174: step: 542/463, loss: 0.3543678820133209 2023-01-24 01:50:29.607900: step: 544/463, loss: 0.13262538611888885 2023-01-24 01:50:30.193291: step: 546/463, loss: 0.7900317907333374 2023-01-24 01:50:30.811005: step: 548/463, loss: 0.07775469124317169 2023-01-24 01:50:31.434296: step: 550/463, loss: 0.07805902510881424 2023-01-24 01:50:32.122590: step: 552/463, loss: 0.09013087302446365 2023-01-24 01:50:32.762812: step: 554/463, loss: 0.08888848125934601 2023-01-24 01:50:33.404525: step: 556/463, loss: 0.1380084902048111 2023-01-24 01:50:33.968957: step: 558/463, loss: 0.01655413582921028 2023-01-24 01:50:34.616221: step: 560/463, loss: 0.10006501525640488 2023-01-24 01:50:35.195713: step: 562/463, loss: 0.1177874505519867 2023-01-24 01:50:35.930598: step: 564/463, loss: 0.9455326795578003 2023-01-24 01:50:36.559146: step: 566/463, loss: 0.21906502544879913 2023-01-24 01:50:37.323840: step: 568/463, loss: 0.3474324941635132 2023-01-24 01:50:37.874164: step: 570/463, loss: 0.10896273702383041 2023-01-24 01:50:38.519171: step: 572/463, loss: 0.7591073513031006 2023-01-24 01:50:39.151478: step: 574/463, loss: 0.3682263493537903 2023-01-24 01:50:39.825033: step: 576/463, loss: 0.13381652534008026 2023-01-24 01:50:40.470361: step: 578/463, loss: 0.18982787430286407 2023-01-24 01:50:41.135946: step: 580/463, loss: 0.1310027688741684 2023-01-24 01:50:41.777392: step: 582/463, loss: 0.1028677448630333 2023-01-24 01:50:42.469996: step: 584/463, loss: 0.05163278430700302 2023-01-24 01:50:43.089201: step: 586/463, loss: 0.0647062361240387 2023-01-24 01:50:43.757192: step: 588/463, loss: 0.14167732000350952 2023-01-24 01:50:44.344615: step: 590/463, loss: 0.16348639130592346 2023-01-24 01:50:44.944216: step: 592/463, loss: 0.15096527338027954 2023-01-24 01:50:45.511974: step: 594/463, loss: 0.1730213463306427 2023-01-24 01:50:46.125753: step: 596/463, loss: 0.17887790501117706 2023-01-24 01:50:46.758206: step: 598/463, loss: 0.3739832937717438 2023-01-24 01:50:47.444434: step: 600/463, loss: 0.07661117613315582 2023-01-24 01:50:48.013619: step: 602/463, loss: 0.037345800548791885 2023-01-24 01:50:48.632498: step: 604/463, loss: 0.08917001634836197 2023-01-24 01:50:49.248648: step: 606/463, loss: 0.15392132103443146 2023-01-24 01:50:49.862301: step: 608/463, loss: 0.37207385897636414 2023-01-24 01:50:50.469003: step: 610/463, loss: 0.023572387173771858 2023-01-24 01:50:51.120991: step: 612/463, loss: 0.2799219489097595 2023-01-24 01:50:51.805188: step: 614/463, loss: 0.807952344417572 2023-01-24 01:50:52.402023: step: 616/463, loss: 0.10051819682121277 2023-01-24 01:50:53.031007: step: 618/463, loss: 0.36269262433052063 2023-01-24 01:50:53.608061: step: 620/463, loss: 0.39271581172943115 2023-01-24 01:50:54.274087: step: 622/463, loss: 0.4996991455554962 2023-01-24 01:50:54.923860: step: 624/463, loss: 0.06382884830236435 2023-01-24 01:50:55.504801: step: 626/463, loss: 1.0828782320022583 2023-01-24 01:50:56.210580: step: 628/463, loss: 0.1181720569729805 2023-01-24 01:50:56.874659: step: 630/463, loss: 0.06995480507612228 2023-01-24 01:50:57.470404: step: 632/463, loss: 0.18481090664863586 2023-01-24 01:50:58.066360: step: 634/463, loss: 0.08411180227994919 2023-01-24 01:50:58.710893: step: 636/463, loss: 0.3115050494670868 2023-01-24 01:50:59.358846: step: 638/463, loss: 0.08337608724832535 2023-01-24 01:50:59.951076: step: 640/463, loss: 0.40739715099334717 2023-01-24 01:51:00.590667: step: 642/463, loss: 0.07842908799648285 2023-01-24 01:51:01.213122: step: 644/463, loss: 0.10911142081022263 2023-01-24 01:51:01.828923: step: 646/463, loss: 0.14877241849899292 2023-01-24 01:51:02.410937: step: 648/463, loss: 0.07862099260091782 2023-01-24 01:51:03.033082: step: 650/463, loss: 0.4135875999927521 2023-01-24 01:51:03.676642: step: 652/463, loss: 0.14661435782909393 2023-01-24 01:51:04.323545: step: 654/463, loss: 0.054847702383995056 2023-01-24 01:51:04.961229: step: 656/463, loss: 1.139731764793396 2023-01-24 01:51:05.530420: step: 658/463, loss: 0.09918849170207977 2023-01-24 01:51:06.080061: step: 660/463, loss: 0.14243076741695404 2023-01-24 01:51:06.694176: step: 662/463, loss: 0.7089715003967285 2023-01-24 01:51:07.289607: step: 664/463, loss: 0.04949033632874489 2023-01-24 01:51:07.914133: step: 666/463, loss: 0.0663076713681221 2023-01-24 01:51:08.565206: step: 668/463, loss: 0.24517185986042023 2023-01-24 01:51:09.190868: step: 670/463, loss: 0.1950610727071762 2023-01-24 01:51:09.779876: step: 672/463, loss: 0.18894287943840027 2023-01-24 01:51:10.398268: step: 674/463, loss: 0.17281474173069 2023-01-24 01:51:11.038257: step: 676/463, loss: 1.1485049724578857 2023-01-24 01:51:11.649391: step: 678/463, loss: 0.17677846550941467 2023-01-24 01:51:12.334258: step: 680/463, loss: 0.07211374491453171 2023-01-24 01:51:13.016473: step: 682/463, loss: 0.05439876392483711 2023-01-24 01:51:13.648647: step: 684/463, loss: 0.08759650588035583 2023-01-24 01:51:14.239298: step: 686/463, loss: 0.45135581493377686 2023-01-24 01:51:14.906639: step: 688/463, loss: 0.10819895565509796 2023-01-24 01:51:15.498405: step: 690/463, loss: 0.061925120651721954 2023-01-24 01:51:16.185105: step: 692/463, loss: 0.15177924931049347 2023-01-24 01:51:16.816193: step: 694/463, loss: 0.16498692333698273 2023-01-24 01:51:17.448112: step: 696/463, loss: 0.08842567354440689 2023-01-24 01:51:18.051101: step: 698/463, loss: 0.0699852705001831 2023-01-24 01:51:18.663256: step: 700/463, loss: 0.33573243021965027 2023-01-24 01:51:19.336796: step: 702/463, loss: 0.044539425522089005 2023-01-24 01:51:19.984890: step: 704/463, loss: 0.048490721732378006 2023-01-24 01:51:20.673133: step: 706/463, loss: 0.12905213236808777 2023-01-24 01:51:21.323365: step: 708/463, loss: 0.06528699398040771 2023-01-24 01:51:21.935702: step: 710/463, loss: 0.020870089530944824 2023-01-24 01:51:22.597320: step: 712/463, loss: 0.0738677978515625 2023-01-24 01:51:23.186752: step: 714/463, loss: 0.5037636756896973 2023-01-24 01:51:23.812145: step: 716/463, loss: 0.07096880674362183 2023-01-24 01:51:24.396294: step: 718/463, loss: 0.18188151717185974 2023-01-24 01:51:24.975278: step: 720/463, loss: 0.1535424143075943 2023-01-24 01:51:25.615042: step: 722/463, loss: 0.15016430616378784 2023-01-24 01:51:26.213620: step: 724/463, loss: 0.08882687240839005 2023-01-24 01:51:26.801845: step: 726/463, loss: 0.11528695374727249 2023-01-24 01:51:27.459912: step: 728/463, loss: 0.042608533054590225 2023-01-24 01:51:28.061650: step: 730/463, loss: 0.13893210887908936 2023-01-24 01:51:28.619808: step: 732/463, loss: 0.1275879442691803 2023-01-24 01:51:29.302249: step: 734/463, loss: 0.1544790118932724 2023-01-24 01:51:29.956886: step: 736/463, loss: 0.16303294897079468 2023-01-24 01:51:30.561982: step: 738/463, loss: 0.574346661567688 2023-01-24 01:51:31.177772: step: 740/463, loss: 0.20579670369625092 2023-01-24 01:51:31.855050: step: 742/463, loss: 0.12907066941261292 2023-01-24 01:51:32.672740: step: 744/463, loss: 0.10023466497659683 2023-01-24 01:51:33.267780: step: 746/463, loss: 0.15243661403656006 2023-01-24 01:51:33.914634: step: 748/463, loss: 0.6531656384468079 2023-01-24 01:51:34.571460: step: 750/463, loss: 0.751803457736969 2023-01-24 01:51:35.155396: step: 752/463, loss: 0.13373959064483643 2023-01-24 01:51:35.841379: step: 754/463, loss: 0.15216472744941711 2023-01-24 01:51:36.468376: step: 756/463, loss: 0.24474845826625824 2023-01-24 01:51:37.072353: step: 758/463, loss: 0.09836621582508087 2023-01-24 01:51:37.686038: step: 760/463, loss: 0.12467021495103836 2023-01-24 01:51:38.313860: step: 762/463, loss: 0.19514428079128265 2023-01-24 01:51:38.980889: step: 764/463, loss: 0.28752002120018005 2023-01-24 01:51:39.574632: step: 766/463, loss: 0.061785344034433365 2023-01-24 01:51:40.190573: step: 768/463, loss: 0.1934160441160202 2023-01-24 01:51:40.786571: step: 770/463, loss: 0.21600143611431122 2023-01-24 01:51:41.416825: step: 772/463, loss: 0.12552256882190704 2023-01-24 01:51:41.999707: step: 774/463, loss: 0.09164809435606003 2023-01-24 01:51:42.649437: step: 776/463, loss: 0.10684353113174438 2023-01-24 01:51:43.363137: step: 778/463, loss: 0.8338512778282166 2023-01-24 01:51:43.969351: step: 780/463, loss: 0.17680566012859344 2023-01-24 01:51:44.561819: step: 782/463, loss: 0.07987955212593079 2023-01-24 01:51:45.149729: step: 784/463, loss: 0.09165406227111816 2023-01-24 01:51:45.804080: step: 786/463, loss: 0.12681661546230316 2023-01-24 01:51:46.472868: step: 788/463, loss: 0.5251785516738892 2023-01-24 01:51:47.097069: step: 790/463, loss: 0.06170034036040306 2023-01-24 01:51:47.725774: step: 792/463, loss: 0.16815626621246338 2023-01-24 01:51:48.362732: step: 794/463, loss: 0.11161208897829056 2023-01-24 01:51:48.962976: step: 796/463, loss: 0.18152792751789093 2023-01-24 01:51:49.516837: step: 798/463, loss: 0.16339319944381714 2023-01-24 01:51:50.113412: step: 800/463, loss: 0.1047753244638443 2023-01-24 01:51:50.757199: step: 802/463, loss: 0.046365249902009964 2023-01-24 01:51:51.428893: step: 804/463, loss: 0.2142631709575653 2023-01-24 01:51:52.035222: step: 806/463, loss: 0.055721621960401535 2023-01-24 01:51:52.656224: step: 808/463, loss: 0.09182435274124146 2023-01-24 01:51:53.379587: step: 810/463, loss: 0.05447874590754509 2023-01-24 01:51:53.965476: step: 812/463, loss: 0.15230315923690796 2023-01-24 01:51:54.557611: step: 814/463, loss: 0.17928914725780487 2023-01-24 01:51:55.197132: step: 816/463, loss: 0.20551984012126923 2023-01-24 01:51:55.840476: step: 818/463, loss: 0.15087704360485077 2023-01-24 01:51:56.424506: step: 820/463, loss: 0.0357186459004879 2023-01-24 01:51:57.043049: step: 822/463, loss: 0.3146533668041229 2023-01-24 01:51:57.660919: step: 824/463, loss: 0.13276340067386627 2023-01-24 01:51:58.345461: step: 826/463, loss: 0.1698043793439865 2023-01-24 01:51:58.945249: step: 828/463, loss: 0.08129042387008667 2023-01-24 01:51:59.555676: step: 830/463, loss: 0.15380629897117615 2023-01-24 01:52:00.169404: step: 832/463, loss: 0.2728731632232666 2023-01-24 01:52:00.763697: step: 834/463, loss: 0.2058904618024826 2023-01-24 01:52:01.330309: step: 836/463, loss: 0.13499538600444794 2023-01-24 01:52:01.940325: step: 838/463, loss: 0.16113750636577606 2023-01-24 01:52:02.602771: step: 840/463, loss: 0.13392409682273865 2023-01-24 01:52:03.213866: step: 842/463, loss: 0.24133694171905518 2023-01-24 01:52:03.916313: step: 844/463, loss: 0.24763691425323486 2023-01-24 01:52:04.526523: step: 846/463, loss: 0.05439392104744911 2023-01-24 01:52:05.123338: step: 848/463, loss: 0.20345056056976318 2023-01-24 01:52:05.687810: step: 850/463, loss: 0.3996322751045227 2023-01-24 01:52:06.311891: step: 852/463, loss: 0.3392321765422821 2023-01-24 01:52:07.001631: step: 854/463, loss: 0.09084964543581009 2023-01-24 01:52:07.616548: step: 856/463, loss: 0.12288843095302582 2023-01-24 01:52:08.312488: step: 858/463, loss: 0.5640449523925781 2023-01-24 01:52:08.895958: step: 860/463, loss: 0.2216900885105133 2023-01-24 01:52:09.494372: step: 862/463, loss: 0.1531306803226471 2023-01-24 01:52:10.121226: step: 864/463, loss: 0.6109407544136047 2023-01-24 01:52:10.715356: step: 866/463, loss: 0.046287838369607925 2023-01-24 01:52:11.291658: step: 868/463, loss: 0.061131514608860016 2023-01-24 01:52:11.894064: step: 870/463, loss: 0.18641991913318634 2023-01-24 01:52:12.476267: step: 872/463, loss: 0.09152209013700485 2023-01-24 01:52:13.106659: step: 874/463, loss: 0.03575148805975914 2023-01-24 01:52:13.752061: step: 876/463, loss: 0.41226446628570557 2023-01-24 01:52:14.345747: step: 878/463, loss: 0.20341157913208008 2023-01-24 01:52:14.972485: step: 880/463, loss: 0.11690565198659897 2023-01-24 01:52:15.582776: step: 882/463, loss: 0.4283813536167145 2023-01-24 01:52:16.177924: step: 884/463, loss: 0.1917329728603363 2023-01-24 01:52:16.839163: step: 886/463, loss: 0.1519365757703781 2023-01-24 01:52:17.517181: step: 888/463, loss: 0.06250420212745667 2023-01-24 01:52:18.172722: step: 890/463, loss: 0.3563598096370697 2023-01-24 01:52:18.970897: step: 892/463, loss: 0.4314788579940796 2023-01-24 01:52:19.586809: step: 894/463, loss: 0.07296870648860931 2023-01-24 01:52:20.269343: step: 896/463, loss: 0.21870729327201843 2023-01-24 01:52:20.904853: step: 898/463, loss: 0.12574389576911926 2023-01-24 01:52:21.532130: step: 900/463, loss: 0.15787373483181 2023-01-24 01:52:22.133835: step: 902/463, loss: 0.24513542652130127 2023-01-24 01:52:22.801603: step: 904/463, loss: 0.07110174000263214 2023-01-24 01:52:23.427410: step: 906/463, loss: 0.1304849237203598 2023-01-24 01:52:24.051716: step: 908/463, loss: 0.2568942606449127 2023-01-24 01:52:24.666030: step: 910/463, loss: 0.07392503321170807 2023-01-24 01:52:25.370170: step: 912/463, loss: 0.10732297599315643 2023-01-24 01:52:26.095606: step: 914/463, loss: 3.5148022174835205 2023-01-24 01:52:26.624292: step: 916/463, loss: 0.19926095008850098 2023-01-24 01:52:27.242780: step: 918/463, loss: 0.16653738915920258 2023-01-24 01:52:27.814347: step: 920/463, loss: 0.4748832881450653 2023-01-24 01:52:28.399502: step: 922/463, loss: 0.11434020847082138 2023-01-24 01:52:29.014569: step: 924/463, loss: 0.0834210067987442 2023-01-24 01:52:29.624310: step: 926/463, loss: 0.2237122803926468 ================================================== Loss: 0.241 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3025465049656226, 'r': 0.3151526093391902, 'f1': 0.3087209234343088}, 'combined': 0.22747857516212225, 'epoch': 13} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.36528415003310943, 'r': 0.3063159408218309, 'f1': 0.33321126547870733}, 'combined': 0.23441998576391473, 'epoch': 13} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30500000000000005, 'r': 0.30904356060606064, 'f1': 0.3070084666039511}, 'combined': 0.2262167648660692, 'epoch': 13} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.35840909653994985, 'r': 0.3008132242575474, 'f1': 0.327095101400657}, 'combined': 0.23223752199446646, 'epoch': 13} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3140267498359849, 'r': 0.3187847308941059, 'f1': 0.31638785321821034}, 'combined': 0.23312789184499708, 'epoch': 13} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3721426265495868, 'r': 0.28601354704247717, 'f1': 0.323442480359147}, 'combined': 0.22964416105499438, 'epoch': 13} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.22916666666666663, 'r': 0.28809523809523807, 'f1': 0.2552742616033755}, 'combined': 0.170182841068917, 'epoch': 13} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2777777777777778, 'r': 0.32608695652173914, 'f1': 0.3}, 'combined': 0.15, 'epoch': 13} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36363636363636365, 'r': 0.13793103448275862, 'f1': 0.2}, 'combined': 0.13333333333333333, 'epoch': 13} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32931238967343335, 'r': 0.3212043194731405, 'f1': 0.3252078248932276}, 'combined': 0.2396268183423782, 'epoch': 10} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38073307869771683, 'r': 0.30299176943483225, 'f1': 0.3374427286417082}, 'combined': 0.23958433733561282, 'epoch': 10} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31666666666666665, 'r': 0.41304347826086957, 'f1': 0.3584905660377358}, 'combined': 0.1792452830188679, 'epoch': 10} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3394620582120582, 'r': 0.3092447916666667, 'f1': 0.32364965312190286}, 'combined': 0.23847869177403366, 'epoch': 7} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36468931207169214, 'r': 0.2587193810770381, 'f1': 0.30269771384817323}, 'combined': 0.214915376832203, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 7} ****************************** Epoch: 14 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:55:07.613644: step: 2/463, loss: 0.20947067439556122 2023-01-24 01:55:08.243803: step: 4/463, loss: 0.15164996683597565 2023-01-24 01:55:08.841829: step: 6/463, loss: 0.07437420636415482 2023-01-24 01:55:09.481949: step: 8/463, loss: 0.04952705278992653 2023-01-24 01:55:10.092620: step: 10/463, loss: 0.3587245047092438 2023-01-24 01:55:10.659277: step: 12/463, loss: 0.1311754286289215 2023-01-24 01:55:11.331769: step: 14/463, loss: 0.288726806640625 2023-01-24 01:55:11.955243: step: 16/463, loss: 0.37175294756889343 2023-01-24 01:55:12.570470: step: 18/463, loss: 0.17820286750793457 2023-01-24 01:55:13.299408: step: 20/463, loss: 0.26085057854652405 2023-01-24 01:55:13.962609: step: 22/463, loss: 0.04513678327202797 2023-01-24 01:55:14.601894: step: 24/463, loss: 0.23613856732845306 2023-01-24 01:55:15.370231: step: 26/463, loss: 0.06714557856321335 2023-01-24 01:55:16.048556: step: 28/463, loss: 0.4933387041091919 2023-01-24 01:55:16.683731: step: 30/463, loss: 0.0973745808005333 2023-01-24 01:55:17.339875: step: 32/463, loss: 0.07875955104827881 2023-01-24 01:55:18.022147: step: 34/463, loss: 0.12586729228496552 2023-01-24 01:55:18.620951: step: 36/463, loss: 0.22822314500808716 2023-01-24 01:55:19.276951: step: 38/463, loss: 0.07251940667629242 2023-01-24 01:55:19.949480: step: 40/463, loss: 0.016826802864670753 2023-01-24 01:55:20.571059: step: 42/463, loss: 0.06446699798107147 2023-01-24 01:55:21.211444: step: 44/463, loss: 0.1502590924501419 2023-01-24 01:55:21.911578: step: 46/463, loss: 0.03530489280819893 2023-01-24 01:55:22.474852: step: 48/463, loss: 0.052879028022289276 2023-01-24 01:55:23.092741: step: 50/463, loss: 0.1033657044172287 2023-01-24 01:55:23.708076: step: 52/463, loss: 0.13728781044483185 2023-01-24 01:55:24.294277: step: 54/463, loss: 0.08481057733297348 2023-01-24 01:55:24.915319: step: 56/463, loss: 0.025502268224954605 2023-01-24 01:55:25.560832: step: 58/463, loss: 1.7531509399414062 2023-01-24 01:55:26.135333: step: 60/463, loss: 0.36124059557914734 2023-01-24 01:55:26.732707: step: 62/463, loss: 0.2261190563440323 2023-01-24 01:55:27.362273: step: 64/463, loss: 0.13966961205005646 2023-01-24 01:55:27.944145: step: 66/463, loss: 0.02513640746474266 2023-01-24 01:55:28.639426: step: 68/463, loss: 0.19988282024860382 2023-01-24 01:55:29.279427: step: 70/463, loss: 0.1241600289940834 2023-01-24 01:55:29.941771: step: 72/463, loss: 0.6913430094718933 2023-01-24 01:55:30.640485: step: 74/463, loss: 0.36797046661376953 2023-01-24 01:55:31.273533: step: 76/463, loss: 0.06372004002332687 2023-01-24 01:55:31.887744: step: 78/463, loss: 0.052815426141023636 2023-01-24 01:55:32.483404: step: 80/463, loss: 0.19598768651485443 2023-01-24 01:55:33.072521: step: 82/463, loss: 0.022714056074619293 2023-01-24 01:55:33.706571: step: 84/463, loss: 0.24591150879859924 2023-01-24 01:55:34.380281: step: 86/463, loss: 0.1968807578086853 2023-01-24 01:55:35.003279: step: 88/463, loss: 0.22650715708732605 2023-01-24 01:55:35.631185: step: 90/463, loss: 0.095381960272789 2023-01-24 01:55:36.221230: step: 92/463, loss: 0.13556598126888275 2023-01-24 01:55:36.835596: step: 94/463, loss: 0.5408778786659241 2023-01-24 01:55:37.498167: step: 96/463, loss: 0.16535621881484985 2023-01-24 01:55:38.257622: step: 98/463, loss: 0.06927015632390976 2023-01-24 01:55:38.850763: step: 100/463, loss: 0.22137171030044556 2023-01-24 01:55:39.449797: step: 102/463, loss: 0.05958465486764908 2023-01-24 01:55:40.115319: step: 104/463, loss: 0.18621687591075897 2023-01-24 01:55:40.708677: step: 106/463, loss: 0.042426928877830505 2023-01-24 01:55:41.312321: step: 108/463, loss: 0.1488846391439438 2023-01-24 01:55:41.884254: step: 110/463, loss: 0.13703671097755432 2023-01-24 01:55:42.526967: step: 112/463, loss: 0.1270436942577362 2023-01-24 01:55:43.170438: step: 114/463, loss: 0.0927397608757019 2023-01-24 01:55:43.776407: step: 116/463, loss: 0.23358607292175293 2023-01-24 01:55:44.414058: step: 118/463, loss: 0.09120461344718933 2023-01-24 01:55:44.998949: step: 120/463, loss: 0.17258015275001526 2023-01-24 01:55:45.641138: step: 122/463, loss: 0.10142479836940765 2023-01-24 01:55:46.259522: step: 124/463, loss: 0.26694542169570923 2023-01-24 01:55:46.871444: step: 126/463, loss: 0.2230229526758194 2023-01-24 01:55:47.538867: step: 128/463, loss: 0.14308978617191315 2023-01-24 01:55:48.184314: step: 130/463, loss: 0.20323997735977173 2023-01-24 01:55:48.863124: step: 132/463, loss: 0.17066068947315216 2023-01-24 01:55:49.496887: step: 134/463, loss: 0.9108459949493408 2023-01-24 01:55:50.100245: step: 136/463, loss: 0.1547584980726242 2023-01-24 01:55:50.730893: step: 138/463, loss: 0.12853823602199554 2023-01-24 01:55:51.355923: step: 140/463, loss: 0.2995719313621521 2023-01-24 01:55:52.097007: step: 142/463, loss: 0.5496703386306763 2023-01-24 01:55:52.644794: step: 144/463, loss: 0.12188218533992767 2023-01-24 01:55:53.283903: step: 146/463, loss: 0.4085542857646942 2023-01-24 01:55:53.910914: step: 148/463, loss: 0.12613235414028168 2023-01-24 01:55:54.494173: step: 150/463, loss: 0.10050056129693985 2023-01-24 01:55:55.021982: step: 152/463, loss: 0.046520132571458817 2023-01-24 01:55:55.653045: step: 154/463, loss: 0.1378660798072815 2023-01-24 01:55:56.270686: step: 156/463, loss: 0.6739879250526428 2023-01-24 01:55:56.934837: step: 158/463, loss: 0.07254955172538757 2023-01-24 01:55:57.490207: step: 160/463, loss: 0.08598794043064117 2023-01-24 01:55:58.017738: step: 162/463, loss: 0.11975187808275223 2023-01-24 01:55:58.635372: step: 164/463, loss: 0.10833889991044998 2023-01-24 01:55:59.258157: step: 166/463, loss: 0.04555443301796913 2023-01-24 01:55:59.813817: step: 168/463, loss: 0.053522590547800064 2023-01-24 01:56:00.442754: step: 170/463, loss: 0.08040792495012283 2023-01-24 01:56:01.155338: step: 172/463, loss: 0.09878729283809662 2023-01-24 01:56:01.832063: step: 174/463, loss: 0.20678234100341797 2023-01-24 01:56:02.421935: step: 176/463, loss: 0.3886348307132721 2023-01-24 01:56:02.977309: step: 178/463, loss: 0.07748033106327057 2023-01-24 01:56:03.618225: step: 180/463, loss: 0.17381542921066284 2023-01-24 01:56:04.183426: step: 182/463, loss: 0.10872005671262741 2023-01-24 01:56:04.807639: step: 184/463, loss: 0.18492889404296875 2023-01-24 01:56:05.425842: step: 186/463, loss: 0.06112923473119736 2023-01-24 01:56:06.043581: step: 188/463, loss: 0.1483878344297409 2023-01-24 01:56:06.668830: step: 190/463, loss: 0.20669974386692047 2023-01-24 01:56:07.319563: step: 192/463, loss: 0.0915333703160286 2023-01-24 01:56:07.943462: step: 194/463, loss: 0.053997766226530075 2023-01-24 01:56:08.610458: step: 196/463, loss: 0.08680137991905212 2023-01-24 01:56:09.171295: step: 198/463, loss: 0.11950904130935669 2023-01-24 01:56:09.731113: step: 200/463, loss: 0.075925812125206 2023-01-24 01:56:10.364656: step: 202/463, loss: 0.27722862362861633 2023-01-24 01:56:11.006018: step: 204/463, loss: 0.06445423513650894 2023-01-24 01:56:11.662847: step: 206/463, loss: 0.1055389791727066 2023-01-24 01:56:12.244188: step: 208/463, loss: 0.027460621669888496 2023-01-24 01:56:12.880887: step: 210/463, loss: 0.10954947769641876 2023-01-24 01:56:13.432692: step: 212/463, loss: 0.16115546226501465 2023-01-24 01:56:14.149679: step: 214/463, loss: 0.04821352660655975 2023-01-24 01:56:14.758917: step: 216/463, loss: 0.09073083102703094 2023-01-24 01:56:15.339395: step: 218/463, loss: 0.20985910296440125 2023-01-24 01:56:15.946097: step: 220/463, loss: 0.1097373515367508 2023-01-24 01:56:16.557084: step: 222/463, loss: 0.08712872117757797 2023-01-24 01:56:17.154577: step: 224/463, loss: 0.18095597624778748 2023-01-24 01:56:17.753768: step: 226/463, loss: 0.255423903465271 2023-01-24 01:56:18.392813: step: 228/463, loss: 0.14474639296531677 2023-01-24 01:56:18.996061: step: 230/463, loss: 0.10198097676038742 2023-01-24 01:56:19.585920: step: 232/463, loss: 0.4927991032600403 2023-01-24 01:56:20.232689: step: 234/463, loss: 0.059066321700811386 2023-01-24 01:56:20.841727: step: 236/463, loss: 0.33800721168518066 2023-01-24 01:56:21.472676: step: 238/463, loss: 0.012814831919968128 2023-01-24 01:56:22.130115: step: 240/463, loss: 0.912204384803772 2023-01-24 01:56:22.709002: step: 242/463, loss: 0.07113976031541824 2023-01-24 01:56:23.347377: step: 244/463, loss: 0.28881216049194336 2023-01-24 01:56:23.928782: step: 246/463, loss: 0.15286098420619965 2023-01-24 01:56:24.533554: step: 248/463, loss: 0.039570000022649765 2023-01-24 01:56:25.201840: step: 250/463, loss: 0.04519778862595558 2023-01-24 01:56:25.808016: step: 252/463, loss: 0.12473180145025253 2023-01-24 01:56:26.416833: step: 254/463, loss: 0.09850889444351196 2023-01-24 01:56:27.055877: step: 256/463, loss: 0.05537969246506691 2023-01-24 01:56:27.702425: step: 258/463, loss: 0.10062341392040253 2023-01-24 01:56:28.331802: step: 260/463, loss: 0.29461216926574707 2023-01-24 01:56:28.991288: step: 262/463, loss: 0.45529505610466003 2023-01-24 01:56:29.538672: step: 264/463, loss: 0.16464997828006744 2023-01-24 01:56:30.121143: step: 266/463, loss: 0.12176385521888733 2023-01-24 01:56:30.790260: step: 268/463, loss: 0.11417698115110397 2023-01-24 01:56:31.407388: step: 270/463, loss: 0.20928838849067688 2023-01-24 01:56:31.997741: step: 272/463, loss: 0.08467858284711838 2023-01-24 01:56:32.635591: step: 274/463, loss: 0.11293527483940125 2023-01-24 01:56:33.279715: step: 276/463, loss: 0.10631817579269409 2023-01-24 01:56:33.844960: step: 278/463, loss: 0.018439168110489845 2023-01-24 01:56:34.547990: step: 280/463, loss: 0.07177327573299408 2023-01-24 01:56:35.238927: step: 282/463, loss: 0.09680847823619843 2023-01-24 01:56:35.847035: step: 284/463, loss: 0.0930999219417572 2023-01-24 01:56:36.469129: step: 286/463, loss: 0.21028707921504974 2023-01-24 01:56:37.092411: step: 288/463, loss: 0.1737503558397293 2023-01-24 01:56:37.711165: step: 290/463, loss: 0.03787235915660858 2023-01-24 01:56:38.316965: step: 292/463, loss: 0.0408172644674778 2023-01-24 01:56:38.954610: step: 294/463, loss: 0.0721878856420517 2023-01-24 01:56:39.557295: step: 296/463, loss: 0.24882261455059052 2023-01-24 01:56:40.180062: step: 298/463, loss: 0.1513444185256958 2023-01-24 01:56:40.841232: step: 300/463, loss: 0.42690926790237427 2023-01-24 01:56:41.515468: step: 302/463, loss: 0.04912591353058815 2023-01-24 01:56:42.165683: step: 304/463, loss: 0.6604412794113159 2023-01-24 01:56:42.789944: step: 306/463, loss: 0.07731329649686813 2023-01-24 01:56:43.454345: step: 308/463, loss: 0.10890667140483856 2023-01-24 01:56:43.987549: step: 310/463, loss: 0.1050528883934021 2023-01-24 01:56:44.617330: step: 312/463, loss: 0.440743088722229 2023-01-24 01:56:45.215474: step: 314/463, loss: 0.07563351839780807 2023-01-24 01:56:45.847452: step: 316/463, loss: 0.13913071155548096 2023-01-24 01:56:46.426984: step: 318/463, loss: 0.04890662431716919 2023-01-24 01:56:47.015400: step: 320/463, loss: 0.22135961055755615 2023-01-24 01:56:47.609051: step: 322/463, loss: 0.025710294023156166 2023-01-24 01:56:48.227679: step: 324/463, loss: 0.820571780204773 2023-01-24 01:56:48.847221: step: 326/463, loss: 0.07446911185979843 2023-01-24 01:56:49.546558: step: 328/463, loss: 0.06884047389030457 2023-01-24 01:56:50.195508: step: 330/463, loss: 0.19954916834831238 2023-01-24 01:56:50.841765: step: 332/463, loss: 0.03605741634964943 2023-01-24 01:56:51.441114: step: 334/463, loss: 0.29288601875305176 2023-01-24 01:56:52.058003: step: 336/463, loss: 0.42978596687316895 2023-01-24 01:56:52.660753: step: 338/463, loss: 0.15287433564662933 2023-01-24 01:56:53.278716: step: 340/463, loss: 0.18952493369579315 2023-01-24 01:56:53.869151: step: 342/463, loss: 0.31624099612236023 2023-01-24 01:56:54.581321: step: 344/463, loss: 1.0986188650131226 2023-01-24 01:56:55.158418: step: 346/463, loss: 0.03494536504149437 2023-01-24 01:56:55.799210: step: 348/463, loss: 0.6324616074562073 2023-01-24 01:56:56.369901: step: 350/463, loss: 0.14105699956417084 2023-01-24 01:56:56.944490: step: 352/463, loss: 0.0512370765209198 2023-01-24 01:56:57.517798: step: 354/463, loss: 0.3215285837650299 2023-01-24 01:56:58.107673: step: 356/463, loss: 0.13629095256328583 2023-01-24 01:56:58.775735: step: 358/463, loss: 0.10889004915952682 2023-01-24 01:56:59.334949: step: 360/463, loss: 0.7635015249252319 2023-01-24 01:56:59.964014: step: 362/463, loss: 0.0896754115819931 2023-01-24 01:57:00.578125: step: 364/463, loss: 0.058855701237916946 2023-01-24 01:57:01.220689: step: 366/463, loss: 0.16003604233264923 2023-01-24 01:57:01.831171: step: 368/463, loss: 0.1338960975408554 2023-01-24 01:57:02.467647: step: 370/463, loss: 0.38046205043792725 2023-01-24 01:57:03.072524: step: 372/463, loss: 0.1277269423007965 2023-01-24 01:57:03.692627: step: 374/463, loss: 0.172410249710083 2023-01-24 01:57:04.338264: step: 376/463, loss: 0.05194571986794472 2023-01-24 01:57:04.916068: step: 378/463, loss: 0.41297999024391174 2023-01-24 01:57:05.512449: step: 380/463, loss: 0.4520055055618286 2023-01-24 01:57:06.119472: step: 382/463, loss: 0.3761194348335266 2023-01-24 01:57:06.723770: step: 384/463, loss: 0.12173573672771454 2023-01-24 01:57:07.365112: step: 386/463, loss: 0.13181892037391663 2023-01-24 01:57:07.990742: step: 388/463, loss: 0.08874129503965378 2023-01-24 01:57:08.630834: step: 390/463, loss: 0.06165522709488869 2023-01-24 01:57:09.196981: step: 392/463, loss: 0.13682669401168823 2023-01-24 01:57:09.891468: step: 394/463, loss: 0.01730811595916748 2023-01-24 01:57:10.535304: step: 396/463, loss: 0.5122079849243164 2023-01-24 01:57:11.122765: step: 398/463, loss: 0.03469402715563774 2023-01-24 01:57:11.774162: step: 400/463, loss: 0.09379014372825623 2023-01-24 01:57:12.418816: step: 402/463, loss: 0.07855597883462906 2023-01-24 01:57:13.006792: step: 404/463, loss: 0.1251240372657776 2023-01-24 01:57:13.626882: step: 406/463, loss: 0.12323852628469467 2023-01-24 01:57:14.262856: step: 408/463, loss: 0.2522580027580261 2023-01-24 01:57:14.895161: step: 410/463, loss: 0.0612805001437664 2023-01-24 01:57:15.442314: step: 412/463, loss: 0.052484180778265 2023-01-24 01:57:16.034288: step: 414/463, loss: 0.09456530213356018 2023-01-24 01:57:16.625604: step: 416/463, loss: 0.14989073574543 2023-01-24 01:57:17.236139: step: 418/463, loss: 0.12146855890750885 2023-01-24 01:57:17.905224: step: 420/463, loss: 0.04999549686908722 2023-01-24 01:57:18.526022: step: 422/463, loss: 0.08902577310800552 2023-01-24 01:57:19.139205: step: 424/463, loss: 0.08543996512889862 2023-01-24 01:57:19.756549: step: 426/463, loss: 0.4487992227077484 2023-01-24 01:57:20.343451: step: 428/463, loss: 0.14005769789218903 2023-01-24 01:57:20.996197: step: 430/463, loss: 0.07268275320529938 2023-01-24 01:57:21.663438: step: 432/463, loss: 0.08636981248855591 2023-01-24 01:57:22.281591: step: 434/463, loss: 0.03948669508099556 2023-01-24 01:57:22.944351: step: 436/463, loss: 0.05753329023718834 2023-01-24 01:57:23.578873: step: 438/463, loss: 0.21155887842178345 2023-01-24 01:57:24.184924: step: 440/463, loss: 0.04406146705150604 2023-01-24 01:57:24.880796: step: 442/463, loss: 0.05342789366841316 2023-01-24 01:57:25.555259: step: 444/463, loss: 0.1786039173603058 2023-01-24 01:57:26.197565: step: 446/463, loss: 0.032521288841962814 2023-01-24 01:57:26.819830: step: 448/463, loss: 0.07045623660087585 2023-01-24 01:57:27.495315: step: 450/463, loss: 0.09353309869766235 2023-01-24 01:57:28.057136: step: 452/463, loss: 0.10855165123939514 2023-01-24 01:57:28.675111: step: 454/463, loss: 0.6815056204795837 2023-01-24 01:57:29.313102: step: 456/463, loss: 0.28537923097610474 2023-01-24 01:57:29.893203: step: 458/463, loss: 0.15348763763904572 2023-01-24 01:57:30.555409: step: 460/463, loss: 1.9067739248275757 2023-01-24 01:57:31.196622: step: 462/463, loss: 0.1206866055727005 2023-01-24 01:57:31.796432: step: 464/463, loss: 0.11111979931592941 2023-01-24 01:57:32.400552: step: 466/463, loss: 0.10513848811388016 2023-01-24 01:57:33.007268: step: 468/463, loss: 0.20087479054927826 2023-01-24 01:57:33.599149: step: 470/463, loss: 0.19176924228668213 2023-01-24 01:57:34.223844: step: 472/463, loss: 0.03665177524089813 2023-01-24 01:57:34.915768: step: 474/463, loss: 0.08420693129301071 2023-01-24 01:57:35.482366: step: 476/463, loss: 0.030108727514743805 2023-01-24 01:57:36.099118: step: 478/463, loss: 0.022803660482168198 2023-01-24 01:57:36.680797: step: 480/463, loss: 0.09242691099643707 2023-01-24 01:57:37.334182: step: 482/463, loss: 0.11656904965639114 2023-01-24 01:57:37.944431: step: 484/463, loss: 0.19817963242530823 2023-01-24 01:57:38.502139: step: 486/463, loss: 0.17308945953845978 2023-01-24 01:57:39.144346: step: 488/463, loss: 0.06358519941568375 2023-01-24 01:57:39.784537: step: 490/463, loss: 0.07707725465297699 2023-01-24 01:57:40.443240: step: 492/463, loss: 0.17215988039970398 2023-01-24 01:57:41.034273: step: 494/463, loss: 0.09996546804904938 2023-01-24 01:57:41.609933: step: 496/463, loss: 0.15917626023292542 2023-01-24 01:57:42.180478: step: 498/463, loss: 0.1067061647772789 2023-01-24 01:57:42.752432: step: 500/463, loss: 0.11725277453660965 2023-01-24 01:57:43.367290: step: 502/463, loss: 1.2853498458862305 2023-01-24 01:57:44.032960: step: 504/463, loss: 0.2767886221408844 2023-01-24 01:57:44.653442: step: 506/463, loss: 0.26015838980674744 2023-01-24 01:57:45.280959: step: 508/463, loss: 0.22985103726387024 2023-01-24 01:57:45.886502: step: 510/463, loss: 0.17112188041210175 2023-01-24 01:57:46.483439: step: 512/463, loss: 0.3354334831237793 2023-01-24 01:57:47.114033: step: 514/463, loss: 0.027064772322773933 2023-01-24 01:57:47.744092: step: 516/463, loss: 0.13486523926258087 2023-01-24 01:57:48.430113: step: 518/463, loss: 0.12651915848255157 2023-01-24 01:57:49.153554: step: 520/463, loss: 0.23901620507240295 2023-01-24 01:57:49.742023: step: 522/463, loss: 0.32971879839897156 2023-01-24 01:57:50.364723: step: 524/463, loss: 0.040283069014549255 2023-01-24 01:57:50.958678: step: 526/463, loss: 0.09978746622800827 2023-01-24 01:57:51.632721: step: 528/463, loss: 0.14024436473846436 2023-01-24 01:57:52.295786: step: 530/463, loss: 0.2856617271900177 2023-01-24 01:57:52.968218: step: 532/463, loss: 0.11049336940050125 2023-01-24 01:57:53.572674: step: 534/463, loss: 0.15583792328834534 2023-01-24 01:57:54.220339: step: 536/463, loss: 0.04650764912366867 2023-01-24 01:57:54.789874: step: 538/463, loss: 0.12168373167514801 2023-01-24 01:57:55.372009: step: 540/463, loss: 0.6767510771751404 2023-01-24 01:57:56.031455: step: 542/463, loss: 0.12667514383792877 2023-01-24 01:57:56.609732: step: 544/463, loss: 0.09154399484395981 2023-01-24 01:57:57.166706: step: 546/463, loss: 0.09960877150297165 2023-01-24 01:57:57.764278: step: 548/463, loss: 0.04335862025618553 2023-01-24 01:57:58.419502: step: 550/463, loss: 0.11040492355823517 2023-01-24 01:57:59.049391: step: 552/463, loss: 0.18182040750980377 2023-01-24 01:57:59.710580: step: 554/463, loss: 0.13269519805908203 2023-01-24 01:58:00.304907: step: 556/463, loss: 0.045539844781160355 2023-01-24 01:58:00.866207: step: 558/463, loss: 0.2577725648880005 2023-01-24 01:58:01.484792: step: 560/463, loss: 0.11310486495494843 2023-01-24 01:58:02.055786: step: 562/463, loss: 0.10646673291921616 2023-01-24 01:58:02.640926: step: 564/463, loss: 0.04126184806227684 2023-01-24 01:58:03.216876: step: 566/463, loss: 0.03399910405278206 2023-01-24 01:58:03.883863: step: 568/463, loss: 0.270864874124527 2023-01-24 01:58:04.502561: step: 570/463, loss: 0.21416796743869781 2023-01-24 01:58:05.042799: step: 572/463, loss: 0.5707639455795288 2023-01-24 01:58:05.662928: step: 574/463, loss: 0.3545154631137848 2023-01-24 01:58:06.343027: step: 576/463, loss: 0.07658907026052475 2023-01-24 01:58:06.930716: step: 578/463, loss: 0.7380666136741638 2023-01-24 01:58:07.587236: step: 580/463, loss: 0.22135727107524872 2023-01-24 01:58:08.218051: step: 582/463, loss: 0.06001212075352669 2023-01-24 01:58:08.807551: step: 584/463, loss: 0.05593065917491913 2023-01-24 01:58:09.447967: step: 586/463, loss: 0.18825268745422363 2023-01-24 01:58:10.053168: step: 588/463, loss: 0.17381080985069275 2023-01-24 01:58:10.738801: step: 590/463, loss: 0.22605355083942413 2023-01-24 01:58:11.403382: step: 592/463, loss: 0.06703544408082962 2023-01-24 01:58:12.008978: step: 594/463, loss: 0.1642419844865799 2023-01-24 01:58:12.612136: step: 596/463, loss: 0.26907047629356384 2023-01-24 01:58:13.378806: step: 598/463, loss: 0.1269286572933197 2023-01-24 01:58:14.125738: step: 600/463, loss: 0.11625716090202332 2023-01-24 01:58:14.764856: step: 602/463, loss: 0.34603291749954224 2023-01-24 01:58:15.346925: step: 604/463, loss: 0.20562690496444702 2023-01-24 01:58:15.943501: step: 606/463, loss: 0.1108081266283989 2023-01-24 01:58:16.576545: step: 608/463, loss: 0.021895915269851685 2023-01-24 01:58:17.167879: step: 610/463, loss: 0.08722681552171707 2023-01-24 01:58:17.805660: step: 612/463, loss: 0.033898528665304184 2023-01-24 01:58:18.423423: step: 614/463, loss: 0.036815352737903595 2023-01-24 01:58:19.046044: step: 616/463, loss: 0.07896837592124939 2023-01-24 01:58:19.666182: step: 618/463, loss: 0.3109944462776184 2023-01-24 01:58:20.308959: step: 620/463, loss: 0.06547165662050247 2023-01-24 01:58:20.882110: step: 622/463, loss: 0.06932200491428375 2023-01-24 01:58:21.528635: step: 624/463, loss: 0.07772151380777359 2023-01-24 01:58:22.193300: step: 626/463, loss: 0.0884062647819519 2023-01-24 01:58:22.860963: step: 628/463, loss: 0.20545999705791473 2023-01-24 01:58:23.413316: step: 630/463, loss: 0.22252774238586426 2023-01-24 01:58:23.999340: step: 632/463, loss: 0.15800395607948303 2023-01-24 01:58:24.599085: step: 634/463, loss: 0.46769943833351135 2023-01-24 01:58:25.257743: step: 636/463, loss: 0.14007198810577393 2023-01-24 01:58:25.857431: step: 638/463, loss: 0.07331300526857376 2023-01-24 01:58:26.501249: step: 640/463, loss: 0.15569837391376495 2023-01-24 01:58:27.125062: step: 642/463, loss: 0.05941243842244148 2023-01-24 01:58:27.810496: step: 644/463, loss: 0.04472799971699715 2023-01-24 01:58:28.419889: step: 646/463, loss: 0.039112940430641174 2023-01-24 01:58:29.060829: step: 648/463, loss: 0.08512268960475922 2023-01-24 01:58:29.732022: step: 650/463, loss: 0.07595560699701309 2023-01-24 01:58:30.353547: step: 652/463, loss: 0.05009163171052933 2023-01-24 01:58:30.941278: step: 654/463, loss: 0.31379133462905884 2023-01-24 01:58:31.572428: step: 656/463, loss: 0.13011176884174347 2023-01-24 01:58:32.210274: step: 658/463, loss: 0.3043382167816162 2023-01-24 01:58:32.839552: step: 660/463, loss: 0.1349799484014511 2023-01-24 01:58:33.513522: step: 662/463, loss: 0.022213637828826904 2023-01-24 01:58:34.160822: step: 664/463, loss: 0.3820936977863312 2023-01-24 01:58:34.780276: step: 666/463, loss: 0.1869480013847351 2023-01-24 01:58:35.389260: step: 668/463, loss: 0.045273665338754654 2023-01-24 01:58:36.029566: step: 670/463, loss: 0.3349805176258087 2023-01-24 01:58:36.672148: step: 672/463, loss: 0.14581331610679626 2023-01-24 01:58:37.258413: step: 674/463, loss: 0.07396627962589264 2023-01-24 01:58:37.870042: step: 676/463, loss: 0.0904916375875473 2023-01-24 01:58:38.481697: step: 678/463, loss: 0.07654202729463577 2023-01-24 01:58:39.187709: step: 680/463, loss: 0.13261403143405914 2023-01-24 01:58:39.799384: step: 682/463, loss: 0.0523698627948761 2023-01-24 01:58:40.422417: step: 684/463, loss: 0.42026373744010925 2023-01-24 01:58:41.022999: step: 686/463, loss: 0.4124631881713867 2023-01-24 01:58:41.676929: step: 688/463, loss: 0.08937067538499832 2023-01-24 01:58:42.272539: step: 690/463, loss: 0.13619309663772583 2023-01-24 01:58:42.895120: step: 692/463, loss: 0.06393127143383026 2023-01-24 01:58:43.514477: step: 694/463, loss: 0.18974505364894867 2023-01-24 01:58:44.127290: step: 696/463, loss: 0.1054297685623169 2023-01-24 01:58:44.760038: step: 698/463, loss: 0.11710680276155472 2023-01-24 01:58:45.377330: step: 700/463, loss: 0.1280592978000641 2023-01-24 01:58:46.011800: step: 702/463, loss: 0.12455994635820389 2023-01-24 01:58:46.677157: step: 704/463, loss: 0.3318396210670471 2023-01-24 01:58:47.359970: step: 706/463, loss: 0.1767960637807846 2023-01-24 01:58:48.041026: step: 708/463, loss: 1.0036784410476685 2023-01-24 01:58:48.772138: step: 710/463, loss: 0.49046623706817627 2023-01-24 01:58:49.430092: step: 712/463, loss: 0.10382171720266342 2023-01-24 01:58:50.127295: step: 714/463, loss: 0.8380345106124878 2023-01-24 01:58:50.792176: step: 716/463, loss: 0.03781145066022873 2023-01-24 01:58:51.464727: step: 718/463, loss: 0.2780391573905945 2023-01-24 01:58:52.133022: step: 720/463, loss: 0.08166279643774033 2023-01-24 01:58:52.771743: step: 722/463, loss: 0.0748092532157898 2023-01-24 01:58:53.355300: step: 724/463, loss: 0.10759352147579193 2023-01-24 01:58:53.951772: step: 726/463, loss: 0.15913042426109314 2023-01-24 01:58:54.535854: step: 728/463, loss: 0.21838082373142242 2023-01-24 01:58:55.154870: step: 730/463, loss: 0.07645073533058167 2023-01-24 01:58:55.764427: step: 732/463, loss: 0.0647900328040123 2023-01-24 01:58:56.427692: step: 734/463, loss: 0.08762484043836594 2023-01-24 01:58:57.027489: step: 736/463, loss: 0.12671323120594025 2023-01-24 01:58:57.666236: step: 738/463, loss: 0.17756374180316925 2023-01-24 01:58:58.309133: step: 740/463, loss: 0.16403988003730774 2023-01-24 01:58:58.949847: step: 742/463, loss: 0.15573322772979736 2023-01-24 01:58:59.595624: step: 744/463, loss: 0.10202080011367798 2023-01-24 01:59:00.329589: step: 746/463, loss: 0.11958621442317963 2023-01-24 01:59:00.998036: step: 748/463, loss: 0.2071763426065445 2023-01-24 01:59:01.699538: step: 750/463, loss: 0.6519031524658203 2023-01-24 01:59:02.315447: step: 752/463, loss: 0.04815414920449257 2023-01-24 01:59:02.951810: step: 754/463, loss: 0.5734113454818726 2023-01-24 01:59:03.575474: step: 756/463, loss: 0.2853534519672394 2023-01-24 01:59:04.227488: step: 758/463, loss: 0.05410519242286682 2023-01-24 01:59:04.821702: step: 760/463, loss: 0.046185001730918884 2023-01-24 01:59:05.471138: step: 762/463, loss: 0.14726080000400543 2023-01-24 01:59:06.110794: step: 764/463, loss: 1.7470054626464844 2023-01-24 01:59:06.690063: step: 766/463, loss: 1.0499917268753052 2023-01-24 01:59:07.350072: step: 768/463, loss: 0.361607164144516 2023-01-24 01:59:07.931467: step: 770/463, loss: 0.06778234243392944 2023-01-24 01:59:08.563099: step: 772/463, loss: 0.0575651153922081 2023-01-24 01:59:09.201078: step: 774/463, loss: 0.06311508268117905 2023-01-24 01:59:09.844801: step: 776/463, loss: 0.2709875702857971 2023-01-24 01:59:10.483735: step: 778/463, loss: 0.1865103393793106 2023-01-24 01:59:11.083811: step: 780/463, loss: 0.5807956457138062 2023-01-24 01:59:11.716628: step: 782/463, loss: 0.0966925248503685 2023-01-24 01:59:12.336495: step: 784/463, loss: 0.172201007604599 2023-01-24 01:59:12.926771: step: 786/463, loss: 0.120591901242733 2023-01-24 01:59:13.566219: step: 788/463, loss: 0.23708239197731018 2023-01-24 01:59:14.193610: step: 790/463, loss: 0.053314317017793655 2023-01-24 01:59:14.883941: step: 792/463, loss: 0.14076444506645203 2023-01-24 01:59:15.481274: step: 794/463, loss: 0.08281038701534271 2023-01-24 01:59:16.148718: step: 796/463, loss: 0.07729481160640717 2023-01-24 01:59:16.779582: step: 798/463, loss: 0.09750064462423325 2023-01-24 01:59:17.381293: step: 800/463, loss: 0.2641031742095947 2023-01-24 01:59:18.015573: step: 802/463, loss: 0.3851913511753082 2023-01-24 01:59:18.700993: step: 804/463, loss: 0.15936818718910217 2023-01-24 01:59:19.473289: step: 806/463, loss: 0.16375510394573212 2023-01-24 01:59:20.067148: step: 808/463, loss: 0.0914027988910675 2023-01-24 01:59:20.677297: step: 810/463, loss: 0.026156218722462654 2023-01-24 01:59:21.284804: step: 812/463, loss: 0.10759551078081131 2023-01-24 01:59:21.961124: step: 814/463, loss: 0.04365018382668495 2023-01-24 01:59:22.596695: step: 816/463, loss: 0.14214031398296356 2023-01-24 01:59:23.227039: step: 818/463, loss: 0.11195086687803268 2023-01-24 01:59:23.817882: step: 820/463, loss: 1.2626415491104126 2023-01-24 01:59:24.435514: step: 822/463, loss: 0.07693120092153549 2023-01-24 01:59:25.013737: step: 824/463, loss: 0.11863190680742264 2023-01-24 01:59:25.829142: step: 826/463, loss: 0.18664751946926117 2023-01-24 01:59:26.527615: step: 828/463, loss: 0.21061241626739502 2023-01-24 01:59:27.164851: step: 830/463, loss: 0.10795089602470398 2023-01-24 01:59:27.830478: step: 832/463, loss: 0.23394255340099335 2023-01-24 01:59:28.500092: step: 834/463, loss: 0.2522965967655182 2023-01-24 01:59:29.142524: step: 836/463, loss: 0.1798715889453888 2023-01-24 01:59:29.752445: step: 838/463, loss: 0.5537818074226379 2023-01-24 01:59:30.371594: step: 840/463, loss: 0.22895345091819763 2023-01-24 01:59:31.004111: step: 842/463, loss: 0.05286431685090065 2023-01-24 01:59:31.737958: step: 844/463, loss: 0.09981387853622437 2023-01-24 01:59:32.336453: step: 846/463, loss: 0.9053740501403809 2023-01-24 01:59:32.921378: step: 848/463, loss: 0.10698530822992325 2023-01-24 01:59:33.581478: step: 850/463, loss: 1.179131269454956 2023-01-24 01:59:34.167105: step: 852/463, loss: 0.13164712488651276 2023-01-24 01:59:34.828765: step: 854/463, loss: 0.017396269366145134 2023-01-24 01:59:35.550980: step: 856/463, loss: 0.18690995872020721 2023-01-24 01:59:36.183656: step: 858/463, loss: 0.09969016909599304 2023-01-24 01:59:36.799635: step: 860/463, loss: 0.22827470302581787 2023-01-24 01:59:37.451189: step: 862/463, loss: 0.06844737380743027 2023-01-24 01:59:38.032674: step: 864/463, loss: 0.030802123248577118 2023-01-24 01:59:38.648223: step: 866/463, loss: 0.09806010127067566 2023-01-24 01:59:39.280729: step: 868/463, loss: 0.11985277384519577 2023-01-24 01:59:39.871013: step: 870/463, loss: 0.21327286958694458 2023-01-24 01:59:40.568822: step: 872/463, loss: 0.5122323632240295 2023-01-24 01:59:41.119516: step: 874/463, loss: 0.10894346237182617 2023-01-24 01:59:41.763892: step: 876/463, loss: 0.514577329158783 2023-01-24 01:59:42.360586: step: 878/463, loss: 0.07537701725959778 2023-01-24 01:59:42.984628: step: 880/463, loss: 0.09374969452619553 2023-01-24 01:59:43.640438: step: 882/463, loss: 0.05985859036445618 2023-01-24 01:59:44.229789: step: 884/463, loss: 0.10622184723615646 2023-01-24 01:59:44.817058: step: 886/463, loss: 0.4521494209766388 2023-01-24 01:59:45.443025: step: 888/463, loss: 0.29602643847465515 2023-01-24 01:59:46.075518: step: 890/463, loss: 0.3119152784347534 2023-01-24 01:59:46.684552: step: 892/463, loss: 0.07774879783391953 2023-01-24 01:59:47.309212: step: 894/463, loss: 0.07959072291851044 2023-01-24 01:59:48.020154: step: 896/463, loss: 0.03204730153083801 2023-01-24 01:59:48.606513: step: 898/463, loss: 0.09456337988376617 2023-01-24 01:59:49.287086: step: 900/463, loss: 0.046307023614645004 2023-01-24 01:59:49.861982: step: 902/463, loss: 0.1056135892868042 2023-01-24 01:59:50.411885: step: 904/463, loss: 0.1681588739156723 2023-01-24 01:59:50.973626: step: 906/463, loss: 0.10231178253889084 2023-01-24 01:59:51.672675: step: 908/463, loss: 0.016128037124872208 2023-01-24 01:59:52.299697: step: 910/463, loss: 0.08243054896593094 2023-01-24 01:59:52.910129: step: 912/463, loss: 0.7484679818153381 2023-01-24 01:59:53.522321: step: 914/463, loss: 0.08425735682249069 2023-01-24 01:59:54.115248: step: 916/463, loss: 0.6359812617301941 2023-01-24 01:59:54.760949: step: 918/463, loss: 0.1961413025856018 2023-01-24 01:59:55.335411: step: 920/463, loss: 0.10283773392438889 2023-01-24 01:59:55.920944: step: 922/463, loss: 0.14781196415424347 2023-01-24 01:59:56.592598: step: 924/463, loss: 0.06199555844068527 2023-01-24 01:59:57.259492: step: 926/463, loss: 0.07957815378904343 ================================================== Loss: 0.196 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3259340883220365, 'r': 0.309884303669815, 'f1': 0.31770662589837345}, 'combined': 0.234099619083012, 'epoch': 14} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3690118739729741, 'r': 0.30493389585724817, 'f1': 0.333926655186246}, 'combined': 0.2349232750053992, 'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3259638596989492, 'r': 0.31052996482683987, 'f1': 0.3180597893861716}, 'combined': 0.23435984481086325, 'epoch': 14} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3765230969500743, 'r': 0.3088409346710906, 'f1': 0.3393400873759059}, 'combined': 0.24093146203689317, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34311026573827774, 'r': 0.3236153642758756, 'f1': 0.3330778018278018}, 'combined': 0.24542574871522235, 'epoch': 14} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3750493256914893, 'r': 0.29538412801401387, 'f1': 0.33048356229636366}, 'combined': 0.2346433292304182, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2982456140350877, 'r': 0.32380952380952377, 'f1': 0.3105022831050228}, 'combined': 0.2070015220700152, 'epoch': 14} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2777777777777778, 'r': 0.32608695652173914, 'f1': 0.3}, 'combined': 0.15, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.13793103448275862, 'f1': 0.20512820512820515}, 'combined': 0.13675213675213677, 'epoch': 14} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32931238967343335, 'r': 0.3212043194731405, 'f1': 0.3252078248932276}, 'combined': 0.2396268183423782, 'epoch': 10} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38073307869771683, 'r': 0.30299176943483225, 'f1': 0.3374427286417082}, 'combined': 0.23958433733561282, 'epoch': 10} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31666666666666665, 'r': 0.41304347826086957, 'f1': 0.3584905660377358}, 'combined': 0.1792452830188679, 'epoch': 10} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3394620582120582, 'r': 0.3092447916666667, 'f1': 0.32364965312190286}, 'combined': 0.23847869177403366, 'epoch': 7} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36468931207169214, 'r': 0.2587193810770381, 'f1': 0.30269771384817323}, 'combined': 0.214915376832203, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 7} ****************************** Epoch: 15 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:02:35.818085: step: 2/463, loss: 0.37574899196624756 2023-01-24 02:02:36.459132: step: 4/463, loss: 0.0677657425403595 2023-01-24 02:02:37.161636: step: 6/463, loss: 0.19534721970558167 2023-01-24 02:02:37.804520: step: 8/463, loss: 0.19160549342632294 2023-01-24 02:02:38.415163: step: 10/463, loss: 0.06557293236255646 2023-01-24 02:02:39.058352: step: 12/463, loss: 0.08735430240631104 2023-01-24 02:02:39.668324: step: 14/463, loss: 0.07968301326036453 2023-01-24 02:02:40.354567: step: 16/463, loss: 0.040089137852191925 2023-01-24 02:02:40.955976: step: 18/463, loss: 0.1884065419435501 2023-01-24 02:02:41.569708: step: 20/463, loss: 0.028807157650589943 2023-01-24 02:02:42.173350: step: 22/463, loss: 0.00869088713079691 2023-01-24 02:02:42.791314: step: 24/463, loss: 0.2006780356168747 2023-01-24 02:02:43.491782: step: 26/463, loss: 0.11487504839897156 2023-01-24 02:02:44.129233: step: 28/463, loss: 0.028129952028393745 2023-01-24 02:02:44.760696: step: 30/463, loss: 0.12091419100761414 2023-01-24 02:02:45.340632: step: 32/463, loss: 0.10680311918258667 2023-01-24 02:02:46.094315: step: 34/463, loss: 2.917120933532715 2023-01-24 02:02:46.682396: step: 36/463, loss: 0.04698164016008377 2023-01-24 02:02:47.305775: step: 38/463, loss: 0.11034693568944931 2023-01-24 02:02:47.949292: step: 40/463, loss: 0.3315912187099457 2023-01-24 02:02:48.553712: step: 42/463, loss: 0.10442382842302322 2023-01-24 02:02:49.138190: step: 44/463, loss: 0.10350078344345093 2023-01-24 02:02:49.818123: step: 46/463, loss: 0.0668860450387001 2023-01-24 02:02:50.437634: step: 48/463, loss: 0.05382747948169708 2023-01-24 02:02:51.145145: step: 50/463, loss: 0.07797696441411972 2023-01-24 02:02:51.754651: step: 52/463, loss: 0.2698434293270111 2023-01-24 02:02:52.371538: step: 54/463, loss: 0.059612397104501724 2023-01-24 02:02:53.040661: step: 56/463, loss: 0.42994365096092224 2023-01-24 02:02:53.667515: step: 58/463, loss: 0.03361697494983673 2023-01-24 02:02:54.241990: step: 60/463, loss: 0.03127128630876541 2023-01-24 02:02:54.841129: step: 62/463, loss: 0.05584460869431496 2023-01-24 02:02:55.465574: step: 64/463, loss: 0.1160743460059166 2023-01-24 02:02:56.145759: step: 66/463, loss: 0.9928321838378906 2023-01-24 02:02:56.784642: step: 68/463, loss: 0.07758496701717377 2023-01-24 02:02:57.373652: step: 70/463, loss: 0.04754911735653877 2023-01-24 02:02:58.118468: step: 72/463, loss: 0.14585909247398376 2023-01-24 02:02:58.738426: step: 74/463, loss: 0.09362704306840897 2023-01-24 02:02:59.332969: step: 76/463, loss: 0.07655180990695953 2023-01-24 02:02:59.985735: step: 78/463, loss: 0.05262964218854904 2023-01-24 02:03:00.577214: step: 80/463, loss: 0.15142206847667694 2023-01-24 02:03:01.271837: step: 82/463, loss: 0.12524470686912537 2023-01-24 02:03:01.964026: step: 84/463, loss: 0.029682707041502 2023-01-24 02:03:02.589499: step: 86/463, loss: 0.16101986169815063 2023-01-24 02:03:03.205440: step: 88/463, loss: 0.16534247994422913 2023-01-24 02:03:03.785448: step: 90/463, loss: 0.08351434022188187 2023-01-24 02:03:04.391438: step: 92/463, loss: 0.032328296452760696 2023-01-24 02:03:05.010711: step: 94/463, loss: 0.033557821065187454 2023-01-24 02:03:05.610537: step: 96/463, loss: 0.23030395805835724 2023-01-24 02:03:06.199186: step: 98/463, loss: 0.10151595622301102 2023-01-24 02:03:06.771099: step: 100/463, loss: 0.04364335536956787 2023-01-24 02:03:07.418182: step: 102/463, loss: 0.07381221652030945 2023-01-24 02:03:07.994757: step: 104/463, loss: 0.032436057925224304 2023-01-24 02:03:08.668451: step: 106/463, loss: 0.04410495609045029 2023-01-24 02:03:09.362370: step: 108/463, loss: 0.062239695340394974 2023-01-24 02:03:10.033617: step: 110/463, loss: 0.15239250659942627 2023-01-24 02:03:10.672859: step: 112/463, loss: 0.16718746721744537 2023-01-24 02:03:11.270308: step: 114/463, loss: 0.05621245130896568 2023-01-24 02:03:11.848395: step: 116/463, loss: 0.07214046269655228 2023-01-24 02:03:12.452655: step: 118/463, loss: 0.05933195725083351 2023-01-24 02:03:13.141851: step: 120/463, loss: 0.15812531113624573 2023-01-24 02:03:13.741311: step: 122/463, loss: 0.025902528315782547 2023-01-24 02:03:14.362466: step: 124/463, loss: 0.06814964860677719 2023-01-24 02:03:14.906942: step: 126/463, loss: 0.2700980603694916 2023-01-24 02:03:15.520663: step: 128/463, loss: 0.061028674244880676 2023-01-24 02:03:16.153404: step: 130/463, loss: 0.06606525182723999 2023-01-24 02:03:16.763020: step: 132/463, loss: 0.12197516113519669 2023-01-24 02:03:17.402545: step: 134/463, loss: 0.5318504571914673 2023-01-24 02:03:18.078007: step: 136/463, loss: 0.14315272867679596 2023-01-24 02:03:18.675147: step: 138/463, loss: 0.14755088090896606 2023-01-24 02:03:19.263163: step: 140/463, loss: 0.06662347912788391 2023-01-24 02:03:19.881766: step: 142/463, loss: 0.041549935936927795 2023-01-24 02:03:20.454121: step: 144/463, loss: 0.07188858836889267 2023-01-24 02:03:21.037657: step: 146/463, loss: 0.21436037123203278 2023-01-24 02:03:21.587733: step: 148/463, loss: 0.24756450951099396 2023-01-24 02:03:22.178087: step: 150/463, loss: 0.035237278789281845 2023-01-24 02:03:22.768947: step: 152/463, loss: 0.23047223687171936 2023-01-24 02:03:23.418966: step: 154/463, loss: 0.12927041947841644 2023-01-24 02:03:24.075484: step: 156/463, loss: 0.6472316980361938 2023-01-24 02:03:24.713636: step: 158/463, loss: 0.20025455951690674 2023-01-24 02:03:25.431991: step: 160/463, loss: 0.1520618051290512 2023-01-24 02:03:26.017024: step: 162/463, loss: 0.03362012282013893 2023-01-24 02:03:26.600994: step: 164/463, loss: 0.08039239048957825 2023-01-24 02:03:27.321895: step: 166/463, loss: 0.18855880200862885 2023-01-24 02:03:27.961238: step: 168/463, loss: 0.2586096227169037 2023-01-24 02:03:28.573806: step: 170/463, loss: 0.08380363881587982 2023-01-24 02:03:29.163856: step: 172/463, loss: 0.1004822850227356 2023-01-24 02:03:29.813341: step: 174/463, loss: 0.15591993927955627 2023-01-24 02:03:30.468723: step: 176/463, loss: 0.09693150222301483 2023-01-24 02:03:31.059340: step: 178/463, loss: 0.10397061705589294 2023-01-24 02:03:31.697029: step: 180/463, loss: 0.0905425027012825 2023-01-24 02:03:32.334906: step: 182/463, loss: 0.09639497846364975 2023-01-24 02:03:32.994505: step: 184/463, loss: 0.3277745246887207 2023-01-24 02:03:33.567797: step: 186/463, loss: 0.12540876865386963 2023-01-24 02:03:34.196161: step: 188/463, loss: 0.06558287143707275 2023-01-24 02:03:34.907558: step: 190/463, loss: 0.07140213251113892 2023-01-24 02:03:35.535773: step: 192/463, loss: 0.08562780916690826 2023-01-24 02:03:36.107741: step: 194/463, loss: 0.10962515324354172 2023-01-24 02:03:36.760519: step: 196/463, loss: 0.09236840158700943 2023-01-24 02:03:37.415002: step: 198/463, loss: 0.09022004157304764 2023-01-24 02:03:37.983305: step: 200/463, loss: 0.10171408951282501 2023-01-24 02:03:38.592280: step: 202/463, loss: 0.08939573913812637 2023-01-24 02:03:39.247563: step: 204/463, loss: 0.13828085362911224 2023-01-24 02:03:39.858663: step: 206/463, loss: 0.3319973051548004 2023-01-24 02:03:40.486258: step: 208/463, loss: 0.0494571290910244 2023-01-24 02:03:41.124447: step: 210/463, loss: 0.13319213688373566 2023-01-24 02:03:41.780429: step: 212/463, loss: 0.1728745996952057 2023-01-24 02:03:42.372377: step: 214/463, loss: 0.049197569489479065 2023-01-24 02:03:42.996693: step: 216/463, loss: 0.11694785952568054 2023-01-24 02:03:43.570251: step: 218/463, loss: 0.10500544309616089 2023-01-24 02:03:44.190893: step: 220/463, loss: 0.19085471332073212 2023-01-24 02:03:44.799303: step: 222/463, loss: 0.02245273068547249 2023-01-24 02:03:45.461352: step: 224/463, loss: 1.0217676162719727 2023-01-24 02:03:46.089895: step: 226/463, loss: 0.13524484634399414 2023-01-24 02:03:46.786949: step: 228/463, loss: 0.24269038438796997 2023-01-24 02:03:47.390274: step: 230/463, loss: 0.06316595524549484 2023-01-24 02:03:48.011034: step: 232/463, loss: 0.09273193776607513 2023-01-24 02:03:48.642349: step: 234/463, loss: 0.5748989582061768 2023-01-24 02:03:49.233173: step: 236/463, loss: 0.02372279390692711 2023-01-24 02:03:49.804636: step: 238/463, loss: 0.24757127463817596 2023-01-24 02:03:50.372400: step: 240/463, loss: 0.18826375901699066 2023-01-24 02:03:51.070818: step: 242/463, loss: 0.09686724096536636 2023-01-24 02:03:51.650187: step: 244/463, loss: 0.06762459874153137 2023-01-24 02:03:52.266534: step: 246/463, loss: 0.11566980928182602 2023-01-24 02:03:52.837357: step: 248/463, loss: 0.1399739682674408 2023-01-24 02:03:53.509974: step: 250/463, loss: 0.07535848766565323 2023-01-24 02:03:54.132075: step: 252/463, loss: 0.05882122740149498 2023-01-24 02:03:54.696245: step: 254/463, loss: 0.13481780886650085 2023-01-24 02:03:55.327340: step: 256/463, loss: 0.08950658142566681 2023-01-24 02:03:56.055509: step: 258/463, loss: 0.06289654225111008 2023-01-24 02:03:56.659230: step: 260/463, loss: 0.07826128602027893 2023-01-24 02:03:57.225314: step: 262/463, loss: 0.08833647519350052 2023-01-24 02:03:57.860026: step: 264/463, loss: 0.06483607739210129 2023-01-24 02:03:58.509801: step: 266/463, loss: 0.08986536413431168 2023-01-24 02:03:59.102956: step: 268/463, loss: 0.08346419036388397 2023-01-24 02:03:59.793719: step: 270/463, loss: 0.06113533303141594 2023-01-24 02:04:00.348403: step: 272/463, loss: 0.3562765121459961 2023-01-24 02:04:00.948925: step: 274/463, loss: 1.230552077293396 2023-01-24 02:04:01.531076: step: 276/463, loss: 0.07371169328689575 2023-01-24 02:04:02.192221: step: 278/463, loss: 0.05225658044219017 2023-01-24 02:04:02.813413: step: 280/463, loss: 0.03248327970504761 2023-01-24 02:04:03.455607: step: 282/463, loss: 0.7758573293685913 2023-01-24 02:04:04.028079: step: 284/463, loss: 0.08258502185344696 2023-01-24 02:04:04.592256: step: 286/463, loss: 0.24196283519268036 2023-01-24 02:04:05.219307: step: 288/463, loss: 0.07256581634283066 2023-01-24 02:04:05.888439: step: 290/463, loss: 0.1190979927778244 2023-01-24 02:04:06.531286: step: 292/463, loss: 0.09783028066158295 2023-01-24 02:04:07.127423: step: 294/463, loss: 0.04431343078613281 2023-01-24 02:04:07.713493: step: 296/463, loss: 0.05923990160226822 2023-01-24 02:04:08.255284: step: 298/463, loss: 0.16111038625240326 2023-01-24 02:04:08.917874: step: 300/463, loss: 0.019586173817515373 2023-01-24 02:04:09.500611: step: 302/463, loss: 0.06555364280939102 2023-01-24 02:04:10.164780: step: 304/463, loss: 0.04023829847574234 2023-01-24 02:04:10.843207: step: 306/463, loss: 0.04585335776209831 2023-01-24 02:04:11.472662: step: 308/463, loss: 0.06712823361158371 2023-01-24 02:04:12.118601: step: 310/463, loss: 0.17280402779579163 2023-01-24 02:04:12.667111: step: 312/463, loss: 0.1898375302553177 2023-01-24 02:04:13.300407: step: 314/463, loss: 0.6103030443191528 2023-01-24 02:04:13.904789: step: 316/463, loss: 0.03551330044865608 2023-01-24 02:04:14.528570: step: 318/463, loss: 0.6151849031448364 2023-01-24 02:04:15.196179: step: 320/463, loss: 0.1630861908197403 2023-01-24 02:04:15.801051: step: 322/463, loss: 0.1280488818883896 2023-01-24 02:04:16.443516: step: 324/463, loss: 0.07145529985427856 2023-01-24 02:04:17.063442: step: 326/463, loss: 0.051909249275922775 2023-01-24 02:04:17.678482: step: 328/463, loss: 0.4025866389274597 2023-01-24 02:04:18.282252: step: 330/463, loss: 0.09855577349662781 2023-01-24 02:04:18.900145: step: 332/463, loss: 0.06750572472810745 2023-01-24 02:04:19.532595: step: 334/463, loss: 0.15437734127044678 2023-01-24 02:04:20.088537: step: 336/463, loss: 0.025198359042406082 2023-01-24 02:04:20.739334: step: 338/463, loss: 0.11712972074747086 2023-01-24 02:04:21.301929: step: 340/463, loss: 0.03912020102143288 2023-01-24 02:04:21.922229: step: 342/463, loss: 0.0591328926384449 2023-01-24 02:04:22.521006: step: 344/463, loss: 0.04068215563893318 2023-01-24 02:04:23.169563: step: 346/463, loss: 0.14015378057956696 2023-01-24 02:04:23.806694: step: 348/463, loss: 0.0755583867430687 2023-01-24 02:04:24.431990: step: 350/463, loss: 0.019134480506181717 2023-01-24 02:04:25.050227: step: 352/463, loss: 0.190431609749794 2023-01-24 02:04:25.686415: step: 354/463, loss: 0.0696704238653183 2023-01-24 02:04:26.232067: step: 356/463, loss: 0.1882435381412506 2023-01-24 02:04:26.892640: step: 358/463, loss: 0.16901789605617523 2023-01-24 02:04:27.544845: step: 360/463, loss: 0.9937379360198975 2023-01-24 02:04:28.223633: step: 362/463, loss: 0.024372436106204987 2023-01-24 02:04:28.830584: step: 364/463, loss: 0.07591821998357773 2023-01-24 02:04:29.448271: step: 366/463, loss: 0.21318279206752777 2023-01-24 02:04:30.087832: step: 368/463, loss: 1.2692232131958008 2023-01-24 02:04:30.700455: step: 370/463, loss: 0.0484287329018116 2023-01-24 02:04:31.278298: step: 372/463, loss: 0.16226132214069366 2023-01-24 02:04:31.901569: step: 374/463, loss: 0.0332791768014431 2023-01-24 02:04:32.548112: step: 376/463, loss: 0.0632445216178894 2023-01-24 02:04:33.157876: step: 378/463, loss: 0.020670965313911438 2023-01-24 02:04:33.797725: step: 380/463, loss: 0.06403839588165283 2023-01-24 02:04:34.470712: step: 382/463, loss: 0.034208815544843674 2023-01-24 02:04:35.193750: step: 384/463, loss: 0.0998130813241005 2023-01-24 02:04:35.785890: step: 386/463, loss: 0.04646628350019455 2023-01-24 02:04:36.376035: step: 388/463, loss: 0.03413665294647217 2023-01-24 02:04:37.001986: step: 390/463, loss: 0.04216647893190384 2023-01-24 02:04:37.662848: step: 392/463, loss: 0.06765627861022949 2023-01-24 02:04:38.275252: step: 394/463, loss: 0.19159431755542755 2023-01-24 02:04:38.886026: step: 396/463, loss: 0.1833222508430481 2023-01-24 02:04:39.553614: step: 398/463, loss: 0.05454331263899803 2023-01-24 02:04:40.190984: step: 400/463, loss: 0.03878122940659523 2023-01-24 02:04:40.917879: step: 402/463, loss: 0.12163243442773819 2023-01-24 02:04:41.540705: step: 404/463, loss: 0.08163407444953918 2023-01-24 02:04:42.109295: step: 406/463, loss: 0.08099263906478882 2023-01-24 02:04:42.677566: step: 408/463, loss: 0.0671556368470192 2023-01-24 02:04:43.301257: step: 410/463, loss: 0.3450915813446045 2023-01-24 02:04:43.899781: step: 412/463, loss: 0.031015567481517792 2023-01-24 02:04:44.542078: step: 414/463, loss: 0.16625258326530457 2023-01-24 02:04:45.188147: step: 416/463, loss: 0.18451786041259766 2023-01-24 02:04:45.879711: step: 418/463, loss: 0.16421553492546082 2023-01-24 02:04:46.442533: step: 420/463, loss: 0.41243690252304077 2023-01-24 02:04:47.037676: step: 422/463, loss: 0.18312746286392212 2023-01-24 02:04:47.661662: step: 424/463, loss: 0.1225733757019043 2023-01-24 02:04:48.257314: step: 426/463, loss: 0.05283263698220253 2023-01-24 02:04:48.859935: step: 428/463, loss: 0.025848938152194023 2023-01-24 02:04:49.555426: step: 430/463, loss: 0.07256818562746048 2023-01-24 02:04:50.153886: step: 432/463, loss: 0.14591927826404572 2023-01-24 02:04:50.802870: step: 434/463, loss: 0.1359742283821106 2023-01-24 02:04:51.464004: step: 436/463, loss: 0.28610509634017944 2023-01-24 02:04:52.092670: step: 438/463, loss: 0.016158733516931534 2023-01-24 02:04:52.693678: step: 440/463, loss: 0.13816548883914948 2023-01-24 02:04:53.538011: step: 442/463, loss: 0.7651013135910034 2023-01-24 02:04:54.177093: step: 444/463, loss: 0.3391355276107788 2023-01-24 02:04:54.766465: step: 446/463, loss: 0.08545777201652527 2023-01-24 02:04:55.406926: step: 448/463, loss: 0.39576491713523865 2023-01-24 02:04:56.075292: step: 450/463, loss: 0.1569921374320984 2023-01-24 02:04:56.700292: step: 452/463, loss: 0.03774363547563553 2023-01-24 02:04:57.397646: step: 454/463, loss: 0.14062732458114624 2023-01-24 02:04:58.029095: step: 456/463, loss: 0.12749691307544708 2023-01-24 02:04:58.644806: step: 458/463, loss: 0.1275521069765091 2023-01-24 02:04:59.264971: step: 460/463, loss: 1.315794825553894 2023-01-24 02:04:59.921893: step: 462/463, loss: 0.07987815886735916 2023-01-24 02:05:00.649512: step: 464/463, loss: 0.08078937232494354 2023-01-24 02:05:01.294428: step: 466/463, loss: 0.23071438074111938 2023-01-24 02:05:01.884445: step: 468/463, loss: 0.029704896733164787 2023-01-24 02:05:02.512059: step: 470/463, loss: 0.13874444365501404 2023-01-24 02:05:03.106504: step: 472/463, loss: 0.1677381694316864 2023-01-24 02:05:03.694798: step: 474/463, loss: 0.061464499682188034 2023-01-24 02:05:04.271245: step: 476/463, loss: 0.06742092221975327 2023-01-24 02:05:04.871872: step: 478/463, loss: 0.0413065031170845 2023-01-24 02:05:05.577708: step: 480/463, loss: 0.0989803597331047 2023-01-24 02:05:06.194212: step: 482/463, loss: 0.28596484661102295 2023-01-24 02:05:06.851118: step: 484/463, loss: 0.19452278316020966 2023-01-24 02:05:07.494208: step: 486/463, loss: 0.3935384750366211 2023-01-24 02:05:08.058741: step: 488/463, loss: 0.18445083498954773 2023-01-24 02:05:08.682421: step: 490/463, loss: 0.14163996279239655 2023-01-24 02:05:09.272037: step: 492/463, loss: 0.1293335258960724 2023-01-24 02:05:09.839952: step: 494/463, loss: 0.35400980710983276 2023-01-24 02:05:10.499439: step: 496/463, loss: 0.11709580570459366 2023-01-24 02:05:11.121493: step: 498/463, loss: 0.18629080057144165 2023-01-24 02:05:11.710587: step: 500/463, loss: 0.02366311103105545 2023-01-24 02:05:12.328223: step: 502/463, loss: 0.33345532417297363 2023-01-24 02:05:12.951741: step: 504/463, loss: 0.04570294916629791 2023-01-24 02:05:13.571958: step: 506/463, loss: 0.1046527773141861 2023-01-24 02:05:14.198145: step: 508/463, loss: 0.08069693297147751 2023-01-24 02:05:14.765718: step: 510/463, loss: 0.08436884731054306 2023-01-24 02:05:15.408690: step: 512/463, loss: 0.6150622367858887 2023-01-24 02:05:16.041418: step: 514/463, loss: 0.19387143850326538 2023-01-24 02:05:16.641884: step: 516/463, loss: 0.0584542416036129 2023-01-24 02:05:17.263318: step: 518/463, loss: 0.14595402777194977 2023-01-24 02:05:17.894723: step: 520/463, loss: 0.07883159816265106 2023-01-24 02:05:18.469010: step: 522/463, loss: 0.0434037409722805 2023-01-24 02:05:19.063403: step: 524/463, loss: 0.041953202337026596 2023-01-24 02:05:19.681447: step: 526/463, loss: 0.31554731726646423 2023-01-24 02:05:20.301841: step: 528/463, loss: 0.20852969586849213 2023-01-24 02:05:20.901501: step: 530/463, loss: 0.04690924286842346 2023-01-24 02:05:21.502322: step: 532/463, loss: 0.059421174228191376 2023-01-24 02:05:22.153575: step: 534/463, loss: 0.18664877116680145 2023-01-24 02:05:22.750865: step: 536/463, loss: 0.19141604006290436 2023-01-24 02:05:23.340023: step: 538/463, loss: 0.034036364406347275 2023-01-24 02:05:23.994486: step: 540/463, loss: 0.10108893364667892 2023-01-24 02:05:24.630661: step: 542/463, loss: 0.014809003099799156 2023-01-24 02:05:25.314456: step: 544/463, loss: 0.19320805370807648 2023-01-24 02:05:25.892045: step: 546/463, loss: 0.07855547964572906 2023-01-24 02:05:26.466110: step: 548/463, loss: 0.06752552092075348 2023-01-24 02:05:27.084844: step: 550/463, loss: 0.1004817858338356 2023-01-24 02:05:27.683476: step: 552/463, loss: 0.004086704459041357 2023-01-24 02:05:28.304114: step: 554/463, loss: 0.12679423391819 2023-01-24 02:05:28.945069: step: 556/463, loss: 0.5138322710990906 2023-01-24 02:05:29.607412: step: 558/463, loss: 0.11624351143836975 2023-01-24 02:05:30.138198: step: 560/463, loss: 0.09587419033050537 2023-01-24 02:05:30.740561: step: 562/463, loss: 0.15056060254573822 2023-01-24 02:05:31.310769: step: 564/463, loss: 0.11790279299020767 2023-01-24 02:05:31.935751: step: 566/463, loss: 0.08192232996225357 2023-01-24 02:05:32.541174: step: 568/463, loss: 0.5618078112602234 2023-01-24 02:05:33.132593: step: 570/463, loss: 0.054043881595134735 2023-01-24 02:05:33.780242: step: 572/463, loss: 0.06617221236228943 2023-01-24 02:05:34.462746: step: 574/463, loss: 0.12145736068487167 2023-01-24 02:05:35.082413: step: 576/463, loss: 0.08456163108348846 2023-01-24 02:05:35.733174: step: 578/463, loss: 0.6334165334701538 2023-01-24 02:05:36.442634: step: 580/463, loss: 0.11128795146942139 2023-01-24 02:05:37.027365: step: 582/463, loss: 0.08787459880113602 2023-01-24 02:05:37.657418: step: 584/463, loss: 0.19508840143680573 2023-01-24 02:05:38.272543: step: 586/463, loss: 0.03513340279459953 2023-01-24 02:05:38.886013: step: 588/463, loss: 0.06402891129255295 2023-01-24 02:05:39.551677: step: 590/463, loss: 0.1323527693748474 2023-01-24 02:05:40.185023: step: 592/463, loss: 0.12853868305683136 2023-01-24 02:05:40.776348: step: 594/463, loss: 0.03878480941057205 2023-01-24 02:05:41.332931: step: 596/463, loss: 0.18105536699295044 2023-01-24 02:05:41.906640: step: 598/463, loss: 0.2505473494529724 2023-01-24 02:05:42.584918: step: 600/463, loss: 0.06974760442972183 2023-01-24 02:05:43.250440: step: 602/463, loss: 0.15701879560947418 2023-01-24 02:05:43.844231: step: 604/463, loss: 0.03407521918416023 2023-01-24 02:05:44.443245: step: 606/463, loss: 0.14836502075195312 2023-01-24 02:05:45.081189: step: 608/463, loss: 0.15862950682640076 2023-01-24 02:05:45.671722: step: 610/463, loss: 0.051963452249765396 2023-01-24 02:05:46.321159: step: 612/463, loss: 0.057458147406578064 2023-01-24 02:05:46.965946: step: 614/463, loss: 0.13861650228500366 2023-01-24 02:05:47.583765: step: 616/463, loss: 0.22107192873954773 2023-01-24 02:05:48.327133: step: 618/463, loss: 0.2524477243423462 2023-01-24 02:05:49.033364: step: 620/463, loss: 0.20717266201972961 2023-01-24 02:05:49.717074: step: 622/463, loss: 0.051771584898233414 2023-01-24 02:05:50.349985: step: 624/463, loss: 0.14541447162628174 2023-01-24 02:05:51.112898: step: 626/463, loss: 0.14401060342788696 2023-01-24 02:05:51.787919: step: 628/463, loss: 0.04054912552237511 2023-01-24 02:05:52.380320: step: 630/463, loss: 0.12473101168870926 2023-01-24 02:05:52.940639: step: 632/463, loss: 0.2970418632030487 2023-01-24 02:05:53.552798: step: 634/463, loss: 0.08723095059394836 2023-01-24 02:05:54.140680: step: 636/463, loss: 0.3151036202907562 2023-01-24 02:05:54.798373: step: 638/463, loss: 0.1251479834318161 2023-01-24 02:05:55.449427: step: 640/463, loss: 0.17847107350826263 2023-01-24 02:05:55.992211: step: 642/463, loss: 0.11038211733102798 2023-01-24 02:05:56.628729: step: 644/463, loss: 0.0876292809844017 2023-01-24 02:05:57.252069: step: 646/463, loss: 0.18848489224910736 2023-01-24 02:05:57.914458: step: 648/463, loss: 0.06734482944011688 2023-01-24 02:05:58.605684: step: 650/463, loss: 0.1134590432047844 2023-01-24 02:05:59.184192: step: 652/463, loss: 0.20322325825691223 2023-01-24 02:05:59.772981: step: 654/463, loss: 0.032814424484968185 2023-01-24 02:06:00.405924: step: 656/463, loss: 0.08749209344387054 2023-01-24 02:06:01.038776: step: 658/463, loss: 0.13342177867889404 2023-01-24 02:06:01.628302: step: 660/463, loss: 0.05845380574464798 2023-01-24 02:06:02.257005: step: 662/463, loss: 0.30600234866142273 2023-01-24 02:06:02.830616: step: 664/463, loss: 0.11250080168247223 2023-01-24 02:06:03.476226: step: 666/463, loss: 0.04897581785917282 2023-01-24 02:06:04.125470: step: 668/463, loss: 0.05232224240899086 2023-01-24 02:06:04.709446: step: 670/463, loss: 0.10170705616474152 2023-01-24 02:06:05.328481: step: 672/463, loss: 0.07619114220142365 2023-01-24 02:06:06.010440: step: 674/463, loss: 0.08241769671440125 2023-01-24 02:06:06.633139: step: 676/463, loss: 0.08130361884832382 2023-01-24 02:06:07.228179: step: 678/463, loss: 0.12676593661308289 2023-01-24 02:06:07.804890: step: 680/463, loss: 0.0970408096909523 2023-01-24 02:06:08.554994: step: 682/463, loss: 0.08336395770311356 2023-01-24 02:06:09.222453: step: 684/463, loss: 1.0979692935943604 2023-01-24 02:06:09.787483: step: 686/463, loss: 0.04682242125272751 2023-01-24 02:06:10.325206: step: 688/463, loss: 0.07537610828876495 2023-01-24 02:06:10.928987: step: 690/463, loss: 0.04064637050032616 2023-01-24 02:06:11.598728: step: 692/463, loss: 0.15301351249217987 2023-01-24 02:06:12.204337: step: 694/463, loss: 0.039094872772693634 2023-01-24 02:06:12.811833: step: 696/463, loss: 0.44552844762802124 2023-01-24 02:06:13.447520: step: 698/463, loss: 0.2252071052789688 2023-01-24 02:06:14.121606: step: 700/463, loss: 0.0813409835100174 2023-01-24 02:06:14.699690: step: 702/463, loss: 0.09288883954286575 2023-01-24 02:06:15.332380: step: 704/463, loss: 0.24866166710853577 2023-01-24 02:06:15.928382: step: 706/463, loss: 0.15857137739658356 2023-01-24 02:06:16.559734: step: 708/463, loss: 0.05469042435288429 2023-01-24 02:06:17.156179: step: 710/463, loss: 0.049566250294446945 2023-01-24 02:06:17.791949: step: 712/463, loss: 0.08571276813745499 2023-01-24 02:06:18.433074: step: 714/463, loss: 0.7613111734390259 2023-01-24 02:06:19.005913: step: 716/463, loss: 0.05207202956080437 2023-01-24 02:06:19.600475: step: 718/463, loss: 0.16250920295715332 2023-01-24 02:06:20.203981: step: 720/463, loss: 0.07451052218675613 2023-01-24 02:06:20.891165: step: 722/463, loss: 0.20322565734386444 2023-01-24 02:06:21.512113: step: 724/463, loss: 0.08348311483860016 2023-01-24 02:06:22.111344: step: 726/463, loss: 0.04207231476902962 2023-01-24 02:06:22.776277: step: 728/463, loss: 0.0433342345058918 2023-01-24 02:06:23.427899: step: 730/463, loss: 0.13829071819782257 2023-01-24 02:06:24.109787: step: 732/463, loss: 0.19547854363918304 2023-01-24 02:06:24.698377: step: 734/463, loss: 0.04663119837641716 2023-01-24 02:06:25.311467: step: 736/463, loss: 0.059376638382673264 2023-01-24 02:06:25.959117: step: 738/463, loss: 0.1038469672203064 2023-01-24 02:06:26.523369: step: 740/463, loss: 0.06900583952665329 2023-01-24 02:06:27.149963: step: 742/463, loss: 0.10799318552017212 2023-01-24 02:06:27.801456: step: 744/463, loss: 1.1072874069213867 2023-01-24 02:06:28.420071: step: 746/463, loss: 0.06331072747707367 2023-01-24 02:06:29.012547: step: 748/463, loss: 0.07384846359491348 2023-01-24 02:06:29.594212: step: 750/463, loss: 0.08801267296075821 2023-01-24 02:06:30.209058: step: 752/463, loss: 0.019884739071130753 2023-01-24 02:06:30.930997: step: 754/463, loss: 0.1052960529923439 2023-01-24 02:06:31.480498: step: 756/463, loss: 0.28753241896629333 2023-01-24 02:06:32.119543: step: 758/463, loss: 0.2592926025390625 2023-01-24 02:06:32.676703: step: 760/463, loss: 0.036619726568460464 2023-01-24 02:06:33.317983: step: 762/463, loss: 0.0926491990685463 2023-01-24 02:06:33.918560: step: 764/463, loss: 0.03014616295695305 2023-01-24 02:06:34.538730: step: 766/463, loss: 0.04071695730090141 2023-01-24 02:06:35.159417: step: 768/463, loss: 0.03062344342470169 2023-01-24 02:06:35.762472: step: 770/463, loss: 0.15976634621620178 2023-01-24 02:06:36.430333: step: 772/463, loss: 0.14107783138751984 2023-01-24 02:06:37.190521: step: 774/463, loss: 0.013370687142014503 2023-01-24 02:06:37.792012: step: 776/463, loss: 0.06648870557546616 2023-01-24 02:06:38.392219: step: 778/463, loss: 0.0666055828332901 2023-01-24 02:06:38.973339: step: 780/463, loss: 1.1472997665405273 2023-01-24 02:06:39.567281: step: 782/463, loss: 0.06279977411031723 2023-01-24 02:06:40.183333: step: 784/463, loss: 0.05403710901737213 2023-01-24 02:06:40.895609: step: 786/463, loss: 0.06123936548829079 2023-01-24 02:06:41.437031: step: 788/463, loss: 0.07915898412466049 2023-01-24 02:06:42.114775: step: 790/463, loss: 0.11340490728616714 2023-01-24 02:06:42.793827: step: 792/463, loss: 0.35799869894981384 2023-01-24 02:06:43.409364: step: 794/463, loss: 0.08787037432193756 2023-01-24 02:06:44.017019: step: 796/463, loss: 0.09761966019868851 2023-01-24 02:06:44.600551: step: 798/463, loss: 0.049700889736413956 2023-01-24 02:06:45.247659: step: 800/463, loss: 0.1048021912574768 2023-01-24 02:06:45.866439: step: 802/463, loss: 0.12816712260246277 2023-01-24 02:06:46.494675: step: 804/463, loss: 0.06761758774518967 2023-01-24 02:06:47.091849: step: 806/463, loss: 0.32025444507598877 2023-01-24 02:06:47.761452: step: 808/463, loss: 0.15245705842971802 2023-01-24 02:06:48.360095: step: 810/463, loss: 0.3774135708808899 2023-01-24 02:06:48.965256: step: 812/463, loss: 0.11960123479366302 2023-01-24 02:06:49.637595: step: 814/463, loss: 0.2792623043060303 2023-01-24 02:06:50.280240: step: 816/463, loss: 0.11186840385198593 2023-01-24 02:06:50.878671: step: 818/463, loss: 0.3528594374656677 2023-01-24 02:06:51.560084: step: 820/463, loss: 0.0696972981095314 2023-01-24 02:06:52.217993: step: 822/463, loss: 0.03897329792380333 2023-01-24 02:06:52.828117: step: 824/463, loss: 0.09429900348186493 2023-01-24 02:06:53.466004: step: 826/463, loss: 0.29325351119041443 2023-01-24 02:06:54.179559: step: 828/463, loss: 0.1400083750486374 2023-01-24 02:06:54.799731: step: 830/463, loss: 1.169524073600769 2023-01-24 02:06:55.430369: step: 832/463, loss: 0.29024481773376465 2023-01-24 02:06:56.097600: step: 834/463, loss: 0.20523470640182495 2023-01-24 02:06:56.711668: step: 836/463, loss: 0.8502233624458313 2023-01-24 02:06:57.336219: step: 838/463, loss: 0.13381369411945343 2023-01-24 02:06:57.885175: step: 840/463, loss: 0.07221262902021408 2023-01-24 02:06:58.557359: step: 842/463, loss: 0.05190090835094452 2023-01-24 02:06:59.168608: step: 844/463, loss: 0.054994385689496994 2023-01-24 02:06:59.850382: step: 846/463, loss: 0.12340638786554337 2023-01-24 02:07:00.527013: step: 848/463, loss: 0.07367422431707382 2023-01-24 02:07:01.165954: step: 850/463, loss: 0.06786884367465973 2023-01-24 02:07:01.777193: step: 852/463, loss: 0.10432659834623337 2023-01-24 02:07:02.398899: step: 854/463, loss: 0.13188181817531586 2023-01-24 02:07:03.075567: step: 856/463, loss: 0.24995963275432587 2023-01-24 02:07:03.657220: step: 858/463, loss: 0.18584007024765015 2023-01-24 02:07:04.382067: step: 860/463, loss: 0.044459499418735504 2023-01-24 02:07:04.984277: step: 862/463, loss: 0.20221099257469177 2023-01-24 02:07:05.589608: step: 864/463, loss: 0.03824824467301369 2023-01-24 02:07:06.265871: step: 866/463, loss: 0.11863574385643005 2023-01-24 02:07:06.882493: step: 868/463, loss: 0.12545019388198853 2023-01-24 02:07:07.432796: step: 870/463, loss: 0.21239539980888367 2023-01-24 02:07:08.025293: step: 872/463, loss: 0.20869790017604828 2023-01-24 02:07:08.645571: step: 874/463, loss: 0.37494558095932007 2023-01-24 02:07:09.252572: step: 876/463, loss: 0.1598743051290512 2023-01-24 02:07:09.922463: step: 878/463, loss: 0.05281732976436615 2023-01-24 02:07:10.611758: step: 880/463, loss: 0.546518862247467 2023-01-24 02:07:11.202805: step: 882/463, loss: 0.09117013216018677 2023-01-24 02:07:11.814122: step: 884/463, loss: 0.24250690639019012 2023-01-24 02:07:12.481732: step: 886/463, loss: 0.1971885710954666 2023-01-24 02:07:13.091442: step: 888/463, loss: 0.09749986231327057 2023-01-24 02:07:13.712578: step: 890/463, loss: 0.09703745692968369 2023-01-24 02:07:14.318493: step: 892/463, loss: 0.5026171803474426 2023-01-24 02:07:14.946414: step: 894/463, loss: 0.26787233352661133 2023-01-24 02:07:15.607016: step: 896/463, loss: 0.04004192352294922 2023-01-24 02:07:16.265066: step: 898/463, loss: 0.08530757576227188 2023-01-24 02:07:16.879120: step: 900/463, loss: 0.08850790560245514 2023-01-24 02:07:17.530318: step: 902/463, loss: 0.08501417934894562 2023-01-24 02:07:18.242457: step: 904/463, loss: 0.06405603885650635 2023-01-24 02:07:18.804366: step: 906/463, loss: 0.023974983021616936 2023-01-24 02:07:19.452817: step: 908/463, loss: 0.1296330839395523 2023-01-24 02:07:20.102884: step: 910/463, loss: 0.08433392643928528 2023-01-24 02:07:20.667008: step: 912/463, loss: 0.028403690084815025 2023-01-24 02:07:21.295305: step: 914/463, loss: 0.16750521957874298 2023-01-24 02:07:21.900572: step: 916/463, loss: 0.07274569571018219 2023-01-24 02:07:22.563775: step: 918/463, loss: 0.2535453736782074 2023-01-24 02:07:23.195868: step: 920/463, loss: 0.039651405066251755 2023-01-24 02:07:23.827227: step: 922/463, loss: 0.03896435722708702 2023-01-24 02:07:24.491863: step: 924/463, loss: 0.5061761140823364 2023-01-24 02:07:25.134606: step: 926/463, loss: 0.2580636441707611 ================================================== Loss: 0.165 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3113089390962672, 'r': 0.3001065340909091, 'f1': 0.30560511089681774}, 'combined': 0.225182713292392, 'epoch': 15} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3570838972372597, 'r': 0.30375521040095277, 'f1': 0.32826778283066627}, 'combined': 0.2309421587753431, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30817978087649406, 'r': 0.2930042613636364, 'f1': 0.30040048543689324}, 'combined': 0.221347726111395, 'epoch': 15} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36142543345829437, 'r': 0.30618573838824936, 'f1': 0.3315202557489792}, 'combined': 0.2353793815817752, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32758580347508204, 'r': 0.31018086515003024, 'f1': 0.318645840027419}, 'combined': 0.23479167159915085, 'epoch': 15} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3652422420448443, 'r': 0.2858140164822537, 'f1': 0.320683046420559}, 'combined': 0.2276849629585969, 'epoch': 15} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2620614035087719, 'r': 0.2845238095238095, 'f1': 0.27283105022831045}, 'combined': 0.18188736681887363, 'epoch': 15} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.27205882352941174, 'r': 0.40217391304347827, 'f1': 0.32456140350877194}, 'combined': 0.16228070175438597, 'epoch': 15} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 15} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32931238967343335, 'r': 0.3212043194731405, 'f1': 0.3252078248932276}, 'combined': 0.2396268183423782, 'epoch': 10} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38073307869771683, 'r': 0.30299176943483225, 'f1': 0.3374427286417082}, 'combined': 0.23958433733561282, 'epoch': 10} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31666666666666665, 'r': 0.41304347826086957, 'f1': 0.3584905660377358}, 'combined': 0.1792452830188679, 'epoch': 10} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3394620582120582, 'r': 0.3092447916666667, 'f1': 0.32364965312190286}, 'combined': 0.23847869177403366, 'epoch': 7} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36468931207169214, 'r': 0.2587193810770381, 'f1': 0.30269771384817323}, 'combined': 0.214915376832203, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 7} ****************************** Epoch: 16 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:10:03.781207: step: 2/463, loss: 0.2913963496685028 2023-01-24 02:10:04.380115: step: 4/463, loss: 0.04182754084467888 2023-01-24 02:10:04.992216: step: 6/463, loss: 0.05101656913757324 2023-01-24 02:10:05.573303: step: 8/463, loss: 0.02856874279677868 2023-01-24 02:10:06.138162: step: 10/463, loss: 0.06305862218141556 2023-01-24 02:10:06.729106: step: 12/463, loss: 0.1590539962053299 2023-01-24 02:10:07.311663: step: 14/463, loss: 0.07042015343904495 2023-01-24 02:10:07.964531: step: 16/463, loss: 0.034858863800764084 2023-01-24 02:10:08.556494: step: 18/463, loss: 0.054131049662828445 2023-01-24 02:10:09.252795: step: 20/463, loss: 0.05718056112527847 2023-01-24 02:10:09.843032: step: 22/463, loss: 0.7192906737327576 2023-01-24 02:10:10.486191: step: 24/463, loss: 0.05905047804117203 2023-01-24 02:10:11.127534: step: 26/463, loss: 0.2823033332824707 2023-01-24 02:10:11.745497: step: 28/463, loss: 0.09095868468284607 2023-01-24 02:10:12.395834: step: 30/463, loss: 0.10659557580947876 2023-01-24 02:10:12.995463: step: 32/463, loss: 0.1005793958902359 2023-01-24 02:10:13.735285: step: 34/463, loss: 0.08173109591007233 2023-01-24 02:10:14.384284: step: 36/463, loss: 0.050920549780130386 2023-01-24 02:10:14.968832: step: 38/463, loss: 0.023472312837839127 2023-01-24 02:10:15.646956: step: 40/463, loss: 0.12075196951627731 2023-01-24 02:10:16.353537: step: 42/463, loss: 0.11275298148393631 2023-01-24 02:10:17.014093: step: 44/463, loss: 1.3385741710662842 2023-01-24 02:10:17.680956: step: 46/463, loss: 0.3201028108596802 2023-01-24 02:10:18.356409: step: 48/463, loss: 0.1128128170967102 2023-01-24 02:10:18.973932: step: 50/463, loss: 0.08292372524738312 2023-01-24 02:10:19.574418: step: 52/463, loss: 0.06245117262005806 2023-01-24 02:10:20.217158: step: 54/463, loss: 0.06557394564151764 2023-01-24 02:10:20.850738: step: 56/463, loss: 0.13119153678417206 2023-01-24 02:10:21.497907: step: 58/463, loss: 0.14556430280208588 2023-01-24 02:10:22.139510: step: 60/463, loss: 0.06358668953180313 2023-01-24 02:10:22.825154: step: 62/463, loss: 0.14330533146858215 2023-01-24 02:10:23.440735: step: 64/463, loss: 0.06443493068218231 2023-01-24 02:10:24.052048: step: 66/463, loss: 0.11028265953063965 2023-01-24 02:10:24.674656: step: 68/463, loss: 0.7357125282287598 2023-01-24 02:10:25.338979: step: 70/463, loss: 6.936621189117432 2023-01-24 02:10:25.934995: step: 72/463, loss: 0.03837861493229866 2023-01-24 02:10:26.520135: step: 74/463, loss: 0.049534283578395844 2023-01-24 02:10:27.143487: step: 76/463, loss: 0.6876099705696106 2023-01-24 02:10:27.821603: step: 78/463, loss: 0.1378263533115387 2023-01-24 02:10:28.566498: step: 80/463, loss: 0.02922586351633072 2023-01-24 02:10:29.136531: step: 82/463, loss: 0.05071862041950226 2023-01-24 02:10:29.831169: step: 84/463, loss: 0.08514934778213501 2023-01-24 02:10:30.546957: step: 86/463, loss: 0.13848863542079926 2023-01-24 02:10:31.198822: step: 88/463, loss: 0.9525688886642456 2023-01-24 02:10:31.831740: step: 90/463, loss: 0.05686838924884796 2023-01-24 02:10:32.437672: step: 92/463, loss: 0.08867298811674118 2023-01-24 02:10:33.045885: step: 94/463, loss: 0.032017968595027924 2023-01-24 02:10:33.632511: step: 96/463, loss: 0.22102946043014526 2023-01-24 02:10:34.318498: step: 98/463, loss: 0.07132144272327423 2023-01-24 02:10:34.878996: step: 100/463, loss: 0.00511888274922967 2023-01-24 02:10:35.507158: step: 102/463, loss: 0.025969967246055603 2023-01-24 02:10:36.086047: step: 104/463, loss: 0.09879129379987717 2023-01-24 02:10:36.741884: step: 106/463, loss: 0.08674966543912888 2023-01-24 02:10:37.310411: step: 108/463, loss: 0.0846218466758728 2023-01-24 02:10:37.905660: step: 110/463, loss: 0.05746942013502121 2023-01-24 02:10:38.541811: step: 112/463, loss: 0.05283408239483833 2023-01-24 02:10:39.155519: step: 114/463, loss: 0.09668025374412537 2023-01-24 02:10:39.730186: step: 116/463, loss: 0.7979894280433655 2023-01-24 02:10:40.320324: step: 118/463, loss: 0.2613188326358795 2023-01-24 02:10:40.943691: step: 120/463, loss: 0.18348784744739532 2023-01-24 02:10:41.627848: step: 122/463, loss: 0.10874827951192856 2023-01-24 02:10:42.240546: step: 124/463, loss: 0.019415101036429405 2023-01-24 02:10:42.817593: step: 126/463, loss: 0.6263710260391235 2023-01-24 02:10:43.393992: step: 128/463, loss: 0.027255920693278313 2023-01-24 02:10:44.011053: step: 130/463, loss: 0.1701149344444275 2023-01-24 02:10:44.738642: step: 132/463, loss: 0.0449967123568058 2023-01-24 02:10:45.405470: step: 134/463, loss: 0.051819611340761185 2023-01-24 02:10:46.073259: step: 136/463, loss: 0.15255579352378845 2023-01-24 02:10:46.672534: step: 138/463, loss: 0.016846580430865288 2023-01-24 02:10:47.356997: step: 140/463, loss: 0.4620560109615326 2023-01-24 02:10:47.916915: step: 142/463, loss: 0.1192578375339508 2023-01-24 02:10:48.558352: step: 144/463, loss: 0.20602180063724518 2023-01-24 02:10:49.164952: step: 146/463, loss: 0.05257246643304825 2023-01-24 02:10:49.837528: step: 148/463, loss: 0.8619434833526611 2023-01-24 02:10:50.525250: step: 150/463, loss: 0.08631691336631775 2023-01-24 02:10:51.164638: step: 152/463, loss: 0.07517030835151672 2023-01-24 02:10:51.795937: step: 154/463, loss: 0.32205599546432495 2023-01-24 02:10:52.426976: step: 156/463, loss: 0.24659520387649536 2023-01-24 02:10:53.049310: step: 158/463, loss: 0.028219208121299744 2023-01-24 02:10:53.718930: step: 160/463, loss: 0.11022521555423737 2023-01-24 02:10:54.318804: step: 162/463, loss: 0.09095776826143265 2023-01-24 02:10:54.888951: step: 164/463, loss: 0.12615244090557098 2023-01-24 02:10:55.497531: step: 166/463, loss: 0.3455350995063782 2023-01-24 02:10:56.235780: step: 168/463, loss: 0.057068292051553726 2023-01-24 02:10:56.847432: step: 170/463, loss: 0.06072228029370308 2023-01-24 02:10:57.473095: step: 172/463, loss: 0.11259964108467102 2023-01-24 02:10:58.071374: step: 174/463, loss: 0.14218805730342865 2023-01-24 02:10:58.688637: step: 176/463, loss: 0.11040753126144409 2023-01-24 02:10:59.253146: step: 178/463, loss: 0.02022375352680683 2023-01-24 02:10:59.828707: step: 180/463, loss: 0.02123785950243473 2023-01-24 02:11:00.455155: step: 182/463, loss: 0.04386677220463753 2023-01-24 02:11:01.125449: step: 184/463, loss: 0.04755381867289543 2023-01-24 02:11:01.813113: step: 186/463, loss: 0.0792277380824089 2023-01-24 02:11:02.432867: step: 188/463, loss: 0.07259953767061234 2023-01-24 02:11:03.071489: step: 190/463, loss: 0.10129690915346146 2023-01-24 02:11:03.688627: step: 192/463, loss: 0.07336320728063583 2023-01-24 02:11:04.312918: step: 194/463, loss: 0.2157449722290039 2023-01-24 02:11:04.941192: step: 196/463, loss: 0.10400427877902985 2023-01-24 02:11:05.505248: step: 198/463, loss: 0.008940343745052814 2023-01-24 02:11:06.118509: step: 200/463, loss: 0.07371383905410767 2023-01-24 02:11:06.728966: step: 202/463, loss: 0.07834586501121521 2023-01-24 02:11:07.313433: step: 204/463, loss: 0.05166313424706459 2023-01-24 02:11:07.937617: step: 206/463, loss: 0.14689359068870544 2023-01-24 02:11:08.576546: step: 208/463, loss: 0.22977465391159058 2023-01-24 02:11:09.214980: step: 210/463, loss: 0.12805192172527313 2023-01-24 02:11:09.844601: step: 212/463, loss: 0.050497692078351974 2023-01-24 02:11:10.506594: step: 214/463, loss: 0.0565444752573967 2023-01-24 02:11:11.100675: step: 216/463, loss: 0.22361978888511658 2023-01-24 02:11:11.683605: step: 218/463, loss: 0.40682244300842285 2023-01-24 02:11:12.414837: step: 220/463, loss: 0.0518532358109951 2023-01-24 02:11:12.975617: step: 222/463, loss: 0.3018381893634796 2023-01-24 02:11:13.601654: step: 224/463, loss: 0.2839329242706299 2023-01-24 02:11:14.252837: step: 226/463, loss: 0.03541671857237816 2023-01-24 02:11:14.959547: step: 228/463, loss: 0.15210461616516113 2023-01-24 02:11:15.592704: step: 230/463, loss: 0.5781492590904236 2023-01-24 02:11:16.231385: step: 232/463, loss: 0.06265892833471298 2023-01-24 02:11:16.914799: step: 234/463, loss: 0.14596182107925415 2023-01-24 02:11:17.464817: step: 236/463, loss: 0.7924005389213562 2023-01-24 02:11:18.050307: step: 238/463, loss: 0.08466563373804092 2023-01-24 02:11:18.643666: step: 240/463, loss: 0.05961338058114052 2023-01-24 02:11:19.291084: step: 242/463, loss: 0.032194651663303375 2023-01-24 02:11:19.933526: step: 244/463, loss: 0.10812166333198547 2023-01-24 02:11:20.583038: step: 246/463, loss: 0.17903758585453033 2023-01-24 02:11:21.193477: step: 248/463, loss: 0.07014741748571396 2023-01-24 02:11:21.753915: step: 250/463, loss: 0.11321991682052612 2023-01-24 02:11:22.355172: step: 252/463, loss: 0.11420640349388123 2023-01-24 02:11:23.044888: step: 254/463, loss: 0.2799254357814789 2023-01-24 02:11:23.621104: step: 256/463, loss: 0.08883524686098099 2023-01-24 02:11:24.205745: step: 258/463, loss: 0.09584785252809525 2023-01-24 02:11:24.883970: step: 260/463, loss: 0.014304104261100292 2023-01-24 02:11:25.528054: step: 262/463, loss: 0.14867983758449554 2023-01-24 02:11:26.128005: step: 264/463, loss: 0.2609354555606842 2023-01-24 02:11:26.703015: step: 266/463, loss: 0.1269156038761139 2023-01-24 02:11:27.310183: step: 268/463, loss: 0.0609101839363575 2023-01-24 02:11:27.942082: step: 270/463, loss: 0.03929482027888298 2023-01-24 02:11:28.503746: step: 272/463, loss: 0.1015809178352356 2023-01-24 02:11:29.137125: step: 274/463, loss: 0.03814343735575676 2023-01-24 02:11:29.760602: step: 276/463, loss: 0.23390695452690125 2023-01-24 02:11:30.367984: step: 278/463, loss: 0.021549632772803307 2023-01-24 02:11:30.998518: step: 280/463, loss: 0.04204830154776573 2023-01-24 02:11:31.655279: step: 282/463, loss: 0.7377480268478394 2023-01-24 02:11:32.294132: step: 284/463, loss: 0.0883864164352417 2023-01-24 02:11:32.928491: step: 286/463, loss: 0.03172440454363823 2023-01-24 02:11:33.560086: step: 288/463, loss: 0.027946872636675835 2023-01-24 02:11:34.142382: step: 290/463, loss: 0.07397580146789551 2023-01-24 02:11:34.795584: step: 292/463, loss: 0.22440029680728912 2023-01-24 02:11:35.357521: step: 294/463, loss: 0.06464869529008865 2023-01-24 02:11:36.015114: step: 296/463, loss: 0.16162537038326263 2023-01-24 02:11:36.633497: step: 298/463, loss: 0.20214436948299408 2023-01-24 02:11:37.245923: step: 300/463, loss: 0.11136691272258759 2023-01-24 02:11:37.829182: step: 302/463, loss: 0.051797281950712204 2023-01-24 02:11:38.439511: step: 304/463, loss: 0.5902112722396851 2023-01-24 02:11:39.009825: step: 306/463, loss: 0.04924645647406578 2023-01-24 02:11:39.667221: step: 308/463, loss: 0.02982781082391739 2023-01-24 02:11:40.285068: step: 310/463, loss: 0.26291364431381226 2023-01-24 02:11:40.971728: step: 312/463, loss: 0.8258624076843262 2023-01-24 02:11:41.565248: step: 314/463, loss: 0.026051459833979607 2023-01-24 02:11:42.230727: step: 316/463, loss: 0.02704383060336113 2023-01-24 02:11:42.844310: step: 318/463, loss: 0.04543539881706238 2023-01-24 02:11:43.378780: step: 320/463, loss: 0.019694460555911064 2023-01-24 02:11:43.984426: step: 322/463, loss: 0.05132810398936272 2023-01-24 02:11:44.593132: step: 324/463, loss: 0.09404495358467102 2023-01-24 02:11:45.203374: step: 326/463, loss: 0.032932598143815994 2023-01-24 02:11:45.891025: step: 328/463, loss: 0.2734318673610687 2023-01-24 02:11:46.453732: step: 330/463, loss: 0.23885181546211243 2023-01-24 02:11:47.109254: step: 332/463, loss: 0.11816208064556122 2023-01-24 02:11:47.688862: step: 334/463, loss: 0.06376126408576965 2023-01-24 02:11:48.328859: step: 336/463, loss: 0.011529277078807354 2023-01-24 02:11:48.935292: step: 338/463, loss: 0.11591578274965286 2023-01-24 02:11:49.612369: step: 340/463, loss: 0.07801269739866257 2023-01-24 02:11:50.343869: step: 342/463, loss: 0.02974596433341503 2023-01-24 02:11:50.950613: step: 344/463, loss: 0.28842779994010925 2023-01-24 02:11:51.741572: step: 346/463, loss: 0.061843808740377426 2023-01-24 02:11:52.375142: step: 348/463, loss: 0.03825025260448456 2023-01-24 02:11:53.064075: step: 350/463, loss: 0.05216598138213158 2023-01-24 02:11:53.703788: step: 352/463, loss: 0.15878503024578094 2023-01-24 02:11:54.314159: step: 354/463, loss: 0.08859121799468994 2023-01-24 02:11:54.899422: step: 356/463, loss: 0.051707178354263306 2023-01-24 02:11:55.444318: step: 358/463, loss: 0.10792721062898636 2023-01-24 02:11:56.076608: step: 360/463, loss: 0.14859209954738617 2023-01-24 02:11:56.689531: step: 362/463, loss: 0.08033035695552826 2023-01-24 02:11:57.306865: step: 364/463, loss: 0.009696626104414463 2023-01-24 02:11:58.007736: step: 366/463, loss: 0.050398990511894226 2023-01-24 02:11:58.630990: step: 368/463, loss: 0.09118001908063889 2023-01-24 02:11:59.249574: step: 370/463, loss: 0.9347143173217773 2023-01-24 02:11:59.902119: step: 372/463, loss: 0.062182530760765076 2023-01-24 02:12:00.545011: step: 374/463, loss: 0.06313618272542953 2023-01-24 02:12:01.137448: step: 376/463, loss: 0.10903055965900421 2023-01-24 02:12:01.827339: step: 378/463, loss: 0.0948883593082428 2023-01-24 02:12:02.499802: step: 380/463, loss: 0.03903518617153168 2023-01-24 02:12:03.111220: step: 382/463, loss: 0.0341244712471962 2023-01-24 02:12:03.720664: step: 384/463, loss: 0.2753047049045563 2023-01-24 02:12:04.427839: step: 386/463, loss: 2.3943796157836914 2023-01-24 02:12:05.054252: step: 388/463, loss: 0.09915794432163239 2023-01-24 02:12:05.702564: step: 390/463, loss: 0.05849439650774002 2023-01-24 02:12:06.338114: step: 392/463, loss: 0.18315984308719635 2023-01-24 02:12:06.906016: step: 394/463, loss: 2.675945281982422 2023-01-24 02:12:07.533561: step: 396/463, loss: 0.03818321228027344 2023-01-24 02:12:08.173891: step: 398/463, loss: 0.16692696511745453 2023-01-24 02:12:08.878466: step: 400/463, loss: 0.09519656747579575 2023-01-24 02:12:09.481501: step: 402/463, loss: 0.321374773979187 2023-01-24 02:12:10.158733: step: 404/463, loss: 0.3765060007572174 2023-01-24 02:12:10.731316: step: 406/463, loss: 0.7029393315315247 2023-01-24 02:12:11.275740: step: 408/463, loss: 0.10345228761434555 2023-01-24 02:12:11.895428: step: 410/463, loss: 0.05593247711658478 2023-01-24 02:12:12.513643: step: 412/463, loss: 0.28402405977249146 2023-01-24 02:12:13.166853: step: 414/463, loss: 0.03231664001941681 2023-01-24 02:12:13.751670: step: 416/463, loss: 0.09443497657775879 2023-01-24 02:12:14.330390: step: 418/463, loss: 0.055081263184547424 2023-01-24 02:12:14.928782: step: 420/463, loss: 0.282522976398468 2023-01-24 02:12:15.508717: step: 422/463, loss: 0.22225913405418396 2023-01-24 02:12:16.166003: step: 424/463, loss: 0.021947884932160378 2023-01-24 02:12:16.783565: step: 426/463, loss: 0.48614659905433655 2023-01-24 02:12:17.397226: step: 428/463, loss: 0.35980114340782166 2023-01-24 02:12:18.018688: step: 430/463, loss: 0.1040845587849617 2023-01-24 02:12:18.607122: step: 432/463, loss: 0.0753161609172821 2023-01-24 02:12:19.222609: step: 434/463, loss: 0.055321697145700455 2023-01-24 02:12:19.848770: step: 436/463, loss: 0.3290179371833801 2023-01-24 02:12:20.444327: step: 438/463, loss: 0.1040557324886322 2023-01-24 02:12:21.056163: step: 440/463, loss: 0.0917719379067421 2023-01-24 02:12:21.656232: step: 442/463, loss: 0.15578919649124146 2023-01-24 02:12:22.246454: step: 444/463, loss: 0.09460049122571945 2023-01-24 02:12:22.846190: step: 446/463, loss: 0.12242695689201355 2023-01-24 02:12:23.440820: step: 448/463, loss: 0.0048758587799966335 2023-01-24 02:12:24.126129: step: 450/463, loss: 0.1127769872546196 2023-01-24 02:12:24.810638: step: 452/463, loss: 0.18970920145511627 2023-01-24 02:12:25.364023: step: 454/463, loss: 0.03840430825948715 2023-01-24 02:12:25.927659: step: 456/463, loss: 0.02626114711165428 2023-01-24 02:12:26.486291: step: 458/463, loss: 0.03433183580636978 2023-01-24 02:12:27.058708: step: 460/463, loss: 0.0974435806274414 2023-01-24 02:12:27.781510: step: 462/463, loss: 0.05579617992043495 2023-01-24 02:12:28.386298: step: 464/463, loss: 0.2703886330127716 2023-01-24 02:12:29.006938: step: 466/463, loss: 0.2712680697441101 2023-01-24 02:12:29.566487: step: 468/463, loss: 0.01869746670126915 2023-01-24 02:12:30.177533: step: 470/463, loss: 0.06799181550741196 2023-01-24 02:12:30.798502: step: 472/463, loss: 1.1552932262420654 2023-01-24 02:12:31.400366: step: 474/463, loss: 0.19610624015331268 2023-01-24 02:12:32.062340: step: 476/463, loss: 0.24501655995845795 2023-01-24 02:12:32.701045: step: 478/463, loss: 0.030422119423747063 2023-01-24 02:12:33.330758: step: 480/463, loss: 0.12276224792003632 2023-01-24 02:12:33.974719: step: 482/463, loss: 0.044159166514873505 2023-01-24 02:12:34.587071: step: 484/463, loss: 0.12751327455043793 2023-01-24 02:12:35.166914: step: 486/463, loss: 1.0999943017959595 2023-01-24 02:12:35.781813: step: 488/463, loss: 0.16189442574977875 2023-01-24 02:12:36.402128: step: 490/463, loss: 0.07988111674785614 2023-01-24 02:12:37.054230: step: 492/463, loss: 0.17654818296432495 2023-01-24 02:12:37.715303: step: 494/463, loss: 0.12888847291469574 2023-01-24 02:12:38.294805: step: 496/463, loss: 0.04206554964184761 2023-01-24 02:12:38.881936: step: 498/463, loss: 0.059632062911987305 2023-01-24 02:12:39.496061: step: 500/463, loss: 0.561504065990448 2023-01-24 02:12:40.097936: step: 502/463, loss: 0.39858028292655945 2023-01-24 02:12:40.764311: step: 504/463, loss: 0.11772263050079346 2023-01-24 02:12:41.406684: step: 506/463, loss: 0.08277016133069992 2023-01-24 02:12:42.020292: step: 508/463, loss: 0.05144251883029938 2023-01-24 02:12:42.569008: step: 510/463, loss: 0.14916586875915527 2023-01-24 02:12:43.138017: step: 512/463, loss: 0.11689009517431259 2023-01-24 02:12:43.798149: step: 514/463, loss: 0.03516891226172447 2023-01-24 02:12:44.424303: step: 516/463, loss: 0.04014124721288681 2023-01-24 02:12:44.961144: step: 518/463, loss: 0.03278473764657974 2023-01-24 02:12:45.542103: step: 520/463, loss: 0.058791108429431915 2023-01-24 02:12:46.181613: step: 522/463, loss: 0.05252000316977501 2023-01-24 02:12:46.825910: step: 524/463, loss: 0.07872029393911362 2023-01-24 02:12:47.383651: step: 526/463, loss: 0.1172046884894371 2023-01-24 02:12:48.049363: step: 528/463, loss: 0.29004091024398804 2023-01-24 02:12:48.614007: step: 530/463, loss: 0.15440554916858673 2023-01-24 02:12:49.258378: step: 532/463, loss: 0.5038001537322998 2023-01-24 02:12:49.840057: step: 534/463, loss: 0.30311453342437744 2023-01-24 02:12:50.450700: step: 536/463, loss: 0.07705842703580856 2023-01-24 02:12:50.999289: step: 538/463, loss: 0.04890380799770355 2023-01-24 02:12:51.616795: step: 540/463, loss: 0.08820336312055588 2023-01-24 02:12:52.279504: step: 542/463, loss: 0.07475754618644714 2023-01-24 02:12:52.873671: step: 544/463, loss: 0.046591755002737045 2023-01-24 02:12:53.440338: step: 546/463, loss: 0.006303261965513229 2023-01-24 02:12:54.075372: step: 548/463, loss: 0.304513156414032 2023-01-24 02:12:54.700458: step: 550/463, loss: 0.33593547344207764 2023-01-24 02:12:55.288751: step: 552/463, loss: 0.06830251961946487 2023-01-24 02:12:55.907732: step: 554/463, loss: 0.06998834013938904 2023-01-24 02:12:56.466904: step: 556/463, loss: 0.030180655419826508 2023-01-24 02:12:57.076719: step: 558/463, loss: 0.05729088559746742 2023-01-24 02:12:57.654834: step: 560/463, loss: 0.06597680598497391 2023-01-24 02:12:58.276020: step: 562/463, loss: 0.06604691594839096 2023-01-24 02:12:58.912397: step: 564/463, loss: 0.0838196873664856 2023-01-24 02:12:59.530574: step: 566/463, loss: 0.06589663773775101 2023-01-24 02:13:00.125806: step: 568/463, loss: 0.13239498436450958 2023-01-24 02:13:00.789010: step: 570/463, loss: 0.1035396009683609 2023-01-24 02:13:01.376123: step: 572/463, loss: 0.05458400025963783 2023-01-24 02:13:01.968266: step: 574/463, loss: 0.8319043517112732 2023-01-24 02:13:02.566735: step: 576/463, loss: 0.0764361023902893 2023-01-24 02:13:03.385432: step: 578/463, loss: 0.039345718920230865 2023-01-24 02:13:03.975691: step: 580/463, loss: 0.08117703348398209 2023-01-24 02:13:04.595998: step: 582/463, loss: 0.05374583974480629 2023-01-24 02:13:05.207469: step: 584/463, loss: 0.14211954176425934 2023-01-24 02:13:05.806687: step: 586/463, loss: 0.9832000136375427 2023-01-24 02:13:06.441822: step: 588/463, loss: 0.023922232910990715 2023-01-24 02:13:07.134135: step: 590/463, loss: 0.061450205743312836 2023-01-24 02:13:07.758883: step: 592/463, loss: 0.16145987808704376 2023-01-24 02:13:08.389760: step: 594/463, loss: 0.08633865416049957 2023-01-24 02:13:09.083166: step: 596/463, loss: 0.03697962313890457 2023-01-24 02:13:09.634212: step: 598/463, loss: 0.08412707597017288 2023-01-24 02:13:10.266828: step: 600/463, loss: 0.18807554244995117 2023-01-24 02:13:10.862135: step: 602/463, loss: 0.29362112283706665 2023-01-24 02:13:11.492149: step: 604/463, loss: 0.029087388888001442 2023-01-24 02:13:12.207237: step: 606/463, loss: 0.07219064980745316 2023-01-24 02:13:12.833363: step: 608/463, loss: 0.07840555906295776 2023-01-24 02:13:13.488183: step: 610/463, loss: 0.07978782802820206 2023-01-24 02:13:14.058765: step: 612/463, loss: 0.08097044378519058 2023-01-24 02:13:14.689524: step: 614/463, loss: 0.038549017161130905 2023-01-24 02:13:15.282130: step: 616/463, loss: 0.06934382766485214 2023-01-24 02:13:15.943377: step: 618/463, loss: 0.038497116416692734 2023-01-24 02:13:16.564136: step: 620/463, loss: 0.14110159873962402 2023-01-24 02:13:17.209928: step: 622/463, loss: 0.04984660446643829 2023-01-24 02:13:17.819961: step: 624/463, loss: 0.018314948305487633 2023-01-24 02:13:18.522742: step: 626/463, loss: 0.43215423822402954 2023-01-24 02:13:19.164550: step: 628/463, loss: 0.05763465538620949 2023-01-24 02:13:19.782658: step: 630/463, loss: 0.034755803644657135 2023-01-24 02:13:20.357622: step: 632/463, loss: 0.04421640932559967 2023-01-24 02:13:20.980014: step: 634/463, loss: 0.11227025091648102 2023-01-24 02:13:21.557520: step: 636/463, loss: 0.015532471239566803 2023-01-24 02:13:22.157253: step: 638/463, loss: 0.09059596061706543 2023-01-24 02:13:22.743068: step: 640/463, loss: 0.07333651185035706 2023-01-24 02:13:23.335663: step: 642/463, loss: 0.05921321362257004 2023-01-24 02:13:23.982691: step: 644/463, loss: 0.10366473346948624 2023-01-24 02:13:24.636990: step: 646/463, loss: 0.12462721019983292 2023-01-24 02:13:25.243984: step: 648/463, loss: 0.04913271218538284 2023-01-24 02:13:25.855003: step: 650/463, loss: 0.11728756129741669 2023-01-24 02:13:26.538069: step: 652/463, loss: 0.06504454463720322 2023-01-24 02:13:27.172990: step: 654/463, loss: 0.11987568438053131 2023-01-24 02:13:27.799003: step: 656/463, loss: 0.06708894670009613 2023-01-24 02:13:28.384536: step: 658/463, loss: 0.016766754910349846 2023-01-24 02:13:29.027586: step: 660/463, loss: 0.0872633159160614 2023-01-24 02:13:29.689554: step: 662/463, loss: 0.08582579344511032 2023-01-24 02:13:30.313659: step: 664/463, loss: 0.09649263322353363 2023-01-24 02:13:30.970714: step: 666/463, loss: 0.07609466463327408 2023-01-24 02:13:31.644155: step: 668/463, loss: 0.04966834932565689 2023-01-24 02:13:32.316517: step: 670/463, loss: 0.04317600652575493 2023-01-24 02:13:32.883714: step: 672/463, loss: 0.3034360110759735 2023-01-24 02:13:33.496954: step: 674/463, loss: 0.13160070776939392 2023-01-24 02:13:34.120885: step: 676/463, loss: 0.00912652537226677 2023-01-24 02:13:34.742719: step: 678/463, loss: 0.06286082416772842 2023-01-24 02:13:35.421285: step: 680/463, loss: 0.22653363645076752 2023-01-24 02:13:36.030793: step: 682/463, loss: 0.16736716032028198 2023-01-24 02:13:36.698664: step: 684/463, loss: 0.04976850748062134 2023-01-24 02:13:37.327727: step: 686/463, loss: 0.05625550448894501 2023-01-24 02:13:37.962030: step: 688/463, loss: 0.07868002355098724 2023-01-24 02:13:38.564191: step: 690/463, loss: 0.03424704819917679 2023-01-24 02:13:39.169911: step: 692/463, loss: 0.048810943961143494 2023-01-24 02:13:39.813915: step: 694/463, loss: 0.1076931282877922 2023-01-24 02:13:40.493097: step: 696/463, loss: 0.047551922500133514 2023-01-24 02:13:41.194796: step: 698/463, loss: 0.3536331355571747 2023-01-24 02:13:41.821050: step: 700/463, loss: 0.06586913019418716 2023-01-24 02:13:42.422554: step: 702/463, loss: 0.058073487132787704 2023-01-24 02:13:43.043294: step: 704/463, loss: 0.11957833915948868 2023-01-24 02:13:43.666337: step: 706/463, loss: 0.14274469017982483 2023-01-24 02:13:44.320984: step: 708/463, loss: 0.08570787310600281 2023-01-24 02:13:44.925129: step: 710/463, loss: 0.13309448957443237 2023-01-24 02:13:45.604026: step: 712/463, loss: 0.037763502448797226 2023-01-24 02:13:46.301240: step: 714/463, loss: 0.07927140593528748 2023-01-24 02:13:46.887909: step: 716/463, loss: 0.06095125898718834 2023-01-24 02:13:47.526479: step: 718/463, loss: 0.11263326555490494 2023-01-24 02:13:48.130762: step: 720/463, loss: 0.10554513335227966 2023-01-24 02:13:48.880592: step: 722/463, loss: 0.14761584997177124 2023-01-24 02:13:49.532529: step: 724/463, loss: 0.15627829730510712 2023-01-24 02:13:50.128184: step: 726/463, loss: 0.08813484013080597 2023-01-24 02:13:50.769827: step: 728/463, loss: 0.06741240620613098 2023-01-24 02:13:51.351604: step: 730/463, loss: 0.20221546292304993 2023-01-24 02:13:51.947948: step: 732/463, loss: 0.05490097403526306 2023-01-24 02:13:52.569058: step: 734/463, loss: 0.04640427976846695 2023-01-24 02:13:53.192107: step: 736/463, loss: 0.22361038625240326 2023-01-24 02:13:53.866679: step: 738/463, loss: 0.2258925437927246 2023-01-24 02:13:54.524580: step: 740/463, loss: 0.16541452705860138 2023-01-24 02:13:55.109759: step: 742/463, loss: 0.0810089111328125 2023-01-24 02:13:55.776173: step: 744/463, loss: 0.039693523198366165 2023-01-24 02:13:56.407627: step: 746/463, loss: 0.10746898502111435 2023-01-24 02:13:57.017610: step: 748/463, loss: 0.18768249452114105 2023-01-24 02:13:57.640503: step: 750/463, loss: 0.16318951547145844 2023-01-24 02:13:58.193127: step: 752/463, loss: 0.10648421943187714 2023-01-24 02:13:58.817663: step: 754/463, loss: 0.03557172790169716 2023-01-24 02:13:59.491319: step: 756/463, loss: 0.2713887691497803 2023-01-24 02:14:00.161567: step: 758/463, loss: 0.22806307673454285 2023-01-24 02:14:00.842388: step: 760/463, loss: 0.12515021860599518 2023-01-24 02:14:01.374974: step: 762/463, loss: 0.08978737145662308 2023-01-24 02:14:01.977470: step: 764/463, loss: 0.2649135887622833 2023-01-24 02:14:02.615186: step: 766/463, loss: 0.05425082892179489 2023-01-24 02:14:03.208489: step: 768/463, loss: 0.2324332743883133 2023-01-24 02:14:03.792902: step: 770/463, loss: 0.10351134836673737 2023-01-24 02:14:04.359517: step: 772/463, loss: 0.16275502741336823 2023-01-24 02:14:04.981820: step: 774/463, loss: 0.11691991239786148 2023-01-24 02:14:05.614899: step: 776/463, loss: 0.12113033980131149 2023-01-24 02:14:06.225136: step: 778/463, loss: 0.07178838551044464 2023-01-24 02:14:06.862557: step: 780/463, loss: 0.3418838679790497 2023-01-24 02:14:07.531085: step: 782/463, loss: 0.4102662205696106 2023-01-24 02:14:08.095004: step: 784/463, loss: 0.02895628660917282 2023-01-24 02:14:08.713544: step: 786/463, loss: 0.08663608133792877 2023-01-24 02:14:09.318658: step: 788/463, loss: 0.047971226274967194 2023-01-24 02:14:09.954189: step: 790/463, loss: 0.30212798714637756 2023-01-24 02:14:10.572132: step: 792/463, loss: 0.03290561959147453 2023-01-24 02:14:11.203110: step: 794/463, loss: 0.0367802232503891 2023-01-24 02:14:11.821612: step: 796/463, loss: 0.05134870857000351 2023-01-24 02:14:12.434822: step: 798/463, loss: 0.09388405829668045 2023-01-24 02:14:13.046434: step: 800/463, loss: 0.15657436847686768 2023-01-24 02:14:13.785914: step: 802/463, loss: 0.08418697118759155 2023-01-24 02:14:14.403882: step: 804/463, loss: 0.05196158587932587 2023-01-24 02:14:15.019616: step: 806/463, loss: 0.04673195630311966 2023-01-24 02:14:15.607179: step: 808/463, loss: 0.04733739793300629 2023-01-24 02:14:16.281684: step: 810/463, loss: 0.20673198997974396 2023-01-24 02:14:16.895891: step: 812/463, loss: 0.07239073514938354 2023-01-24 02:14:17.481102: step: 814/463, loss: 0.01880723424255848 2023-01-24 02:14:18.178271: step: 816/463, loss: 0.09542897343635559 2023-01-24 02:14:18.789296: step: 818/463, loss: 0.05367107689380646 2023-01-24 02:14:19.437504: step: 820/463, loss: 0.04213620722293854 2023-01-24 02:14:20.070867: step: 822/463, loss: 0.206650048494339 2023-01-24 02:14:20.737139: step: 824/463, loss: 0.042048800736665726 2023-01-24 02:14:21.346668: step: 826/463, loss: 0.09008020162582397 2023-01-24 02:14:21.930348: step: 828/463, loss: 0.024968769401311874 2023-01-24 02:14:22.557010: step: 830/463, loss: 0.1866833120584488 2023-01-24 02:14:23.244029: step: 832/463, loss: 0.26731371879577637 2023-01-24 02:14:23.859645: step: 834/463, loss: 0.13451258838176727 2023-01-24 02:14:24.508058: step: 836/463, loss: 0.05749315023422241 2023-01-24 02:14:25.124353: step: 838/463, loss: 0.13455025851726532 2023-01-24 02:14:25.774921: step: 840/463, loss: 0.08344903588294983 2023-01-24 02:14:26.396035: step: 842/463, loss: 0.061431046575307846 2023-01-24 02:14:27.007066: step: 844/463, loss: 0.09953182935714722 2023-01-24 02:14:27.584801: step: 846/463, loss: 0.12022727727890015 2023-01-24 02:14:28.196236: step: 848/463, loss: 0.07580035924911499 2023-01-24 02:14:28.785115: step: 850/463, loss: 0.09966645389795303 2023-01-24 02:14:29.380855: step: 852/463, loss: 0.29602909088134766 2023-01-24 02:14:30.031599: step: 854/463, loss: 0.09476074576377869 2023-01-24 02:14:30.612397: step: 856/463, loss: 0.00628704996779561 2023-01-24 02:14:31.234577: step: 858/463, loss: 0.6302266716957092 2023-01-24 02:14:31.825606: step: 860/463, loss: 0.18641330301761627 2023-01-24 02:14:32.480355: step: 862/463, loss: 0.07095812261104584 2023-01-24 02:14:33.181116: step: 864/463, loss: 0.16209836304187775 2023-01-24 02:14:33.838363: step: 866/463, loss: 0.1011529192328453 2023-01-24 02:14:34.437713: step: 868/463, loss: 0.6888899207115173 2023-01-24 02:14:35.028836: step: 870/463, loss: 0.08251048624515533 2023-01-24 02:14:35.658758: step: 872/463, loss: 0.13046742975711823 2023-01-24 02:14:36.339815: step: 874/463, loss: 0.06372523307800293 2023-01-24 02:14:36.926804: step: 876/463, loss: 0.4667150676250458 2023-01-24 02:14:37.469515: step: 878/463, loss: 0.06428442895412445 2023-01-24 02:14:38.179135: step: 880/463, loss: 0.2215728908777237 2023-01-24 02:14:38.816954: step: 882/463, loss: 0.17224302887916565 2023-01-24 02:14:39.509753: step: 884/463, loss: 0.1438239961862564 2023-01-24 02:14:40.153758: step: 886/463, loss: 0.10929226875305176 2023-01-24 02:14:40.818589: step: 888/463, loss: 0.6935329437255859 2023-01-24 02:14:41.486273: step: 890/463, loss: 0.17415548861026764 2023-01-24 02:14:42.118473: step: 892/463, loss: 0.2666018009185791 2023-01-24 02:14:42.716161: step: 894/463, loss: 0.031130531802773476 2023-01-24 02:14:43.319256: step: 896/463, loss: 0.06472066789865494 2023-01-24 02:14:43.939546: step: 898/463, loss: 0.041858822107315063 2023-01-24 02:14:44.621695: step: 900/463, loss: 0.31978991627693176 2023-01-24 02:14:45.154269: step: 902/463, loss: 0.05470659211277962 2023-01-24 02:14:45.857450: step: 904/463, loss: 0.11891169846057892 2023-01-24 02:14:46.485525: step: 906/463, loss: 0.021871507167816162 2023-01-24 02:14:47.067600: step: 908/463, loss: 0.011662687174975872 2023-01-24 02:14:47.667340: step: 910/463, loss: 0.08630596101284027 2023-01-24 02:14:48.308899: step: 912/463, loss: 0.11495700478553772 2023-01-24 02:14:48.878258: step: 914/463, loss: 0.020362814888358116 2023-01-24 02:14:49.547861: step: 916/463, loss: 0.2139567732810974 2023-01-24 02:14:50.107359: step: 918/463, loss: 0.08537845313549042 2023-01-24 02:14:50.769923: step: 920/463, loss: 0.3175278604030609 2023-01-24 02:14:51.451651: step: 922/463, loss: 0.032151322811841965 2023-01-24 02:14:52.067105: step: 924/463, loss: 0.478193998336792 2023-01-24 02:14:52.710750: step: 926/463, loss: 0.3930867910385132 ================================================== Loss: 0.174 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3115449582645715, 'r': 0.32100362872421695, 'f1': 0.31620357446292024}, 'combined': 0.23299210749899385, 'epoch': 16} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3560342351937457, 'r': 0.2981981061579058, 'f1': 0.32455972580874726}, 'combined': 0.2283334754433398, 'epoch': 16} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3112427113702624, 'r': 0.3183298319327731, 'f1': 0.31474638166711333}, 'combined': 0.2319183864915572, 'epoch': 16} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3540826730501815, 'r': 0.2950688942084846, 'f1': 0.3218933391365286}, 'combined': 0.22854427078693532, 'epoch': 16} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3143893793472445, 'r': 0.31856532935754944, 'f1': 0.316463578833984}, 'combined': 0.23318368966714612, 'epoch': 16} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37765796509991567, 'r': 0.29816838467277185, 'f1': 0.3332384582238397}, 'combined': 0.23659930533892617, 'epoch': 16} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23574561403508767, 'r': 0.25595238095238093, 'f1': 0.24543378995433787}, 'combined': 0.16362252663622523, 'epoch': 16} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.23214285714285715, 'r': 0.2826086956521739, 'f1': 0.2549019607843137}, 'combined': 0.12745098039215685, 'epoch': 16} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36363636363636365, 'r': 0.13793103448275862, 'f1': 0.2}, 'combined': 0.13333333333333333, 'epoch': 16} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32931238967343335, 'r': 0.3212043194731405, 'f1': 0.3252078248932276}, 'combined': 0.2396268183423782, 'epoch': 10} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38073307869771683, 'r': 0.30299176943483225, 'f1': 0.3374427286417082}, 'combined': 0.23958433733561282, 'epoch': 10} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31666666666666665, 'r': 0.41304347826086957, 'f1': 0.3584905660377358}, 'combined': 0.1792452830188679, 'epoch': 10} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3394620582120582, 'r': 0.3092447916666667, 'f1': 0.32364965312190286}, 'combined': 0.23847869177403366, 'epoch': 7} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36468931207169214, 'r': 0.2587193810770381, 'f1': 0.30269771384817323}, 'combined': 0.214915376832203, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 7} ****************************** Epoch: 17 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:17:31.432161: step: 2/463, loss: 0.029034849256277084 2023-01-24 02:17:32.058781: step: 4/463, loss: 0.22093425691127777 2023-01-24 02:17:32.638511: step: 6/463, loss: 0.00756616098806262 2023-01-24 02:17:33.224043: step: 8/463, loss: 0.28214526176452637 2023-01-24 02:17:33.797350: step: 10/463, loss: 0.022260377183556557 2023-01-24 02:17:34.412736: step: 12/463, loss: 0.13380399346351624 2023-01-24 02:17:35.024096: step: 14/463, loss: 0.1893235594034195 2023-01-24 02:17:35.650802: step: 16/463, loss: 0.06238662078976631 2023-01-24 02:17:36.237509: step: 18/463, loss: 0.09264401346445084 2023-01-24 02:17:36.856614: step: 20/463, loss: 0.203669935464859 2023-01-24 02:17:37.453851: step: 22/463, loss: 0.09931498765945435 2023-01-24 02:17:38.089603: step: 24/463, loss: 0.09885022789239883 2023-01-24 02:17:38.717061: step: 26/463, loss: 0.07992254197597504 2023-01-24 02:17:39.377138: step: 28/463, loss: 0.3850100338459015 2023-01-24 02:17:40.009347: step: 30/463, loss: 0.3253202736377716 2023-01-24 02:17:40.671997: step: 32/463, loss: 0.17329412698745728 2023-01-24 02:17:41.280300: step: 34/463, loss: 0.013410177081823349 2023-01-24 02:17:41.963457: step: 36/463, loss: 0.055330343544483185 2023-01-24 02:17:42.584326: step: 38/463, loss: 0.06731942296028137 2023-01-24 02:17:43.237771: step: 40/463, loss: 0.08899140357971191 2023-01-24 02:17:43.873796: step: 42/463, loss: 0.20343875885009766 2023-01-24 02:17:44.451876: step: 44/463, loss: 0.2535842955112457 2023-01-24 02:17:45.109495: step: 46/463, loss: 0.3322671353816986 2023-01-24 02:17:45.741919: step: 48/463, loss: 0.010143361054360867 2023-01-24 02:17:46.327682: step: 50/463, loss: 0.06210484355688095 2023-01-24 02:17:47.014019: step: 52/463, loss: 0.061271265149116516 2023-01-24 02:17:47.654471: step: 54/463, loss: 0.08144853264093399 2023-01-24 02:17:48.287401: step: 56/463, loss: 0.07478325068950653 2023-01-24 02:17:49.021563: step: 58/463, loss: 0.12806600332260132 2023-01-24 02:17:49.600754: step: 60/463, loss: 0.012012496590614319 2023-01-24 02:17:50.226915: step: 62/463, loss: 0.06775784492492676 2023-01-24 02:17:50.915983: step: 64/463, loss: 0.09305430203676224 2023-01-24 02:17:51.506735: step: 66/463, loss: 0.34926578402519226 2023-01-24 02:17:52.162257: step: 68/463, loss: 0.0875922217965126 2023-01-24 02:17:52.765997: step: 70/463, loss: 0.054321758449077606 2023-01-24 02:17:53.358498: step: 72/463, loss: 0.030726756900548935 2023-01-24 02:17:53.967752: step: 74/463, loss: 0.10560249537229538 2023-01-24 02:17:54.589954: step: 76/463, loss: 0.02105158194899559 2023-01-24 02:17:55.243919: step: 78/463, loss: 0.0969020426273346 2023-01-24 02:17:55.842822: step: 80/463, loss: 0.5207117795944214 2023-01-24 02:17:56.483843: step: 82/463, loss: 0.025276830419898033 2023-01-24 02:17:57.188232: step: 84/463, loss: 0.039534978568553925 2023-01-24 02:17:57.785762: step: 86/463, loss: 0.09580382704734802 2023-01-24 02:17:58.439907: step: 88/463, loss: 0.37756773829460144 2023-01-24 02:17:59.078583: step: 90/463, loss: 0.027377020567655563 2023-01-24 02:17:59.711845: step: 92/463, loss: 0.04312293231487274 2023-01-24 02:18:00.301594: step: 94/463, loss: 0.06815198063850403 2023-01-24 02:18:00.950130: step: 96/463, loss: 0.1101764366030693 2023-01-24 02:18:01.623615: step: 98/463, loss: 0.11030927300453186 2023-01-24 02:18:02.232760: step: 100/463, loss: 0.014747390523552895 2023-01-24 02:18:02.826237: step: 102/463, loss: 0.03146219626069069 2023-01-24 02:18:03.410757: step: 104/463, loss: 0.03724946454167366 2023-01-24 02:18:04.028460: step: 106/463, loss: 0.18131880462169647 2023-01-24 02:18:04.623031: step: 108/463, loss: 0.06576752662658691 2023-01-24 02:18:05.211058: step: 110/463, loss: 0.04469558969140053 2023-01-24 02:18:05.840873: step: 112/463, loss: 0.046593986451625824 2023-01-24 02:18:06.496844: step: 114/463, loss: 0.06973162293434143 2023-01-24 02:18:07.057131: step: 116/463, loss: 0.13751785457134247 2023-01-24 02:18:07.797141: step: 118/463, loss: 0.06381981819868088 2023-01-24 02:18:08.445460: step: 120/463, loss: 0.058335740119218826 2023-01-24 02:18:09.060909: step: 122/463, loss: 0.18013426661491394 2023-01-24 02:18:09.629774: step: 124/463, loss: 0.004820437636226416 2023-01-24 02:18:10.164282: step: 126/463, loss: 0.04647801071405411 2023-01-24 02:18:10.810657: step: 128/463, loss: 0.5799987316131592 2023-01-24 02:18:11.423494: step: 130/463, loss: 0.03481203317642212 2023-01-24 02:18:12.107798: step: 132/463, loss: 0.08384177833795547 2023-01-24 02:18:12.700064: step: 134/463, loss: 0.11829312890768051 2023-01-24 02:18:13.307818: step: 136/463, loss: 0.09662014991044998 2023-01-24 02:18:13.936248: step: 138/463, loss: 0.024125222116708755 2023-01-24 02:18:14.577159: step: 140/463, loss: 0.17336325347423553 2023-01-24 02:18:15.150523: step: 142/463, loss: 0.20215526223182678 2023-01-24 02:18:15.776222: step: 144/463, loss: 0.16927184164524078 2023-01-24 02:18:16.388967: step: 146/463, loss: 0.14632245898246765 2023-01-24 02:18:16.937736: step: 148/463, loss: 0.06771758198738098 2023-01-24 02:18:17.605369: step: 150/463, loss: 0.10773110389709473 2023-01-24 02:18:18.284062: step: 152/463, loss: 0.03911542519927025 2023-01-24 02:18:18.935853: step: 154/463, loss: 0.05351945385336876 2023-01-24 02:18:19.590581: step: 156/463, loss: 0.06525156646966934 2023-01-24 02:18:20.180461: step: 158/463, loss: 0.03254387155175209 2023-01-24 02:18:20.785630: step: 160/463, loss: 0.04241790249943733 2023-01-24 02:18:21.388424: step: 162/463, loss: 0.18097466230392456 2023-01-24 02:18:22.028808: step: 164/463, loss: 0.03834306821227074 2023-01-24 02:18:22.661249: step: 166/463, loss: 0.06934341788291931 2023-01-24 02:18:23.278404: step: 168/463, loss: 0.05456938594579697 2023-01-24 02:18:23.897614: step: 170/463, loss: 0.16488027572631836 2023-01-24 02:18:24.508095: step: 172/463, loss: 0.1156223714351654 2023-01-24 02:18:25.026096: step: 174/463, loss: 0.056563910096883774 2023-01-24 02:18:25.640629: step: 176/463, loss: 0.0214778371155262 2023-01-24 02:18:26.214551: step: 178/463, loss: 0.0872640460729599 2023-01-24 02:18:26.854072: step: 180/463, loss: 0.09556082636117935 2023-01-24 02:18:27.454367: step: 182/463, loss: 0.0657586082816124 2023-01-24 02:18:28.119683: step: 184/463, loss: 0.0580468513071537 2023-01-24 02:18:28.778648: step: 186/463, loss: 0.045053258538246155 2023-01-24 02:18:29.374155: step: 188/463, loss: 0.0856487974524498 2023-01-24 02:18:29.939285: step: 190/463, loss: 0.1363142877817154 2023-01-24 02:18:30.552172: step: 192/463, loss: 0.0822499617934227 2023-01-24 02:18:31.175053: step: 194/463, loss: 0.3345452547073364 2023-01-24 02:18:31.795427: step: 196/463, loss: 0.029816951602697372 2023-01-24 02:18:32.400444: step: 198/463, loss: 0.01016508974134922 2023-01-24 02:18:33.004761: step: 200/463, loss: 0.049746714532375336 2023-01-24 02:18:33.631748: step: 202/463, loss: 0.05185399949550629 2023-01-24 02:18:34.218247: step: 204/463, loss: 0.01918785087764263 2023-01-24 02:18:34.873127: step: 206/463, loss: 0.15286079049110413 2023-01-24 02:18:35.481179: step: 208/463, loss: 0.11843424290418625 2023-01-24 02:18:36.159812: step: 210/463, loss: 0.03941408917307854 2023-01-24 02:18:36.757298: step: 212/463, loss: 0.04696370288729668 2023-01-24 02:18:37.381088: step: 214/463, loss: 0.060797084122896194 2023-01-24 02:18:37.906858: step: 216/463, loss: 0.2623785138130188 2023-01-24 02:18:38.561955: step: 218/463, loss: 0.045666322112083435 2023-01-24 02:18:39.149895: step: 220/463, loss: 0.05242841690778732 2023-01-24 02:18:39.900013: step: 222/463, loss: 0.2503933012485504 2023-01-24 02:18:40.475886: step: 224/463, loss: 0.41708531975746155 2023-01-24 02:18:41.065641: step: 226/463, loss: 0.07233820110559464 2023-01-24 02:18:41.655410: step: 228/463, loss: 0.08544005453586578 2023-01-24 02:18:42.224318: step: 230/463, loss: 0.02535875141620636 2023-01-24 02:18:42.818228: step: 232/463, loss: 0.23826785385608673 2023-01-24 02:18:43.428453: step: 234/463, loss: 0.14742770791053772 2023-01-24 02:18:44.042794: step: 236/463, loss: 0.043618232011795044 2023-01-24 02:18:44.801774: step: 238/463, loss: 0.16767582297325134 2023-01-24 02:18:45.444467: step: 240/463, loss: 0.13874492049217224 2023-01-24 02:18:46.075106: step: 242/463, loss: 0.011119361035525799 2023-01-24 02:18:46.677743: step: 244/463, loss: 0.11054425686597824 2023-01-24 02:18:47.280464: step: 246/463, loss: 0.03749373182654381 2023-01-24 02:18:47.886762: step: 248/463, loss: 0.07733451575040817 2023-01-24 02:18:48.468269: step: 250/463, loss: 0.046678315848112106 2023-01-24 02:18:49.068515: step: 252/463, loss: 0.10797601193189621 2023-01-24 02:18:49.687023: step: 254/463, loss: 0.1093277707695961 2023-01-24 02:18:50.319244: step: 256/463, loss: 0.16328951716423035 2023-01-24 02:18:50.965697: step: 258/463, loss: 0.060336556285619736 2023-01-24 02:18:51.587660: step: 260/463, loss: 0.09798993915319443 2023-01-24 02:18:52.173545: step: 262/463, loss: 0.0422724112868309 2023-01-24 02:18:52.761884: step: 264/463, loss: 0.011197797022759914 2023-01-24 02:18:53.378605: step: 266/463, loss: 0.08418838679790497 2023-01-24 02:18:54.043740: step: 268/463, loss: 0.07822441309690475 2023-01-24 02:18:54.684566: step: 270/463, loss: 0.05509977787733078 2023-01-24 02:18:55.364737: step: 272/463, loss: 0.07182900607585907 2023-01-24 02:18:56.049462: step: 274/463, loss: 0.08164497464895248 2023-01-24 02:18:56.720682: step: 276/463, loss: 0.1699485331773758 2023-01-24 02:18:57.465419: step: 278/463, loss: 0.09366940706968307 2023-01-24 02:18:58.088545: step: 280/463, loss: 0.1280447542667389 2023-01-24 02:18:58.727929: step: 282/463, loss: 0.09516076743602753 2023-01-24 02:18:59.330284: step: 284/463, loss: 0.41636207699775696 2023-01-24 02:18:59.959374: step: 286/463, loss: 0.05019990727305412 2023-01-24 02:19:00.610023: step: 288/463, loss: 0.13951672613620758 2023-01-24 02:19:01.201227: step: 290/463, loss: 0.050364334136247635 2023-01-24 02:19:01.813519: step: 292/463, loss: 0.0702374204993248 2023-01-24 02:19:02.500486: step: 294/463, loss: 0.09036526829004288 2023-01-24 02:19:03.129874: step: 296/463, loss: 0.4497077465057373 2023-01-24 02:19:03.737861: step: 298/463, loss: 0.0466700904071331 2023-01-24 02:19:04.352399: step: 300/463, loss: 0.22296854853630066 2023-01-24 02:19:04.984296: step: 302/463, loss: 0.061509981751441956 2023-01-24 02:19:05.582409: step: 304/463, loss: 0.15550975501537323 2023-01-24 02:19:06.185674: step: 306/463, loss: 0.031397100538015366 2023-01-24 02:19:06.800190: step: 308/463, loss: 0.020209359005093575 2023-01-24 02:19:07.442105: step: 310/463, loss: 0.035365283489227295 2023-01-24 02:19:08.078571: step: 312/463, loss: 0.017602289095520973 2023-01-24 02:19:08.708484: step: 314/463, loss: 0.09598478674888611 2023-01-24 02:19:09.448622: step: 316/463, loss: 0.11810272186994553 2023-01-24 02:19:10.074518: step: 318/463, loss: 0.08775065839290619 2023-01-24 02:19:10.678835: step: 320/463, loss: 0.18310783803462982 2023-01-24 02:19:11.327226: step: 322/463, loss: 0.13989941775798798 2023-01-24 02:19:11.928469: step: 324/463, loss: 0.08269048482179642 2023-01-24 02:19:12.586405: step: 326/463, loss: 0.018245438113808632 2023-01-24 02:19:13.209894: step: 328/463, loss: 0.4302862584590912 2023-01-24 02:19:13.796600: step: 330/463, loss: 0.13050180673599243 2023-01-24 02:19:14.421844: step: 332/463, loss: 0.05292873829603195 2023-01-24 02:19:15.047670: step: 334/463, loss: 0.06900018453598022 2023-01-24 02:19:15.693329: step: 336/463, loss: 0.04996662214398384 2023-01-24 02:19:16.248787: step: 338/463, loss: 0.14513204991817474 2023-01-24 02:19:16.820337: step: 340/463, loss: 0.07549646496772766 2023-01-24 02:19:17.453799: step: 342/463, loss: 0.21071001887321472 2023-01-24 02:19:18.092973: step: 344/463, loss: 0.029665878042578697 2023-01-24 02:19:18.721920: step: 346/463, loss: 0.023606767877936363 2023-01-24 02:19:19.372721: step: 348/463, loss: 0.07389508187770844 2023-01-24 02:19:19.948548: step: 350/463, loss: 0.15014660358428955 2023-01-24 02:19:20.598616: step: 352/463, loss: 0.0464511401951313 2023-01-24 02:19:21.241245: step: 354/463, loss: 0.4810974597930908 2023-01-24 02:19:21.861587: step: 356/463, loss: 0.10514376312494278 2023-01-24 02:19:22.442353: step: 358/463, loss: 0.06526500731706619 2023-01-24 02:19:23.032788: step: 360/463, loss: 0.06499184668064117 2023-01-24 02:19:23.628421: step: 362/463, loss: 0.02133612334728241 2023-01-24 02:19:24.263421: step: 364/463, loss: 0.08671732246875763 2023-01-24 02:19:24.885187: step: 366/463, loss: 0.044668301939964294 2023-01-24 02:19:25.497727: step: 368/463, loss: 0.03444363921880722 2023-01-24 02:19:26.145087: step: 370/463, loss: 0.06637953221797943 2023-01-24 02:19:26.707055: step: 372/463, loss: 0.09314967691898346 2023-01-24 02:19:27.405456: step: 374/463, loss: 0.08343088626861572 2023-01-24 02:19:28.013401: step: 376/463, loss: 0.07742507010698318 2023-01-24 02:19:28.643324: step: 378/463, loss: 0.16385512053966522 2023-01-24 02:19:29.226208: step: 380/463, loss: 0.19298282265663147 2023-01-24 02:19:29.809424: step: 382/463, loss: 0.13990089297294617 2023-01-24 02:19:30.405931: step: 384/463, loss: 0.031508754938840866 2023-01-24 02:19:30.948599: step: 386/463, loss: 3.766152858734131 2023-01-24 02:19:31.552077: step: 388/463, loss: 0.08627770841121674 2023-01-24 02:19:32.167676: step: 390/463, loss: 0.08352816104888916 2023-01-24 02:19:32.733404: step: 392/463, loss: 0.053070154041051865 2023-01-24 02:19:33.398289: step: 394/463, loss: 0.06351369619369507 2023-01-24 02:19:34.025907: step: 396/463, loss: 5.1273298263549805 2023-01-24 02:19:34.657856: step: 398/463, loss: 0.055428922176361084 2023-01-24 02:19:35.358798: step: 400/463, loss: 7.128698348999023 2023-01-24 02:19:36.022883: step: 402/463, loss: 0.1031441017985344 2023-01-24 02:19:36.609460: step: 404/463, loss: 0.038023822009563446 2023-01-24 02:19:37.228710: step: 406/463, loss: 0.04604694992303848 2023-01-24 02:19:37.836589: step: 408/463, loss: 0.04603227227926254 2023-01-24 02:19:38.517872: step: 410/463, loss: 0.10586929321289062 2023-01-24 02:19:39.161021: step: 412/463, loss: 0.13325907289981842 2023-01-24 02:19:39.859002: step: 414/463, loss: 0.04768155887722969 2023-01-24 02:19:40.432312: step: 416/463, loss: 0.2525966465473175 2023-01-24 02:19:41.048850: step: 418/463, loss: 0.08468204736709595 2023-01-24 02:19:41.733515: step: 420/463, loss: 0.510571300983429 2023-01-24 02:19:42.370152: step: 422/463, loss: 0.3775698244571686 2023-01-24 02:19:42.954388: step: 424/463, loss: 0.139085054397583 2023-01-24 02:19:43.530179: step: 426/463, loss: 0.41908368468284607 2023-01-24 02:19:44.163308: step: 428/463, loss: 0.0986214131116867 2023-01-24 02:19:44.835625: step: 430/463, loss: 0.11293192207813263 2023-01-24 02:19:45.522857: step: 432/463, loss: 0.014350133016705513 2023-01-24 02:19:46.143280: step: 434/463, loss: 0.08067140728235245 2023-01-24 02:19:46.807433: step: 436/463, loss: 0.19078268110752106 2023-01-24 02:19:47.375418: step: 438/463, loss: 0.023809120059013367 2023-01-24 02:19:47.970884: step: 440/463, loss: 0.1458756923675537 2023-01-24 02:19:48.609527: step: 442/463, loss: 0.030741514638066292 2023-01-24 02:19:49.223242: step: 444/463, loss: 0.09430131316184998 2023-01-24 02:19:49.851765: step: 446/463, loss: 0.6587414741516113 2023-01-24 02:19:50.477086: step: 448/463, loss: 0.035407211631536484 2023-01-24 02:19:51.067035: step: 450/463, loss: 0.025182154029607773 2023-01-24 02:19:51.684910: step: 452/463, loss: 0.12250196188688278 2023-01-24 02:19:52.297764: step: 454/463, loss: 0.17395715415477753 2023-01-24 02:19:52.910196: step: 456/463, loss: 0.09798737615346909 2023-01-24 02:19:53.616627: step: 458/463, loss: 0.04729187861084938 2023-01-24 02:19:54.227053: step: 460/463, loss: 0.15045617520809174 2023-01-24 02:19:54.825777: step: 462/463, loss: 0.30898764729499817 2023-01-24 02:19:55.452338: step: 464/463, loss: 0.1067158654332161 2023-01-24 02:19:56.086946: step: 466/463, loss: 0.10836595296859741 2023-01-24 02:19:56.709330: step: 468/463, loss: 0.05782720819115639 2023-01-24 02:19:57.380393: step: 470/463, loss: 0.0649682804942131 2023-01-24 02:19:57.992355: step: 472/463, loss: 0.037112701684236526 2023-01-24 02:19:58.620729: step: 474/463, loss: 0.09479672461748123 2023-01-24 02:19:59.222316: step: 476/463, loss: 0.27847355604171753 2023-01-24 02:19:59.834763: step: 478/463, loss: 0.012021733447909355 2023-01-24 02:20:00.521468: step: 480/463, loss: 0.05747959390282631 2023-01-24 02:20:01.132964: step: 482/463, loss: 0.06275523453950882 2023-01-24 02:20:01.781254: step: 484/463, loss: 0.03628915548324585 2023-01-24 02:20:02.439236: step: 486/463, loss: 0.16306975483894348 2023-01-24 02:20:03.050384: step: 488/463, loss: 0.028391726315021515 2023-01-24 02:20:03.655306: step: 490/463, loss: 0.015284285880625248 2023-01-24 02:20:04.271175: step: 492/463, loss: 0.014179985038936138 2023-01-24 02:20:04.908564: step: 494/463, loss: 0.08794019371271133 2023-01-24 02:20:05.491598: step: 496/463, loss: 0.058747753500938416 2023-01-24 02:20:06.103831: step: 498/463, loss: 0.03603939712047577 2023-01-24 02:20:06.705408: step: 500/463, loss: 0.7916725277900696 2023-01-24 02:20:07.334974: step: 502/463, loss: 0.021125685423612595 2023-01-24 02:20:08.000202: step: 504/463, loss: 0.1550394594669342 2023-01-24 02:20:08.580128: step: 506/463, loss: 0.44374364614486694 2023-01-24 02:20:09.251390: step: 508/463, loss: 0.0642244890332222 2023-01-24 02:20:09.898833: step: 510/463, loss: 0.18471509218215942 2023-01-24 02:20:10.544335: step: 512/463, loss: 0.17971080541610718 2023-01-24 02:20:11.167356: step: 514/463, loss: 0.018195820972323418 2023-01-24 02:20:11.781549: step: 516/463, loss: 0.025728531181812286 2023-01-24 02:20:12.540833: step: 518/463, loss: 0.028869153931736946 2023-01-24 02:20:13.151487: step: 520/463, loss: 0.28341394662857056 2023-01-24 02:20:13.806835: step: 522/463, loss: 0.032239723950624466 2023-01-24 02:20:14.383815: step: 524/463, loss: 0.13039375841617584 2023-01-24 02:20:14.967715: step: 526/463, loss: 0.08739438652992249 2023-01-24 02:20:15.571092: step: 528/463, loss: 0.054887864738702774 2023-01-24 02:20:16.183819: step: 530/463, loss: 0.02433670125901699 2023-01-24 02:20:16.874054: step: 532/463, loss: 0.09389333426952362 2023-01-24 02:20:17.499711: step: 534/463, loss: 0.17845278978347778 2023-01-24 02:20:18.117255: step: 536/463, loss: 0.03882387652993202 2023-01-24 02:20:18.754686: step: 538/463, loss: 0.027972061187028885 2023-01-24 02:20:19.342005: step: 540/463, loss: 0.008406776003539562 2023-01-24 02:20:19.967063: step: 542/463, loss: 0.0682532787322998 2023-01-24 02:20:20.594826: step: 544/463, loss: 0.33657553791999817 2023-01-24 02:20:21.238920: step: 546/463, loss: 0.07878078520298004 2023-01-24 02:20:21.850942: step: 548/463, loss: 0.3340403735637665 2023-01-24 02:20:22.507642: step: 550/463, loss: 0.031923409551382065 2023-01-24 02:20:23.151658: step: 552/463, loss: 0.4538400173187256 2023-01-24 02:20:23.769268: step: 554/463, loss: 0.1442594975233078 2023-01-24 02:20:24.373885: step: 556/463, loss: 0.03016781620681286 2023-01-24 02:20:24.959714: step: 558/463, loss: 0.10315482318401337 2023-01-24 02:20:25.604502: step: 560/463, loss: 0.211754709482193 2023-01-24 02:20:26.234130: step: 562/463, loss: 0.06668829917907715 2023-01-24 02:20:26.809557: step: 564/463, loss: 0.13477875292301178 2023-01-24 02:20:27.456866: step: 566/463, loss: 0.13255611062049866 2023-01-24 02:20:28.091760: step: 568/463, loss: 0.06533139944076538 2023-01-24 02:20:28.682207: step: 570/463, loss: 0.10416175425052643 2023-01-24 02:20:29.362976: step: 572/463, loss: 0.051260169595479965 2023-01-24 02:20:29.984184: step: 574/463, loss: 0.11982867121696472 2023-01-24 02:20:30.549912: step: 576/463, loss: 0.08487684279680252 2023-01-24 02:20:31.138366: step: 578/463, loss: 0.02357851155102253 2023-01-24 02:20:31.779203: step: 580/463, loss: 0.16621030867099762 2023-01-24 02:20:32.390889: step: 582/463, loss: 0.034836385399103165 2023-01-24 02:20:32.998039: step: 584/463, loss: 0.11000345647335052 2023-01-24 02:20:33.695812: step: 586/463, loss: 1.5045338869094849 2023-01-24 02:20:34.308000: step: 588/463, loss: 0.05056216940283775 2023-01-24 02:20:34.925811: step: 590/463, loss: 0.17940892279148102 2023-01-24 02:20:35.502390: step: 592/463, loss: 0.03265633434057236 2023-01-24 02:20:36.128538: step: 594/463, loss: 0.015787692740559578 2023-01-24 02:20:36.775201: step: 596/463, loss: 0.1446114182472229 2023-01-24 02:20:37.410382: step: 598/463, loss: 0.029480354860424995 2023-01-24 02:20:38.053437: step: 600/463, loss: 0.015403005294501781 2023-01-24 02:20:38.742818: step: 602/463, loss: 0.0729077160358429 2023-01-24 02:20:39.406260: step: 604/463, loss: 0.4061950147151947 2023-01-24 02:20:40.038733: step: 606/463, loss: 0.036948636174201965 2023-01-24 02:20:40.718262: step: 608/463, loss: 0.094474658370018 2023-01-24 02:20:41.360121: step: 610/463, loss: 0.7870875000953674 2023-01-24 02:20:41.975313: step: 612/463, loss: 0.016426661983132362 2023-01-24 02:20:42.572531: step: 614/463, loss: 0.06732980161905289 2023-01-24 02:20:43.203366: step: 616/463, loss: 0.22032684087753296 2023-01-24 02:20:43.907653: step: 618/463, loss: 0.35561156272888184 2023-01-24 02:20:44.533807: step: 620/463, loss: 0.14483454823493958 2023-01-24 02:20:45.171755: step: 622/463, loss: 0.011325111612677574 2023-01-24 02:20:45.722797: step: 624/463, loss: 0.09582098573446274 2023-01-24 02:20:46.375152: step: 626/463, loss: 0.03957049548625946 2023-01-24 02:20:47.038968: step: 628/463, loss: 0.07907479256391525 2023-01-24 02:20:47.722044: step: 630/463, loss: 0.25349071621894836 2023-01-24 02:20:48.330206: step: 632/463, loss: 0.024663593620061874 2023-01-24 02:20:48.879412: step: 634/463, loss: 0.22915031015872955 2023-01-24 02:20:49.553190: step: 636/463, loss: 0.045975808054208755 2023-01-24 02:20:50.260396: step: 638/463, loss: 0.5734086632728577 2023-01-24 02:20:50.873793: step: 640/463, loss: 0.03392540290951729 2023-01-24 02:20:51.460019: step: 642/463, loss: 0.07448966056108475 2023-01-24 02:20:52.078062: step: 644/463, loss: 0.036999545991420746 2023-01-24 02:20:52.592971: step: 646/463, loss: 0.07148760557174683 2023-01-24 02:20:53.226936: step: 648/463, loss: 0.03880679979920387 2023-01-24 02:20:53.855933: step: 650/463, loss: 0.11866156756877899 2023-01-24 02:20:54.456335: step: 652/463, loss: 0.06142021343111992 2023-01-24 02:20:55.078266: step: 654/463, loss: 0.4327996075153351 2023-01-24 02:20:55.663628: step: 656/463, loss: 0.19716161489486694 2023-01-24 02:20:56.278205: step: 658/463, loss: 0.042747754603624344 2023-01-24 02:20:56.835784: step: 660/463, loss: 0.12779834866523743 2023-01-24 02:20:57.474667: step: 662/463, loss: 0.26424509286880493 2023-01-24 02:20:58.066013: step: 664/463, loss: 0.09766629338264465 2023-01-24 02:20:58.702848: step: 666/463, loss: 0.07671283185482025 2023-01-24 02:20:59.345147: step: 668/463, loss: 0.09335873275995255 2023-01-24 02:20:59.981309: step: 670/463, loss: 0.08527130633592606 2023-01-24 02:21:00.662125: step: 672/463, loss: 0.1957973837852478 2023-01-24 02:21:01.378033: step: 674/463, loss: 0.08800293505191803 2023-01-24 02:21:02.082699: step: 676/463, loss: 0.044853873550891876 2023-01-24 02:21:02.700691: step: 678/463, loss: 0.06648443639278412 2023-01-24 02:21:03.293521: step: 680/463, loss: 0.37577372789382935 2023-01-24 02:21:03.879983: step: 682/463, loss: 0.25884437561035156 2023-01-24 02:21:04.499851: step: 684/463, loss: 0.08484003692865372 2023-01-24 02:21:05.168813: step: 686/463, loss: 0.07422657310962677 2023-01-24 02:21:05.817555: step: 688/463, loss: 0.1635560542345047 2023-01-24 02:21:06.491300: step: 690/463, loss: 0.1623799055814743 2023-01-24 02:21:07.114031: step: 692/463, loss: 0.10525618493556976 2023-01-24 02:21:07.783633: step: 694/463, loss: 0.21033009886741638 2023-01-24 02:21:08.389028: step: 696/463, loss: 3.641453504562378 2023-01-24 02:21:08.982383: step: 698/463, loss: 0.04440350830554962 2023-01-24 02:21:09.607273: step: 700/463, loss: 0.06111063435673714 2023-01-24 02:21:10.204120: step: 702/463, loss: 0.18166281282901764 2023-01-24 02:21:10.790022: step: 704/463, loss: 0.09364775568246841 2023-01-24 02:21:11.385356: step: 706/463, loss: 0.03369786590337753 2023-01-24 02:21:12.004651: step: 708/463, loss: 0.06748832762241364 2023-01-24 02:21:12.584351: step: 710/463, loss: 0.0444084107875824 2023-01-24 02:21:13.204947: step: 712/463, loss: 0.07992536574602127 2023-01-24 02:21:13.887352: step: 714/463, loss: 0.15537388622760773 2023-01-24 02:21:14.457357: step: 716/463, loss: 0.08213411271572113 2023-01-24 02:21:15.080715: step: 718/463, loss: 0.06947049498558044 2023-01-24 02:21:15.736663: step: 720/463, loss: 0.04616475850343704 2023-01-24 02:21:16.374385: step: 722/463, loss: 0.021023746579885483 2023-01-24 02:21:16.993776: step: 724/463, loss: 0.055215150117874146 2023-01-24 02:21:17.629215: step: 726/463, loss: 0.09734038263559341 2023-01-24 02:21:18.324978: step: 728/463, loss: 0.090332992374897 2023-01-24 02:21:18.949345: step: 730/463, loss: 0.02742054872214794 2023-01-24 02:21:19.529833: step: 732/463, loss: 0.03850579261779785 2023-01-24 02:21:20.146011: step: 734/463, loss: 0.0677909180521965 2023-01-24 02:21:20.772943: step: 736/463, loss: 0.07665809988975525 2023-01-24 02:21:21.390807: step: 738/463, loss: 0.04111507907509804 2023-01-24 02:21:22.010857: step: 740/463, loss: 0.03949854150414467 2023-01-24 02:21:22.631169: step: 742/463, loss: 0.07002540677785873 2023-01-24 02:21:23.202933: step: 744/463, loss: 0.061483800411224365 2023-01-24 02:21:23.776550: step: 746/463, loss: 0.16534587740898132 2023-01-24 02:21:24.342580: step: 748/463, loss: 0.09157510101795197 2023-01-24 02:21:24.902507: step: 750/463, loss: 0.18512137234210968 2023-01-24 02:21:25.516455: step: 752/463, loss: 0.0620095320045948 2023-01-24 02:21:26.148462: step: 754/463, loss: 0.07434961199760437 2023-01-24 02:21:26.733292: step: 756/463, loss: 0.062027379870414734 2023-01-24 02:21:27.372773: step: 758/463, loss: 0.07949919998645782 2023-01-24 02:21:28.022284: step: 760/463, loss: 0.42312484979629517 2023-01-24 02:21:28.695295: step: 762/463, loss: 0.24034172296524048 2023-01-24 02:21:29.292295: step: 764/463, loss: 0.12589865922927856 2023-01-24 02:21:29.984944: step: 766/463, loss: 0.1013868898153305 2023-01-24 02:21:30.664722: step: 768/463, loss: 0.039633892476558685 2023-01-24 02:21:31.289072: step: 770/463, loss: 0.02084234543144703 2023-01-24 02:21:32.027723: step: 772/463, loss: 0.30262988805770874 2023-01-24 02:21:32.693297: step: 774/463, loss: 0.06847862899303436 2023-01-24 02:21:33.310458: step: 776/463, loss: 0.1463647335767746 2023-01-24 02:21:34.009766: step: 778/463, loss: 0.19242940843105316 2023-01-24 02:21:34.645650: step: 780/463, loss: 0.048475779592990875 2023-01-24 02:21:35.337360: step: 782/463, loss: 0.006628789473325014 2023-01-24 02:21:35.923848: step: 784/463, loss: 0.1461603343486786 2023-01-24 02:21:36.545505: step: 786/463, loss: 0.0678638443350792 2023-01-24 02:21:37.089833: step: 788/463, loss: 0.325725257396698 2023-01-24 02:21:37.677312: step: 790/463, loss: 0.018788587301969528 2023-01-24 02:21:38.402168: step: 792/463, loss: 0.3423551917076111 2023-01-24 02:21:39.040633: step: 794/463, loss: 0.16853122413158417 2023-01-24 02:21:39.677482: step: 796/463, loss: 0.09534242004156113 2023-01-24 02:21:40.402987: step: 798/463, loss: 0.004724225029349327 2023-01-24 02:21:40.971908: step: 800/463, loss: 0.12400691211223602 2023-01-24 02:21:41.597963: step: 802/463, loss: 0.040639910846948624 2023-01-24 02:21:42.206342: step: 804/463, loss: 0.05848333239555359 2023-01-24 02:21:42.808125: step: 806/463, loss: 0.0273519866168499 2023-01-24 02:21:43.408192: step: 808/463, loss: 0.1450655609369278 2023-01-24 02:21:43.977466: step: 810/463, loss: 0.07365318387746811 2023-01-24 02:21:44.558773: step: 812/463, loss: 0.03905070573091507 2023-01-24 02:21:45.156728: step: 814/463, loss: 0.10898259282112122 2023-01-24 02:21:45.838040: step: 816/463, loss: 0.1624394655227661 2023-01-24 02:21:46.484673: step: 818/463, loss: 0.06054326146841049 2023-01-24 02:21:47.104325: step: 820/463, loss: 0.09155477583408356 2023-01-24 02:21:47.712771: step: 822/463, loss: 0.04081856831908226 2023-01-24 02:21:48.252210: step: 824/463, loss: 0.029471836984157562 2023-01-24 02:21:48.849132: step: 826/463, loss: 0.16651517152786255 2023-01-24 02:21:49.497197: step: 828/463, loss: 0.5576082468032837 2023-01-24 02:21:50.179278: step: 830/463, loss: 0.10109864920377731 2023-01-24 02:21:50.763959: step: 832/463, loss: 0.14816658198833466 2023-01-24 02:21:51.382839: step: 834/463, loss: 0.41683343052864075 2023-01-24 02:21:52.031487: step: 836/463, loss: 0.06710539013147354 2023-01-24 02:21:52.756199: step: 838/463, loss: 0.12059952318668365 2023-01-24 02:21:53.382849: step: 840/463, loss: 1.7472076416015625 2023-01-24 02:21:53.958844: step: 842/463, loss: 0.02582356333732605 2023-01-24 02:21:54.558525: step: 844/463, loss: 0.022556236013770103 2023-01-24 02:21:55.184335: step: 846/463, loss: 0.03765268251299858 2023-01-24 02:21:55.800871: step: 848/463, loss: 0.002607722533866763 2023-01-24 02:21:56.432946: step: 850/463, loss: 0.8765310645103455 2023-01-24 02:21:57.068695: step: 852/463, loss: 0.09977966547012329 2023-01-24 02:21:57.706874: step: 854/463, loss: 0.16473069787025452 2023-01-24 02:21:58.291311: step: 856/463, loss: 0.134609192609787 2023-01-24 02:21:58.943005: step: 858/463, loss: 0.12209677696228027 2023-01-24 02:21:59.560654: step: 860/463, loss: 0.08143538981676102 2023-01-24 02:22:00.258533: step: 862/463, loss: 0.16555899381637573 2023-01-24 02:22:00.994085: step: 864/463, loss: 0.0493137463927269 2023-01-24 02:22:01.570842: step: 866/463, loss: 0.05469152703881264 2023-01-24 02:22:02.171073: step: 868/463, loss: 0.05459444597363472 2023-01-24 02:22:02.780817: step: 870/463, loss: 0.050362154841423035 2023-01-24 02:22:03.472637: step: 872/463, loss: 0.11006397753953934 2023-01-24 02:22:04.098425: step: 874/463, loss: 0.02549937181174755 2023-01-24 02:22:04.670964: step: 876/463, loss: 0.08144998550415039 2023-01-24 02:22:05.292459: step: 878/463, loss: 0.060430046170949936 2023-01-24 02:22:05.888989: step: 880/463, loss: 0.0760221853852272 2023-01-24 02:22:06.509819: step: 882/463, loss: 0.08889337629079819 2023-01-24 02:22:07.141108: step: 884/463, loss: 0.10870590060949326 2023-01-24 02:22:07.784761: step: 886/463, loss: 0.09921004623174667 2023-01-24 02:22:08.395142: step: 888/463, loss: 0.25388333201408386 2023-01-24 02:22:09.022392: step: 890/463, loss: 0.18940065801143646 2023-01-24 02:22:09.656831: step: 892/463, loss: 0.047829944640398026 2023-01-24 02:22:10.399982: step: 894/463, loss: 0.30890750885009766 2023-01-24 02:22:11.045854: step: 896/463, loss: 0.039594266563653946 2023-01-24 02:22:11.684919: step: 898/463, loss: 0.09997475147247314 2023-01-24 02:22:12.309343: step: 900/463, loss: 0.04879271984100342 2023-01-24 02:22:12.954639: step: 902/463, loss: 0.1596883088350296 2023-01-24 02:22:13.621872: step: 904/463, loss: 0.051398664712905884 2023-01-24 02:22:14.235803: step: 906/463, loss: 0.08819674700498581 2023-01-24 02:22:14.840066: step: 908/463, loss: 0.031007234007120132 2023-01-24 02:22:15.457036: step: 910/463, loss: 0.05106445774435997 2023-01-24 02:22:16.044163: step: 912/463, loss: 0.03426836431026459 2023-01-24 02:22:16.727166: step: 914/463, loss: 0.04149501025676727 2023-01-24 02:22:17.328430: step: 916/463, loss: 0.06882750988006592 2023-01-24 02:22:18.001401: step: 918/463, loss: 0.012824597768485546 2023-01-24 02:22:18.625104: step: 920/463, loss: 0.07905211299657822 2023-01-24 02:22:19.269738: step: 922/463, loss: 0.056551262736320496 2023-01-24 02:22:19.929893: step: 924/463, loss: 0.2920866310596466 2023-01-24 02:22:20.547055: step: 926/463, loss: 0.07523776590824127 ================================================== Loss: 0.165 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32030043331429564, 'r': 0.31786931048079053, 'f1': 0.31908024118738404}, 'combined': 0.23511175666438822, 'epoch': 17} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3768848252442484, 'r': 0.30321798331169025, 'f1': 0.33606171071102225}, 'combined': 0.23642532411830713, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.317287872568045, 'r': 0.3142775511964317, 'f1': 0.31577553761776833}, 'combined': 0.23267671192888192, 'epoch': 17} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36944698004366433, 'r': 0.30013537384001004, 'f1': 0.3312037924127602}, 'combined': 0.23515469261305974, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3340506455007443, 'r': 0.32073932945612266, 'f1': 0.3272596836851435}, 'combined': 0.24113871429431627, 'epoch': 17} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3915721315179609, 'r': 0.29479054792007187, 'f1': 0.3363579246322693}, 'combined': 0.2388141264889112, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3131313131313131, 'r': 0.2952380952380952, 'f1': 0.30392156862745096}, 'combined': 0.2026143790849673, 'epoch': 17} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3148148148148148, 'r': 0.3695652173913043, 'f1': 0.34}, 'combined': 0.17, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 17} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32931238967343335, 'r': 0.3212043194731405, 'f1': 0.3252078248932276}, 'combined': 0.2396268183423782, 'epoch': 10} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38073307869771683, 'r': 0.30299176943483225, 'f1': 0.3374427286417082}, 'combined': 0.23958433733561282, 'epoch': 10} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31666666666666665, 'r': 0.41304347826086957, 'f1': 0.3584905660377358}, 'combined': 0.1792452830188679, 'epoch': 10} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3394620582120582, 'r': 0.3092447916666667, 'f1': 0.32364965312190286}, 'combined': 0.23847869177403366, 'epoch': 7} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36468931207169214, 'r': 0.2587193810770381, 'f1': 0.30269771384817323}, 'combined': 0.214915376832203, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 7} ****************************** Epoch: 18 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:24:58.046116: step: 2/463, loss: 0.09575677663087845 2023-01-24 02:24:58.673906: step: 4/463, loss: 0.051463693380355835 2023-01-24 02:24:59.317937: step: 6/463, loss: 0.03267783671617508 2023-01-24 02:25:00.002180: step: 8/463, loss: 0.07870075851678848 2023-01-24 02:25:00.646660: step: 10/463, loss: 0.016744211316108704 2023-01-24 02:25:01.229244: step: 12/463, loss: 0.14256741106510162 2023-01-24 02:25:01.792275: step: 14/463, loss: 0.0809490755200386 2023-01-24 02:25:02.479295: step: 16/463, loss: 0.029062137007713318 2023-01-24 02:25:03.126360: step: 18/463, loss: 0.28690600395202637 2023-01-24 02:25:03.732169: step: 20/463, loss: 0.05500372126698494 2023-01-24 02:25:04.339462: step: 22/463, loss: 0.10214834660291672 2023-01-24 02:25:04.951655: step: 24/463, loss: 0.3384849429130554 2023-01-24 02:25:05.531629: step: 26/463, loss: 0.13370181620121002 2023-01-24 02:25:06.157237: step: 28/463, loss: 0.04160981997847557 2023-01-24 02:25:06.713146: step: 30/463, loss: 0.02520735003054142 2023-01-24 02:25:07.312456: step: 32/463, loss: 0.017283234745264053 2023-01-24 02:25:07.969545: step: 34/463, loss: 0.03380315378308296 2023-01-24 02:25:08.612024: step: 36/463, loss: 0.08482465893030167 2023-01-24 02:25:09.273827: step: 38/463, loss: 0.6763509511947632 2023-01-24 02:25:09.932415: step: 40/463, loss: 0.03958573564887047 2023-01-24 02:25:10.587012: step: 42/463, loss: 0.03320345655083656 2023-01-24 02:25:11.156005: step: 44/463, loss: 0.038296572864055634 2023-01-24 02:25:11.759914: step: 46/463, loss: 0.15046554803848267 2023-01-24 02:25:12.298549: step: 48/463, loss: 0.011378531344234943 2023-01-24 02:25:12.974463: step: 50/463, loss: 0.0927916094660759 2023-01-24 02:25:13.611262: step: 52/463, loss: 0.06472223997116089 2023-01-24 02:25:14.245245: step: 54/463, loss: 0.13122619688510895 2023-01-24 02:25:14.846952: step: 56/463, loss: 0.08959688246250153 2023-01-24 02:25:15.450645: step: 58/463, loss: 0.03567549213767052 2023-01-24 02:25:16.092366: step: 60/463, loss: 0.03740888088941574 2023-01-24 02:25:16.699659: step: 62/463, loss: 0.032983213663101196 2023-01-24 02:25:17.258342: step: 64/463, loss: 0.04049016535282135 2023-01-24 02:25:17.854309: step: 66/463, loss: 0.450126051902771 2023-01-24 02:25:18.498027: step: 68/463, loss: 0.11147656291723251 2023-01-24 02:25:19.137654: step: 70/463, loss: 0.08527442812919617 2023-01-24 02:25:19.781783: step: 72/463, loss: 0.07592001557350159 2023-01-24 02:25:20.406613: step: 74/463, loss: 0.06345807760953903 2023-01-24 02:25:21.103938: step: 76/463, loss: 0.09653505682945251 2023-01-24 02:25:21.732456: step: 78/463, loss: 0.08373561501502991 2023-01-24 02:25:22.343304: step: 80/463, loss: 0.0696195513010025 2023-01-24 02:25:22.991349: step: 82/463, loss: 0.06211879104375839 2023-01-24 02:25:23.643998: step: 84/463, loss: 0.021936163306236267 2023-01-24 02:25:24.244997: step: 86/463, loss: 0.0727575346827507 2023-01-24 02:25:24.924535: step: 88/463, loss: 0.0510992705821991 2023-01-24 02:25:25.518990: step: 90/463, loss: 0.068656787276268 2023-01-24 02:25:26.159105: step: 92/463, loss: 0.03275775909423828 2023-01-24 02:25:26.762688: step: 94/463, loss: 0.1420625001192093 2023-01-24 02:25:27.372418: step: 96/463, loss: 0.03200783580541611 2023-01-24 02:25:28.028446: step: 98/463, loss: 0.08256643265485764 2023-01-24 02:25:28.718226: step: 100/463, loss: 0.19330386817455292 2023-01-24 02:25:29.255853: step: 102/463, loss: 0.04319307953119278 2023-01-24 02:25:29.862067: step: 104/463, loss: 0.06816912442445755 2023-01-24 02:25:30.488268: step: 106/463, loss: 0.10066258907318115 2023-01-24 02:25:31.130323: step: 108/463, loss: 0.03464088961482048 2023-01-24 02:25:31.706146: step: 110/463, loss: 0.10461732745170593 2023-01-24 02:25:32.343970: step: 112/463, loss: 0.05488435551524162 2023-01-24 02:25:32.932190: step: 114/463, loss: 0.21697209775447845 2023-01-24 02:25:33.582687: step: 116/463, loss: 0.07251782715320587 2023-01-24 02:25:34.207081: step: 118/463, loss: 0.22030234336853027 2023-01-24 02:25:34.838574: step: 120/463, loss: 0.15928255021572113 2023-01-24 02:25:35.527117: step: 122/463, loss: 0.013751746155321598 2023-01-24 02:25:36.078480: step: 124/463, loss: 0.061291132122278214 2023-01-24 02:25:36.734828: step: 126/463, loss: 0.10262162983417511 2023-01-24 02:25:37.385213: step: 128/463, loss: 0.04329666495323181 2023-01-24 02:25:37.955723: step: 130/463, loss: 0.07350394874811172 2023-01-24 02:25:38.592183: step: 132/463, loss: 0.05380255728960037 2023-01-24 02:25:39.212497: step: 134/463, loss: 0.02735436148941517 2023-01-24 02:25:39.781564: step: 136/463, loss: 0.0862264633178711 2023-01-24 02:25:40.369383: step: 138/463, loss: 0.082193523645401 2023-01-24 02:25:41.002765: step: 140/463, loss: 0.1999729871749878 2023-01-24 02:25:41.640338: step: 142/463, loss: 0.08959734439849854 2023-01-24 02:25:42.249219: step: 144/463, loss: 0.07589547336101532 2023-01-24 02:25:42.862280: step: 146/463, loss: 0.06073533371090889 2023-01-24 02:25:43.559261: step: 148/463, loss: 0.048256538808345795 2023-01-24 02:25:44.202056: step: 150/463, loss: 0.09829704463481903 2023-01-24 02:25:44.796245: step: 152/463, loss: 0.04360481724143028 2023-01-24 02:25:45.447173: step: 154/463, loss: 0.18388868868350983 2023-01-24 02:25:46.080085: step: 156/463, loss: 0.16645021736621857 2023-01-24 02:25:46.727565: step: 158/463, loss: 0.019304951652884483 2023-01-24 02:25:47.374264: step: 160/463, loss: 0.13994832336902618 2023-01-24 02:25:48.003002: step: 162/463, loss: 0.2306874841451645 2023-01-24 02:25:48.602765: step: 164/463, loss: 0.054710280150175095 2023-01-24 02:25:49.234472: step: 166/463, loss: 0.05973265320062637 2023-01-24 02:25:49.799720: step: 168/463, loss: 0.03398183360695839 2023-01-24 02:25:50.428395: step: 170/463, loss: 0.044246166944503784 2023-01-24 02:25:51.077849: step: 172/463, loss: 0.24284932017326355 2023-01-24 02:25:51.769224: step: 174/463, loss: 0.11185333132743835 2023-01-24 02:25:52.362845: step: 176/463, loss: 0.008806613273918629 2023-01-24 02:25:52.897334: step: 178/463, loss: 0.3086085617542267 2023-01-24 02:25:53.460831: step: 180/463, loss: 0.10723006725311279 2023-01-24 02:25:54.063001: step: 182/463, loss: 0.3624266982078552 2023-01-24 02:25:54.632685: step: 184/463, loss: 0.023005586117506027 2023-01-24 02:25:55.260487: step: 186/463, loss: 0.05536022037267685 2023-01-24 02:25:55.853427: step: 188/463, loss: 0.022764110937714577 2023-01-24 02:25:56.495722: step: 190/463, loss: 0.03978153318166733 2023-01-24 02:25:57.138735: step: 192/463, loss: 0.21162763237953186 2023-01-24 02:25:57.757465: step: 194/463, loss: 0.0310394074767828 2023-01-24 02:25:58.392934: step: 196/463, loss: 0.11853180825710297 2023-01-24 02:25:58.966247: step: 198/463, loss: 0.11983560025691986 2023-01-24 02:25:59.619502: step: 200/463, loss: 0.1432884931564331 2023-01-24 02:26:00.207300: step: 202/463, loss: 0.0055761332623660564 2023-01-24 02:26:00.835190: step: 204/463, loss: 0.1375092715024948 2023-01-24 02:26:01.452744: step: 206/463, loss: 0.17699439823627472 2023-01-24 02:26:02.066351: step: 208/463, loss: 0.061360474675893784 2023-01-24 02:26:02.767116: step: 210/463, loss: 0.135872483253479 2023-01-24 02:26:03.417946: step: 212/463, loss: 0.15860754251480103 2023-01-24 02:26:04.035516: step: 214/463, loss: 0.1589597463607788 2023-01-24 02:26:04.699988: step: 216/463, loss: 0.11243332177400589 2023-01-24 02:26:05.315162: step: 218/463, loss: 0.6215667128562927 2023-01-24 02:26:05.986701: step: 220/463, loss: 0.07644777745008469 2023-01-24 02:26:06.530509: step: 222/463, loss: 0.09187624603509903 2023-01-24 02:26:07.124958: step: 224/463, loss: 0.14404645562171936 2023-01-24 02:26:07.764691: step: 226/463, loss: 0.08291461318731308 2023-01-24 02:26:08.366984: step: 228/463, loss: 0.06733614206314087 2023-01-24 02:26:08.969072: step: 230/463, loss: 1.127895474433899 2023-01-24 02:26:09.600599: step: 232/463, loss: 0.07475470751523972 2023-01-24 02:26:10.200379: step: 234/463, loss: 0.03836668282747269 2023-01-24 02:26:10.717604: step: 236/463, loss: 0.047745365649461746 2023-01-24 02:26:11.350943: step: 238/463, loss: 0.03367101773619652 2023-01-24 02:26:12.009725: step: 240/463, loss: 0.10433202236890793 2023-01-24 02:26:12.672838: step: 242/463, loss: 0.5015838742256165 2023-01-24 02:26:13.319474: step: 244/463, loss: 0.08400120586156845 2023-01-24 02:26:13.985461: step: 246/463, loss: 0.1120542660355568 2023-01-24 02:26:14.572557: step: 248/463, loss: 0.03129056841135025 2023-01-24 02:26:15.231348: step: 250/463, loss: 0.5522140860557556 2023-01-24 02:26:15.793858: step: 252/463, loss: 0.03513151407241821 2023-01-24 02:26:16.417690: step: 254/463, loss: 0.04025160148739815 2023-01-24 02:26:16.996512: step: 256/463, loss: 0.03733723610639572 2023-01-24 02:26:17.624217: step: 258/463, loss: 0.06205623596906662 2023-01-24 02:26:18.263746: step: 260/463, loss: 0.05162912607192993 2023-01-24 02:26:19.001456: step: 262/463, loss: 0.37755337357521057 2023-01-24 02:26:19.638081: step: 264/463, loss: 0.042662378400564194 2023-01-24 02:26:20.270883: step: 266/463, loss: 0.05240899696946144 2023-01-24 02:26:20.858636: step: 268/463, loss: 0.03140811249613762 2023-01-24 02:26:21.504452: step: 270/463, loss: 0.36606335639953613 2023-01-24 02:26:22.143170: step: 272/463, loss: 0.06856602430343628 2023-01-24 02:26:22.815427: step: 274/463, loss: 0.3330520987510681 2023-01-24 02:26:23.479460: step: 276/463, loss: 0.2629096508026123 2023-01-24 02:26:24.038522: step: 278/463, loss: 0.0025068826507776976 2023-01-24 02:26:24.639405: step: 280/463, loss: 0.01652650535106659 2023-01-24 02:26:25.243632: step: 282/463, loss: 0.03511057794094086 2023-01-24 02:26:25.884342: step: 284/463, loss: 0.9420105218887329 2023-01-24 02:26:26.593490: step: 286/463, loss: 0.044942669570446014 2023-01-24 02:26:27.228682: step: 288/463, loss: 0.1643531173467636 2023-01-24 02:26:27.783572: step: 290/463, loss: 0.009117883630096912 2023-01-24 02:26:28.381776: step: 292/463, loss: 0.7116032242774963 2023-01-24 02:26:29.010959: step: 294/463, loss: 0.050776753574609756 2023-01-24 02:26:29.610972: step: 296/463, loss: 0.030754417181015015 2023-01-24 02:26:30.200839: step: 298/463, loss: 0.0742848739027977 2023-01-24 02:26:30.856063: step: 300/463, loss: 0.06053052470088005 2023-01-24 02:26:31.461086: step: 302/463, loss: 0.0697970911860466 2023-01-24 02:26:32.102351: step: 304/463, loss: 0.02903003618121147 2023-01-24 02:26:32.762054: step: 306/463, loss: 0.031727951020002365 2023-01-24 02:26:33.357597: step: 308/463, loss: 0.03808044642210007 2023-01-24 02:26:34.021967: step: 310/463, loss: 0.3082350790500641 2023-01-24 02:26:34.638101: step: 312/463, loss: 0.08461116999387741 2023-01-24 02:26:35.197428: step: 314/463, loss: 0.04212109372019768 2023-01-24 02:26:35.814901: step: 316/463, loss: 0.18113531172275543 2023-01-24 02:26:36.397492: step: 318/463, loss: 0.08807864040136337 2023-01-24 02:26:37.070411: step: 320/463, loss: 2.593810796737671 2023-01-24 02:26:37.793295: step: 322/463, loss: 0.1946956068277359 2023-01-24 02:26:38.379291: step: 324/463, loss: 0.5349016189575195 2023-01-24 02:26:39.007448: step: 326/463, loss: 0.37395352125167847 2023-01-24 02:26:39.569983: step: 328/463, loss: 0.18361110985279083 2023-01-24 02:26:40.194898: step: 330/463, loss: 0.06555017828941345 2023-01-24 02:26:40.763066: step: 332/463, loss: 0.011233783327043056 2023-01-24 02:26:41.449646: step: 334/463, loss: 0.04576491937041283 2023-01-24 02:26:42.058218: step: 336/463, loss: 0.04909808933734894 2023-01-24 02:26:42.738377: step: 338/463, loss: 0.025527065619826317 2023-01-24 02:26:43.379981: step: 340/463, loss: 0.056881360709667206 2023-01-24 02:26:43.979764: step: 342/463, loss: 0.07874500751495361 2023-01-24 02:26:44.568642: step: 344/463, loss: 0.026527736335992813 2023-01-24 02:26:45.168549: step: 346/463, loss: 0.07117772102355957 2023-01-24 02:26:45.868249: step: 348/463, loss: 0.14647628366947174 2023-01-24 02:26:46.478265: step: 350/463, loss: 0.07334475964307785 2023-01-24 02:26:47.142754: step: 352/463, loss: 0.07348714768886566 2023-01-24 02:26:47.704627: step: 354/463, loss: 0.05241123586893082 2023-01-24 02:26:48.328942: step: 356/463, loss: 0.031441327184438705 2023-01-24 02:26:48.936788: step: 358/463, loss: 0.037145137786865234 2023-01-24 02:26:49.581795: step: 360/463, loss: 0.04266975447535515 2023-01-24 02:26:50.239658: step: 362/463, loss: 0.11917664110660553 2023-01-24 02:26:50.846932: step: 364/463, loss: 0.10840092599391937 2023-01-24 02:26:51.464936: step: 366/463, loss: 0.0876476839184761 2023-01-24 02:26:52.101605: step: 368/463, loss: 0.08520875871181488 2023-01-24 02:26:52.772043: step: 370/463, loss: 0.10809303820133209 2023-01-24 02:26:53.412030: step: 372/463, loss: 0.23933884501457214 2023-01-24 02:26:54.110349: step: 374/463, loss: 0.026513447985053062 2023-01-24 02:26:54.679611: step: 376/463, loss: 0.04930666461586952 2023-01-24 02:26:55.349476: step: 378/463, loss: 0.0922093614935875 2023-01-24 02:26:55.949380: step: 380/463, loss: 0.0243531484156847 2023-01-24 02:26:56.555132: step: 382/463, loss: 0.1148984432220459 2023-01-24 02:26:57.146820: step: 384/463, loss: 0.04660365357995033 2023-01-24 02:26:57.766801: step: 386/463, loss: 0.07341088354587555 2023-01-24 02:26:58.406023: step: 388/463, loss: 0.052628036588430405 2023-01-24 02:26:59.084452: step: 390/463, loss: 0.05970011651515961 2023-01-24 02:26:59.765256: step: 392/463, loss: 0.022383589297533035 2023-01-24 02:27:00.329305: step: 394/463, loss: 0.14790454506874084 2023-01-24 02:27:00.912251: step: 396/463, loss: 0.02282249554991722 2023-01-24 02:27:01.516110: step: 398/463, loss: 0.22740936279296875 2023-01-24 02:27:02.113248: step: 400/463, loss: 0.13446523249149323 2023-01-24 02:27:02.712274: step: 402/463, loss: 0.09999919682741165 2023-01-24 02:27:03.355256: step: 404/463, loss: 0.10480587184429169 2023-01-24 02:27:04.052899: step: 406/463, loss: 0.12094102799892426 2023-01-24 02:27:04.656290: step: 408/463, loss: 0.03079869970679283 2023-01-24 02:27:05.259103: step: 410/463, loss: 0.07265552878379822 2023-01-24 02:27:05.880511: step: 412/463, loss: 0.012553132139146328 2023-01-24 02:27:06.491887: step: 414/463, loss: 0.02456044591963291 2023-01-24 02:27:07.028025: step: 416/463, loss: 0.11063763499259949 2023-01-24 02:27:07.594574: step: 418/463, loss: 0.13517877459526062 2023-01-24 02:27:08.221025: step: 420/463, loss: 0.19115613400936127 2023-01-24 02:27:08.896963: step: 422/463, loss: 0.023905646055936813 2023-01-24 02:27:09.493094: step: 424/463, loss: 0.16697262227535248 2023-01-24 02:27:10.090784: step: 426/463, loss: 0.06683908402919769 2023-01-24 02:27:10.772672: step: 428/463, loss: 0.22334596514701843 2023-01-24 02:27:11.381436: step: 430/463, loss: 0.2207944691181183 2023-01-24 02:27:11.963854: step: 432/463, loss: 0.07285561412572861 2023-01-24 02:27:12.623116: step: 434/463, loss: 0.0343484953045845 2023-01-24 02:27:13.253871: step: 436/463, loss: 0.02543281763792038 2023-01-24 02:27:13.871144: step: 438/463, loss: 0.06723618507385254 2023-01-24 02:27:14.527954: step: 440/463, loss: 0.10826583951711655 2023-01-24 02:27:15.177349: step: 442/463, loss: 0.10975110530853271 2023-01-24 02:27:15.874017: step: 444/463, loss: 0.009133405052125454 2023-01-24 02:27:16.447377: step: 446/463, loss: 0.11722607910633087 2023-01-24 02:27:17.093126: step: 448/463, loss: 0.08113104104995728 2023-01-24 02:27:17.777586: step: 450/463, loss: 0.09234187752008438 2023-01-24 02:27:18.426832: step: 452/463, loss: 0.11121436953544617 2023-01-24 02:27:19.036028: step: 454/463, loss: 0.05085174739360809 2023-01-24 02:27:19.685737: step: 456/463, loss: 0.1991308331489563 2023-01-24 02:27:20.306130: step: 458/463, loss: 0.19277028739452362 2023-01-24 02:27:20.959878: step: 460/463, loss: 0.045133326202631 2023-01-24 02:27:21.604931: step: 462/463, loss: 0.024942439049482346 2023-01-24 02:27:22.273967: step: 464/463, loss: 0.22040243446826935 2023-01-24 02:27:22.921942: step: 466/463, loss: 0.1710156947374344 2023-01-24 02:27:23.548061: step: 468/463, loss: 0.13089902698993683 2023-01-24 02:27:24.193978: step: 470/463, loss: 0.01344236359000206 2023-01-24 02:27:24.969439: step: 472/463, loss: 0.626793622970581 2023-01-24 02:27:25.600677: step: 474/463, loss: 0.030428308993577957 2023-01-24 02:27:26.220282: step: 476/463, loss: 0.703050434589386 2023-01-24 02:27:26.886175: step: 478/463, loss: 0.03369858115911484 2023-01-24 02:27:27.520297: step: 480/463, loss: 0.10277866572141647 2023-01-24 02:27:28.193868: step: 482/463, loss: 0.25428739190101624 2023-01-24 02:27:28.807620: step: 484/463, loss: 0.13795559108257294 2023-01-24 02:27:29.435559: step: 486/463, loss: 0.06727771461009979 2023-01-24 02:27:30.029697: step: 488/463, loss: 0.06711052358150482 2023-01-24 02:27:30.699969: step: 490/463, loss: 0.05068530887365341 2023-01-24 02:27:31.334143: step: 492/463, loss: 0.057210471481084824 2023-01-24 02:27:31.957689: step: 494/463, loss: 2.3437981605529785 2023-01-24 02:27:32.526845: step: 496/463, loss: 0.19252783060073853 2023-01-24 02:27:33.137599: step: 498/463, loss: 0.43491172790527344 2023-01-24 02:27:33.825756: step: 500/463, loss: 0.12306785583496094 2023-01-24 02:27:34.435893: step: 502/463, loss: 1.0540437698364258 2023-01-24 02:27:35.029244: step: 504/463, loss: 0.13390810787677765 2023-01-24 02:27:35.670003: step: 506/463, loss: 0.042330581694841385 2023-01-24 02:27:36.243833: step: 508/463, loss: 0.04916974902153015 2023-01-24 02:27:36.871582: step: 510/463, loss: 0.14345765113830566 2023-01-24 02:27:37.487477: step: 512/463, loss: 0.47553956508636475 2023-01-24 02:27:38.104602: step: 514/463, loss: 0.08969658613204956 2023-01-24 02:27:38.716269: step: 516/463, loss: 0.09693299233913422 2023-01-24 02:27:39.338995: step: 518/463, loss: 0.0882391631603241 2023-01-24 02:27:39.974768: step: 520/463, loss: 0.08298903703689575 2023-01-24 02:27:40.614390: step: 522/463, loss: 0.37382107973098755 2023-01-24 02:27:41.204862: step: 524/463, loss: 0.07863723486661911 2023-01-24 02:27:41.911144: step: 526/463, loss: 0.06845583021640778 2023-01-24 02:27:42.510754: step: 528/463, loss: 0.036440592259168625 2023-01-24 02:27:43.180549: step: 530/463, loss: 0.05292169004678726 2023-01-24 02:27:43.831154: step: 532/463, loss: 0.033587746322155 2023-01-24 02:27:44.367929: step: 534/463, loss: 0.019419776275753975 2023-01-24 02:27:44.908678: step: 536/463, loss: 0.01702827215194702 2023-01-24 02:27:45.493610: step: 538/463, loss: 0.22136741876602173 2023-01-24 02:27:46.088238: step: 540/463, loss: 0.05431777983903885 2023-01-24 02:27:46.647303: step: 542/463, loss: 0.24639171361923218 2023-01-24 02:27:47.274480: step: 544/463, loss: 0.06504960358142853 2023-01-24 02:27:47.882466: step: 546/463, loss: 0.023148806765675545 2023-01-24 02:27:48.476138: step: 548/463, loss: 0.07710578292608261 2023-01-24 02:27:49.258596: step: 550/463, loss: 0.08692342042922974 2023-01-24 02:27:49.862727: step: 552/463, loss: 0.025866152718663216 2023-01-24 02:27:50.614196: step: 554/463, loss: 0.039410192519426346 2023-01-24 02:27:51.178952: step: 556/463, loss: 0.028719371184706688 2023-01-24 02:27:51.771120: step: 558/463, loss: 0.06714773178100586 2023-01-24 02:27:52.404533: step: 560/463, loss: 0.037261974066495895 2023-01-24 02:27:53.046882: step: 562/463, loss: 0.09637626260519028 2023-01-24 02:27:53.685597: step: 564/463, loss: 0.1088004782795906 2023-01-24 02:27:54.263264: step: 566/463, loss: 0.003913927357643843 2023-01-24 02:27:54.948248: step: 568/463, loss: 0.020106682553887367 2023-01-24 02:27:55.598819: step: 570/463, loss: 0.02026687189936638 2023-01-24 02:27:56.268576: step: 572/463, loss: 0.35392263531684875 2023-01-24 02:27:56.911019: step: 574/463, loss: 0.044078901410102844 2023-01-24 02:27:57.569351: step: 576/463, loss: 0.097461998462677 2023-01-24 02:27:58.269369: step: 578/463, loss: 0.07299451529979706 2023-01-24 02:27:58.888532: step: 580/463, loss: 0.09503714740276337 2023-01-24 02:27:59.544159: step: 582/463, loss: 0.17548733949661255 2023-01-24 02:28:00.159314: step: 584/463, loss: 0.09227161109447479 2023-01-24 02:28:00.782364: step: 586/463, loss: 0.1005164384841919 2023-01-24 02:28:01.411718: step: 588/463, loss: 0.010354729369282722 2023-01-24 02:28:02.060313: step: 590/463, loss: 0.10135121643543243 2023-01-24 02:28:02.852341: step: 592/463, loss: 0.04163278639316559 2023-01-24 02:28:03.514656: step: 594/463, loss: 0.07552412152290344 2023-01-24 02:28:04.170608: step: 596/463, loss: 0.06280016899108887 2023-01-24 02:28:04.749195: step: 598/463, loss: 0.07988286763429642 2023-01-24 02:28:05.358001: step: 600/463, loss: 0.07060851156711578 2023-01-24 02:28:05.935732: step: 602/463, loss: 0.10283144563436508 2023-01-24 02:28:06.599153: step: 604/463, loss: 0.38483208417892456 2023-01-24 02:28:07.187149: step: 606/463, loss: 0.08559473603963852 2023-01-24 02:28:07.797783: step: 608/463, loss: 0.25530701875686646 2023-01-24 02:28:08.521447: step: 610/463, loss: 0.056317463517189026 2023-01-24 02:28:09.076237: step: 612/463, loss: 0.007873989641666412 2023-01-24 02:28:09.646679: step: 614/463, loss: 0.03178169205784798 2023-01-24 02:28:10.328794: step: 616/463, loss: 0.4389042258262634 2023-01-24 02:28:10.940077: step: 618/463, loss: 0.04801509901881218 2023-01-24 02:28:11.588982: step: 620/463, loss: 0.009181219153106213 2023-01-24 02:28:12.192247: step: 622/463, loss: 0.07159817218780518 2023-01-24 02:28:12.836938: step: 624/463, loss: 0.0677419900894165 2023-01-24 02:28:13.467120: step: 626/463, loss: 0.0765475332736969 2023-01-24 02:28:14.120193: step: 628/463, loss: 0.03095046430826187 2023-01-24 02:28:14.762910: step: 630/463, loss: 0.31884270906448364 2023-01-24 02:28:15.389218: step: 632/463, loss: 0.09111306071281433 2023-01-24 02:28:16.069451: step: 634/463, loss: 0.04686432704329491 2023-01-24 02:28:16.729187: step: 636/463, loss: 0.057231348007917404 2023-01-24 02:28:17.339282: step: 638/463, loss: 0.06721889227628708 2023-01-24 02:28:18.007328: step: 640/463, loss: 0.058040618896484375 2023-01-24 02:28:18.623613: step: 642/463, loss: 0.09345799684524536 2023-01-24 02:28:19.291560: step: 644/463, loss: 0.16906502842903137 2023-01-24 02:28:19.926848: step: 646/463, loss: 0.2579292058944702 2023-01-24 02:28:20.532801: step: 648/463, loss: 0.1756364107131958 2023-01-24 02:28:21.159665: step: 650/463, loss: 0.07652164250612259 2023-01-24 02:28:21.731599: step: 652/463, loss: 0.10970866680145264 2023-01-24 02:28:22.359549: step: 654/463, loss: 0.014016704633831978 2023-01-24 02:28:22.985124: step: 656/463, loss: 6.679182052612305 2023-01-24 02:28:23.616897: step: 658/463, loss: 0.1376757174730301 2023-01-24 02:28:24.239419: step: 660/463, loss: 0.0993683934211731 2023-01-24 02:28:24.832880: step: 662/463, loss: 0.16253648698329926 2023-01-24 02:28:25.412319: step: 664/463, loss: 0.09620658308267593 2023-01-24 02:28:25.995964: step: 666/463, loss: 0.12352486699819565 2023-01-24 02:28:26.694132: step: 668/463, loss: 0.05761401727795601 2023-01-24 02:28:27.340225: step: 670/463, loss: 0.0636560395359993 2023-01-24 02:28:27.935707: step: 672/463, loss: 0.19490425288677216 2023-01-24 02:28:28.484024: step: 674/463, loss: 0.015664970502257347 2023-01-24 02:28:29.094570: step: 676/463, loss: 0.044512778520584106 2023-01-24 02:28:29.626826: step: 678/463, loss: 0.10259106755256653 2023-01-24 02:28:30.262243: step: 680/463, loss: 0.03903176635503769 2023-01-24 02:28:30.887971: step: 682/463, loss: 0.04648810252547264 2023-01-24 02:28:31.544087: step: 684/463, loss: 0.1719157099723816 2023-01-24 02:28:32.162183: step: 686/463, loss: 0.03018346056342125 2023-01-24 02:28:32.771330: step: 688/463, loss: 0.052614159882068634 2023-01-24 02:28:33.386482: step: 690/463, loss: 0.25435757637023926 2023-01-24 02:28:33.952633: step: 692/463, loss: 0.03407071530818939 2023-01-24 02:28:34.584940: step: 694/463, loss: 0.056672610342502594 2023-01-24 02:28:35.218952: step: 696/463, loss: 0.0890408307313919 2023-01-24 02:28:35.811035: step: 698/463, loss: 0.11598458141088486 2023-01-24 02:28:36.462026: step: 700/463, loss: 0.0641142874956131 2023-01-24 02:28:37.194872: step: 702/463, loss: 0.13280530273914337 2023-01-24 02:28:37.865412: step: 704/463, loss: 0.0904615968465805 2023-01-24 02:28:38.451437: step: 706/463, loss: 0.11961737275123596 2023-01-24 02:28:39.037506: step: 708/463, loss: 0.04505046829581261 2023-01-24 02:28:39.585986: step: 710/463, loss: 0.03640441596508026 2023-01-24 02:28:40.291521: step: 712/463, loss: 0.07452978193759918 2023-01-24 02:28:40.877041: step: 714/463, loss: 0.011070553213357925 2023-01-24 02:28:41.490277: step: 716/463, loss: 0.05546391382813454 2023-01-24 02:28:42.096892: step: 718/463, loss: 0.07795993983745575 2023-01-24 02:28:42.695170: step: 720/463, loss: 0.0749930590391159 2023-01-24 02:28:43.364862: step: 722/463, loss: 0.01869048736989498 2023-01-24 02:28:44.008978: step: 724/463, loss: 7.052923679351807 2023-01-24 02:28:44.631859: step: 726/463, loss: 0.14601360261440277 2023-01-24 02:28:45.264869: step: 728/463, loss: 0.13445410132408142 2023-01-24 02:28:45.889171: step: 730/463, loss: 0.03088410571217537 2023-01-24 02:28:46.441582: step: 732/463, loss: 0.24109312891960144 2023-01-24 02:28:47.057683: step: 734/463, loss: 0.14700418710708618 2023-01-24 02:28:47.624085: step: 736/463, loss: 0.037802934646606445 2023-01-24 02:28:48.252323: step: 738/463, loss: 0.4844880700111389 2023-01-24 02:28:48.991699: step: 740/463, loss: 0.1648467630147934 2023-01-24 02:28:49.637350: step: 742/463, loss: 0.19964702427387238 2023-01-24 02:28:50.255725: step: 744/463, loss: 0.14758339524269104 2023-01-24 02:28:51.025086: step: 746/463, loss: 0.18700455129146576 2023-01-24 02:28:51.684425: step: 748/463, loss: 0.03303791210055351 2023-01-24 02:28:52.337276: step: 750/463, loss: 0.011539335362613201 2023-01-24 02:28:53.018889: step: 752/463, loss: 0.30045410990715027 2023-01-24 02:28:53.578733: step: 754/463, loss: 0.091391921043396 2023-01-24 02:28:54.213863: step: 756/463, loss: 0.07764239609241486 2023-01-24 02:28:54.868564: step: 758/463, loss: 0.10730011761188507 2023-01-24 02:28:55.488146: step: 760/463, loss: 0.1372426301240921 2023-01-24 02:28:56.150588: step: 762/463, loss: 0.05005718022584915 2023-01-24 02:28:56.716906: step: 764/463, loss: 0.8890200257301331 2023-01-24 02:28:57.347412: step: 766/463, loss: 0.29781195521354675 2023-01-24 02:28:57.971538: step: 768/463, loss: 0.03434671834111214 2023-01-24 02:28:58.556439: step: 770/463, loss: 0.1583227962255478 2023-01-24 02:28:59.182016: step: 772/463, loss: 0.058938320726156235 2023-01-24 02:28:59.821739: step: 774/463, loss: 0.04012853652238846 2023-01-24 02:29:00.446344: step: 776/463, loss: 0.08627588301897049 2023-01-24 02:29:01.012014: step: 778/463, loss: 0.05946168303489685 2023-01-24 02:29:01.724377: step: 780/463, loss: 0.12694990634918213 2023-01-24 02:29:02.306015: step: 782/463, loss: 0.09992090612649918 2023-01-24 02:29:02.982032: step: 784/463, loss: 0.04740028455853462 2023-01-24 02:29:03.601536: step: 786/463, loss: 0.2043129801750183 2023-01-24 02:29:04.207680: step: 788/463, loss: 0.05588803067803383 2023-01-24 02:29:04.816094: step: 790/463, loss: 0.0523567870259285 2023-01-24 02:29:05.493596: step: 792/463, loss: 0.07567299902439117 2023-01-24 02:29:06.177662: step: 794/463, loss: 0.04652217775583267 2023-01-24 02:29:06.761188: step: 796/463, loss: 0.20958667993545532 2023-01-24 02:29:07.360565: step: 798/463, loss: 0.021559996530413628 2023-01-24 02:29:08.032734: step: 800/463, loss: 0.06934312731027603 2023-01-24 02:29:08.696659: step: 802/463, loss: 0.20477113127708435 2023-01-24 02:29:09.326613: step: 804/463, loss: 0.20227386057376862 2023-01-24 02:29:09.892996: step: 806/463, loss: 0.015886545181274414 2023-01-24 02:29:10.572709: step: 808/463, loss: 0.13469412922859192 2023-01-24 02:29:11.156192: step: 810/463, loss: 0.0581338070333004 2023-01-24 02:29:11.702850: step: 812/463, loss: 0.2700718343257904 2023-01-24 02:29:12.354627: step: 814/463, loss: 0.22858625650405884 2023-01-24 02:29:12.888970: step: 816/463, loss: 0.03130469471216202 2023-01-24 02:29:13.453352: step: 818/463, loss: 0.045551739633083344 2023-01-24 02:29:14.214743: step: 820/463, loss: 0.15722081065177917 2023-01-24 02:29:14.863387: step: 822/463, loss: 0.06179779767990112 2023-01-24 02:29:15.605689: step: 824/463, loss: 0.035228431224823 2023-01-24 02:29:16.224687: step: 826/463, loss: 0.12140107154846191 2023-01-24 02:29:16.857033: step: 828/463, loss: 0.029524633660912514 2023-01-24 02:29:17.489705: step: 830/463, loss: 0.0999583899974823 2023-01-24 02:29:18.138376: step: 832/463, loss: 0.06350894272327423 2023-01-24 02:29:18.807350: step: 834/463, loss: 0.07063741981983185 2023-01-24 02:29:19.499259: step: 836/463, loss: 0.051202442497015 2023-01-24 02:29:20.186731: step: 838/463, loss: 0.017461463809013367 2023-01-24 02:29:20.831549: step: 840/463, loss: 0.05457300692796707 2023-01-24 02:29:21.423778: step: 842/463, loss: 0.04418656602501869 2023-01-24 02:29:22.058692: step: 844/463, loss: 0.03016381524503231 2023-01-24 02:29:22.709662: step: 846/463, loss: 0.27756282687187195 2023-01-24 02:29:23.357925: step: 848/463, loss: 0.21358510851860046 2023-01-24 02:29:23.963658: step: 850/463, loss: 0.08542738854885101 2023-01-24 02:29:24.599929: step: 852/463, loss: 0.1412522792816162 2023-01-24 02:29:25.173037: step: 854/463, loss: 0.5848680734634399 2023-01-24 02:29:25.792079: step: 856/463, loss: 0.050907451659440994 2023-01-24 02:29:26.459215: step: 858/463, loss: 0.04106101393699646 2023-01-24 02:29:27.048178: step: 860/463, loss: 0.07985787093639374 2023-01-24 02:29:27.652066: step: 862/463, loss: 0.3205718994140625 2023-01-24 02:29:28.202323: step: 864/463, loss: 0.12309283018112183 2023-01-24 02:29:28.828560: step: 866/463, loss: 0.0836111530661583 2023-01-24 02:29:29.419271: step: 868/463, loss: 0.03659941256046295 2023-01-24 02:29:30.007602: step: 870/463, loss: 0.37124237418174744 2023-01-24 02:29:30.580622: step: 872/463, loss: 0.5246379375457764 2023-01-24 02:29:31.225080: step: 874/463, loss: 0.21049022674560547 2023-01-24 02:29:31.830510: step: 876/463, loss: 0.04738573729991913 2023-01-24 02:29:32.454130: step: 878/463, loss: 0.02092921733856201 2023-01-24 02:29:33.064858: step: 880/463, loss: 0.05016832798719406 2023-01-24 02:29:33.688705: step: 882/463, loss: 0.017665335908532143 2023-01-24 02:29:34.310963: step: 884/463, loss: 0.22971075773239136 2023-01-24 02:29:34.922873: step: 886/463, loss: 0.4689822196960449 2023-01-24 02:29:35.469435: step: 888/463, loss: 0.056294891983270645 2023-01-24 02:29:36.103262: step: 890/463, loss: 0.03705482929944992 2023-01-24 02:29:36.750115: step: 892/463, loss: 0.6104450225830078 2023-01-24 02:29:37.376275: step: 894/463, loss: 0.03918782249093056 2023-01-24 02:29:37.989165: step: 896/463, loss: 0.04459298029541969 2023-01-24 02:29:38.593388: step: 898/463, loss: 0.03534926846623421 2023-01-24 02:29:39.225916: step: 900/463, loss: 0.0701533854007721 2023-01-24 02:29:39.836665: step: 902/463, loss: 0.12147903442382812 2023-01-24 02:29:40.499997: step: 904/463, loss: 0.7455633282661438 2023-01-24 02:29:41.129316: step: 906/463, loss: 0.03264284506440163 2023-01-24 02:29:41.804658: step: 908/463, loss: 0.21156252920627594 2023-01-24 02:29:42.459190: step: 910/463, loss: 0.08874695003032684 2023-01-24 02:29:43.102515: step: 912/463, loss: 0.09979986399412155 2023-01-24 02:29:43.760298: step: 914/463, loss: 0.0074354554526507854 2023-01-24 02:29:44.439515: step: 916/463, loss: 0.0552276186645031 2023-01-24 02:29:45.018844: step: 918/463, loss: 0.0534919910132885 2023-01-24 02:29:45.604922: step: 920/463, loss: 0.01637696847319603 2023-01-24 02:29:46.257201: step: 922/463, loss: 0.023427320644259453 2023-01-24 02:29:46.877431: step: 924/463, loss: 0.016922233626246452 2023-01-24 02:29:47.508354: step: 926/463, loss: 0.10794581472873688 ================================================== Loss: 0.160 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31781049067354217, 'r': 0.31841354663307453, 'f1': 0.3181117328447967}, 'combined': 0.23439811893827125, 'epoch': 18} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.38169093706666846, 'r': 0.3240709264972674, 'f1': 0.35052881714569933}, 'combined': 0.24660318794169805, 'epoch': 18} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32405524061215835, 'r': 0.31790618481306615, 'f1': 0.32095126321165873}, 'combined': 0.23649040447174852, 'epoch': 18} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.381873815100232, 'r': 0.32022751859626786, 'f1': 0.34834431543552247}, 'combined': 0.24732446395922095, 'epoch': 18} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3386737397977516, 'r': 0.3251782017792454, 'f1': 0.3317887944582039}, 'combined': 0.24447595381130813, 'epoch': 18} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.39622752197872135, 'r': 0.31352151520761706, 'f1': 0.3500557142006061}, 'combined': 0.24853955708243033, 'epoch': 18} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29285714285714287, 'r': 0.29285714285714287, 'f1': 0.29285714285714287}, 'combined': 0.19523809523809524, 'epoch': 18} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.32608695652173914, 'f1': 0.2830188679245283}, 'combined': 0.14150943396226415, 'epoch': 18} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36363636363636365, 'r': 0.13793103448275862, 'f1': 0.2}, 'combined': 0.13333333333333333, 'epoch': 18} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32931238967343335, 'r': 0.3212043194731405, 'f1': 0.3252078248932276}, 'combined': 0.2396268183423782, 'epoch': 10} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38073307869771683, 'r': 0.30299176943483225, 'f1': 0.3374427286417082}, 'combined': 0.23958433733561282, 'epoch': 10} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31666666666666665, 'r': 0.41304347826086957, 'f1': 0.3584905660377358}, 'combined': 0.1792452830188679, 'epoch': 10} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3394620582120582, 'r': 0.3092447916666667, 'f1': 0.32364965312190286}, 'combined': 0.23847869177403366, 'epoch': 7} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36468931207169214, 'r': 0.2587193810770381, 'f1': 0.30269771384817323}, 'combined': 0.214915376832203, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 7} ****************************** Epoch: 19 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:32:25.757800: step: 2/463, loss: 0.057663850486278534 2023-01-24 02:32:26.357510: step: 4/463, loss: 0.06531534343957901 2023-01-24 02:32:26.968832: step: 6/463, loss: 0.14307349920272827 2023-01-24 02:32:27.577955: step: 8/463, loss: 0.004983075428754091 2023-01-24 02:32:28.139938: step: 10/463, loss: 0.0293290913105011 2023-01-24 02:32:28.689366: step: 12/463, loss: 0.010837848298251629 2023-01-24 02:32:29.270959: step: 14/463, loss: 0.2199220359325409 2023-01-24 02:32:29.914573: step: 16/463, loss: 0.1097944974899292 2023-01-24 02:32:30.508277: step: 18/463, loss: 0.04457506909966469 2023-01-24 02:32:31.136910: step: 20/463, loss: 0.03419800102710724 2023-01-24 02:32:31.755346: step: 22/463, loss: 0.20045742392539978 2023-01-24 02:32:32.382243: step: 24/463, loss: 0.03296763449907303 2023-01-24 02:32:32.965904: step: 26/463, loss: 0.21869856119155884 2023-01-24 02:32:33.597082: step: 28/463, loss: 0.05435926467180252 2023-01-24 02:32:34.148730: step: 30/463, loss: 0.33518853783607483 2023-01-24 02:32:34.812313: step: 32/463, loss: 0.4432954490184784 2023-01-24 02:32:35.455168: step: 34/463, loss: 0.0195598304271698 2023-01-24 02:32:36.118951: step: 36/463, loss: 0.042495060712099075 2023-01-24 02:32:36.725347: step: 38/463, loss: 0.09044361859560013 2023-01-24 02:32:37.315763: step: 40/463, loss: 0.029805108904838562 2023-01-24 02:32:37.920792: step: 42/463, loss: 0.06167149543762207 2023-01-24 02:32:38.466757: step: 44/463, loss: 0.12277710437774658 2023-01-24 02:32:39.087388: step: 46/463, loss: 0.15870416164398193 2023-01-24 02:32:39.724950: step: 48/463, loss: 0.03428998589515686 2023-01-24 02:32:40.327707: step: 50/463, loss: 0.049448806792497635 2023-01-24 02:32:40.968475: step: 52/463, loss: 0.014284425415098667 2023-01-24 02:32:41.599990: step: 54/463, loss: 0.1226578876376152 2023-01-24 02:32:42.210650: step: 56/463, loss: 0.02689017355442047 2023-01-24 02:32:42.816608: step: 58/463, loss: 0.037967748939991 2023-01-24 02:32:43.468514: step: 60/463, loss: 0.06935139000415802 2023-01-24 02:32:44.085769: step: 62/463, loss: 0.03027157671749592 2023-01-24 02:32:44.639059: step: 64/463, loss: 0.1905042827129364 2023-01-24 02:32:45.244187: step: 66/463, loss: 0.08262830227613449 2023-01-24 02:32:45.926149: step: 68/463, loss: 0.030859321355819702 2023-01-24 02:32:46.547947: step: 70/463, loss: 0.0846133828163147 2023-01-24 02:32:47.167029: step: 72/463, loss: 0.04663458466529846 2023-01-24 02:32:47.811371: step: 74/463, loss: 0.05767703428864479 2023-01-24 02:32:48.324197: step: 76/463, loss: 0.04482051730155945 2023-01-24 02:32:49.026179: step: 78/463, loss: 0.07119882106781006 2023-01-24 02:32:49.740050: step: 80/463, loss: 0.03546573594212532 2023-01-24 02:32:50.301887: step: 82/463, loss: 0.09066437184810638 2023-01-24 02:32:50.912995: step: 84/463, loss: 0.09702186286449432 2023-01-24 02:32:51.558320: step: 86/463, loss: 0.01249510608613491 2023-01-24 02:32:52.169996: step: 88/463, loss: 1.8856229782104492 2023-01-24 02:32:52.806285: step: 90/463, loss: 0.007770392578095198 2023-01-24 02:32:53.368317: step: 92/463, loss: 0.02285427413880825 2023-01-24 02:32:53.968768: step: 94/463, loss: 0.11249106377363205 2023-01-24 02:32:54.628924: step: 96/463, loss: 0.07266402244567871 2023-01-24 02:32:55.229773: step: 98/463, loss: 0.015225473791360855 2023-01-24 02:32:55.908746: step: 100/463, loss: 0.11769969016313553 2023-01-24 02:32:56.488954: step: 102/463, loss: 0.06549448519945145 2023-01-24 02:32:57.120190: step: 104/463, loss: 0.0033746410626918077 2023-01-24 02:32:57.689330: step: 106/463, loss: 0.0426170751452446 2023-01-24 02:32:58.279526: step: 108/463, loss: 0.0010525058023631573 2023-01-24 02:32:58.815784: step: 110/463, loss: 0.037894781678915024 2023-01-24 02:32:59.381886: step: 112/463, loss: 0.007128462195396423 2023-01-24 02:33:00.026509: step: 114/463, loss: 0.07518046349287033 2023-01-24 02:33:00.613654: step: 116/463, loss: 0.08321899175643921 2023-01-24 02:33:01.250264: step: 118/463, loss: 0.029285278171300888 2023-01-24 02:33:01.875048: step: 120/463, loss: 0.04156965762376785 2023-01-24 02:33:02.456020: step: 122/463, loss: 0.20080217719078064 2023-01-24 02:33:03.056041: step: 124/463, loss: 0.06461069732904434 2023-01-24 02:33:03.668618: step: 126/463, loss: 0.03330009803175926 2023-01-24 02:33:04.240448: step: 128/463, loss: 0.06380369514226913 2023-01-24 02:33:04.897529: step: 130/463, loss: 0.056565068662166595 2023-01-24 02:33:05.494136: step: 132/463, loss: 0.013667251914739609 2023-01-24 02:33:06.170678: step: 134/463, loss: 0.10740712285041809 2023-01-24 02:33:06.772105: step: 136/463, loss: 0.50336754322052 2023-01-24 02:33:07.398538: step: 138/463, loss: 0.19837845861911774 2023-01-24 02:33:08.033657: step: 140/463, loss: 0.0579318068921566 2023-01-24 02:33:08.645532: step: 142/463, loss: 0.10924737900495529 2023-01-24 02:33:09.239496: step: 144/463, loss: 0.02724529057741165 2023-01-24 02:33:09.835803: step: 146/463, loss: 0.008631836622953415 2023-01-24 02:33:10.469609: step: 148/463, loss: 0.030618157237768173 2023-01-24 02:33:11.072679: step: 150/463, loss: 0.03173861652612686 2023-01-24 02:33:11.717572: step: 152/463, loss: 0.09096605330705643 2023-01-24 02:33:12.382309: step: 154/463, loss: 0.10002270340919495 2023-01-24 02:33:12.978205: step: 156/463, loss: 0.1705314815044403 2023-01-24 02:33:13.635163: step: 158/463, loss: 0.05439681187272072 2023-01-24 02:33:14.212560: step: 160/463, loss: 0.25841134786605835 2023-01-24 02:33:15.080243: step: 162/463, loss: 0.29883143305778503 2023-01-24 02:33:15.664620: step: 164/463, loss: 0.03439442440867424 2023-01-24 02:33:16.278764: step: 166/463, loss: 0.19055183231830597 2023-01-24 02:33:16.982651: step: 168/463, loss: 0.7458613514900208 2023-01-24 02:33:17.566830: step: 170/463, loss: 0.07055769860744476 2023-01-24 02:33:18.262712: step: 172/463, loss: 0.16610155999660492 2023-01-24 02:33:18.907457: step: 174/463, loss: 0.0035463592503219843 2023-01-24 02:33:19.557024: step: 176/463, loss: 0.09198539704084396 2023-01-24 02:33:20.218687: step: 178/463, loss: 0.18888093531131744 2023-01-24 02:33:20.821896: step: 180/463, loss: 0.11001752316951752 2023-01-24 02:33:21.370776: step: 182/463, loss: 0.024090329185128212 2023-01-24 02:33:22.033138: step: 184/463, loss: 0.03409184515476227 2023-01-24 02:33:22.686513: step: 186/463, loss: 0.058531444519758224 2023-01-24 02:33:23.283011: step: 188/463, loss: 0.12792712450027466 2023-01-24 02:33:23.860909: step: 190/463, loss: 0.04001501202583313 2023-01-24 02:33:24.460896: step: 192/463, loss: 0.3894304633140564 2023-01-24 02:33:25.069198: step: 194/463, loss: 0.0381576232612133 2023-01-24 02:33:25.693337: step: 196/463, loss: 0.1341898888349533 2023-01-24 02:33:26.321942: step: 198/463, loss: 0.6760268211364746 2023-01-24 02:33:26.933647: step: 200/463, loss: 0.031808916479349136 2023-01-24 02:33:27.540674: step: 202/463, loss: 0.05850139260292053 2023-01-24 02:33:28.139779: step: 204/463, loss: 0.05272908881306648 2023-01-24 02:33:28.746748: step: 206/463, loss: 0.01802256517112255 2023-01-24 02:33:29.428792: step: 208/463, loss: 0.07984461635351181 2023-01-24 02:33:30.033830: step: 210/463, loss: 0.06188400089740753 2023-01-24 02:33:30.659500: step: 212/463, loss: 0.02958844043314457 2023-01-24 02:33:31.264379: step: 214/463, loss: 0.10545573383569717 2023-01-24 02:33:31.910421: step: 216/463, loss: 0.01938929222524166 2023-01-24 02:33:32.514661: step: 218/463, loss: 0.06803062558174133 2023-01-24 02:33:33.127698: step: 220/463, loss: 0.007822384126484394 2023-01-24 02:33:33.702248: step: 222/463, loss: 0.13430574536323547 2023-01-24 02:33:34.289961: step: 224/463, loss: 0.035468827933073044 2023-01-24 02:33:34.980740: step: 226/463, loss: 0.1327413022518158 2023-01-24 02:33:35.543734: step: 228/463, loss: 0.09445680677890778 2023-01-24 02:33:36.170329: step: 230/463, loss: 0.19202904403209686 2023-01-24 02:33:36.822097: step: 232/463, loss: 0.061921581625938416 2023-01-24 02:33:37.484406: step: 234/463, loss: 0.031398456543684006 2023-01-24 02:33:38.078233: step: 236/463, loss: 0.02656283788383007 2023-01-24 02:33:38.721624: step: 238/463, loss: 0.06516967713832855 2023-01-24 02:33:39.344235: step: 240/463, loss: 0.02176661416888237 2023-01-24 02:33:39.974802: step: 242/463, loss: 0.022336352616548538 2023-01-24 02:33:40.591903: step: 244/463, loss: 0.24535243213176727 2023-01-24 02:33:41.227749: step: 246/463, loss: 0.054133132100105286 2023-01-24 02:33:41.856789: step: 248/463, loss: 0.15670238435268402 2023-01-24 02:33:42.486626: step: 250/463, loss: 0.07044002413749695 2023-01-24 02:33:43.082929: step: 252/463, loss: 0.08846363425254822 2023-01-24 02:33:43.678734: step: 254/463, loss: 0.0777878537774086 2023-01-24 02:33:44.252507: step: 256/463, loss: 0.07350872457027435 2023-01-24 02:33:44.867418: step: 258/463, loss: 0.08348819613456726 2023-01-24 02:33:45.407986: step: 260/463, loss: 0.022751731798052788 2023-01-24 02:33:46.022153: step: 262/463, loss: 0.133905827999115 2023-01-24 02:33:46.656368: step: 264/463, loss: 0.03884967043995857 2023-01-24 02:33:47.307604: step: 266/463, loss: 0.02001109905540943 2023-01-24 02:33:47.922833: step: 268/463, loss: 0.011482705362141132 2023-01-24 02:33:48.568794: step: 270/463, loss: 0.3049876391887665 2023-01-24 02:33:49.250529: step: 272/463, loss: 0.12228597700595856 2023-01-24 02:33:49.829094: step: 274/463, loss: 0.03657298907637596 2023-01-24 02:33:50.525675: step: 276/463, loss: 0.07017376273870468 2023-01-24 02:33:51.211921: step: 278/463, loss: 0.016814880073070526 2023-01-24 02:33:51.790001: step: 280/463, loss: 0.01963457092642784 2023-01-24 02:33:52.381787: step: 282/463, loss: 0.04889999330043793 2023-01-24 02:33:53.077571: step: 284/463, loss: 0.05324851721525192 2023-01-24 02:33:53.816512: step: 286/463, loss: 0.0632835403084755 2023-01-24 02:33:54.444209: step: 288/463, loss: 0.31622806191444397 2023-01-24 02:33:55.055905: step: 290/463, loss: 0.02735239267349243 2023-01-24 02:33:55.714331: step: 292/463, loss: 0.09889932721853256 2023-01-24 02:33:56.348840: step: 294/463, loss: 0.04951997101306915 2023-01-24 02:33:56.960003: step: 296/463, loss: 0.0054313987493515015 2023-01-24 02:33:57.544807: step: 298/463, loss: 0.13621781766414642 2023-01-24 02:33:58.122467: step: 300/463, loss: 0.012562744319438934 2023-01-24 02:33:58.719856: step: 302/463, loss: 0.255750447511673 2023-01-24 02:33:59.482338: step: 304/463, loss: 0.09218160063028336 2023-01-24 02:34:00.236738: step: 306/463, loss: 0.02507704868912697 2023-01-24 02:34:00.903262: step: 308/463, loss: 0.039742760360240936 2023-01-24 02:34:01.487059: step: 310/463, loss: 0.03173178434371948 2023-01-24 02:34:02.079741: step: 312/463, loss: 0.08574745804071426 2023-01-24 02:34:02.746176: step: 314/463, loss: 0.06955710053443909 2023-01-24 02:34:03.418768: step: 316/463, loss: 0.061920735985040665 2023-01-24 02:34:04.020439: step: 318/463, loss: 0.029942195862531662 2023-01-24 02:34:04.556264: step: 320/463, loss: 0.049738869071006775 2023-01-24 02:34:05.247736: step: 322/463, loss: 0.11598402261734009 2023-01-24 02:34:05.809099: step: 324/463, loss: 0.031032908707857132 2023-01-24 02:34:06.461928: step: 326/463, loss: 0.2690066397190094 2023-01-24 02:34:07.029604: step: 328/463, loss: 0.01738431490957737 2023-01-24 02:34:07.628504: step: 330/463, loss: 0.07562378793954849 2023-01-24 02:34:08.238564: step: 332/463, loss: 0.021284054964780807 2023-01-24 02:34:08.912022: step: 334/463, loss: 0.1342237889766693 2023-01-24 02:34:09.579913: step: 336/463, loss: 0.034867364913225174 2023-01-24 02:34:10.228309: step: 338/463, loss: 0.16932062804698944 2023-01-24 02:34:10.842928: step: 340/463, loss: 0.027970703318715096 2023-01-24 02:34:11.489933: step: 342/463, loss: 0.045921023935079575 2023-01-24 02:34:12.181078: step: 344/463, loss: 0.061698418110609055 2023-01-24 02:34:12.793841: step: 346/463, loss: 0.14280752837657928 2023-01-24 02:34:13.392498: step: 348/463, loss: 0.07568220794200897 2023-01-24 02:34:13.990155: step: 350/463, loss: 0.07714620977640152 2023-01-24 02:34:14.578922: step: 352/463, loss: 0.03765660524368286 2023-01-24 02:34:15.261479: step: 354/463, loss: 0.02231885865330696 2023-01-24 02:34:15.879914: step: 356/463, loss: 0.05618496239185333 2023-01-24 02:34:16.453054: step: 358/463, loss: 0.023203175514936447 2023-01-24 02:34:17.036240: step: 360/463, loss: 0.01189499069005251 2023-01-24 02:34:17.630467: step: 362/463, loss: 0.05973991006612778 2023-01-24 02:34:18.184100: step: 364/463, loss: 0.023440046235919 2023-01-24 02:34:18.748054: step: 366/463, loss: 0.06703011691570282 2023-01-24 02:34:19.479166: step: 368/463, loss: 0.05737769231200218 2023-01-24 02:34:20.049070: step: 370/463, loss: 0.07963032275438309 2023-01-24 02:34:20.625037: step: 372/463, loss: 0.07609833031892776 2023-01-24 02:34:21.310816: step: 374/463, loss: 0.01505844946950674 2023-01-24 02:34:21.975805: step: 376/463, loss: 0.03988732025027275 2023-01-24 02:34:22.545021: step: 378/463, loss: 0.02279466763138771 2023-01-24 02:34:23.166359: step: 380/463, loss: 0.05537089705467224 2023-01-24 02:34:23.785124: step: 382/463, loss: 0.11076018959283829 2023-01-24 02:34:24.342310: step: 384/463, loss: 0.04419126361608505 2023-01-24 02:34:24.993688: step: 386/463, loss: 0.07816631346940994 2023-01-24 02:34:25.590162: step: 388/463, loss: 0.0680868849158287 2023-01-24 02:34:26.279441: step: 390/463, loss: 0.05570054054260254 2023-01-24 02:34:26.917227: step: 392/463, loss: 0.07119104266166687 2023-01-24 02:34:27.522240: step: 394/463, loss: 0.07600091397762299 2023-01-24 02:34:28.203393: step: 396/463, loss: 0.04519832879304886 2023-01-24 02:34:28.829803: step: 398/463, loss: 0.06042061373591423 2023-01-24 02:34:29.398508: step: 400/463, loss: 0.025133123621344566 2023-01-24 02:34:29.941390: step: 402/463, loss: 0.04175051674246788 2023-01-24 02:34:30.581936: step: 404/463, loss: 0.12120967358350754 2023-01-24 02:34:31.154594: step: 406/463, loss: 0.022938016802072525 2023-01-24 02:34:31.860899: step: 408/463, loss: 0.30307725071907043 2023-01-24 02:34:32.416078: step: 410/463, loss: 0.169505313038826 2023-01-24 02:34:33.035524: step: 412/463, loss: 0.0359751358628273 2023-01-24 02:34:33.611408: step: 414/463, loss: 0.08587028831243515 2023-01-24 02:34:34.263316: step: 416/463, loss: 0.04293084144592285 2023-01-24 02:34:34.876161: step: 418/463, loss: 0.06693749129772186 2023-01-24 02:34:35.459744: step: 420/463, loss: 0.01470076572149992 2023-01-24 02:34:36.097689: step: 422/463, loss: 0.12186209857463837 2023-01-24 02:34:36.753115: step: 424/463, loss: 0.06544449180364609 2023-01-24 02:34:37.333206: step: 426/463, loss: 0.056250739842653275 2023-01-24 02:34:37.970984: step: 428/463, loss: 0.03437524288892746 2023-01-24 02:34:38.584621: step: 430/463, loss: 0.0354839488863945 2023-01-24 02:34:39.210644: step: 432/463, loss: 0.06988158822059631 2023-01-24 02:34:39.781236: step: 434/463, loss: 0.010853128507733345 2023-01-24 02:34:40.340000: step: 436/463, loss: 0.013711227104067802 2023-01-24 02:34:40.978234: step: 438/463, loss: 0.01762387529015541 2023-01-24 02:34:41.575217: step: 440/463, loss: 0.010208251886069775 2023-01-24 02:34:42.171038: step: 442/463, loss: 0.0454680360853672 2023-01-24 02:34:42.789766: step: 444/463, loss: 0.12070836126804352 2023-01-24 02:34:43.454982: step: 446/463, loss: 0.14142417907714844 2023-01-24 02:34:44.050327: step: 448/463, loss: 0.04261640086770058 2023-01-24 02:34:44.667122: step: 450/463, loss: 0.043217938393354416 2023-01-24 02:34:45.269559: step: 452/463, loss: 0.011621658690273762 2023-01-24 02:34:45.953710: step: 454/463, loss: 0.021213434636592865 2023-01-24 02:34:46.534102: step: 456/463, loss: 0.09116818010807037 2023-01-24 02:34:47.206851: step: 458/463, loss: 0.02029240131378174 2023-01-24 02:34:47.911202: step: 460/463, loss: 0.04831793159246445 2023-01-24 02:34:48.482578: step: 462/463, loss: 0.013079005293548107 2023-01-24 02:34:49.109025: step: 464/463, loss: 0.0548066571354866 2023-01-24 02:34:49.765252: step: 466/463, loss: 0.11882969737052917 2023-01-24 02:34:50.378508: step: 468/463, loss: 0.006007003597915173 2023-01-24 02:34:51.019931: step: 470/463, loss: 0.11424409598112106 2023-01-24 02:34:51.654520: step: 472/463, loss: 0.04134739935398102 2023-01-24 02:34:52.304362: step: 474/463, loss: 0.09170585870742798 2023-01-24 02:34:52.956064: step: 476/463, loss: 0.12008154392242432 2023-01-24 02:34:53.539224: step: 478/463, loss: 0.1369221806526184 2023-01-24 02:34:54.133163: step: 480/463, loss: 0.03309846296906471 2023-01-24 02:34:54.734192: step: 482/463, loss: 0.06507931649684906 2023-01-24 02:34:55.332852: step: 484/463, loss: 0.023631833493709564 2023-01-24 02:34:55.927437: step: 486/463, loss: 0.04850677773356438 2023-01-24 02:34:56.518137: step: 488/463, loss: 0.053245481103658676 2023-01-24 02:34:57.132224: step: 490/463, loss: 0.05617958679795265 2023-01-24 02:34:57.775009: step: 492/463, loss: 0.04027038440108299 2023-01-24 02:34:58.469638: step: 494/463, loss: 0.14176468551158905 2023-01-24 02:34:59.042503: step: 496/463, loss: 0.07842358946800232 2023-01-24 02:34:59.657235: step: 498/463, loss: 0.05717800185084343 2023-01-24 02:35:00.309145: step: 500/463, loss: 0.13985928893089294 2023-01-24 02:35:00.836262: step: 502/463, loss: 0.04344790801405907 2023-01-24 02:35:01.411462: step: 504/463, loss: 0.370912104845047 2023-01-24 02:35:02.022212: step: 506/463, loss: 0.14582957327365875 2023-01-24 02:35:02.656309: step: 508/463, loss: 0.10058252513408661 2023-01-24 02:35:03.252842: step: 510/463, loss: 0.07816660404205322 2023-01-24 02:35:03.831118: step: 512/463, loss: 0.03520788997411728 2023-01-24 02:35:04.466006: step: 514/463, loss: 0.0190377626568079 2023-01-24 02:35:05.130135: step: 516/463, loss: 0.041128091514110565 2023-01-24 02:35:05.759031: step: 518/463, loss: 0.06718459725379944 2023-01-24 02:35:06.428487: step: 520/463, loss: 0.04229677841067314 2023-01-24 02:35:07.024963: step: 522/463, loss: 0.037229619920253754 2023-01-24 02:35:07.628547: step: 524/463, loss: 0.08129183202981949 2023-01-24 02:35:08.243423: step: 526/463, loss: 0.2073834389448166 2023-01-24 02:35:08.854342: step: 528/463, loss: 0.14947672188282013 2023-01-24 02:35:09.556080: step: 530/463, loss: 0.15078558027744293 2023-01-24 02:35:10.164696: step: 532/463, loss: 0.014261019416153431 2023-01-24 02:35:10.818509: step: 534/463, loss: 0.08572380244731903 2023-01-24 02:35:11.433609: step: 536/463, loss: 0.09056472778320312 2023-01-24 02:35:12.118808: step: 538/463, loss: 0.05427723750472069 2023-01-24 02:35:12.752148: step: 540/463, loss: 0.09648444503545761 2023-01-24 02:35:13.428798: step: 542/463, loss: 0.03852071985602379 2023-01-24 02:35:14.014371: step: 544/463, loss: 0.05020419880747795 2023-01-24 02:35:14.600335: step: 546/463, loss: 0.015779076144099236 2023-01-24 02:35:15.241439: step: 548/463, loss: 0.0553361140191555 2023-01-24 02:35:15.810980: step: 550/463, loss: 0.0898786261677742 2023-01-24 02:35:16.398465: step: 552/463, loss: 0.050421036779880524 2023-01-24 02:35:17.037311: step: 554/463, loss: 0.008112074807286263 2023-01-24 02:35:17.666107: step: 556/463, loss: 0.08083385229110718 2023-01-24 02:35:18.294067: step: 558/463, loss: 0.08644532412290573 2023-01-24 02:35:18.873394: step: 560/463, loss: 0.17815184593200684 2023-01-24 02:35:19.535461: step: 562/463, loss: 0.03795737400650978 2023-01-24 02:35:20.207791: step: 564/463, loss: 0.036291301250457764 2023-01-24 02:35:20.843064: step: 566/463, loss: 0.06497196853160858 2023-01-24 02:35:21.509099: step: 568/463, loss: 0.023327374830842018 2023-01-24 02:35:22.118999: step: 570/463, loss: 0.2165520042181015 2023-01-24 02:35:22.768081: step: 572/463, loss: 0.008921490050852299 2023-01-24 02:35:23.415052: step: 574/463, loss: 0.09711457043886185 2023-01-24 02:35:24.033918: step: 576/463, loss: 0.017849082127213478 2023-01-24 02:35:24.614598: step: 578/463, loss: 0.04216998443007469 2023-01-24 02:35:25.169515: step: 580/463, loss: 0.050681985914707184 2023-01-24 02:35:25.813908: step: 582/463, loss: 0.0298304446041584 2023-01-24 02:35:26.402412: step: 584/463, loss: 0.04195041209459305 2023-01-24 02:35:27.047838: step: 586/463, loss: 0.081159807741642 2023-01-24 02:35:27.714242: step: 588/463, loss: 0.03500701114535332 2023-01-24 02:35:28.381166: step: 590/463, loss: 0.07850270718336105 2023-01-24 02:35:29.021332: step: 592/463, loss: 0.036264337599277496 2023-01-24 02:35:29.624665: step: 594/463, loss: 0.06138039380311966 2023-01-24 02:35:30.203157: step: 596/463, loss: 0.029265770688652992 2023-01-24 02:35:30.848890: step: 598/463, loss: 0.020987076684832573 2023-01-24 02:35:31.453959: step: 600/463, loss: 0.0410863496363163 2023-01-24 02:35:32.062282: step: 602/463, loss: 0.1002855971455574 2023-01-24 02:35:32.713550: step: 604/463, loss: 0.027873344719409943 2023-01-24 02:35:33.297571: step: 606/463, loss: 0.11421670019626617 2023-01-24 02:35:33.876129: step: 608/463, loss: 0.2390560805797577 2023-01-24 02:35:34.477171: step: 610/463, loss: 0.031235449016094208 2023-01-24 02:35:35.038190: step: 612/463, loss: 0.01492144726216793 2023-01-24 02:35:35.661279: step: 614/463, loss: 0.39039748907089233 2023-01-24 02:35:36.303360: step: 616/463, loss: 0.7089836597442627 2023-01-24 02:35:36.927514: step: 618/463, loss: 0.4227707087993622 2023-01-24 02:35:37.474989: step: 620/463, loss: 0.07317569106817245 2023-01-24 02:35:38.121672: step: 622/463, loss: 0.0839766263961792 2023-01-24 02:35:38.744253: step: 624/463, loss: 0.04584223031997681 2023-01-24 02:35:39.372647: step: 626/463, loss: 0.07505856454372406 2023-01-24 02:35:39.979429: step: 628/463, loss: 0.013723655603826046 2023-01-24 02:35:40.682004: step: 630/463, loss: 0.7202251553535461 2023-01-24 02:35:41.331316: step: 632/463, loss: 0.08581442385911942 2023-01-24 02:35:41.973407: step: 634/463, loss: 0.000960512668825686 2023-01-24 02:35:42.555334: step: 636/463, loss: 0.048127587884664536 2023-01-24 02:35:43.117530: step: 638/463, loss: 0.035912975668907166 2023-01-24 02:35:43.675960: step: 640/463, loss: 0.3048136234283447 2023-01-24 02:35:44.291382: step: 642/463, loss: 0.1339782327413559 2023-01-24 02:35:44.924480: step: 644/463, loss: 0.09748111665248871 2023-01-24 02:35:45.558832: step: 646/463, loss: 0.023222634568810463 2023-01-24 02:35:46.136190: step: 648/463, loss: 0.05282842367887497 2023-01-24 02:35:46.695934: step: 650/463, loss: 0.008082824759185314 2023-01-24 02:35:47.281952: step: 652/463, loss: 0.048148877918720245 2023-01-24 02:35:47.838547: step: 654/463, loss: 0.13145798444747925 2023-01-24 02:35:48.461702: step: 656/463, loss: 0.09967157244682312 2023-01-24 02:35:49.076191: step: 658/463, loss: 0.038168225437402725 2023-01-24 02:35:49.652673: step: 660/463, loss: 0.0965147316455841 2023-01-24 02:35:50.304759: step: 662/463, loss: 0.07145313173532486 2023-01-24 02:35:50.883731: step: 664/463, loss: 0.06286177784204483 2023-01-24 02:35:51.531157: step: 666/463, loss: 0.01964842528104782 2023-01-24 02:35:52.100951: step: 668/463, loss: 0.011269272305071354 2023-01-24 02:35:52.828732: step: 670/463, loss: 0.11999113857746124 2023-01-24 02:35:53.416211: step: 672/463, loss: 0.1592174470424652 2023-01-24 02:35:54.059992: step: 674/463, loss: 0.13893483579158783 2023-01-24 02:35:54.677901: step: 676/463, loss: 0.23567824065685272 2023-01-24 02:35:55.320592: step: 678/463, loss: 0.12234888970851898 2023-01-24 02:35:55.958248: step: 680/463, loss: 0.07196137309074402 2023-01-24 02:35:56.496316: step: 682/463, loss: 0.4420085549354553 2023-01-24 02:35:57.153907: step: 684/463, loss: 0.03603744134306908 2023-01-24 02:35:57.781107: step: 686/463, loss: 0.35498881340026855 2023-01-24 02:35:58.348891: step: 688/463, loss: 0.06362433731555939 2023-01-24 02:35:58.974559: step: 690/463, loss: 0.061724890023469925 2023-01-24 02:35:59.587986: step: 692/463, loss: 0.06445822864770889 2023-01-24 02:36:00.153449: step: 694/463, loss: 0.05932959169149399 2023-01-24 02:36:00.748459: step: 696/463, loss: 0.15033738315105438 2023-01-24 02:36:01.484740: step: 698/463, loss: 0.08636143803596497 2023-01-24 02:36:02.044965: step: 700/463, loss: 0.11946927011013031 2023-01-24 02:36:02.669886: step: 702/463, loss: 0.2889096736907959 2023-01-24 02:36:03.255880: step: 704/463, loss: 0.16874463856220245 2023-01-24 02:36:03.860728: step: 706/463, loss: 0.051500361412763596 2023-01-24 02:36:04.462461: step: 708/463, loss: 0.05109116807579994 2023-01-24 02:36:05.168744: step: 710/463, loss: 0.2085372656583786 2023-01-24 02:36:05.846145: step: 712/463, loss: 0.03704514354467392 2023-01-24 02:36:06.480757: step: 714/463, loss: 0.13466857373714447 2023-01-24 02:36:07.058040: step: 716/463, loss: 0.1807176172733307 2023-01-24 02:36:07.650655: step: 718/463, loss: 0.17046858370304108 2023-01-24 02:36:08.209982: step: 720/463, loss: 0.007995630614459515 2023-01-24 02:36:08.850384: step: 722/463, loss: 0.1008811891078949 2023-01-24 02:36:09.490747: step: 724/463, loss: 0.02395818941295147 2023-01-24 02:36:10.083789: step: 726/463, loss: 0.01714463159441948 2023-01-24 02:36:10.725400: step: 728/463, loss: 0.11893889307975769 2023-01-24 02:36:11.325057: step: 730/463, loss: 0.15034231543540955 2023-01-24 02:36:11.985056: step: 732/463, loss: 0.01824524998664856 2023-01-24 02:36:12.586769: step: 734/463, loss: 0.042697370052337646 2023-01-24 02:36:13.224988: step: 736/463, loss: 0.422715961933136 2023-01-24 02:36:13.817551: step: 738/463, loss: 0.03376415744423866 2023-01-24 02:36:14.519831: step: 740/463, loss: 0.11297562718391418 2023-01-24 02:36:15.217119: step: 742/463, loss: 0.09425202757120132 2023-01-24 02:36:15.900005: step: 744/463, loss: 0.06407539546489716 2023-01-24 02:36:16.482322: step: 746/463, loss: 0.008910772390663624 2023-01-24 02:36:17.091357: step: 748/463, loss: 0.041763756424188614 2023-01-24 02:36:17.656788: step: 750/463, loss: 1.9369014501571655 2023-01-24 02:36:18.236868: step: 752/463, loss: 0.24664917588233948 2023-01-24 02:36:18.849531: step: 754/463, loss: 0.05473898723721504 2023-01-24 02:36:19.550025: step: 756/463, loss: 0.07380196452140808 2023-01-24 02:36:20.204003: step: 758/463, loss: 0.09692689031362534 2023-01-24 02:36:20.833689: step: 760/463, loss: 0.12395976483821869 2023-01-24 02:36:21.401101: step: 762/463, loss: 0.05682516470551491 2023-01-24 02:36:21.990385: step: 764/463, loss: 0.06350929290056229 2023-01-24 02:36:22.629607: step: 766/463, loss: 0.02421751618385315 2023-01-24 02:36:23.225697: step: 768/463, loss: 0.027995552867650986 2023-01-24 02:36:23.847013: step: 770/463, loss: 0.029817581176757812 2023-01-24 02:36:24.454396: step: 772/463, loss: 0.4331117570400238 2023-01-24 02:36:25.056154: step: 774/463, loss: 0.08772415667772293 2023-01-24 02:36:25.653822: step: 776/463, loss: 0.43814387917518616 2023-01-24 02:36:26.204836: step: 778/463, loss: 0.06586246937513351 2023-01-24 02:36:26.740413: step: 780/463, loss: 0.14400525391101837 2023-01-24 02:36:27.295347: step: 782/463, loss: 0.07666605710983276 2023-01-24 02:36:27.898442: step: 784/463, loss: 0.051273733377456665 2023-01-24 02:36:28.491359: step: 786/463, loss: 0.12104083597660065 2023-01-24 02:36:29.102851: step: 788/463, loss: 0.06918870657682419 2023-01-24 02:36:29.662031: step: 790/463, loss: 0.019210804253816605 2023-01-24 02:36:30.383633: step: 792/463, loss: 0.0370732806622982 2023-01-24 02:36:30.987728: step: 794/463, loss: 0.08835723251104355 2023-01-24 02:36:31.564157: step: 796/463, loss: 1.8714908361434937 2023-01-24 02:36:32.143533: step: 798/463, loss: 0.13103893399238586 2023-01-24 02:36:32.824409: step: 800/463, loss: 0.018155893310904503 2023-01-24 02:36:33.439747: step: 802/463, loss: 0.10668099671602249 2023-01-24 02:36:34.040682: step: 804/463, loss: 0.06346654146909714 2023-01-24 02:36:34.632759: step: 806/463, loss: 0.027522891759872437 2023-01-24 02:36:35.305442: step: 808/463, loss: 0.03650457784533501 2023-01-24 02:36:35.869219: step: 810/463, loss: 0.01802133023738861 2023-01-24 02:36:36.490198: step: 812/463, loss: 0.07952763140201569 2023-01-24 02:36:37.053341: step: 814/463, loss: 0.10670528560876846 2023-01-24 02:36:37.606045: step: 816/463, loss: 0.041709836572408676 2023-01-24 02:36:38.224594: step: 818/463, loss: 0.33363908529281616 2023-01-24 02:36:38.835870: step: 820/463, loss: 0.5890706181526184 2023-01-24 02:36:39.463170: step: 822/463, loss: 0.021159078925848007 2023-01-24 02:36:40.113900: step: 824/463, loss: 0.034833136945962906 2023-01-24 02:36:40.760882: step: 826/463, loss: 0.04707980901002884 2023-01-24 02:36:41.350818: step: 828/463, loss: 0.07694581151008606 2023-01-24 02:36:41.997999: step: 830/463, loss: 0.04444859176874161 2023-01-24 02:36:42.614746: step: 832/463, loss: 0.005036717746406794 2023-01-24 02:36:43.205881: step: 834/463, loss: 0.1377003937959671 2023-01-24 02:36:43.754332: step: 836/463, loss: 0.0704628974199295 2023-01-24 02:36:44.367982: step: 838/463, loss: 0.08279097825288773 2023-01-24 02:36:44.975028: step: 840/463, loss: 0.045990075916051865 2023-01-24 02:36:45.575483: step: 842/463, loss: 0.03581404685974121 2023-01-24 02:36:46.195755: step: 844/463, loss: 0.019937526434659958 2023-01-24 02:36:46.774938: step: 846/463, loss: 0.10041891783475876 2023-01-24 02:36:47.440053: step: 848/463, loss: 0.10559139400720596 2023-01-24 02:36:48.043582: step: 850/463, loss: 0.021949928253889084 2023-01-24 02:36:48.648574: step: 852/463, loss: 0.0410609133541584 2023-01-24 02:36:49.243997: step: 854/463, loss: 0.015931054949760437 2023-01-24 02:36:49.871301: step: 856/463, loss: 0.0982465147972107 2023-01-24 02:36:50.524710: step: 858/463, loss: 0.03595297411084175 2023-01-24 02:36:51.142915: step: 860/463, loss: 3.054748058319092 2023-01-24 02:36:51.746662: step: 862/463, loss: 0.06384914368391037 2023-01-24 02:36:52.469563: step: 864/463, loss: 0.011823407374322414 2023-01-24 02:36:53.037355: step: 866/463, loss: 0.1454210877418518 2023-01-24 02:36:53.631381: step: 868/463, loss: 0.4407978653907776 2023-01-24 02:36:54.153959: step: 870/463, loss: 0.13729149103164673 2023-01-24 02:36:54.771288: step: 872/463, loss: 0.07273836433887482 2023-01-24 02:36:55.353908: step: 874/463, loss: 0.10191710293292999 2023-01-24 02:36:55.927489: step: 876/463, loss: 0.2369750738143921 2023-01-24 02:36:56.570887: step: 878/463, loss: 0.13763144612312317 2023-01-24 02:36:57.215814: step: 880/463, loss: 0.030223777517676353 2023-01-24 02:36:57.846080: step: 882/463, loss: 0.02678307145833969 2023-01-24 02:36:58.469062: step: 884/463, loss: 0.32804879546165466 2023-01-24 02:36:59.022806: step: 886/463, loss: 0.061210762709379196 2023-01-24 02:36:59.581606: step: 888/463, loss: 0.034541208297014236 2023-01-24 02:37:00.230877: step: 890/463, loss: 0.21213310956954956 2023-01-24 02:37:00.843416: step: 892/463, loss: 0.0755695253610611 2023-01-24 02:37:01.465641: step: 894/463, loss: 0.013081223703920841 2023-01-24 02:37:02.135903: step: 896/463, loss: 0.028600936755537987 2023-01-24 02:37:02.722637: step: 898/463, loss: 0.005399456713348627 2023-01-24 02:37:03.352391: step: 900/463, loss: 0.0005356702022254467 2023-01-24 02:37:03.986547: step: 902/463, loss: 0.03229433298110962 2023-01-24 02:37:04.559518: step: 904/463, loss: 0.06504955142736435 2023-01-24 02:37:05.197623: step: 906/463, loss: 0.09735742956399918 2023-01-24 02:37:05.839207: step: 908/463, loss: 0.4849313497543335 2023-01-24 02:37:06.478421: step: 910/463, loss: 0.1784490942955017 2023-01-24 02:37:07.028818: step: 912/463, loss: 0.008699133060872555 2023-01-24 02:37:07.564160: step: 914/463, loss: 0.10033126175403595 2023-01-24 02:37:08.214636: step: 916/463, loss: 0.18439491093158722 2023-01-24 02:37:08.834011: step: 918/463, loss: 0.07299764454364777 2023-01-24 02:37:09.431423: step: 920/463, loss: 0.1074349582195282 2023-01-24 02:37:09.990601: step: 922/463, loss: 0.02622360549867153 2023-01-24 02:37:10.577674: step: 924/463, loss: 0.015146640129387379 2023-01-24 02:37:11.178351: step: 926/463, loss: 0.11928309500217438 ================================================== Loss: 0.110 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3232468467233342, 'r': 0.31956661696936833, 'f1': 0.3213961968375136}, 'combined': 0.23681825030132578, 'epoch': 19} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3505913085657442, 'r': 0.305008319930233, 'f1': 0.32621515132062245}, 'combined': 0.22949809640646807, 'epoch': 19} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32056513012181614, 'r': 0.313874017348875, 'f1': 0.3171842898233118}, 'combined': 0.2337147398698087, 'epoch': 19} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3551471230148429, 'r': 0.3027737689227762, 'f1': 0.32687587299623316}, 'combined': 0.23208186982732554, 'epoch': 19} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3347862073823612, 'r': 0.3220808484684196, 'f1': 0.32831065211384364}, 'combined': 0.24191311208388477, 'epoch': 19} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36907956755256294, 'r': 0.2959083345093911, 'f1': 0.3284682918208946}, 'combined': 0.23321248719283516, 'epoch': 19} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23529411764705882, 'r': 0.22857142857142856, 'f1': 0.23188405797101447}, 'combined': 0.1545893719806763, 'epoch': 19} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.24107142857142858, 'r': 0.29347826086956524, 'f1': 0.2647058823529412}, 'combined': 0.1323529411764706, 'epoch': 19} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3076923076923077, 'r': 0.13793103448275862, 'f1': 0.1904761904761905}, 'combined': 0.12698412698412698, 'epoch': 19} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32931238967343335, 'r': 0.3212043194731405, 'f1': 0.3252078248932276}, 'combined': 0.2396268183423782, 'epoch': 10} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38073307869771683, 'r': 0.30299176943483225, 'f1': 0.3374427286417082}, 'combined': 0.23958433733561282, 'epoch': 10} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31666666666666665, 'r': 0.41304347826086957, 'f1': 0.3584905660377358}, 'combined': 0.1792452830188679, 'epoch': 10} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3394620582120582, 'r': 0.3092447916666667, 'f1': 0.32364965312190286}, 'combined': 0.23847869177403366, 'epoch': 7} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36468931207169214, 'r': 0.2587193810770381, 'f1': 0.30269771384817323}, 'combined': 0.214915376832203, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 7} ****************************** Epoch: 20 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:39:42.974684: step: 2/463, loss: 0.13108888268470764 2023-01-24 02:39:43.611181: step: 4/463, loss: 0.007744430098682642 2023-01-24 02:39:44.183532: step: 6/463, loss: 0.04112390801310539 2023-01-24 02:39:44.777102: step: 8/463, loss: 1.0136065483093262 2023-01-24 02:39:45.418023: step: 10/463, loss: 0.005063914228230715 2023-01-24 02:39:46.050142: step: 12/463, loss: 0.09696917980909348 2023-01-24 02:39:46.699589: step: 14/463, loss: 2.410466432571411 2023-01-24 02:39:47.346375: step: 16/463, loss: 0.06648506969213486 2023-01-24 02:39:47.987627: step: 18/463, loss: 0.03352315351366997 2023-01-24 02:39:48.551161: step: 20/463, loss: 0.03659912943840027 2023-01-24 02:39:49.137687: step: 22/463, loss: 0.044807806611061096 2023-01-24 02:39:49.817647: step: 24/463, loss: 0.08430596441030502 2023-01-24 02:39:50.481968: step: 26/463, loss: 0.19304130971431732 2023-01-24 02:39:51.140806: step: 28/463, loss: 0.07004347443580627 2023-01-24 02:39:51.776170: step: 30/463, loss: 0.09909825026988983 2023-01-24 02:39:52.457829: step: 32/463, loss: 0.08207787573337555 2023-01-24 02:39:53.034195: step: 34/463, loss: 0.03129976615309715 2023-01-24 02:39:53.693314: step: 36/463, loss: 0.14356175065040588 2023-01-24 02:39:54.386174: step: 38/463, loss: 0.05557386204600334 2023-01-24 02:39:54.996989: step: 40/463, loss: 0.1078815907239914 2023-01-24 02:39:55.632907: step: 42/463, loss: 0.2008986473083496 2023-01-24 02:39:56.221740: step: 44/463, loss: 0.050663627684116364 2023-01-24 02:39:56.816649: step: 46/463, loss: 0.011423388496041298 2023-01-24 02:39:57.386852: step: 48/463, loss: 0.016239482909440994 2023-01-24 02:39:58.106529: step: 50/463, loss: 0.21427732706069946 2023-01-24 02:39:58.692021: step: 52/463, loss: 0.003945622127503157 2023-01-24 02:39:59.288257: step: 54/463, loss: 0.21888144314289093 2023-01-24 02:39:59.907247: step: 56/463, loss: 0.03560647368431091 2023-01-24 02:40:00.548303: step: 58/463, loss: 0.15679240226745605 2023-01-24 02:40:01.163947: step: 60/463, loss: 0.021058719605207443 2023-01-24 02:40:01.762325: step: 62/463, loss: 0.013887453824281693 2023-01-24 02:40:02.372122: step: 64/463, loss: 0.00923172663897276 2023-01-24 02:40:02.954543: step: 66/463, loss: 0.06406152248382568 2023-01-24 02:40:03.636813: step: 68/463, loss: 0.1123422235250473 2023-01-24 02:40:04.242672: step: 70/463, loss: 0.1009870320558548 2023-01-24 02:40:04.903227: step: 72/463, loss: 0.0788048729300499 2023-01-24 02:40:05.478436: step: 74/463, loss: 0.1209428533911705 2023-01-24 02:40:06.036762: step: 76/463, loss: 0.041499435901641846 2023-01-24 02:40:06.593926: step: 78/463, loss: 0.09040295332670212 2023-01-24 02:40:07.187688: step: 80/463, loss: 0.05383515730500221 2023-01-24 02:40:07.789633: step: 82/463, loss: 0.006640410982072353 2023-01-24 02:40:08.357573: step: 84/463, loss: 0.10898908227682114 2023-01-24 02:40:08.987044: step: 86/463, loss: 0.1159944087266922 2023-01-24 02:40:09.603439: step: 88/463, loss: 0.0039574988186359406 2023-01-24 02:40:10.171258: step: 90/463, loss: 0.25417351722717285 2023-01-24 02:40:10.732928: step: 92/463, loss: 0.02369525469839573 2023-01-24 02:40:11.329364: step: 94/463, loss: 0.051845647394657135 2023-01-24 02:40:11.944874: step: 96/463, loss: 0.008575499057769775 2023-01-24 02:40:12.573339: step: 98/463, loss: 0.45369023084640503 2023-01-24 02:40:13.183894: step: 100/463, loss: 0.07007813453674316 2023-01-24 02:40:13.782050: step: 102/463, loss: 0.43530306220054626 2023-01-24 02:40:14.460192: step: 104/463, loss: 0.17113350331783295 2023-01-24 02:40:15.088717: step: 106/463, loss: 0.4750254452228546 2023-01-24 02:40:15.718239: step: 108/463, loss: 0.05123943090438843 2023-01-24 02:40:16.348959: step: 110/463, loss: 0.006551268044859171 2023-01-24 02:40:16.916501: step: 112/463, loss: 0.03674391284584999 2023-01-24 02:40:17.498366: step: 114/463, loss: 0.890353798866272 2023-01-24 02:40:18.131533: step: 116/463, loss: 0.09334063529968262 2023-01-24 02:40:18.720354: step: 118/463, loss: 0.03126668184995651 2023-01-24 02:40:19.350128: step: 120/463, loss: 0.1091681718826294 2023-01-24 02:40:19.932248: step: 122/463, loss: 0.06366941332817078 2023-01-24 02:40:20.558585: step: 124/463, loss: 0.01213535200804472 2023-01-24 02:40:21.183171: step: 126/463, loss: 0.02465950697660446 2023-01-24 02:40:21.784251: step: 128/463, loss: 0.03635284677147865 2023-01-24 02:40:22.369344: step: 130/463, loss: 0.06686977297067642 2023-01-24 02:40:22.938432: step: 132/463, loss: 0.03539236634969711 2023-01-24 02:40:23.506506: step: 134/463, loss: 0.06202876195311546 2023-01-24 02:40:24.130832: step: 136/463, loss: 0.06992828845977783 2023-01-24 02:40:24.762038: step: 138/463, loss: 0.07149277627468109 2023-01-24 02:40:25.379773: step: 140/463, loss: 0.0011162133887410164 2023-01-24 02:40:25.941922: step: 142/463, loss: 0.046798527240753174 2023-01-24 02:40:26.564756: step: 144/463, loss: 0.045043595135211945 2023-01-24 02:40:27.095624: step: 146/463, loss: 0.008591432124376297 2023-01-24 02:40:27.651905: step: 148/463, loss: 0.047459784895181656 2023-01-24 02:40:28.234245: step: 150/463, loss: 0.18178406357765198 2023-01-24 02:40:28.911706: step: 152/463, loss: 0.021565545350313187 2023-01-24 02:40:29.486258: step: 154/463, loss: 0.021964237093925476 2023-01-24 02:40:30.112528: step: 156/463, loss: 0.026930946856737137 2023-01-24 02:40:30.712008: step: 158/463, loss: 0.0404137447476387 2023-01-24 02:40:31.387582: step: 160/463, loss: 0.04967912286520004 2023-01-24 02:40:32.026469: step: 162/463, loss: 0.15527154505252838 2023-01-24 02:40:32.586698: step: 164/463, loss: 0.02418016456067562 2023-01-24 02:40:33.193476: step: 166/463, loss: 0.10544124245643616 2023-01-24 02:40:33.799126: step: 168/463, loss: 0.03524501621723175 2023-01-24 02:40:34.358106: step: 170/463, loss: 0.010375584475696087 2023-01-24 02:40:34.924225: step: 172/463, loss: 0.020366376265883446 2023-01-24 02:40:35.540326: step: 174/463, loss: 0.29529502987861633 2023-01-24 02:40:36.195306: step: 176/463, loss: 0.028343264013528824 2023-01-24 02:40:36.814692: step: 178/463, loss: 0.01547433715313673 2023-01-24 02:40:37.364558: step: 180/463, loss: 0.19547009468078613 2023-01-24 02:40:37.973189: step: 182/463, loss: 0.23442836105823517 2023-01-24 02:40:38.598104: step: 184/463, loss: 0.012127815745770931 2023-01-24 02:40:39.203005: step: 186/463, loss: 0.07039374113082886 2023-01-24 02:40:39.790300: step: 188/463, loss: 0.004310412332415581 2023-01-24 02:40:40.348842: step: 190/463, loss: 0.06011488288640976 2023-01-24 02:40:41.019503: step: 192/463, loss: 0.27764615416526794 2023-01-24 02:40:41.574714: step: 194/463, loss: 0.02298612892627716 2023-01-24 02:40:42.168253: step: 196/463, loss: 0.4246273338794708 2023-01-24 02:40:42.770101: step: 198/463, loss: 0.10619235783815384 2023-01-24 02:40:43.369755: step: 200/463, loss: 0.01365176122635603 2023-01-24 02:40:43.915346: step: 202/463, loss: 0.017292262986302376 2023-01-24 02:40:44.573241: step: 204/463, loss: 0.1256209909915924 2023-01-24 02:40:45.216814: step: 206/463, loss: 0.22187282145023346 2023-01-24 02:40:45.795980: step: 208/463, loss: 0.08835422247648239 2023-01-24 02:40:46.373393: step: 210/463, loss: 0.04758817330002785 2023-01-24 02:40:46.972852: step: 212/463, loss: 0.015825502574443817 2023-01-24 02:40:47.551416: step: 214/463, loss: 0.023752372711896896 2023-01-24 02:40:48.157241: step: 216/463, loss: 0.04566870629787445 2023-01-24 02:40:48.726817: step: 218/463, loss: 0.02901461347937584 2023-01-24 02:40:49.339387: step: 220/463, loss: 0.012823263183236122 2023-01-24 02:40:49.940650: step: 222/463, loss: 0.10892003774642944 2023-01-24 02:40:50.632983: step: 224/463, loss: 0.6869062781333923 2023-01-24 02:40:51.182208: step: 226/463, loss: 0.05883302167057991 2023-01-24 02:40:51.816322: step: 228/463, loss: 0.04543779417872429 2023-01-24 02:40:52.447612: step: 230/463, loss: 0.015544440597295761 2023-01-24 02:40:53.038921: step: 232/463, loss: 0.05624644085764885 2023-01-24 02:40:53.631277: step: 234/463, loss: 0.0899229347705841 2023-01-24 02:40:54.331076: step: 236/463, loss: 0.06731760501861572 2023-01-24 02:40:54.905057: step: 238/463, loss: 0.08191238343715668 2023-01-24 02:40:55.627945: step: 240/463, loss: 0.0425315760076046 2023-01-24 02:40:56.267476: step: 242/463, loss: 0.014066780917346478 2023-01-24 02:40:56.866621: step: 244/463, loss: 0.07018076628446579 2023-01-24 02:40:57.445642: step: 246/463, loss: 0.09150727838277817 2023-01-24 02:40:58.068850: step: 248/463, loss: 0.07547071576118469 2023-01-24 02:40:58.633114: step: 250/463, loss: 0.09766188263893127 2023-01-24 02:40:59.243896: step: 252/463, loss: 0.10057984292507172 2023-01-24 02:40:59.803105: step: 254/463, loss: 0.04396006092429161 2023-01-24 02:41:00.387349: step: 256/463, loss: 0.029547078534960747 2023-01-24 02:41:00.983024: step: 258/463, loss: 0.09830475598573685 2023-01-24 02:41:01.581075: step: 260/463, loss: 0.01698497124016285 2023-01-24 02:41:02.174531: step: 262/463, loss: 0.030634604394435883 2023-01-24 02:41:02.752423: step: 264/463, loss: 0.09152662009000778 2023-01-24 02:41:03.389668: step: 266/463, loss: 0.7158637046813965 2023-01-24 02:41:04.085928: step: 268/463, loss: 0.0339268334209919 2023-01-24 02:41:04.697655: step: 270/463, loss: 0.06865404546260834 2023-01-24 02:41:05.322104: step: 272/463, loss: 0.012598121538758278 2023-01-24 02:41:05.932426: step: 274/463, loss: 0.06606987118721008 2023-01-24 02:41:06.532751: step: 276/463, loss: 0.03861470893025398 2023-01-24 02:41:07.120838: step: 278/463, loss: 0.07627788186073303 2023-01-24 02:41:07.728465: step: 280/463, loss: 0.050848301500082016 2023-01-24 02:41:08.309557: step: 282/463, loss: 0.043009448796510696 2023-01-24 02:41:08.898896: step: 284/463, loss: 0.0773545429110527 2023-01-24 02:41:09.429303: step: 286/463, loss: 0.03020893782377243 2023-01-24 02:41:10.032561: step: 288/463, loss: 0.07331472635269165 2023-01-24 02:41:10.640974: step: 290/463, loss: 0.131510928273201 2023-01-24 02:41:11.249501: step: 292/463, loss: 0.08709429949522018 2023-01-24 02:41:11.838429: step: 294/463, loss: 0.013114800676703453 2023-01-24 02:41:12.470686: step: 296/463, loss: 0.024493617936968803 2023-01-24 02:41:13.073026: step: 298/463, loss: 0.14748232066631317 2023-01-24 02:41:13.703666: step: 300/463, loss: 0.04636673256754875 2023-01-24 02:41:14.345784: step: 302/463, loss: 0.7277219295501709 2023-01-24 02:41:14.926155: step: 304/463, loss: 0.015047241933643818 2023-01-24 02:41:15.533443: step: 306/463, loss: 0.4223773181438446 2023-01-24 02:41:16.127377: step: 308/463, loss: 0.01533450372517109 2023-01-24 02:41:16.739831: step: 310/463, loss: 0.03066941164433956 2023-01-24 02:41:17.342119: step: 312/463, loss: 0.05414155125617981 2023-01-24 02:41:17.932411: step: 314/463, loss: 0.04685007035732269 2023-01-24 02:41:18.541325: step: 316/463, loss: 0.012041223235428333 2023-01-24 02:41:19.185188: step: 318/463, loss: 0.2921014130115509 2023-01-24 02:41:19.820592: step: 320/463, loss: 0.04580307379364967 2023-01-24 02:41:20.432809: step: 322/463, loss: 0.0015697299968451262 2023-01-24 02:41:21.043411: step: 324/463, loss: 0.03859100490808487 2023-01-24 02:41:21.598044: step: 326/463, loss: 0.04205000400543213 2023-01-24 02:41:22.221273: step: 328/463, loss: 0.16374894976615906 2023-01-24 02:41:22.784558: step: 330/463, loss: 0.05210031941533089 2023-01-24 02:41:23.370606: step: 332/463, loss: 0.025535326451063156 2023-01-24 02:41:23.931013: step: 334/463, loss: 0.01757572963833809 2023-01-24 02:41:24.555970: step: 336/463, loss: 0.042950164526700974 2023-01-24 02:41:25.186807: step: 338/463, loss: 0.06796921044588089 2023-01-24 02:41:25.727526: step: 340/463, loss: 0.08607564121484756 2023-01-24 02:41:26.344995: step: 342/463, loss: 0.029710374772548676 2023-01-24 02:41:26.973829: step: 344/463, loss: 0.018487241119146347 2023-01-24 02:41:27.692288: step: 346/463, loss: 0.34796085953712463 2023-01-24 02:41:28.311523: step: 348/463, loss: 0.12820908427238464 2023-01-24 02:41:28.993929: step: 350/463, loss: 0.08360454440116882 2023-01-24 02:41:29.585567: step: 352/463, loss: 0.013252809643745422 2023-01-24 02:41:30.237478: step: 354/463, loss: 0.034033384174108505 2023-01-24 02:41:30.840743: step: 356/463, loss: 0.014914345927536488 2023-01-24 02:41:31.455066: step: 358/463, loss: 0.019612206146121025 2023-01-24 02:41:32.066064: step: 360/463, loss: 0.002982083708047867 2023-01-24 02:41:32.721102: step: 362/463, loss: 0.05039303004741669 2023-01-24 02:41:33.324810: step: 364/463, loss: 0.025120878592133522 2023-01-24 02:41:33.989486: step: 366/463, loss: 0.0387108288705349 2023-01-24 02:41:34.553279: step: 368/463, loss: 0.033777158707380295 2023-01-24 02:41:35.144300: step: 370/463, loss: 0.09976933151483536 2023-01-24 02:41:35.793951: step: 372/463, loss: 0.05051356181502342 2023-01-24 02:41:36.381845: step: 374/463, loss: 0.035390209406614304 2023-01-24 02:41:36.981113: step: 376/463, loss: 0.00826394185423851 2023-01-24 02:41:37.576339: step: 378/463, loss: 1.9432106018066406 2023-01-24 02:41:38.231600: step: 380/463, loss: 0.07122845947742462 2023-01-24 02:41:38.842769: step: 382/463, loss: 0.044987645000219345 2023-01-24 02:41:39.462902: step: 384/463, loss: 0.021774208173155785 2023-01-24 02:41:40.096700: step: 386/463, loss: 0.08198747783899307 2023-01-24 02:41:40.689104: step: 388/463, loss: 0.05349550023674965 2023-01-24 02:41:41.287719: step: 390/463, loss: 0.049013759940862656 2023-01-24 02:41:41.892468: step: 392/463, loss: 1.3238413333892822 2023-01-24 02:41:42.540924: step: 394/463, loss: 0.14086422324180603 2023-01-24 02:41:43.128982: step: 396/463, loss: 0.014744400978088379 2023-01-24 02:41:43.698882: step: 398/463, loss: 0.057977937161922455 2023-01-24 02:41:44.348117: step: 400/463, loss: 0.02441348135471344 2023-01-24 02:41:44.930072: step: 402/463, loss: 0.2218967080116272 2023-01-24 02:41:45.539556: step: 404/463, loss: 0.13170303404331207 2023-01-24 02:41:46.175064: step: 406/463, loss: 0.16127349436283112 2023-01-24 02:41:46.818326: step: 408/463, loss: 0.18552856147289276 2023-01-24 02:41:47.326522: step: 410/463, loss: 0.010025545954704285 2023-01-24 02:41:47.890946: step: 412/463, loss: 0.013762307353317738 2023-01-24 02:41:48.478100: step: 414/463, loss: 0.015593850053846836 2023-01-24 02:41:49.111889: step: 416/463, loss: 0.021236173808574677 2023-01-24 02:41:49.768205: step: 418/463, loss: 0.05681182071566582 2023-01-24 02:41:50.392388: step: 420/463, loss: 0.051978472620248795 2023-01-24 02:41:51.042305: step: 422/463, loss: 0.042125970125198364 2023-01-24 02:41:51.586842: step: 424/463, loss: 0.06367136538028717 2023-01-24 02:41:52.185167: step: 426/463, loss: 0.30084607005119324 2023-01-24 02:41:52.821407: step: 428/463, loss: 0.05489318072795868 2023-01-24 02:41:53.355196: step: 430/463, loss: 0.035814009606838226 2023-01-24 02:41:53.953936: step: 432/463, loss: 0.073130764067173 2023-01-24 02:41:54.593497: step: 434/463, loss: 0.015217277221381664 2023-01-24 02:41:55.172552: step: 436/463, loss: 0.05580509454011917 2023-01-24 02:41:55.828456: step: 438/463, loss: 0.04246161878108978 2023-01-24 02:41:56.430313: step: 440/463, loss: 0.018206318840384483 2023-01-24 02:41:57.064467: step: 442/463, loss: 0.038009125739336014 2023-01-24 02:41:57.684858: step: 444/463, loss: 0.045807309448719025 2023-01-24 02:41:58.258582: step: 446/463, loss: 0.0009162528440356255 2023-01-24 02:41:58.859705: step: 448/463, loss: 0.03169405832886696 2023-01-24 02:41:59.548363: step: 450/463, loss: 0.26710906624794006 2023-01-24 02:42:00.186357: step: 452/463, loss: 0.06173240765929222 2023-01-24 02:42:00.854882: step: 454/463, loss: 0.02830238826572895 2023-01-24 02:42:01.486225: step: 456/463, loss: 0.08217926323413849 2023-01-24 02:42:02.070676: step: 458/463, loss: 0.07195863127708435 2023-01-24 02:42:02.735648: step: 460/463, loss: 0.06001008301973343 2023-01-24 02:42:03.301230: step: 462/463, loss: 0.005454337224364281 2023-01-24 02:42:03.872083: step: 464/463, loss: 0.025255750864744186 2023-01-24 02:42:04.556833: step: 466/463, loss: 0.043913133442401886 2023-01-24 02:42:05.194484: step: 468/463, loss: 0.11013973504304886 2023-01-24 02:42:05.805914: step: 470/463, loss: 0.014288020320236683 2023-01-24 02:42:06.418492: step: 472/463, loss: 0.03777780756354332 2023-01-24 02:42:07.063976: step: 474/463, loss: 0.03862384706735611 2023-01-24 02:42:07.714153: step: 476/463, loss: 0.031083086505532265 2023-01-24 02:42:08.316745: step: 478/463, loss: 0.47147080302238464 2023-01-24 02:42:08.950786: step: 480/463, loss: 0.08347045630216599 2023-01-24 02:42:09.587573: step: 482/463, loss: 0.012621729634702206 2023-01-24 02:42:10.236310: step: 484/463, loss: 0.01439143531024456 2023-01-24 02:42:10.839680: step: 486/463, loss: 0.033606261014938354 2023-01-24 02:42:11.420177: step: 488/463, loss: 0.03604334592819214 2023-01-24 02:42:12.040443: step: 490/463, loss: 0.06880361586809158 2023-01-24 02:42:12.708409: step: 492/463, loss: 0.011974734254181385 2023-01-24 02:42:13.323651: step: 494/463, loss: 0.10388503223657608 2023-01-24 02:42:13.940248: step: 496/463, loss: 0.05729467421770096 2023-01-24 02:42:14.550479: step: 498/463, loss: 0.09987930208444595 2023-01-24 02:42:15.164980: step: 500/463, loss: 0.028818931430578232 2023-01-24 02:42:15.780007: step: 502/463, loss: 0.10456182807683945 2023-01-24 02:42:16.397604: step: 504/463, loss: 0.12826204299926758 2023-01-24 02:42:17.032094: step: 506/463, loss: 0.0219345074146986 2023-01-24 02:42:17.645203: step: 508/463, loss: 0.13756239414215088 2023-01-24 02:42:18.213961: step: 510/463, loss: 0.05208529531955719 2023-01-24 02:42:18.861686: step: 512/463, loss: 0.027840791270136833 2023-01-24 02:42:19.459351: step: 514/463, loss: 0.007644210010766983 2023-01-24 02:42:19.977018: step: 516/463, loss: 0.06095527485013008 2023-01-24 02:42:20.749286: step: 518/463, loss: 0.297176331281662 2023-01-24 02:42:21.369709: step: 520/463, loss: 0.04855719208717346 2023-01-24 02:42:21.943391: step: 522/463, loss: 0.023118440061807632 2023-01-24 02:42:22.490237: step: 524/463, loss: 0.14092254638671875 2023-01-24 02:42:23.108304: step: 526/463, loss: 0.05067698284983635 2023-01-24 02:42:23.693708: step: 528/463, loss: 0.37454336881637573 2023-01-24 02:42:24.268143: step: 530/463, loss: 0.0003124147478956729 2023-01-24 02:42:24.935933: step: 532/463, loss: 0.06446278840303421 2023-01-24 02:42:25.560891: step: 534/463, loss: 0.061855874955654144 2023-01-24 02:42:26.144736: step: 536/463, loss: 0.06842956691980362 2023-01-24 02:42:26.818197: step: 538/463, loss: 0.1182306557893753 2023-01-24 02:42:27.540078: step: 540/463, loss: 0.09114644676446915 2023-01-24 02:42:28.157912: step: 542/463, loss: 0.05039096996188164 2023-01-24 02:42:28.769985: step: 544/463, loss: 0.05894893780350685 2023-01-24 02:42:29.340466: step: 546/463, loss: 0.27761387825012207 2023-01-24 02:42:29.884712: step: 548/463, loss: 0.045422594994306564 2023-01-24 02:42:30.442215: step: 550/463, loss: 0.04738742858171463 2023-01-24 02:42:31.061510: step: 552/463, loss: 0.050242166966199875 2023-01-24 02:42:31.674599: step: 554/463, loss: 0.06943824142217636 2023-01-24 02:42:32.235722: step: 556/463, loss: 0.1211751252412796 2023-01-24 02:42:32.917636: step: 558/463, loss: 0.012478468008339405 2023-01-24 02:42:33.545097: step: 560/463, loss: 0.03552207723259926 2023-01-24 02:42:34.192766: step: 562/463, loss: 0.05879603326320648 2023-01-24 02:42:34.771835: step: 564/463, loss: 0.08543559163808823 2023-01-24 02:42:35.387213: step: 566/463, loss: 0.014799576252698898 2023-01-24 02:42:36.038811: step: 568/463, loss: 0.11024191230535507 2023-01-24 02:42:36.698616: step: 570/463, loss: 0.007236973848193884 2023-01-24 02:42:37.302011: step: 572/463, loss: 0.037508539855480194 2023-01-24 02:42:37.939532: step: 574/463, loss: 0.08413052558898926 2023-01-24 02:42:38.629721: step: 576/463, loss: 0.007277923636138439 2023-01-24 02:42:39.271222: step: 578/463, loss: 0.05028689280152321 2023-01-24 02:42:39.866585: step: 580/463, loss: 0.04847828298807144 2023-01-24 02:42:40.488116: step: 582/463, loss: 0.0575786791741848 2023-01-24 02:42:41.141755: step: 584/463, loss: 0.018657485023140907 2023-01-24 02:42:41.837705: step: 586/463, loss: 0.07365526258945465 2023-01-24 02:42:42.461056: step: 588/463, loss: 0.041060928255319595 2023-01-24 02:42:43.113000: step: 590/463, loss: 0.027042098343372345 2023-01-24 02:42:43.707453: step: 592/463, loss: 0.02319258451461792 2023-01-24 02:42:44.393803: step: 594/463, loss: 0.14291676878929138 2023-01-24 02:42:44.905418: step: 596/463, loss: 0.010482601821422577 2023-01-24 02:42:45.490227: step: 598/463, loss: 0.13141010701656342 2023-01-24 02:42:46.087288: step: 600/463, loss: 0.08058881014585495 2023-01-24 02:42:46.662014: step: 602/463, loss: 0.971682608127594 2023-01-24 02:42:47.264263: step: 604/463, loss: 0.1004226803779602 2023-01-24 02:42:47.804771: step: 606/463, loss: 0.08578328788280487 2023-01-24 02:42:48.421752: step: 608/463, loss: 0.048386551439762115 2023-01-24 02:42:49.055814: step: 610/463, loss: 0.03937898576259613 2023-01-24 02:42:49.888822: step: 612/463, loss: 0.12500327825546265 2023-01-24 02:42:50.536095: step: 614/463, loss: 0.13241419196128845 2023-01-24 02:42:51.135413: step: 616/463, loss: 0.1263732612133026 2023-01-24 02:42:51.750646: step: 618/463, loss: 0.02611301653087139 2023-01-24 02:42:52.352373: step: 620/463, loss: 0.10451508313417435 2023-01-24 02:42:52.925454: step: 622/463, loss: 0.0364653617143631 2023-01-24 02:42:53.580891: step: 624/463, loss: 0.03791540116071701 2023-01-24 02:42:54.254546: step: 626/463, loss: 0.08013985306024551 2023-01-24 02:42:54.918375: step: 628/463, loss: 0.08997684717178345 2023-01-24 02:42:55.627861: step: 630/463, loss: 0.04728971794247627 2023-01-24 02:42:56.249430: step: 632/463, loss: 0.033159125596284866 2023-01-24 02:42:56.873769: step: 634/463, loss: 0.018858404830098152 2023-01-24 02:42:57.508510: step: 636/463, loss: 0.015180638991296291 2023-01-24 02:42:58.124908: step: 638/463, loss: 1.6358081102371216 2023-01-24 02:42:58.647445: step: 640/463, loss: 0.039670679718256 2023-01-24 02:42:59.373727: step: 642/463, loss: 0.12126841396093369 2023-01-24 02:42:59.977048: step: 644/463, loss: 0.02292550355195999 2023-01-24 02:43:00.647135: step: 646/463, loss: 0.06877373903989792 2023-01-24 02:43:01.316784: step: 648/463, loss: 0.2184620350599289 2023-01-24 02:43:01.895632: step: 650/463, loss: 0.06955863535404205 2023-01-24 02:43:02.499360: step: 652/463, loss: 0.03877412527799606 2023-01-24 02:43:03.053992: step: 654/463, loss: 0.023934034630656242 2023-01-24 02:43:03.739531: step: 656/463, loss: 0.48907899856567383 2023-01-24 02:43:04.420212: step: 658/463, loss: 0.022903582081198692 2023-01-24 02:43:05.014160: step: 660/463, loss: 0.05416338890790939 2023-01-24 02:43:05.711379: step: 662/463, loss: 0.007132525555789471 2023-01-24 02:43:06.303379: step: 664/463, loss: 0.10816720873117447 2023-01-24 02:43:06.928890: step: 666/463, loss: 0.05070408061146736 2023-01-24 02:43:07.515263: step: 668/463, loss: 0.07011789083480835 2023-01-24 02:43:08.171177: step: 670/463, loss: 0.23798958957195282 2023-01-24 02:43:08.716507: step: 672/463, loss: 0.026800908148288727 2023-01-24 02:43:09.352331: step: 674/463, loss: 0.11792853474617004 2023-01-24 02:43:09.963814: step: 676/463, loss: 0.04966336861252785 2023-01-24 02:43:10.565845: step: 678/463, loss: 0.012719443067908287 2023-01-24 02:43:11.115633: step: 680/463, loss: 0.05221040919423103 2023-01-24 02:43:11.736257: step: 682/463, loss: 0.026425659656524658 2023-01-24 02:43:12.369520: step: 684/463, loss: 0.05606555938720703 2023-01-24 02:43:13.026445: step: 686/463, loss: 0.06381009519100189 2023-01-24 02:43:13.628507: step: 688/463, loss: 0.07811594754457474 2023-01-24 02:43:14.230723: step: 690/463, loss: 0.03005896881222725 2023-01-24 02:43:14.825426: step: 692/463, loss: 0.02880208194255829 2023-01-24 02:43:15.436347: step: 694/463, loss: 0.833987832069397 2023-01-24 02:43:16.010502: step: 696/463, loss: 0.011742332950234413 2023-01-24 02:43:16.557768: step: 698/463, loss: 0.0356086827814579 2023-01-24 02:43:17.152402: step: 700/463, loss: 0.11257192492485046 2023-01-24 02:43:17.767507: step: 702/463, loss: 0.4020608961582184 2023-01-24 02:43:18.474001: step: 704/463, loss: 0.04160220921039581 2023-01-24 02:43:19.068419: step: 706/463, loss: 0.01498582772910595 2023-01-24 02:43:19.685838: step: 708/463, loss: 0.016668368130922318 2023-01-24 02:43:20.281697: step: 710/463, loss: 0.10086587071418762 2023-01-24 02:43:20.880827: step: 712/463, loss: 0.0879906490445137 2023-01-24 02:43:21.503725: step: 714/463, loss: 0.07832398265600204 2023-01-24 02:43:22.260952: step: 716/463, loss: 0.026821769773960114 2023-01-24 02:43:22.951046: step: 718/463, loss: 0.10035472363233566 2023-01-24 02:43:23.584264: step: 720/463, loss: 0.06936372816562653 2023-01-24 02:43:24.236899: step: 722/463, loss: 0.08366847038269043 2023-01-24 02:43:24.844382: step: 724/463, loss: 0.1112753227353096 2023-01-24 02:43:25.509426: step: 726/463, loss: 0.04369613155722618 2023-01-24 02:43:26.096854: step: 728/463, loss: 0.06087537854909897 2023-01-24 02:43:26.721899: step: 730/463, loss: 0.19273433089256287 2023-01-24 02:43:27.355579: step: 732/463, loss: 0.010241077281534672 2023-01-24 02:43:27.972900: step: 734/463, loss: 0.20904210209846497 2023-01-24 02:43:28.605729: step: 736/463, loss: 0.03538528457283974 2023-01-24 02:43:29.232421: step: 738/463, loss: 0.08569483458995819 2023-01-24 02:43:29.813758: step: 740/463, loss: 0.11197716742753983 2023-01-24 02:43:30.389977: step: 742/463, loss: 0.008918947540223598 2023-01-24 02:43:30.942087: step: 744/463, loss: 0.38994312286376953 2023-01-24 02:43:31.486795: step: 746/463, loss: 0.0118242297321558 2023-01-24 02:43:32.120596: step: 748/463, loss: 0.045131146907806396 2023-01-24 02:43:32.747518: step: 750/463, loss: 0.09290646761655807 2023-01-24 02:43:33.398531: step: 752/463, loss: 0.08361370861530304 2023-01-24 02:43:33.983960: step: 754/463, loss: 0.006104010157287121 2023-01-24 02:43:34.618003: step: 756/463, loss: 0.35692906379699707 2023-01-24 02:43:35.256304: step: 758/463, loss: 0.044656481593847275 2023-01-24 02:43:35.941718: step: 760/463, loss: 0.1878575086593628 2023-01-24 02:43:36.544462: step: 762/463, loss: 0.025486163794994354 2023-01-24 02:43:37.119924: step: 764/463, loss: 0.36057910323143005 2023-01-24 02:43:37.734728: step: 766/463, loss: 0.054459549486637115 2023-01-24 02:43:38.318982: step: 768/463, loss: 0.0604981929063797 2023-01-24 02:43:38.921395: step: 770/463, loss: 0.05799954757094383 2023-01-24 02:43:39.551777: step: 772/463, loss: 0.11873850971460342 2023-01-24 02:43:40.122471: step: 774/463, loss: 0.011418478563427925 2023-01-24 02:43:40.753681: step: 776/463, loss: 0.060693755745887756 2023-01-24 02:43:41.343903: step: 778/463, loss: 0.056260380893945694 2023-01-24 02:43:41.918789: step: 780/463, loss: 0.021543052047491074 2023-01-24 02:43:42.555926: step: 782/463, loss: 0.04891766607761383 2023-01-24 02:43:43.111357: step: 784/463, loss: 0.02159399539232254 2023-01-24 02:43:43.805975: step: 786/463, loss: 0.019410137087106705 2023-01-24 02:43:44.447582: step: 788/463, loss: 0.009411297738552094 2023-01-24 02:43:45.078729: step: 790/463, loss: 0.04808952286839485 2023-01-24 02:43:45.664974: step: 792/463, loss: 0.1450478434562683 2023-01-24 02:43:46.326199: step: 794/463, loss: 0.3976404368877411 2023-01-24 02:43:46.895494: step: 796/463, loss: 0.04770657792687416 2023-01-24 02:43:47.561807: step: 798/463, loss: 0.03463771939277649 2023-01-24 02:43:48.123242: step: 800/463, loss: 0.38851138949394226 2023-01-24 02:43:48.767565: step: 802/463, loss: 0.0036132321693003178 2023-01-24 02:43:49.342677: step: 804/463, loss: 0.07504495978355408 2023-01-24 02:43:49.947760: step: 806/463, loss: 0.058052774518728256 2023-01-24 02:43:50.609928: step: 808/463, loss: 0.13431502878665924 2023-01-24 02:43:51.203675: step: 810/463, loss: 0.13638107478618622 2023-01-24 02:43:51.728134: step: 812/463, loss: 0.022614341229200363 2023-01-24 02:43:52.357273: step: 814/463, loss: 0.05623955279588699 2023-01-24 02:43:52.987940: step: 816/463, loss: 0.11784551292657852 2023-01-24 02:43:53.567714: step: 818/463, loss: 0.12680330872535706 2023-01-24 02:43:54.177309: step: 820/463, loss: 0.066986583173275 2023-01-24 02:43:54.808897: step: 822/463, loss: 0.0422324612736702 2023-01-24 02:43:55.398623: step: 824/463, loss: 0.02435671165585518 2023-01-24 02:43:55.965059: step: 826/463, loss: 0.09781000018119812 2023-01-24 02:43:56.577362: step: 828/463, loss: 0.05380280315876007 2023-01-24 02:43:57.148077: step: 830/463, loss: 0.030096054077148438 2023-01-24 02:43:57.780894: step: 832/463, loss: 0.05681603401899338 2023-01-24 02:43:58.401327: step: 834/463, loss: 0.17927226424217224 2023-01-24 02:43:59.012890: step: 836/463, loss: 0.053143709897994995 2023-01-24 02:43:59.657924: step: 838/463, loss: 0.02649344503879547 2023-01-24 02:44:00.182747: step: 840/463, loss: 0.01447279378771782 2023-01-24 02:44:00.877073: step: 842/463, loss: 0.2728446125984192 2023-01-24 02:44:01.492763: step: 844/463, loss: 0.035889916121959686 2023-01-24 02:44:02.100332: step: 846/463, loss: 0.0705445408821106 2023-01-24 02:44:02.774997: step: 848/463, loss: 0.10170207172632217 2023-01-24 02:44:03.360501: step: 850/463, loss: 0.1412568837404251 2023-01-24 02:44:04.005719: step: 852/463, loss: 0.12600982189178467 2023-01-24 02:44:04.625827: step: 854/463, loss: 0.005890231113880873 2023-01-24 02:44:05.283861: step: 856/463, loss: 0.1892230361700058 2023-01-24 02:44:05.938215: step: 858/463, loss: 0.3495054543018341 2023-01-24 02:44:06.563188: step: 860/463, loss: 0.022730978205800056 2023-01-24 02:44:07.190550: step: 862/463, loss: 0.0680256262421608 2023-01-24 02:44:07.807779: step: 864/463, loss: 0.03566683456301689 2023-01-24 02:44:08.403718: step: 866/463, loss: 0.13140955567359924 2023-01-24 02:44:08.975688: step: 868/463, loss: 0.2673231065273285 2023-01-24 02:44:09.628633: step: 870/463, loss: 0.2639882564544678 2023-01-24 02:44:10.154858: step: 872/463, loss: 0.12282765656709671 2023-01-24 02:44:10.778112: step: 874/463, loss: 0.04380814731121063 2023-01-24 02:44:11.351990: step: 876/463, loss: 0.4656218886375427 2023-01-24 02:44:11.879915: step: 878/463, loss: 0.03215242549777031 2023-01-24 02:44:12.402061: step: 880/463, loss: 0.586729109287262 2023-01-24 02:44:13.063893: step: 882/463, loss: 0.17380765080451965 2023-01-24 02:44:13.715546: step: 884/463, loss: 0.038389842957258224 2023-01-24 02:44:14.330194: step: 886/463, loss: 0.08317163586616516 2023-01-24 02:44:14.923269: step: 888/463, loss: 0.02627774141728878 2023-01-24 02:44:15.493225: step: 890/463, loss: 0.002366928616538644 2023-01-24 02:44:16.064455: step: 892/463, loss: 0.06421678513288498 2023-01-24 02:44:16.668944: step: 894/463, loss: 0.020329998806118965 2023-01-24 02:44:17.278183: step: 896/463, loss: 0.01803649589419365 2023-01-24 02:44:17.870973: step: 898/463, loss: 0.16368302702903748 2023-01-24 02:44:18.449237: step: 900/463, loss: 0.04065460339188576 2023-01-24 02:44:19.041019: step: 902/463, loss: 0.018340211361646652 2023-01-24 02:44:19.664461: step: 904/463, loss: 0.1049102395772934 2023-01-24 02:44:20.300266: step: 906/463, loss: 0.0614774152636528 2023-01-24 02:44:20.962765: step: 908/463, loss: 0.22259950637817383 2023-01-24 02:44:21.553601: step: 910/463, loss: 0.025566045194864273 2023-01-24 02:44:22.176104: step: 912/463, loss: 0.01949593611061573 2023-01-24 02:44:22.740006: step: 914/463, loss: 0.025840267539024353 2023-01-24 02:44:23.367547: step: 916/463, loss: 0.00959770753979683 2023-01-24 02:44:23.944336: step: 918/463, loss: 0.029936637729406357 2023-01-24 02:44:24.577660: step: 920/463, loss: 0.03699206933379173 2023-01-24 02:44:25.113137: step: 922/463, loss: 0.08385531604290009 2023-01-24 02:44:25.806096: step: 924/463, loss: 0.14310820400714874 2023-01-24 02:44:26.424420: step: 926/463, loss: 0.023693200200796127 ================================================== Loss: 0.108 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3302436617369065, 'r': 0.3314969583658891, 'f1': 0.33086912321746886}, 'combined': 0.24379830131813493, 'epoch': 20} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3559644705303427, 'r': 0.3093722623457429, 'f1': 0.3310369865996465}, 'combined': 0.2328903423314096, 'epoch': 20} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33007023705004396, 'r': 0.33570711016854565, 'f1': 0.3328648110231864}, 'combined': 0.24526880812234786, 'epoch': 20} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.35630851911868944, 'r': 0.30714105534510605, 'f1': 0.32990291591946197}, 'combined': 0.23423107030281798, 'epoch': 20} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3389495079358525, 'r': 0.33316099641512636, 'f1': 0.3360303255708547}, 'combined': 0.24760129252589294, 'epoch': 20} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36411434447685176, 'r': 0.29345579161508506, 'f1': 0.3249887953607525}, 'combined': 0.23074204470613424, 'epoch': 20} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2785714285714286, 'r': 0.2785714285714286, 'f1': 0.2785714285714286}, 'combined': 0.18571428571428572, 'epoch': 20} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.24107142857142858, 'r': 0.29347826086956524, 'f1': 0.2647058823529412}, 'combined': 0.1323529411764706, 'epoch': 20} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36363636363636365, 'r': 0.13793103448275862, 'f1': 0.2}, 'combined': 0.13333333333333333, 'epoch': 20} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32931238967343335, 'r': 0.3212043194731405, 'f1': 0.3252078248932276}, 'combined': 0.2396268183423782, 'epoch': 10} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38073307869771683, 'r': 0.30299176943483225, 'f1': 0.3374427286417082}, 'combined': 0.23958433733561282, 'epoch': 10} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31666666666666665, 'r': 0.41304347826086957, 'f1': 0.3584905660377358}, 'combined': 0.1792452830188679, 'epoch': 10} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3394620582120582, 'r': 0.3092447916666667, 'f1': 0.32364965312190286}, 'combined': 0.23847869177403366, 'epoch': 7} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36468931207169214, 'r': 0.2587193810770381, 'f1': 0.30269771384817323}, 'combined': 0.214915376832203, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 7} ****************************** Epoch: 21 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:46:59.139407: step: 2/463, loss: 0.052805934101343155 2023-01-24 02:46:59.734739: step: 4/463, loss: 0.005280214827507734 2023-01-24 02:47:00.332520: step: 6/463, loss: 0.0209872517734766 2023-01-24 02:47:00.983495: step: 8/463, loss: 0.07757161557674408 2023-01-24 02:47:01.615881: step: 10/463, loss: 0.028902823105454445 2023-01-24 02:47:02.196993: step: 12/463, loss: 0.010830058716237545 2023-01-24 02:47:02.802770: step: 14/463, loss: 0.024415012449026108 2023-01-24 02:47:03.332756: step: 16/463, loss: 0.05593577399849892 2023-01-24 02:47:03.875494: step: 18/463, loss: 0.037127211689949036 2023-01-24 02:47:04.492532: step: 20/463, loss: 0.041090451180934906 2023-01-24 02:47:05.135724: step: 22/463, loss: 0.018926946446299553 2023-01-24 02:47:05.745434: step: 24/463, loss: 0.02345609851181507 2023-01-24 02:47:06.357396: step: 26/463, loss: 0.016389314085245132 2023-01-24 02:47:06.989061: step: 28/463, loss: 0.03763389587402344 2023-01-24 02:47:07.594078: step: 30/463, loss: 0.08482816070318222 2023-01-24 02:47:08.204770: step: 32/463, loss: 0.007437416817992926 2023-01-24 02:47:08.820230: step: 34/463, loss: 0.02596116252243519 2023-01-24 02:47:09.472428: step: 36/463, loss: 0.12972821295261383 2023-01-24 02:47:10.086953: step: 38/463, loss: 0.09052193909883499 2023-01-24 02:47:10.788219: step: 40/463, loss: 0.2692042887210846 2023-01-24 02:47:11.352230: step: 42/463, loss: 0.022179635241627693 2023-01-24 02:47:11.950313: step: 44/463, loss: 0.053789641708135605 2023-01-24 02:47:12.525010: step: 46/463, loss: 0.02860049530863762 2023-01-24 02:47:13.095634: step: 48/463, loss: 0.02107861638069153 2023-01-24 02:47:13.705035: step: 50/463, loss: 0.045981716364622116 2023-01-24 02:47:14.348067: step: 52/463, loss: 0.07436763495206833 2023-01-24 02:47:14.949585: step: 54/463, loss: 0.2806832790374756 2023-01-24 02:47:15.558047: step: 56/463, loss: 0.007427280303090811 2023-01-24 02:47:16.145759: step: 58/463, loss: 0.06127117574214935 2023-01-24 02:47:16.793862: step: 60/463, loss: 0.03628745675086975 2023-01-24 02:47:17.425615: step: 62/463, loss: 0.0566580556333065 2023-01-24 02:47:18.009247: step: 64/463, loss: 0.0790780782699585 2023-01-24 02:47:18.574964: step: 66/463, loss: 0.005393788684159517 2023-01-24 02:47:19.175725: step: 68/463, loss: 0.09038905054330826 2023-01-24 02:47:19.859895: step: 70/463, loss: 0.049519192427396774 2023-01-24 02:47:20.463788: step: 72/463, loss: 0.09583064913749695 2023-01-24 02:47:21.070766: step: 74/463, loss: 0.027369143441319466 2023-01-24 02:47:21.780942: step: 76/463, loss: 0.04082915186882019 2023-01-24 02:47:22.428657: step: 78/463, loss: 0.05848073586821556 2023-01-24 02:47:23.082501: step: 80/463, loss: 0.1018986627459526 2023-01-24 02:47:23.641549: step: 82/463, loss: 0.02781439758837223 2023-01-24 02:47:24.243806: step: 84/463, loss: 0.02759961411356926 2023-01-24 02:47:24.825142: step: 86/463, loss: 0.010477924719452858 2023-01-24 02:47:25.362379: step: 88/463, loss: 0.07666408270597458 2023-01-24 02:47:25.984195: step: 90/463, loss: 0.019670767709612846 2023-01-24 02:47:26.623564: step: 92/463, loss: 0.04746634513139725 2023-01-24 02:47:27.194911: step: 94/463, loss: 0.048526912927627563 2023-01-24 02:47:27.816197: step: 96/463, loss: 0.12710115313529968 2023-01-24 02:47:28.437825: step: 98/463, loss: 0.03891502320766449 2023-01-24 02:47:29.071820: step: 100/463, loss: 0.04751014709472656 2023-01-24 02:47:29.640098: step: 102/463, loss: 0.07285762578248978 2023-01-24 02:47:30.214635: step: 104/463, loss: 0.0434085838496685 2023-01-24 02:47:30.892947: step: 106/463, loss: 0.032872360199689865 2023-01-24 02:47:31.543571: step: 108/463, loss: 0.022393669933080673 2023-01-24 02:47:32.185444: step: 110/463, loss: 0.41061416268348694 2023-01-24 02:47:32.845278: step: 112/463, loss: 0.010092181153595448 2023-01-24 02:47:33.486395: step: 114/463, loss: 0.0032727422658354044 2023-01-24 02:47:34.070436: step: 116/463, loss: 0.11555947363376617 2023-01-24 02:47:34.663629: step: 118/463, loss: 0.03185407072305679 2023-01-24 02:47:35.271857: step: 120/463, loss: 0.0479784719645977 2023-01-24 02:47:35.844391: step: 122/463, loss: 0.0495445542037487 2023-01-24 02:47:36.426305: step: 124/463, loss: 0.4252864718437195 2023-01-24 02:47:37.069559: step: 126/463, loss: 0.02524767816066742 2023-01-24 02:47:37.690881: step: 128/463, loss: 0.11267712712287903 2023-01-24 02:47:38.318903: step: 130/463, loss: 0.5347533226013184 2023-01-24 02:47:38.906290: step: 132/463, loss: 0.042268406599760056 2023-01-24 02:47:39.516546: step: 134/463, loss: 0.014296318404376507 2023-01-24 02:47:40.042220: step: 136/463, loss: 0.005015531554818153 2023-01-24 02:47:40.745397: step: 138/463, loss: 0.04335004463791847 2023-01-24 02:47:41.300787: step: 140/463, loss: 0.005318753886967897 2023-01-24 02:47:41.838002: step: 142/463, loss: 0.05417543649673462 2023-01-24 02:47:42.505959: step: 144/463, loss: 0.05934911593794823 2023-01-24 02:47:43.056095: step: 146/463, loss: 0.027582895010709763 2023-01-24 02:47:43.682261: step: 148/463, loss: 0.01103629358112812 2023-01-24 02:47:44.268421: step: 150/463, loss: 0.03793769329786301 2023-01-24 02:47:44.865264: step: 152/463, loss: 0.10358862578868866 2023-01-24 02:47:45.497708: step: 154/463, loss: 0.08555654436349869 2023-01-24 02:47:46.150985: step: 156/463, loss: 0.007161549758166075 2023-01-24 02:47:46.770999: step: 158/463, loss: 0.008572019636631012 2023-01-24 02:47:47.407504: step: 160/463, loss: 0.03005935810506344 2023-01-24 02:47:48.000137: step: 162/463, loss: 0.02099655196070671 2023-01-24 02:47:48.683178: step: 164/463, loss: 0.4778994023799896 2023-01-24 02:47:49.346865: step: 166/463, loss: 0.06818339973688126 2023-01-24 02:47:49.977222: step: 168/463, loss: 0.0015536098508164287 2023-01-24 02:47:50.611749: step: 170/463, loss: 0.08982303738594055 2023-01-24 02:47:51.245946: step: 172/463, loss: 0.01580573059618473 2023-01-24 02:47:51.882164: step: 174/463, loss: 0.03619617596268654 2023-01-24 02:47:52.512857: step: 176/463, loss: 0.04753108695149422 2023-01-24 02:47:53.073209: step: 178/463, loss: 0.0020915325731039047 2023-01-24 02:47:53.665233: step: 180/463, loss: 0.038808777928352356 2023-01-24 02:47:54.282872: step: 182/463, loss: 0.016792375594377518 2023-01-24 02:47:54.909843: step: 184/463, loss: 0.012551767751574516 2023-01-24 02:47:55.589245: step: 186/463, loss: 0.031778447329998016 2023-01-24 02:47:56.194069: step: 188/463, loss: 0.09829704463481903 2023-01-24 02:47:56.890026: step: 190/463, loss: 0.09777379035949707 2023-01-24 02:47:57.501597: step: 192/463, loss: 0.033458419144153595 2023-01-24 02:47:58.170094: step: 194/463, loss: 0.024563565850257874 2023-01-24 02:47:58.781442: step: 196/463, loss: 0.04299747571349144 2023-01-24 02:47:59.375016: step: 198/463, loss: 0.017844732850790024 2023-01-24 02:47:59.970517: step: 200/463, loss: 0.09021145105361938 2023-01-24 02:48:00.621035: step: 202/463, loss: 0.04659707471728325 2023-01-24 02:48:01.267025: step: 204/463, loss: 0.05085458606481552 2023-01-24 02:48:01.926347: step: 206/463, loss: 0.23234103620052338 2023-01-24 02:48:02.517852: step: 208/463, loss: 0.01665985956788063 2023-01-24 02:48:03.141768: step: 210/463, loss: 0.015283294022083282 2023-01-24 02:48:03.768027: step: 212/463, loss: 0.0900910496711731 2023-01-24 02:48:04.398634: step: 214/463, loss: 0.04040846228599548 2023-01-24 02:48:05.050618: step: 216/463, loss: 0.00805171113461256 2023-01-24 02:48:05.734114: step: 218/463, loss: 0.11671198904514313 2023-01-24 02:48:06.428095: step: 220/463, loss: 0.047064829617738724 2023-01-24 02:48:07.031418: step: 222/463, loss: 0.018253648653626442 2023-01-24 02:48:07.656585: step: 224/463, loss: 0.03642360493540764 2023-01-24 02:48:08.276483: step: 226/463, loss: 0.016041485592722893 2023-01-24 02:48:08.905545: step: 228/463, loss: 0.21796952188014984 2023-01-24 02:48:09.557792: step: 230/463, loss: 0.015873370692133904 2023-01-24 02:48:10.172606: step: 232/463, loss: 0.009241413325071335 2023-01-24 02:48:10.789076: step: 234/463, loss: 0.17009501159191132 2023-01-24 02:48:11.389562: step: 236/463, loss: 0.012724455446004868 2023-01-24 02:48:11.985190: step: 238/463, loss: 0.025599796324968338 2023-01-24 02:48:12.544350: step: 240/463, loss: 0.005936420056968927 2023-01-24 02:48:13.146660: step: 242/463, loss: 0.003109918674454093 2023-01-24 02:48:13.715845: step: 244/463, loss: 0.043586768209934235 2023-01-24 02:48:14.320620: step: 246/463, loss: 0.057338401675224304 2023-01-24 02:48:14.867596: step: 248/463, loss: 0.05306411162018776 2023-01-24 02:48:15.421114: step: 250/463, loss: 0.03533834218978882 2023-01-24 02:48:16.014311: step: 252/463, loss: 0.11657828837633133 2023-01-24 02:48:16.650273: step: 254/463, loss: 0.09283142536878586 2023-01-24 02:48:17.310265: step: 256/463, loss: 0.04471800848841667 2023-01-24 02:48:17.914231: step: 258/463, loss: 0.01856229081749916 2023-01-24 02:48:18.467448: step: 260/463, loss: 0.851204514503479 2023-01-24 02:48:19.095745: step: 262/463, loss: 0.04751712828874588 2023-01-24 02:48:19.680073: step: 264/463, loss: 0.0714932307600975 2023-01-24 02:48:20.328142: step: 266/463, loss: 0.05642734095454216 2023-01-24 02:48:20.944583: step: 268/463, loss: 0.016180405393242836 2023-01-24 02:48:21.619433: step: 270/463, loss: 0.028840703889727592 2023-01-24 02:48:22.232674: step: 272/463, loss: 0.005382420029491186 2023-01-24 02:48:22.830922: step: 274/463, loss: 0.050887033343315125 2023-01-24 02:48:23.449602: step: 276/463, loss: 0.03661896660923958 2023-01-24 02:48:24.076135: step: 278/463, loss: 0.04574006795883179 2023-01-24 02:48:24.708272: step: 280/463, loss: 0.08624318242073059 2023-01-24 02:48:25.348584: step: 282/463, loss: 0.04667637497186661 2023-01-24 02:48:25.946883: step: 284/463, loss: 0.020211387425661087 2023-01-24 02:48:26.551842: step: 286/463, loss: 0.014522523619234562 2023-01-24 02:48:27.140783: step: 288/463, loss: 0.018540900200605392 2023-01-24 02:48:27.796784: step: 290/463, loss: 0.02065294422209263 2023-01-24 02:48:28.414103: step: 292/463, loss: 0.0503668375313282 2023-01-24 02:48:29.094387: step: 294/463, loss: 0.10224588215351105 2023-01-24 02:48:29.711405: step: 296/463, loss: 0.03150312602519989 2023-01-24 02:48:30.236801: step: 298/463, loss: 0.02479672245681286 2023-01-24 02:48:30.859968: step: 300/463, loss: 0.018423892557621002 2023-01-24 02:48:32.162765: step: 302/463, loss: 0.062025584280490875 2023-01-24 02:48:32.865343: step: 304/463, loss: 0.03412716090679169 2023-01-24 02:48:33.462838: step: 306/463, loss: 0.08321302384138107 2023-01-24 02:48:34.102955: step: 308/463, loss: 0.07518155127763748 2023-01-24 02:48:34.625030: step: 310/463, loss: 0.09294262528419495 2023-01-24 02:48:35.161759: step: 312/463, loss: 0.09483817964792252 2023-01-24 02:48:35.790699: step: 314/463, loss: 0.016237877309322357 2023-01-24 02:48:36.403663: step: 316/463, loss: 0.09513919800519943 2023-01-24 02:48:37.023462: step: 318/463, loss: 0.019893726333975792 2023-01-24 02:48:37.591640: step: 320/463, loss: 0.05328378081321716 2023-01-24 02:48:38.284363: step: 322/463, loss: 0.040353063493967056 2023-01-24 02:48:38.882160: step: 324/463, loss: 0.027618389576673508 2023-01-24 02:48:39.506951: step: 326/463, loss: 0.03855499252676964 2023-01-24 02:48:40.083167: step: 328/463, loss: 0.044588856399059296 2023-01-24 02:48:40.750828: step: 330/463, loss: 0.009817119687795639 2023-01-24 02:48:41.389867: step: 332/463, loss: 0.04192299023270607 2023-01-24 02:48:42.035298: step: 334/463, loss: 0.031670961529016495 2023-01-24 02:48:42.653734: step: 336/463, loss: 0.04987918585538864 2023-01-24 02:48:43.282886: step: 338/463, loss: 0.014425459317862988 2023-01-24 02:48:43.885143: step: 340/463, loss: 0.023135565221309662 2023-01-24 02:48:44.442028: step: 342/463, loss: 0.02354900725185871 2023-01-24 02:48:45.114667: step: 344/463, loss: 0.046821966767311096 2023-01-24 02:48:45.790395: step: 346/463, loss: 0.11811544001102448 2023-01-24 02:48:46.442469: step: 348/463, loss: 0.04246433079242706 2023-01-24 02:48:47.009901: step: 350/463, loss: 0.007374891545623541 2023-01-24 02:48:47.618891: step: 352/463, loss: 0.12883704900741577 2023-01-24 02:48:48.214279: step: 354/463, loss: 0.04109273478388786 2023-01-24 02:48:48.816209: step: 356/463, loss: 0.01887623965740204 2023-01-24 02:48:49.496011: step: 358/463, loss: 0.04643211513757706 2023-01-24 02:48:50.067295: step: 360/463, loss: 0.07001553475856781 2023-01-24 02:48:50.741776: step: 362/463, loss: 0.06755838543176651 2023-01-24 02:48:51.384784: step: 364/463, loss: 0.04537321254611015 2023-01-24 02:48:52.084665: step: 366/463, loss: 0.027377452701330185 2023-01-24 02:48:52.665130: step: 368/463, loss: 0.05450871214270592 2023-01-24 02:48:53.224920: step: 370/463, loss: 0.02903909794986248 2023-01-24 02:48:53.876188: step: 372/463, loss: 0.050726454704999924 2023-01-24 02:48:54.549902: step: 374/463, loss: 0.0627223327755928 2023-01-24 02:48:55.202644: step: 376/463, loss: 0.029790252447128296 2023-01-24 02:48:55.847311: step: 378/463, loss: 0.15702681243419647 2023-01-24 02:48:56.503067: step: 380/463, loss: 0.044734083116054535 2023-01-24 02:48:57.139859: step: 382/463, loss: 0.0010955273173749447 2023-01-24 02:48:57.705694: step: 384/463, loss: 0.02171688713133335 2023-01-24 02:48:58.314998: step: 386/463, loss: 0.024631086736917496 2023-01-24 02:48:58.917601: step: 388/463, loss: 0.0019867976661771536 2023-01-24 02:48:59.546773: step: 390/463, loss: 0.08755309879779816 2023-01-24 02:49:00.179561: step: 392/463, loss: 0.08520431071519852 2023-01-24 02:49:00.763571: step: 394/463, loss: 0.022563429549336433 2023-01-24 02:49:01.373565: step: 396/463, loss: 0.11337301880121231 2023-01-24 02:49:01.954686: step: 398/463, loss: 0.02838166616857052 2023-01-24 02:49:02.564294: step: 400/463, loss: 0.5266143083572388 2023-01-24 02:49:03.183772: step: 402/463, loss: 0.029172316193580627 2023-01-24 02:49:03.759855: step: 404/463, loss: 0.002860425505787134 2023-01-24 02:49:04.374097: step: 406/463, loss: 0.10505472123622894 2023-01-24 02:49:04.975452: step: 408/463, loss: 0.021124836057424545 2023-01-24 02:49:05.597645: step: 410/463, loss: 0.14692135155200958 2023-01-24 02:49:06.189877: step: 412/463, loss: 0.474401593208313 2023-01-24 02:49:06.821059: step: 414/463, loss: 0.011616557836532593 2023-01-24 02:49:07.449913: step: 416/463, loss: 0.027481263503432274 2023-01-24 02:49:08.111440: step: 418/463, loss: 0.0726519227027893 2023-01-24 02:49:08.686129: step: 420/463, loss: 0.006278494838625193 2023-01-24 02:49:09.275339: step: 422/463, loss: 0.021756159141659737 2023-01-24 02:49:09.911915: step: 424/463, loss: 0.03532163053750992 2023-01-24 02:49:10.576212: step: 426/463, loss: 0.05163384974002838 2023-01-24 02:49:11.165376: step: 428/463, loss: 0.08235520124435425 2023-01-24 02:49:11.789612: step: 430/463, loss: 0.04819801449775696 2023-01-24 02:49:12.400516: step: 432/463, loss: 0.04027056321501732 2023-01-24 02:49:13.014290: step: 434/463, loss: 0.0341426245868206 2023-01-24 02:49:13.590573: step: 436/463, loss: 0.05605063587427139 2023-01-24 02:49:14.200309: step: 438/463, loss: 0.1275501549243927 2023-01-24 02:49:14.763624: step: 440/463, loss: 0.025085454806685448 2023-01-24 02:49:15.364358: step: 442/463, loss: 0.10149203985929489 2023-01-24 02:49:15.981627: step: 444/463, loss: 0.020172027871012688 2023-01-24 02:49:16.658884: step: 446/463, loss: 0.00274521391838789 2023-01-24 02:49:17.237474: step: 448/463, loss: 0.011035444214940071 2023-01-24 02:49:17.841460: step: 450/463, loss: 0.10494759678840637 2023-01-24 02:49:18.432634: step: 452/463, loss: 0.020514480769634247 2023-01-24 02:49:19.007266: step: 454/463, loss: 0.05467884987592697 2023-01-24 02:49:19.625934: step: 456/463, loss: 0.04846052825450897 2023-01-24 02:49:20.286017: step: 458/463, loss: 0.2002474069595337 2023-01-24 02:49:20.915437: step: 460/463, loss: 0.0316212996840477 2023-01-24 02:49:21.526793: step: 462/463, loss: 0.09253125637769699 2023-01-24 02:49:22.165307: step: 464/463, loss: 0.052695032209157944 2023-01-24 02:49:22.767141: step: 466/463, loss: 0.24146237969398499 2023-01-24 02:49:23.383359: step: 468/463, loss: 0.007043098099529743 2023-01-24 02:49:23.969824: step: 470/463, loss: 0.01447104848921299 2023-01-24 02:49:24.588575: step: 472/463, loss: 0.036741647869348526 2023-01-24 02:49:25.187784: step: 474/463, loss: 0.01047175470739603 2023-01-24 02:49:25.951591: step: 476/463, loss: 0.005745860747992992 2023-01-24 02:49:26.570383: step: 478/463, loss: 0.014148136600852013 2023-01-24 02:49:27.210228: step: 480/463, loss: 0.06311214715242386 2023-01-24 02:49:27.801489: step: 482/463, loss: 0.007022074423730373 2023-01-24 02:49:28.399241: step: 484/463, loss: 0.06155131757259369 2023-01-24 02:49:28.970816: step: 486/463, loss: 0.046174995601177216 2023-01-24 02:49:29.622344: step: 488/463, loss: 0.05919300764799118 2023-01-24 02:49:30.215909: step: 490/463, loss: 0.07884790748357773 2023-01-24 02:49:30.860891: step: 492/463, loss: 0.04722708836197853 2023-01-24 02:49:31.515815: step: 494/463, loss: 0.046047113835811615 2023-01-24 02:49:32.130161: step: 496/463, loss: 2.6746463775634766 2023-01-24 02:49:32.803038: step: 498/463, loss: 0.029875969514250755 2023-01-24 02:49:33.439398: step: 500/463, loss: 0.06378640234470367 2023-01-24 02:49:34.047407: step: 502/463, loss: 0.03541964665055275 2023-01-24 02:49:34.632871: step: 504/463, loss: 0.017437372356653214 2023-01-24 02:49:35.289201: step: 506/463, loss: 0.23214848339557648 2023-01-24 02:49:35.834702: step: 508/463, loss: 0.4504941999912262 2023-01-24 02:49:36.489009: step: 510/463, loss: 0.012039203196763992 2023-01-24 02:49:37.085434: step: 512/463, loss: 0.03287268057465553 2023-01-24 02:49:37.717385: step: 514/463, loss: 0.018859634175896645 2023-01-24 02:49:38.328390: step: 516/463, loss: 0.08024318516254425 2023-01-24 02:49:38.907654: step: 518/463, loss: 0.012824405916035175 2023-01-24 02:49:39.512391: step: 520/463, loss: 0.007280994672328234 2023-01-24 02:49:40.140052: step: 522/463, loss: 0.26957663893699646 2023-01-24 02:49:40.776557: step: 524/463, loss: 0.11160249263048172 2023-01-24 02:49:41.390565: step: 526/463, loss: 0.022811762988567352 2023-01-24 02:49:41.998250: step: 528/463, loss: 0.018752921372652054 2023-01-24 02:49:42.591056: step: 530/463, loss: 0.04444326087832451 2023-01-24 02:49:43.206983: step: 532/463, loss: 0.07569348812103271 2023-01-24 02:49:43.784425: step: 534/463, loss: 0.11439692974090576 2023-01-24 02:49:44.371001: step: 536/463, loss: 0.10484132170677185 2023-01-24 02:49:45.043446: step: 538/463, loss: 0.026877908036112785 2023-01-24 02:49:45.718996: step: 540/463, loss: 0.058349501341581345 2023-01-24 02:49:46.388952: step: 542/463, loss: 0.07941265404224396 2023-01-24 02:49:46.989033: step: 544/463, loss: 0.050750862807035446 2023-01-24 02:49:47.588168: step: 546/463, loss: 0.003130796365439892 2023-01-24 02:49:48.172872: step: 548/463, loss: 0.033792074769735336 2023-01-24 02:49:48.799219: step: 550/463, loss: 0.49499449133872986 2023-01-24 02:49:49.377124: step: 552/463, loss: 0.05910259857773781 2023-01-24 02:49:49.975200: step: 554/463, loss: 0.07274140417575836 2023-01-24 02:49:50.531548: step: 556/463, loss: 0.041626498103141785 2023-01-24 02:49:51.230339: step: 558/463, loss: 0.03873985633254051 2023-01-24 02:49:51.812155: step: 560/463, loss: 0.021355608478188515 2023-01-24 02:49:52.380551: step: 562/463, loss: 0.0546957328915596 2023-01-24 02:49:52.998030: step: 564/463, loss: 0.03451968729496002 2023-01-24 02:49:53.702101: step: 566/463, loss: 0.05962375923991203 2023-01-24 02:49:54.302105: step: 568/463, loss: 0.017664875835180283 2023-01-24 02:49:54.885320: step: 570/463, loss: 0.02653919719159603 2023-01-24 02:49:55.511046: step: 572/463, loss: 0.017811376601457596 2023-01-24 02:49:56.144911: step: 574/463, loss: 0.012446126900613308 2023-01-24 02:49:56.762375: step: 576/463, loss: 0.006736402399837971 2023-01-24 02:49:57.433241: step: 578/463, loss: 0.028044695034623146 2023-01-24 02:49:58.049199: step: 580/463, loss: 0.07248085737228394 2023-01-24 02:49:58.663570: step: 582/463, loss: 3.4473843574523926 2023-01-24 02:49:59.264437: step: 584/463, loss: 0.04147389531135559 2023-01-24 02:49:59.905281: step: 586/463, loss: 0.041255999356508255 2023-01-24 02:50:00.498421: step: 588/463, loss: 0.0020315495785325766 2023-01-24 02:50:01.091846: step: 590/463, loss: 0.017633603885769844 2023-01-24 02:50:01.701398: step: 592/463, loss: 0.009242093190550804 2023-01-24 02:50:02.256197: step: 594/463, loss: 0.06248195469379425 2023-01-24 02:50:02.875015: step: 596/463, loss: 0.020495880395174026 2023-01-24 02:50:03.478317: step: 598/463, loss: 0.031155623495578766 2023-01-24 02:50:04.055359: step: 600/463, loss: 0.03219562768936157 2023-01-24 02:50:04.671907: step: 602/463, loss: 0.0577225461602211 2023-01-24 02:50:05.346263: step: 604/463, loss: 0.36348503828048706 2023-01-24 02:50:05.954191: step: 606/463, loss: 0.10026220977306366 2023-01-24 02:50:06.650813: step: 608/463, loss: 0.0029298823792487383 2023-01-24 02:50:07.286073: step: 610/463, loss: 0.05633306875824928 2023-01-24 02:50:07.893485: step: 612/463, loss: 0.019489897415041924 2023-01-24 02:50:08.494906: step: 614/463, loss: 0.0033064973540604115 2023-01-24 02:50:09.120391: step: 616/463, loss: 1.157387375831604 2023-01-24 02:50:09.738637: step: 618/463, loss: 0.21891532838344574 2023-01-24 02:50:10.318733: step: 620/463, loss: 0.07003778219223022 2023-01-24 02:50:10.877539: step: 622/463, loss: 0.011980635114014149 2023-01-24 02:50:11.462244: step: 624/463, loss: 0.07498269528150558 2023-01-24 02:50:12.121712: step: 626/463, loss: 0.12212085723876953 2023-01-24 02:50:12.759977: step: 628/463, loss: 0.08997271209955215 2023-01-24 02:50:13.442782: step: 630/463, loss: 0.041998837143182755 2023-01-24 02:50:14.041164: step: 632/463, loss: 0.04643499478697777 2023-01-24 02:50:14.663319: step: 634/463, loss: 0.04832078889012337 2023-01-24 02:50:15.238981: step: 636/463, loss: 0.047090813517570496 2023-01-24 02:50:15.811609: step: 638/463, loss: 0.03820930793881416 2023-01-24 02:50:16.436496: step: 640/463, loss: 0.04151812568306923 2023-01-24 02:50:17.088705: step: 642/463, loss: 0.03998343646526337 2023-01-24 02:50:17.695650: step: 644/463, loss: 0.021000875160098076 2023-01-24 02:50:18.277618: step: 646/463, loss: 0.03808119520545006 2023-01-24 02:50:18.912244: step: 648/463, loss: 0.02483983151614666 2023-01-24 02:50:19.554149: step: 650/463, loss: 0.06232593581080437 2023-01-24 02:50:20.160482: step: 652/463, loss: 0.2498684823513031 2023-01-24 02:50:20.804183: step: 654/463, loss: 0.11248897016048431 2023-01-24 02:50:21.480697: step: 656/463, loss: 0.020178822800517082 2023-01-24 02:50:22.061420: step: 658/463, loss: 0.0027732765302062035 2023-01-24 02:50:22.688477: step: 660/463, loss: 0.09825561940670013 2023-01-24 02:50:23.299052: step: 662/463, loss: 0.05490712821483612 2023-01-24 02:50:23.951412: step: 664/463, loss: 0.04641588777303696 2023-01-24 02:50:24.581268: step: 666/463, loss: 0.11539147794246674 2023-01-24 02:50:25.251323: step: 668/463, loss: 0.03922904282808304 2023-01-24 02:50:25.871372: step: 670/463, loss: 0.030917752534151077 2023-01-24 02:50:26.557370: step: 672/463, loss: 0.13984455168247223 2023-01-24 02:50:27.144190: step: 674/463, loss: 0.04922737553715706 2023-01-24 02:50:27.808991: step: 676/463, loss: 0.12831442058086395 2023-01-24 02:50:28.358990: step: 678/463, loss: 0.0511726550757885 2023-01-24 02:50:28.966287: step: 680/463, loss: 0.14181457459926605 2023-01-24 02:50:29.576444: step: 682/463, loss: 0.16475841403007507 2023-01-24 02:50:30.163807: step: 684/463, loss: 0.01586306095123291 2023-01-24 02:50:30.772734: step: 686/463, loss: 0.26028886437416077 2023-01-24 02:50:31.370963: step: 688/463, loss: 0.03954197093844414 2023-01-24 02:50:31.960182: step: 690/463, loss: 0.031624484807252884 2023-01-24 02:50:32.639505: step: 692/463, loss: 0.05536042898893356 2023-01-24 02:50:33.240189: step: 694/463, loss: 0.05666022002696991 2023-01-24 02:50:33.770687: step: 696/463, loss: 0.009800752624869347 2023-01-24 02:50:34.402339: step: 698/463, loss: 0.03423064947128296 2023-01-24 02:50:35.005273: step: 700/463, loss: 0.01812233217060566 2023-01-24 02:50:35.613058: step: 702/463, loss: 0.09569603204727173 2023-01-24 02:50:36.170188: step: 704/463, loss: 0.25385528802871704 2023-01-24 02:50:36.767826: step: 706/463, loss: 0.10880713164806366 2023-01-24 02:50:37.382660: step: 708/463, loss: 0.06892018020153046 2023-01-24 02:50:37.969276: step: 710/463, loss: 0.019073614850640297 2023-01-24 02:50:38.582266: step: 712/463, loss: 0.021823126822710037 2023-01-24 02:50:39.192046: step: 714/463, loss: 0.017520377412438393 2023-01-24 02:50:39.779110: step: 716/463, loss: 0.0797148197889328 2023-01-24 02:50:40.325734: step: 718/463, loss: 0.01773039624094963 2023-01-24 02:50:40.977042: step: 720/463, loss: 0.0038297895807772875 2023-01-24 02:50:41.611105: step: 722/463, loss: 0.011625467799603939 2023-01-24 02:50:42.227005: step: 724/463, loss: 0.013727680779993534 2023-01-24 02:50:42.806363: step: 726/463, loss: 0.035362113267183304 2023-01-24 02:50:43.417915: step: 728/463, loss: 0.019789647310972214 2023-01-24 02:50:44.043742: step: 730/463, loss: 0.35200369358062744 2023-01-24 02:50:44.657012: step: 732/463, loss: 0.025750525295734406 2023-01-24 02:50:45.252555: step: 734/463, loss: 0.008122463710606098 2023-01-24 02:50:45.842068: step: 736/463, loss: 0.051228299736976624 2023-01-24 02:50:46.409556: step: 738/463, loss: 0.0032816026359796524 2023-01-24 02:50:47.055195: step: 740/463, loss: 0.03438328951597214 2023-01-24 02:50:47.642482: step: 742/463, loss: 0.041927583515644073 2023-01-24 02:50:48.301908: step: 744/463, loss: 0.037916216999292374 2023-01-24 02:50:48.886055: step: 746/463, loss: 0.022013312205672264 2023-01-24 02:50:49.565362: step: 748/463, loss: 0.28563371300697327 2023-01-24 02:50:50.148247: step: 750/463, loss: 0.08321613818407059 2023-01-24 02:50:50.756018: step: 752/463, loss: 0.046197500079870224 2023-01-24 02:50:51.306536: step: 754/463, loss: 0.028734834864735603 2023-01-24 02:50:51.921664: step: 756/463, loss: 0.026988299563527107 2023-01-24 02:50:52.501608: step: 758/463, loss: 0.20448052883148193 2023-01-24 02:50:53.177630: step: 760/463, loss: 0.011914691887795925 2023-01-24 02:50:53.735967: step: 762/463, loss: 0.11408733576536179 2023-01-24 02:50:54.325315: step: 764/463, loss: 0.021337401121854782 2023-01-24 02:50:54.958178: step: 766/463, loss: 0.0528281144797802 2023-01-24 02:50:55.555768: step: 768/463, loss: 0.02012871764600277 2023-01-24 02:50:56.214328: step: 770/463, loss: 0.024880554527044296 2023-01-24 02:50:56.821624: step: 772/463, loss: 0.01807067170739174 2023-01-24 02:50:57.438354: step: 774/463, loss: 0.068910613656044 2023-01-24 02:50:58.060265: step: 776/463, loss: 0.08288536965847015 2023-01-24 02:50:58.723423: step: 778/463, loss: 0.1458849459886551 2023-01-24 02:50:59.316272: step: 780/463, loss: 0.08817189931869507 2023-01-24 02:50:59.917202: step: 782/463, loss: 0.022250236943364143 2023-01-24 02:51:00.483832: step: 784/463, loss: 0.3784802258014679 2023-01-24 02:51:01.084486: step: 786/463, loss: 0.035895541310310364 2023-01-24 02:51:01.637081: step: 788/463, loss: 0.023873750120401382 2023-01-24 02:51:02.249829: step: 790/463, loss: 0.06067821383476257 2023-01-24 02:51:02.879792: step: 792/463, loss: 0.018436333164572716 2023-01-24 02:51:03.473778: step: 794/463, loss: 0.03176938369870186 2023-01-24 02:51:04.072202: step: 796/463, loss: 0.047730088233947754 2023-01-24 02:51:04.754595: step: 798/463, loss: 0.05090329423546791 2023-01-24 02:51:05.409442: step: 800/463, loss: 0.47200807929039 2023-01-24 02:51:06.020785: step: 802/463, loss: 0.017136910930275917 2023-01-24 02:51:06.658834: step: 804/463, loss: 0.4148656725883484 2023-01-24 02:51:07.275656: step: 806/463, loss: 0.41079065203666687 2023-01-24 02:51:07.938638: step: 808/463, loss: 0.12139978259801865 2023-01-24 02:51:08.613724: step: 810/463, loss: 0.032090455293655396 2023-01-24 02:51:09.179987: step: 812/463, loss: 0.04385112226009369 2023-01-24 02:51:09.738243: step: 814/463, loss: 0.015612964518368244 2023-01-24 02:51:10.337608: step: 816/463, loss: 0.024952249601483345 2023-01-24 02:51:11.014488: step: 818/463, loss: 0.017510656267404556 2023-01-24 02:51:11.639243: step: 820/463, loss: 0.03368077427148819 2023-01-24 02:51:12.205198: step: 822/463, loss: 0.10691804438829422 2023-01-24 02:51:12.800982: step: 824/463, loss: 0.3177550733089447 2023-01-24 02:51:13.404768: step: 826/463, loss: 0.17177239060401917 2023-01-24 02:51:13.992250: step: 828/463, loss: 0.038262732326984406 2023-01-24 02:51:14.562193: step: 830/463, loss: 0.07411154359579086 2023-01-24 02:51:15.162709: step: 832/463, loss: 0.021607955917716026 2023-01-24 02:51:15.763264: step: 834/463, loss: 0.018612314015626907 2023-01-24 02:51:16.406124: step: 836/463, loss: 0.06034029647707939 2023-01-24 02:51:17.044619: step: 838/463, loss: 0.07143083214759827 2023-01-24 02:51:17.641694: step: 840/463, loss: 0.03643713891506195 2023-01-24 02:51:18.296821: step: 842/463, loss: 0.04487483575940132 2023-01-24 02:51:18.902095: step: 844/463, loss: 0.04642195254564285 2023-01-24 02:51:19.461347: step: 846/463, loss: 0.04301004856824875 2023-01-24 02:51:20.030820: step: 848/463, loss: 0.05738275498151779 2023-01-24 02:51:20.645400: step: 850/463, loss: 0.07412850856781006 2023-01-24 02:51:21.257283: step: 852/463, loss: 0.062225326895713806 2023-01-24 02:51:21.863002: step: 854/463, loss: 0.4306562840938568 2023-01-24 02:51:22.449154: step: 856/463, loss: 0.01618577167391777 2023-01-24 02:51:23.000259: step: 858/463, loss: 0.027972936630249023 2023-01-24 02:51:23.613510: step: 860/463, loss: 0.007615078706294298 2023-01-24 02:51:24.277805: step: 862/463, loss: 0.020337432622909546 2023-01-24 02:51:24.857045: step: 864/463, loss: 0.04651791974902153 2023-01-24 02:51:25.485703: step: 866/463, loss: 0.04454297572374344 2023-01-24 02:51:26.106892: step: 868/463, loss: 0.05176641047000885 2023-01-24 02:51:26.755151: step: 870/463, loss: 0.0438494011759758 2023-01-24 02:51:27.395767: step: 872/463, loss: 0.14950431883335114 2023-01-24 02:51:28.032024: step: 874/463, loss: 0.06976141035556793 2023-01-24 02:51:28.721915: step: 876/463, loss: 0.01836182363331318 2023-01-24 02:51:29.334702: step: 878/463, loss: 0.06435605883598328 2023-01-24 02:51:29.982541: step: 880/463, loss: 0.1706569939851761 2023-01-24 02:51:30.607361: step: 882/463, loss: 0.049718715250492096 2023-01-24 02:51:31.251136: step: 884/463, loss: 0.11519704014062881 2023-01-24 02:51:31.845067: step: 886/463, loss: 0.061909019947052 2023-01-24 02:51:32.538370: step: 888/463, loss: 0.10663385689258575 2023-01-24 02:51:33.160978: step: 890/463, loss: 1.5389924049377441 2023-01-24 02:51:33.743533: step: 892/463, loss: 0.03288953751325607 2023-01-24 02:51:34.325271: step: 894/463, loss: 0.01727985590696335 2023-01-24 02:51:34.928021: step: 896/463, loss: 0.08463269472122192 2023-01-24 02:51:35.551551: step: 898/463, loss: 0.012989690527319908 2023-01-24 02:51:36.135504: step: 900/463, loss: 0.05048949643969536 2023-01-24 02:51:36.747357: step: 902/463, loss: 0.10495632886886597 2023-01-24 02:51:37.409145: step: 904/463, loss: 0.5655423998832703 2023-01-24 02:51:37.930051: step: 906/463, loss: 0.010460744611918926 2023-01-24 02:51:38.582296: step: 908/463, loss: 0.08847657591104507 2023-01-24 02:51:39.216000: step: 910/463, loss: 0.03688330203294754 2023-01-24 02:51:39.799909: step: 912/463, loss: 0.0001059873538906686 2023-01-24 02:51:40.598193: step: 914/463, loss: 0.07086379826068878 2023-01-24 02:51:41.236972: step: 916/463, loss: 0.1865016222000122 2023-01-24 02:51:41.912006: step: 918/463, loss: 0.015397122129797935 2023-01-24 02:51:42.536609: step: 920/463, loss: 0.01931653544306755 2023-01-24 02:51:43.131818: step: 922/463, loss: 0.023542214184999466 2023-01-24 02:51:43.713963: step: 924/463, loss: 0.04463417828083038 2023-01-24 02:51:44.345542: step: 926/463, loss: 0.12339504063129425 ================================================== Loss: 0.088 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32046585332606325, 'r': 0.3186415695310382, 'f1': 0.31955110778850077}, 'combined': 0.23545871100205318, 'epoch': 21} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.35191724388020873, 'r': 0.31933800558212827, 'f1': 0.3348370113475613}, 'combined': 0.23556372657617378, 'epoch': 21} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31636169201520914, 'r': 0.31576138519924096, 'f1': 0.31606125356125353}, 'combined': 0.2328872394661868, 'epoch': 21} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3520061048820081, 'r': 0.3148072064621627, 'f1': 0.3323690653749897}, 'combined': 0.23598203641624269, 'epoch': 21} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3283446991001125, 'r': 0.3165068446733532, 'f1': 0.3223171152518979}, 'combined': 0.23749682176455633, 'epoch': 21} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3641420809822215, 'r': 0.3063114887145389, 'f1': 0.3327326692576887}, 'combined': 0.23624019517295897, 'epoch': 21} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.27960526315789475, 'r': 0.30357142857142855, 'f1': 0.2910958904109589}, 'combined': 0.19406392694063926, 'epoch': 21} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.30357142857142855, 'r': 0.3695652173913043, 'f1': 0.3333333333333333}, 'combined': 0.16666666666666666, 'epoch': 21} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45454545454545453, 'r': 0.1724137931034483, 'f1': 0.25000000000000006}, 'combined': 0.16666666666666669, 'epoch': 21} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32931238967343335, 'r': 0.3212043194731405, 'f1': 0.3252078248932276}, 'combined': 0.2396268183423782, 'epoch': 10} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38073307869771683, 'r': 0.30299176943483225, 'f1': 0.3374427286417082}, 'combined': 0.23958433733561282, 'epoch': 10} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31666666666666665, 'r': 0.41304347826086957, 'f1': 0.3584905660377358}, 'combined': 0.1792452830188679, 'epoch': 10} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3394620582120582, 'r': 0.3092447916666667, 'f1': 0.32364965312190286}, 'combined': 0.23847869177403366, 'epoch': 7} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36468931207169214, 'r': 0.2587193810770381, 'f1': 0.30269771384817323}, 'combined': 0.214915376832203, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 7} ****************************** Epoch: 22 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:54:16.625253: step: 2/463, loss: 0.027307109907269478 2023-01-24 02:54:17.269539: step: 4/463, loss: 0.037473686039447784 2023-01-24 02:54:17.803350: step: 6/463, loss: 0.06267278641462326 2023-01-24 02:54:18.366581: step: 8/463, loss: 0.021190013736486435 2023-01-24 02:54:18.970927: step: 10/463, loss: 0.013477391563355923 2023-01-24 02:54:19.593112: step: 12/463, loss: 0.01570427231490612 2023-01-24 02:54:20.286065: step: 14/463, loss: 0.04132172092795372 2023-01-24 02:54:20.820246: step: 16/463, loss: 0.06055578589439392 2023-01-24 02:54:21.421593: step: 18/463, loss: 0.012751455418765545 2023-01-24 02:54:22.113955: step: 20/463, loss: 0.037001240998506546 2023-01-24 02:54:22.724271: step: 22/463, loss: 0.10948446393013 2023-01-24 02:54:23.318021: step: 24/463, loss: 0.08681850135326385 2023-01-24 02:54:23.865916: step: 26/463, loss: 0.017269693315029144 2023-01-24 02:54:24.435538: step: 28/463, loss: 0.006153902970254421 2023-01-24 02:54:25.073954: step: 30/463, loss: 0.007208934053778648 2023-01-24 02:54:25.717571: step: 32/463, loss: 0.07320139557123184 2023-01-24 02:54:26.370319: step: 34/463, loss: 0.04111438989639282 2023-01-24 02:54:27.037370: step: 36/463, loss: 0.024946611374616623 2023-01-24 02:54:27.685658: step: 38/463, loss: 0.02011711150407791 2023-01-24 02:54:28.359248: step: 40/463, loss: 0.013324334286153316 2023-01-24 02:54:29.017634: step: 42/463, loss: 0.06823496520519257 2023-01-24 02:54:29.642543: step: 44/463, loss: 0.028058571740984917 2023-01-24 02:54:30.183605: step: 46/463, loss: 0.00903240218758583 2023-01-24 02:54:30.795818: step: 48/463, loss: 0.08277113735675812 2023-01-24 02:54:31.405772: step: 50/463, loss: 0.03626325726509094 2023-01-24 02:54:31.991835: step: 52/463, loss: 0.0208125039935112 2023-01-24 02:54:32.633772: step: 54/463, loss: 0.014014114625751972 2023-01-24 02:54:33.180515: step: 56/463, loss: 0.24550214409828186 2023-01-24 02:54:33.735993: step: 58/463, loss: 0.012373863719403744 2023-01-24 02:54:34.367390: step: 60/463, loss: 0.00256345490925014 2023-01-24 02:54:34.965093: step: 62/463, loss: 0.02941410429775715 2023-01-24 02:54:35.575392: step: 64/463, loss: 0.03804181516170502 2023-01-24 02:54:36.204496: step: 66/463, loss: 0.046513915061950684 2023-01-24 02:54:36.794986: step: 68/463, loss: 0.014291265048086643 2023-01-24 02:54:37.395175: step: 70/463, loss: 0.019305240362882614 2023-01-24 02:54:38.052308: step: 72/463, loss: 0.11295080929994583 2023-01-24 02:54:38.684511: step: 74/463, loss: 0.021382540464401245 2023-01-24 02:54:39.267501: step: 76/463, loss: 0.03604750335216522 2023-01-24 02:54:39.835682: step: 78/463, loss: 0.011811971664428711 2023-01-24 02:54:40.356419: step: 80/463, loss: 0.005003360565751791 2023-01-24 02:54:40.970128: step: 82/463, loss: 0.1388377547264099 2023-01-24 02:54:41.647071: step: 84/463, loss: 0.033270686864852905 2023-01-24 02:54:42.298680: step: 86/463, loss: 0.02405732497572899 2023-01-24 02:54:42.890959: step: 88/463, loss: 0.00955925788730383 2023-01-24 02:54:43.462737: step: 90/463, loss: 0.174737349152565 2023-01-24 02:54:44.039926: step: 92/463, loss: 0.1475657969713211 2023-01-24 02:54:44.667626: step: 94/463, loss: 0.01156697142869234 2023-01-24 02:54:45.248422: step: 96/463, loss: 0.02755185402929783 2023-01-24 02:54:45.859185: step: 98/463, loss: 0.0944242998957634 2023-01-24 02:54:46.445395: step: 100/463, loss: 0.04024284705519676 2023-01-24 02:54:47.061534: step: 102/463, loss: 0.04573199898004532 2023-01-24 02:54:47.710449: step: 104/463, loss: 0.0005249805981293321 2023-01-24 02:54:48.287962: step: 106/463, loss: 0.09447552263736725 2023-01-24 02:54:48.968890: step: 108/463, loss: 0.04967257380485535 2023-01-24 02:54:49.583883: step: 110/463, loss: 0.04291202872991562 2023-01-24 02:54:50.189048: step: 112/463, loss: 0.1726692020893097 2023-01-24 02:54:50.767185: step: 114/463, loss: 0.04216316342353821 2023-01-24 02:54:51.366128: step: 116/463, loss: 0.062183886766433716 2023-01-24 02:54:51.992665: step: 118/463, loss: 0.08621944487094879 2023-01-24 02:54:52.564642: step: 120/463, loss: 0.06988248974084854 2023-01-24 02:54:53.067002: step: 122/463, loss: 0.01880536787211895 2023-01-24 02:54:53.584245: step: 124/463, loss: 0.03986737132072449 2023-01-24 02:54:54.140874: step: 126/463, loss: 0.01400669477880001 2023-01-24 02:54:54.716119: step: 128/463, loss: 0.004456786904484034 2023-01-24 02:54:55.306069: step: 130/463, loss: 0.0007280800491571426 2023-01-24 02:54:55.943860: step: 132/463, loss: 0.04782773554325104 2023-01-24 02:54:56.578398: step: 134/463, loss: 0.1466175764799118 2023-01-24 02:54:57.155913: step: 136/463, loss: 0.016023272648453712 2023-01-24 02:54:57.757591: step: 138/463, loss: 0.013586165383458138 2023-01-24 02:54:58.367086: step: 140/463, loss: 0.01544534508138895 2023-01-24 02:54:58.969149: step: 142/463, loss: 0.024155104532837868 2023-01-24 02:54:59.623180: step: 144/463, loss: 0.02183278650045395 2023-01-24 02:55:00.211690: step: 146/463, loss: 0.0641411691904068 2023-01-24 02:55:00.839967: step: 148/463, loss: 0.10418061912059784 2023-01-24 02:55:01.489406: step: 150/463, loss: 0.08173997700214386 2023-01-24 02:55:02.077139: step: 152/463, loss: 0.0738450288772583 2023-01-24 02:55:02.672326: step: 154/463, loss: 0.0035604427102953196 2023-01-24 02:55:03.246083: step: 156/463, loss: 0.024154599756002426 2023-01-24 02:55:03.819739: step: 158/463, loss: 0.05782266706228256 2023-01-24 02:55:04.357232: step: 160/463, loss: 0.021431274712085724 2023-01-24 02:55:04.965025: step: 162/463, loss: 0.018270336091518402 2023-01-24 02:55:05.522459: step: 164/463, loss: 0.01551727019250393 2023-01-24 02:55:06.151154: step: 166/463, loss: 0.03260162100195885 2023-01-24 02:55:06.730667: step: 168/463, loss: 0.1287730634212494 2023-01-24 02:55:07.357055: step: 170/463, loss: 0.02307039499282837 2023-01-24 02:55:07.940987: step: 172/463, loss: 0.3131958544254303 2023-01-24 02:55:08.501622: step: 174/463, loss: 0.0222307201474905 2023-01-24 02:55:09.117360: step: 176/463, loss: 0.04144064337015152 2023-01-24 02:55:09.855238: step: 178/463, loss: 0.0496835894882679 2023-01-24 02:55:10.503573: step: 180/463, loss: 0.04616256058216095 2023-01-24 02:55:11.118489: step: 182/463, loss: 0.029609067365527153 2023-01-24 02:55:11.756183: step: 184/463, loss: 0.008352709002792835 2023-01-24 02:55:12.386506: step: 186/463, loss: 0.1040663868188858 2023-01-24 02:55:12.966506: step: 188/463, loss: 0.01757684536278248 2023-01-24 02:55:13.590143: step: 190/463, loss: 0.01669790968298912 2023-01-24 02:55:14.172235: step: 192/463, loss: 0.06879208981990814 2023-01-24 02:55:14.844654: step: 194/463, loss: 0.0655992180109024 2023-01-24 02:55:15.492997: step: 196/463, loss: 0.0479159951210022 2023-01-24 02:55:16.115789: step: 198/463, loss: 0.050756655633449554 2023-01-24 02:55:16.728075: step: 200/463, loss: 0.09488651156425476 2023-01-24 02:55:17.350121: step: 202/463, loss: 0.05930163711309433 2023-01-24 02:55:17.947948: step: 204/463, loss: 0.001928434707224369 2023-01-24 02:55:18.576649: step: 206/463, loss: 0.11096645146608353 2023-01-24 02:55:19.336916: step: 208/463, loss: 0.3430955410003662 2023-01-24 02:55:20.021966: step: 210/463, loss: 0.0470639131963253 2023-01-24 02:55:20.581078: step: 212/463, loss: 0.037407781928777695 2023-01-24 02:55:21.205082: step: 214/463, loss: 0.045205775648355484 2023-01-24 02:55:21.832181: step: 216/463, loss: 0.0607411190867424 2023-01-24 02:55:22.431601: step: 218/463, loss: 0.05128609389066696 2023-01-24 02:55:23.085087: step: 220/463, loss: 0.16991913318634033 2023-01-24 02:55:23.751715: step: 222/463, loss: 0.11553668975830078 2023-01-24 02:55:24.281473: step: 224/463, loss: 0.05036891996860504 2023-01-24 02:55:24.901514: step: 226/463, loss: 0.019152626395225525 2023-01-24 02:55:25.520799: step: 228/463, loss: 0.015525446273386478 2023-01-24 02:55:26.142221: step: 230/463, loss: 0.03112723119556904 2023-01-24 02:55:26.734773: step: 232/463, loss: 0.03061089664697647 2023-01-24 02:55:27.363126: step: 234/463, loss: 0.02143695577979088 2023-01-24 02:55:27.923677: step: 236/463, loss: 0.06377431005239487 2023-01-24 02:55:28.553897: step: 238/463, loss: 0.019154507666826248 2023-01-24 02:55:29.174159: step: 240/463, loss: 0.5521926879882812 2023-01-24 02:55:29.797029: step: 242/463, loss: 0.07064840942621231 2023-01-24 02:55:30.358138: step: 244/463, loss: 0.11905047297477722 2023-01-24 02:55:30.979071: step: 246/463, loss: 0.013040137477219105 2023-01-24 02:55:31.622260: step: 248/463, loss: 0.055910978466272354 2023-01-24 02:55:32.224179: step: 250/463, loss: 0.06872695684432983 2023-01-24 02:55:32.830343: step: 252/463, loss: 0.0483228974044323 2023-01-24 02:55:33.463938: step: 254/463, loss: 0.045597486197948456 2023-01-24 02:55:34.030869: step: 256/463, loss: 0.02932564914226532 2023-01-24 02:55:34.640901: step: 258/463, loss: 0.012591350823640823 2023-01-24 02:55:35.275314: step: 260/463, loss: 0.05620492994785309 2023-01-24 02:55:35.853502: step: 262/463, loss: 0.009699014946818352 2023-01-24 02:55:36.456411: step: 264/463, loss: 0.06700722873210907 2023-01-24 02:55:37.042552: step: 266/463, loss: 0.05172678828239441 2023-01-24 02:55:37.621695: step: 268/463, loss: 0.027006186544895172 2023-01-24 02:55:38.187678: step: 270/463, loss: 0.013789490796625614 2023-01-24 02:55:38.786302: step: 272/463, loss: 0.37797629833221436 2023-01-24 02:55:39.436021: step: 274/463, loss: 0.3350977897644043 2023-01-24 02:55:40.104809: step: 276/463, loss: 0.026617761701345444 2023-01-24 02:55:40.709878: step: 278/463, loss: 0.018763842061161995 2023-01-24 02:55:41.394851: step: 280/463, loss: 0.11493569612503052 2023-01-24 02:55:42.074860: step: 282/463, loss: 0.21757283806800842 2023-01-24 02:55:42.717996: step: 284/463, loss: 0.3814856708049774 2023-01-24 02:55:43.317058: step: 286/463, loss: 0.04027283936738968 2023-01-24 02:55:43.895445: step: 288/463, loss: 0.08117479830980301 2023-01-24 02:55:44.531577: step: 290/463, loss: 0.029753416776657104 2023-01-24 02:55:45.149274: step: 292/463, loss: 0.24690839648246765 2023-01-24 02:55:45.774639: step: 294/463, loss: 0.009888594038784504 2023-01-24 02:55:46.436553: step: 296/463, loss: 0.0247699823230505 2023-01-24 02:55:47.027139: step: 298/463, loss: 0.007117341738194227 2023-01-24 02:55:47.653556: step: 300/463, loss: 0.12451712042093277 2023-01-24 02:55:48.292365: step: 302/463, loss: 0.011781606823205948 2023-01-24 02:55:48.927535: step: 304/463, loss: 0.014765860512852669 2023-01-24 02:55:49.534450: step: 306/463, loss: 0.3099306523799896 2023-01-24 02:55:50.165244: step: 308/463, loss: 0.09211153537034988 2023-01-24 02:55:50.695927: step: 310/463, loss: 0.009809508919715881 2023-01-24 02:55:51.270027: step: 312/463, loss: 0.03408985584974289 2023-01-24 02:55:51.929737: step: 314/463, loss: 0.4526342451572418 2023-01-24 02:55:52.545667: step: 316/463, loss: 0.01655806228518486 2023-01-24 02:55:53.145258: step: 318/463, loss: 0.062120869755744934 2023-01-24 02:55:53.850051: step: 320/463, loss: 0.034328263252973557 2023-01-24 02:55:54.492494: step: 322/463, loss: 0.054967351257801056 2023-01-24 02:55:55.122559: step: 324/463, loss: 0.03814755380153656 2023-01-24 02:55:55.682599: step: 326/463, loss: 0.028077369555830956 2023-01-24 02:55:56.294534: step: 328/463, loss: 0.015220968052744865 2023-01-24 02:55:56.930839: step: 330/463, loss: 0.05941757932305336 2023-01-24 02:55:57.565731: step: 332/463, loss: 0.03243464231491089 2023-01-24 02:55:58.213401: step: 334/463, loss: 0.03327122703194618 2023-01-24 02:55:58.848728: step: 336/463, loss: 0.04492618516087532 2023-01-24 02:55:59.402654: step: 338/463, loss: 0.025846682488918304 2023-01-24 02:55:59.992419: step: 340/463, loss: 0.02328733168542385 2023-01-24 02:56:00.623246: step: 342/463, loss: 0.023147432133555412 2023-01-24 02:56:01.203494: step: 344/463, loss: 0.014578692615032196 2023-01-24 02:56:01.817146: step: 346/463, loss: 0.15572340786457062 2023-01-24 02:56:02.429145: step: 348/463, loss: 0.0454864539206028 2023-01-24 02:56:03.005848: step: 350/463, loss: 0.10536032915115356 2023-01-24 02:56:03.620121: step: 352/463, loss: 0.00830143503844738 2023-01-24 02:56:04.316414: step: 354/463, loss: 0.048262178897857666 2023-01-24 02:56:04.909703: step: 356/463, loss: 0.053041957318782806 2023-01-24 02:56:05.558249: step: 358/463, loss: 0.0022999390494078398 2023-01-24 02:56:06.152254: step: 360/463, loss: 0.02009984292089939 2023-01-24 02:56:06.797184: step: 362/463, loss: 0.23052583634853363 2023-01-24 02:56:07.370825: step: 364/463, loss: 0.04431082680821419 2023-01-24 02:56:08.016217: step: 366/463, loss: 0.06494922190904617 2023-01-24 02:56:08.587955: step: 368/463, loss: 0.3324483036994934 2023-01-24 02:56:09.213338: step: 370/463, loss: 0.11793359369039536 2023-01-24 02:56:09.785990: step: 372/463, loss: 0.04714483022689819 2023-01-24 02:56:10.489338: step: 374/463, loss: 0.12025386095046997 2023-01-24 02:56:11.160392: step: 376/463, loss: 0.04301057755947113 2023-01-24 02:56:11.733241: step: 378/463, loss: 0.05795026570558548 2023-01-24 02:56:12.377415: step: 380/463, loss: 0.017216719686985016 2023-01-24 02:56:12.958211: step: 382/463, loss: 0.13666464388370514 2023-01-24 02:56:13.562663: step: 384/463, loss: 0.12699364125728607 2023-01-24 02:56:14.281419: step: 386/463, loss: 0.12757234275341034 2023-01-24 02:56:14.853447: step: 388/463, loss: 0.060375016182661057 2023-01-24 02:56:15.441709: step: 390/463, loss: 0.007756464183330536 2023-01-24 02:56:15.991118: step: 392/463, loss: 0.10254328697919846 2023-01-24 02:56:16.616232: step: 394/463, loss: 0.01230544038116932 2023-01-24 02:56:17.218933: step: 396/463, loss: 0.014242680743336678 2023-01-24 02:56:17.865480: step: 398/463, loss: 0.008879739791154861 2023-01-24 02:56:18.513129: step: 400/463, loss: 0.00943006668239832 2023-01-24 02:56:19.121796: step: 402/463, loss: 0.06302223354578018 2023-01-24 02:56:19.700923: step: 404/463, loss: 0.03651362285017967 2023-01-24 02:56:20.285991: step: 406/463, loss: 0.012782162986695766 2023-01-24 02:56:20.862496: step: 408/463, loss: 0.010192939080297947 2023-01-24 02:56:21.526338: step: 410/463, loss: 0.004714061506092548 2023-01-24 02:56:22.128877: step: 412/463, loss: 0.01033987756818533 2023-01-24 02:56:22.691643: step: 414/463, loss: 0.03748365864157677 2023-01-24 02:56:23.301569: step: 416/463, loss: 0.03365595266222954 2023-01-24 02:56:23.894735: step: 418/463, loss: 0.02491934411227703 2023-01-24 02:56:24.519909: step: 420/463, loss: 0.21708276867866516 2023-01-24 02:56:25.133958: step: 422/463, loss: 0.013111262582242489 2023-01-24 02:56:25.831759: step: 424/463, loss: 0.015632115304470062 2023-01-24 02:56:26.402995: step: 426/463, loss: 0.03767094388604164 2023-01-24 02:56:27.020621: step: 428/463, loss: 0.016659876331686974 2023-01-24 02:56:27.660565: step: 430/463, loss: 0.06389115750789642 2023-01-24 02:56:28.342467: step: 432/463, loss: 0.01746070384979248 2023-01-24 02:56:28.954426: step: 434/463, loss: 0.09630382061004639 2023-01-24 02:56:29.570596: step: 436/463, loss: 0.04486192390322685 2023-01-24 02:56:30.148964: step: 438/463, loss: 0.020552944391965866 2023-01-24 02:56:30.745361: step: 440/463, loss: 0.052226655185222626 2023-01-24 02:56:31.406683: step: 442/463, loss: 0.01587195321917534 2023-01-24 02:56:32.003901: step: 444/463, loss: 0.05374220013618469 2023-01-24 02:56:32.548848: step: 446/463, loss: 0.006284336093813181 2023-01-24 02:56:33.154911: step: 448/463, loss: 0.05105775594711304 2023-01-24 02:56:33.806559: step: 450/463, loss: 0.23414009809494019 2023-01-24 02:56:34.382840: step: 452/463, loss: 0.03003222681581974 2023-01-24 02:56:34.961198: step: 454/463, loss: 0.02326984331011772 2023-01-24 02:56:35.572022: step: 456/463, loss: 0.058105818927288055 2023-01-24 02:56:36.223323: step: 458/463, loss: 0.060882341116666794 2023-01-24 02:56:36.834276: step: 460/463, loss: 0.011673569679260254 2023-01-24 02:56:37.418779: step: 462/463, loss: 0.033384136855602264 2023-01-24 02:56:38.089278: step: 464/463, loss: 0.008067108690738678 2023-01-24 02:56:38.671681: step: 466/463, loss: 0.02684682048857212 2023-01-24 02:56:39.313126: step: 468/463, loss: 0.00514283636584878 2023-01-24 02:56:39.806167: step: 470/463, loss: 0.022212877869606018 2023-01-24 02:56:40.368608: step: 472/463, loss: 0.04347573220729828 2023-01-24 02:56:40.971728: step: 474/463, loss: 0.08882444351911545 2023-01-24 02:56:41.528996: step: 476/463, loss: 0.15299150347709656 2023-01-24 02:56:42.170519: step: 478/463, loss: 0.044049546122550964 2023-01-24 02:56:42.834976: step: 480/463, loss: 0.07663381099700928 2023-01-24 02:56:43.492404: step: 482/463, loss: 0.01907028630375862 2023-01-24 02:56:44.084426: step: 484/463, loss: 0.015243188478052616 2023-01-24 02:56:44.718802: step: 486/463, loss: 0.038284897804260254 2023-01-24 02:56:45.342846: step: 488/463, loss: 0.022857200354337692 2023-01-24 02:56:45.996394: step: 490/463, loss: 0.2573881149291992 2023-01-24 02:56:46.620433: step: 492/463, loss: 0.15773296356201172 2023-01-24 02:56:47.188153: step: 494/463, loss: 0.016086386516690254 2023-01-24 02:56:47.852776: step: 496/463, loss: 0.04167087376117706 2023-01-24 02:56:48.391791: step: 498/463, loss: 0.03391839191317558 2023-01-24 02:56:48.927017: step: 500/463, loss: 0.0413346067070961 2023-01-24 02:56:49.580727: step: 502/463, loss: 0.05528819188475609 2023-01-24 02:56:50.219909: step: 504/463, loss: 0.0442725345492363 2023-01-24 02:56:50.733600: step: 506/463, loss: 0.005538606084883213 2023-01-24 02:56:51.332484: step: 508/463, loss: 0.054893553256988525 2023-01-24 02:56:51.946475: step: 510/463, loss: 0.057096075266599655 2023-01-24 02:56:52.602329: step: 512/463, loss: 0.03208644315600395 2023-01-24 02:56:53.168218: step: 514/463, loss: 0.016278330236673355 2023-01-24 02:56:53.773670: step: 516/463, loss: 0.07532282918691635 2023-01-24 02:56:54.346127: step: 518/463, loss: 0.004500460810959339 2023-01-24 02:56:55.057981: step: 520/463, loss: 0.028330031782388687 2023-01-24 02:56:55.681968: step: 522/463, loss: 0.064474917948246 2023-01-24 02:56:56.432532: step: 524/463, loss: 0.0065143899992108345 2023-01-24 02:56:56.995172: step: 526/463, loss: 0.03988111764192581 2023-01-24 02:56:57.573262: step: 528/463, loss: 0.10069181770086288 2023-01-24 02:56:58.266564: step: 530/463, loss: 0.010335694998502731 2023-01-24 02:56:58.890921: step: 532/463, loss: 0.07569997012615204 2023-01-24 02:56:59.511206: step: 534/463, loss: 0.004323458764702082 2023-01-24 02:57:00.136207: step: 536/463, loss: 0.013647849671542645 2023-01-24 02:57:00.737585: step: 538/463, loss: 0.18416380882263184 2023-01-24 02:57:01.321666: step: 540/463, loss: 0.17598333954811096 2023-01-24 02:57:01.894534: step: 542/463, loss: 0.015335590578615665 2023-01-24 02:57:02.492356: step: 544/463, loss: 0.09275980293750763 2023-01-24 02:57:03.038462: step: 546/463, loss: 0.07587829977273941 2023-01-24 02:57:03.604871: step: 548/463, loss: 0.015564859844744205 2023-01-24 02:57:04.135292: step: 550/463, loss: 0.003187124617397785 2023-01-24 02:57:04.841928: step: 552/463, loss: 0.0635417029261589 2023-01-24 02:57:05.450874: step: 554/463, loss: 0.2858201861381531 2023-01-24 02:57:06.055102: step: 556/463, loss: 0.018598847091197968 2023-01-24 02:57:06.630345: step: 558/463, loss: 0.14892862737178802 2023-01-24 02:57:07.273469: step: 560/463, loss: 0.05545175075531006 2023-01-24 02:57:07.915591: step: 562/463, loss: 0.009108692407608032 2023-01-24 02:57:08.518156: step: 564/463, loss: 0.056194692850112915 2023-01-24 02:57:09.122711: step: 566/463, loss: 0.0345550999045372 2023-01-24 02:57:09.861519: step: 568/463, loss: 0.034227609634399414 2023-01-24 02:57:10.583147: step: 570/463, loss: 0.1109544187784195 2023-01-24 02:57:11.131615: step: 572/463, loss: 0.01987023837864399 2023-01-24 02:57:11.714099: step: 574/463, loss: 0.05919257923960686 2023-01-24 02:57:12.294815: step: 576/463, loss: 0.021943096071481705 2023-01-24 02:57:12.979432: step: 578/463, loss: 0.036531876772642136 2023-01-24 02:57:13.554393: step: 580/463, loss: 0.044673267751932144 2023-01-24 02:57:14.140828: step: 582/463, loss: 0.016790423542261124 2023-01-24 02:57:14.749832: step: 584/463, loss: 0.017928099259734154 2023-01-24 02:57:15.338429: step: 586/463, loss: 0.03736891970038414 2023-01-24 02:57:15.980998: step: 588/463, loss: 0.05322084203362465 2023-01-24 02:57:16.566094: step: 590/463, loss: 0.0111657390370965 2023-01-24 02:57:17.219319: step: 592/463, loss: 0.04067065566778183 2023-01-24 02:57:17.844112: step: 594/463, loss: 0.2293587327003479 2023-01-24 02:57:18.504622: step: 596/463, loss: 0.0110015282407403 2023-01-24 02:57:19.133889: step: 598/463, loss: 0.022349612787365913 2023-01-24 02:57:19.717574: step: 600/463, loss: 0.04624270275235176 2023-01-24 02:57:20.328173: step: 602/463, loss: 0.0088732298463583 2023-01-24 02:57:20.931414: step: 604/463, loss: 0.04240732640028 2023-01-24 02:57:21.529309: step: 606/463, loss: 0.6057572960853577 2023-01-24 02:57:22.117103: step: 608/463, loss: 0.028889348730444908 2023-01-24 02:57:22.693075: step: 610/463, loss: 0.042693447321653366 2023-01-24 02:57:23.292379: step: 612/463, loss: 0.023404410108923912 2023-01-24 02:57:23.952838: step: 614/463, loss: 0.0210364181548357 2023-01-24 02:57:24.540678: step: 616/463, loss: 0.07394241541624069 2023-01-24 02:57:25.212170: step: 618/463, loss: 0.13347145915031433 2023-01-24 02:57:25.835979: step: 620/463, loss: 0.22575202584266663 2023-01-24 02:57:26.420197: step: 622/463, loss: 0.010912074707448483 2023-01-24 02:57:26.993381: step: 624/463, loss: 0.069573312997818 2023-01-24 02:57:27.632712: step: 626/463, loss: 0.08853700757026672 2023-01-24 02:57:28.189679: step: 628/463, loss: 0.008282721973955631 2023-01-24 02:57:28.840128: step: 630/463, loss: 0.007997180335223675 2023-01-24 02:57:29.533500: step: 632/463, loss: 0.8594401478767395 2023-01-24 02:57:30.111144: step: 634/463, loss: 0.05402370169758797 2023-01-24 02:57:30.664905: step: 636/463, loss: 0.03924823924899101 2023-01-24 02:57:31.259509: step: 638/463, loss: 0.018613280728459358 2023-01-24 02:57:31.864002: step: 640/463, loss: 0.0222090482711792 2023-01-24 02:57:32.553355: step: 642/463, loss: 0.015582130290567875 2023-01-24 02:57:33.141877: step: 644/463, loss: 0.03298481926321983 2023-01-24 02:57:33.784250: step: 646/463, loss: 0.027118481695652008 2023-01-24 02:57:34.337600: step: 648/463, loss: 0.036445774137973785 2023-01-24 02:57:34.986250: step: 650/463, loss: 0.023732291534543037 2023-01-24 02:57:35.597632: step: 652/463, loss: 0.05395600572228432 2023-01-24 02:57:36.207789: step: 654/463, loss: 0.02892528474330902 2023-01-24 02:57:36.884350: step: 656/463, loss: 0.07855701446533203 2023-01-24 02:57:37.478751: step: 658/463, loss: 0.4196735620498657 2023-01-24 02:57:38.048258: step: 660/463, loss: 0.020453108474612236 2023-01-24 02:57:38.645191: step: 662/463, loss: 0.03943797945976257 2023-01-24 02:57:39.196876: step: 664/463, loss: 0.06709140539169312 2023-01-24 02:57:39.799846: step: 666/463, loss: 0.043501000851392746 2023-01-24 02:57:40.359297: step: 668/463, loss: 0.05584729462862015 2023-01-24 02:57:41.048961: step: 670/463, loss: 0.024247171357274055 2023-01-24 02:57:41.699360: step: 672/463, loss: 0.043673284351825714 2023-01-24 02:57:42.263584: step: 674/463, loss: 0.05581480637192726 2023-01-24 02:57:42.827526: step: 676/463, loss: 0.4347115457057953 2023-01-24 02:57:43.413928: step: 678/463, loss: 0.01929638721048832 2023-01-24 02:57:44.030027: step: 680/463, loss: 0.04030774533748627 2023-01-24 02:57:44.573630: step: 682/463, loss: 0.03296920284628868 2023-01-24 02:57:45.214205: step: 684/463, loss: 0.20981547236442566 2023-01-24 02:57:45.823980: step: 686/463, loss: 0.0039622788317501545 2023-01-24 02:57:46.437163: step: 688/463, loss: 0.3171810507774353 2023-01-24 02:57:46.995598: step: 690/463, loss: 0.035749197006225586 2023-01-24 02:57:47.620189: step: 692/463, loss: 0.006789928302168846 2023-01-24 02:57:48.212083: step: 694/463, loss: 0.07700510323047638 2023-01-24 02:57:48.755292: step: 696/463, loss: 0.004115153104066849 2023-01-24 02:57:49.287394: step: 698/463, loss: 0.03172479569911957 2023-01-24 02:57:49.852523: step: 700/463, loss: 0.003815131727606058 2023-01-24 02:57:50.482972: step: 702/463, loss: 0.027652086690068245 2023-01-24 02:57:51.112227: step: 704/463, loss: 0.039657846093177795 2023-01-24 02:57:51.797854: step: 706/463, loss: 0.025318479165434837 2023-01-24 02:57:52.400098: step: 708/463, loss: 0.024459894746541977 2023-01-24 02:57:53.037300: step: 710/463, loss: 0.13830170035362244 2023-01-24 02:57:53.648720: step: 712/463, loss: 0.11719582229852676 2023-01-24 02:57:54.277514: step: 714/463, loss: 0.05310874804854393 2023-01-24 02:57:54.802691: step: 716/463, loss: 0.08931173384189606 2023-01-24 02:57:55.431919: step: 718/463, loss: 0.09405551850795746 2023-01-24 02:57:56.052111: step: 720/463, loss: 0.1177615225315094 2023-01-24 02:57:56.642058: step: 722/463, loss: 0.05384043976664543 2023-01-24 02:57:57.225756: step: 724/463, loss: 0.03341394662857056 2023-01-24 02:57:57.849927: step: 726/463, loss: 0.008840322494506836 2023-01-24 02:57:58.426777: step: 728/463, loss: 0.04292209446430206 2023-01-24 02:57:59.025152: step: 730/463, loss: 0.05097596347332001 2023-01-24 02:57:59.647066: step: 732/463, loss: 0.025359854102134705 2023-01-24 02:58:00.243788: step: 734/463, loss: 0.021372223272919655 2023-01-24 02:58:00.935219: step: 736/463, loss: 0.09978418797254562 2023-01-24 02:58:01.493854: step: 738/463, loss: 0.030770065262913704 2023-01-24 02:58:02.121236: step: 740/463, loss: 0.19172467291355133 2023-01-24 02:58:02.697238: step: 742/463, loss: 0.02478044480085373 2023-01-24 02:58:03.380662: step: 744/463, loss: 0.051965124905109406 2023-01-24 02:58:04.079391: step: 746/463, loss: 0.018788136541843414 2023-01-24 02:58:04.680308: step: 748/463, loss: 0.04408716410398483 2023-01-24 02:58:05.262433: step: 750/463, loss: 0.00019028560200240463 2023-01-24 02:58:05.869072: step: 752/463, loss: 0.024658353999257088 2023-01-24 02:58:06.572889: step: 754/463, loss: 0.08846825361251831 2023-01-24 02:58:07.166988: step: 756/463, loss: 0.027668211609125137 2023-01-24 02:58:07.772081: step: 758/463, loss: 0.036085546016693115 2023-01-24 02:58:08.367881: step: 760/463, loss: 0.16244789958000183 2023-01-24 02:58:09.027283: step: 762/463, loss: 0.13738738000392914 2023-01-24 02:58:09.630867: step: 764/463, loss: 0.10772093385457993 2023-01-24 02:58:10.217470: step: 766/463, loss: 0.02252275124192238 2023-01-24 02:58:10.843334: step: 768/463, loss: 0.009060605429112911 2023-01-24 02:58:11.444024: step: 770/463, loss: 0.048438090831041336 2023-01-24 02:58:12.025970: step: 772/463, loss: 0.02476799301803112 2023-01-24 02:58:12.693524: step: 774/463, loss: 0.0853457972407341 2023-01-24 02:58:13.276388: step: 776/463, loss: 0.017607679590582848 2023-01-24 02:58:13.969614: step: 778/463, loss: 0.027686890214681625 2023-01-24 02:58:14.643283: step: 780/463, loss: 0.04976583644747734 2023-01-24 02:58:15.188689: step: 782/463, loss: 0.04088650643825531 2023-01-24 02:58:15.754119: step: 784/463, loss: 0.008163291029632092 2023-01-24 02:58:16.428543: step: 786/463, loss: 0.12748965620994568 2023-01-24 02:58:17.022106: step: 788/463, loss: 0.05324763059616089 2023-01-24 02:58:17.659737: step: 790/463, loss: 0.0016227257438004017 2023-01-24 02:58:18.233649: step: 792/463, loss: 0.01225209515541792 2023-01-24 02:58:18.803965: step: 794/463, loss: 0.02927045151591301 2023-01-24 02:58:19.415241: step: 796/463, loss: 0.27668488025665283 2023-01-24 02:58:19.999995: step: 798/463, loss: 0.048540085554122925 2023-01-24 02:58:20.585551: step: 800/463, loss: 0.16501009464263916 2023-01-24 02:58:21.180664: step: 802/463, loss: 0.011918695643544197 2023-01-24 02:58:21.845221: step: 804/463, loss: 0.02003621868789196 2023-01-24 02:58:22.383044: step: 806/463, loss: 0.02932962216436863 2023-01-24 02:58:23.026565: step: 808/463, loss: 0.00890417117625475 2023-01-24 02:58:23.649835: step: 810/463, loss: 0.05604743957519531 2023-01-24 02:58:24.235468: step: 812/463, loss: 0.1096309944987297 2023-01-24 02:58:24.847466: step: 814/463, loss: 0.04792318865656853 2023-01-24 02:58:25.490853: step: 816/463, loss: 0.00931707676500082 2023-01-24 02:58:26.037073: step: 818/463, loss: 0.0021293533500283957 2023-01-24 02:58:26.691712: step: 820/463, loss: 0.022438695654273033 2023-01-24 02:58:27.216472: step: 822/463, loss: 0.013250455260276794 2023-01-24 02:58:27.860223: step: 824/463, loss: 0.03770682215690613 2023-01-24 02:58:28.553225: step: 826/463, loss: 0.018190696835517883 2023-01-24 02:58:29.148520: step: 828/463, loss: 0.14048126339912415 2023-01-24 02:58:29.729711: step: 830/463, loss: 0.04479968920350075 2023-01-24 02:58:30.407468: step: 832/463, loss: 0.0633826032280922 2023-01-24 02:58:30.999298: step: 834/463, loss: 0.043250855058431625 2023-01-24 02:58:31.611676: step: 836/463, loss: 0.09916787594556808 2023-01-24 02:58:32.218776: step: 838/463, loss: 0.21778972446918488 2023-01-24 02:58:32.791469: step: 840/463, loss: 0.01910831779241562 2023-01-24 02:58:33.371080: step: 842/463, loss: 0.0027309712022542953 2023-01-24 02:58:34.000414: step: 844/463, loss: 0.059405893087387085 2023-01-24 02:58:34.624628: step: 846/463, loss: 0.03741871938109398 2023-01-24 02:58:35.243698: step: 848/463, loss: 0.07094387710094452 2023-01-24 02:58:35.832675: step: 850/463, loss: 0.026074478402733803 2023-01-24 02:58:36.457320: step: 852/463, loss: 0.1769607961177826 2023-01-24 02:58:37.056625: step: 854/463, loss: 0.04753291979432106 2023-01-24 02:58:37.650730: step: 856/463, loss: 0.04340430721640587 2023-01-24 02:58:38.314318: step: 858/463, loss: 0.058906640857458115 2023-01-24 02:58:39.073468: step: 860/463, loss: 0.026164032518863678 2023-01-24 02:58:39.655776: step: 862/463, loss: 0.026972267776727676 2023-01-24 02:58:40.212050: step: 864/463, loss: 0.03876487910747528 2023-01-24 02:58:40.858765: step: 866/463, loss: 0.04996780306100845 2023-01-24 02:58:41.438557: step: 868/463, loss: 0.04396574944257736 2023-01-24 02:58:42.036327: step: 870/463, loss: 0.06949763745069504 2023-01-24 02:58:42.626874: step: 872/463, loss: 0.026053421199321747 2023-01-24 02:58:43.211757: step: 874/463, loss: 0.10872802138328552 2023-01-24 02:58:43.787040: step: 876/463, loss: 0.1126568540930748 2023-01-24 02:58:44.423821: step: 878/463, loss: 0.04822190850973129 2023-01-24 02:58:45.079823: step: 880/463, loss: 0.06492262333631516 2023-01-24 02:58:45.752343: step: 882/463, loss: 0.034169603139162064 2023-01-24 02:58:46.365437: step: 884/463, loss: 0.06735335290431976 2023-01-24 02:58:46.998302: step: 886/463, loss: 0.060896798968315125 2023-01-24 02:58:47.627882: step: 888/463, loss: 0.02256092242896557 2023-01-24 02:58:48.294703: step: 890/463, loss: 0.025461973622441292 2023-01-24 02:58:48.879500: step: 892/463, loss: 0.08786864578723907 2023-01-24 02:58:49.483865: step: 894/463, loss: 0.014913801103830338 2023-01-24 02:58:50.078908: step: 896/463, loss: 0.1522507667541504 2023-01-24 02:58:50.753528: step: 898/463, loss: 1.9961845874786377 2023-01-24 02:58:51.384942: step: 900/463, loss: 0.010216623544692993 2023-01-24 02:58:51.963033: step: 902/463, loss: 0.014999612234532833 2023-01-24 02:58:52.603738: step: 904/463, loss: 0.02270318567752838 2023-01-24 02:58:53.241757: step: 906/463, loss: 0.056541942059993744 2023-01-24 02:58:53.836445: step: 908/463, loss: 0.01890518330037594 2023-01-24 02:58:54.436307: step: 910/463, loss: 0.023744642734527588 2023-01-24 02:58:55.094212: step: 912/463, loss: 0.0528278723359108 2023-01-24 02:58:55.671730: step: 914/463, loss: 0.2787163555622101 2023-01-24 02:58:56.301824: step: 916/463, loss: 0.018158627673983574 2023-01-24 02:58:56.886778: step: 918/463, loss: 0.042585521936416626 2023-01-24 02:58:57.523877: step: 920/463, loss: 0.20878718793392181 2023-01-24 02:58:58.216628: step: 922/463, loss: 0.21594728529453278 2023-01-24 02:58:58.824044: step: 924/463, loss: 0.03421413153409958 2023-01-24 02:58:59.415541: step: 926/463, loss: 0.0780918300151825 ================================================== Loss: 0.069 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32344972016933343, 'r': 0.33326982552551815, 'f1': 0.32828635149896834}, 'combined': 0.24189520636766088, 'epoch': 22} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.36452980472764646, 'r': 0.30950043630017454, 'f1': 0.3347687588485135}, 'combined': 0.23551570974267283, 'epoch': 22} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3291405310190754, 'r': 0.33038964119372083, 'f1': 0.32976390323691457}, 'combined': 0.2429839287008844, 'epoch': 22} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3609167911144297, 'r': 0.30701568082572445, 'f1': 0.33179136814106136}, 'combined': 0.23557187138015356, 'epoch': 22} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3386253445407365, 'r': 0.3360551331969738, 'f1': 0.33733534322820036}, 'combined': 0.2485628844839371, 'epoch': 22} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38038042456279647, 'r': 0.3043043396502372, 'f1': 0.338115932944708}, 'combined': 0.24006231239074266, 'epoch': 22} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.296875, 'r': 0.2714285714285714, 'f1': 0.2835820895522388}, 'combined': 0.18905472636815918, 'epoch': 22} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.32608695652173914, 'f1': 0.2830188679245283}, 'combined': 0.14150943396226415, 'epoch': 22} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.13793103448275862, 'f1': 0.20512820512820515}, 'combined': 0.13675213675213677, 'epoch': 22} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32931238967343335, 'r': 0.3212043194731405, 'f1': 0.3252078248932276}, 'combined': 0.2396268183423782, 'epoch': 10} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38073307869771683, 'r': 0.30299176943483225, 'f1': 0.3374427286417082}, 'combined': 0.23958433733561282, 'epoch': 10} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31666666666666665, 'r': 0.41304347826086957, 'f1': 0.3584905660377358}, 'combined': 0.1792452830188679, 'epoch': 10} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3394620582120582, 'r': 0.3092447916666667, 'f1': 0.32364965312190286}, 'combined': 0.23847869177403366, 'epoch': 7} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36468931207169214, 'r': 0.2587193810770381, 'f1': 0.30269771384817323}, 'combined': 0.214915376832203, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 7} ****************************** Epoch: 23 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:01:32.289776: step: 2/463, loss: 0.005096936598420143 2023-01-24 03:01:32.871213: step: 4/463, loss: 0.013270650990307331 2023-01-24 03:01:33.524333: step: 6/463, loss: 0.06148231029510498 2023-01-24 03:01:34.086442: step: 8/463, loss: 0.09030760824680328 2023-01-24 03:01:34.683274: step: 10/463, loss: 0.04196205362677574 2023-01-24 03:01:35.309010: step: 12/463, loss: 0.011687913909554482 2023-01-24 03:01:35.991474: step: 14/463, loss: 0.014541353099048138 2023-01-24 03:01:36.686869: step: 16/463, loss: 0.03782951831817627 2023-01-24 03:01:37.296183: step: 18/463, loss: 0.000943364982958883 2023-01-24 03:01:37.982460: step: 20/463, loss: 0.02188112586736679 2023-01-24 03:01:38.639009: step: 22/463, loss: 0.07310856133699417 2023-01-24 03:01:39.242331: step: 24/463, loss: 0.0009404273587279022 2023-01-24 03:01:39.886982: step: 26/463, loss: 0.018125278875231743 2023-01-24 03:01:40.570080: step: 28/463, loss: 0.7358763813972473 2023-01-24 03:01:41.253344: step: 30/463, loss: 0.02595190517604351 2023-01-24 03:01:41.825298: step: 32/463, loss: 0.03461407497525215 2023-01-24 03:01:42.399938: step: 34/463, loss: 0.0297158844769001 2023-01-24 03:01:43.052641: step: 36/463, loss: 0.02196580544114113 2023-01-24 03:01:43.749012: step: 38/463, loss: 0.02222277969121933 2023-01-24 03:01:44.371887: step: 40/463, loss: 0.0844925120472908 2023-01-24 03:01:45.000535: step: 42/463, loss: 0.3803735375404358 2023-01-24 03:01:45.610942: step: 44/463, loss: 0.01139041967689991 2023-01-24 03:01:46.192105: step: 46/463, loss: 0.026062356308102608 2023-01-24 03:01:46.759381: step: 48/463, loss: 0.40363287925720215 2023-01-24 03:01:47.460610: step: 50/463, loss: 0.026555925607681274 2023-01-24 03:01:48.024966: step: 52/463, loss: 0.021823246031999588 2023-01-24 03:01:48.668018: step: 54/463, loss: 0.04538324475288391 2023-01-24 03:01:49.287124: step: 56/463, loss: 0.045336753129959106 2023-01-24 03:01:49.935282: step: 58/463, loss: 0.02406403422355652 2023-01-24 03:01:50.600858: step: 60/463, loss: 0.004356552846729755 2023-01-24 03:01:51.257295: step: 62/463, loss: 0.044643599539995193 2023-01-24 03:01:51.856974: step: 64/463, loss: 0.04017464444041252 2023-01-24 03:01:52.429237: step: 66/463, loss: 0.12421777844429016 2023-01-24 03:01:52.975553: step: 68/463, loss: 0.05655898153781891 2023-01-24 03:01:53.581751: step: 70/463, loss: 0.012709209695458412 2023-01-24 03:01:54.191664: step: 72/463, loss: 0.0039043223951011896 2023-01-24 03:01:54.779050: step: 74/463, loss: 0.007372914347797632 2023-01-24 03:01:55.355248: step: 76/463, loss: 0.001461189822293818 2023-01-24 03:01:55.969458: step: 78/463, loss: 1.4245637655258179 2023-01-24 03:01:56.558638: step: 80/463, loss: 0.031314436346292496 2023-01-24 03:01:57.147457: step: 82/463, loss: 0.8809185028076172 2023-01-24 03:01:57.773895: step: 84/463, loss: 0.028347458690404892 2023-01-24 03:01:58.352554: step: 86/463, loss: 0.016763821244239807 2023-01-24 03:01:58.994190: step: 88/463, loss: 0.014377339743077755 2023-01-24 03:01:59.647899: step: 90/463, loss: 0.05745711922645569 2023-01-24 03:02:00.259912: step: 92/463, loss: 0.06223868951201439 2023-01-24 03:02:00.893771: step: 94/463, loss: 0.0642935261130333 2023-01-24 03:02:01.450480: step: 96/463, loss: 0.008150258101522923 2023-01-24 03:02:01.993323: step: 98/463, loss: 0.005802695639431477 2023-01-24 03:02:02.580566: step: 100/463, loss: 0.027272483333945274 2023-01-24 03:02:03.162376: step: 102/463, loss: 0.02095605432987213 2023-01-24 03:02:03.726592: step: 104/463, loss: 0.02383429929614067 2023-01-24 03:02:04.281668: step: 106/463, loss: 0.025600360706448555 2023-01-24 03:02:04.912479: step: 108/463, loss: 0.08145993947982788 2023-01-24 03:02:05.568461: step: 110/463, loss: 0.015820417553186417 2023-01-24 03:02:06.176305: step: 112/463, loss: 0.03775973618030548 2023-01-24 03:02:06.797055: step: 114/463, loss: 0.03502754867076874 2023-01-24 03:02:07.406788: step: 116/463, loss: 0.0590132512152195 2023-01-24 03:02:08.023458: step: 118/463, loss: 0.03576357290148735 2023-01-24 03:02:08.564580: step: 120/463, loss: 0.02751271240413189 2023-01-24 03:02:09.211479: step: 122/463, loss: 0.05653354898095131 2023-01-24 03:02:09.765637: step: 124/463, loss: 0.013758930377662182 2023-01-24 03:02:10.308520: step: 126/463, loss: 0.025532040745019913 2023-01-24 03:02:10.894550: step: 128/463, loss: 0.014858371578156948 2023-01-24 03:02:11.429723: step: 130/463, loss: 0.08084791898727417 2023-01-24 03:02:12.037746: step: 132/463, loss: 0.005618555936962366 2023-01-24 03:02:12.675753: step: 134/463, loss: 0.3325827717781067 2023-01-24 03:02:13.316597: step: 136/463, loss: 0.07356379926204681 2023-01-24 03:02:13.883196: step: 138/463, loss: 0.11285888403654099 2023-01-24 03:02:14.512844: step: 140/463, loss: 0.01807739958167076 2023-01-24 03:02:15.129229: step: 142/463, loss: 0.030326643958687782 2023-01-24 03:02:15.754284: step: 144/463, loss: 0.04113316535949707 2023-01-24 03:02:16.407996: step: 146/463, loss: 0.010979506187140942 2023-01-24 03:02:16.995050: step: 148/463, loss: 0.05487872287631035 2023-01-24 03:02:17.663455: step: 150/463, loss: 0.0007523614913225174 2023-01-24 03:02:18.256219: step: 152/463, loss: 0.10144391655921936 2023-01-24 03:02:18.914566: step: 154/463, loss: 0.06588702648878098 2023-01-24 03:02:19.549792: step: 156/463, loss: 0.008753701113164425 2023-01-24 03:02:20.221239: step: 158/463, loss: 0.015952788293361664 2023-01-24 03:02:20.778035: step: 160/463, loss: 0.005687625613063574 2023-01-24 03:02:21.416961: step: 162/463, loss: 0.04886649176478386 2023-01-24 03:02:22.077048: step: 164/463, loss: 0.041700903326272964 2023-01-24 03:02:22.708934: step: 166/463, loss: 0.04705686867237091 2023-01-24 03:02:23.298216: step: 168/463, loss: 0.3169006109237671 2023-01-24 03:02:23.960320: step: 170/463, loss: 0.037628576159477234 2023-01-24 03:02:24.569914: step: 172/463, loss: 0.02688625268638134 2023-01-24 03:02:25.195984: step: 174/463, loss: 0.03487686440348625 2023-01-24 03:02:25.752810: step: 176/463, loss: 0.0648430734872818 2023-01-24 03:02:26.416806: step: 178/463, loss: 0.009201576933264732 2023-01-24 03:02:27.023489: step: 180/463, loss: 0.04278188198804855 2023-01-24 03:02:27.665238: step: 182/463, loss: 0.04402580484747887 2023-01-24 03:02:28.290461: step: 184/463, loss: 0.030194999650120735 2023-01-24 03:02:28.921097: step: 186/463, loss: 0.03596680983901024 2023-01-24 03:02:29.518704: step: 188/463, loss: 0.03121047280728817 2023-01-24 03:02:30.060812: step: 190/463, loss: 0.026846546679735184 2023-01-24 03:02:30.748430: step: 192/463, loss: 0.19226320087909698 2023-01-24 03:02:31.350530: step: 194/463, loss: 0.04002191126346588 2023-01-24 03:02:31.954303: step: 196/463, loss: 0.01696052961051464 2023-01-24 03:02:32.576507: step: 198/463, loss: 0.03162397816777229 2023-01-24 03:02:33.134153: step: 200/463, loss: 0.023736823350191116 2023-01-24 03:02:33.741720: step: 202/463, loss: 0.03542514145374298 2023-01-24 03:02:34.365682: step: 204/463, loss: 0.01799800433218479 2023-01-24 03:02:35.026677: step: 206/463, loss: 0.03839549422264099 2023-01-24 03:02:35.642821: step: 208/463, loss: 0.13379539549350739 2023-01-24 03:02:36.250324: step: 210/463, loss: 0.037441596388816833 2023-01-24 03:02:36.897795: step: 212/463, loss: 0.025361573323607445 2023-01-24 03:02:37.493916: step: 214/463, loss: 0.05786699056625366 2023-01-24 03:02:38.086022: step: 216/463, loss: 0.005558215081691742 2023-01-24 03:02:38.675574: step: 218/463, loss: 0.022210484370589256 2023-01-24 03:02:39.260452: step: 220/463, loss: 0.013788296841084957 2023-01-24 03:02:39.915931: step: 222/463, loss: 0.006603210233151913 2023-01-24 03:02:40.516273: step: 224/463, loss: 0.1517052948474884 2023-01-24 03:02:41.075821: step: 226/463, loss: 0.0012966709909960628 2023-01-24 03:02:41.737323: step: 228/463, loss: 0.05307304114103317 2023-01-24 03:02:42.291426: step: 230/463, loss: 0.009028688073158264 2023-01-24 03:02:42.895146: step: 232/463, loss: 0.04618513956665993 2023-01-24 03:02:43.488914: step: 234/463, loss: 0.003027483355253935 2023-01-24 03:02:44.093876: step: 236/463, loss: 0.01970463991165161 2023-01-24 03:02:44.731205: step: 238/463, loss: 0.0063621350564062595 2023-01-24 03:02:45.312358: step: 240/463, loss: 0.013801174238324165 2023-01-24 03:02:45.942996: step: 242/463, loss: 0.008990975096821785 2023-01-24 03:02:46.584202: step: 244/463, loss: 0.007047166116535664 2023-01-24 03:02:47.157418: step: 246/463, loss: 0.020788155496120453 2023-01-24 03:02:47.873707: step: 248/463, loss: 0.058315057307481766 2023-01-24 03:02:48.485859: step: 250/463, loss: 0.049473389983177185 2023-01-24 03:02:49.165085: step: 252/463, loss: 0.0901971310377121 2023-01-24 03:02:49.744571: step: 254/463, loss: 0.04614248499274254 2023-01-24 03:02:50.358929: step: 256/463, loss: 0.09800442308187485 2023-01-24 03:02:50.962068: step: 258/463, loss: 0.06395212560892105 2023-01-24 03:02:51.566445: step: 260/463, loss: 0.002424429403617978 2023-01-24 03:02:52.316224: step: 262/463, loss: 0.025325989350676537 2023-01-24 03:02:52.905132: step: 264/463, loss: 0.08404811471700668 2023-01-24 03:02:53.538583: step: 266/463, loss: 0.02640512026846409 2023-01-24 03:02:54.140095: step: 268/463, loss: 0.010167410597205162 2023-01-24 03:02:54.805215: step: 270/463, loss: 0.10589731484651566 2023-01-24 03:02:55.430880: step: 272/463, loss: 0.012354912236332893 2023-01-24 03:02:55.976881: step: 274/463, loss: 0.013367678038775921 2023-01-24 03:02:56.532350: step: 276/463, loss: 0.002007046714425087 2023-01-24 03:02:57.093528: step: 278/463, loss: 0.001871357555501163 2023-01-24 03:02:57.661930: step: 280/463, loss: 0.05698772892355919 2023-01-24 03:02:58.237755: step: 282/463, loss: 0.06988706439733505 2023-01-24 03:02:58.818911: step: 284/463, loss: 0.0586654469370842 2023-01-24 03:02:59.402763: step: 286/463, loss: 0.024856537580490112 2023-01-24 03:02:59.997755: step: 288/463, loss: 0.046403832733631134 2023-01-24 03:03:00.639323: step: 290/463, loss: 0.11522916704416275 2023-01-24 03:03:01.299626: step: 292/463, loss: 0.04186617210507393 2023-01-24 03:03:02.005428: step: 294/463, loss: 0.022411443293094635 2023-01-24 03:03:02.631628: step: 296/463, loss: 0.10930786281824112 2023-01-24 03:03:03.210836: step: 298/463, loss: 0.008791784755885601 2023-01-24 03:03:03.815194: step: 300/463, loss: 0.34448447823524475 2023-01-24 03:03:04.366019: step: 302/463, loss: 0.0003086405631620437 2023-01-24 03:03:04.957158: step: 304/463, loss: 0.06195533275604248 2023-01-24 03:03:05.586645: step: 306/463, loss: 3.7575559616088867 2023-01-24 03:03:06.167023: step: 308/463, loss: 0.08836643397808075 2023-01-24 03:03:06.735977: step: 310/463, loss: 0.0765085220336914 2023-01-24 03:03:07.317185: step: 312/463, loss: 0.010536403395235538 2023-01-24 03:03:07.965353: step: 314/463, loss: 0.05229222774505615 2023-01-24 03:03:08.621540: step: 316/463, loss: 0.005376673303544521 2023-01-24 03:03:09.275851: step: 318/463, loss: 0.03871780261397362 2023-01-24 03:03:09.891389: step: 320/463, loss: 0.021163197234272957 2023-01-24 03:03:10.520639: step: 322/463, loss: 0.038284845650196075 2023-01-24 03:03:11.165669: step: 324/463, loss: 0.01636217162013054 2023-01-24 03:03:11.706297: step: 326/463, loss: 0.036273494362831116 2023-01-24 03:03:12.343904: step: 328/463, loss: 0.010017866268754005 2023-01-24 03:03:12.929100: step: 330/463, loss: 0.012987456284463406 2023-01-24 03:03:13.562769: step: 332/463, loss: 0.10996150970458984 2023-01-24 03:03:14.211185: step: 334/463, loss: 0.07108881324529648 2023-01-24 03:03:14.798206: step: 336/463, loss: 1.3048917055130005 2023-01-24 03:03:15.432059: step: 338/463, loss: 0.045702092349529266 2023-01-24 03:03:16.091975: step: 340/463, loss: 0.001985273091122508 2023-01-24 03:03:16.727320: step: 342/463, loss: 0.08605942130088806 2023-01-24 03:03:17.338938: step: 344/463, loss: 0.009351961314678192 2023-01-24 03:03:17.880418: step: 346/463, loss: 0.07398391515016556 2023-01-24 03:03:18.423406: step: 348/463, loss: 0.020257027819752693 2023-01-24 03:03:19.124828: step: 350/463, loss: 0.056005943566560745 2023-01-24 03:03:19.747902: step: 352/463, loss: 0.04283449426293373 2023-01-24 03:03:20.401305: step: 354/463, loss: 0.017264682799577713 2023-01-24 03:03:20.985624: step: 356/463, loss: 0.006373625248670578 2023-01-24 03:03:21.643522: step: 358/463, loss: 0.11350943893194199 2023-01-24 03:03:22.246825: step: 360/463, loss: 0.21139474213123322 2023-01-24 03:03:22.910446: step: 362/463, loss: 0.026628732681274414 2023-01-24 03:03:23.579924: step: 364/463, loss: 0.031578268855810165 2023-01-24 03:03:24.150446: step: 366/463, loss: 0.03399159759283066 2023-01-24 03:03:24.687068: step: 368/463, loss: 0.01496343407779932 2023-01-24 03:03:25.314525: step: 370/463, loss: 0.05713052675127983 2023-01-24 03:03:25.886883: step: 372/463, loss: 0.008241965435445309 2023-01-24 03:03:26.509808: step: 374/463, loss: 0.044275447726249695 2023-01-24 03:03:27.132265: step: 376/463, loss: 0.09603936970233917 2023-01-24 03:03:27.780633: step: 378/463, loss: 0.06562691926956177 2023-01-24 03:03:28.340646: step: 380/463, loss: 0.014318152330815792 2023-01-24 03:03:28.994780: step: 382/463, loss: 0.04899163171648979 2023-01-24 03:03:29.624372: step: 384/463, loss: 0.24723175168037415 2023-01-24 03:03:30.265993: step: 386/463, loss: 0.0681615099310875 2023-01-24 03:03:30.844261: step: 388/463, loss: 0.02611803635954857 2023-01-24 03:03:31.512934: step: 390/463, loss: 0.014665725640952587 2023-01-24 03:03:32.104403: step: 392/463, loss: 0.00932372733950615 2023-01-24 03:03:32.694436: step: 394/463, loss: 0.34211471676826477 2023-01-24 03:03:33.191888: step: 396/463, loss: 0.009036600589752197 2023-01-24 03:03:33.779241: step: 398/463, loss: 0.0802062451839447 2023-01-24 03:03:34.343593: step: 400/463, loss: 0.006733268965035677 2023-01-24 03:03:34.929431: step: 402/463, loss: 0.0168038010597229 2023-01-24 03:03:35.492359: step: 404/463, loss: 0.026164794340729713 2023-01-24 03:03:36.143967: step: 406/463, loss: 0.0670684427022934 2023-01-24 03:03:36.763754: step: 408/463, loss: 0.02316388301551342 2023-01-24 03:03:37.390623: step: 410/463, loss: 0.14573483169078827 2023-01-24 03:03:38.047276: step: 412/463, loss: 0.038855817168951035 2023-01-24 03:03:38.607129: step: 414/463, loss: 0.033104486763477325 2023-01-24 03:03:39.234508: step: 416/463, loss: 0.019254788756370544 2023-01-24 03:03:39.841996: step: 418/463, loss: 0.40515032410621643 2023-01-24 03:03:40.417725: step: 420/463, loss: 0.03507262095808983 2023-01-24 03:03:41.015310: step: 422/463, loss: 0.08524802327156067 2023-01-24 03:03:41.614248: step: 424/463, loss: 0.01159429457038641 2023-01-24 03:03:42.169913: step: 426/463, loss: 0.04514092579483986 2023-01-24 03:03:42.747463: step: 428/463, loss: 0.08638449013233185 2023-01-24 03:03:43.365440: step: 430/463, loss: 0.031418800354003906 2023-01-24 03:03:43.952191: step: 432/463, loss: 0.061216242611408234 2023-01-24 03:03:44.561314: step: 434/463, loss: 0.02077518403530121 2023-01-24 03:03:45.155816: step: 436/463, loss: 0.038224902004003525 2023-01-24 03:03:45.790466: step: 438/463, loss: 0.016339577734470367 2023-01-24 03:03:46.487024: step: 440/463, loss: 0.008132067508995533 2023-01-24 03:03:47.100451: step: 442/463, loss: 0.026998596265912056 2023-01-24 03:03:47.776778: step: 444/463, loss: 0.0810854583978653 2023-01-24 03:03:48.390716: step: 446/463, loss: 0.014631603844463825 2023-01-24 03:03:49.010867: step: 448/463, loss: 0.00880909152328968 2023-01-24 03:03:49.603521: step: 450/463, loss: 0.11519897729158401 2023-01-24 03:03:50.235080: step: 452/463, loss: 0.07256796211004257 2023-01-24 03:03:50.864321: step: 454/463, loss: 0.009963958524167538 2023-01-24 03:03:51.404251: step: 456/463, loss: 0.00932431872934103 2023-01-24 03:03:52.032176: step: 458/463, loss: 0.012884360738098621 2023-01-24 03:03:52.613542: step: 460/463, loss: 0.050746552646160126 2023-01-24 03:03:53.249173: step: 462/463, loss: 0.6527263522148132 2023-01-24 03:03:53.808784: step: 464/463, loss: 0.008358379825949669 2023-01-24 03:03:54.411602: step: 466/463, loss: 0.03967779874801636 2023-01-24 03:03:55.054526: step: 468/463, loss: 0.05550188943743706 2023-01-24 03:03:55.655404: step: 470/463, loss: 0.09276923537254333 2023-01-24 03:03:56.295940: step: 472/463, loss: 0.07674828916788101 2023-01-24 03:03:56.864076: step: 474/463, loss: 0.0265700314193964 2023-01-24 03:03:57.447329: step: 476/463, loss: 0.03339032828807831 2023-01-24 03:03:58.076881: step: 478/463, loss: 0.00848634447902441 2023-01-24 03:03:58.671902: step: 480/463, loss: 0.01570565067231655 2023-01-24 03:03:59.286861: step: 482/463, loss: 0.025028344243764877 2023-01-24 03:03:59.946227: step: 484/463, loss: 0.02584182843565941 2023-01-24 03:04:00.539577: step: 486/463, loss: 0.014970345422625542 2023-01-24 03:04:01.132694: step: 488/463, loss: 0.03295440971851349 2023-01-24 03:04:01.831356: step: 490/463, loss: 0.02505284734070301 2023-01-24 03:04:02.495105: step: 492/463, loss: 0.03633524104952812 2023-01-24 03:04:03.048169: step: 494/463, loss: 0.05275886878371239 2023-01-24 03:04:03.733059: step: 496/463, loss: 0.12582585215568542 2023-01-24 03:04:04.329301: step: 498/463, loss: 0.03214393928647041 2023-01-24 03:04:04.885168: step: 500/463, loss: 0.22791317105293274 2023-01-24 03:04:05.463968: step: 502/463, loss: 0.03206854313611984 2023-01-24 03:04:06.037859: step: 504/463, loss: 0.05119514837861061 2023-01-24 03:04:06.655839: step: 506/463, loss: 0.03073558770120144 2023-01-24 03:04:07.288389: step: 508/463, loss: 0.3199561834335327 2023-01-24 03:04:08.038816: step: 510/463, loss: 0.02415766380727291 2023-01-24 03:04:08.677476: step: 512/463, loss: 0.06260869652032852 2023-01-24 03:04:09.264198: step: 514/463, loss: 0.07782407104969025 2023-01-24 03:04:09.828624: step: 516/463, loss: 0.03402506560087204 2023-01-24 03:04:10.417680: step: 518/463, loss: 0.11775349825620651 2023-01-24 03:04:11.045491: step: 520/463, loss: 0.041348524391651154 2023-01-24 03:04:11.573327: step: 522/463, loss: 0.01716575212776661 2023-01-24 03:04:12.176040: step: 524/463, loss: 0.035621318966150284 2023-01-24 03:04:12.797383: step: 526/463, loss: 0.07884184271097183 2023-01-24 03:04:13.444751: step: 528/463, loss: 0.17825309932231903 2023-01-24 03:04:14.024212: step: 530/463, loss: 0.12087511271238327 2023-01-24 03:04:14.705622: step: 532/463, loss: 0.008257918991148472 2023-01-24 03:04:15.341216: step: 534/463, loss: 0.058856312185525894 2023-01-24 03:04:15.928673: step: 536/463, loss: 0.01987527683377266 2023-01-24 03:04:16.515655: step: 538/463, loss: 0.04442232474684715 2023-01-24 03:04:17.096720: step: 540/463, loss: 0.015050223097205162 2023-01-24 03:04:17.730857: step: 542/463, loss: 0.03745420649647713 2023-01-24 03:04:18.382066: step: 544/463, loss: 0.07239226251840591 2023-01-24 03:04:19.059230: step: 546/463, loss: 0.04585061967372894 2023-01-24 03:04:19.725378: step: 548/463, loss: 0.14604027569293976 2023-01-24 03:04:20.352659: step: 550/463, loss: 0.05398416891694069 2023-01-24 03:04:20.906671: step: 552/463, loss: 0.03154251351952553 2023-01-24 03:04:21.495334: step: 554/463, loss: 0.2035921812057495 2023-01-24 03:04:22.188889: step: 556/463, loss: 0.04157143831253052 2023-01-24 03:04:22.871747: step: 558/463, loss: 0.15549473464488983 2023-01-24 03:04:23.411498: step: 560/463, loss: 0.020670577883720398 2023-01-24 03:04:24.045380: step: 562/463, loss: 0.015088832005858421 2023-01-24 03:04:24.606521: step: 564/463, loss: 0.08446826785802841 2023-01-24 03:04:25.158416: step: 566/463, loss: 0.03026680275797844 2023-01-24 03:04:25.748585: step: 568/463, loss: 0.01578553207218647 2023-01-24 03:04:26.382774: step: 570/463, loss: 0.03532259911298752 2023-01-24 03:04:26.985734: step: 572/463, loss: 0.03016880340874195 2023-01-24 03:04:27.497076: step: 574/463, loss: 0.028288643807172775 2023-01-24 03:04:28.187017: step: 576/463, loss: 0.09895004332065582 2023-01-24 03:04:28.860921: step: 578/463, loss: 0.016342800110578537 2023-01-24 03:04:29.468897: step: 580/463, loss: 0.0074662938714027405 2023-01-24 03:04:30.074413: step: 582/463, loss: 0.013829294592142105 2023-01-24 03:04:30.651696: step: 584/463, loss: 0.06728711724281311 2023-01-24 03:04:31.250404: step: 586/463, loss: 0.09597694128751755 2023-01-24 03:04:31.861327: step: 588/463, loss: 0.06373672187328339 2023-01-24 03:04:32.501487: step: 590/463, loss: 0.01779930852353573 2023-01-24 03:04:33.120865: step: 592/463, loss: 0.0147006930783391 2023-01-24 03:04:33.663539: step: 594/463, loss: 0.387723833322525 2023-01-24 03:04:34.213250: step: 596/463, loss: 0.004438089672476053 2023-01-24 03:04:34.864337: step: 598/463, loss: 0.03009369596838951 2023-01-24 03:04:35.491746: step: 600/463, loss: 0.023197118192911148 2023-01-24 03:04:36.118348: step: 602/463, loss: 0.06278545409440994 2023-01-24 03:04:36.689298: step: 604/463, loss: 0.32562196254730225 2023-01-24 03:04:37.305778: step: 606/463, loss: 0.0519973523914814 2023-01-24 03:04:37.884369: step: 608/463, loss: 0.12682834267616272 2023-01-24 03:04:38.470551: step: 610/463, loss: 0.02542271837592125 2023-01-24 03:04:39.114364: step: 612/463, loss: 0.006264375988394022 2023-01-24 03:04:39.710417: step: 614/463, loss: 0.07178104668855667 2023-01-24 03:04:40.349010: step: 616/463, loss: 0.017693817615509033 2023-01-24 03:04:40.992790: step: 618/463, loss: 0.008118276484310627 2023-01-24 03:04:41.629994: step: 620/463, loss: 0.21454280614852905 2023-01-24 03:04:42.211206: step: 622/463, loss: 0.02879582718014717 2023-01-24 03:04:42.849013: step: 624/463, loss: 0.1912730187177658 2023-01-24 03:04:43.401030: step: 626/463, loss: 0.019410710781812668 2023-01-24 03:04:43.997663: step: 628/463, loss: 0.02241077832877636 2023-01-24 03:04:44.671627: step: 630/463, loss: 1.1033974885940552 2023-01-24 03:04:45.309775: step: 632/463, loss: 0.02533876709640026 2023-01-24 03:04:46.003210: step: 634/463, loss: 0.07534828037023544 2023-01-24 03:04:46.639438: step: 636/463, loss: 0.3957071304321289 2023-01-24 03:04:47.218777: step: 638/463, loss: 0.0414176806807518 2023-01-24 03:04:47.818259: step: 640/463, loss: 0.0315694659948349 2023-01-24 03:04:48.469482: step: 642/463, loss: 0.024500545114278793 2023-01-24 03:04:49.044484: step: 644/463, loss: 0.03906841203570366 2023-01-24 03:04:49.674257: step: 646/463, loss: 0.008605624549090862 2023-01-24 03:04:50.287336: step: 648/463, loss: 0.058674298226833344 2023-01-24 03:04:50.928942: step: 650/463, loss: 0.007269266061484814 2023-01-24 03:04:51.669718: step: 652/463, loss: 0.07943674176931381 2023-01-24 03:04:52.258401: step: 654/463, loss: 0.03246498852968216 2023-01-24 03:04:52.823461: step: 656/463, loss: 2.6792845726013184 2023-01-24 03:04:53.429555: step: 658/463, loss: 0.03697653114795685 2023-01-24 03:04:53.974202: step: 660/463, loss: 0.0018811600748449564 2023-01-24 03:04:54.491030: step: 662/463, loss: 0.018558546900749207 2023-01-24 03:04:55.061991: step: 664/463, loss: 0.019869863986968994 2023-01-24 03:04:55.713318: step: 666/463, loss: 0.04500496760010719 2023-01-24 03:04:56.276548: step: 668/463, loss: 0.08020011335611343 2023-01-24 03:04:56.918794: step: 670/463, loss: 0.06756056845188141 2023-01-24 03:04:57.543501: step: 672/463, loss: 0.3430831730365753 2023-01-24 03:04:58.150930: step: 674/463, loss: 0.0457264743745327 2023-01-24 03:04:58.772309: step: 676/463, loss: 0.061786748468875885 2023-01-24 03:04:59.424655: step: 678/463, loss: 0.036290787160396576 2023-01-24 03:05:00.069814: step: 680/463, loss: 0.0898541733622551 2023-01-24 03:05:00.661131: step: 682/463, loss: 0.020395949482917786 2023-01-24 03:05:01.218743: step: 684/463, loss: 0.12841841578483582 2023-01-24 03:05:01.788898: step: 686/463, loss: 0.009493554010987282 2023-01-24 03:05:02.359582: step: 688/463, loss: 0.026455730199813843 2023-01-24 03:05:02.993084: step: 690/463, loss: 0.04941210523247719 2023-01-24 03:05:03.604829: step: 692/463, loss: 0.12173417955636978 2023-01-24 03:05:04.179303: step: 694/463, loss: 0.059419915080070496 2023-01-24 03:05:04.781311: step: 696/463, loss: 0.056162260472774506 2023-01-24 03:05:05.319226: step: 698/463, loss: 0.0347118005156517 2023-01-24 03:05:05.921001: step: 700/463, loss: 0.04832358658313751 2023-01-24 03:05:06.496016: step: 702/463, loss: 0.05221627280116081 2023-01-24 03:05:07.200311: step: 704/463, loss: 0.4571799039840698 2023-01-24 03:05:07.869234: step: 706/463, loss: 0.06903350353240967 2023-01-24 03:05:08.429732: step: 708/463, loss: 0.02902594953775406 2023-01-24 03:05:09.004656: step: 710/463, loss: 0.01964576728641987 2023-01-24 03:05:09.604957: step: 712/463, loss: 0.7473657131195068 2023-01-24 03:05:10.176569: step: 714/463, loss: 2.6129543781280518 2023-01-24 03:05:10.772694: step: 716/463, loss: 0.07972441613674164 2023-01-24 03:05:11.352024: step: 718/463, loss: 0.00647320132702589 2023-01-24 03:05:11.976682: step: 720/463, loss: 0.009425453841686249 2023-01-24 03:05:12.546320: step: 722/463, loss: 0.02006380259990692 2023-01-24 03:05:13.187390: step: 724/463, loss: 0.06984228640794754 2023-01-24 03:05:13.905184: step: 726/463, loss: 0.04344932734966278 2023-01-24 03:05:14.519821: step: 728/463, loss: 0.06261858344078064 2023-01-24 03:05:15.169797: step: 730/463, loss: 0.05166703462600708 2023-01-24 03:05:15.771850: step: 732/463, loss: 0.1009795218706131 2023-01-24 03:05:16.364382: step: 734/463, loss: 0.15569937229156494 2023-01-24 03:05:16.958946: step: 736/463, loss: 0.01796805113554001 2023-01-24 03:05:17.600087: step: 738/463, loss: 0.19479985535144806 2023-01-24 03:05:18.200185: step: 740/463, loss: 0.033429719507694244 2023-01-24 03:05:18.748257: step: 742/463, loss: 0.005722632631659508 2023-01-24 03:05:19.334916: step: 744/463, loss: 0.03329009190201759 2023-01-24 03:05:19.917602: step: 746/463, loss: 0.037486616522073746 2023-01-24 03:05:20.508078: step: 748/463, loss: 5.83509635925293 2023-01-24 03:05:21.146956: step: 750/463, loss: 0.011488042771816254 2023-01-24 03:05:21.773749: step: 752/463, loss: 0.028934795409440994 2023-01-24 03:05:22.395593: step: 754/463, loss: 0.134428009390831 2023-01-24 03:05:23.067876: step: 756/463, loss: 0.09756171703338623 2023-01-24 03:05:23.628782: step: 758/463, loss: 0.07500789314508438 2023-01-24 03:05:24.222230: step: 760/463, loss: 0.05587540194392204 2023-01-24 03:05:24.809827: step: 762/463, loss: 0.15359681844711304 2023-01-24 03:05:25.382615: step: 764/463, loss: 0.01976839266717434 2023-01-24 03:05:25.966124: step: 766/463, loss: 0.014496381394565105 2023-01-24 03:05:26.551935: step: 768/463, loss: 0.01970326341688633 2023-01-24 03:05:27.082548: step: 770/463, loss: 0.002839399268850684 2023-01-24 03:05:27.727072: step: 772/463, loss: 0.039021603763103485 2023-01-24 03:05:28.425142: step: 774/463, loss: 0.04968719929456711 2023-01-24 03:05:28.993318: step: 776/463, loss: 0.021419554948806763 2023-01-24 03:05:29.700512: step: 778/463, loss: 0.023128392174839973 2023-01-24 03:05:30.327586: step: 780/463, loss: 0.02912815287709236 2023-01-24 03:05:30.916895: step: 782/463, loss: 0.015202147886157036 2023-01-24 03:05:31.486415: step: 784/463, loss: 0.09406363219022751 2023-01-24 03:05:32.145920: step: 786/463, loss: 0.014476080425083637 2023-01-24 03:05:32.783556: step: 788/463, loss: 0.07128921151161194 2023-01-24 03:05:33.391271: step: 790/463, loss: 0.04042304307222366 2023-01-24 03:05:34.061101: step: 792/463, loss: 0.0189340952783823 2023-01-24 03:05:34.620781: step: 794/463, loss: 0.009419827722012997 2023-01-24 03:05:35.255232: step: 796/463, loss: 0.015901731327176094 2023-01-24 03:05:35.880494: step: 798/463, loss: 0.042158033698797226 2023-01-24 03:05:36.537768: step: 800/463, loss: 0.05914309248328209 2023-01-24 03:05:37.157695: step: 802/463, loss: 0.0688527375459671 2023-01-24 03:05:37.748545: step: 804/463, loss: 0.1666351556777954 2023-01-24 03:05:38.350318: step: 806/463, loss: 0.1822570413351059 2023-01-24 03:05:38.971179: step: 808/463, loss: 0.729502260684967 2023-01-24 03:05:39.542718: step: 810/463, loss: 0.046657297760248184 2023-01-24 03:05:40.119724: step: 812/463, loss: 0.032568708062171936 2023-01-24 03:05:40.677535: step: 814/463, loss: 0.02207283489406109 2023-01-24 03:05:41.305296: step: 816/463, loss: 0.022646073251962662 2023-01-24 03:05:42.036964: step: 818/463, loss: 0.04154231399297714 2023-01-24 03:05:42.639959: step: 820/463, loss: 0.0008527428144589067 2023-01-24 03:05:43.216676: step: 822/463, loss: 0.03849545121192932 2023-01-24 03:05:43.828622: step: 824/463, loss: 0.028142722323536873 2023-01-24 03:05:44.459806: step: 826/463, loss: 0.029835142195224762 2023-01-24 03:05:45.012931: step: 828/463, loss: 0.04216647893190384 2023-01-24 03:05:45.640572: step: 830/463, loss: 0.04328504949808121 2023-01-24 03:05:46.284538: step: 832/463, loss: 0.008418519049882889 2023-01-24 03:05:46.866874: step: 834/463, loss: 0.08349798619747162 2023-01-24 03:05:47.492909: step: 836/463, loss: 0.06518686562776566 2023-01-24 03:05:48.075312: step: 838/463, loss: 0.016296187415719032 2023-01-24 03:05:48.913828: step: 840/463, loss: 0.019032509997487068 2023-01-24 03:05:49.557938: step: 842/463, loss: 0.06742745637893677 2023-01-24 03:05:50.159592: step: 844/463, loss: 0.012801941484212875 2023-01-24 03:05:50.776218: step: 846/463, loss: 0.014034632593393326 2023-01-24 03:05:51.403531: step: 848/463, loss: 0.07413165271282196 2023-01-24 03:05:52.008282: step: 850/463, loss: 0.012852827087044716 2023-01-24 03:05:52.579432: step: 852/463, loss: 0.23427478969097137 2023-01-24 03:05:53.171758: step: 854/463, loss: 0.029929308220744133 2023-01-24 03:05:53.772956: step: 856/463, loss: 0.027140161022543907 2023-01-24 03:05:54.378425: step: 858/463, loss: 0.031705450266599655 2023-01-24 03:05:54.996589: step: 860/463, loss: 0.015361717902123928 2023-01-24 03:05:55.605708: step: 862/463, loss: 0.08627556264400482 2023-01-24 03:05:56.208686: step: 864/463, loss: 0.06395214796066284 2023-01-24 03:05:56.916050: step: 866/463, loss: 0.0245131216943264 2023-01-24 03:05:57.539068: step: 868/463, loss: 0.028786538168787956 2023-01-24 03:05:58.107240: step: 870/463, loss: 0.011020555160939693 2023-01-24 03:05:58.734707: step: 872/463, loss: 0.17954382300376892 2023-01-24 03:05:59.356601: step: 874/463, loss: 0.15770022571086884 2023-01-24 03:05:59.958267: step: 876/463, loss: 0.24936971068382263 2023-01-24 03:06:00.585887: step: 878/463, loss: 0.22353777289390564 2023-01-24 03:06:01.165897: step: 880/463, loss: 0.044009730219841 2023-01-24 03:06:01.763708: step: 882/463, loss: 0.021632947027683258 2023-01-24 03:06:02.348137: step: 884/463, loss: 0.022124452516436577 2023-01-24 03:06:03.039449: step: 886/463, loss: 0.0385926328599453 2023-01-24 03:06:03.641374: step: 888/463, loss: 0.24293048679828644 2023-01-24 03:06:04.346889: step: 890/463, loss: 0.0149305434897542 2023-01-24 03:06:04.977890: step: 892/463, loss: 0.23296810686588287 2023-01-24 03:06:05.556261: step: 894/463, loss: 0.04253717139363289 2023-01-24 03:06:06.098770: step: 896/463, loss: 0.061262618750333786 2023-01-24 03:06:06.681341: step: 898/463, loss: 0.003988142590969801 2023-01-24 03:06:07.270177: step: 900/463, loss: 0.09775742143392563 2023-01-24 03:06:07.941640: step: 902/463, loss: 0.11408239603042603 2023-01-24 03:06:08.522298: step: 904/463, loss: 0.04647267982363701 2023-01-24 03:06:09.098306: step: 906/463, loss: 0.04064179211854935 2023-01-24 03:06:09.676566: step: 908/463, loss: 0.026339896023273468 2023-01-24 03:06:10.263873: step: 910/463, loss: 0.04754228889942169 2023-01-24 03:06:10.818555: step: 912/463, loss: 0.014151930809020996 2023-01-24 03:06:11.434313: step: 914/463, loss: 0.05089818686246872 2023-01-24 03:06:12.113901: step: 916/463, loss: 0.018807677552103996 2023-01-24 03:06:12.697285: step: 918/463, loss: 0.3928579092025757 2023-01-24 03:06:13.303750: step: 920/463, loss: 0.030363544821739197 2023-01-24 03:06:13.921761: step: 922/463, loss: 0.024525126442313194 2023-01-24 03:06:14.557266: step: 924/463, loss: 0.009850341826677322 2023-01-24 03:06:15.232059: step: 926/463, loss: 0.03586370497941971 ================================================== Loss: 0.104 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.322532233481628, 'r': 0.3121279678854465, 'f1': 0.31724481981799474}, 'combined': 0.23375934091852243, 'epoch': 23} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.36446154928825253, 'r': 0.30689825049141684, 'f1': 0.3332121222768012}, 'combined': 0.23442058853644307, 'epoch': 23} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3262366685377491, 'r': 0.31509386012469504, 'f1': 0.3205684638720353}, 'combined': 0.23620834180044706, 'epoch': 23} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3687135215850542, 'r': 0.3117158854972336, 'f1': 0.33782743861271414}, 'combined': 0.23985748141502702, 'epoch': 23} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33706229073660715, 'r': 0.327468487394958, 'f1': 0.3321961363948852}, 'combined': 0.24477610050149434, 'epoch': 23} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37758389249818575, 'r': 0.30299127037878887, 'f1': 0.3361998189266096}, 'combined': 0.23870187143789282, 'epoch': 23} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.27927927927927926, 'r': 0.2952380952380952, 'f1': 0.28703703703703703}, 'combined': 0.19135802469135801, 'epoch': 23} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3046875, 'r': 0.42391304347826086, 'f1': 0.3545454545454545}, 'combined': 0.17727272727272725, 'epoch': 23} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5277777777777778, 'r': 0.16379310344827586, 'f1': 0.24999999999999997}, 'combined': 0.16666666666666663, 'epoch': 23} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32931238967343335, 'r': 0.3212043194731405, 'f1': 0.3252078248932276}, 'combined': 0.2396268183423782, 'epoch': 10} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38073307869771683, 'r': 0.30299176943483225, 'f1': 0.3374427286417082}, 'combined': 0.23958433733561282, 'epoch': 10} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31666666666666665, 'r': 0.41304347826086957, 'f1': 0.3584905660377358}, 'combined': 0.1792452830188679, 'epoch': 10} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3394620582120582, 'r': 0.3092447916666667, 'f1': 0.32364965312190286}, 'combined': 0.23847869177403366, 'epoch': 7} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36468931207169214, 'r': 0.2587193810770381, 'f1': 0.30269771384817323}, 'combined': 0.214915376832203, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 7} ****************************** Epoch: 24 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:08:48.589368: step: 2/463, loss: 0.168624609708786 2023-01-24 03:08:49.155821: step: 4/463, loss: 0.1596832573413849 2023-01-24 03:08:49.837790: step: 6/463, loss: 0.015152639709413052 2023-01-24 03:08:50.428376: step: 8/463, loss: 0.021345142275094986 2023-01-24 03:08:51.033681: step: 10/463, loss: 0.9062269926071167 2023-01-24 03:08:51.615465: step: 12/463, loss: 0.010064271278679371 2023-01-24 03:08:52.247188: step: 14/463, loss: 0.005218781530857086 2023-01-24 03:08:52.831027: step: 16/463, loss: 0.058671530336141586 2023-01-24 03:08:53.464018: step: 18/463, loss: 0.02666182816028595 2023-01-24 03:08:54.057341: step: 20/463, loss: 0.005787883885204792 2023-01-24 03:08:54.702703: step: 22/463, loss: 0.1644509881734848 2023-01-24 03:08:55.305672: step: 24/463, loss: 0.05098911374807358 2023-01-24 03:08:55.923246: step: 26/463, loss: 0.05166210979223251 2023-01-24 03:08:56.704194: step: 28/463, loss: 0.022072214633226395 2023-01-24 03:08:57.248271: step: 30/463, loss: 0.004029449541121721 2023-01-24 03:08:57.865608: step: 32/463, loss: 0.032785434275865555 2023-01-24 03:08:58.453963: step: 34/463, loss: 0.028690647333860397 2023-01-24 03:08:59.135318: step: 36/463, loss: 0.09718084335327148 2023-01-24 03:08:59.762845: step: 38/463, loss: 0.07405851781368256 2023-01-24 03:09:00.427423: step: 40/463, loss: 0.13892057538032532 2023-01-24 03:09:01.021840: step: 42/463, loss: 0.015102247707545757 2023-01-24 03:09:01.582467: step: 44/463, loss: 0.004068958107382059 2023-01-24 03:09:02.100063: step: 46/463, loss: 0.07219784706830978 2023-01-24 03:09:02.808395: step: 48/463, loss: 0.008276185020804405 2023-01-24 03:09:03.425274: step: 50/463, loss: 0.07040373235940933 2023-01-24 03:09:04.040197: step: 52/463, loss: 0.0034492372069507837 2023-01-24 03:09:04.665671: step: 54/463, loss: 0.009248940274119377 2023-01-24 03:09:05.288982: step: 56/463, loss: 0.01978440023958683 2023-01-24 03:09:05.842430: step: 58/463, loss: 0.009476671926677227 2023-01-24 03:09:06.439980: step: 60/463, loss: 0.019692204892635345 2023-01-24 03:09:07.018578: step: 62/463, loss: 0.03643761947751045 2023-01-24 03:09:07.630951: step: 64/463, loss: 0.09649589657783508 2023-01-24 03:09:08.295365: step: 66/463, loss: 0.8919044733047485 2023-01-24 03:09:08.852241: step: 68/463, loss: 0.021456222981214523 2023-01-24 03:09:09.472147: step: 70/463, loss: 0.14469501376152039 2023-01-24 03:09:10.068351: step: 72/463, loss: 0.06159430742263794 2023-01-24 03:09:10.693476: step: 74/463, loss: 0.08320866525173187 2023-01-24 03:09:11.293913: step: 76/463, loss: 0.057258959859609604 2023-01-24 03:09:11.972076: step: 78/463, loss: 0.06622933596372604 2023-01-24 03:09:12.559182: step: 80/463, loss: 0.011328347027301788 2023-01-24 03:09:13.140359: step: 82/463, loss: 0.004046339076012373 2023-01-24 03:09:13.776055: step: 84/463, loss: 0.013327288441359997 2023-01-24 03:09:14.390213: step: 86/463, loss: 0.01771301031112671 2023-01-24 03:09:14.933783: step: 88/463, loss: 0.017181696370244026 2023-01-24 03:09:15.484183: step: 90/463, loss: 0.031847644597291946 2023-01-24 03:09:16.123921: step: 92/463, loss: 0.001734257093630731 2023-01-24 03:09:16.697373: step: 94/463, loss: 0.007677286397665739 2023-01-24 03:09:17.307230: step: 96/463, loss: 0.044134873896837234 2023-01-24 03:09:17.894242: step: 98/463, loss: 0.012573414482176304 2023-01-24 03:09:18.518595: step: 100/463, loss: 0.012284583412110806 2023-01-24 03:09:19.121486: step: 102/463, loss: 0.6099564433097839 2023-01-24 03:09:19.787308: step: 104/463, loss: 0.008521310053765774 2023-01-24 03:09:20.324396: step: 106/463, loss: 0.014910469762980938 2023-01-24 03:09:20.904710: step: 108/463, loss: 0.03237385302782059 2023-01-24 03:09:21.513895: step: 110/463, loss: 0.6833165884017944 2023-01-24 03:09:22.128646: step: 112/463, loss: 0.0018552248366177082 2023-01-24 03:09:22.807072: step: 114/463, loss: 0.06723944842815399 2023-01-24 03:09:23.430454: step: 116/463, loss: 0.020594270899891853 2023-01-24 03:09:24.056619: step: 118/463, loss: 0.013294066302478313 2023-01-24 03:09:24.573859: step: 120/463, loss: 0.00810252409428358 2023-01-24 03:09:25.162041: step: 122/463, loss: 0.1246970072388649 2023-01-24 03:09:25.775403: step: 124/463, loss: 0.0506364107131958 2023-01-24 03:09:26.332581: step: 126/463, loss: 0.02825508452951908 2023-01-24 03:09:26.976378: step: 128/463, loss: 0.2071886658668518 2023-01-24 03:09:27.535867: step: 130/463, loss: 0.0038398948963731527 2023-01-24 03:09:28.112018: step: 132/463, loss: 0.012503944337368011 2023-01-24 03:09:28.717214: step: 134/463, loss: 0.024172868579626083 2023-01-24 03:09:29.351411: step: 136/463, loss: 0.043314870446920395 2023-01-24 03:09:29.952745: step: 138/463, loss: 0.0985848605632782 2023-01-24 03:09:30.529730: step: 140/463, loss: 0.035090815275907516 2023-01-24 03:09:31.057927: step: 142/463, loss: 0.0018567469669505954 2023-01-24 03:09:31.690080: step: 144/463, loss: 0.06204001605510712 2023-01-24 03:09:32.362146: step: 146/463, loss: 0.013162667863070965 2023-01-24 03:09:32.924936: step: 148/463, loss: 0.00933629646897316 2023-01-24 03:09:33.588842: step: 150/463, loss: 0.05265234783291817 2023-01-24 03:09:34.170334: step: 152/463, loss: 0.03134272247552872 2023-01-24 03:09:34.791678: step: 154/463, loss: 0.024080056697130203 2023-01-24 03:09:35.329163: step: 156/463, loss: 0.04349636659026146 2023-01-24 03:09:35.987032: step: 158/463, loss: 0.006227976642549038 2023-01-24 03:09:36.592592: step: 160/463, loss: 0.0260805431753397 2023-01-24 03:09:37.164464: step: 162/463, loss: 0.030827375128865242 2023-01-24 03:09:37.807934: step: 164/463, loss: 0.056871235370635986 2023-01-24 03:09:38.472756: step: 166/463, loss: 0.04522751271724701 2023-01-24 03:09:39.061804: step: 168/463, loss: 0.0171738862991333 2023-01-24 03:09:39.697576: step: 170/463, loss: 0.008956853300333023 2023-01-24 03:09:40.236004: step: 172/463, loss: 0.030205553397536278 2023-01-24 03:09:40.813332: step: 174/463, loss: 0.007648610509932041 2023-01-24 03:09:41.409394: step: 176/463, loss: 0.011014658026397228 2023-01-24 03:09:41.991348: step: 178/463, loss: 0.01579192653298378 2023-01-24 03:09:42.600804: step: 180/463, loss: 0.03155839070677757 2023-01-24 03:09:43.144361: step: 182/463, loss: 0.06567239761352539 2023-01-24 03:09:43.813786: step: 184/463, loss: 0.099546417593956 2023-01-24 03:09:44.423045: step: 186/463, loss: 0.0055356938391923904 2023-01-24 03:09:45.066563: step: 188/463, loss: 0.03791023790836334 2023-01-24 03:09:45.743964: step: 190/463, loss: 0.06651432067155838 2023-01-24 03:09:46.378616: step: 192/463, loss: 0.02422924153506756 2023-01-24 03:09:46.997912: step: 194/463, loss: 0.028572889044880867 2023-01-24 03:09:47.590183: step: 196/463, loss: 0.03225933760404587 2023-01-24 03:09:48.194539: step: 198/463, loss: 0.00731132086366415 2023-01-24 03:09:48.745601: step: 200/463, loss: 0.01211745385080576 2023-01-24 03:09:49.342485: step: 202/463, loss: 0.045644015073776245 2023-01-24 03:09:49.925709: step: 204/463, loss: 0.03543860837817192 2023-01-24 03:09:50.582486: step: 206/463, loss: 0.03893708065152168 2023-01-24 03:09:51.230094: step: 208/463, loss: 0.014871427789330482 2023-01-24 03:09:51.899959: step: 210/463, loss: 0.02256651781499386 2023-01-24 03:09:52.495649: step: 212/463, loss: 0.0030168951489031315 2023-01-24 03:09:53.057926: step: 214/463, loss: 0.003802366554737091 2023-01-24 03:09:53.682286: step: 216/463, loss: 0.038135871291160583 2023-01-24 03:09:54.293808: step: 218/463, loss: 0.17675723135471344 2023-01-24 03:09:54.920605: step: 220/463, loss: 0.06983377784490585 2023-01-24 03:09:55.543519: step: 222/463, loss: 0.04411748796701431 2023-01-24 03:09:56.216928: step: 224/463, loss: 0.0026033688336610794 2023-01-24 03:09:56.775281: step: 226/463, loss: 0.04670077934861183 2023-01-24 03:09:57.373605: step: 228/463, loss: 0.020036470144987106 2023-01-24 03:09:57.977327: step: 230/463, loss: 0.008874769322574139 2023-01-24 03:09:58.571616: step: 232/463, loss: 0.015309696085751057 2023-01-24 03:09:59.106579: step: 234/463, loss: 0.009977023117244244 2023-01-24 03:09:59.669497: step: 236/463, loss: 0.10904641449451447 2023-01-24 03:10:00.257227: step: 238/463, loss: 0.047628216445446014 2023-01-24 03:10:00.907993: step: 240/463, loss: 0.052062246948480606 2023-01-24 03:10:01.505938: step: 242/463, loss: 0.052295878529548645 2023-01-24 03:10:02.084266: step: 244/463, loss: 0.029829440638422966 2023-01-24 03:10:02.707677: step: 246/463, loss: 0.0036751434672623873 2023-01-24 03:10:03.380742: step: 248/463, loss: 0.009762292727828026 2023-01-24 03:10:04.021736: step: 250/463, loss: 0.0010785594349727035 2023-01-24 03:10:04.618766: step: 252/463, loss: 0.006650690920650959 2023-01-24 03:10:05.212248: step: 254/463, loss: 0.14275754988193512 2023-01-24 03:10:05.760918: step: 256/463, loss: 0.0799219161272049 2023-01-24 03:10:06.396579: step: 258/463, loss: 0.032306261360645294 2023-01-24 03:10:07.091093: step: 260/463, loss: 0.03687409684062004 2023-01-24 03:10:07.736083: step: 262/463, loss: 0.022238124161958694 2023-01-24 03:10:08.342906: step: 264/463, loss: 2.174518585205078 2023-01-24 03:10:08.898723: step: 266/463, loss: 0.009077617898583412 2023-01-24 03:10:09.562385: step: 268/463, loss: 0.06946303695440292 2023-01-24 03:10:10.168701: step: 270/463, loss: 0.04468914866447449 2023-01-24 03:10:10.793702: step: 272/463, loss: 0.0542069710791111 2023-01-24 03:10:11.510896: step: 274/463, loss: 0.022078055888414383 2023-01-24 03:10:12.120348: step: 276/463, loss: 0.005696137901395559 2023-01-24 03:10:12.692030: step: 278/463, loss: 0.01198324654251337 2023-01-24 03:10:13.374235: step: 280/463, loss: 0.02280448004603386 2023-01-24 03:10:14.016351: step: 282/463, loss: 0.10266551375389099 2023-01-24 03:10:14.606368: step: 284/463, loss: 0.06152050197124481 2023-01-24 03:10:15.237678: step: 286/463, loss: 0.01716403104364872 2023-01-24 03:10:15.865381: step: 288/463, loss: 0.08088814467191696 2023-01-24 03:10:16.442412: step: 290/463, loss: 0.013772634789347649 2023-01-24 03:10:17.012608: step: 292/463, loss: 0.00019107619300484657 2023-01-24 03:10:17.575669: step: 294/463, loss: 0.051872022449970245 2023-01-24 03:10:18.143672: step: 296/463, loss: 0.056370675563812256 2023-01-24 03:10:18.769209: step: 298/463, loss: 0.02277160994708538 2023-01-24 03:10:19.327365: step: 300/463, loss: 0.011673275381326675 2023-01-24 03:10:19.922109: step: 302/463, loss: 0.060145217925310135 2023-01-24 03:10:20.473531: step: 304/463, loss: 0.7434048652648926 2023-01-24 03:10:21.084431: step: 306/463, loss: 0.03292294964194298 2023-01-24 03:10:21.692346: step: 308/463, loss: 0.07952801883220673 2023-01-24 03:10:22.280410: step: 310/463, loss: 0.012465964071452618 2023-01-24 03:10:22.945269: step: 312/463, loss: 0.010386046022176743 2023-01-24 03:10:23.487424: step: 314/463, loss: 0.008474062196910381 2023-01-24 03:10:24.115098: step: 316/463, loss: 0.4619000256061554 2023-01-24 03:10:24.729853: step: 318/463, loss: 0.02540082111954689 2023-01-24 03:10:25.302239: step: 320/463, loss: 0.02250864915549755 2023-01-24 03:10:25.918805: step: 322/463, loss: 0.052043505012989044 2023-01-24 03:10:26.558700: step: 324/463, loss: 0.09982141852378845 2023-01-24 03:10:27.226348: step: 326/463, loss: 0.0198634322732687 2023-01-24 03:10:27.802460: step: 328/463, loss: 0.004284702707082033 2023-01-24 03:10:28.402692: step: 330/463, loss: 0.029808690771460533 2023-01-24 03:10:29.020605: step: 332/463, loss: 0.016688918694853783 2023-01-24 03:10:29.614804: step: 334/463, loss: 0.03728481009602547 2023-01-24 03:10:30.268473: step: 336/463, loss: 0.011826831847429276 2023-01-24 03:10:30.871142: step: 338/463, loss: 0.0038214523810893297 2023-01-24 03:10:31.444384: step: 340/463, loss: 0.023856574669480324 2023-01-24 03:10:32.057383: step: 342/463, loss: 0.024996642023324966 2023-01-24 03:10:32.692349: step: 344/463, loss: 0.08463682979345322 2023-01-24 03:10:33.305985: step: 346/463, loss: 0.004424954764544964 2023-01-24 03:10:33.835030: step: 348/463, loss: 0.03760865703225136 2023-01-24 03:10:34.431271: step: 350/463, loss: 0.06560737639665604 2023-01-24 03:10:35.133120: step: 352/463, loss: 0.48020613193511963 2023-01-24 03:10:35.690800: step: 354/463, loss: 0.1660272777080536 2023-01-24 03:10:36.402845: step: 356/463, loss: 0.03192536532878876 2023-01-24 03:10:36.973169: step: 358/463, loss: 0.02583552524447441 2023-01-24 03:10:37.538347: step: 360/463, loss: 0.033906273543834686 2023-01-24 03:10:38.106163: step: 362/463, loss: 0.24974565207958221 2023-01-24 03:10:38.705478: step: 364/463, loss: 0.011646909639239311 2023-01-24 03:10:39.360095: step: 366/463, loss: 0.023839876055717468 2023-01-24 03:10:39.972454: step: 368/463, loss: 0.014882242307066917 2023-01-24 03:10:40.614711: step: 370/463, loss: 0.5279116630554199 2023-01-24 03:10:41.227262: step: 372/463, loss: 0.07768545299768448 2023-01-24 03:10:41.842765: step: 374/463, loss: 0.007295689545571804 2023-01-24 03:10:42.417695: step: 376/463, loss: 0.10735941678285599 2023-01-24 03:10:43.009854: step: 378/463, loss: 0.0015111529501155019 2023-01-24 03:10:43.578061: step: 380/463, loss: 0.029758017510175705 2023-01-24 03:10:44.166624: step: 382/463, loss: 0.04492565244436264 2023-01-24 03:10:44.801735: step: 384/463, loss: 0.017146963626146317 2023-01-24 03:10:45.433261: step: 386/463, loss: 0.141311913728714 2023-01-24 03:10:46.014398: step: 388/463, loss: 0.010343199595808983 2023-01-24 03:10:46.631467: step: 390/463, loss: 0.048823144286870956 2023-01-24 03:10:47.210196: step: 392/463, loss: 0.04591525346040726 2023-01-24 03:10:47.804182: step: 394/463, loss: 0.013156874105334282 2023-01-24 03:10:48.358463: step: 396/463, loss: 0.016817722469568253 2023-01-24 03:10:48.945658: step: 398/463, loss: 0.261803537607193 2023-01-24 03:10:49.610172: step: 400/463, loss: 0.03298725187778473 2023-01-24 03:10:50.194309: step: 402/463, loss: 0.011471742764115334 2023-01-24 03:10:50.896496: step: 404/463, loss: 0.05026385560631752 2023-01-24 03:10:51.550428: step: 406/463, loss: 0.04210500046610832 2023-01-24 03:10:52.109128: step: 408/463, loss: 0.42932701110839844 2023-01-24 03:10:52.708589: step: 410/463, loss: 0.14430645108222961 2023-01-24 03:10:53.327213: step: 412/463, loss: 0.6767376661300659 2023-01-24 03:10:53.883828: step: 414/463, loss: 0.018410829827189445 2023-01-24 03:10:54.439331: step: 416/463, loss: 0.0024917954578995705 2023-01-24 03:10:55.089246: step: 418/463, loss: 1.4730983972549438 2023-01-24 03:10:55.668525: step: 420/463, loss: 0.006804322823882103 2023-01-24 03:10:56.273188: step: 422/463, loss: 0.019152535125613213 2023-01-24 03:10:56.862065: step: 424/463, loss: 0.010303128510713577 2023-01-24 03:10:57.501037: step: 426/463, loss: 0.15601396560668945 2023-01-24 03:10:58.175908: step: 428/463, loss: 0.03503424674272537 2023-01-24 03:10:58.814320: step: 430/463, loss: 0.021573655307292938 2023-01-24 03:10:59.448594: step: 432/463, loss: 0.02043422870337963 2023-01-24 03:11:00.094548: step: 434/463, loss: 0.014082572422921658 2023-01-24 03:11:00.785444: step: 436/463, loss: 0.03358588367700577 2023-01-24 03:11:01.379932: step: 438/463, loss: 0.007290693465620279 2023-01-24 03:11:01.893453: step: 440/463, loss: 0.005060873925685883 2023-01-24 03:11:02.502717: step: 442/463, loss: 0.011918634176254272 2023-01-24 03:11:03.078137: step: 444/463, loss: 0.0010458058677613735 2023-01-24 03:11:03.650560: step: 446/463, loss: 0.0658491849899292 2023-01-24 03:11:04.345564: step: 448/463, loss: 0.14326946437358856 2023-01-24 03:11:04.979954: step: 450/463, loss: 0.016758499667048454 2023-01-24 03:11:05.590475: step: 452/463, loss: 0.10793881118297577 2023-01-24 03:11:06.191564: step: 454/463, loss: 0.02215990051627159 2023-01-24 03:11:06.811792: step: 456/463, loss: 0.05341756343841553 2023-01-24 03:11:07.401771: step: 458/463, loss: 0.030666833743453026 2023-01-24 03:11:07.947025: step: 460/463, loss: 0.11644411087036133 2023-01-24 03:11:08.491369: step: 462/463, loss: 0.004393420182168484 2023-01-24 03:11:09.104198: step: 464/463, loss: 0.028785977512598038 2023-01-24 03:11:09.742357: step: 466/463, loss: 0.004214954096823931 2023-01-24 03:11:10.402390: step: 468/463, loss: 0.06775260716676712 2023-01-24 03:11:10.992447: step: 470/463, loss: 0.011535215191543102 2023-01-24 03:11:11.549456: step: 472/463, loss: 0.04233601316809654 2023-01-24 03:11:12.154030: step: 474/463, loss: 0.05052860081195831 2023-01-24 03:11:12.904917: step: 476/463, loss: 0.0381799191236496 2023-01-24 03:11:13.575143: step: 478/463, loss: 0.044195424765348434 2023-01-24 03:11:14.179190: step: 480/463, loss: 0.043156664818525314 2023-01-24 03:11:14.777417: step: 482/463, loss: 0.01381735596805811 2023-01-24 03:11:15.322368: step: 484/463, loss: 0.07562321424484253 2023-01-24 03:11:15.937258: step: 486/463, loss: 0.020912349224090576 2023-01-24 03:11:16.578051: step: 488/463, loss: 0.025670697912573814 2023-01-24 03:11:17.170290: step: 490/463, loss: 0.028043637052178383 2023-01-24 03:11:17.826075: step: 492/463, loss: 0.08771458268165588 2023-01-24 03:11:18.408262: step: 494/463, loss: 0.008383357897400856 2023-01-24 03:11:18.998909: step: 496/463, loss: 0.008517872542142868 2023-01-24 03:11:19.616943: step: 498/463, loss: 0.02653774805366993 2023-01-24 03:11:20.273010: step: 500/463, loss: 0.03341764584183693 2023-01-24 03:11:20.854115: step: 502/463, loss: 0.021153457462787628 2023-01-24 03:11:21.485857: step: 504/463, loss: 0.048201218247413635 2023-01-24 03:11:22.147624: step: 506/463, loss: 0.07813188433647156 2023-01-24 03:11:22.819267: step: 508/463, loss: 0.03720022365450859 2023-01-24 03:11:23.480178: step: 510/463, loss: 0.033707670867443085 2023-01-24 03:11:24.133156: step: 512/463, loss: 0.05388626456260681 2023-01-24 03:11:24.691699: step: 514/463, loss: 0.017776835709810257 2023-01-24 03:11:25.270846: step: 516/463, loss: 0.061508361250162125 2023-01-24 03:11:25.898695: step: 518/463, loss: 0.03520200029015541 2023-01-24 03:11:26.522576: step: 520/463, loss: 0.028304416686296463 2023-01-24 03:11:27.115059: step: 522/463, loss: 0.13760294020175934 2023-01-24 03:11:27.647503: step: 524/463, loss: 0.0017929148161783814 2023-01-24 03:11:28.276568: step: 526/463, loss: 0.009729783982038498 2023-01-24 03:11:28.959162: step: 528/463, loss: 0.014832695946097374 2023-01-24 03:11:29.612837: step: 530/463, loss: 0.056658245623111725 2023-01-24 03:11:30.226008: step: 532/463, loss: 0.057059939950704575 2023-01-24 03:11:30.873293: step: 534/463, loss: 0.06851299852132797 2023-01-24 03:11:31.543754: step: 536/463, loss: 0.13439816236495972 2023-01-24 03:11:32.150304: step: 538/463, loss: 0.007310162764042616 2023-01-24 03:11:32.760865: step: 540/463, loss: 0.05349526181817055 2023-01-24 03:11:33.303335: step: 542/463, loss: 0.0293583907186985 2023-01-24 03:11:33.907291: step: 544/463, loss: 0.018998481333255768 2023-01-24 03:11:34.481213: step: 546/463, loss: 0.030599333345890045 2023-01-24 03:11:35.119151: step: 548/463, loss: 0.01808038353919983 2023-01-24 03:11:35.747825: step: 550/463, loss: 0.0006771114422008395 2023-01-24 03:11:36.382001: step: 552/463, loss: 0.027963101863861084 2023-01-24 03:11:37.078329: step: 554/463, loss: 0.02644319087266922 2023-01-24 03:11:37.736839: step: 556/463, loss: 0.029689282178878784 2023-01-24 03:11:38.355572: step: 558/463, loss: 0.15514349937438965 2023-01-24 03:11:38.919432: step: 560/463, loss: 0.03995957970619202 2023-01-24 03:11:39.514719: step: 562/463, loss: 0.024941669777035713 2023-01-24 03:11:40.070334: step: 564/463, loss: 0.03773942589759827 2023-01-24 03:11:40.564064: step: 566/463, loss: 0.06325981765985489 2023-01-24 03:11:41.210994: step: 568/463, loss: 0.06996597349643707 2023-01-24 03:11:41.820167: step: 570/463, loss: 0.07044245302677155 2023-01-24 03:11:42.421410: step: 572/463, loss: 0.2551673650741577 2023-01-24 03:11:43.068711: step: 574/463, loss: 0.019506286829710007 2023-01-24 03:11:43.727547: step: 576/463, loss: 0.012481668032705784 2023-01-24 03:11:44.347449: step: 578/463, loss: 0.13002590835094452 2023-01-24 03:11:44.947386: step: 580/463, loss: 0.2066579908132553 2023-01-24 03:11:45.605825: step: 582/463, loss: 0.19362099468708038 2023-01-24 03:11:46.282762: step: 584/463, loss: 0.06738457083702087 2023-01-24 03:11:46.849838: step: 586/463, loss: 0.023741072043776512 2023-01-24 03:11:47.476529: step: 588/463, loss: 0.020472431555390358 2023-01-24 03:11:48.111387: step: 590/463, loss: 0.02915227599442005 2023-01-24 03:11:48.707204: step: 592/463, loss: 0.11055063456296921 2023-01-24 03:11:49.335507: step: 594/463, loss: 0.06711766123771667 2023-01-24 03:11:49.940895: step: 596/463, loss: 0.0317876935005188 2023-01-24 03:11:50.567779: step: 598/463, loss: 0.035199277102947235 2023-01-24 03:11:51.167947: step: 600/463, loss: 0.027208806946873665 2023-01-24 03:11:51.786230: step: 602/463, loss: 0.04401131346821785 2023-01-24 03:11:52.417391: step: 604/463, loss: 0.07802709937095642 2023-01-24 03:11:53.033312: step: 606/463, loss: 0.03241603076457977 2023-01-24 03:11:53.630123: step: 608/463, loss: 0.027106909081339836 2023-01-24 03:11:54.228971: step: 610/463, loss: 0.028712373226881027 2023-01-24 03:11:54.735279: step: 612/463, loss: 0.008028710260987282 2023-01-24 03:11:55.321247: step: 614/463, loss: 0.7375069260597229 2023-01-24 03:11:55.917892: step: 616/463, loss: 0.11365193128585815 2023-01-24 03:11:56.491917: step: 618/463, loss: 0.008289830759167671 2023-01-24 03:11:57.090131: step: 620/463, loss: 0.08463103324174881 2023-01-24 03:11:57.721835: step: 622/463, loss: 0.0258532352745533 2023-01-24 03:11:58.283115: step: 624/463, loss: 0.00017131624917965382 2023-01-24 03:11:58.890773: step: 626/463, loss: 0.06887977570295334 2023-01-24 03:11:59.482909: step: 628/463, loss: 0.009932361543178558 2023-01-24 03:12:00.059000: step: 630/463, loss: 0.05218107998371124 2023-01-24 03:12:00.770520: step: 632/463, loss: 0.06111115217208862 2023-01-24 03:12:01.406973: step: 634/463, loss: 0.03795655071735382 2023-01-24 03:12:02.111886: step: 636/463, loss: 0.033981163054704666 2023-01-24 03:12:02.706027: step: 638/463, loss: 0.1751006841659546 2023-01-24 03:12:03.270904: step: 640/463, loss: 0.0020720541942864656 2023-01-24 03:12:03.885847: step: 642/463, loss: 0.025472475215792656 2023-01-24 03:12:04.453625: step: 644/463, loss: 0.00895337201654911 2023-01-24 03:12:05.055271: step: 646/463, loss: 0.009429015219211578 2023-01-24 03:12:05.733655: step: 648/463, loss: 0.009117132052779198 2023-01-24 03:12:06.341712: step: 650/463, loss: 0.01772463694214821 2023-01-24 03:12:06.955554: step: 652/463, loss: 0.41025906801223755 2023-01-24 03:12:07.625589: step: 654/463, loss: 0.17798997461795807 2023-01-24 03:12:08.215445: step: 656/463, loss: 0.026317836716771126 2023-01-24 03:12:08.800890: step: 658/463, loss: 0.013122315518558025 2023-01-24 03:12:09.401831: step: 660/463, loss: 0.0916406437754631 2023-01-24 03:12:10.045743: step: 662/463, loss: 0.04439610242843628 2023-01-24 03:12:10.653115: step: 664/463, loss: 0.035231947898864746 2023-01-24 03:12:11.274305: step: 666/463, loss: 0.06946726888418198 2023-01-24 03:12:11.885191: step: 668/463, loss: 0.06158921867609024 2023-01-24 03:12:12.507925: step: 670/463, loss: 0.025172999128699303 2023-01-24 03:12:13.072029: step: 672/463, loss: 0.006738292519003153 2023-01-24 03:12:13.668917: step: 674/463, loss: 0.011593194678425789 2023-01-24 03:12:14.229349: step: 676/463, loss: 0.03476410731673241 2023-01-24 03:12:14.849398: step: 678/463, loss: 0.018948696553707123 2023-01-24 03:12:15.479167: step: 680/463, loss: 0.03081817738711834 2023-01-24 03:12:16.151359: step: 682/463, loss: 0.0015254797181114554 2023-01-24 03:12:16.809244: step: 684/463, loss: 0.015690771862864494 2023-01-24 03:12:17.371539: step: 686/463, loss: 0.0177556611597538 2023-01-24 03:12:17.946571: step: 688/463, loss: 0.05072904750704765 2023-01-24 03:12:18.550481: step: 690/463, loss: 0.03051360324025154 2023-01-24 03:12:19.278802: step: 692/463, loss: 0.20636984705924988 2023-01-24 03:12:19.936621: step: 694/463, loss: 0.0039000685792416334 2023-01-24 03:12:20.576047: step: 696/463, loss: 0.015154576860368252 2023-01-24 03:12:21.150541: step: 698/463, loss: 0.013941802084445953 2023-01-24 03:12:21.754307: step: 700/463, loss: 0.13148829340934753 2023-01-24 03:12:22.491358: step: 702/463, loss: 0.013856504112482071 2023-01-24 03:12:23.011455: step: 704/463, loss: 0.006406472530215979 2023-01-24 03:12:23.581945: step: 706/463, loss: 0.0032963918056339025 2023-01-24 03:12:24.241162: step: 708/463, loss: 0.02176080085337162 2023-01-24 03:12:24.848308: step: 710/463, loss: 0.05759504809975624 2023-01-24 03:12:25.399076: step: 712/463, loss: 0.01230961550027132 2023-01-24 03:12:26.028538: step: 714/463, loss: 0.14241744577884674 2023-01-24 03:12:26.660757: step: 716/463, loss: 0.04254622757434845 2023-01-24 03:12:27.228153: step: 718/463, loss: 0.0002815830521285534 2023-01-24 03:12:27.868510: step: 720/463, loss: 0.002321084961295128 2023-01-24 03:12:28.567873: step: 722/463, loss: 0.054728373885154724 2023-01-24 03:12:29.197419: step: 724/463, loss: 0.027931924909353256 2023-01-24 03:12:29.787262: step: 726/463, loss: 0.008888617157936096 2023-01-24 03:12:30.420526: step: 728/463, loss: 0.007882009260356426 2023-01-24 03:12:30.982581: step: 730/463, loss: 0.0052911266684532166 2023-01-24 03:12:31.598269: step: 732/463, loss: 0.05516400933265686 2023-01-24 03:12:32.196010: step: 734/463, loss: 0.02546190656721592 2023-01-24 03:12:32.790709: step: 736/463, loss: 0.028896911069750786 2023-01-24 03:12:33.400034: step: 738/463, loss: 0.018578149378299713 2023-01-24 03:12:33.959776: step: 740/463, loss: 0.009061094373464584 2023-01-24 03:12:34.589611: step: 742/463, loss: 0.11628589034080505 2023-01-24 03:12:35.341525: step: 744/463, loss: 0.10193200409412384 2023-01-24 03:12:36.023451: step: 746/463, loss: 0.0436454713344574 2023-01-24 03:12:36.671432: step: 748/463, loss: 0.008284052833914757 2023-01-24 03:12:37.290161: step: 750/463, loss: 0.0009620767668820918 2023-01-24 03:12:37.923659: step: 752/463, loss: 0.056536704301834106 2023-01-24 03:12:38.604317: step: 754/463, loss: 0.05685668811202049 2023-01-24 03:12:39.177118: step: 756/463, loss: 0.005291768815368414 2023-01-24 03:12:39.722104: step: 758/463, loss: 0.04862922430038452 2023-01-24 03:12:40.328038: step: 760/463, loss: 0.010499270632863045 2023-01-24 03:12:40.919057: step: 762/463, loss: 0.04263368621468544 2023-01-24 03:12:41.466987: step: 764/463, loss: 0.10936841368675232 2023-01-24 03:12:42.088877: step: 766/463, loss: 0.019022652879357338 2023-01-24 03:12:42.716927: step: 768/463, loss: 0.010794872418045998 2023-01-24 03:12:43.302389: step: 770/463, loss: 0.004165175370872021 2023-01-24 03:12:43.880266: step: 772/463, loss: 0.014220666140317917 2023-01-24 03:12:44.524709: step: 774/463, loss: 0.008025393821299076 2023-01-24 03:12:45.139572: step: 776/463, loss: 0.02828812412917614 2023-01-24 03:12:45.789856: step: 778/463, loss: 0.03909818083047867 2023-01-24 03:12:46.406785: step: 780/463, loss: 0.07425030320882797 2023-01-24 03:12:47.070979: step: 782/463, loss: 0.038188543170690536 2023-01-24 03:12:47.736998: step: 784/463, loss: 0.036290157586336136 2023-01-24 03:12:48.302620: step: 786/463, loss: 0.0202342476695776 2023-01-24 03:12:48.947828: step: 788/463, loss: 0.02085955999791622 2023-01-24 03:12:49.580310: step: 790/463, loss: 0.07071888446807861 2023-01-24 03:12:50.221085: step: 792/463, loss: 0.041669812053442 2023-01-24 03:12:50.859518: step: 794/463, loss: 0.12150926887989044 2023-01-24 03:12:51.496622: step: 796/463, loss: 0.10607003420591354 2023-01-24 03:12:52.100995: step: 798/463, loss: 0.005398488603532314 2023-01-24 03:12:52.724742: step: 800/463, loss: 0.040510475635528564 2023-01-24 03:12:53.337727: step: 802/463, loss: 0.030537422746419907 2023-01-24 03:12:53.959586: step: 804/463, loss: 0.026942379772663116 2023-01-24 03:12:54.541292: step: 806/463, loss: 0.08702348917722702 2023-01-24 03:12:55.108662: step: 808/463, loss: 0.05339754745364189 2023-01-24 03:12:55.694926: step: 810/463, loss: 0.3117242157459259 2023-01-24 03:12:56.298608: step: 812/463, loss: 2.5038491003215313e-05 2023-01-24 03:12:56.865852: step: 814/463, loss: 0.03692660853266716 2023-01-24 03:12:57.495080: step: 816/463, loss: 0.011890384368598461 2023-01-24 03:12:58.059903: step: 818/463, loss: 0.04230646789073944 2023-01-24 03:12:58.701203: step: 820/463, loss: 0.04373534023761749 2023-01-24 03:12:59.330790: step: 822/463, loss: 0.053732555359601974 2023-01-24 03:13:00.013050: step: 824/463, loss: 0.08273197710514069 2023-01-24 03:13:00.596967: step: 826/463, loss: 0.06518137454986572 2023-01-24 03:13:01.217689: step: 828/463, loss: 0.04335102066397667 2023-01-24 03:13:01.790734: step: 830/463, loss: 0.03347724303603172 2023-01-24 03:13:02.365611: step: 832/463, loss: 0.019179463386535645 2023-01-24 03:13:02.989933: step: 834/463, loss: 0.020743053406476974 2023-01-24 03:13:03.595482: step: 836/463, loss: 0.043079227209091187 2023-01-24 03:13:04.188704: step: 838/463, loss: 0.03434205800294876 2023-01-24 03:13:04.892453: step: 840/463, loss: 0.03257459029555321 2023-01-24 03:13:05.531694: step: 842/463, loss: 0.047986019402742386 2023-01-24 03:13:06.247445: step: 844/463, loss: 0.036328207701444626 2023-01-24 03:13:06.925769: step: 846/463, loss: 0.08321975916624069 2023-01-24 03:13:07.606982: step: 848/463, loss: 0.06487030535936356 2023-01-24 03:13:08.194600: step: 850/463, loss: 0.08675437420606613 2023-01-24 03:13:08.791671: step: 852/463, loss: 0.018227146938443184 2023-01-24 03:13:09.454298: step: 854/463, loss: 0.004727873485535383 2023-01-24 03:13:10.082949: step: 856/463, loss: 0.2729973793029785 2023-01-24 03:13:10.725901: step: 858/463, loss: 0.026107655838131905 2023-01-24 03:13:11.282594: step: 860/463, loss: 0.04457389935851097 2023-01-24 03:13:11.878704: step: 862/463, loss: 0.053337857127189636 2023-01-24 03:13:12.459873: step: 864/463, loss: 0.011649386957287788 2023-01-24 03:13:13.023935: step: 866/463, loss: 0.02294936217367649 2023-01-24 03:13:13.620436: step: 868/463, loss: 0.0828322172164917 2023-01-24 03:13:14.203802: step: 870/463, loss: 0.019145460799336433 2023-01-24 03:13:14.757738: step: 872/463, loss: 0.06321202218532562 2023-01-24 03:13:15.417023: step: 874/463, loss: 0.09162592887878418 2023-01-24 03:13:16.030417: step: 876/463, loss: 0.03294839709997177 2023-01-24 03:13:16.637849: step: 878/463, loss: 0.018340542912483215 2023-01-24 03:13:17.235632: step: 880/463, loss: 0.026973869651556015 2023-01-24 03:13:17.881576: step: 882/463, loss: 0.010016737505793571 2023-01-24 03:13:18.514813: step: 884/463, loss: 0.02034001797437668 2023-01-24 03:13:19.130872: step: 886/463, loss: 0.04992104694247246 2023-01-24 03:13:19.781893: step: 888/463, loss: 0.14860928058624268 2023-01-24 03:13:20.302278: step: 890/463, loss: 0.02423548512160778 2023-01-24 03:13:20.932987: step: 892/463, loss: 0.018961971625685692 2023-01-24 03:13:21.484788: step: 894/463, loss: 0.004257969558238983 2023-01-24 03:13:22.084948: step: 896/463, loss: 0.0030107975471764803 2023-01-24 03:13:22.703557: step: 898/463, loss: 0.05586383491754532 2023-01-24 03:13:23.323116: step: 900/463, loss: 0.01192118413746357 2023-01-24 03:13:23.998623: step: 902/463, loss: 0.008415008895099163 2023-01-24 03:13:24.627904: step: 904/463, loss: 0.02253848873078823 2023-01-24 03:13:25.219668: step: 906/463, loss: 0.12708616256713867 2023-01-24 03:13:25.854609: step: 908/463, loss: 0.025114107877016068 2023-01-24 03:13:26.490385: step: 910/463, loss: 0.054132770746946335 2023-01-24 03:13:27.062333: step: 912/463, loss: 0.0266635213047266 2023-01-24 03:13:27.664486: step: 914/463, loss: 0.004844131413847208 2023-01-24 03:13:28.242448: step: 916/463, loss: 0.002099900506436825 2023-01-24 03:13:28.905520: step: 918/463, loss: 0.7106255292892456 2023-01-24 03:13:29.520162: step: 920/463, loss: 0.03257184848189354 2023-01-24 03:13:30.094815: step: 922/463, loss: 0.03203320503234863 2023-01-24 03:13:30.723999: step: 924/463, loss: 0.11766031384468079 2023-01-24 03:13:31.297856: step: 926/463, loss: 0.02466975897550583 ================================================== Loss: 0.068 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3251821483101393, 'r': 0.3214798847620163, 'f1': 0.32332041845340187}, 'combined': 0.23823609780776978, 'epoch': 24} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3604883981907118, 'r': 0.3012990367410578, 'f1': 0.32824680977022946}, 'combined': 0.2309274038584529, 'epoch': 24} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32615090518875767, 'r': 0.3218187299775218, 'f1': 0.32397033562207067}, 'combined': 0.23871498414257838, 'epoch': 24} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3580123197650371, 'r': 0.3007928835056469, 'f1': 0.32691775188795985}, 'combined': 0.23211160384045149, 'epoch': 24} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33161464842775296, 'r': 0.320917401704277, 'f1': 0.3261783427158226}, 'combined': 0.24034193673797452, 'epoch': 24} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37660084210159295, 'r': 0.29181635932479455, 'f1': 0.32883141375242336}, 'combined': 0.23347030376422057, 'epoch': 24} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.27927927927927926, 'r': 0.2952380952380952, 'f1': 0.28703703703703703}, 'combined': 0.19135802469135801, 'epoch': 24} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.28846153846153844, 'r': 0.32608695652173914, 'f1': 0.30612244897959184}, 'combined': 0.15306122448979592, 'epoch': 24} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36363636363636365, 'r': 0.13793103448275862, 'f1': 0.2}, 'combined': 0.13333333333333333, 'epoch': 24} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32931238967343335, 'r': 0.3212043194731405, 'f1': 0.3252078248932276}, 'combined': 0.2396268183423782, 'epoch': 10} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38073307869771683, 'r': 0.30299176943483225, 'f1': 0.3374427286417082}, 'combined': 0.23958433733561282, 'epoch': 10} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.31666666666666665, 'r': 0.41304347826086957, 'f1': 0.3584905660377358}, 'combined': 0.1792452830188679, 'epoch': 10} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3394620582120582, 'r': 0.3092447916666667, 'f1': 0.32364965312190286}, 'combined': 0.23847869177403366, 'epoch': 7} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36468931207169214, 'r': 0.2587193810770381, 'f1': 0.30269771384817323}, 'combined': 0.214915376832203, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 7} ****************************** Epoch: 25 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:16:02.663011: step: 2/463, loss: 0.011994456872344017 2023-01-24 03:16:03.306741: step: 4/463, loss: 0.11446678638458252 2023-01-24 03:16:03.933023: step: 6/463, loss: 0.394820898771286 2023-01-24 03:16:04.557700: step: 8/463, loss: 0.005592564586549997 2023-01-24 03:16:05.120245: step: 10/463, loss: 0.005851238965988159 2023-01-24 03:16:05.715444: step: 12/463, loss: 0.025797776877880096 2023-01-24 03:16:06.343317: step: 14/463, loss: 0.25505027174949646 2023-01-24 03:16:06.941186: step: 16/463, loss: 0.004577686544507742 2023-01-24 03:16:07.660227: step: 18/463, loss: 0.022393526509404182 2023-01-24 03:16:08.228684: step: 20/463, loss: 0.010907337069511414 2023-01-24 03:16:08.844531: step: 22/463, loss: 0.002406154992058873 2023-01-24 03:16:09.551892: step: 24/463, loss: 0.007225106004625559 2023-01-24 03:16:10.222458: step: 26/463, loss: 0.04541787877678871 2023-01-24 03:16:10.795424: step: 28/463, loss: 0.008653481490910053 2023-01-24 03:16:11.384191: step: 30/463, loss: 0.07138294726610184 2023-01-24 03:16:11.965401: step: 32/463, loss: 0.03730526939034462 2023-01-24 03:16:12.581015: step: 34/463, loss: 0.029595881700515747 2023-01-24 03:16:13.240784: step: 36/463, loss: 0.01764889620244503 2023-01-24 03:16:13.900358: step: 38/463, loss: 0.09015791863203049 2023-01-24 03:16:14.542528: step: 40/463, loss: 0.020653441548347473 2023-01-24 03:16:15.085391: step: 42/463, loss: 0.01915914937853813 2023-01-24 03:16:15.662659: step: 44/463, loss: 0.03847289830446243 2023-01-24 03:16:16.247321: step: 46/463, loss: 0.020643897354602814 2023-01-24 03:16:16.869560: step: 48/463, loss: 0.0004683421866502613 2023-01-24 03:16:17.402512: step: 50/463, loss: 0.001923603704199195 2023-01-24 03:16:18.038161: step: 52/463, loss: 0.0022171055898070335 2023-01-24 03:16:18.645903: step: 54/463, loss: 0.002603324595838785 2023-01-24 03:16:19.339728: step: 56/463, loss: 0.13605205714702606 2023-01-24 03:16:19.953435: step: 58/463, loss: 0.08406150341033936 2023-01-24 03:16:20.550338: step: 60/463, loss: 0.0008648224174976349 2023-01-24 03:16:21.183472: step: 62/463, loss: 0.060460664331912994 2023-01-24 03:16:21.718644: step: 64/463, loss: 0.009976508095860481 2023-01-24 03:16:22.304911: step: 66/463, loss: 0.02952761948108673 2023-01-24 03:16:22.912973: step: 68/463, loss: 0.041362687945365906 2023-01-24 03:16:23.494287: step: 70/463, loss: 0.007923880591988564 2023-01-24 03:16:24.080917: step: 72/463, loss: 0.0004196818626951426 2023-01-24 03:16:24.717471: step: 74/463, loss: 0.0023153056390583515 2023-01-24 03:16:25.262614: step: 76/463, loss: 0.10455886274576187 2023-01-24 03:16:25.858597: step: 78/463, loss: 0.0016595362685620785 2023-01-24 03:16:26.556313: step: 80/463, loss: 0.03995119407773018 2023-01-24 03:16:27.203726: step: 82/463, loss: 0.005408071912825108 2023-01-24 03:16:27.808561: step: 84/463, loss: 0.013827471993863583 2023-01-24 03:16:28.463556: step: 86/463, loss: 0.015399260446429253 2023-01-24 03:16:29.094835: step: 88/463, loss: 0.010601428337395191 2023-01-24 03:16:29.671228: step: 90/463, loss: 0.13037461042404175 2023-01-24 03:16:30.258440: step: 92/463, loss: 0.019582636654376984 2023-01-24 03:16:30.870887: step: 94/463, loss: 0.023700425401329994 2023-01-24 03:16:31.422959: step: 96/463, loss: 0.03560662642121315 2023-01-24 03:16:32.045056: step: 98/463, loss: 0.06753258407115936 2023-01-24 03:16:32.681394: step: 100/463, loss: 0.011389987543225288 2023-01-24 03:16:33.262823: step: 102/463, loss: 0.022834423929452896 2023-01-24 03:16:33.947923: step: 104/463, loss: 0.2608775496482849 2023-01-24 03:16:34.545029: step: 106/463, loss: 0.02167050540447235 2023-01-24 03:16:35.153955: step: 108/463, loss: 0.06865009665489197 2023-01-24 03:16:35.750150: step: 110/463, loss: 0.1754559576511383 2023-01-24 03:16:36.316305: step: 112/463, loss: 0.008110711351037025 2023-01-24 03:16:36.924199: step: 114/463, loss: 0.09730984270572662 2023-01-24 03:16:37.524991: step: 116/463, loss: 0.13307982683181763 2023-01-24 03:16:38.126823: step: 118/463, loss: 0.1132679209113121 2023-01-24 03:16:38.738818: step: 120/463, loss: 0.008675859309732914 2023-01-24 03:16:39.255880: step: 122/463, loss: 0.0031935819424688816 2023-01-24 03:16:39.874136: step: 124/463, loss: 0.008917358703911304 2023-01-24 03:16:40.419434: step: 126/463, loss: 0.06056595221161842 2023-01-24 03:16:41.021273: step: 128/463, loss: 0.0022261973936110735 2023-01-24 03:16:41.557948: step: 130/463, loss: 0.0037859813310205936 2023-01-24 03:16:42.139338: step: 132/463, loss: 0.040124353021383286 2023-01-24 03:16:42.754293: step: 134/463, loss: 0.0029475875198841095 2023-01-24 03:16:43.411921: step: 136/463, loss: 0.0024421459529548883 2023-01-24 03:16:44.037851: step: 138/463, loss: 0.023786328732967377 2023-01-24 03:16:44.716102: step: 140/463, loss: 0.012470797635614872 2023-01-24 03:16:45.304547: step: 142/463, loss: 0.006434897426515818 2023-01-24 03:16:45.926241: step: 144/463, loss: 0.022172195836901665 2023-01-24 03:16:46.506991: step: 146/463, loss: 0.023220354691147804 2023-01-24 03:16:47.200791: step: 148/463, loss: 0.007127760443836451 2023-01-24 03:16:47.813062: step: 150/463, loss: 0.11482023447751999 2023-01-24 03:16:48.483325: step: 152/463, loss: 0.014523623511195183 2023-01-24 03:16:49.164736: step: 154/463, loss: 0.04016435891389847 2023-01-24 03:16:49.825110: step: 156/463, loss: 0.03500409051775932 2023-01-24 03:16:50.427797: step: 158/463, loss: 0.018365168944001198 2023-01-24 03:16:51.042637: step: 160/463, loss: 0.00229447428137064 2023-01-24 03:16:51.606804: step: 162/463, loss: 0.010490692220628262 2023-01-24 03:16:52.187790: step: 164/463, loss: 0.09373822808265686 2023-01-24 03:16:52.798106: step: 166/463, loss: 0.03358762338757515 2023-01-24 03:16:53.400454: step: 168/463, loss: 0.014264012686908245 2023-01-24 03:16:54.065141: step: 170/463, loss: 0.09232599288225174 2023-01-24 03:16:54.679087: step: 172/463, loss: 0.021522151306271553 2023-01-24 03:16:55.258224: step: 174/463, loss: 0.003863373538479209 2023-01-24 03:16:55.828736: step: 176/463, loss: 0.02912088669836521 2023-01-24 03:16:56.434005: step: 178/463, loss: 0.005699397064745426 2023-01-24 03:16:57.053515: step: 180/463, loss: 3.8092844486236572 2023-01-24 03:16:57.672810: step: 182/463, loss: 0.005951498169451952 2023-01-24 03:16:58.323737: step: 184/463, loss: 0.0034042184706777334 2023-01-24 03:16:58.965599: step: 186/463, loss: 0.074593186378479 2023-01-24 03:16:59.551373: step: 188/463, loss: 0.08907707780599594 2023-01-24 03:17:00.187075: step: 190/463, loss: 0.04236428067088127 2023-01-24 03:17:00.781024: step: 192/463, loss: 0.04667920991778374 2023-01-24 03:17:01.399421: step: 194/463, loss: 0.023067820817232132 2023-01-24 03:17:01.984535: step: 196/463, loss: 0.047252506017684937 2023-01-24 03:17:02.601384: step: 198/463, loss: 0.007965140976011753 2023-01-24 03:17:03.160437: step: 200/463, loss: 0.018155701458454132 2023-01-24 03:17:03.758209: step: 202/463, loss: 0.1395353227853775 2023-01-24 03:17:04.449903: step: 204/463, loss: 0.06665574759244919 2023-01-24 03:17:05.087818: step: 206/463, loss: 0.00804069172590971 2023-01-24 03:17:05.698916: step: 208/463, loss: 0.3624227046966553 2023-01-24 03:17:06.292434: step: 210/463, loss: 0.007856902666389942 2023-01-24 03:17:06.860966: step: 212/463, loss: 0.10518589615821838 2023-01-24 03:17:07.513036: step: 214/463, loss: 0.0013028490357100964 2023-01-24 03:17:08.082226: step: 216/463, loss: 0.011699640192091465 2023-01-24 03:17:08.617055: step: 218/463, loss: 0.003419708926230669 2023-01-24 03:17:09.208526: step: 220/463, loss: 0.01940615102648735 2023-01-24 03:17:09.793942: step: 222/463, loss: 0.011524897068738937 2023-01-24 03:17:10.479140: step: 224/463, loss: 0.01838376559317112 2023-01-24 03:17:11.041279: step: 226/463, loss: 0.00018175947479903698 2023-01-24 03:17:11.652500: step: 228/463, loss: 0.019782619550824165 2023-01-24 03:17:12.284137: step: 230/463, loss: 0.03034944087266922 2023-01-24 03:17:12.886743: step: 232/463, loss: 0.07739511132240295 2023-01-24 03:17:13.544527: step: 234/463, loss: 0.08234003931283951 2023-01-24 03:17:14.160255: step: 236/463, loss: 0.38482773303985596 2023-01-24 03:17:14.752123: step: 238/463, loss: 0.006902043242007494 2023-01-24 03:17:15.399235: step: 240/463, loss: 0.008553264662623405 2023-01-24 03:17:16.098694: step: 242/463, loss: 0.19815745949745178 2023-01-24 03:17:16.744015: step: 244/463, loss: 0.01290412712842226 2023-01-24 03:17:17.375156: step: 246/463, loss: 0.0024579456076025963 2023-01-24 03:17:17.979405: step: 248/463, loss: 0.09928827732801437 2023-01-24 03:17:18.570494: step: 250/463, loss: 0.022468440234661102 2023-01-24 03:17:19.156623: step: 252/463, loss: 0.10673259198665619 2023-01-24 03:17:19.679903: step: 254/463, loss: 0.0011688238009810448 2023-01-24 03:17:20.264962: step: 256/463, loss: 0.038441918790340424 2023-01-24 03:17:20.888979: step: 258/463, loss: 0.02469075657427311 2023-01-24 03:17:21.509795: step: 260/463, loss: 0.014164132066071033 2023-01-24 03:17:22.116021: step: 262/463, loss: 0.006941921077668667 2023-01-24 03:17:22.713463: step: 264/463, loss: 0.011556431651115417 2023-01-24 03:17:23.394174: step: 266/463, loss: 0.05245332419872284 2023-01-24 03:17:23.945362: step: 268/463, loss: 0.06280305981636047 2023-01-24 03:17:24.593985: step: 270/463, loss: 0.08563365787267685 2023-01-24 03:17:25.151285: step: 272/463, loss: 0.05169442296028137 2023-01-24 03:17:25.727165: step: 274/463, loss: 0.31609228253364563 2023-01-24 03:17:26.330681: step: 276/463, loss: 0.017504477873444557 2023-01-24 03:17:26.948212: step: 278/463, loss: 0.03199387714266777 2023-01-24 03:17:27.545035: step: 280/463, loss: 0.28563493490219116 2023-01-24 03:17:28.119151: step: 282/463, loss: 0.00309227267280221 2023-01-24 03:17:28.702878: step: 284/463, loss: 0.01869712583720684 2023-01-24 03:17:29.386989: step: 286/463, loss: 0.032607145607471466 2023-01-24 03:17:30.035473: step: 288/463, loss: 0.18564218282699585 2023-01-24 03:17:30.620124: step: 290/463, loss: 0.02026744931936264 2023-01-24 03:17:31.199054: step: 292/463, loss: 0.09695117175579071 2023-01-24 03:17:31.801752: step: 294/463, loss: 0.004875087179243565 2023-01-24 03:17:32.392700: step: 296/463, loss: 0.0045811813324689865 2023-01-24 03:17:33.048265: step: 298/463, loss: 0.009535691700875759 2023-01-24 03:17:33.726955: step: 300/463, loss: 0.0016120598884299397 2023-01-24 03:17:34.347798: step: 302/463, loss: 0.01548510231077671 2023-01-24 03:17:34.916356: step: 304/463, loss: 0.03783949837088585 2023-01-24 03:17:35.463952: step: 306/463, loss: 0.11665921658277512 2023-01-24 03:17:36.180308: step: 308/463, loss: 0.019751353189349174 2023-01-24 03:17:36.817018: step: 310/463, loss: 0.023918254300951958 2023-01-24 03:17:37.492916: step: 312/463, loss: 0.027684660628437996 2023-01-24 03:17:38.083819: step: 314/463, loss: 0.01143191009759903 2023-01-24 03:17:38.660978: step: 316/463, loss: 0.0627913698554039 2023-01-24 03:17:39.258700: step: 318/463, loss: 0.17250046133995056 2023-01-24 03:17:39.835563: step: 320/463, loss: 0.03211867809295654 2023-01-24 03:17:40.491319: step: 322/463, loss: 2.358278512954712 2023-01-24 03:17:41.080753: step: 324/463, loss: 0.019838504493236542 2023-01-24 03:17:41.678674: step: 326/463, loss: 0.0006033214158378541 2023-01-24 03:17:42.217250: step: 328/463, loss: 0.01088511012494564 2023-01-24 03:17:42.794549: step: 330/463, loss: 0.026347029954195023 2023-01-24 03:17:43.414754: step: 332/463, loss: 0.03813530132174492 2023-01-24 03:17:44.069358: step: 334/463, loss: 0.015750139951705933 2023-01-24 03:17:44.639725: step: 336/463, loss: 0.06283487379550934 2023-01-24 03:17:45.237663: step: 338/463, loss: 0.008262271992862225 2023-01-24 03:17:45.847366: step: 340/463, loss: 0.07787179946899414 2023-01-24 03:17:46.481832: step: 342/463, loss: 0.04994373396039009 2023-01-24 03:17:47.009968: step: 344/463, loss: 0.018741769716143608 2023-01-24 03:17:47.640890: step: 346/463, loss: 0.03050718456506729 2023-01-24 03:17:48.205281: step: 348/463, loss: 0.004290836863219738 2023-01-24 03:17:48.884234: step: 350/463, loss: 0.13348209857940674 2023-01-24 03:17:49.511250: step: 352/463, loss: 0.03785382956266403 2023-01-24 03:17:50.110656: step: 354/463, loss: 0.008711045607924461 2023-01-24 03:17:50.748136: step: 356/463, loss: 0.04480462521314621 2023-01-24 03:17:51.427993: step: 358/463, loss: 0.03500044718384743 2023-01-24 03:17:52.121493: step: 360/463, loss: 0.06117703765630722 2023-01-24 03:17:52.671714: step: 362/463, loss: 0.02697446569800377 2023-01-24 03:17:53.313665: step: 364/463, loss: 0.008240891620516777 2023-01-24 03:17:53.907028: step: 366/463, loss: 0.30078738927841187 2023-01-24 03:17:54.492511: step: 368/463, loss: 0.0671444684267044 2023-01-24 03:17:55.158376: step: 370/463, loss: 0.3010883927345276 2023-01-24 03:17:55.728449: step: 372/463, loss: 0.0031308785546571016 2023-01-24 03:17:56.363540: step: 374/463, loss: 0.01051223836839199 2023-01-24 03:17:56.978283: step: 376/463, loss: 0.0028140456415712833 2023-01-24 03:17:57.600565: step: 378/463, loss: 0.016459867358207703 2023-01-24 03:17:58.207521: step: 380/463, loss: 0.0054246713407337666 2023-01-24 03:17:58.776854: step: 382/463, loss: 0.4300168454647064 2023-01-24 03:17:59.340348: step: 384/463, loss: 0.04056436941027641 2023-01-24 03:17:59.919615: step: 386/463, loss: 0.002791179809719324 2023-01-24 03:18:00.532309: step: 388/463, loss: 0.01932392083108425 2023-01-24 03:18:01.164580: step: 390/463, loss: 0.05334123596549034 2023-01-24 03:18:01.804233: step: 392/463, loss: 0.010038403794169426 2023-01-24 03:18:02.415708: step: 394/463, loss: 0.002209724858403206 2023-01-24 03:18:03.048579: step: 396/463, loss: 0.007955798879265785 2023-01-24 03:18:03.696666: step: 398/463, loss: 0.08591212332248688 2023-01-24 03:18:04.297008: step: 400/463, loss: 0.01339698676019907 2023-01-24 03:18:04.873837: step: 402/463, loss: 0.024328887462615967 2023-01-24 03:18:05.443005: step: 404/463, loss: 0.025037985295057297 2023-01-24 03:18:06.236807: step: 406/463, loss: 0.05490080639719963 2023-01-24 03:18:06.846513: step: 408/463, loss: 0.028156353160738945 2023-01-24 03:18:07.468606: step: 410/463, loss: 0.02459338866174221 2023-01-24 03:18:08.045801: step: 412/463, loss: 0.02804054692387581 2023-01-24 03:18:08.625708: step: 414/463, loss: 0.01364817377179861 2023-01-24 03:18:09.203106: step: 416/463, loss: 0.002992333611473441 2023-01-24 03:18:09.761351: step: 418/463, loss: 0.07254515588283539 2023-01-24 03:18:10.368880: step: 420/463, loss: 0.010559006594121456 2023-01-24 03:18:10.956903: step: 422/463, loss: 0.03507170453667641 2023-01-24 03:18:11.584620: step: 424/463, loss: 0.032590992748737335 2023-01-24 03:18:12.224813: step: 426/463, loss: 0.012571226805448532 2023-01-24 03:18:12.832655: step: 428/463, loss: 0.015370922163128853 2023-01-24 03:18:13.375455: step: 430/463, loss: 0.015016294084489346 2023-01-24 03:18:13.994253: step: 432/463, loss: 0.015839535742998123 2023-01-24 03:18:14.586869: step: 434/463, loss: 0.022866329178214073 2023-01-24 03:18:15.233303: step: 436/463, loss: 0.0008821549126878381 2023-01-24 03:18:15.892463: step: 438/463, loss: 0.01455927174538374 2023-01-24 03:18:16.478779: step: 440/463, loss: 0.05421319231390953 2023-01-24 03:18:17.048525: step: 442/463, loss: 0.006598927080631256 2023-01-24 03:18:17.619387: step: 444/463, loss: 0.00229642097838223 2023-01-24 03:18:18.234481: step: 446/463, loss: 0.05167998746037483 2023-01-24 03:18:18.788413: step: 448/463, loss: 0.018691029399633408 2023-01-24 03:18:19.415053: step: 450/463, loss: 0.046374645084142685 2023-01-24 03:18:20.060377: step: 452/463, loss: 0.011759958229959011 2023-01-24 03:18:20.738223: step: 454/463, loss: 0.04119403660297394 2023-01-24 03:18:21.332106: step: 456/463, loss: 0.011879232712090015 2023-01-24 03:18:21.938034: step: 458/463, loss: 0.0023767214734107256 2023-01-24 03:18:22.593693: step: 460/463, loss: 0.022252842783927917 2023-01-24 03:18:23.198074: step: 462/463, loss: 0.0527481734752655 2023-01-24 03:18:23.803296: step: 464/463, loss: 0.054680872708559036 2023-01-24 03:18:24.454615: step: 466/463, loss: 0.027724893763661385 2023-01-24 03:18:25.060131: step: 468/463, loss: 0.014130827970802784 2023-01-24 03:18:25.649696: step: 470/463, loss: 0.027212072163820267 2023-01-24 03:18:26.265753: step: 472/463, loss: 0.049686044454574585 2023-01-24 03:18:26.821358: step: 474/463, loss: 0.06290341913700104 2023-01-24 03:18:27.458261: step: 476/463, loss: 0.1428631842136383 2023-01-24 03:18:28.015826: step: 478/463, loss: 0.00436700414866209 2023-01-24 03:18:28.668600: step: 480/463, loss: 0.004989600740373135 2023-01-24 03:18:29.257369: step: 482/463, loss: 0.006692240014672279 2023-01-24 03:18:29.896960: step: 484/463, loss: 0.06531012803316116 2023-01-24 03:18:30.492591: step: 486/463, loss: 0.018087483942508698 2023-01-24 03:18:31.109363: step: 488/463, loss: 0.040351204574108124 2023-01-24 03:18:31.711860: step: 490/463, loss: 0.06540211290121078 2023-01-24 03:18:32.360996: step: 492/463, loss: 0.014081081375479698 2023-01-24 03:18:32.997873: step: 494/463, loss: 0.04387407749891281 2023-01-24 03:18:33.670766: step: 496/463, loss: 0.5250513553619385 2023-01-24 03:18:34.243328: step: 498/463, loss: 0.04821356385946274 2023-01-24 03:18:34.904831: step: 500/463, loss: 0.023202551528811455 2023-01-24 03:18:35.408697: step: 502/463, loss: 0.009744357317686081 2023-01-24 03:18:36.141531: step: 504/463, loss: 0.03400154411792755 2023-01-24 03:18:36.754628: step: 506/463, loss: 0.02604181319475174 2023-01-24 03:18:37.306325: step: 508/463, loss: 0.048765335232019424 2023-01-24 03:18:37.904753: step: 510/463, loss: 0.016649965196847916 2023-01-24 03:18:38.541133: step: 512/463, loss: 0.04908304661512375 2023-01-24 03:18:39.123921: step: 514/463, loss: 0.03420723229646683 2023-01-24 03:18:39.698187: step: 516/463, loss: 0.0599454864859581 2023-01-24 03:18:40.231649: step: 518/463, loss: 0.006992635317146778 2023-01-24 03:18:40.815345: step: 520/463, loss: 0.01740187034010887 2023-01-24 03:18:41.399041: step: 522/463, loss: 0.02817246876657009 2023-01-24 03:18:42.036808: step: 524/463, loss: 0.30401283502578735 2023-01-24 03:18:42.644027: step: 526/463, loss: 0.028936348855495453 2023-01-24 03:18:43.249537: step: 528/463, loss: 0.06215678155422211 2023-01-24 03:18:43.861315: step: 530/463, loss: 0.01248774491250515 2023-01-24 03:18:44.486036: step: 532/463, loss: 0.019079245626926422 2023-01-24 03:18:45.028943: step: 534/463, loss: 0.0038809399120509624 2023-01-24 03:18:45.664002: step: 536/463, loss: 0.018305420875549316 2023-01-24 03:18:46.274233: step: 538/463, loss: 0.04664703086018562 2023-01-24 03:18:46.847157: step: 540/463, loss: 0.024745231494307518 2023-01-24 03:18:47.455429: step: 542/463, loss: 0.04184908792376518 2023-01-24 03:18:48.131387: step: 544/463, loss: 0.00617001298815012 2023-01-24 03:18:48.747958: step: 546/463, loss: 0.023117080330848694 2023-01-24 03:18:49.344618: step: 548/463, loss: 0.17739702761173248 2023-01-24 03:18:49.958566: step: 550/463, loss: 0.01873868890106678 2023-01-24 03:18:50.560681: step: 552/463, loss: 0.034079890698194504 2023-01-24 03:18:51.172978: step: 554/463, loss: 0.03426097333431244 2023-01-24 03:18:51.803434: step: 556/463, loss: 0.009710846468806267 2023-01-24 03:18:52.453070: step: 558/463, loss: 0.14833053946495056 2023-01-24 03:18:53.064652: step: 560/463, loss: 0.02073727734386921 2023-01-24 03:18:53.697129: step: 562/463, loss: 0.0017812720034271479 2023-01-24 03:18:54.256814: step: 564/463, loss: 0.11400328576564789 2023-01-24 03:18:54.882764: step: 566/463, loss: 0.013126417063176632 2023-01-24 03:18:55.513691: step: 568/463, loss: 0.007262708619236946 2023-01-24 03:18:56.117235: step: 570/463, loss: 0.01319131813943386 2023-01-24 03:18:56.758938: step: 572/463, loss: 0.0692310556769371 2023-01-24 03:18:57.318538: step: 574/463, loss: 0.01344994455575943 2023-01-24 03:18:57.945911: step: 576/463, loss: 0.015567050315439701 2023-01-24 03:18:58.546656: step: 578/463, loss: 0.03850233927369118 2023-01-24 03:18:59.164130: step: 580/463, loss: 0.0022053057327866554 2023-01-24 03:18:59.709189: step: 582/463, loss: 0.03398047015070915 2023-01-24 03:19:00.341378: step: 584/463, loss: 0.018041282892227173 2023-01-24 03:19:00.955540: step: 586/463, loss: 0.064809650182724 2023-01-24 03:19:01.617529: step: 588/463, loss: 0.08524543046951294 2023-01-24 03:19:02.253095: step: 590/463, loss: 0.04248861223459244 2023-01-24 03:19:02.881357: step: 592/463, loss: 0.011953620240092278 2023-01-24 03:19:03.497612: step: 594/463, loss: 0.0006261427770368755 2023-01-24 03:19:04.118855: step: 596/463, loss: 0.0034261697437614202 2023-01-24 03:19:04.723751: step: 598/463, loss: 0.006827513687312603 2023-01-24 03:19:05.316843: step: 600/463, loss: 0.018274573609232903 2023-01-24 03:19:05.943432: step: 602/463, loss: 0.018307359889149666 2023-01-24 03:19:06.517615: step: 604/463, loss: 0.06857580691576004 2023-01-24 03:19:07.172678: step: 606/463, loss: 0.02184750884771347 2023-01-24 03:19:07.778725: step: 608/463, loss: 0.023041684180498123 2023-01-24 03:19:08.366960: step: 610/463, loss: 0.02008545771241188 2023-01-24 03:19:08.986612: step: 612/463, loss: 0.013561458326876163 2023-01-24 03:19:09.595928: step: 614/463, loss: 0.012581353075802326 2023-01-24 03:19:10.133495: step: 616/463, loss: 0.23835453391075134 2023-01-24 03:19:10.749627: step: 618/463, loss: 0.0791127011179924 2023-01-24 03:19:11.382991: step: 620/463, loss: 0.17193566262722015 2023-01-24 03:19:12.050208: step: 622/463, loss: 0.057223010808229446 2023-01-24 03:19:12.674835: step: 624/463, loss: 0.09653171896934509 2023-01-24 03:19:13.256610: step: 626/463, loss: 0.03891773521900177 2023-01-24 03:19:13.823172: step: 628/463, loss: 0.39777445793151855 2023-01-24 03:19:14.435151: step: 630/463, loss: 0.02997426874935627 2023-01-24 03:19:15.094673: step: 632/463, loss: 0.07181629538536072 2023-01-24 03:19:15.725121: step: 634/463, loss: 0.038252294063568115 2023-01-24 03:19:16.271895: step: 636/463, loss: 0.01464842539280653 2023-01-24 03:19:16.853131: step: 638/463, loss: 0.01847054809331894 2023-01-24 03:19:17.471765: step: 640/463, loss: 0.2605118155479431 2023-01-24 03:19:18.068393: step: 642/463, loss: 0.03208772838115692 2023-01-24 03:19:18.734810: step: 644/463, loss: 0.05462566018104553 2023-01-24 03:19:19.331960: step: 646/463, loss: 0.09218889474868774 2023-01-24 03:19:19.935297: step: 648/463, loss: 0.06949862092733383 2023-01-24 03:19:20.517939: step: 650/463, loss: 0.09131474792957306 2023-01-24 03:19:21.096228: step: 652/463, loss: 0.04107525199651718 2023-01-24 03:19:21.708179: step: 654/463, loss: 0.017127610743045807 2023-01-24 03:19:22.348379: step: 656/463, loss: 0.13086958229541779 2023-01-24 03:19:22.932353: step: 658/463, loss: 0.013378569856286049 2023-01-24 03:19:23.530445: step: 660/463, loss: 0.008858222514390945 2023-01-24 03:19:24.137639: step: 662/463, loss: 0.031793124973773956 2023-01-24 03:19:24.755887: step: 664/463, loss: 0.21793793141841888 2023-01-24 03:19:25.375062: step: 666/463, loss: 0.05038399249315262 2023-01-24 03:19:25.901482: step: 668/463, loss: 0.01588759385049343 2023-01-24 03:19:26.467709: step: 670/463, loss: 0.08518628776073456 2023-01-24 03:19:27.181144: step: 672/463, loss: 0.02056422457098961 2023-01-24 03:19:27.808962: step: 674/463, loss: 0.03868334740400314 2023-01-24 03:19:28.491537: step: 676/463, loss: 0.009915877133607864 2023-01-24 03:19:29.070483: step: 678/463, loss: 0.01674739643931389 2023-01-24 03:19:29.708811: step: 680/463, loss: 0.025166377425193787 2023-01-24 03:19:30.313866: step: 682/463, loss: 0.004388929810374975 2023-01-24 03:19:30.927073: step: 684/463, loss: 0.008305774070322514 2023-01-24 03:19:31.539799: step: 686/463, loss: 0.05645403638482094 2023-01-24 03:19:32.170209: step: 688/463, loss: 0.010286654345691204 2023-01-24 03:19:32.756234: step: 690/463, loss: 0.07165663689374924 2023-01-24 03:19:33.367324: step: 692/463, loss: 0.12136837840080261 2023-01-24 03:19:33.962959: step: 694/463, loss: 0.0047423746436834335 2023-01-24 03:19:34.574441: step: 696/463, loss: 0.0016389087541028857 2023-01-24 03:19:35.197100: step: 698/463, loss: 0.03139398992061615 2023-01-24 03:19:35.903126: step: 700/463, loss: 0.005238656885921955 2023-01-24 03:19:36.521130: step: 702/463, loss: 0.00564733799546957 2023-01-24 03:19:37.113205: step: 704/463, loss: 0.04745946824550629 2023-01-24 03:19:37.728244: step: 706/463, loss: 0.02068985439836979 2023-01-24 03:19:38.326864: step: 708/463, loss: 0.013989862985908985 2023-01-24 03:19:38.957398: step: 710/463, loss: 0.9687111973762512 2023-01-24 03:19:39.608046: step: 712/463, loss: 0.04142620414495468 2023-01-24 03:19:40.233851: step: 714/463, loss: 0.028417326509952545 2023-01-24 03:19:40.813508: step: 716/463, loss: 0.12421970814466476 2023-01-24 03:19:41.425442: step: 718/463, loss: 0.05384942516684532 2023-01-24 03:19:42.185965: step: 720/463, loss: 0.03321942687034607 2023-01-24 03:19:42.838849: step: 722/463, loss: 2.6157279014587402 2023-01-24 03:19:43.457705: step: 724/463, loss: 0.03431367501616478 2023-01-24 03:19:44.085137: step: 726/463, loss: 0.050460878759622574 2023-01-24 03:19:44.644519: step: 728/463, loss: 0.01666288822889328 2023-01-24 03:19:45.246199: step: 730/463, loss: 0.005533245857805014 2023-01-24 03:19:45.862808: step: 732/463, loss: 0.006701772101223469 2023-01-24 03:19:46.420205: step: 734/463, loss: 0.0025528701953589916 2023-01-24 03:19:47.052552: step: 736/463, loss: 0.0002340503706363961 2023-01-24 03:19:47.657178: step: 738/463, loss: 0.027590734884142876 2023-01-24 03:19:48.223040: step: 740/463, loss: 0.02521904744207859 2023-01-24 03:19:48.857123: step: 742/463, loss: 0.006873893551528454 2023-01-24 03:19:49.500915: step: 744/463, loss: 0.015169343911111355 2023-01-24 03:19:50.120212: step: 746/463, loss: 0.003592990804463625 2023-01-24 03:19:50.708566: step: 748/463, loss: 0.023545080795884132 2023-01-24 03:19:51.339788: step: 750/463, loss: 0.014496134594082832 2023-01-24 03:19:51.956266: step: 752/463, loss: 0.02951250784099102 2023-01-24 03:19:52.686370: step: 754/463, loss: 0.023887934163212776 2023-01-24 03:19:53.285385: step: 756/463, loss: 0.02027571201324463 2023-01-24 03:19:53.908989: step: 758/463, loss: 0.0028193145990371704 2023-01-24 03:19:54.485875: step: 760/463, loss: 0.03433002158999443 2023-01-24 03:19:55.166052: step: 762/463, loss: 1.0592200756072998 2023-01-24 03:19:55.796505: step: 764/463, loss: 0.043579377233982086 2023-01-24 03:19:56.399044: step: 766/463, loss: 0.0038164863362908363 2023-01-24 03:19:57.042476: step: 768/463, loss: 0.03466525301337242 2023-01-24 03:19:57.628752: step: 770/463, loss: 0.03664425387978554 2023-01-24 03:19:58.249431: step: 772/463, loss: 0.030102701857686043 2023-01-24 03:19:58.832331: step: 774/463, loss: 0.03587200492620468 2023-01-24 03:19:59.482826: step: 776/463, loss: 0.1532282531261444 2023-01-24 03:20:00.145863: step: 778/463, loss: 0.04082759842276573 2023-01-24 03:20:00.779490: step: 780/463, loss: 0.005741976201534271 2023-01-24 03:20:01.448562: step: 782/463, loss: 0.05541449412703514 2023-01-24 03:20:02.073054: step: 784/463, loss: 0.02886315807700157 2023-01-24 03:20:02.756516: step: 786/463, loss: 0.012926431372761726 2023-01-24 03:20:03.387380: step: 788/463, loss: 0.09245792776346207 2023-01-24 03:20:04.044634: step: 790/463, loss: 0.021347224712371826 2023-01-24 03:20:04.654124: step: 792/463, loss: 0.05797242000699043 2023-01-24 03:20:05.212762: step: 794/463, loss: 0.0007421908667311072 2023-01-24 03:20:05.851141: step: 796/463, loss: 0.02244095504283905 2023-01-24 03:20:06.480238: step: 798/463, loss: 0.0796673595905304 2023-01-24 03:20:07.160232: step: 800/463, loss: 0.020638255402445793 2023-01-24 03:20:07.730673: step: 802/463, loss: 0.036581769585609436 2023-01-24 03:20:08.392722: step: 804/463, loss: 0.07665160298347473 2023-01-24 03:20:09.012802: step: 806/463, loss: 0.0005397535860538483 2023-01-24 03:20:09.626483: step: 808/463, loss: 0.07760181277990341 2023-01-24 03:20:10.248201: step: 810/463, loss: 0.012861700728535652 2023-01-24 03:20:10.887507: step: 812/463, loss: 0.08782971650362015 2023-01-24 03:20:11.536553: step: 814/463, loss: 0.0327371247112751 2023-01-24 03:20:12.207617: step: 816/463, loss: 0.08753512799739838 2023-01-24 03:20:12.803165: step: 818/463, loss: 0.011592350900173187 2023-01-24 03:20:13.385651: step: 820/463, loss: 0.002077220007777214 2023-01-24 03:20:14.032739: step: 822/463, loss: 0.04932112619280815 2023-01-24 03:20:14.653383: step: 824/463, loss: 0.025132879614830017 2023-01-24 03:20:15.277452: step: 826/463, loss: 0.037757258862257004 2023-01-24 03:20:15.895909: step: 828/463, loss: 0.02531863935291767 2023-01-24 03:20:16.543533: step: 830/463, loss: 0.17589454352855682 2023-01-24 03:20:17.142079: step: 832/463, loss: 0.031591471284627914 2023-01-24 03:20:17.764580: step: 834/463, loss: 0.024644682183861732 2023-01-24 03:20:18.341247: step: 836/463, loss: 0.0023469352163374424 2023-01-24 03:20:18.879189: step: 838/463, loss: 0.022468073293566704 2023-01-24 03:20:19.500438: step: 840/463, loss: 0.013685453683137894 2023-01-24 03:20:20.203937: step: 842/463, loss: 0.09668903797864914 2023-01-24 03:20:20.752135: step: 844/463, loss: 0.0019599897786974907 2023-01-24 03:20:21.289206: step: 846/463, loss: 0.005608050152659416 2023-01-24 03:20:21.903657: step: 848/463, loss: 0.007824286818504333 2023-01-24 03:20:22.459590: step: 850/463, loss: 0.04946202039718628 2023-01-24 03:20:23.162637: step: 852/463, loss: 0.09730540961027145 2023-01-24 03:20:23.855721: step: 854/463, loss: 0.040604691952466965 2023-01-24 03:20:24.420493: step: 856/463, loss: 0.03105868399143219 2023-01-24 03:20:25.020876: step: 858/463, loss: 0.014554441906511784 2023-01-24 03:20:25.624770: step: 860/463, loss: 0.010317348875105381 2023-01-24 03:20:26.231150: step: 862/463, loss: 0.045255932956933975 2023-01-24 03:20:26.816778: step: 864/463, loss: 0.02020047977566719 2023-01-24 03:20:27.446722: step: 866/463, loss: 0.06606575101613998 2023-01-24 03:20:28.101566: step: 868/463, loss: 0.04144250229001045 2023-01-24 03:20:28.792637: step: 870/463, loss: 0.1169712170958519 2023-01-24 03:20:29.352184: step: 872/463, loss: 0.046892136335372925 2023-01-24 03:20:29.926880: step: 874/463, loss: 0.12203280627727509 2023-01-24 03:20:30.543684: step: 876/463, loss: 0.02287198044359684 2023-01-24 03:20:31.143723: step: 878/463, loss: 0.002557631814852357 2023-01-24 03:20:31.745659: step: 880/463, loss: 0.05058659613132477 2023-01-24 03:20:32.336825: step: 882/463, loss: 0.024215461686253548 2023-01-24 03:20:32.895600: step: 884/463, loss: 0.002111670561134815 2023-01-24 03:20:33.484329: step: 886/463, loss: 0.003887950209900737 2023-01-24 03:20:34.115900: step: 888/463, loss: 0.05878519266843796 2023-01-24 03:20:34.701744: step: 890/463, loss: 0.0011992199579253793 2023-01-24 03:20:35.195728: step: 892/463, loss: 0.006629703566431999 2023-01-24 03:20:35.797124: step: 894/463, loss: 0.0486271046102047 2023-01-24 03:20:36.470783: step: 896/463, loss: 0.006970013026148081 2023-01-24 03:20:37.107815: step: 898/463, loss: 0.14234429597854614 2023-01-24 03:20:37.661352: step: 900/463, loss: 5.4092022764962167e-05 2023-01-24 03:20:38.255981: step: 902/463, loss: 0.021152831614017487 2023-01-24 03:20:38.926037: step: 904/463, loss: 0.05984974279999733 2023-01-24 03:20:39.557332: step: 906/463, loss: 0.23540005087852478 2023-01-24 03:20:40.160709: step: 908/463, loss: 0.14759105443954468 2023-01-24 03:20:40.766278: step: 910/463, loss: 0.08375096321105957 2023-01-24 03:20:41.405656: step: 912/463, loss: 0.015068162232637405 2023-01-24 03:20:42.070044: step: 914/463, loss: 0.012109367176890373 2023-01-24 03:20:42.643611: step: 916/463, loss: 0.025881648063659668 2023-01-24 03:20:43.185965: step: 918/463, loss: 0.015250113792717457 2023-01-24 03:20:43.739079: step: 920/463, loss: 0.028133681043982506 2023-01-24 03:20:44.321609: step: 922/463, loss: 0.05591210350394249 2023-01-24 03:20:44.936923: step: 924/463, loss: 0.025108743458986282 2023-01-24 03:20:45.561087: step: 926/463, loss: 0.03305111452937126 ================================================== Loss: 0.069 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32137839147286823, 'r': 0.3146703036053131, 'f1': 0.31798897411313515}, 'combined': 0.2343076651359943, 'epoch': 25} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.36876999432039276, 'r': 0.316640204547353, 'f1': 0.34072269897771507}, 'combined': 0.2397044113411061, 'epoch': 25} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3207567698259188, 'r': 0.3146703036053131, 'f1': 0.3176843869731801}, 'combined': 0.23408323250655372, 'epoch': 25} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3686692880345029, 'r': 0.31751883707683626, 'f1': 0.34118761114866797}, 'combined': 0.24224320391555423, 'epoch': 25} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33471534653465346, 'r': 0.3207424098671727, 'f1': 0.32757994186046513}, 'combined': 0.241374694002448, 'epoch': 25} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3844636334697517, 'r': 0.3066315540936763, 'f1': 0.3411648164964593}, 'combined': 0.24222701971248609, 'epoch': 25} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24305555555555555, 'r': 0.25, 'f1': 0.2464788732394366}, 'combined': 0.16431924882629106, 'epoch': 25} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3392857142857143, 'r': 0.41304347826086957, 'f1': 0.3725490196078432}, 'combined': 0.1862745098039216, 'epoch': 25} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.575, 'r': 0.19827586206896552, 'f1': 0.2948717948717949}, 'combined': 0.19658119658119658, 'epoch': 25} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3207567698259188, 'r': 0.3146703036053131, 'f1': 0.3176843869731801}, 'combined': 0.23408323250655372, 'epoch': 25} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3686692880345029, 'r': 0.31751883707683626, 'f1': 0.34118761114866797}, 'combined': 0.24224320391555423, 'epoch': 25} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3392857142857143, 'r': 0.41304347826086957, 'f1': 0.3725490196078432}, 'combined': 0.1862745098039216, 'epoch': 25} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3394620582120582, 'r': 0.3092447916666667, 'f1': 0.32364965312190286}, 'combined': 0.23847869177403366, 'epoch': 7} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36468931207169214, 'r': 0.2587193810770381, 'f1': 0.30269771384817323}, 'combined': 0.214915376832203, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 7} ****************************** Epoch: 26 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:23:23.395505: step: 2/463, loss: 0.028914110735058784 2023-01-24 03:23:24.022445: step: 4/463, loss: 0.028303978964686394 2023-01-24 03:23:24.639579: step: 6/463, loss: 0.02428779937326908 2023-01-24 03:23:25.227248: step: 8/463, loss: 0.05932669714093208 2023-01-24 03:23:25.833703: step: 10/463, loss: 0.006147367414087057 2023-01-24 03:23:26.522125: step: 12/463, loss: 0.021394234150648117 2023-01-24 03:23:27.193581: step: 14/463, loss: 0.09853358566761017 2023-01-24 03:23:27.845761: step: 16/463, loss: 0.013529102317988873 2023-01-24 03:23:28.515732: step: 18/463, loss: 0.10093488544225693 2023-01-24 03:23:29.108050: step: 20/463, loss: 0.03464777022600174 2023-01-24 03:23:29.687015: step: 22/463, loss: 0.1414872109889984 2023-01-24 03:23:30.301757: step: 24/463, loss: 0.009785602800548077 2023-01-24 03:23:30.903642: step: 26/463, loss: 0.03662661835551262 2023-01-24 03:23:31.496569: step: 28/463, loss: 0.08329956233501434 2023-01-24 03:23:32.096643: step: 30/463, loss: 0.00921177864074707 2023-01-24 03:23:32.697881: step: 32/463, loss: 0.031598832458257675 2023-01-24 03:23:33.298682: step: 34/463, loss: 0.01825399324297905 2023-01-24 03:23:33.970849: step: 36/463, loss: 0.019767815247178078 2023-01-24 03:23:34.561909: step: 38/463, loss: 0.0013461182825267315 2023-01-24 03:23:35.229833: step: 40/463, loss: 0.062657929956913 2023-01-24 03:23:35.854613: step: 42/463, loss: 0.034742482006549835 2023-01-24 03:23:36.503240: step: 44/463, loss: 0.0017123231664299965 2023-01-24 03:23:37.129676: step: 46/463, loss: 0.022996125742793083 2023-01-24 03:23:37.725537: step: 48/463, loss: 0.07762524485588074 2023-01-24 03:23:38.263422: step: 50/463, loss: 0.03391828387975693 2023-01-24 03:23:38.865184: step: 52/463, loss: 0.03010445460677147 2023-01-24 03:23:39.449689: step: 54/463, loss: 0.17236974835395813 2023-01-24 03:23:40.126842: step: 56/463, loss: 0.018359364941716194 2023-01-24 03:23:40.699405: step: 58/463, loss: 0.025406034663319588 2023-01-24 03:23:41.364899: step: 60/463, loss: 0.013097105547785759 2023-01-24 03:23:42.131326: step: 62/463, loss: 0.025201816111803055 2023-01-24 03:23:42.764656: step: 64/463, loss: 0.015530485659837723 2023-01-24 03:23:43.349684: step: 66/463, loss: 0.09985840320587158 2023-01-24 03:23:43.956180: step: 68/463, loss: 0.0362643338739872 2023-01-24 03:23:44.670099: step: 70/463, loss: 0.006426764652132988 2023-01-24 03:23:45.220523: step: 72/463, loss: 0.011015200987458229 2023-01-24 03:23:45.787899: step: 74/463, loss: 0.0035735846031457186 2023-01-24 03:23:46.388102: step: 76/463, loss: 0.04338700696825981 2023-01-24 03:23:46.984788: step: 78/463, loss: 0.0022078927140682936 2023-01-24 03:23:47.594214: step: 80/463, loss: 0.009938274510204792 2023-01-24 03:23:48.151544: step: 82/463, loss: 0.005523581523448229 2023-01-24 03:23:48.769254: step: 84/463, loss: 0.01936269737780094 2023-01-24 03:23:49.462737: step: 86/463, loss: 0.23367302119731903 2023-01-24 03:23:50.041538: step: 88/463, loss: 0.01628846861422062 2023-01-24 03:23:50.598443: step: 90/463, loss: 0.018425000831484795 2023-01-24 03:23:51.225555: step: 92/463, loss: 0.015580820851027966 2023-01-24 03:23:51.842175: step: 94/463, loss: 0.01899946667253971 2023-01-24 03:23:52.384023: step: 96/463, loss: 0.040267203003168106 2023-01-24 03:23:52.992231: step: 98/463, loss: 0.08947502821683884 2023-01-24 03:23:53.650300: step: 100/463, loss: 0.08804885298013687 2023-01-24 03:23:54.249715: step: 102/463, loss: 0.005025687627494335 2023-01-24 03:23:54.866452: step: 104/463, loss: 3.369013575138524e-05 2023-01-24 03:23:55.476297: step: 106/463, loss: 0.5578446388244629 2023-01-24 03:23:56.085629: step: 108/463, loss: 0.0034256945364177227 2023-01-24 03:23:56.708183: step: 110/463, loss: 0.016213616356253624 2023-01-24 03:23:57.330723: step: 112/463, loss: 0.010411268100142479 2023-01-24 03:23:57.969359: step: 114/463, loss: 0.04533207416534424 2023-01-24 03:23:58.603694: step: 116/463, loss: 0.015449193306267262 2023-01-24 03:23:59.322716: step: 118/463, loss: 0.0034042494371533394 2023-01-24 03:23:59.981010: step: 120/463, loss: 0.01815728098154068 2023-01-24 03:24:00.683297: step: 122/463, loss: 0.04109128564596176 2023-01-24 03:24:01.346450: step: 124/463, loss: 0.008021771907806396 2023-01-24 03:24:01.934168: step: 126/463, loss: 0.005465569440275431 2023-01-24 03:24:02.479557: step: 128/463, loss: 0.014786922372877598 2023-01-24 03:24:03.004501: step: 130/463, loss: 0.005800647661089897 2023-01-24 03:24:03.609006: step: 132/463, loss: 0.03609246015548706 2023-01-24 03:24:04.221040: step: 134/463, loss: 0.04947640001773834 2023-01-24 03:24:04.779659: step: 136/463, loss: 0.015908099710941315 2023-01-24 03:24:05.355222: step: 138/463, loss: 0.023923663422465324 2023-01-24 03:24:05.975307: step: 140/463, loss: 0.08421610295772552 2023-01-24 03:24:06.648705: step: 142/463, loss: 0.011792836710810661 2023-01-24 03:24:07.222192: step: 144/463, loss: 0.0509210042655468 2023-01-24 03:24:07.809155: step: 146/463, loss: 0.001469593495130539 2023-01-24 03:24:08.305997: step: 148/463, loss: 0.008890076540410519 2023-01-24 03:24:08.878830: step: 150/463, loss: 0.02359352633357048 2023-01-24 03:24:09.507257: step: 152/463, loss: 0.056291613727808 2023-01-24 03:24:10.059981: step: 154/463, loss: 0.01657475344836712 2023-01-24 03:24:10.668685: step: 156/463, loss: 0.03409228473901749 2023-01-24 03:24:11.289028: step: 158/463, loss: 0.008118068799376488 2023-01-24 03:24:11.826549: step: 160/463, loss: 0.005723034031689167 2023-01-24 03:24:12.451319: step: 162/463, loss: 0.0024168335366994143 2023-01-24 03:24:13.069794: step: 164/463, loss: 0.0012158815516158938 2023-01-24 03:24:13.691985: step: 166/463, loss: 0.013071971014142036 2023-01-24 03:24:14.369969: step: 168/463, loss: 0.0038813031278550625 2023-01-24 03:24:15.007584: step: 170/463, loss: 0.03644895553588867 2023-01-24 03:24:15.616064: step: 172/463, loss: 0.014640239998698235 2023-01-24 03:24:16.194724: step: 174/463, loss: 0.04168757423758507 2023-01-24 03:24:16.829322: step: 176/463, loss: 0.05235612764954567 2023-01-24 03:24:17.470248: step: 178/463, loss: 0.05103658512234688 2023-01-24 03:24:18.102265: step: 180/463, loss: 0.027995172888040543 2023-01-24 03:24:18.695953: step: 182/463, loss: 0.018379710614681244 2023-01-24 03:24:19.382380: step: 184/463, loss: 0.11641687899827957 2023-01-24 03:24:19.973787: step: 186/463, loss: 0.041619788855314255 2023-01-24 03:24:20.580691: step: 188/463, loss: 0.08585845679044724 2023-01-24 03:24:21.218943: step: 190/463, loss: 0.035136476159095764 2023-01-24 03:24:21.780758: step: 192/463, loss: 0.017366521060466766 2023-01-24 03:24:22.367539: step: 194/463, loss: 0.02471463568508625 2023-01-24 03:24:22.965360: step: 196/463, loss: 0.07986804842948914 2023-01-24 03:24:23.504707: step: 198/463, loss: 0.43882715702056885 2023-01-24 03:24:24.133685: step: 200/463, loss: 0.013765200041234493 2023-01-24 03:24:24.784038: step: 202/463, loss: 0.000820895133074373 2023-01-24 03:24:25.375985: step: 204/463, loss: 0.11033257842063904 2023-01-24 03:24:26.020859: step: 206/463, loss: 0.006884297356009483 2023-01-24 03:24:26.693465: step: 208/463, loss: 0.027371415868401527 2023-01-24 03:24:27.247549: step: 210/463, loss: 0.04638984799385071 2023-01-24 03:24:27.850151: step: 212/463, loss: 0.0024904273450374603 2023-01-24 03:24:28.430771: step: 214/463, loss: 0.09669870883226395 2023-01-24 03:24:29.055617: step: 216/463, loss: 0.09323211759328842 2023-01-24 03:24:29.751375: step: 218/463, loss: 0.21737347543239594 2023-01-24 03:24:30.303691: step: 220/463, loss: 0.005320731550455093 2023-01-24 03:24:30.855913: step: 222/463, loss: 0.018331391736865044 2023-01-24 03:24:31.437705: step: 224/463, loss: 0.009497333317995071 2023-01-24 03:24:32.029861: step: 226/463, loss: 0.5109841227531433 2023-01-24 03:24:32.625615: step: 228/463, loss: 0.01999974623322487 2023-01-24 03:24:33.151271: step: 230/463, loss: 0.009726856835186481 2023-01-24 03:24:33.815784: step: 232/463, loss: 0.016373788937926292 2023-01-24 03:24:34.418613: step: 234/463, loss: 0.03392639011144638 2023-01-24 03:24:34.992460: step: 236/463, loss: 0.009816493839025497 2023-01-24 03:24:35.704013: step: 238/463, loss: 0.08238691091537476 2023-01-24 03:24:36.302741: step: 240/463, loss: 0.04414720460772514 2023-01-24 03:24:36.939088: step: 242/463, loss: 0.01298042293637991 2023-01-24 03:24:37.550546: step: 244/463, loss: 0.10446559637784958 2023-01-24 03:24:38.152948: step: 246/463, loss: 0.014225639402866364 2023-01-24 03:24:38.705041: step: 248/463, loss: 0.0007936302572488785 2023-01-24 03:24:39.327116: step: 250/463, loss: 0.015339717268943787 2023-01-24 03:24:39.892054: step: 252/463, loss: 0.040717143565416336 2023-01-24 03:24:40.460895: step: 254/463, loss: 0.009817059151828289 2023-01-24 03:24:41.031883: step: 256/463, loss: 0.018087003380060196 2023-01-24 03:24:41.744148: step: 258/463, loss: 0.004840802401304245 2023-01-24 03:24:42.412140: step: 260/463, loss: 0.02245919406414032 2023-01-24 03:24:43.046334: step: 262/463, loss: 0.019110403954982758 2023-01-24 03:24:43.676370: step: 264/463, loss: 0.004637002944946289 2023-01-24 03:24:44.294876: step: 266/463, loss: 0.03030952252447605 2023-01-24 03:24:44.934144: step: 268/463, loss: 0.0095899673178792 2023-01-24 03:24:45.549779: step: 270/463, loss: 0.0032416393514722586 2023-01-24 03:24:46.190476: step: 272/463, loss: 0.03164970502257347 2023-01-24 03:24:46.798018: step: 274/463, loss: 0.009296455420553684 2023-01-24 03:24:47.444142: step: 276/463, loss: 2.5594210624694824 2023-01-24 03:24:48.087137: step: 278/463, loss: 0.08889950811862946 2023-01-24 03:24:48.667455: step: 280/463, loss: 0.04368525743484497 2023-01-24 03:24:49.287120: step: 282/463, loss: 0.021617107093334198 2023-01-24 03:24:49.867700: step: 284/463, loss: 0.007380649447441101 2023-01-24 03:24:50.481117: step: 286/463, loss: 0.04279787838459015 2023-01-24 03:24:51.192555: step: 288/463, loss: 0.12959609925746918 2023-01-24 03:24:51.727011: step: 290/463, loss: 0.007202128879725933 2023-01-24 03:24:52.306670: step: 292/463, loss: 0.02553531900048256 2023-01-24 03:24:52.913584: step: 294/463, loss: 0.10999813675880432 2023-01-24 03:24:53.565018: step: 296/463, loss: 0.032414354383945465 2023-01-24 03:24:54.169781: step: 298/463, loss: 0.025089194998145103 2023-01-24 03:24:54.761858: step: 300/463, loss: 0.02359832637012005 2023-01-24 03:24:55.352630: step: 302/463, loss: 0.048460591584444046 2023-01-24 03:24:56.032330: step: 304/463, loss: 0.169951930642128 2023-01-24 03:24:56.603709: step: 306/463, loss: 0.06504900753498077 2023-01-24 03:24:57.318664: step: 308/463, loss: 0.013914791867136955 2023-01-24 03:24:57.920250: step: 310/463, loss: 0.017502907663583755 2023-01-24 03:24:58.485337: step: 312/463, loss: 0.024207260459661484 2023-01-24 03:24:59.076949: step: 314/463, loss: 0.06576535850763321 2023-01-24 03:24:59.688149: step: 316/463, loss: 0.04996226727962494 2023-01-24 03:25:00.297406: step: 318/463, loss: 0.0024642583448439837 2023-01-24 03:25:00.930180: step: 320/463, loss: 0.12623420357704163 2023-01-24 03:25:01.544493: step: 322/463, loss: 0.010801497846841812 2023-01-24 03:25:02.121555: step: 324/463, loss: 0.18569742143154144 2023-01-24 03:25:02.733470: step: 326/463, loss: 0.028876885771751404 2023-01-24 03:25:03.328104: step: 328/463, loss: 0.0309474915266037 2023-01-24 03:25:03.969873: step: 330/463, loss: 0.01327966433018446 2023-01-24 03:25:04.672751: step: 332/463, loss: 0.020495671778917313 2023-01-24 03:25:05.253794: step: 334/463, loss: 0.002778882160782814 2023-01-24 03:25:05.781867: step: 336/463, loss: 0.04927878826856613 2023-01-24 03:25:06.414267: step: 338/463, loss: 0.040122393518686295 2023-01-24 03:25:06.994356: step: 340/463, loss: 0.00938417762517929 2023-01-24 03:25:07.621641: step: 342/463, loss: 0.04312020167708397 2023-01-24 03:25:08.267586: step: 344/463, loss: 0.08035336434841156 2023-01-24 03:25:08.832264: step: 346/463, loss: 0.04796095937490463 2023-01-24 03:25:09.420948: step: 348/463, loss: 0.04057975485920906 2023-01-24 03:25:09.996705: step: 350/463, loss: 0.10430752485990524 2023-01-24 03:25:10.564409: step: 352/463, loss: 0.07370288670063019 2023-01-24 03:25:11.272678: step: 354/463, loss: 0.022665100172162056 2023-01-24 03:25:11.861293: step: 356/463, loss: 0.006624340545386076 2023-01-24 03:25:12.503272: step: 358/463, loss: 0.027500586584210396 2023-01-24 03:25:13.089351: step: 360/463, loss: 0.0032346746884286404 2023-01-24 03:25:13.689796: step: 362/463, loss: 0.03572477027773857 2023-01-24 03:25:14.280254: step: 364/463, loss: 0.027865003794431686 2023-01-24 03:25:14.925318: step: 366/463, loss: 0.05051290616393089 2023-01-24 03:25:15.502480: step: 368/463, loss: 0.005792407318949699 2023-01-24 03:25:16.111060: step: 370/463, loss: 0.28915953636169434 2023-01-24 03:25:16.670431: step: 372/463, loss: 0.002484036609530449 2023-01-24 03:25:17.269321: step: 374/463, loss: 0.013241644017398357 2023-01-24 03:25:17.878317: step: 376/463, loss: 0.031484343111515045 2023-01-24 03:25:18.539950: step: 378/463, loss: 0.0009562978520989418 2023-01-24 03:25:19.086291: step: 380/463, loss: 0.04574989899992943 2023-01-24 03:25:19.687324: step: 382/463, loss: 0.030804520472884178 2023-01-24 03:25:20.274957: step: 384/463, loss: 0.021833984181284904 2023-01-24 03:25:20.851335: step: 386/463, loss: 0.009540488943457603 2023-01-24 03:25:21.534656: step: 388/463, loss: 0.00454050162807107 2023-01-24 03:25:22.140278: step: 390/463, loss: 0.011509351432323456 2023-01-24 03:25:22.705512: step: 392/463, loss: 0.0020137266255915165 2023-01-24 03:25:23.381280: step: 394/463, loss: 0.013737120665609837 2023-01-24 03:25:23.958046: step: 396/463, loss: 0.645456850528717 2023-01-24 03:25:24.568913: step: 398/463, loss: 0.03145052492618561 2023-01-24 03:25:25.150334: step: 400/463, loss: 0.003723964560776949 2023-01-24 03:25:25.749459: step: 402/463, loss: 0.020070131868124008 2023-01-24 03:25:26.318774: step: 404/463, loss: 0.029841747134923935 2023-01-24 03:25:26.939814: step: 406/463, loss: 0.017329566180706024 2023-01-24 03:25:27.498355: step: 408/463, loss: 0.03677923604846001 2023-01-24 03:25:28.079115: step: 410/463, loss: 0.04318574070930481 2023-01-24 03:25:28.714590: step: 412/463, loss: 0.296417236328125 2023-01-24 03:25:29.343508: step: 414/463, loss: 0.07130864262580872 2023-01-24 03:25:29.998451: step: 416/463, loss: 0.03519712761044502 2023-01-24 03:25:30.576206: step: 418/463, loss: 0.02720903605222702 2023-01-24 03:25:31.185980: step: 420/463, loss: 0.03017040528357029 2023-01-24 03:25:31.806647: step: 422/463, loss: 0.2713550925254822 2023-01-24 03:25:32.412104: step: 424/463, loss: 6.255537986755371 2023-01-24 03:25:33.090511: step: 426/463, loss: 0.017811153084039688 2023-01-24 03:25:33.618773: step: 428/463, loss: 0.01626940257847309 2023-01-24 03:25:34.162113: step: 430/463, loss: 0.00044318835716694593 2023-01-24 03:25:34.734179: step: 432/463, loss: 0.05547376722097397 2023-01-24 03:25:35.356056: step: 434/463, loss: 0.010593359358608723 2023-01-24 03:25:35.974181: step: 436/463, loss: 0.06989578157663345 2023-01-24 03:25:36.604558: step: 438/463, loss: 0.03806183859705925 2023-01-24 03:25:37.180929: step: 440/463, loss: 0.035895880311727524 2023-01-24 03:25:37.773926: step: 442/463, loss: 0.0033863044809550047 2023-01-24 03:25:38.311657: step: 444/463, loss: 0.005208194255828857 2023-01-24 03:25:38.955767: step: 446/463, loss: 0.005416461266577244 2023-01-24 03:25:39.529338: step: 448/463, loss: 0.041393689811229706 2023-01-24 03:25:40.139874: step: 450/463, loss: 0.023604456335306168 2023-01-24 03:25:40.761447: step: 452/463, loss: 0.010884744115173817 2023-01-24 03:25:41.333475: step: 454/463, loss: 0.004828666802495718 2023-01-24 03:25:41.894412: step: 456/463, loss: 0.050308264791965485 2023-01-24 03:25:42.464426: step: 458/463, loss: 0.01851321943104267 2023-01-24 03:25:43.124728: step: 460/463, loss: 0.06013081967830658 2023-01-24 03:25:43.718396: step: 462/463, loss: 0.03565441817045212 2023-01-24 03:25:44.357024: step: 464/463, loss: 0.019030479714274406 2023-01-24 03:25:44.956185: step: 466/463, loss: 0.025017835199832916 2023-01-24 03:25:45.611825: step: 468/463, loss: 0.0025240499526262283 2023-01-24 03:25:46.236916: step: 470/463, loss: 6.463606357574463 2023-01-24 03:25:46.877648: step: 472/463, loss: 0.021489938721060753 2023-01-24 03:25:47.517284: step: 474/463, loss: 0.036023810505867004 2023-01-24 03:25:48.112132: step: 476/463, loss: 0.01999105140566826 2023-01-24 03:25:48.760205: step: 478/463, loss: 0.10215510427951813 2023-01-24 03:25:49.421483: step: 480/463, loss: 0.023146439343690872 2023-01-24 03:25:49.995981: step: 482/463, loss: 0.029886716976761818 2023-01-24 03:25:50.598679: step: 484/463, loss: 0.03676028922200203 2023-01-24 03:25:51.183082: step: 486/463, loss: 0.004740500822663307 2023-01-24 03:25:51.819643: step: 488/463, loss: 0.00524220010265708 2023-01-24 03:25:52.426949: step: 490/463, loss: 0.013000168837606907 2023-01-24 03:25:52.991764: step: 492/463, loss: 0.08954739570617676 2023-01-24 03:25:53.601899: step: 494/463, loss: 0.004636660683900118 2023-01-24 03:25:54.260569: step: 496/463, loss: 0.28967130184173584 2023-01-24 03:25:54.878088: step: 498/463, loss: 0.05652369558811188 2023-01-24 03:25:55.437095: step: 500/463, loss: 0.020583003759384155 2023-01-24 03:25:56.033596: step: 502/463, loss: 0.049137383699417114 2023-01-24 03:25:56.630742: step: 504/463, loss: 0.007868443615734577 2023-01-24 03:25:57.259034: step: 506/463, loss: 0.03621796518564224 2023-01-24 03:25:57.866680: step: 508/463, loss: 0.015578684397041798 2023-01-24 03:25:58.439369: step: 510/463, loss: 0.08594860881567001 2023-01-24 03:25:59.082651: step: 512/463, loss: 0.02430797927081585 2023-01-24 03:25:59.697804: step: 514/463, loss: 0.004946025088429451 2023-01-24 03:26:00.266999: step: 516/463, loss: 0.006358628161251545 2023-01-24 03:26:00.880125: step: 518/463, loss: 0.0262968260794878 2023-01-24 03:26:01.502235: step: 520/463, loss: 0.11719396710395813 2023-01-24 03:26:02.096389: step: 522/463, loss: 0.013965901918709278 2023-01-24 03:26:02.740909: step: 524/463, loss: 0.01842944510281086 2023-01-24 03:26:03.321158: step: 526/463, loss: 0.019373469054698944 2023-01-24 03:26:03.879425: step: 528/463, loss: 0.007129807490855455 2023-01-24 03:26:04.506303: step: 530/463, loss: 0.03721535950899124 2023-01-24 03:26:05.115863: step: 532/463, loss: 0.018526801839470863 2023-01-24 03:26:05.782534: step: 534/463, loss: 0.034435249865055084 2023-01-24 03:26:06.383984: step: 536/463, loss: 0.005327192135155201 2023-01-24 03:26:07.025038: step: 538/463, loss: 0.09119029343128204 2023-01-24 03:26:07.621404: step: 540/463, loss: 0.01935645192861557 2023-01-24 03:26:08.216023: step: 542/463, loss: 0.023961447179317474 2023-01-24 03:26:08.790034: step: 544/463, loss: 0.013390028849244118 2023-01-24 03:26:09.341155: step: 546/463, loss: 0.007185050286352634 2023-01-24 03:26:09.912444: step: 548/463, loss: 0.006648135371506214 2023-01-24 03:26:10.486163: step: 550/463, loss: 0.005679411347955465 2023-01-24 03:26:11.122118: step: 552/463, loss: 0.01637471467256546 2023-01-24 03:26:11.787000: step: 554/463, loss: 0.1112452819943428 2023-01-24 03:26:12.358850: step: 556/463, loss: 0.00843722838908434 2023-01-24 03:26:12.953355: step: 558/463, loss: 0.06292594969272614 2023-01-24 03:26:13.620588: step: 560/463, loss: 0.17798249423503876 2023-01-24 03:26:14.180181: step: 562/463, loss: 0.0001422189816366881 2023-01-24 03:26:14.803257: step: 564/463, loss: 0.01587064377963543 2023-01-24 03:26:15.446374: step: 566/463, loss: 0.0436808206140995 2023-01-24 03:26:16.042355: step: 568/463, loss: 0.044044796377420425 2023-01-24 03:26:16.653707: step: 570/463, loss: 0.0054512787610292435 2023-01-24 03:26:17.264300: step: 572/463, loss: 0.09136991947889328 2023-01-24 03:26:17.897231: step: 574/463, loss: 0.02294277213513851 2023-01-24 03:26:18.500426: step: 576/463, loss: 0.021406875923275948 2023-01-24 03:26:19.066220: step: 578/463, loss: 0.06447817385196686 2023-01-24 03:26:19.754446: step: 580/463, loss: 0.06923290342092514 2023-01-24 03:26:20.325667: step: 582/463, loss: 0.015940725803375244 2023-01-24 03:26:21.018033: step: 584/463, loss: 0.010096380487084389 2023-01-24 03:26:21.636028: step: 586/463, loss: 1.1466164588928223 2023-01-24 03:26:22.233886: step: 588/463, loss: 0.5154674649238586 2023-01-24 03:26:22.780150: step: 590/463, loss: 16.82015037536621 2023-01-24 03:26:23.404197: step: 592/463, loss: 0.04796361178159714 2023-01-24 03:26:24.076301: step: 594/463, loss: 0.007846719585359097 2023-01-24 03:26:24.678847: step: 596/463, loss: 0.1656419038772583 2023-01-24 03:26:25.289629: step: 598/463, loss: 0.025079844519495964 2023-01-24 03:26:25.867645: step: 600/463, loss: 0.01311549823731184 2023-01-24 03:26:26.457311: step: 602/463, loss: 0.053645290434360504 2023-01-24 03:26:27.036896: step: 604/463, loss: 0.000695643771905452 2023-01-24 03:26:27.667219: step: 606/463, loss: 0.014876767061650753 2023-01-24 03:26:28.199788: step: 608/463, loss: 0.010459434241056442 2023-01-24 03:26:28.814586: step: 610/463, loss: 0.09204825758934021 2023-01-24 03:26:29.465065: step: 612/463, loss: 0.0038285565096884966 2023-01-24 03:26:30.096429: step: 614/463, loss: 0.002252412959933281 2023-01-24 03:26:30.832836: step: 616/463, loss: 0.07031204551458359 2023-01-24 03:26:31.471642: step: 618/463, loss: 0.7204792499542236 2023-01-24 03:26:32.107526: step: 620/463, loss: 0.04958628863096237 2023-01-24 03:26:32.700643: step: 622/463, loss: 0.024105768650770187 2023-01-24 03:26:33.320953: step: 624/463, loss: 0.00809959415346384 2023-01-24 03:26:33.860905: step: 626/463, loss: 0.35124683380126953 2023-01-24 03:26:34.463677: step: 628/463, loss: 0.02890990860760212 2023-01-24 03:26:35.052958: step: 630/463, loss: 0.035276900976896286 2023-01-24 03:26:35.683997: step: 632/463, loss: 0.03078903630375862 2023-01-24 03:26:36.291339: step: 634/463, loss: 0.12095503509044647 2023-01-24 03:26:36.905220: step: 636/463, loss: 0.0423898883163929 2023-01-24 03:26:37.509865: step: 638/463, loss: 0.043148353695869446 2023-01-24 03:26:38.110010: step: 640/463, loss: 0.02438526600599289 2023-01-24 03:26:38.703867: step: 642/463, loss: 0.042559362947940826 2023-01-24 03:26:39.299799: step: 644/463, loss: 0.018337300047278404 2023-01-24 03:26:39.931057: step: 646/463, loss: 0.06092965602874756 2023-01-24 03:26:40.523645: step: 648/463, loss: 0.5253022313117981 2023-01-24 03:26:41.167497: step: 650/463, loss: 0.3532295227050781 2023-01-24 03:26:41.792783: step: 652/463, loss: 0.031348831951618195 2023-01-24 03:26:42.441150: step: 654/463, loss: 0.08186060190200806 2023-01-24 03:26:43.055735: step: 656/463, loss: 0.0018544044578447938 2023-01-24 03:26:43.657109: step: 658/463, loss: 0.03858217969536781 2023-01-24 03:26:44.274524: step: 660/463, loss: 0.032722800970077515 2023-01-24 03:26:44.901547: step: 662/463, loss: 0.036987368017435074 2023-01-24 03:26:45.505962: step: 664/463, loss: 0.03630704805254936 2023-01-24 03:26:46.182005: step: 666/463, loss: 0.0477924682199955 2023-01-24 03:26:46.789668: step: 668/463, loss: 0.01216850895434618 2023-01-24 03:26:47.415695: step: 670/463, loss: 0.024572396650910378 2023-01-24 03:26:48.007460: step: 672/463, loss: 0.010722465813159943 2023-01-24 03:26:48.572498: step: 674/463, loss: 0.17226718366146088 2023-01-24 03:26:49.165295: step: 676/463, loss: 0.0011509679025039077 2023-01-24 03:26:49.759240: step: 678/463, loss: 0.004970579408109188 2023-01-24 03:26:50.446856: step: 680/463, loss: 0.0023158614058047533 2023-01-24 03:26:51.050366: step: 682/463, loss: 0.02270033024251461 2023-01-24 03:26:51.632166: step: 684/463, loss: 0.0005886394064873457 2023-01-24 03:26:52.275720: step: 686/463, loss: 0.01853058859705925 2023-01-24 03:26:52.862583: step: 688/463, loss: 0.010849070735275745 2023-01-24 03:26:53.444442: step: 690/463, loss: 0.10974755883216858 2023-01-24 03:26:54.066302: step: 692/463, loss: 0.21199116110801697 2023-01-24 03:26:54.611408: step: 694/463, loss: 0.012342714704573154 2023-01-24 03:26:55.287824: step: 696/463, loss: 0.026952112093567848 2023-01-24 03:26:55.912587: step: 698/463, loss: 0.06262566894292831 2023-01-24 03:26:56.540182: step: 700/463, loss: 0.05700312554836273 2023-01-24 03:26:57.159746: step: 702/463, loss: 0.029804212972521782 2023-01-24 03:26:57.742817: step: 704/463, loss: 0.0506010502576828 2023-01-24 03:26:58.410168: step: 706/463, loss: 0.030028855428099632 2023-01-24 03:26:58.992426: step: 708/463, loss: 0.007984144613146782 2023-01-24 03:26:59.586449: step: 710/463, loss: 0.03824808448553085 2023-01-24 03:27:00.182063: step: 712/463, loss: 0.010975847020745277 2023-01-24 03:27:00.715004: step: 714/463, loss: 0.0386471264064312 2023-01-24 03:27:01.353531: step: 716/463, loss: 0.03903685137629509 2023-01-24 03:27:02.015584: step: 718/463, loss: 0.004551915917545557 2023-01-24 03:27:02.649752: step: 720/463, loss: 0.003930246457457542 2023-01-24 03:27:03.276876: step: 722/463, loss: 0.014148221351206303 2023-01-24 03:27:03.866439: step: 724/463, loss: 0.001101864967495203 2023-01-24 03:27:04.561834: step: 726/463, loss: 0.446911096572876 2023-01-24 03:27:05.132956: step: 728/463, loss: 0.013020098209381104 2023-01-24 03:27:05.748047: step: 730/463, loss: 0.01923557184636593 2023-01-24 03:27:06.384622: step: 732/463, loss: 0.016209620982408524 2023-01-24 03:27:07.021125: step: 734/463, loss: 0.017057260498404503 2023-01-24 03:27:07.683348: step: 736/463, loss: 0.1502707153558731 2023-01-24 03:27:08.346657: step: 738/463, loss: 0.03162705898284912 2023-01-24 03:27:08.949324: step: 740/463, loss: 0.012588385492563248 2023-01-24 03:27:09.588430: step: 742/463, loss: 0.05841897800564766 2023-01-24 03:27:10.147063: step: 744/463, loss: 0.0017107819439843297 2023-01-24 03:27:10.765832: step: 746/463, loss: 0.006520779337733984 2023-01-24 03:27:11.349459: step: 748/463, loss: 0.013491153717041016 2023-01-24 03:27:12.063597: step: 750/463, loss: 0.025427157059311867 2023-01-24 03:27:12.634270: step: 752/463, loss: 0.10344435274600983 2023-01-24 03:27:13.229820: step: 754/463, loss: 0.01465842965990305 2023-01-24 03:27:13.789760: step: 756/463, loss: 0.07367914170026779 2023-01-24 03:27:14.537771: step: 758/463, loss: 0.015110792592167854 2023-01-24 03:27:15.109705: step: 760/463, loss: 0.034766677767038345 2023-01-24 03:27:15.710150: step: 762/463, loss: 0.008609377779066563 2023-01-24 03:27:16.351584: step: 764/463, loss: 0.045852869749069214 2023-01-24 03:27:17.031062: step: 766/463, loss: 0.04151460528373718 2023-01-24 03:27:17.615783: step: 768/463, loss: 0.05072501674294472 2023-01-24 03:27:18.199561: step: 770/463, loss: 0.004216559696942568 2023-01-24 03:27:18.813714: step: 772/463, loss: 0.0033248234540224075 2023-01-24 03:27:19.460596: step: 774/463, loss: 0.027098001912236214 2023-01-24 03:27:20.060852: step: 776/463, loss: 0.04944375157356262 2023-01-24 03:27:20.698008: step: 778/463, loss: 0.39537110924720764 2023-01-24 03:27:21.283247: step: 780/463, loss: 0.11121958494186401 2023-01-24 03:27:21.900298: step: 782/463, loss: 0.01810850389301777 2023-01-24 03:27:22.460986: step: 784/463, loss: 0.02119883894920349 2023-01-24 03:27:23.295576: step: 786/463, loss: 0.232221320271492 2023-01-24 03:27:23.922167: step: 788/463, loss: 0.0020758602768182755 2023-01-24 03:27:24.617507: step: 790/463, loss: 0.009609261527657509 2023-01-24 03:27:25.336739: step: 792/463, loss: 0.024599405005574226 2023-01-24 03:27:25.981826: step: 794/463, loss: 0.01838378794491291 2023-01-24 03:27:26.600403: step: 796/463, loss: 0.06255815923213959 2023-01-24 03:27:27.205545: step: 798/463, loss: 0.053655222058296204 2023-01-24 03:27:27.828777: step: 800/463, loss: 0.03710925951600075 2023-01-24 03:27:28.426356: step: 802/463, loss: 0.005704346112906933 2023-01-24 03:27:29.065620: step: 804/463, loss: 0.009649454616010189 2023-01-24 03:27:29.644387: step: 806/463, loss: 0.0379645898938179 2023-01-24 03:27:30.204153: step: 808/463, loss: 0.06302111595869064 2023-01-24 03:27:30.814966: step: 810/463, loss: 0.007442212197929621 2023-01-24 03:27:31.469917: step: 812/463, loss: 0.04202255234122276 2023-01-24 03:27:32.144534: step: 814/463, loss: 0.41644036769866943 2023-01-24 03:27:32.753046: step: 816/463, loss: 0.043693166226148605 2023-01-24 03:27:33.377789: step: 818/463, loss: 0.0018782716942951083 2023-01-24 03:27:33.948900: step: 820/463, loss: 0.10154888033866882 2023-01-24 03:27:34.536460: step: 822/463, loss: 0.042115092277526855 2023-01-24 03:27:35.143528: step: 824/463, loss: 0.05075232312083244 2023-01-24 03:27:35.746687: step: 826/463, loss: 0.042812563478946686 2023-01-24 03:27:36.339005: step: 828/463, loss: 0.049556806683540344 2023-01-24 03:27:36.953756: step: 830/463, loss: 0.030318889766931534 2023-01-24 03:27:37.504295: step: 832/463, loss: 0.019143136218190193 2023-01-24 03:27:38.137423: step: 834/463, loss: 0.4883730113506317 2023-01-24 03:27:38.799115: step: 836/463, loss: 0.08980897814035416 2023-01-24 03:27:39.386557: step: 838/463, loss: 0.012964260764420033 2023-01-24 03:27:39.978944: step: 840/463, loss: 0.0057403347454965115 2023-01-24 03:27:40.668112: step: 842/463, loss: 0.03701893612742424 2023-01-24 03:27:41.264417: step: 844/463, loss: 0.011903492733836174 2023-01-24 03:27:41.844806: step: 846/463, loss: 0.0030617204029113054 2023-01-24 03:27:42.447573: step: 848/463, loss: 0.006507575511932373 2023-01-24 03:27:43.129637: step: 850/463, loss: 0.020978689193725586 2023-01-24 03:27:43.783643: step: 852/463, loss: 0.03329475224018097 2023-01-24 03:27:44.385291: step: 854/463, loss: 0.030348477885127068 2023-01-24 03:27:44.958367: step: 856/463, loss: 0.06927890330553055 2023-01-24 03:27:45.626319: step: 858/463, loss: 0.010020852088928223 2023-01-24 03:27:46.186691: step: 860/463, loss: 0.0009983470663428307 2023-01-24 03:27:46.869133: step: 862/463, loss: 0.2566572427749634 2023-01-24 03:27:47.426199: step: 864/463, loss: 0.0012166746892035007 2023-01-24 03:27:47.985045: step: 866/463, loss: 0.006107382941991091 2023-01-24 03:27:48.573530: step: 868/463, loss: 0.01768423244357109 2023-01-24 03:27:49.200525: step: 870/463, loss: 0.14530028402805328 2023-01-24 03:27:49.841842: step: 872/463, loss: 0.01466343179345131 2023-01-24 03:27:50.485223: step: 874/463, loss: 0.020448187366127968 2023-01-24 03:27:51.088271: step: 876/463, loss: 0.027431361377239227 2023-01-24 03:27:51.710913: step: 878/463, loss: 0.0190940760076046 2023-01-24 03:27:52.327080: step: 880/463, loss: 0.012967190705239773 2023-01-24 03:27:52.896498: step: 882/463, loss: 0.007806350942701101 2023-01-24 03:27:53.489627: step: 884/463, loss: 0.5386117696762085 2023-01-24 03:27:54.102819: step: 886/463, loss: 0.03715198487043381 2023-01-24 03:27:54.719638: step: 888/463, loss: 0.004462994169443846 2023-01-24 03:27:55.381598: step: 890/463, loss: 0.02491978369653225 2023-01-24 03:27:56.000107: step: 892/463, loss: 0.0040002549067139626 2023-01-24 03:27:56.615396: step: 894/463, loss: 0.0040604048408567905 2023-01-24 03:27:57.194347: step: 896/463, loss: 0.01509823091328144 2023-01-24 03:27:57.803667: step: 898/463, loss: 0.0037978440523147583 2023-01-24 03:27:58.442899: step: 900/463, loss: 0.0030037271790206432 2023-01-24 03:27:59.068084: step: 902/463, loss: 0.022114848718047142 2023-01-24 03:27:59.660965: step: 904/463, loss: 0.014554711058735847 2023-01-24 03:28:00.270993: step: 906/463, loss: 0.0024699338246136904 2023-01-24 03:28:00.928366: step: 908/463, loss: 0.07799001038074493 2023-01-24 03:28:01.597874: step: 910/463, loss: 0.026134997606277466 2023-01-24 03:28:02.200568: step: 912/463, loss: 0.009175576269626617 2023-01-24 03:28:02.834512: step: 914/463, loss: 0.019600925967097282 2023-01-24 03:28:03.469635: step: 916/463, loss: 0.06958712637424469 2023-01-24 03:28:04.084664: step: 918/463, loss: 0.09874261915683746 2023-01-24 03:28:04.690728: step: 920/463, loss: 0.892711341381073 2023-01-24 03:28:05.282595: step: 922/463, loss: 0.07872650772333145 2023-01-24 03:28:05.898404: step: 924/463, loss: 0.002478182315826416 2023-01-24 03:28:06.490675: step: 926/463, loss: 0.00986677035689354 ================================================== Loss: 0.124 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3283547794117647, 'r': 0.323993330728876, 'f1': 0.3261594752514186}, 'combined': 0.2403280343957821, 'epoch': 26} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3654660295111126, 'r': 0.3118898576456092, 'f1': 0.3365591119226631}, 'combined': 0.23677525461895896, 'epoch': 26} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31964596430175124, 'r': 0.31782634780667485, 'f1': 0.3187335590753904}, 'combined': 0.23485630668712976, 'epoch': 26} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36732232010925536, 'r': 0.3112303427801806, 'f1': 0.33695793370438065}, 'combined': 0.23924013293011026, 'epoch': 26} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34523890378380306, 'r': 0.332791960383628, 'f1': 0.33890118477714387}, 'combined': 0.24971666246736915, 'epoch': 26} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38525697111644214, 'r': 0.30820557689315375, 'f1': 0.34245064099239303}, 'combined': 0.24313995510459904, 'epoch': 26} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2421171171171171, 'r': 0.25595238095238093, 'f1': 0.2488425925925926}, 'combined': 0.16589506172839505, 'epoch': 26} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.30833333333333335, 'r': 0.40217391304347827, 'f1': 0.34905660377358494}, 'combined': 0.17452830188679247, 'epoch': 26} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4791666666666667, 'r': 0.19827586206896552, 'f1': 0.28048780487804875}, 'combined': 0.18699186991869915, 'epoch': 26} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3207567698259188, 'r': 0.3146703036053131, 'f1': 0.3176843869731801}, 'combined': 0.23408323250655372, 'epoch': 25} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3686692880345029, 'r': 0.31751883707683626, 'f1': 0.34118761114866797}, 'combined': 0.24224320391555423, 'epoch': 25} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3392857142857143, 'r': 0.41304347826086957, 'f1': 0.3725490196078432}, 'combined': 0.1862745098039216, 'epoch': 25} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3394620582120582, 'r': 0.3092447916666667, 'f1': 0.32364965312190286}, 'combined': 0.23847869177403366, 'epoch': 7} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36468931207169214, 'r': 0.2587193810770381, 'f1': 0.30269771384817323}, 'combined': 0.214915376832203, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 7} ****************************** Epoch: 27 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:30:37.942489: step: 2/463, loss: 0.10810081660747528 2023-01-24 03:30:38.572217: step: 4/463, loss: 0.7005396485328674 2023-01-24 03:30:39.214246: step: 6/463, loss: 0.026744473725557327 2023-01-24 03:30:39.785695: step: 8/463, loss: 0.02255084551870823 2023-01-24 03:30:40.399700: step: 10/463, loss: 0.011988055892288685 2023-01-24 03:30:41.092224: step: 12/463, loss: 0.019548138603568077 2023-01-24 03:30:41.598624: step: 14/463, loss: 0.016927901655435562 2023-01-24 03:30:42.197082: step: 16/463, loss: 0.010045030154287815 2023-01-24 03:30:42.745833: step: 18/463, loss: 0.006517099216580391 2023-01-24 03:30:43.363413: step: 20/463, loss: 0.0038709358777850866 2023-01-24 03:30:43.971406: step: 22/463, loss: 0.0017009980510920286 2023-01-24 03:30:44.541415: step: 24/463, loss: 0.016784492880105972 2023-01-24 03:30:45.127410: step: 26/463, loss: 0.0015390602638944983 2023-01-24 03:30:45.750536: step: 28/463, loss: 0.045285359025001526 2023-01-24 03:30:46.416492: step: 30/463, loss: 0.011568275280296803 2023-01-24 03:30:47.002509: step: 32/463, loss: 0.014782729558646679 2023-01-24 03:30:47.668245: step: 34/463, loss: 0.018074406310915947 2023-01-24 03:30:48.235788: step: 36/463, loss: 0.17143841087818146 2023-01-24 03:30:48.795858: step: 38/463, loss: 0.006251054350286722 2023-01-24 03:30:49.371074: step: 40/463, loss: 0.023763319477438927 2023-01-24 03:30:49.932008: step: 42/463, loss: 0.008955862373113632 2023-01-24 03:30:50.547400: step: 44/463, loss: 0.0025618700310587883 2023-01-24 03:30:51.161652: step: 46/463, loss: 0.009447536431252956 2023-01-24 03:30:51.738847: step: 48/463, loss: 0.007507863454520702 2023-01-24 03:30:52.318983: step: 50/463, loss: 0.0015535459388047457 2023-01-24 03:30:52.985297: step: 52/463, loss: 0.0035795781295746565 2023-01-24 03:30:53.586714: step: 54/463, loss: 0.022123830392956734 2023-01-24 03:30:54.149659: step: 56/463, loss: 0.0058710952289402485 2023-01-24 03:30:54.653048: step: 58/463, loss: 0.016280047595500946 2023-01-24 03:30:55.197188: step: 60/463, loss: 0.015398838557302952 2023-01-24 03:30:55.942130: step: 62/463, loss: 0.0008789595449343324 2023-01-24 03:30:56.562968: step: 64/463, loss: 0.014697149395942688 2023-01-24 03:30:57.120044: step: 66/463, loss: 0.0013884829822927713 2023-01-24 03:30:57.748712: step: 68/463, loss: 0.012525445781648159 2023-01-24 03:30:58.317014: step: 70/463, loss: 0.046970050781965256 2023-01-24 03:30:58.897609: step: 72/463, loss: 0.03554767742753029 2023-01-24 03:30:59.541252: step: 74/463, loss: 0.09572996199131012 2023-01-24 03:31:00.231948: step: 76/463, loss: 0.05901506543159485 2023-01-24 03:31:00.909413: step: 78/463, loss: 0.08767624199390411 2023-01-24 03:31:01.526234: step: 80/463, loss: 0.017257459461688995 2023-01-24 03:31:02.180685: step: 82/463, loss: 0.058720264583826065 2023-01-24 03:31:02.857351: step: 84/463, loss: 0.03411078825592995 2023-01-24 03:31:03.481030: step: 86/463, loss: 0.0692259669303894 2023-01-24 03:31:04.060711: step: 88/463, loss: 0.05717358738183975 2023-01-24 03:31:04.687585: step: 90/463, loss: 0.016284804791212082 2023-01-24 03:31:05.281022: step: 92/463, loss: 0.0006750877946615219 2023-01-24 03:31:05.962194: step: 94/463, loss: 0.14631569385528564 2023-01-24 03:31:06.513230: step: 96/463, loss: 0.013286602683365345 2023-01-24 03:31:07.142523: step: 98/463, loss: 0.04678715020418167 2023-01-24 03:31:07.759256: step: 100/463, loss: 0.02401166781783104 2023-01-24 03:31:08.369928: step: 102/463, loss: 0.012389871291816235 2023-01-24 03:31:08.960645: step: 104/463, loss: 0.002644594991579652 2023-01-24 03:31:09.506026: step: 106/463, loss: 0.019223475828766823 2023-01-24 03:31:10.053405: step: 108/463, loss: 0.003031231928616762 2023-01-24 03:31:10.636078: step: 110/463, loss: 0.09378163516521454 2023-01-24 03:31:11.210189: step: 112/463, loss: 0.008066393435001373 2023-01-24 03:31:11.819105: step: 114/463, loss: 3.0499621061608195e-05 2023-01-24 03:31:12.453169: step: 116/463, loss: 0.022303147241473198 2023-01-24 03:31:13.115082: step: 118/463, loss: 0.013993948698043823 2023-01-24 03:31:13.698595: step: 120/463, loss: 0.017989473417401314 2023-01-24 03:31:14.257609: step: 122/463, loss: 0.031204361468553543 2023-01-24 03:31:14.872463: step: 124/463, loss: 0.02864096127450466 2023-01-24 03:31:15.468825: step: 126/463, loss: 0.045707523822784424 2023-01-24 03:31:16.050057: step: 128/463, loss: 0.11493198573589325 2023-01-24 03:31:16.721244: step: 130/463, loss: 0.03350294008851051 2023-01-24 03:31:17.306140: step: 132/463, loss: 0.03603452071547508 2023-01-24 03:31:17.923699: step: 134/463, loss: 0.0008437812211923301 2023-01-24 03:31:18.560586: step: 136/463, loss: 0.013291933573782444 2023-01-24 03:31:19.137841: step: 138/463, loss: 0.008090871386229992 2023-01-24 03:31:19.719060: step: 140/463, loss: 0.0006242513773031533 2023-01-24 03:31:20.297434: step: 142/463, loss: 0.003253805683925748 2023-01-24 03:31:20.905615: step: 144/463, loss: 0.32207363843917847 2023-01-24 03:31:21.612364: step: 146/463, loss: 0.01873563975095749 2023-01-24 03:31:22.229492: step: 148/463, loss: 0.005548995919525623 2023-01-24 03:31:22.763529: step: 150/463, loss: 0.005681161303073168 2023-01-24 03:31:23.373620: step: 152/463, loss: 0.019787469878792763 2023-01-24 03:31:23.930526: step: 154/463, loss: 0.02212131768465042 2023-01-24 03:31:24.526626: step: 156/463, loss: 0.1430254578590393 2023-01-24 03:31:25.111186: step: 158/463, loss: 0.01711195707321167 2023-01-24 03:31:25.720451: step: 160/463, loss: 0.01951378956437111 2023-01-24 03:31:26.277033: step: 162/463, loss: 0.9124522805213928 2023-01-24 03:31:26.909373: step: 164/463, loss: 0.037225160747766495 2023-01-24 03:31:27.515444: step: 166/463, loss: 0.046085603535175323 2023-01-24 03:31:28.069914: step: 168/463, loss: 0.007119439542293549 2023-01-24 03:31:28.830245: step: 170/463, loss: 0.005905916914343834 2023-01-24 03:31:29.379688: step: 172/463, loss: 0.013775191269814968 2023-01-24 03:31:30.019882: step: 174/463, loss: 0.025137480348348618 2023-01-24 03:31:30.631851: step: 176/463, loss: 0.034491345286369324 2023-01-24 03:31:31.201776: step: 178/463, loss: 0.004135912284255028 2023-01-24 03:31:31.790300: step: 180/463, loss: 0.09544871747493744 2023-01-24 03:31:32.368839: step: 182/463, loss: 0.06452227383852005 2023-01-24 03:31:32.928141: step: 184/463, loss: 0.029999999329447746 2023-01-24 03:31:33.526741: step: 186/463, loss: 0.028755823150277138 2023-01-24 03:31:34.128549: step: 188/463, loss: 0.19333359599113464 2023-01-24 03:31:34.704737: step: 190/463, loss: 0.010623103007674217 2023-01-24 03:31:35.286035: step: 192/463, loss: 0.020895952358841896 2023-01-24 03:31:35.922720: step: 194/463, loss: 0.014245104975998402 2023-01-24 03:31:36.530639: step: 196/463, loss: 0.061241455376148224 2023-01-24 03:31:37.162846: step: 198/463, loss: 0.8357118964195251 2023-01-24 03:31:37.813746: step: 200/463, loss: 0.012351348996162415 2023-01-24 03:31:38.428681: step: 202/463, loss: 0.0004974502953700721 2023-01-24 03:31:39.091064: step: 204/463, loss: 0.04945747181773186 2023-01-24 03:31:39.768143: step: 206/463, loss: 0.018848076462745667 2023-01-24 03:31:40.435559: step: 208/463, loss: 0.025442900136113167 2023-01-24 03:31:41.058785: step: 210/463, loss: 0.007649167440831661 2023-01-24 03:31:41.581473: step: 212/463, loss: 0.08098553121089935 2023-01-24 03:31:42.123297: step: 214/463, loss: 0.0014854449545964599 2023-01-24 03:31:42.732892: step: 216/463, loss: 0.08141910284757614 2023-01-24 03:31:43.311331: step: 218/463, loss: 0.014644263312220573 2023-01-24 03:31:43.896010: step: 220/463, loss: 0.0175805501639843 2023-01-24 03:31:44.536423: step: 222/463, loss: 0.01827835477888584 2023-01-24 03:31:45.116324: step: 224/463, loss: 0.014490031637251377 2023-01-24 03:31:45.782795: step: 226/463, loss: 0.005981156136840582 2023-01-24 03:31:46.435484: step: 228/463, loss: 0.017368996515870094 2023-01-24 03:31:47.014324: step: 230/463, loss: 0.11148547381162643 2023-01-24 03:31:47.623207: step: 232/463, loss: 0.00483257882297039 2023-01-24 03:31:48.180188: step: 234/463, loss: 0.02425764501094818 2023-01-24 03:31:48.732522: step: 236/463, loss: 0.00909570325165987 2023-01-24 03:31:49.363723: step: 238/463, loss: 0.035875167697668076 2023-01-24 03:31:49.964151: step: 240/463, loss: 0.05804046243429184 2023-01-24 03:31:50.561515: step: 242/463, loss: 0.019320111721754074 2023-01-24 03:31:51.169846: step: 244/463, loss: 0.8347344398498535 2023-01-24 03:31:51.769489: step: 246/463, loss: 0.007950733415782452 2023-01-24 03:31:52.319041: step: 248/463, loss: 0.015348824672400951 2023-01-24 03:31:52.966759: step: 250/463, loss: 0.053146492689847946 2023-01-24 03:31:53.589174: step: 252/463, loss: 0.26163047552108765 2023-01-24 03:31:54.164651: step: 254/463, loss: 0.14615389704704285 2023-01-24 03:31:54.731171: step: 256/463, loss: 0.0035487122368067503 2023-01-24 03:31:55.343664: step: 258/463, loss: 0.10561247169971466 2023-01-24 03:31:55.928949: step: 260/463, loss: 0.3660181164741516 2023-01-24 03:31:56.543764: step: 262/463, loss: 0.4390641748905182 2023-01-24 03:31:57.165142: step: 264/463, loss: 0.05702337622642517 2023-01-24 03:31:57.885350: step: 266/463, loss: 0.013443867675960064 2023-01-24 03:31:58.478253: step: 268/463, loss: 0.0829615443944931 2023-01-24 03:31:59.079121: step: 270/463, loss: 0.002018239814788103 2023-01-24 03:31:59.697165: step: 272/463, loss: 0.021229976788163185 2023-01-24 03:32:00.347617: step: 274/463, loss: 0.018657097592949867 2023-01-24 03:32:00.955098: step: 276/463, loss: 0.10127482563257217 2023-01-24 03:32:01.532623: step: 278/463, loss: 0.0008183062891475856 2023-01-24 03:32:02.106818: step: 280/463, loss: 1.054456353187561 2023-01-24 03:32:02.703418: step: 282/463, loss: 0.028098413720726967 2023-01-24 03:32:03.362490: step: 284/463, loss: 0.03384342044591904 2023-01-24 03:32:03.966358: step: 286/463, loss: 0.043100256472826004 2023-01-24 03:32:04.529554: step: 288/463, loss: 0.001659592380747199 2023-01-24 03:32:05.120811: step: 290/463, loss: 0.0021801237016916275 2023-01-24 03:32:05.704525: step: 292/463, loss: 0.012636505998671055 2023-01-24 03:32:06.395144: step: 294/463, loss: 0.015940986573696136 2023-01-24 03:32:07.013177: step: 296/463, loss: 2.171403408050537 2023-01-24 03:32:07.538161: step: 298/463, loss: 0.0048016756772994995 2023-01-24 03:32:08.170392: step: 300/463, loss: 0.027753757312893867 2023-01-24 03:32:08.817567: step: 302/463, loss: 0.06045348942279816 2023-01-24 03:32:09.471632: step: 304/463, loss: 0.06265581399202347 2023-01-24 03:32:10.120840: step: 306/463, loss: 0.05294894427061081 2023-01-24 03:32:10.749114: step: 308/463, loss: 0.004108102060854435 2023-01-24 03:32:11.335115: step: 310/463, loss: 0.023528404533863068 2023-01-24 03:32:11.903729: step: 312/463, loss: 0.06614720076322556 2023-01-24 03:32:12.587328: step: 314/463, loss: 0.059751853346824646 2023-01-24 03:32:13.154050: step: 316/463, loss: 0.008474999107420444 2023-01-24 03:32:13.801591: step: 318/463, loss: 0.03981170058250427 2023-01-24 03:32:14.461405: step: 320/463, loss: 0.033365726470947266 2023-01-24 03:32:15.060597: step: 322/463, loss: 0.0185982845723629 2023-01-24 03:32:15.658451: step: 324/463, loss: 0.05968184769153595 2023-01-24 03:32:16.224814: step: 326/463, loss: 0.0076089827343821526 2023-01-24 03:32:16.821039: step: 328/463, loss: 0.0015208962140604854 2023-01-24 03:32:17.395368: step: 330/463, loss: 0.009716600179672241 2023-01-24 03:32:18.021228: step: 332/463, loss: 0.0012459717690944672 2023-01-24 03:32:18.560686: step: 334/463, loss: 0.055479153990745544 2023-01-24 03:32:19.231778: step: 336/463, loss: 0.007042493671178818 2023-01-24 03:32:19.804683: step: 338/463, loss: 0.040745802223682404 2023-01-24 03:32:20.413853: step: 340/463, loss: 0.001430749543942511 2023-01-24 03:32:21.041648: step: 342/463, loss: 0.04804610460996628 2023-01-24 03:32:21.638770: step: 344/463, loss: 0.02269960753619671 2023-01-24 03:32:22.222680: step: 346/463, loss: 0.008948476985096931 2023-01-24 03:32:22.761391: step: 348/463, loss: 0.025630393996834755 2023-01-24 03:32:23.343563: step: 350/463, loss: 0.009737594984471798 2023-01-24 03:32:23.985458: step: 352/463, loss: 0.06916005909442902 2023-01-24 03:32:24.576674: step: 354/463, loss: 0.011646032333374023 2023-01-24 03:32:25.155771: step: 356/463, loss: 0.06140728294849396 2023-01-24 03:32:25.756193: step: 358/463, loss: 0.0034357132390141487 2023-01-24 03:32:26.354155: step: 360/463, loss: 0.0025694253854453564 2023-01-24 03:32:26.947986: step: 362/463, loss: 0.0033465770538896322 2023-01-24 03:32:27.633267: step: 364/463, loss: 0.0182969868183136 2023-01-24 03:32:28.362304: step: 366/463, loss: 0.3954954743385315 2023-01-24 03:32:28.970610: step: 368/463, loss: 0.10175547748804092 2023-01-24 03:32:29.554454: step: 370/463, loss: 0.697431206703186 2023-01-24 03:32:30.182113: step: 372/463, loss: 0.03084360808134079 2023-01-24 03:32:30.881935: step: 374/463, loss: 0.007076824549585581 2023-01-24 03:32:31.498741: step: 376/463, loss: 0.01868254691362381 2023-01-24 03:32:32.063282: step: 378/463, loss: 0.022236432880163193 2023-01-24 03:32:32.610456: step: 380/463, loss: 0.020475313067436218 2023-01-24 03:32:33.199991: step: 382/463, loss: 0.006963799707591534 2023-01-24 03:32:33.781900: step: 384/463, loss: 0.11570148915052414 2023-01-24 03:32:34.384364: step: 386/463, loss: 0.04060349613428116 2023-01-24 03:32:35.005845: step: 388/463, loss: 0.037258777767419815 2023-01-24 03:32:35.657143: step: 390/463, loss: 0.02341860719025135 2023-01-24 03:32:36.254296: step: 392/463, loss: 0.0038023367524147034 2023-01-24 03:32:36.930673: step: 394/463, loss: 0.03924330696463585 2023-01-24 03:32:37.557239: step: 396/463, loss: 0.05705631151795387 2023-01-24 03:32:38.113539: step: 398/463, loss: 0.0667424350976944 2023-01-24 03:32:38.766133: step: 400/463, loss: 0.021884499117732048 2023-01-24 03:32:39.367002: step: 402/463, loss: 0.22581720352172852 2023-01-24 03:32:39.935767: step: 404/463, loss: 0.008024967275559902 2023-01-24 03:32:40.522408: step: 406/463, loss: 0.0023133379872888327 2023-01-24 03:32:41.125807: step: 408/463, loss: 0.0016851628897711635 2023-01-24 03:32:41.749846: step: 410/463, loss: 0.002136522438377142 2023-01-24 03:32:42.366877: step: 412/463, loss: 0.025797491893172264 2023-01-24 03:32:42.913705: step: 414/463, loss: 0.003049284452572465 2023-01-24 03:32:43.544951: step: 416/463, loss: 0.0210671778768301 2023-01-24 03:32:44.158135: step: 418/463, loss: 0.009987413883209229 2023-01-24 03:32:44.853599: step: 420/463, loss: 0.0023306147195398808 2023-01-24 03:32:45.438648: step: 422/463, loss: 0.02188442088663578 2023-01-24 03:32:46.039603: step: 424/463, loss: 0.009603126905858517 2023-01-24 03:32:46.638109: step: 426/463, loss: 0.07842833548784256 2023-01-24 03:32:47.294805: step: 428/463, loss: 0.030183803290128708 2023-01-24 03:32:47.897257: step: 430/463, loss: 0.003511171555146575 2023-01-24 03:32:48.525277: step: 432/463, loss: 0.00016248953761532903 2023-01-24 03:32:49.142744: step: 434/463, loss: 0.044615406543016434 2023-01-24 03:32:49.754278: step: 436/463, loss: 0.12006062269210815 2023-01-24 03:32:50.377816: step: 438/463, loss: 0.01151471957564354 2023-01-24 03:32:51.004925: step: 440/463, loss: 0.014761421829462051 2023-01-24 03:32:51.642049: step: 442/463, loss: 0.024571551010012627 2023-01-24 03:32:52.295754: step: 444/463, loss: 0.05245470628142357 2023-01-24 03:32:52.911949: step: 446/463, loss: 0.017844708636403084 2023-01-24 03:32:53.566944: step: 448/463, loss: 0.02095145918428898 2023-01-24 03:32:54.179096: step: 450/463, loss: 0.010195751674473286 2023-01-24 03:32:54.778067: step: 452/463, loss: 0.0019804544281214476 2023-01-24 03:32:55.327960: step: 454/463, loss: 0.04100055247545242 2023-01-24 03:32:55.898162: step: 456/463, loss: 0.02135274186730385 2023-01-24 03:32:56.527798: step: 458/463, loss: 0.01571192592382431 2023-01-24 03:32:57.112706: step: 460/463, loss: 0.016840903088450432 2023-01-24 03:32:57.633514: step: 462/463, loss: 0.011004820466041565 2023-01-24 03:32:58.197058: step: 464/463, loss: 0.030369063839316368 2023-01-24 03:32:58.782953: step: 466/463, loss: 0.0020900126546621323 2023-01-24 03:32:59.401789: step: 468/463, loss: 0.14339910447597504 2023-01-24 03:32:59.953483: step: 470/463, loss: 0.02705247886478901 2023-01-24 03:33:00.555634: step: 472/463, loss: 0.08540941774845123 2023-01-24 03:33:01.144377: step: 474/463, loss: 0.005210475064814091 2023-01-24 03:33:01.819303: step: 476/463, loss: 0.04303750768303871 2023-01-24 03:33:02.403710: step: 478/463, loss: 0.008578203618526459 2023-01-24 03:33:03.014464: step: 480/463, loss: 0.021204667165875435 2023-01-24 03:33:03.553037: step: 482/463, loss: 0.013729634694755077 2023-01-24 03:33:04.120018: step: 484/463, loss: 0.4580730199813843 2023-01-24 03:33:04.707821: step: 486/463, loss: 0.16728097200393677 2023-01-24 03:33:05.304649: step: 488/463, loss: 0.007054818328469992 2023-01-24 03:33:05.952448: step: 490/463, loss: 0.0013441347982734442 2023-01-24 03:33:06.576070: step: 492/463, loss: 0.031044242903590202 2023-01-24 03:33:07.158275: step: 494/463, loss: 0.0023791478015482426 2023-01-24 03:33:07.871385: step: 496/463, loss: 0.27703148126602173 2023-01-24 03:33:08.456173: step: 498/463, loss: 0.026965584605932236 2023-01-24 03:33:09.088346: step: 500/463, loss: 0.14814478158950806 2023-01-24 03:33:09.655468: step: 502/463, loss: 0.019771523773670197 2023-01-24 03:33:10.181632: step: 504/463, loss: 0.0008140561985783279 2023-01-24 03:33:10.779679: step: 506/463, loss: 0.12422594428062439 2023-01-24 03:33:11.415911: step: 508/463, loss: 0.04691915214061737 2023-01-24 03:33:12.020910: step: 510/463, loss: 0.020429035648703575 2023-01-24 03:33:12.640784: step: 512/463, loss: 0.008157152682542801 2023-01-24 03:33:13.304895: step: 514/463, loss: 0.029862575232982635 2023-01-24 03:33:13.945952: step: 516/463, loss: 0.1043967753648758 2023-01-24 03:33:14.562908: step: 518/463, loss: 0.03785110265016556 2023-01-24 03:33:15.121799: step: 520/463, loss: 0.01049832534044981 2023-01-24 03:33:15.789364: step: 522/463, loss: 0.050049059092998505 2023-01-24 03:33:16.382493: step: 524/463, loss: 0.044682711362838745 2023-01-24 03:33:16.960284: step: 526/463, loss: 0.011775128543376923 2023-01-24 03:33:17.527377: step: 528/463, loss: 0.028339875862002373 2023-01-24 03:33:18.128409: step: 530/463, loss: 0.05144622549414635 2023-01-24 03:33:18.741098: step: 532/463, loss: 0.02373473159968853 2023-01-24 03:33:19.461161: step: 534/463, loss: 0.1648435890674591 2023-01-24 03:33:20.074031: step: 536/463, loss: 0.028965381905436516 2023-01-24 03:33:20.698683: step: 538/463, loss: 0.014665551483631134 2023-01-24 03:33:21.322718: step: 540/463, loss: 0.02409207820892334 2023-01-24 03:33:21.945971: step: 542/463, loss: 0.011589819565415382 2023-01-24 03:33:22.588831: step: 544/463, loss: 0.004896755330264568 2023-01-24 03:33:23.192234: step: 546/463, loss: 0.028788888826966286 2023-01-24 03:33:23.854484: step: 548/463, loss: 0.01309922430664301 2023-01-24 03:33:24.531686: step: 550/463, loss: 0.09015355259180069 2023-01-24 03:33:25.179582: step: 552/463, loss: 0.002236754633486271 2023-01-24 03:33:25.866313: step: 554/463, loss: 0.026113120838999748 2023-01-24 03:33:26.479442: step: 556/463, loss: 0.0016027316451072693 2023-01-24 03:33:27.018927: step: 558/463, loss: 0.004909296985715628 2023-01-24 03:33:27.636284: step: 560/463, loss: 0.007274754345417023 2023-01-24 03:33:28.203193: step: 562/463, loss: 0.07633068412542343 2023-01-24 03:33:28.868632: step: 564/463, loss: 0.005412163212895393 2023-01-24 03:33:29.444616: step: 566/463, loss: 0.08457167446613312 2023-01-24 03:33:30.028472: step: 568/463, loss: 0.024759851396083832 2023-01-24 03:33:30.695176: step: 570/463, loss: 0.001575371832586825 2023-01-24 03:33:31.329749: step: 572/463, loss: 0.02197249047458172 2023-01-24 03:33:31.943419: step: 574/463, loss: 0.007283914368599653 2023-01-24 03:33:32.614282: step: 576/463, loss: 0.026442022994160652 2023-01-24 03:33:33.227179: step: 578/463, loss: 0.005099490284919739 2023-01-24 03:33:33.908697: step: 580/463, loss: 0.007485698442906141 2023-01-24 03:33:34.573009: step: 582/463, loss: 0.07185299694538116 2023-01-24 03:33:35.209540: step: 584/463, loss: 3.755336284637451 2023-01-24 03:33:35.821115: step: 586/463, loss: 0.0181448794901371 2023-01-24 03:33:36.404167: step: 588/463, loss: 0.006662244442850351 2023-01-24 03:33:37.006079: step: 590/463, loss: 0.11142446100711823 2023-01-24 03:33:37.671511: step: 592/463, loss: 0.009003101848065853 2023-01-24 03:33:38.284954: step: 594/463, loss: 0.05388050526380539 2023-01-24 03:33:38.871042: step: 596/463, loss: 0.2398618459701538 2023-01-24 03:33:39.458109: step: 598/463, loss: 0.004974694922566414 2023-01-24 03:33:40.109039: step: 600/463, loss: 0.012414185330271721 2023-01-24 03:33:40.643600: step: 602/463, loss: 6.222172737121582 2023-01-24 03:33:41.287894: step: 604/463, loss: 0.05887557938694954 2023-01-24 03:33:41.873961: step: 606/463, loss: 0.042618248611688614 2023-01-24 03:33:42.475790: step: 608/463, loss: 0.040541745722293854 2023-01-24 03:33:43.132451: step: 610/463, loss: 0.0050706276670098305 2023-01-24 03:33:43.737131: step: 612/463, loss: 0.020333915948867798 2023-01-24 03:33:44.323769: step: 614/463, loss: 0.03240125626325607 2023-01-24 03:33:45.020325: step: 616/463, loss: 0.059388697147369385 2023-01-24 03:33:45.701796: step: 618/463, loss: 0.026850087568163872 2023-01-24 03:33:46.302306: step: 620/463, loss: 0.06632231920957565 2023-01-24 03:33:46.839660: step: 622/463, loss: 0.006276223808526993 2023-01-24 03:33:47.430725: step: 624/463, loss: 0.010272067040205002 2023-01-24 03:33:48.077506: step: 626/463, loss: 0.03971908241510391 2023-01-24 03:33:48.683364: step: 628/463, loss: 0.010558020323514938 2023-01-24 03:33:49.282197: step: 630/463, loss: 0.0003640044014900923 2023-01-24 03:33:49.883773: step: 632/463, loss: 0.08950820565223694 2023-01-24 03:33:50.485711: step: 634/463, loss: 0.02039186842739582 2023-01-24 03:33:51.115288: step: 636/463, loss: 0.008002677001059055 2023-01-24 03:33:51.799847: step: 638/463, loss: 0.05598391219973564 2023-01-24 03:33:52.393984: step: 640/463, loss: 0.025696421042084694 2023-01-24 03:33:52.999231: step: 642/463, loss: 0.02529430016875267 2023-01-24 03:33:53.778227: step: 644/463, loss: 0.020288467407226562 2023-01-24 03:33:54.414973: step: 646/463, loss: 0.20400039851665497 2023-01-24 03:33:55.150744: step: 648/463, loss: 0.03771872818470001 2023-01-24 03:33:55.770530: step: 650/463, loss: 0.045595426112413406 2023-01-24 03:33:56.331361: step: 652/463, loss: 0.0006583099020645022 2023-01-24 03:33:56.976297: step: 654/463, loss: 0.02377987653017044 2023-01-24 03:33:57.622619: step: 656/463, loss: 4.1291985511779785 2023-01-24 03:33:58.147251: step: 658/463, loss: 0.005685825366526842 2023-01-24 03:33:58.739682: step: 660/463, loss: 0.030626436695456505 2023-01-24 03:33:59.321716: step: 662/463, loss: 0.010232782922685146 2023-01-24 03:33:59.885044: step: 664/463, loss: 6.291020690696314e-05 2023-01-24 03:34:00.492792: step: 666/463, loss: 0.010630270466208458 2023-01-24 03:34:01.105517: step: 668/463, loss: 0.03143630176782608 2023-01-24 03:34:01.676015: step: 670/463, loss: 0.030541373416781425 2023-01-24 03:34:02.254189: step: 672/463, loss: 0.08802639693021774 2023-01-24 03:34:02.898227: step: 674/463, loss: 0.053573381155729294 2023-01-24 03:34:03.510525: step: 676/463, loss: 0.017686206847429276 2023-01-24 03:34:04.141939: step: 678/463, loss: 0.024272581562399864 2023-01-24 03:34:04.709850: step: 680/463, loss: 0.09272061288356781 2023-01-24 03:34:05.289650: step: 682/463, loss: 0.037139296531677246 2023-01-24 03:34:05.901460: step: 684/463, loss: 0.0037428420037031174 2023-01-24 03:34:06.558530: step: 686/463, loss: 0.16962510347366333 2023-01-24 03:34:07.153125: step: 688/463, loss: 0.007502918131649494 2023-01-24 03:34:07.747916: step: 690/463, loss: 0.005123793613165617 2023-01-24 03:34:08.287442: step: 692/463, loss: 0.0011209029471501708 2023-01-24 03:34:08.877596: step: 694/463, loss: 0.002184850163757801 2023-01-24 03:34:09.571564: step: 696/463, loss: 0.025770273059606552 2023-01-24 03:34:10.190116: step: 698/463, loss: 0.015004278160631657 2023-01-24 03:34:10.729006: step: 700/463, loss: 0.033192068338394165 2023-01-24 03:34:11.373435: step: 702/463, loss: 0.029985714703798294 2023-01-24 03:34:12.013877: step: 704/463, loss: 0.05722131207585335 2023-01-24 03:34:12.641613: step: 706/463, loss: 0.04271312430500984 2023-01-24 03:34:13.218477: step: 708/463, loss: 0.0034064296633005142 2023-01-24 03:34:13.841862: step: 710/463, loss: 0.004297764040529728 2023-01-24 03:34:14.438043: step: 712/463, loss: 0.01988023705780506 2023-01-24 03:34:15.037682: step: 714/463, loss: 0.017208542674779892 2023-01-24 03:34:15.754083: step: 716/463, loss: 0.034595783799886703 2023-01-24 03:34:16.435324: step: 718/463, loss: 0.031115800142288208 2023-01-24 03:34:17.066106: step: 720/463, loss: 0.04353680834174156 2023-01-24 03:34:17.696234: step: 722/463, loss: 0.03923806920647621 2023-01-24 03:34:18.308888: step: 724/463, loss: 0.0055387746542692184 2023-01-24 03:34:18.890254: step: 726/463, loss: 0.0060660806484520435 2023-01-24 03:34:19.527772: step: 728/463, loss: 0.0197359099984169 2023-01-24 03:34:20.130824: step: 730/463, loss: 0.015321108512580395 2023-01-24 03:34:20.832732: step: 732/463, loss: 0.09007415920495987 2023-01-24 03:34:21.337815: step: 734/463, loss: 0.00329673383384943 2023-01-24 03:34:21.976437: step: 736/463, loss: 0.005438614170998335 2023-01-24 03:34:22.625525: step: 738/463, loss: 0.07912580668926239 2023-01-24 03:34:23.190956: step: 740/463, loss: 0.031550608575344086 2023-01-24 03:34:23.791714: step: 742/463, loss: 0.0314120277762413 2023-01-24 03:34:24.402147: step: 744/463, loss: 0.07189822942018509 2023-01-24 03:34:25.029233: step: 746/463, loss: 0.1546442210674286 2023-01-24 03:34:25.561717: step: 748/463, loss: 0.08666994422674179 2023-01-24 03:34:26.150918: step: 750/463, loss: 0.026747409254312515 2023-01-24 03:34:26.865988: step: 752/463, loss: 0.004589967895299196 2023-01-24 03:34:27.508017: step: 754/463, loss: 0.020737668499350548 2023-01-24 03:34:28.100633: step: 756/463, loss: 0.05693775787949562 2023-01-24 03:34:28.746016: step: 758/463, loss: 0.020858045667409897 2023-01-24 03:34:29.390141: step: 760/463, loss: 0.05491577088832855 2023-01-24 03:34:29.982703: step: 762/463, loss: 0.006807927507907152 2023-01-24 03:34:30.572626: step: 764/463, loss: 0.010334908030927181 2023-01-24 03:34:31.175341: step: 766/463, loss: 0.04307602718472481 2023-01-24 03:34:31.744848: step: 768/463, loss: 0.002000360516831279 2023-01-24 03:34:32.315863: step: 770/463, loss: 0.01827973872423172 2023-01-24 03:34:32.929916: step: 772/463, loss: 0.06460175663232803 2023-01-24 03:34:33.515840: step: 774/463, loss: 0.002909178612753749 2023-01-24 03:34:34.062832: step: 776/463, loss: 0.041658952832221985 2023-01-24 03:34:34.694348: step: 778/463, loss: 0.03283506631851196 2023-01-24 03:34:35.285649: step: 780/463, loss: 0.08127497136592865 2023-01-24 03:34:35.889323: step: 782/463, loss: 0.17956604063510895 2023-01-24 03:34:36.503788: step: 784/463, loss: 0.0005525234737433493 2023-01-24 03:34:37.064412: step: 786/463, loss: 0.03993967920541763 2023-01-24 03:34:37.585435: step: 788/463, loss: 0.3063792586326599 2023-01-24 03:34:38.197835: step: 790/463, loss: 0.00014687444490846246 2023-01-24 03:34:38.824449: step: 792/463, loss: 0.1285935938358307 2023-01-24 03:34:39.475328: step: 794/463, loss: 0.013791092671453953 2023-01-24 03:34:40.108819: step: 796/463, loss: 0.018938418477773666 2023-01-24 03:34:40.702323: step: 798/463, loss: 0.05720193311572075 2023-01-24 03:34:41.254530: step: 800/463, loss: 0.06429475545883179 2023-01-24 03:34:41.858352: step: 802/463, loss: 0.009252408519387245 2023-01-24 03:34:42.497028: step: 804/463, loss: 0.0028328222688287497 2023-01-24 03:34:43.131882: step: 806/463, loss: 0.02547774650156498 2023-01-24 03:34:43.761618: step: 808/463, loss: 0.09550485014915466 2023-01-24 03:34:44.323776: step: 810/463, loss: 0.014451436698436737 2023-01-24 03:34:44.932064: step: 812/463, loss: 0.017834436148405075 2023-01-24 03:34:45.483351: step: 814/463, loss: 0.001944947405718267 2023-01-24 03:34:46.132393: step: 816/463, loss: 0.4038428068161011 2023-01-24 03:34:46.707842: step: 818/463, loss: 0.010683872736990452 2023-01-24 03:34:47.312550: step: 820/463, loss: 0.42256858944892883 2023-01-24 03:34:47.972008: step: 822/463, loss: 0.032819997519254684 2023-01-24 03:34:48.598711: step: 824/463, loss: 0.008480922318994999 2023-01-24 03:34:49.225225: step: 826/463, loss: 0.025343777611851692 2023-01-24 03:34:49.885950: step: 828/463, loss: 0.029101723805069923 2023-01-24 03:34:50.508368: step: 830/463, loss: 0.007121522445231676 2023-01-24 03:34:51.060989: step: 832/463, loss: 0.005402555223554373 2023-01-24 03:34:51.812266: step: 834/463, loss: 29.403575897216797 2023-01-24 03:34:52.490457: step: 836/463, loss: 0.014429937116801739 2023-01-24 03:34:53.086268: step: 838/463, loss: 0.009643325582146645 2023-01-24 03:34:53.624715: step: 840/463, loss: 0.0009644138626754284 2023-01-24 03:34:54.231353: step: 842/463, loss: 0.0006747469888068736 2023-01-24 03:34:54.876939: step: 844/463, loss: 0.12073063105344772 2023-01-24 03:34:55.529031: step: 846/463, loss: 0.025475937873125076 2023-01-24 03:34:56.152095: step: 848/463, loss: 0.3224758207798004 2023-01-24 03:34:56.780828: step: 850/463, loss: 0.048331018537282944 2023-01-24 03:34:57.484388: step: 852/463, loss: 0.15742968022823334 2023-01-24 03:34:57.999449: step: 854/463, loss: 0.0025444370694458485 2023-01-24 03:34:58.609428: step: 856/463, loss: 0.023655910044908524 2023-01-24 03:34:59.178602: step: 858/463, loss: 0.026293465867638588 2023-01-24 03:34:59.787409: step: 860/463, loss: 0.02204839512705803 2023-01-24 03:35:00.392251: step: 862/463, loss: 0.01141758356243372 2023-01-24 03:35:00.982136: step: 864/463, loss: 0.028984904289245605 2023-01-24 03:35:01.632963: step: 866/463, loss: 0.017688116058707237 2023-01-24 03:35:02.188621: step: 868/463, loss: 0.01317436620593071 2023-01-24 03:35:02.806297: step: 870/463, loss: 0.044012319296598434 2023-01-24 03:35:03.424307: step: 872/463, loss: 0.03740110620856285 2023-01-24 03:35:04.049474: step: 874/463, loss: 0.12904725968837738 2023-01-24 03:35:04.716916: step: 876/463, loss: 0.13272738456726074 2023-01-24 03:35:05.339109: step: 878/463, loss: 0.011271555908024311 2023-01-24 03:35:05.959922: step: 880/463, loss: 0.007409457117319107 2023-01-24 03:35:06.641063: step: 882/463, loss: 0.007528779562562704 2023-01-24 03:35:07.280618: step: 884/463, loss: 0.03765789791941643 2023-01-24 03:35:07.902363: step: 886/463, loss: 0.006987639702856541 2023-01-24 03:35:08.513808: step: 888/463, loss: 0.02598324604332447 2023-01-24 03:35:09.069001: step: 890/463, loss: 0.008604762144386768 2023-01-24 03:35:09.640166: step: 892/463, loss: 0.003988097421824932 2023-01-24 03:35:10.251710: step: 894/463, loss: 0.5083101391792297 2023-01-24 03:35:10.836358: step: 896/463, loss: 0.05885894596576691 2023-01-24 03:35:11.479874: step: 898/463, loss: 0.017299719154834747 2023-01-24 03:35:12.100277: step: 900/463, loss: 0.0748797208070755 2023-01-24 03:35:12.739052: step: 902/463, loss: 0.008220141753554344 2023-01-24 03:35:13.348586: step: 904/463, loss: 0.0016796613344922662 2023-01-24 03:35:13.992021: step: 906/463, loss: 0.009716348722577095 2023-01-24 03:35:14.604968: step: 908/463, loss: 0.02669823355972767 2023-01-24 03:35:15.156311: step: 910/463, loss: 0.02601594105362892 2023-01-24 03:35:15.785076: step: 912/463, loss: 0.041832294315099716 2023-01-24 03:35:16.333904: step: 914/463, loss: 0.011544491164386272 2023-01-24 03:35:16.962761: step: 916/463, loss: 0.020028000697493553 2023-01-24 03:35:17.578118: step: 918/463, loss: 0.08814343810081482 2023-01-24 03:35:18.259030: step: 920/463, loss: 0.02797769382596016 2023-01-24 03:35:18.878251: step: 922/463, loss: 0.006204092875123024 2023-01-24 03:35:19.539183: step: 924/463, loss: 0.011144855059683323 2023-01-24 03:35:20.150160: step: 926/463, loss: 0.010132873430848122 ================================================== Loss: 0.151 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.322691279979145, 'r': 0.33554994578476555, 'f1': 0.3289950166112957}, 'combined': 0.24241738066095472, 'epoch': 27} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.362185520658355, 'r': 0.3290009834252597, 'f1': 0.34479664106570423}, 'combined': 0.24257050125225424, 'epoch': 27} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3230914154652687, 'r': 0.3341267958796422, 'f1': 0.3285164578891258}, 'combined': 0.242064758444619, 'epoch': 27} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3551986295176832, 'r': 0.32079457377905946, 'f1': 0.3371211201749676}, 'combined': 0.23935599532422697, 'epoch': 27} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3430845461269288, 'r': 0.3476416463601138, 'f1': 0.34534806339638074}, 'combined': 0.2544669940815437, 'epoch': 27} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3695130790947199, 'r': 0.31335999982617735, 'f1': 0.339127788091657}, 'combined': 0.24078072954507646, 'epoch': 27} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2693798449612403, 'r': 0.33095238095238094, 'f1': 0.297008547008547}, 'combined': 0.198005698005698, 'epoch': 27} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.26, 'r': 0.2826086956521739, 'f1': 0.27083333333333337}, 'combined': 0.13541666666666669, 'epoch': 27} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2857142857142857, 'r': 0.13793103448275862, 'f1': 0.18604651162790697}, 'combined': 0.12403100775193798, 'epoch': 27} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3207567698259188, 'r': 0.3146703036053131, 'f1': 0.3176843869731801}, 'combined': 0.23408323250655372, 'epoch': 25} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3686692880345029, 'r': 0.31751883707683626, 'f1': 0.34118761114866797}, 'combined': 0.24224320391555423, 'epoch': 25} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3392857142857143, 'r': 0.41304347826086957, 'f1': 0.3725490196078432}, 'combined': 0.1862745098039216, 'epoch': 25} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3394620582120582, 'r': 0.3092447916666667, 'f1': 0.32364965312190286}, 'combined': 0.23847869177403366, 'epoch': 7} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36468931207169214, 'r': 0.2587193810770381, 'f1': 0.30269771384817323}, 'combined': 0.214915376832203, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 7} ****************************** Epoch: 28 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:37:53.626471: step: 2/463, loss: 0.0094114039093256 2023-01-24 03:37:54.250789: step: 4/463, loss: 0.018685780465602875 2023-01-24 03:37:54.868803: step: 6/463, loss: 0.03451846167445183 2023-01-24 03:37:55.481579: step: 8/463, loss: 3.0645575523376465 2023-01-24 03:37:56.054267: step: 10/463, loss: 0.01305769756436348 2023-01-24 03:37:56.599737: step: 12/463, loss: 0.029703734442591667 2023-01-24 03:37:57.161708: step: 14/463, loss: 0.022056741639971733 2023-01-24 03:37:57.822849: step: 16/463, loss: 0.0272580087184906 2023-01-24 03:37:58.433684: step: 18/463, loss: 0.023659275844693184 2023-01-24 03:37:59.047138: step: 20/463, loss: 0.0013171244645491242 2023-01-24 03:37:59.645383: step: 22/463, loss: 0.23501162230968475 2023-01-24 03:38:00.226950: step: 24/463, loss: 0.00974312238395214 2023-01-24 03:38:00.774465: step: 26/463, loss: 0.0005643118638545275 2023-01-24 03:38:01.377555: step: 28/463, loss: 0.011606477200984955 2023-01-24 03:38:01.931454: step: 30/463, loss: 0.006575976498425007 2023-01-24 03:38:02.524412: step: 32/463, loss: 0.005622623022645712 2023-01-24 03:38:03.052275: step: 34/463, loss: 0.00031984649831429124 2023-01-24 03:38:03.667665: step: 36/463, loss: 0.0028818915598094463 2023-01-24 03:38:04.306978: step: 38/463, loss: 0.0102292625233531 2023-01-24 03:38:04.882243: step: 40/463, loss: 0.0094269635155797 2023-01-24 03:38:05.512903: step: 42/463, loss: 0.09355154633522034 2023-01-24 03:38:06.080867: step: 44/463, loss: 0.0012203538790345192 2023-01-24 03:38:06.652116: step: 46/463, loss: 0.002033093711361289 2023-01-24 03:38:07.204084: step: 48/463, loss: 0.054403308779001236 2023-01-24 03:38:07.772326: step: 50/463, loss: 0.003431854536756873 2023-01-24 03:38:08.323974: step: 52/463, loss: 0.0537886768579483 2023-01-24 03:38:08.913559: step: 54/463, loss: 0.07206442952156067 2023-01-24 03:38:09.514918: step: 56/463, loss: 0.02260226011276245 2023-01-24 03:38:10.200188: step: 58/463, loss: 0.03864336386322975 2023-01-24 03:38:10.806030: step: 60/463, loss: 0.010186826810240746 2023-01-24 03:38:11.444377: step: 62/463, loss: 0.03418241813778877 2023-01-24 03:38:12.059704: step: 64/463, loss: 0.0029639401473104954 2023-01-24 03:38:12.683379: step: 66/463, loss: 0.011508042924106121 2023-01-24 03:38:13.355920: step: 68/463, loss: 0.021908538416028023 2023-01-24 03:38:13.927403: step: 70/463, loss: 0.019057676196098328 2023-01-24 03:38:14.506743: step: 72/463, loss: 0.0021469180937856436 2023-01-24 03:38:15.059207: step: 74/463, loss: 0.005736156366765499 2023-01-24 03:38:15.649166: step: 76/463, loss: 0.07120401412248611 2023-01-24 03:38:16.235974: step: 78/463, loss: 0.08171851933002472 2023-01-24 03:38:16.843781: step: 80/463, loss: 0.04262061417102814 2023-01-24 03:38:17.430504: step: 82/463, loss: 0.00713580846786499 2023-01-24 03:38:18.010009: step: 84/463, loss: 0.09724953770637512 2023-01-24 03:38:18.633197: step: 86/463, loss: 0.028251199051737785 2023-01-24 03:38:19.277702: step: 88/463, loss: 0.0009444767492823303 2023-01-24 03:38:19.887527: step: 90/463, loss: 0.02227882668375969 2023-01-24 03:38:20.493484: step: 92/463, loss: 0.042087260633707047 2023-01-24 03:38:21.081367: step: 94/463, loss: 0.020812494680285454 2023-01-24 03:38:21.652889: step: 96/463, loss: 0.0022624318953603506 2023-01-24 03:38:22.306147: step: 98/463, loss: 0.01456405594944954 2023-01-24 03:38:22.916467: step: 100/463, loss: 0.018219057470560074 2023-01-24 03:38:23.520193: step: 102/463, loss: 0.04269354045391083 2023-01-24 03:38:24.194696: step: 104/463, loss: 0.1175481528043747 2023-01-24 03:38:24.801793: step: 106/463, loss: 1.071733832359314 2023-01-24 03:38:25.407791: step: 108/463, loss: 0.02362072467803955 2023-01-24 03:38:25.949301: step: 110/463, loss: 0.0014922478003427386 2023-01-24 03:38:26.535718: step: 112/463, loss: 0.02679336443543434 2023-01-24 03:38:27.184467: step: 114/463, loss: 0.0065894965082407 2023-01-24 03:38:27.737177: step: 116/463, loss: 0.019396642223000526 2023-01-24 03:38:28.325717: step: 118/463, loss: 0.00023608235642313957 2023-01-24 03:38:29.011970: step: 120/463, loss: 0.005696733016520739 2023-01-24 03:38:29.659581: step: 122/463, loss: 0.008305935189127922 2023-01-24 03:38:30.315310: step: 124/463, loss: 0.0009965893113985658 2023-01-24 03:38:31.052683: step: 126/463, loss: 0.03738455846905708 2023-01-24 03:38:31.693530: step: 128/463, loss: 0.011120429262518883 2023-01-24 03:38:32.289153: step: 130/463, loss: 0.02868262492120266 2023-01-24 03:38:32.997512: step: 132/463, loss: 0.0014348744880408049 2023-01-24 03:38:33.558419: step: 134/463, loss: 0.0028967875987291336 2023-01-24 03:38:34.156760: step: 136/463, loss: 0.016666606068611145 2023-01-24 03:38:34.706630: step: 138/463, loss: 0.12573203444480896 2023-01-24 03:38:35.334310: step: 140/463, loss: 0.03234294056892395 2023-01-24 03:38:35.881145: step: 142/463, loss: 0.032723743468523026 2023-01-24 03:38:36.486907: step: 144/463, loss: 0.0040484098717570305 2023-01-24 03:38:37.174490: step: 146/463, loss: 0.0064890724606812 2023-01-24 03:38:37.776253: step: 148/463, loss: 0.9297333359718323 2023-01-24 03:38:38.402282: step: 150/463, loss: 0.029401717707514763 2023-01-24 03:38:39.026971: step: 152/463, loss: 0.026337511837482452 2023-01-24 03:38:39.606609: step: 154/463, loss: 0.44838017225265503 2023-01-24 03:38:40.193164: step: 156/463, loss: 0.008685996755957603 2023-01-24 03:38:40.798699: step: 158/463, loss: 0.06602252274751663 2023-01-24 03:38:41.424840: step: 160/463, loss: 0.09414766728878021 2023-01-24 03:38:42.038139: step: 162/463, loss: 0.013883078470826149 2023-01-24 03:38:42.722059: step: 164/463, loss: 0.05239792913198471 2023-01-24 03:38:43.300487: step: 166/463, loss: 0.005629599094390869 2023-01-24 03:38:43.930254: step: 168/463, loss: 0.0012871966464444995 2023-01-24 03:38:44.578962: step: 170/463, loss: 0.03614477813243866 2023-01-24 03:38:45.171426: step: 172/463, loss: 0.009129579178988934 2023-01-24 03:38:45.731021: step: 174/463, loss: 0.054187651723623276 2023-01-24 03:38:46.440145: step: 176/463, loss: 0.14119203388690948 2023-01-24 03:38:47.093766: step: 178/463, loss: 0.02736249379813671 2023-01-24 03:38:47.707072: step: 180/463, loss: 0.002471701707690954 2023-01-24 03:38:48.334546: step: 182/463, loss: 0.00560792488977313 2023-01-24 03:38:48.963520: step: 184/463, loss: 0.004041376523673534 2023-01-24 03:38:49.572146: step: 186/463, loss: 0.01592455804347992 2023-01-24 03:38:50.147195: step: 188/463, loss: 0.002086956985294819 2023-01-24 03:38:50.767877: step: 190/463, loss: 0.012361017055809498 2023-01-24 03:38:51.431592: step: 192/463, loss: 0.005097741261124611 2023-01-24 03:38:52.025852: step: 194/463, loss: 0.055650629103183746 2023-01-24 03:38:52.586288: step: 196/463, loss: 0.009985994547605515 2023-01-24 03:38:53.270787: step: 198/463, loss: 0.0005802005180157721 2023-01-24 03:38:53.944785: step: 200/463, loss: 0.04267861694097519 2023-01-24 03:38:54.600128: step: 202/463, loss: 0.0027537494897842407 2023-01-24 03:38:55.169425: step: 204/463, loss: 0.0033441137056797743 2023-01-24 03:38:55.797794: step: 206/463, loss: 0.005019445437937975 2023-01-24 03:38:56.382164: step: 208/463, loss: 0.03665948286652565 2023-01-24 03:38:57.013581: step: 210/463, loss: 0.008905143477022648 2023-01-24 03:38:57.620062: step: 212/463, loss: 0.01699245534837246 2023-01-24 03:38:58.256594: step: 214/463, loss: 0.09151430428028107 2023-01-24 03:38:58.852783: step: 216/463, loss: 0.001789913629181683 2023-01-24 03:38:59.488456: step: 218/463, loss: 0.029690571129322052 2023-01-24 03:39:00.076162: step: 220/463, loss: 0.05167558789253235 2023-01-24 03:39:00.699247: step: 222/463, loss: 0.06293807923793793 2023-01-24 03:39:01.382598: step: 224/463, loss: 0.011712886393070221 2023-01-24 03:39:02.009123: step: 226/463, loss: 0.0312473326921463 2023-01-24 03:39:02.626913: step: 228/463, loss: 0.025774087756872177 2023-01-24 03:39:03.280348: step: 230/463, loss: 0.009725930169224739 2023-01-24 03:39:03.950242: step: 232/463, loss: 0.01459576841443777 2023-01-24 03:39:04.524678: step: 234/463, loss: 0.06974886357784271 2023-01-24 03:39:05.099675: step: 236/463, loss: 0.00025815912522375584 2023-01-24 03:39:05.633890: step: 238/463, loss: 0.009435570798814297 2023-01-24 03:39:06.238467: step: 240/463, loss: 0.019668634980916977 2023-01-24 03:39:06.806457: step: 242/463, loss: 0.019598564133048058 2023-01-24 03:39:07.368559: step: 244/463, loss: 0.01707700826227665 2023-01-24 03:39:07.950276: step: 246/463, loss: 0.00201628846116364 2023-01-24 03:39:08.484927: step: 248/463, loss: 0.07893752306699753 2023-01-24 03:39:09.091221: step: 250/463, loss: 0.004160521551966667 2023-01-24 03:39:09.684138: step: 252/463, loss: 0.020496880635619164 2023-01-24 03:39:10.306312: step: 254/463, loss: 0.06230583041906357 2023-01-24 03:39:10.912272: step: 256/463, loss: 0.03881112113595009 2023-01-24 03:39:11.515841: step: 258/463, loss: 0.04203377664089203 2023-01-24 03:39:12.208910: step: 260/463, loss: 0.009571530856192112 2023-01-24 03:39:12.806585: step: 262/463, loss: 0.02805357426404953 2023-01-24 03:39:13.392682: step: 264/463, loss: 0.0027334801852703094 2023-01-24 03:39:14.045631: step: 266/463, loss: 0.04806035757064819 2023-01-24 03:39:14.667252: step: 268/463, loss: 0.010828250087797642 2023-01-24 03:39:15.335849: step: 270/463, loss: 0.00046235573245212436 2023-01-24 03:39:15.902292: step: 272/463, loss: 0.035727664828300476 2023-01-24 03:39:16.606450: step: 274/463, loss: 0.005988476797938347 2023-01-24 03:39:17.221645: step: 276/463, loss: 0.0008796174079179764 2023-01-24 03:39:17.858313: step: 278/463, loss: 0.015921253710985184 2023-01-24 03:39:18.511842: step: 280/463, loss: 0.06030372157692909 2023-01-24 03:39:19.119183: step: 282/463, loss: 0.0024519022554159164 2023-01-24 03:39:19.829842: step: 284/463, loss: 0.005946831312030554 2023-01-24 03:39:20.427627: step: 286/463, loss: 0.006681984756141901 2023-01-24 03:39:21.057007: step: 288/463, loss: 0.03429936617612839 2023-01-24 03:39:21.673509: step: 290/463, loss: 0.8806936144828796 2023-01-24 03:39:22.272594: step: 292/463, loss: 0.01708347536623478 2023-01-24 03:39:22.875022: step: 294/463, loss: 0.028100555762648582 2023-01-24 03:39:23.452288: step: 296/463, loss: 0.027679922059178352 2023-01-24 03:39:24.047945: step: 298/463, loss: 0.02088392898440361 2023-01-24 03:39:24.619768: step: 300/463, loss: 0.020939256995916367 2023-01-24 03:39:25.276975: step: 302/463, loss: 0.01078676339238882 2023-01-24 03:39:25.883121: step: 304/463, loss: 0.02099660597741604 2023-01-24 03:39:26.475859: step: 306/463, loss: 0.04372677206993103 2023-01-24 03:39:27.039892: step: 308/463, loss: 0.003208781126886606 2023-01-24 03:39:27.675702: step: 310/463, loss: 0.1561008244752884 2023-01-24 03:39:28.365361: step: 312/463, loss: 0.04627768695354462 2023-01-24 03:39:29.011230: step: 314/463, loss: 0.01110000442713499 2023-01-24 03:39:29.595962: step: 316/463, loss: 0.2112077921628952 2023-01-24 03:39:30.194896: step: 318/463, loss: 0.0017648686189204454 2023-01-24 03:39:30.810419: step: 320/463, loss: 0.006241344381123781 2023-01-24 03:39:31.431589: step: 322/463, loss: 0.013828374445438385 2023-01-24 03:39:31.999146: step: 324/463, loss: 0.14416570961475372 2023-01-24 03:39:32.587660: step: 326/463, loss: 0.00909028947353363 2023-01-24 03:39:33.221749: step: 328/463, loss: 0.03809945657849312 2023-01-24 03:39:33.772799: step: 330/463, loss: 0.015388833358883858 2023-01-24 03:39:34.343432: step: 332/463, loss: 0.01017171423882246 2023-01-24 03:39:34.959747: step: 334/463, loss: 0.033783961087465286 2023-01-24 03:39:35.547120: step: 336/463, loss: 0.02200094610452652 2023-01-24 03:39:36.182558: step: 338/463, loss: 0.005440219305455685 2023-01-24 03:39:36.819650: step: 340/463, loss: 0.009808176197111607 2023-01-24 03:39:37.393531: step: 342/463, loss: 0.04874523729085922 2023-01-24 03:39:38.054769: step: 344/463, loss: 0.01786033809185028 2023-01-24 03:39:38.628965: step: 346/463, loss: 0.014066345058381557 2023-01-24 03:39:39.201379: step: 348/463, loss: 0.0058465502224862576 2023-01-24 03:39:39.862472: step: 350/463, loss: 0.04660263657569885 2023-01-24 03:39:40.554205: step: 352/463, loss: 0.00616673706099391 2023-01-24 03:39:41.198539: step: 354/463, loss: 0.02571684494614601 2023-01-24 03:39:41.810530: step: 356/463, loss: 1.9225596189498901 2023-01-24 03:39:42.429505: step: 358/463, loss: 0.01210328470915556 2023-01-24 03:39:43.083153: step: 360/463, loss: 0.1056513637304306 2023-01-24 03:39:43.691000: step: 362/463, loss: 0.02444971725344658 2023-01-24 03:39:44.323049: step: 364/463, loss: 0.006076412741094828 2023-01-24 03:39:44.903367: step: 366/463, loss: 0.0009519129525870085 2023-01-24 03:39:45.562066: step: 368/463, loss: 0.03343582898378372 2023-01-24 03:39:46.116674: step: 370/463, loss: 0.015244416892528534 2023-01-24 03:39:46.768896: step: 372/463, loss: 0.06221891567111015 2023-01-24 03:39:47.364400: step: 374/463, loss: 0.004993189126253128 2023-01-24 03:39:48.028156: step: 376/463, loss: 0.06571966409683228 2023-01-24 03:39:48.652178: step: 378/463, loss: 0.026839129626750946 2023-01-24 03:39:49.322711: step: 380/463, loss: 0.0077428556978702545 2023-01-24 03:39:49.918669: step: 382/463, loss: 0.010815393179655075 2023-01-24 03:39:50.551965: step: 384/463, loss: 0.0032293631229549646 2023-01-24 03:39:51.090919: step: 386/463, loss: 0.006198230665177107 2023-01-24 03:39:51.720280: step: 388/463, loss: 0.0001307763159275055 2023-01-24 03:39:52.297530: step: 390/463, loss: 0.028234299272298813 2023-01-24 03:39:52.853410: step: 392/463, loss: 0.023824693635106087 2023-01-24 03:39:53.414799: step: 394/463, loss: 0.012242504395544529 2023-01-24 03:39:54.000039: step: 396/463, loss: 0.05077259987592697 2023-01-24 03:39:54.669710: step: 398/463, loss: 0.010380647145211697 2023-01-24 03:39:55.343774: step: 400/463, loss: 0.045438408851623535 2023-01-24 03:39:55.940403: step: 402/463, loss: 0.0843072310090065 2023-01-24 03:39:56.522512: step: 404/463, loss: 0.02419174648821354 2023-01-24 03:39:57.119343: step: 406/463, loss: 0.018402796238660812 2023-01-24 03:39:57.722326: step: 408/463, loss: 0.0003849692875519395 2023-01-24 03:39:58.414037: step: 410/463, loss: 0.09641210734844208 2023-01-24 03:39:59.119375: step: 412/463, loss: 0.009553419426083565 2023-01-24 03:39:59.755076: step: 414/463, loss: 0.01568782515823841 2023-01-24 03:40:00.321621: step: 416/463, loss: 0.0003593371657188982 2023-01-24 03:40:00.920234: step: 418/463, loss: 0.0017379079945385456 2023-01-24 03:40:01.581957: step: 420/463, loss: 0.0013122939271852374 2023-01-24 03:40:02.163096: step: 422/463, loss: 0.027147578075528145 2023-01-24 03:40:02.765211: step: 424/463, loss: 0.016015416011214256 2023-01-24 03:40:03.619267: step: 426/463, loss: 0.008706910535693169 2023-01-24 03:40:04.238995: step: 428/463, loss: 0.21891699731349945 2023-01-24 03:40:04.849543: step: 430/463, loss: 0.00027748767752200365 2023-01-24 03:40:05.464992: step: 432/463, loss: 0.007370186969637871 2023-01-24 03:40:06.157375: step: 434/463, loss: 0.017778893932700157 2023-01-24 03:40:06.679510: step: 436/463, loss: 0.0158107727766037 2023-01-24 03:40:07.305873: step: 438/463, loss: 0.010479582473635674 2023-01-24 03:40:07.868991: step: 440/463, loss: 0.002477864967659116 2023-01-24 03:40:08.418924: step: 442/463, loss: 0.011267402209341526 2023-01-24 03:40:09.047520: step: 444/463, loss: 0.034514401108026505 2023-01-24 03:40:09.598896: step: 446/463, loss: 0.011906526982784271 2023-01-24 03:40:10.139270: step: 448/463, loss: 0.019880026578903198 2023-01-24 03:40:10.729480: step: 450/463, loss: 0.014738235622644424 2023-01-24 03:40:11.338498: step: 452/463, loss: 0.060199178755283356 2023-01-24 03:40:11.920899: step: 454/463, loss: 0.023058969527482986 2023-01-24 03:40:12.626087: step: 456/463, loss: 0.03245280683040619 2023-01-24 03:40:13.191509: step: 458/463, loss: 0.042753927409648895 2023-01-24 03:40:13.768867: step: 460/463, loss: 0.012718960642814636 2023-01-24 03:40:14.375333: step: 462/463, loss: 0.002615524223074317 2023-01-24 03:40:14.957375: step: 464/463, loss: 0.0018019424751400948 2023-01-24 03:40:15.543740: step: 466/463, loss: 0.1324785202741623 2023-01-24 03:40:16.133948: step: 468/463, loss: 0.04881930351257324 2023-01-24 03:40:16.747692: step: 470/463, loss: 0.10375823080539703 2023-01-24 03:40:17.298140: step: 472/463, loss: 0.029996084049344063 2023-01-24 03:40:17.935619: step: 474/463, loss: 0.0023371244315057993 2023-01-24 03:40:18.541371: step: 476/463, loss: 0.011762343347072601 2023-01-24 03:40:19.154033: step: 478/463, loss: 8.540609269402921e-05 2023-01-24 03:40:19.775976: step: 480/463, loss: 0.05414687097072601 2023-01-24 03:40:20.344072: step: 482/463, loss: 0.03130633383989334 2023-01-24 03:40:20.946581: step: 484/463, loss: 0.00214497372508049 2023-01-24 03:40:21.561057: step: 486/463, loss: 0.0071307518519461155 2023-01-24 03:40:22.145420: step: 488/463, loss: 0.014421028085052967 2023-01-24 03:40:22.720442: step: 490/463, loss: 0.002405288629233837 2023-01-24 03:40:23.338187: step: 492/463, loss: 0.13037297129631042 2023-01-24 03:40:23.970546: step: 494/463, loss: 0.023149263113737106 2023-01-24 03:40:24.589370: step: 496/463, loss: 0.005963576026260853 2023-01-24 03:40:25.346935: step: 498/463, loss: 0.055207520723342896 2023-01-24 03:40:25.892665: step: 500/463, loss: 0.052463822066783905 2023-01-24 03:40:26.572865: step: 502/463, loss: 0.2744207978248596 2023-01-24 03:40:27.187812: step: 504/463, loss: 0.004651006311178207 2023-01-24 03:40:27.807460: step: 506/463, loss: 0.018878160044550896 2023-01-24 03:40:28.448167: step: 508/463, loss: 0.005654373671859503 2023-01-24 03:40:28.996457: step: 510/463, loss: 0.002270003082230687 2023-01-24 03:40:29.577549: step: 512/463, loss: 0.010357078164815903 2023-01-24 03:40:30.159711: step: 514/463, loss: 0.008263622410595417 2023-01-24 03:40:30.806957: step: 516/463, loss: 0.0056144422851502895 2023-01-24 03:40:31.433279: step: 518/463, loss: 0.039998725056648254 2023-01-24 03:40:32.008127: step: 520/463, loss: 0.10791662335395813 2023-01-24 03:40:32.666911: step: 522/463, loss: 0.041056033223867416 2023-01-24 03:40:33.307016: step: 524/463, loss: 0.014341713860630989 2023-01-24 03:40:33.895360: step: 526/463, loss: 0.014691070653498173 2023-01-24 03:40:34.509520: step: 528/463, loss: 0.007194605655968189 2023-01-24 03:40:35.149312: step: 530/463, loss: 0.006286827381700277 2023-01-24 03:40:35.773946: step: 532/463, loss: 0.009523698128759861 2023-01-24 03:40:36.370424: step: 534/463, loss: 0.019739393144845963 2023-01-24 03:40:36.971325: step: 536/463, loss: 0.015433765016496181 2023-01-24 03:40:37.562663: step: 538/463, loss: 0.03792887553572655 2023-01-24 03:40:38.195708: step: 540/463, loss: 0.010310611687600613 2023-01-24 03:40:38.821234: step: 542/463, loss: 0.03257577866315842 2023-01-24 03:40:39.412886: step: 544/463, loss: 0.007608963176608086 2023-01-24 03:40:40.003844: step: 546/463, loss: 0.06911224871873856 2023-01-24 03:40:40.619792: step: 548/463, loss: 0.013060934841632843 2023-01-24 03:40:41.228496: step: 550/463, loss: 0.0040275235660374165 2023-01-24 03:40:41.847387: step: 552/463, loss: 0.2817552089691162 2023-01-24 03:40:42.464355: step: 554/463, loss: 0.023738233372569084 2023-01-24 03:40:43.071086: step: 556/463, loss: 0.006241391412913799 2023-01-24 03:40:43.647263: step: 558/463, loss: 0.01831924356520176 2023-01-24 03:40:44.239757: step: 560/463, loss: 0.09170430153608322 2023-01-24 03:40:44.919442: step: 562/463, loss: 0.39634883403778076 2023-01-24 03:40:45.504108: step: 564/463, loss: 0.0022586702834814787 2023-01-24 03:40:46.110836: step: 566/463, loss: 0.00035654715611599386 2023-01-24 03:40:46.748047: step: 568/463, loss: 0.0026433414313942194 2023-01-24 03:40:47.309800: step: 570/463, loss: 0.054576147347688675 2023-01-24 03:40:47.935564: step: 572/463, loss: 0.009118203073740005 2023-01-24 03:40:48.571492: step: 574/463, loss: 0.0007201213156804442 2023-01-24 03:40:49.141602: step: 576/463, loss: 0.004443039186298847 2023-01-24 03:40:49.807922: step: 578/463, loss: 0.008360975421965122 2023-01-24 03:40:50.397008: step: 580/463, loss: 0.004762766882777214 2023-01-24 03:40:50.945785: step: 582/463, loss: 0.0008809916325844824 2023-01-24 03:40:51.555269: step: 584/463, loss: 0.15798285603523254 2023-01-24 03:40:52.168149: step: 586/463, loss: 0.06177102401852608 2023-01-24 03:40:52.838019: step: 588/463, loss: 0.014172892086207867 2023-01-24 03:40:53.527142: step: 590/463, loss: 0.02769440785050392 2023-01-24 03:40:54.154555: step: 592/463, loss: 0.029659157618880272 2023-01-24 03:40:54.687064: step: 594/463, loss: 0.0017867953283712268 2023-01-24 03:40:55.320900: step: 596/463, loss: 0.007080799899995327 2023-01-24 03:40:55.869026: step: 598/463, loss: 0.012448270805180073 2023-01-24 03:40:56.499673: step: 600/463, loss: 0.0012532654218375683 2023-01-24 03:40:57.202038: step: 602/463, loss: 0.19566722214221954 2023-01-24 03:40:57.740332: step: 604/463, loss: 0.01258488092571497 2023-01-24 03:40:58.377229: step: 606/463, loss: 0.027136001735925674 2023-01-24 03:40:59.024030: step: 608/463, loss: 0.028170375153422356 2023-01-24 03:40:59.573404: step: 610/463, loss: 0.009966004639863968 2023-01-24 03:41:00.166382: step: 612/463, loss: 0.0473136380314827 2023-01-24 03:41:00.842592: step: 614/463, loss: 0.010855158790946007 2023-01-24 03:41:01.424160: step: 616/463, loss: 0.9202931523323059 2023-01-24 03:41:01.985084: step: 618/463, loss: 0.012152641080319881 2023-01-24 03:41:02.611404: step: 620/463, loss: 0.09394562989473343 2023-01-24 03:41:03.247251: step: 622/463, loss: 0.004621611908078194 2023-01-24 03:41:03.840799: step: 624/463, loss: 0.05052351579070091 2023-01-24 03:41:04.466600: step: 626/463, loss: 0.0034582207445055246 2023-01-24 03:41:05.059622: step: 628/463, loss: 0.005184145178645849 2023-01-24 03:41:05.617682: step: 630/463, loss: 0.03857438638806343 2023-01-24 03:41:06.189290: step: 632/463, loss: 0.010583666153252125 2023-01-24 03:41:06.769708: step: 634/463, loss: 0.009097250178456306 2023-01-24 03:41:07.357520: step: 636/463, loss: 0.010491464287042618 2023-01-24 03:41:07.953041: step: 638/463, loss: 0.04440511390566826 2023-01-24 03:41:08.560810: step: 640/463, loss: 0.14334440231323242 2023-01-24 03:41:09.246165: step: 642/463, loss: 0.010508140549063683 2023-01-24 03:41:09.810828: step: 644/463, loss: 0.007657899055629969 2023-01-24 03:41:10.417281: step: 646/463, loss: 0.031026851385831833 2023-01-24 03:41:11.074135: step: 648/463, loss: 0.008932389318943024 2023-01-24 03:41:11.683715: step: 650/463, loss: 0.010069259442389011 2023-01-24 03:41:12.299245: step: 652/463, loss: 0.17673733830451965 2023-01-24 03:41:12.898714: step: 654/463, loss: 0.001381595153361559 2023-01-24 03:41:13.518205: step: 656/463, loss: 0.025268323719501495 2023-01-24 03:41:14.107146: step: 658/463, loss: 0.003486864734441042 2023-01-24 03:41:14.735136: step: 660/463, loss: 0.028655294328927994 2023-01-24 03:41:15.320155: step: 662/463, loss: 0.029254019260406494 2023-01-24 03:41:15.936697: step: 664/463, loss: 0.7486350536346436 2023-01-24 03:41:16.514378: step: 666/463, loss: 0.008631639182567596 2023-01-24 03:41:17.120444: step: 668/463, loss: 0.006991442292928696 2023-01-24 03:41:17.725478: step: 670/463, loss: 0.010942311026155949 2023-01-24 03:41:18.379073: step: 672/463, loss: 0.0010776869021356106 2023-01-24 03:41:18.980606: step: 674/463, loss: 0.04713824391365051 2023-01-24 03:41:19.626613: step: 676/463, loss: 0.008728515356779099 2023-01-24 03:41:20.251871: step: 678/463, loss: 0.016712427139282227 2023-01-24 03:41:20.853318: step: 680/463, loss: 0.006846567150205374 2023-01-24 03:41:21.428151: step: 682/463, loss: 0.011238078586757183 2023-01-24 03:41:22.032062: step: 684/463, loss: 0.027104878798127174 2023-01-24 03:41:22.710025: step: 686/463, loss: 0.0318196639418602 2023-01-24 03:41:23.292593: step: 688/463, loss: 0.08023776113986969 2023-01-24 03:41:23.917969: step: 690/463, loss: 0.06684182584285736 2023-01-24 03:41:24.533467: step: 692/463, loss: 0.015705466270446777 2023-01-24 03:41:25.085501: step: 694/463, loss: 0.0036078535486012697 2023-01-24 03:41:25.789399: step: 696/463, loss: 0.036834534257650375 2023-01-24 03:41:26.439667: step: 698/463, loss: 0.004487849771976471 2023-01-24 03:41:27.126450: step: 700/463, loss: 0.016603006049990654 2023-01-24 03:41:27.693765: step: 702/463, loss: 0.02072596363723278 2023-01-24 03:41:28.278616: step: 704/463, loss: 0.00033220628392882645 2023-01-24 03:41:28.859629: step: 706/463, loss: 0.0004275748797226697 2023-01-24 03:41:29.511793: step: 708/463, loss: 0.04547533392906189 2023-01-24 03:41:30.188177: step: 710/463, loss: 1.3034616708755493 2023-01-24 03:41:30.747101: step: 712/463, loss: 0.060133978724479675 2023-01-24 03:41:31.360540: step: 714/463, loss: 0.04757550358772278 2023-01-24 03:41:32.030478: step: 716/463, loss: 0.003463569562882185 2023-01-24 03:41:32.696359: step: 718/463, loss: 0.22638371586799622 2023-01-24 03:41:33.337907: step: 720/463, loss: 0.053261350840330124 2023-01-24 03:41:34.005437: step: 722/463, loss: 0.01680070161819458 2023-01-24 03:41:34.641570: step: 724/463, loss: 0.007585356943309307 2023-01-24 03:41:35.243011: step: 726/463, loss: 0.07690487802028656 2023-01-24 03:41:35.842621: step: 728/463, loss: 0.006224292330443859 2023-01-24 03:41:36.446292: step: 730/463, loss: 0.011508041992783546 2023-01-24 03:41:37.095602: step: 732/463, loss: 0.11315851658582687 2023-01-24 03:41:37.680296: step: 734/463, loss: 0.009591448120772839 2023-01-24 03:41:38.255570: step: 736/463, loss: 0.013155676424503326 2023-01-24 03:41:38.851562: step: 738/463, loss: 0.06940951943397522 2023-01-24 03:41:39.433575: step: 740/463, loss: 0.025186529383063316 2023-01-24 03:41:40.028063: step: 742/463, loss: 0.10394732654094696 2023-01-24 03:41:40.610983: step: 744/463, loss: 0.018419858068227768 2023-01-24 03:41:41.149054: step: 746/463, loss: 0.014440762810409069 2023-01-24 03:41:41.778465: step: 748/463, loss: 0.0007638024399057031 2023-01-24 03:41:42.395210: step: 750/463, loss: 0.020407652482390404 2023-01-24 03:41:42.935689: step: 752/463, loss: 0.2658552825450897 2023-01-24 03:41:43.621929: step: 754/463, loss: 0.0038578954990953207 2023-01-24 03:41:44.176702: step: 756/463, loss: 0.013122270815074444 2023-01-24 03:41:44.786866: step: 758/463, loss: 0.03840680792927742 2023-01-24 03:41:45.354452: step: 760/463, loss: 0.06689032912254333 2023-01-24 03:41:45.998376: step: 762/463, loss: 0.0030081712175160646 2023-01-24 03:41:46.675995: step: 764/463, loss: 0.15457689762115479 2023-01-24 03:41:47.252181: step: 766/463, loss: 0.003761076834052801 2023-01-24 03:41:47.873674: step: 768/463, loss: 0.007147878874093294 2023-01-24 03:41:48.476442: step: 770/463, loss: 0.0002053235803032294 2023-01-24 03:41:49.020374: step: 772/463, loss: 0.012021270580589771 2023-01-24 03:41:49.605801: step: 774/463, loss: 0.06681259721517563 2023-01-24 03:41:50.228136: step: 776/463, loss: 0.058772627264261246 2023-01-24 03:41:50.871472: step: 778/463, loss: 0.006977768149226904 2023-01-24 03:41:51.413088: step: 780/463, loss: 0.015997055917978287 2023-01-24 03:41:52.019221: step: 782/463, loss: 0.010821414180099964 2023-01-24 03:41:52.634075: step: 784/463, loss: 0.008865704759955406 2023-01-24 03:41:53.211215: step: 786/463, loss: 0.07396086305379868 2023-01-24 03:41:53.859544: step: 788/463, loss: 0.42404547333717346 2023-01-24 03:41:54.458630: step: 790/463, loss: 0.011348032392561436 2023-01-24 03:41:55.082402: step: 792/463, loss: 0.009671107865869999 2023-01-24 03:41:55.663844: step: 794/463, loss: 0.03527585417032242 2023-01-24 03:41:56.314202: step: 796/463, loss: 0.00681795971468091 2023-01-24 03:41:56.958025: step: 798/463, loss: 0.06025619059801102 2023-01-24 03:41:57.566728: step: 800/463, loss: 0.000343977939337492 2023-01-24 03:41:58.175239: step: 802/463, loss: 0.015294400975108147 2023-01-24 03:41:58.772688: step: 804/463, loss: 0.03596680611371994 2023-01-24 03:41:59.341699: step: 806/463, loss: 0.0005599417490884662 2023-01-24 03:41:59.963843: step: 808/463, loss: 0.011023275554180145 2023-01-24 03:42:00.565767: step: 810/463, loss: 0.002478193026036024 2023-01-24 03:42:01.096728: step: 812/463, loss: 0.003548489883542061 2023-01-24 03:42:01.687501: step: 814/463, loss: 0.02921109087765217 2023-01-24 03:42:02.283513: step: 816/463, loss: 0.04729032889008522 2023-01-24 03:42:02.918707: step: 818/463, loss: 0.07469239830970764 2023-01-24 03:42:03.540736: step: 820/463, loss: 0.10500919818878174 2023-01-24 03:42:04.077159: step: 822/463, loss: 0.0032622546423226595 2023-01-24 03:42:04.694584: step: 824/463, loss: 0.0298160370439291 2023-01-24 03:42:05.388013: step: 826/463, loss: 0.027722541242837906 2023-01-24 03:42:06.003654: step: 828/463, loss: 0.035371653735637665 2023-01-24 03:42:06.554576: step: 830/463, loss: 0.04024410992860794 2023-01-24 03:42:07.226629: step: 832/463, loss: 0.00798481609672308 2023-01-24 03:42:07.821115: step: 834/463, loss: 1.5167996934906114e-05 2023-01-24 03:42:08.400907: step: 836/463, loss: 0.07696697860956192 2023-01-24 03:42:09.031496: step: 838/463, loss: 0.26267147064208984 2023-01-24 03:42:09.664388: step: 840/463, loss: 0.012133552692830563 2023-01-24 03:42:10.287432: step: 842/463, loss: 0.0115380072966218 2023-01-24 03:42:10.879996: step: 844/463, loss: 0.004663337953388691 2023-01-24 03:42:11.469623: step: 846/463, loss: 0.04373577982187271 2023-01-24 03:42:12.079263: step: 848/463, loss: 0.009380367584526539 2023-01-24 03:42:12.678069: step: 850/463, loss: 0.002140294061973691 2023-01-24 03:42:13.299750: step: 852/463, loss: 0.12312598526477814 2023-01-24 03:42:13.921533: step: 854/463, loss: 0.003718203166499734 2023-01-24 03:42:14.483209: step: 856/463, loss: 0.020757125690579414 2023-01-24 03:42:15.094728: step: 858/463, loss: 0.039247479289770126 2023-01-24 03:42:15.719433: step: 860/463, loss: 0.03997030481696129 2023-01-24 03:42:16.327726: step: 862/463, loss: 0.007333823945373297 2023-01-24 03:42:16.967375: step: 864/463, loss: 0.0615944042801857 2023-01-24 03:42:17.666187: step: 866/463, loss: 0.021436268463730812 2023-01-24 03:42:18.267256: step: 868/463, loss: 0.004637757781893015 2023-01-24 03:42:18.900146: step: 870/463, loss: 0.0484282486140728 2023-01-24 03:42:19.528431: step: 872/463, loss: 0.006482153199613094 2023-01-24 03:42:20.150303: step: 874/463, loss: 0.0072102732956409454 2023-01-24 03:42:20.747305: step: 876/463, loss: 0.03346136957406998 2023-01-24 03:42:21.364971: step: 878/463, loss: 0.20229314267635345 2023-01-24 03:42:21.960248: step: 880/463, loss: 0.04593103379011154 2023-01-24 03:42:22.577386: step: 882/463, loss: 0.053609780967235565 2023-01-24 03:42:23.104959: step: 884/463, loss: 0.002430025255307555 2023-01-24 03:42:23.699202: step: 886/463, loss: 0.005082909949123859 2023-01-24 03:42:24.295995: step: 888/463, loss: 0.005060563329607248 2023-01-24 03:42:24.923317: step: 890/463, loss: 0.04339815303683281 2023-01-24 03:42:25.528711: step: 892/463, loss: 0.04856053367257118 2023-01-24 03:42:26.079856: step: 894/463, loss: 0.0014748625690117478 2023-01-24 03:42:26.699045: step: 896/463, loss: 0.030917707830667496 2023-01-24 03:42:27.325040: step: 898/463, loss: 0.02743251994252205 2023-01-24 03:42:27.966237: step: 900/463, loss: 0.02999575063586235 2023-01-24 03:42:28.562011: step: 902/463, loss: 0.059717290103435516 2023-01-24 03:42:29.200670: step: 904/463, loss: 0.020601164549589157 2023-01-24 03:42:29.807890: step: 906/463, loss: 0.007337001617997885 2023-01-24 03:42:30.367951: step: 908/463, loss: 0.03473038598895073 2023-01-24 03:42:30.949501: step: 910/463, loss: 0.0038016666658222675 2023-01-24 03:42:31.537249: step: 912/463, loss: 0.07348848134279251 2023-01-24 03:42:32.126570: step: 914/463, loss: 0.036427028477191925 2023-01-24 03:42:32.787254: step: 916/463, loss: 0.014665556140244007 2023-01-24 03:42:33.387779: step: 918/463, loss: 0.02003687247633934 2023-01-24 03:42:33.988097: step: 920/463, loss: 0.014129444025456905 2023-01-24 03:42:34.622259: step: 922/463, loss: 0.0910794734954834 2023-01-24 03:42:35.306571: step: 924/463, loss: 0.03989724442362785 2023-01-24 03:42:35.903124: step: 926/463, loss: 0.006505344994366169 ================================================== Loss: 0.057 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31577174409089476, 'r': 0.31996605568223496, 'f1': 0.3178550637974323}, 'combined': 0.23420899437705536, 'epoch': 28} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.35560448098547776, 'r': 0.3230229185915204, 'f1': 0.3385315635170392}, 'combined': 0.2381629090069623, 'epoch': 28} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31733630952380953, 'r': 0.3251643399295202, 'f1': 0.32120263756861694}, 'combined': 0.2366756276821388, 'epoch': 28} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.35305992660013263, 'r': 0.3182164578614296, 'f1': 0.334733894580925}, 'combined': 0.23766106515245675, 'epoch': 28} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32655303053846607, 'r': 0.32221551400379955, 'f1': 0.3243697724546368}, 'combined': 0.23900930601920603, 'epoch': 28} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3689265749411292, 'r': 0.31254041719903525, 'f1': 0.3384007354069933}, 'combined': 0.2402645221389652, 'epoch': 28} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24404761904761904, 'r': 0.29285714285714287, 'f1': 0.26623376623376627}, 'combined': 0.1774891774891775, 'epoch': 28} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25862068965517243, 'r': 0.32608695652173914, 'f1': 0.2884615384615385}, 'combined': 0.14423076923076925, 'epoch': 28} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.19129554655870445, 'r': 0.08575317604355717, 'f1': 0.11842105263157893}, 'combined': 0.07894736842105261, 'epoch': 28} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3207567698259188, 'r': 0.3146703036053131, 'f1': 0.3176843869731801}, 'combined': 0.23408323250655372, 'epoch': 25} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3686692880345029, 'r': 0.31751883707683626, 'f1': 0.34118761114866797}, 'combined': 0.24224320391555423, 'epoch': 25} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3392857142857143, 'r': 0.41304347826086957, 'f1': 0.3725490196078432}, 'combined': 0.1862745098039216, 'epoch': 25} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3394620582120582, 'r': 0.3092447916666667, 'f1': 0.32364965312190286}, 'combined': 0.23847869177403366, 'epoch': 7} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36468931207169214, 'r': 0.2587193810770381, 'f1': 0.30269771384817323}, 'combined': 0.214915376832203, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 7} ****************************** Epoch: 29 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:45:07.949023: step: 2/463, loss: 0.004380635917186737 2023-01-24 03:45:08.535948: step: 4/463, loss: 0.0070947688072919846 2023-01-24 03:45:09.107850: step: 6/463, loss: 0.0009509260416962206 2023-01-24 03:45:09.739452: step: 8/463, loss: 0.01029531005769968 2023-01-24 03:45:10.337300: step: 10/463, loss: 0.01598275825381279 2023-01-24 03:45:10.918947: step: 12/463, loss: 0.0037804781459271908 2023-01-24 03:45:11.547094: step: 14/463, loss: 0.07563035935163498 2023-01-24 03:45:12.164853: step: 16/463, loss: 0.041709817945957184 2023-01-24 03:45:12.733144: step: 18/463, loss: 0.011170117184519768 2023-01-24 03:45:13.325045: step: 20/463, loss: 0.011230136267840862 2023-01-24 03:45:13.968007: step: 22/463, loss: 0.0015222060028463602 2023-01-24 03:45:14.583616: step: 24/463, loss: 0.016722390428185463 2023-01-24 03:45:15.128859: step: 26/463, loss: 0.010837954469025135 2023-01-24 03:45:15.726700: step: 28/463, loss: 0.008565469644963741 2023-01-24 03:45:16.387989: step: 30/463, loss: 0.00267376028932631 2023-01-24 03:45:17.007130: step: 32/463, loss: 0.03678575158119202 2023-01-24 03:45:17.581363: step: 34/463, loss: 0.0027802889235317707 2023-01-24 03:45:18.139972: step: 36/463, loss: 0.010277082212269306 2023-01-24 03:45:18.722599: step: 38/463, loss: 0.000484473945107311 2023-01-24 03:45:19.322659: step: 40/463, loss: 0.0062171281315386295 2023-01-24 03:45:19.951014: step: 42/463, loss: 0.04894643649458885 2023-01-24 03:45:20.534200: step: 44/463, loss: 0.0010040757479146123 2023-01-24 03:45:21.112072: step: 46/463, loss: 0.03525928780436516 2023-01-24 03:45:21.705042: step: 48/463, loss: 0.0003600752097554505 2023-01-24 03:45:22.330323: step: 50/463, loss: 0.018433842808008194 2023-01-24 03:45:22.928298: step: 52/463, loss: 0.09335824847221375 2023-01-24 03:45:23.492925: step: 54/463, loss: 0.10398725420236588 2023-01-24 03:45:24.031230: step: 56/463, loss: 0.0007458103937096894 2023-01-24 03:45:24.632131: step: 58/463, loss: 0.16252681612968445 2023-01-24 03:45:25.382522: step: 60/463, loss: 0.03060820885002613 2023-01-24 03:45:25.979585: step: 62/463, loss: 0.03827627748250961 2023-01-24 03:45:26.582062: step: 64/463, loss: 0.08785887062549591 2023-01-24 03:45:27.254007: step: 66/463, loss: 0.7580986022949219 2023-01-24 03:45:27.899773: step: 68/463, loss: 0.008053522557020187 2023-01-24 03:45:28.474390: step: 70/463, loss: 0.007859273813664913 2023-01-24 03:45:29.093839: step: 72/463, loss: 0.03533100709319115 2023-01-24 03:45:29.732927: step: 74/463, loss: 0.010689815506339073 2023-01-24 03:45:30.286639: step: 76/463, loss: 0.19603368639945984 2023-01-24 03:45:30.877029: step: 78/463, loss: 0.020246591418981552 2023-01-24 03:45:31.483746: step: 80/463, loss: 0.02563784271478653 2023-01-24 03:45:32.078410: step: 82/463, loss: 0.001660398207604885 2023-01-24 03:45:32.696605: step: 84/463, loss: 0.02008209563791752 2023-01-24 03:45:33.325440: step: 86/463, loss: 0.190560981631279 2023-01-24 03:45:33.930568: step: 88/463, loss: 0.01352010853588581 2023-01-24 03:45:34.570254: step: 90/463, loss: 0.07073134928941727 2023-01-24 03:45:35.093607: step: 92/463, loss: 0.0022149388678371906 2023-01-24 03:45:35.643469: step: 94/463, loss: 0.0018431834178045392 2023-01-24 03:45:36.230158: step: 96/463, loss: 0.029913771897554398 2023-01-24 03:45:36.842911: step: 98/463, loss: 0.0111335264518857 2023-01-24 03:45:37.445654: step: 100/463, loss: 0.03270331770181656 2023-01-24 03:45:38.132379: step: 102/463, loss: 0.010210965760052204 2023-01-24 03:45:38.797224: step: 104/463, loss: 0.07748111337423325 2023-01-24 03:45:39.369434: step: 106/463, loss: 0.00037291672197170556 2023-01-24 03:45:39.984908: step: 108/463, loss: 0.052861474454402924 2023-01-24 03:45:40.601515: step: 110/463, loss: 0.03789301961660385 2023-01-24 03:45:41.238287: step: 112/463, loss: 0.05794849991798401 2023-01-24 03:45:41.835253: step: 114/463, loss: 0.05645669251680374 2023-01-24 03:45:42.437794: step: 116/463, loss: 0.0062922462821006775 2023-01-24 03:45:43.006399: step: 118/463, loss: 0.016136767342686653 2023-01-24 03:45:43.587474: step: 120/463, loss: 0.0016100145876407623 2023-01-24 03:45:44.107466: step: 122/463, loss: 0.008434058167040348 2023-01-24 03:45:44.677937: step: 124/463, loss: 0.003835408017039299 2023-01-24 03:45:45.268972: step: 126/463, loss: 0.003269659820944071 2023-01-24 03:45:45.863084: step: 128/463, loss: 0.0037822891026735306 2023-01-24 03:45:46.518781: step: 130/463, loss: 1.2035846710205078 2023-01-24 03:45:47.062101: step: 132/463, loss: 0.08562230318784714 2023-01-24 03:45:47.626877: step: 134/463, loss: 0.0024702753871679306 2023-01-24 03:45:48.276819: step: 136/463, loss: 0.005450582597404718 2023-01-24 03:45:48.908851: step: 138/463, loss: 0.023134222254157066 2023-01-24 03:45:49.571280: step: 140/463, loss: 0.05530443787574768 2023-01-24 03:45:50.189126: step: 142/463, loss: 0.006459028460085392 2023-01-24 03:45:50.752556: step: 144/463, loss: 0.0026149468030780554 2023-01-24 03:45:51.388422: step: 146/463, loss: 0.04686799645423889 2023-01-24 03:45:52.049075: step: 148/463, loss: 0.008257562294602394 2023-01-24 03:45:52.652306: step: 150/463, loss: 0.000860980711877346 2023-01-24 03:45:53.262097: step: 152/463, loss: 0.027413036674261093 2023-01-24 03:45:53.813990: step: 154/463, loss: 0.013699023984372616 2023-01-24 03:45:54.469811: step: 156/463, loss: 0.1479162871837616 2023-01-24 03:45:55.111805: step: 158/463, loss: 0.003801989369094372 2023-01-24 03:45:55.752597: step: 160/463, loss: 0.003425316885113716 2023-01-24 03:45:56.377209: step: 162/463, loss: 0.013186258263885975 2023-01-24 03:45:56.977540: step: 164/463, loss: 0.01934143714606762 2023-01-24 03:45:57.587148: step: 166/463, loss: 0.013339914381504059 2023-01-24 03:45:58.168413: step: 168/463, loss: 0.01656736247241497 2023-01-24 03:45:58.845067: step: 170/463, loss: 0.015703503042459488 2023-01-24 03:45:59.485649: step: 172/463, loss: 0.0040563554503023624 2023-01-24 03:46:00.132000: step: 174/463, loss: 0.01879901811480522 2023-01-24 03:46:00.711374: step: 176/463, loss: 0.006600400432944298 2023-01-24 03:46:01.347144: step: 178/463, loss: 0.03067195788025856 2023-01-24 03:46:01.969920: step: 180/463, loss: 0.024428708478808403 2023-01-24 03:46:02.592761: step: 182/463, loss: 0.009383450262248516 2023-01-24 03:46:03.191039: step: 184/463, loss: 0.06784610450267792 2023-01-24 03:46:03.777153: step: 186/463, loss: 0.00339857442304492 2023-01-24 03:46:04.380177: step: 188/463, loss: 0.00434576440602541 2023-01-24 03:46:04.921780: step: 190/463, loss: 0.06592497229576111 2023-01-24 03:46:05.633046: step: 192/463, loss: 0.014984349720180035 2023-01-24 03:46:06.248411: step: 194/463, loss: 0.001984932227060199 2023-01-24 03:46:06.870511: step: 196/463, loss: 0.08807288110256195 2023-01-24 03:46:07.464812: step: 198/463, loss: 0.017598956823349 2023-01-24 03:46:07.974005: step: 200/463, loss: 0.005741733592003584 2023-01-24 03:46:08.604671: step: 202/463, loss: 0.10296190530061722 2023-01-24 03:46:09.184137: step: 204/463, loss: 0.023693719878792763 2023-01-24 03:46:09.779188: step: 206/463, loss: 0.04344288632273674 2023-01-24 03:46:10.425652: step: 208/463, loss: 0.008584708906710148 2023-01-24 03:46:11.054200: step: 210/463, loss: 0.02717828005552292 2023-01-24 03:46:11.603851: step: 212/463, loss: 0.012712454423308372 2023-01-24 03:46:12.225894: step: 214/463, loss: 0.011692480184137821 2023-01-24 03:46:12.836911: step: 216/463, loss: 0.02314925566315651 2023-01-24 03:46:13.442900: step: 218/463, loss: 0.014478124678134918 2023-01-24 03:46:14.098888: step: 220/463, loss: 0.023174090310931206 2023-01-24 03:46:14.669659: step: 222/463, loss: 0.030549174174666405 2023-01-24 03:46:15.249924: step: 224/463, loss: 0.06488758325576782 2023-01-24 03:46:15.894657: step: 226/463, loss: 0.005796562414616346 2023-01-24 03:46:16.477393: step: 228/463, loss: 0.019609447568655014 2023-01-24 03:46:17.096737: step: 230/463, loss: 0.024187076836824417 2023-01-24 03:46:17.736458: step: 232/463, loss: 0.0011143870651721954 2023-01-24 03:46:18.303214: step: 234/463, loss: 0.0287034809589386 2023-01-24 03:46:18.886830: step: 236/463, loss: 0.00680895522236824 2023-01-24 03:46:19.489867: step: 238/463, loss: 0.022547168657183647 2023-01-24 03:46:20.058329: step: 240/463, loss: 0.028393058106303215 2023-01-24 03:46:20.587298: step: 242/463, loss: 0.0033917813561856747 2023-01-24 03:46:21.165022: step: 244/463, loss: 0.028707144781947136 2023-01-24 03:46:21.778597: step: 246/463, loss: 0.015977637842297554 2023-01-24 03:46:22.363731: step: 248/463, loss: 0.10701405256986618 2023-01-24 03:46:23.011174: step: 250/463, loss: 0.06411196291446686 2023-01-24 03:46:23.561475: step: 252/463, loss: 0.02231753244996071 2023-01-24 03:46:24.205692: step: 254/463, loss: 0.00871640257537365 2023-01-24 03:46:24.837634: step: 256/463, loss: 0.0007997779175639153 2023-01-24 03:46:25.421110: step: 258/463, loss: 0.02113805152475834 2023-01-24 03:46:26.069910: step: 260/463, loss: 0.010754906572401524 2023-01-24 03:46:26.691108: step: 262/463, loss: 0.027655132114887238 2023-01-24 03:46:27.289875: step: 264/463, loss: 0.009080632589757442 2023-01-24 03:46:27.891899: step: 266/463, loss: 0.0023966992739588022 2023-01-24 03:46:28.508185: step: 268/463, loss: 0.0013873097486793995 2023-01-24 03:46:29.072247: step: 270/463, loss: 0.0005182753084227443 2023-01-24 03:46:29.703934: step: 272/463, loss: 0.0059996736235916615 2023-01-24 03:46:30.275542: step: 274/463, loss: 0.028909733518958092 2023-01-24 03:46:30.862926: step: 276/463, loss: 0.005479123443365097 2023-01-24 03:46:31.492234: step: 278/463, loss: 0.021239696070551872 2023-01-24 03:46:32.091103: step: 280/463, loss: 0.02595089003443718 2023-01-24 03:46:32.671706: step: 282/463, loss: 0.02252337709069252 2023-01-24 03:46:33.248614: step: 284/463, loss: 0.007514844182878733 2023-01-24 03:46:33.984388: step: 286/463, loss: 0.03357216343283653 2023-01-24 03:46:34.596249: step: 288/463, loss: 0.002075939904898405 2023-01-24 03:46:35.229552: step: 290/463, loss: 0.011798621155321598 2023-01-24 03:46:35.838924: step: 292/463, loss: 0.004116870928555727 2023-01-24 03:46:36.437054: step: 294/463, loss: 0.0023125680163502693 2023-01-24 03:46:37.027190: step: 296/463, loss: 0.06705563515424728 2023-01-24 03:46:37.671545: step: 298/463, loss: 0.13823872804641724 2023-01-24 03:46:38.296019: step: 300/463, loss: 0.0015346243744716048 2023-01-24 03:46:38.972874: step: 302/463, loss: 0.0012140690814703703 2023-01-24 03:46:39.610230: step: 304/463, loss: 0.0028591908048838377 2023-01-24 03:46:40.304415: step: 306/463, loss: 0.002871573669835925 2023-01-24 03:46:40.877426: step: 308/463, loss: 0.005184788256883621 2023-01-24 03:46:41.514194: step: 310/463, loss: 0.0005973773659206927 2023-01-24 03:46:42.117740: step: 312/463, loss: 0.05233674868941307 2023-01-24 03:46:42.773071: step: 314/463, loss: 0.0024328839499503374 2023-01-24 03:46:43.418914: step: 316/463, loss: 0.0043953522108495235 2023-01-24 03:46:44.010567: step: 318/463, loss: 0.04687422141432762 2023-01-24 03:46:44.625690: step: 320/463, loss: 0.051389653235673904 2023-01-24 03:46:45.255447: step: 322/463, loss: 0.016346603631973267 2023-01-24 03:46:45.862223: step: 324/463, loss: 0.017772749066352844 2023-01-24 03:46:46.460467: step: 326/463, loss: 0.04208219423890114 2023-01-24 03:46:47.051279: step: 328/463, loss: 0.011833593249320984 2023-01-24 03:46:47.682124: step: 330/463, loss: 0.026876062154769897 2023-01-24 03:46:48.365619: step: 332/463, loss: 0.0017764260992407799 2023-01-24 03:46:48.992103: step: 334/463, loss: 0.020989537239074707 2023-01-24 03:46:49.642265: step: 336/463, loss: 0.0009535251883789897 2023-01-24 03:46:50.291147: step: 338/463, loss: 0.0013991156592965126 2023-01-24 03:46:50.870495: step: 340/463, loss: 0.06606505811214447 2023-01-24 03:46:51.490045: step: 342/463, loss: 0.03693550452589989 2023-01-24 03:46:52.089656: step: 344/463, loss: 0.03895249217748642 2023-01-24 03:46:52.687595: step: 346/463, loss: 0.009876221418380737 2023-01-24 03:46:53.267097: step: 348/463, loss: 0.06646093726158142 2023-01-24 03:46:53.873954: step: 350/463, loss: 0.11452026665210724 2023-01-24 03:46:54.492441: step: 352/463, loss: 0.011863459832966328 2023-01-24 03:46:55.128547: step: 354/463, loss: 0.03681686148047447 2023-01-24 03:46:55.734168: step: 356/463, loss: 0.013312109746038914 2023-01-24 03:46:56.377849: step: 358/463, loss: 0.06067591905593872 2023-01-24 03:46:56.932529: step: 360/463, loss: 0.014860141091048717 2023-01-24 03:46:57.544756: step: 362/463, loss: 0.04609623923897743 2023-01-24 03:46:58.087109: step: 364/463, loss: 0.036772437393665314 2023-01-24 03:46:58.662964: step: 366/463, loss: 0.03516719117760658 2023-01-24 03:46:59.320770: step: 368/463, loss: 0.030207833275198936 2023-01-24 03:46:59.914892: step: 370/463, loss: 0.001773285330273211 2023-01-24 03:47:00.580703: step: 372/463, loss: 0.004824051167815924 2023-01-24 03:47:01.171057: step: 374/463, loss: 0.0038818514440208673 2023-01-24 03:47:01.767557: step: 376/463, loss: 0.012768728658556938 2023-01-24 03:47:02.320777: step: 378/463, loss: 0.1762952208518982 2023-01-24 03:47:02.993863: step: 380/463, loss: 0.050335370004177094 2023-01-24 03:47:03.618387: step: 382/463, loss: 0.027813494205474854 2023-01-24 03:47:04.228140: step: 384/463, loss: 0.002753387438133359 2023-01-24 03:47:04.840563: step: 386/463, loss: 0.01771734282374382 2023-01-24 03:47:05.426689: step: 388/463, loss: 0.01523977518081665 2023-01-24 03:47:06.073456: step: 390/463, loss: 0.037160713225603104 2023-01-24 03:47:06.685489: step: 392/463, loss: 0.004081880208104849 2023-01-24 03:47:07.326605: step: 394/463, loss: 0.018009141087532043 2023-01-24 03:47:08.004441: step: 396/463, loss: 0.045583952218294144 2023-01-24 03:47:08.601083: step: 398/463, loss: 0.003747469512745738 2023-01-24 03:47:09.218589: step: 400/463, loss: 0.024374959990382195 2023-01-24 03:47:09.833663: step: 402/463, loss: 0.174274742603302 2023-01-24 03:47:10.423212: step: 404/463, loss: 0.41446101665496826 2023-01-24 03:47:11.020022: step: 406/463, loss: 0.038285426795482635 2023-01-24 03:47:11.663605: step: 408/463, loss: 0.11137422919273376 2023-01-24 03:47:12.316617: step: 410/463, loss: 0.024938086047768593 2023-01-24 03:47:12.913742: step: 412/463, loss: 0.011550969444215298 2023-01-24 03:47:13.583212: step: 414/463, loss: 0.19035404920578003 2023-01-24 03:47:14.226485: step: 416/463, loss: 0.037760473787784576 2023-01-24 03:47:14.918822: step: 418/463, loss: 0.006483147852122784 2023-01-24 03:47:15.546361: step: 420/463, loss: 0.1274777054786682 2023-01-24 03:47:16.227006: step: 422/463, loss: 0.0077507770620286465 2023-01-24 03:47:16.830729: step: 424/463, loss: 0.003077563364058733 2023-01-24 03:47:17.458323: step: 426/463, loss: 0.018887178972363472 2023-01-24 03:47:18.045217: step: 428/463, loss: 0.9808109998703003 2023-01-24 03:47:18.646485: step: 430/463, loss: 0.00970432534813881 2023-01-24 03:47:19.278530: step: 432/463, loss: 0.06310426443815231 2023-01-24 03:47:19.830456: step: 434/463, loss: 0.004448199179023504 2023-01-24 03:47:20.455157: step: 436/463, loss: 0.01336941123008728 2023-01-24 03:47:21.041467: step: 438/463, loss: 0.010471925139427185 2023-01-24 03:47:21.627402: step: 440/463, loss: 0.04117922484874725 2023-01-24 03:47:22.265130: step: 442/463, loss: 0.009846989996731281 2023-01-24 03:47:22.938452: step: 444/463, loss: 0.0002828371652867645 2023-01-24 03:47:23.510320: step: 446/463, loss: 0.019245265051722527 2023-01-24 03:47:24.188833: step: 448/463, loss: 0.0018374890787526965 2023-01-24 03:47:24.753654: step: 450/463, loss: 0.005506450310349464 2023-01-24 03:47:25.497529: step: 452/463, loss: 0.012649435549974442 2023-01-24 03:47:26.102466: step: 454/463, loss: 2.7570627935347147e-05 2023-01-24 03:47:26.672626: step: 456/463, loss: 0.022142071276903152 2023-01-24 03:47:27.314868: step: 458/463, loss: 0.03736577555537224 2023-01-24 03:47:27.930461: step: 460/463, loss: 0.0029905557166785 2023-01-24 03:47:28.537752: step: 462/463, loss: 0.05215125530958176 2023-01-24 03:47:29.098602: step: 464/463, loss: 0.00645318953320384 2023-01-24 03:47:29.666778: step: 466/463, loss: 0.023902246728539467 2023-01-24 03:47:30.327585: step: 468/463, loss: 0.025015046820044518 2023-01-24 03:47:30.947384: step: 470/463, loss: 0.010080143809318542 2023-01-24 03:47:31.497867: step: 472/463, loss: 0.05360304191708565 2023-01-24 03:47:32.166837: step: 474/463, loss: 0.01861857809126377 2023-01-24 03:47:32.734095: step: 476/463, loss: 0.003182116197422147 2023-01-24 03:47:33.332654: step: 478/463, loss: 0.01166730560362339 2023-01-24 03:47:33.925117: step: 480/463, loss: 0.03854798898100853 2023-01-24 03:47:34.534838: step: 482/463, loss: 0.007883825339376926 2023-01-24 03:47:35.176334: step: 484/463, loss: 0.0169207863509655 2023-01-24 03:47:35.715794: step: 486/463, loss: 0.00558114517480135 2023-01-24 03:47:36.316754: step: 488/463, loss: 0.006999274715781212 2023-01-24 03:47:36.960839: step: 490/463, loss: 0.01548024546355009 2023-01-24 03:47:37.565216: step: 492/463, loss: 0.022451763972640038 2023-01-24 03:47:38.167540: step: 494/463, loss: 0.1336677372455597 2023-01-24 03:47:38.722792: step: 496/463, loss: 0.016053596511483192 2023-01-24 03:47:39.293728: step: 498/463, loss: 0.011148281395435333 2023-01-24 03:47:39.892576: step: 500/463, loss: 0.005491623654961586 2023-01-24 03:47:40.506762: step: 502/463, loss: 0.038604266941547394 2023-01-24 03:47:41.100299: step: 504/463, loss: 0.02555009163916111 2023-01-24 03:47:41.713663: step: 506/463, loss: 0.05609583109617233 2023-01-24 03:47:42.365743: step: 508/463, loss: 0.027381815016269684 2023-01-24 03:47:42.911253: step: 510/463, loss: 0.007321010809391737 2023-01-24 03:47:43.532360: step: 512/463, loss: 0.012267068959772587 2023-01-24 03:47:44.143990: step: 514/463, loss: 0.010858491063117981 2023-01-24 03:47:44.781163: step: 516/463, loss: 0.0332740843296051 2023-01-24 03:47:45.398000: step: 518/463, loss: 0.02040177956223488 2023-01-24 03:47:45.969795: step: 520/463, loss: 0.004633566364645958 2023-01-24 03:47:46.603811: step: 522/463, loss: 0.047325070947408676 2023-01-24 03:47:47.224200: step: 524/463, loss: 0.04332440719008446 2023-01-24 03:47:47.840741: step: 526/463, loss: 0.0020474002230912447 2023-01-24 03:47:48.424372: step: 528/463, loss: 0.009091081097722054 2023-01-24 03:47:49.070081: step: 530/463, loss: 0.9428552389144897 2023-01-24 03:47:49.690794: step: 532/463, loss: 0.03307022526860237 2023-01-24 03:47:50.366983: step: 534/463, loss: 0.018062535673379898 2023-01-24 03:47:50.941898: step: 536/463, loss: 0.013953792862594128 2023-01-24 03:47:51.645008: step: 538/463, loss: 0.04186782240867615 2023-01-24 03:47:52.270830: step: 540/463, loss: 0.02663356252014637 2023-01-24 03:47:52.864736: step: 542/463, loss: 0.04372838884592056 2023-01-24 03:47:53.445221: step: 544/463, loss: 0.050833266228437424 2023-01-24 03:47:54.023887: step: 546/463, loss: 0.0713435560464859 2023-01-24 03:47:54.663193: step: 548/463, loss: 0.015953855589032173 2023-01-24 03:47:55.285880: step: 550/463, loss: 0.018654095008969307 2023-01-24 03:47:55.858950: step: 552/463, loss: 0.0008604592876508832 2023-01-24 03:47:56.446321: step: 554/463, loss: 0.008143816143274307 2023-01-24 03:47:57.081511: step: 556/463, loss: 0.008201424963772297 2023-01-24 03:47:57.693169: step: 558/463, loss: 0.012904100120067596 2023-01-24 03:47:58.302667: step: 560/463, loss: 0.0026776702143251896 2023-01-24 03:47:58.987835: step: 562/463, loss: 0.007896258495748043 2023-01-24 03:47:59.628623: step: 564/463, loss: 0.054269932210445404 2023-01-24 03:48:00.225164: step: 566/463, loss: 0.0296170637011528 2023-01-24 03:48:00.775131: step: 568/463, loss: 3.2243096828460693 2023-01-24 03:48:01.449437: step: 570/463, loss: 0.004563071299344301 2023-01-24 03:48:02.035181: step: 572/463, loss: 0.01740649901330471 2023-01-24 03:48:02.666879: step: 574/463, loss: 0.1640261560678482 2023-01-24 03:48:03.300355: step: 576/463, loss: 0.004023135639727116 2023-01-24 03:48:03.951901: step: 578/463, loss: 0.009760151617228985 2023-01-24 03:48:04.554809: step: 580/463, loss: 0.04655862972140312 2023-01-24 03:48:05.148548: step: 582/463, loss: 0.06478311866521835 2023-01-24 03:48:05.732486: step: 584/463, loss: 0.022139519453048706 2023-01-24 03:48:06.382297: step: 586/463, loss: 0.22393105924129486 2023-01-24 03:48:06.975861: step: 588/463, loss: 0.026952018961310387 2023-01-24 03:48:07.632352: step: 590/463, loss: 0.24487528204917908 2023-01-24 03:48:08.230123: step: 592/463, loss: 0.027951108291745186 2023-01-24 03:48:08.810028: step: 594/463, loss: 0.02111688256263733 2023-01-24 03:48:09.382482: step: 596/463, loss: 0.004064800217747688 2023-01-24 03:48:10.043580: step: 598/463, loss: 0.01912226527929306 2023-01-24 03:48:10.638729: step: 600/463, loss: 0.007461976259946823 2023-01-24 03:48:11.244266: step: 602/463, loss: 0.01676899939775467 2023-01-24 03:48:11.756765: step: 604/463, loss: 0.012414227239787579 2023-01-24 03:48:12.348936: step: 606/463, loss: 0.0318518728017807 2023-01-24 03:48:12.974587: step: 608/463, loss: 0.026514628902077675 2023-01-24 03:48:13.560787: step: 610/463, loss: 0.05796220153570175 2023-01-24 03:48:14.139426: step: 612/463, loss: 0.0027333872858434916 2023-01-24 03:48:14.734656: step: 614/463, loss: 0.016810808330774307 2023-01-24 03:48:15.290562: step: 616/463, loss: 0.008629131130874157 2023-01-24 03:48:15.898397: step: 618/463, loss: 0.04084446281194687 2023-01-24 03:48:16.555861: step: 620/463, loss: 0.07355980575084686 2023-01-24 03:48:17.180730: step: 622/463, loss: 0.03867322951555252 2023-01-24 03:48:17.767774: step: 624/463, loss: 0.015890194103121758 2023-01-24 03:48:18.432409: step: 626/463, loss: 0.010094290599226952 2023-01-24 03:48:19.032684: step: 628/463, loss: 0.006953805685043335 2023-01-24 03:48:19.656689: step: 630/463, loss: 0.03119542822241783 2023-01-24 03:48:20.221760: step: 632/463, loss: 0.0003766108420677483 2023-01-24 03:48:20.821692: step: 634/463, loss: 0.005900821182876825 2023-01-24 03:48:21.378934: step: 636/463, loss: 0.02261260896921158 2023-01-24 03:48:21.978564: step: 638/463, loss: 0.017702914774417877 2023-01-24 03:48:22.563621: step: 640/463, loss: 0.0018331267638131976 2023-01-24 03:48:23.191669: step: 642/463, loss: 0.03502148762345314 2023-01-24 03:48:23.830653: step: 644/463, loss: 0.0816122442483902 2023-01-24 03:48:24.447383: step: 646/463, loss: 0.0035813990980386734 2023-01-24 03:48:25.057631: step: 648/463, loss: 0.1350533813238144 2023-01-24 03:48:25.673373: step: 650/463, loss: 0.07879451662302017 2023-01-24 03:48:26.352920: step: 652/463, loss: 0.009687604382634163 2023-01-24 03:48:26.993193: step: 654/463, loss: 0.007044443394988775 2023-01-24 03:48:27.560730: step: 656/463, loss: 0.0014136114623397589 2023-01-24 03:48:28.203295: step: 658/463, loss: 0.03274233266711235 2023-01-24 03:48:28.802447: step: 660/463, loss: 0.0008556006359867752 2023-01-24 03:48:29.421838: step: 662/463, loss: 0.04687538370490074 2023-01-24 03:48:29.982857: step: 664/463, loss: 0.0003732472541742027 2023-01-24 03:48:30.559860: step: 666/463, loss: 0.00988706573843956 2023-01-24 03:48:31.162645: step: 668/463, loss: 0.047460928559303284 2023-01-24 03:48:31.760560: step: 670/463, loss: 0.0043182033114135265 2023-01-24 03:48:32.398666: step: 672/463, loss: 0.004817423410713673 2023-01-24 03:48:32.972834: step: 674/463, loss: 0.01478367019444704 2023-01-24 03:48:33.594210: step: 676/463, loss: 0.014214013703167439 2023-01-24 03:48:34.233542: step: 678/463, loss: 0.03612767532467842 2023-01-24 03:48:34.875620: step: 680/463, loss: 0.002364536514505744 2023-01-24 03:48:35.517680: step: 682/463, loss: 0.00754452683031559 2023-01-24 03:48:36.121516: step: 684/463, loss: 0.04462805762887001 2023-01-24 03:48:36.785768: step: 686/463, loss: 0.011713942512869835 2023-01-24 03:48:37.377247: step: 688/463, loss: 0.020594924688339233 2023-01-24 03:48:37.980022: step: 690/463, loss: 0.03847086429595947 2023-01-24 03:48:38.588224: step: 692/463, loss: 0.016664152964949608 2023-01-24 03:48:39.161984: step: 694/463, loss: 0.011178790591657162 2023-01-24 03:48:39.807457: step: 696/463, loss: 0.016343101859092712 2023-01-24 03:48:40.400590: step: 698/463, loss: 0.016421450302004814 2023-01-24 03:48:41.047337: step: 700/463, loss: 0.0098012899979949 2023-01-24 03:48:41.724777: step: 702/463, loss: 0.008619521744549274 2023-01-24 03:48:42.313364: step: 704/463, loss: 0.010770173743367195 2023-01-24 03:48:42.882786: step: 706/463, loss: 0.023617876693606377 2023-01-24 03:48:43.469145: step: 708/463, loss: 0.005524185486137867 2023-01-24 03:48:44.113312: step: 710/463, loss: 0.19701699912548065 2023-01-24 03:48:44.639555: step: 712/463, loss: 0.0043031577952206135 2023-01-24 03:48:45.272998: step: 714/463, loss: 0.0025023603811860085 2023-01-24 03:48:45.881924: step: 716/463, loss: 0.06475044786930084 2023-01-24 03:48:46.598741: step: 718/463, loss: 0.003465653397142887 2023-01-24 03:48:47.251794: step: 720/463, loss: 0.01827049069106579 2023-01-24 03:48:47.878849: step: 722/463, loss: 0.005189152434468269 2023-01-24 03:48:48.413488: step: 724/463, loss: 0.006974215619266033 2023-01-24 03:48:49.050111: step: 726/463, loss: 0.09348303824663162 2023-01-24 03:48:49.687809: step: 728/463, loss: 0.047746215015649796 2023-01-24 03:48:50.287936: step: 730/463, loss: 0.031120633706450462 2023-01-24 03:48:50.914668: step: 732/463, loss: 0.004572523292154074 2023-01-24 03:48:51.517237: step: 734/463, loss: 0.0016970207216218114 2023-01-24 03:48:52.086980: step: 736/463, loss: 0.031358614563941956 2023-01-24 03:48:52.680228: step: 738/463, loss: 0.03257216885685921 2023-01-24 03:48:53.240801: step: 740/463, loss: 0.013148258440196514 2023-01-24 03:48:53.849102: step: 742/463, loss: 0.007374712731689215 2023-01-24 03:48:54.475371: step: 744/463, loss: 1.1168992519378662 2023-01-24 03:48:55.071193: step: 746/463, loss: 0.000548319541849196 2023-01-24 03:48:55.687114: step: 748/463, loss: 0.0011827029520645738 2023-01-24 03:48:56.241396: step: 750/463, loss: 0.0008641545427963138 2023-01-24 03:48:56.821441: step: 752/463, loss: 0.046011410653591156 2023-01-24 03:48:57.420042: step: 754/463, loss: 0.0014125334564596415 2023-01-24 03:48:58.034347: step: 756/463, loss: 0.3372320532798767 2023-01-24 03:48:58.618226: step: 758/463, loss: 0.005192271899431944 2023-01-24 03:48:59.217283: step: 760/463, loss: 0.010531393811106682 2023-01-24 03:48:59.816687: step: 762/463, loss: 0.019783440977334976 2023-01-24 03:49:00.385723: step: 764/463, loss: 0.0014692284166812897 2023-01-24 03:49:01.018652: step: 766/463, loss: 0.022370390594005585 2023-01-24 03:49:01.702188: step: 768/463, loss: 0.003821658669039607 2023-01-24 03:49:02.275113: step: 770/463, loss: 0.05137111619114876 2023-01-24 03:49:02.849470: step: 772/463, loss: 0.017113439738750458 2023-01-24 03:49:03.445295: step: 774/463, loss: 0.00013005558867007494 2023-01-24 03:49:04.033496: step: 776/463, loss: 0.006770826876163483 2023-01-24 03:49:04.658959: step: 778/463, loss: 0.01557447575032711 2023-01-24 03:49:05.218062: step: 780/463, loss: 0.00828070379793644 2023-01-24 03:49:05.815958: step: 782/463, loss: 0.06586623936891556 2023-01-24 03:49:06.434673: step: 784/463, loss: 0.016940977424383163 2023-01-24 03:49:07.055686: step: 786/463, loss: 0.0001668120821705088 2023-01-24 03:49:07.668033: step: 788/463, loss: 0.03028116375207901 2023-01-24 03:49:08.227624: step: 790/463, loss: 0.000825108087155968 2023-01-24 03:49:08.785422: step: 792/463, loss: 0.08452858775854111 2023-01-24 03:49:09.390149: step: 794/463, loss: 0.004131886176764965 2023-01-24 03:49:10.008431: step: 796/463, loss: 0.03449702635407448 2023-01-24 03:49:10.667374: step: 798/463, loss: 0.015711188316345215 2023-01-24 03:49:11.333326: step: 800/463, loss: 0.014718927443027496 2023-01-24 03:49:11.992783: step: 802/463, loss: 0.03163851797580719 2023-01-24 03:49:12.652128: step: 804/463, loss: 0.016314640641212463 2023-01-24 03:49:13.276713: step: 806/463, loss: 0.0012460040161386132 2023-01-24 03:49:13.855726: step: 808/463, loss: 0.015356335788965225 2023-01-24 03:49:14.486690: step: 810/463, loss: 0.027849426493048668 2023-01-24 03:49:15.135145: step: 812/463, loss: 0.000667195999994874 2023-01-24 03:49:15.760889: step: 814/463, loss: 0.12272325903177261 2023-01-24 03:49:16.365811: step: 816/463, loss: 0.031702302396297455 2023-01-24 03:49:16.940630: step: 818/463, loss: 0.0169863011687994 2023-01-24 03:49:17.526349: step: 820/463, loss: 0.01184864155948162 2023-01-24 03:49:18.187488: step: 822/463, loss: 0.4696779251098633 2023-01-24 03:49:19.007199: step: 824/463, loss: 0.015859538689255714 2023-01-24 03:49:19.735362: step: 826/463, loss: 0.019510632380843163 2023-01-24 03:49:20.328368: step: 828/463, loss: 0.018747366964817047 2023-01-24 03:49:20.877954: step: 830/463, loss: 0.018183868378400803 2023-01-24 03:49:21.453438: step: 832/463, loss: 0.020355088636279106 2023-01-24 03:49:22.017424: step: 834/463, loss: 0.23231194913387299 2023-01-24 03:49:22.643039: step: 836/463, loss: 0.02821563556790352 2023-01-24 03:49:23.169938: step: 838/463, loss: 0.001066009746864438 2023-01-24 03:49:23.808343: step: 840/463, loss: 0.0261029414832592 2023-01-24 03:49:24.331448: step: 842/463, loss: 0.006555048283189535 2023-01-24 03:49:24.929306: step: 844/463, loss: 0.013622031547129154 2023-01-24 03:49:25.522060: step: 846/463, loss: 0.039892490953207016 2023-01-24 03:49:26.164944: step: 848/463, loss: 0.1354444921016693 2023-01-24 03:49:26.744433: step: 850/463, loss: 0.00016802630852907896 2023-01-24 03:49:27.343488: step: 852/463, loss: 0.0024423236027359962 2023-01-24 03:49:27.918688: step: 854/463, loss: 0.00023083611449692398 2023-01-24 03:49:28.548465: step: 856/463, loss: 0.04842939227819443 2023-01-24 03:49:29.208293: step: 858/463, loss: 0.008157627657055855 2023-01-24 03:49:29.856659: step: 860/463, loss: 0.03697714954614639 2023-01-24 03:49:30.515015: step: 862/463, loss: 0.025667544454336166 2023-01-24 03:49:31.147002: step: 864/463, loss: 0.0037541664205491543 2023-01-24 03:49:31.751056: step: 866/463, loss: 0.039844758808612823 2023-01-24 03:49:32.336469: step: 868/463, loss: 0.019969046115875244 2023-01-24 03:49:32.883012: step: 870/463, loss: 0.04320311173796654 2023-01-24 03:49:33.420986: step: 872/463, loss: 0.010679672472178936 2023-01-24 03:49:34.001693: step: 874/463, loss: 0.005685543641448021 2023-01-24 03:49:34.588249: step: 876/463, loss: 0.00035601688432507217 2023-01-24 03:49:35.222800: step: 878/463, loss: 0.01687551662325859 2023-01-24 03:49:35.818646: step: 880/463, loss: 0.024747854098677635 2023-01-24 03:49:36.406115: step: 882/463, loss: 0.004030841402709484 2023-01-24 03:49:37.010072: step: 884/463, loss: 0.0929553285241127 2023-01-24 03:49:37.656946: step: 886/463, loss: 0.016330555081367493 2023-01-24 03:49:38.259767: step: 888/463, loss: 0.01833495870232582 2023-01-24 03:49:38.887392: step: 890/463, loss: 0.034522976726293564 2023-01-24 03:49:39.504061: step: 892/463, loss: 0.01271047256886959 2023-01-24 03:49:40.089879: step: 894/463, loss: 0.06464182585477829 2023-01-24 03:49:40.733651: step: 896/463, loss: 0.008035671897232533 2023-01-24 03:49:41.297595: step: 898/463, loss: 0.01843496412038803 2023-01-24 03:49:41.959230: step: 900/463, loss: 0.046923328191041946 2023-01-24 03:49:42.601928: step: 902/463, loss: 0.019787698984146118 2023-01-24 03:49:43.289256: step: 904/463, loss: 0.5545731782913208 2023-01-24 03:49:43.889403: step: 906/463, loss: 0.006263541057705879 2023-01-24 03:49:44.527273: step: 908/463, loss: 0.0003363310534041375 2023-01-24 03:49:45.164732: step: 910/463, loss: 0.03302348405122757 2023-01-24 03:49:45.721807: step: 912/463, loss: 0.0007958811474964023 2023-01-24 03:49:46.268317: step: 914/463, loss: 0.020184241235256195 2023-01-24 03:49:46.990251: step: 916/463, loss: 0.012933444231748581 2023-01-24 03:49:47.585188: step: 918/463, loss: 0.0006091843242757022 2023-01-24 03:49:48.187561: step: 920/463, loss: 0.05601540207862854 2023-01-24 03:49:48.805413: step: 922/463, loss: 0.5589209198951721 2023-01-24 03:49:49.426278: step: 924/463, loss: 0.022015033289790154 2023-01-24 03:49:50.013976: step: 926/463, loss: 0.029957737773656845 ================================================== Loss: 0.050 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32417243083003955, 'r': 0.3112547438330171, 'f1': 0.3175822846079381}, 'combined': 0.23400799918479648, 'epoch': 29} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.373343472400721, 'r': 0.3186124921534949, 'f1': 0.34381348023343233}, 'combined': 0.24187883031497753, 'epoch': 29} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3338947105788423, 'r': 0.31742172675521824, 'f1': 0.3254499027237354}, 'combined': 0.23980519148064713, 'epoch': 29} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36953242651007123, 'r': 0.31568258774289676, 'f1': 0.34049152522669146}, 'combined': 0.24174898291095093, 'epoch': 29} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3498599796334012, 'r': 0.3259606261859583, 'f1': 0.337487721021611}, 'combined': 0.24867516285802915, 'epoch': 29} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.39005919816818174, 'r': 0.30864072798285824, 'f1': 0.34460617605107036}, 'combined': 0.24467038499625995, 'epoch': 29} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2583333333333333, 'r': 0.2952380952380952, 'f1': 0.27555555555555555}, 'combined': 0.1837037037037037, 'epoch': 29} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.3804347826086957, 'f1': 0.3431372549019608}, 'combined': 0.1715686274509804, 'epoch': 29} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.1724137931034483, 'f1': 0.25641025641025644}, 'combined': 0.17094017094017094, 'epoch': 29} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3207567698259188, 'r': 0.3146703036053131, 'f1': 0.3176843869731801}, 'combined': 0.23408323250655372, 'epoch': 25} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3686692880345029, 'r': 0.31751883707683626, 'f1': 0.34118761114866797}, 'combined': 0.24224320391555423, 'epoch': 25} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3392857142857143, 'r': 0.41304347826086957, 'f1': 0.3725490196078432}, 'combined': 0.1862745098039216, 'epoch': 25} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3394620582120582, 'r': 0.3092447916666667, 'f1': 0.32364965312190286}, 'combined': 0.23847869177403366, 'epoch': 7} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36468931207169214, 'r': 0.2587193810770381, 'f1': 0.30269771384817323}, 'combined': 0.214915376832203, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 7} ****************************** Epoch: 30 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:52:22.880962: step: 2/463, loss: 0.02287071757018566 2023-01-24 03:52:23.613828: step: 4/463, loss: 0.0010977593483403325 2023-01-24 03:52:24.191010: step: 6/463, loss: 0.0018210893031209707 2023-01-24 03:52:24.797604: step: 8/463, loss: 0.011499973013997078 2023-01-24 03:52:25.438347: step: 10/463, loss: 0.031644780188798904 2023-01-24 03:52:26.046056: step: 12/463, loss: 0.02684555947780609 2023-01-24 03:52:26.601233: step: 14/463, loss: 0.00984534528106451 2023-01-24 03:52:27.175268: step: 16/463, loss: 0.0005210680537857115 2023-01-24 03:52:27.790808: step: 18/463, loss: 0.021071812137961388 2023-01-24 03:52:28.395134: step: 20/463, loss: 0.1370922178030014 2023-01-24 03:52:29.092704: step: 22/463, loss: 0.008282049559056759 2023-01-24 03:52:29.625066: step: 24/463, loss: 0.005774232558906078 2023-01-24 03:52:30.214689: step: 26/463, loss: 0.05597303807735443 2023-01-24 03:52:30.812443: step: 28/463, loss: 0.007607400417327881 2023-01-24 03:52:31.421923: step: 30/463, loss: 0.00556810200214386 2023-01-24 03:52:32.016007: step: 32/463, loss: 0.00722869485616684 2023-01-24 03:52:32.650154: step: 34/463, loss: 0.04142138734459877 2023-01-24 03:52:33.173751: step: 36/463, loss: 0.14875413477420807 2023-01-24 03:52:33.835152: step: 38/463, loss: 0.004953530617058277 2023-01-24 03:52:34.416602: step: 40/463, loss: 0.043295565992593765 2023-01-24 03:52:35.056721: step: 42/463, loss: 0.004795689135789871 2023-01-24 03:52:35.643101: step: 44/463, loss: 0.005133289378136396 2023-01-24 03:52:36.266906: step: 46/463, loss: 0.007721369154751301 2023-01-24 03:52:36.888998: step: 48/463, loss: 0.026323147118091583 2023-01-24 03:52:37.603296: step: 50/463, loss: 0.013883039355278015 2023-01-24 03:52:38.216765: step: 52/463, loss: 0.030775751918554306 2023-01-24 03:52:38.806758: step: 54/463, loss: 0.030654147267341614 2023-01-24 03:52:39.408905: step: 56/463, loss: 0.0059691909700632095 2023-01-24 03:52:39.984383: step: 58/463, loss: 0.008692620322108269 2023-01-24 03:52:40.631356: step: 60/463, loss: 0.1722310185432434 2023-01-24 03:52:41.311857: step: 62/463, loss: 0.003924692049622536 2023-01-24 03:52:41.964429: step: 64/463, loss: 0.17060643434524536 2023-01-24 03:52:42.525283: step: 66/463, loss: 0.008244921453297138 2023-01-24 03:52:43.107277: step: 68/463, loss: 0.20475663244724274 2023-01-24 03:52:43.701283: step: 70/463, loss: 0.05069133639335632 2023-01-24 03:52:44.319894: step: 72/463, loss: 0.10135912895202637 2023-01-24 03:52:44.903769: step: 74/463, loss: 0.010476440191268921 2023-01-24 03:52:45.500275: step: 76/463, loss: 0.04310664162039757 2023-01-24 03:52:46.144730: step: 78/463, loss: 0.06516048312187195 2023-01-24 03:52:46.745832: step: 80/463, loss: 0.010809807106852531 2023-01-24 03:52:47.433556: step: 82/463, loss: 0.028552232310175896 2023-01-24 03:52:48.149802: step: 84/463, loss: 10.702315330505371 2023-01-24 03:52:48.853511: step: 86/463, loss: 0.056084778159856796 2023-01-24 03:52:49.357608: step: 88/463, loss: 0.029514199122786522 2023-01-24 03:52:50.011110: step: 90/463, loss: 0.043735358864068985 2023-01-24 03:52:50.646972: step: 92/463, loss: 0.050229739397764206 2023-01-24 03:52:51.230824: step: 94/463, loss: 0.0179668627679348 2023-01-24 03:52:51.754981: step: 96/463, loss: 0.02492492087185383 2023-01-24 03:52:52.399656: step: 98/463, loss: 0.03357226029038429 2023-01-24 03:52:52.989425: step: 100/463, loss: 0.0371817909181118 2023-01-24 03:52:53.705266: step: 102/463, loss: 0.08622241020202637 2023-01-24 03:52:54.341522: step: 104/463, loss: 0.2825721502304077 2023-01-24 03:52:54.997005: step: 106/463, loss: 0.003581398166716099 2023-01-24 03:52:55.602655: step: 108/463, loss: 0.006747517269104719 2023-01-24 03:52:56.166459: step: 110/463, loss: 0.0044977692887187 2023-01-24 03:52:56.828820: step: 112/463, loss: 0.02416067011654377 2023-01-24 03:52:57.441857: step: 114/463, loss: 0.003193135606124997 2023-01-24 03:52:58.072843: step: 116/463, loss: 0.00278079672716558 2023-01-24 03:52:58.692211: step: 118/463, loss: 0.0323198102414608 2023-01-24 03:52:59.268593: step: 120/463, loss: 0.05133876949548721 2023-01-24 03:52:59.949629: step: 122/463, loss: 0.01280826237052679 2023-01-24 03:53:00.602543: step: 124/463, loss: 0.05533473193645477 2023-01-24 03:53:01.200790: step: 126/463, loss: 0.008032385259866714 2023-01-24 03:53:01.852210: step: 128/463, loss: 0.03915274143218994 2023-01-24 03:53:02.380681: step: 130/463, loss: 0.00026882460224442184 2023-01-24 03:53:02.974161: step: 132/463, loss: 0.19075115025043488 2023-01-24 03:53:03.663007: step: 134/463, loss: 0.003682692302390933 2023-01-24 03:53:04.281117: step: 136/463, loss: 0.007612552028149366 2023-01-24 03:53:04.859396: step: 138/463, loss: 0.009081754833459854 2023-01-24 03:53:05.458043: step: 140/463, loss: 0.11434803903102875 2023-01-24 03:53:06.033674: step: 142/463, loss: 0.008975489065051079 2023-01-24 03:53:06.602453: step: 144/463, loss: 0.09609223157167435 2023-01-24 03:53:07.181013: step: 146/463, loss: 0.028597110882401466 2023-01-24 03:53:07.769544: step: 148/463, loss: 0.002698329510167241 2023-01-24 03:53:08.362047: step: 150/463, loss: 0.0019676671363413334 2023-01-24 03:53:08.996434: step: 152/463, loss: 0.010723394341766834 2023-01-24 03:53:09.557974: step: 154/463, loss: 0.12380588054656982 2023-01-24 03:53:10.196384: step: 156/463, loss: 0.021020790562033653 2023-01-24 03:53:10.790202: step: 158/463, loss: 0.0013696174137294292 2023-01-24 03:53:11.361390: step: 160/463, loss: 0.022433588281273842 2023-01-24 03:53:12.031259: step: 162/463, loss: 0.10054502636194229 2023-01-24 03:53:12.672047: step: 164/463, loss: 0.0009888445492833853 2023-01-24 03:53:13.316796: step: 166/463, loss: 0.0010303184390068054 2023-01-24 03:53:13.992810: step: 168/463, loss: 0.0016256992239505053 2023-01-24 03:53:14.596198: step: 170/463, loss: 0.0011340640485286713 2023-01-24 03:53:15.219377: step: 172/463, loss: 0.0027440411504358053 2023-01-24 03:53:15.858664: step: 174/463, loss: 0.02149234525859356 2023-01-24 03:53:16.443350: step: 176/463, loss: 0.01609439216554165 2023-01-24 03:53:17.049820: step: 178/463, loss: 0.004796427208930254 2023-01-24 03:53:17.711892: step: 180/463, loss: 0.004902226384729147 2023-01-24 03:53:18.297399: step: 182/463, loss: 0.0017605216708034277 2023-01-24 03:53:18.932422: step: 184/463, loss: 0.0008493036730214953 2023-01-24 03:53:19.585582: step: 186/463, loss: 0.0018293842440471053 2023-01-24 03:53:20.210282: step: 188/463, loss: 0.020065097138285637 2023-01-24 03:53:20.805959: step: 190/463, loss: 1.041109561920166 2023-01-24 03:53:21.446857: step: 192/463, loss: 0.016056202352046967 2023-01-24 03:53:22.025062: step: 194/463, loss: 0.05330295115709305 2023-01-24 03:53:22.768373: step: 196/463, loss: 0.05304675176739693 2023-01-24 03:53:23.345697: step: 198/463, loss: 0.0032352011185139418 2023-01-24 03:53:23.920410: step: 200/463, loss: 0.013113613240420818 2023-01-24 03:53:24.500468: step: 202/463, loss: 0.017349371686577797 2023-01-24 03:53:25.116485: step: 204/463, loss: 0.0588812492787838 2023-01-24 03:53:25.764767: step: 206/463, loss: 0.00950802955776453 2023-01-24 03:53:26.331347: step: 208/463, loss: 0.037574172019958496 2023-01-24 03:53:26.961633: step: 210/463, loss: 0.0014999906998127699 2023-01-24 03:53:27.582761: step: 212/463, loss: 0.010171581991016865 2023-01-24 03:53:28.237814: step: 214/463, loss: 0.011187254451215267 2023-01-24 03:53:28.842054: step: 216/463, loss: 0.0028039561584591866 2023-01-24 03:53:29.454270: step: 218/463, loss: 0.0051842643879354 2023-01-24 03:53:30.117885: step: 220/463, loss: 0.0003626102115958929 2023-01-24 03:53:30.818216: step: 222/463, loss: 0.20281988382339478 2023-01-24 03:53:31.413897: step: 224/463, loss: 0.05257974937558174 2023-01-24 03:53:32.018235: step: 226/463, loss: 0.022596748545765877 2023-01-24 03:53:32.591356: step: 228/463, loss: 0.0028894469141960144 2023-01-24 03:53:33.149615: step: 230/463, loss: 0.0024516438134014606 2023-01-24 03:53:33.765876: step: 232/463, loss: 0.42550885677337646 2023-01-24 03:53:34.433396: step: 234/463, loss: 0.0031005556229501963 2023-01-24 03:53:35.035148: step: 236/463, loss: 0.35000911355018616 2023-01-24 03:53:35.619213: step: 238/463, loss: 0.0046531399711966515 2023-01-24 03:53:36.198936: step: 240/463, loss: 0.011566930450499058 2023-01-24 03:53:36.818744: step: 242/463, loss: 0.016721833497285843 2023-01-24 03:53:37.399634: step: 244/463, loss: 0.002699431963264942 2023-01-24 03:53:37.991497: step: 246/463, loss: 0.025858163833618164 2023-01-24 03:53:38.538703: step: 248/463, loss: 0.006494059693068266 2023-01-24 03:53:39.042449: step: 250/463, loss: 0.002201608382165432 2023-01-24 03:53:39.674452: step: 252/463, loss: 0.004083262290805578 2023-01-24 03:53:40.280800: step: 254/463, loss: 0.13721609115600586 2023-01-24 03:53:40.885983: step: 256/463, loss: 0.016592271625995636 2023-01-24 03:53:41.497374: step: 258/463, loss: 0.014239763841032982 2023-01-24 03:53:42.127396: step: 260/463, loss: 0.006751021835952997 2023-01-24 03:53:42.801072: step: 262/463, loss: 0.04388519003987312 2023-01-24 03:53:43.388448: step: 264/463, loss: 0.027372760698199272 2023-01-24 03:53:44.039543: step: 266/463, loss: 0.0049220360815525055 2023-01-24 03:53:44.670328: step: 268/463, loss: 0.09807948023080826 2023-01-24 03:53:45.270382: step: 270/463, loss: 0.49886587262153625 2023-01-24 03:53:45.920734: step: 272/463, loss: 0.025776801630854607 2023-01-24 03:53:46.534456: step: 274/463, loss: 0.05722448229789734 2023-01-24 03:53:47.188126: step: 276/463, loss: 0.012615852989256382 2023-01-24 03:53:47.768973: step: 278/463, loss: 0.0015723053365945816 2023-01-24 03:53:48.314226: step: 280/463, loss: 0.011726630851626396 2023-01-24 03:53:48.960497: step: 282/463, loss: 0.18824024498462677 2023-01-24 03:53:49.551648: step: 284/463, loss: 0.05381745845079422 2023-01-24 03:53:50.073618: step: 286/463, loss: 0.00012856740795541555 2023-01-24 03:53:50.664831: step: 288/463, loss: 0.025308340787887573 2023-01-24 03:53:51.211621: step: 290/463, loss: 0.050366874784231186 2023-01-24 03:53:51.821279: step: 292/463, loss: 0.015104142017662525 2023-01-24 03:53:52.400042: step: 294/463, loss: 0.022485410794615746 2023-01-24 03:53:53.183502: step: 296/463, loss: 0.0014514451613649726 2023-01-24 03:53:53.745646: step: 298/463, loss: 0.003337111556902528 2023-01-24 03:53:54.353238: step: 300/463, loss: 0.013438807800412178 2023-01-24 03:53:54.882362: step: 302/463, loss: 0.038186293095350266 2023-01-24 03:53:55.588493: step: 304/463, loss: 0.025216538459062576 2023-01-24 03:53:56.188919: step: 306/463, loss: 0.005685428157448769 2023-01-24 03:53:56.774131: step: 308/463, loss: 0.030130673199892044 2023-01-24 03:53:57.395424: step: 310/463, loss: 0.011983786709606647 2023-01-24 03:53:58.000383: step: 312/463, loss: 0.02587090991437435 2023-01-24 03:53:58.695125: step: 314/463, loss: 0.0066139185801148415 2023-01-24 03:53:59.282071: step: 316/463, loss: 0.017479298636317253 2023-01-24 03:53:59.881141: step: 318/463, loss: 0.008532943204045296 2023-01-24 03:54:00.427163: step: 320/463, loss: 0.015127045102417469 2023-01-24 03:54:01.033360: step: 322/463, loss: 0.117922842502594 2023-01-24 03:54:01.697933: step: 324/463, loss: 0.024927185848355293 2023-01-24 03:54:02.286473: step: 326/463, loss: 0.0013276407262310386 2023-01-24 03:54:02.888194: step: 328/463, loss: 0.013469409197568893 2023-01-24 03:54:03.516253: step: 330/463, loss: 0.014201073907315731 2023-01-24 03:54:04.103550: step: 332/463, loss: 0.015197456814348698 2023-01-24 03:54:04.671060: step: 334/463, loss: 0.04778115078806877 2023-01-24 03:54:05.340894: step: 336/463, loss: 0.03152937442064285 2023-01-24 03:54:05.886073: step: 338/463, loss: 0.0024203353095799685 2023-01-24 03:54:06.531578: step: 340/463, loss: 0.025824224576354027 2023-01-24 03:54:07.125981: step: 342/463, loss: 0.01271248608827591 2023-01-24 03:54:07.738435: step: 344/463, loss: 0.14091353118419647 2023-01-24 03:54:08.358749: step: 346/463, loss: 0.011125164106488228 2023-01-24 03:54:08.921359: step: 348/463, loss: 0.0011161884758621454 2023-01-24 03:54:09.511864: step: 350/463, loss: 0.0038418604526668787 2023-01-24 03:54:10.099695: step: 352/463, loss: 0.0003088073863182217 2023-01-24 03:54:10.639728: step: 354/463, loss: 0.014115207828581333 2023-01-24 03:54:11.242030: step: 356/463, loss: 0.003571959212422371 2023-01-24 03:54:11.876312: step: 358/463, loss: 0.014766908250749111 2023-01-24 03:54:12.524816: step: 360/463, loss: 0.032025765627622604 2023-01-24 03:54:13.084638: step: 362/463, loss: 0.019933437928557396 2023-01-24 03:54:13.670903: step: 364/463, loss: 0.0022547240369021893 2023-01-24 03:54:14.267152: step: 366/463, loss: 0.011349081061780453 2023-01-24 03:54:14.890703: step: 368/463, loss: 0.040100473910570145 2023-01-24 03:54:15.436921: step: 370/463, loss: 0.010645358823239803 2023-01-24 03:54:16.058193: step: 372/463, loss: 0.002642563311383128 2023-01-24 03:54:16.754114: step: 374/463, loss: 0.00703195808455348 2023-01-24 03:54:17.310117: step: 376/463, loss: 0.01919461600482464 2023-01-24 03:54:17.984191: step: 378/463, loss: 0.019767770543694496 2023-01-24 03:54:18.631302: step: 380/463, loss: 0.02593395486474037 2023-01-24 03:54:19.267759: step: 382/463, loss: 0.012317029759287834 2023-01-24 03:54:19.913782: step: 384/463, loss: 0.01070999726653099 2023-01-24 03:54:20.564698: step: 386/463, loss: 0.0452464260160923 2023-01-24 03:54:21.192203: step: 388/463, loss: 0.0009972817497327924 2023-01-24 03:54:21.835067: step: 390/463, loss: 0.016652686521410942 2023-01-24 03:54:22.462579: step: 392/463, loss: 0.06522921472787857 2023-01-24 03:54:23.065134: step: 394/463, loss: 0.009243017993867397 2023-01-24 03:54:23.600922: step: 396/463, loss: 0.04568766802549362 2023-01-24 03:54:24.162951: step: 398/463, loss: 0.010907656513154507 2023-01-24 03:54:24.754041: step: 400/463, loss: 0.02632075734436512 2023-01-24 03:54:25.384306: step: 402/463, loss: 0.05536564439535141 2023-01-24 03:54:25.966432: step: 404/463, loss: 0.08034903556108475 2023-01-24 03:54:26.553489: step: 406/463, loss: 0.0027036741375923157 2023-01-24 03:54:27.316962: step: 408/463, loss: 0.000353818031726405 2023-01-24 03:54:27.943706: step: 410/463, loss: 0.01621779054403305 2023-01-24 03:54:28.525369: step: 412/463, loss: 0.6634386777877808 2023-01-24 03:54:29.175682: step: 414/463, loss: 1.7478951215744019 2023-01-24 03:54:29.783944: step: 416/463, loss: 0.006815090775489807 2023-01-24 03:54:30.408910: step: 418/463, loss: 0.030978742986917496 2023-01-24 03:54:31.036596: step: 420/463, loss: 0.026101812720298767 2023-01-24 03:54:31.611936: step: 422/463, loss: 0.003912142012268305 2023-01-24 03:54:32.249405: step: 424/463, loss: 0.014064560644328594 2023-01-24 03:54:32.813537: step: 426/463, loss: 0.05807751789689064 2023-01-24 03:54:33.477148: step: 428/463, loss: 0.04308537021279335 2023-01-24 03:54:34.046119: step: 430/463, loss: 0.04749622941017151 2023-01-24 03:54:34.600755: step: 432/463, loss: 0.01203763484954834 2023-01-24 03:54:35.228377: step: 434/463, loss: 0.008490880019962788 2023-01-24 03:54:35.831861: step: 436/463, loss: 0.011498426087200642 2023-01-24 03:54:36.457143: step: 438/463, loss: 0.058897268027067184 2023-01-24 03:54:37.115786: step: 440/463, loss: 0.008513512089848518 2023-01-24 03:54:37.638969: step: 442/463, loss: 0.01493648812174797 2023-01-24 03:54:38.250949: step: 444/463, loss: 0.019208183512091637 2023-01-24 03:54:38.814631: step: 446/463, loss: 0.008299942128360271 2023-01-24 03:54:39.352939: step: 448/463, loss: 0.043655067682266235 2023-01-24 03:54:39.961762: step: 450/463, loss: 0.043023645877838135 2023-01-24 03:54:40.560154: step: 452/463, loss: 0.014713020995259285 2023-01-24 03:54:41.176943: step: 454/463, loss: 0.009616974741220474 2023-01-24 03:54:41.813050: step: 456/463, loss: 0.04363328218460083 2023-01-24 03:54:42.402486: step: 458/463, loss: 0.03413240984082222 2023-01-24 03:54:43.051626: step: 460/463, loss: 0.24219352006912231 2023-01-24 03:54:43.786793: step: 462/463, loss: 0.02137332409620285 2023-01-24 03:54:44.391380: step: 464/463, loss: 0.022857673466205597 2023-01-24 03:54:44.961018: step: 466/463, loss: 0.5105307698249817 2023-01-24 03:54:45.640064: step: 468/463, loss: 0.05497068539261818 2023-01-24 03:54:46.227897: step: 470/463, loss: 0.011975910514593124 2023-01-24 03:54:46.816187: step: 472/463, loss: 0.018005985766649246 2023-01-24 03:54:47.416848: step: 474/463, loss: 0.07274439930915833 2023-01-24 03:54:48.063607: step: 476/463, loss: 0.015684565529227257 2023-01-24 03:54:48.668419: step: 478/463, loss: 0.177455872297287 2023-01-24 03:54:49.280757: step: 480/463, loss: 0.0014859967632219195 2023-01-24 03:54:49.897790: step: 482/463, loss: 0.0661887526512146 2023-01-24 03:54:50.538225: step: 484/463, loss: 0.07044608145952225 2023-01-24 03:54:51.160454: step: 486/463, loss: 0.011772790923714638 2023-01-24 03:54:51.803028: step: 488/463, loss: 0.011859515681862831 2023-01-24 03:54:52.373899: step: 490/463, loss: 0.022062404081225395 2023-01-24 03:54:52.981671: step: 492/463, loss: 0.1274869292974472 2023-01-24 03:54:53.587727: step: 494/463, loss: 0.041393477469682693 2023-01-24 03:54:54.181736: step: 496/463, loss: 0.0026368016842752695 2023-01-24 03:54:54.766831: step: 498/463, loss: 0.045303940773010254 2023-01-24 03:54:55.324921: step: 500/463, loss: 0.0012860259739682078 2023-01-24 03:54:55.971567: step: 502/463, loss: 0.7907482981681824 2023-01-24 03:54:56.554774: step: 504/463, loss: 0.027108099311590195 2023-01-24 03:54:57.128222: step: 506/463, loss: 0.0023503468837589025 2023-01-24 03:54:57.751494: step: 508/463, loss: 0.026525065302848816 2023-01-24 03:54:58.338292: step: 510/463, loss: 0.0061807045713067055 2023-01-24 03:54:58.937327: step: 512/463, loss: 0.047148000448942184 2023-01-24 03:54:59.533712: step: 514/463, loss: 0.002997663104906678 2023-01-24 03:55:00.175880: step: 516/463, loss: 0.004110563080757856 2023-01-24 03:55:00.774120: step: 518/463, loss: 0.0005467283772304654 2023-01-24 03:55:01.508516: step: 520/463, loss: 0.01614767126739025 2023-01-24 03:55:02.155644: step: 522/463, loss: 0.04990639165043831 2023-01-24 03:55:02.822729: step: 524/463, loss: 0.006604416761547327 2023-01-24 03:55:03.436686: step: 526/463, loss: 0.27749958634376526 2023-01-24 03:55:04.012898: step: 528/463, loss: 0.0005027002189308405 2023-01-24 03:55:04.618748: step: 530/463, loss: 0.021910516545176506 2023-01-24 03:55:05.242807: step: 532/463, loss: 0.003972940146923065 2023-01-24 03:55:05.863591: step: 534/463, loss: 0.018834222108125687 2023-01-24 03:55:06.402517: step: 536/463, loss: 0.00020300761389080435 2023-01-24 03:55:07.043307: step: 538/463, loss: 0.012414202094078064 2023-01-24 03:55:07.665351: step: 540/463, loss: 0.0010491933207958937 2023-01-24 03:55:08.205176: step: 542/463, loss: 0.0010448634857311845 2023-01-24 03:55:08.839598: step: 544/463, loss: 0.03614458441734314 2023-01-24 03:55:09.462724: step: 546/463, loss: 0.059722650796175 2023-01-24 03:55:10.024992: step: 548/463, loss: 0.0001090754522010684 2023-01-24 03:55:10.629540: step: 550/463, loss: 0.00458102161064744 2023-01-24 03:55:11.229288: step: 552/463, loss: 0.004893497563898563 2023-01-24 03:55:11.851820: step: 554/463, loss: 0.1626121699810028 2023-01-24 03:55:12.533526: step: 556/463, loss: 0.7224539518356323 2023-01-24 03:55:13.167241: step: 558/463, loss: 0.05753172188997269 2023-01-24 03:55:13.740906: step: 560/463, loss: 0.00853778701275587 2023-01-24 03:55:14.319751: step: 562/463, loss: 0.062451332807540894 2023-01-24 03:55:14.959068: step: 564/463, loss: 0.014598296023905277 2023-01-24 03:55:15.581264: step: 566/463, loss: 0.021690139546990395 2023-01-24 03:55:16.145699: step: 568/463, loss: 0.007573750335723162 2023-01-24 03:55:16.788466: step: 570/463, loss: 0.007899986580014229 2023-01-24 03:55:17.379389: step: 572/463, loss: 0.05881894379854202 2023-01-24 03:55:17.944960: step: 574/463, loss: 0.03397366404533386 2023-01-24 03:55:18.569281: step: 576/463, loss: 0.0095414062961936 2023-01-24 03:55:19.158365: step: 578/463, loss: 0.020920274779200554 2023-01-24 03:55:19.777306: step: 580/463, loss: 0.022272922098636627 2023-01-24 03:55:20.295485: step: 582/463, loss: 0.01040613278746605 2023-01-24 03:55:20.905798: step: 584/463, loss: 0.027083832770586014 2023-01-24 03:55:21.489226: step: 586/463, loss: 0.011641021817922592 2023-01-24 03:55:22.109731: step: 588/463, loss: 0.0018511103698983788 2023-01-24 03:55:22.737753: step: 590/463, loss: 0.014830281026661396 2023-01-24 03:55:23.461523: step: 592/463, loss: 0.04202072322368622 2023-01-24 03:55:24.067235: step: 594/463, loss: 0.08907254040241241 2023-01-24 03:55:24.628018: step: 596/463, loss: 0.0347394160926342 2023-01-24 03:55:25.274388: step: 598/463, loss: 0.008837294764816761 2023-01-24 03:55:25.896455: step: 600/463, loss: 0.01040163729339838 2023-01-24 03:55:26.490842: step: 602/463, loss: 0.0940219834446907 2023-01-24 03:55:27.146458: step: 604/463, loss: 0.004594606813043356 2023-01-24 03:55:27.794202: step: 606/463, loss: 0.013371134176850319 2023-01-24 03:55:28.379892: step: 608/463, loss: 0.01577639952301979 2023-01-24 03:55:29.086923: step: 610/463, loss: 0.003339444985613227 2023-01-24 03:55:29.692498: step: 612/463, loss: 0.07452493906021118 2023-01-24 03:55:30.312101: step: 614/463, loss: 0.038163404911756516 2023-01-24 03:55:30.871872: step: 616/463, loss: 0.030114853754639626 2023-01-24 03:55:31.443642: step: 618/463, loss: 0.04174436256289482 2023-01-24 03:55:32.041703: step: 620/463, loss: 0.05688111484050751 2023-01-24 03:55:32.634209: step: 622/463, loss: 0.020295582711696625 2023-01-24 03:55:33.198177: step: 624/463, loss: 0.03760010749101639 2023-01-24 03:55:33.821339: step: 626/463, loss: 0.03958737850189209 2023-01-24 03:55:34.519822: step: 628/463, loss: 0.011222248896956444 2023-01-24 03:55:35.121867: step: 630/463, loss: 0.049771998077631 2023-01-24 03:55:35.768480: step: 632/463, loss: 0.003004712052643299 2023-01-24 03:55:36.358604: step: 634/463, loss: 0.007820134051144123 2023-01-24 03:55:36.953074: step: 636/463, loss: 0.007004916202276945 2023-01-24 03:55:37.483647: step: 638/463, loss: 0.0049260398373007774 2023-01-24 03:55:38.116072: step: 640/463, loss: 0.010610437951982021 2023-01-24 03:55:38.685824: step: 642/463, loss: 0.0028233390767127275 2023-01-24 03:55:39.283147: step: 644/463, loss: 0.07816382497549057 2023-01-24 03:55:39.911699: step: 646/463, loss: 0.027265602722764015 2023-01-24 03:55:40.658148: step: 648/463, loss: 0.0012642203364521265 2023-01-24 03:55:41.240197: step: 650/463, loss: 0.04181981459259987 2023-01-24 03:55:41.905199: step: 652/463, loss: 0.024855943396687508 2023-01-24 03:55:42.467279: step: 654/463, loss: 0.3836914598941803 2023-01-24 03:55:43.036144: step: 656/463, loss: 0.010687730275094509 2023-01-24 03:55:43.643471: step: 658/463, loss: 0.03217970207333565 2023-01-24 03:55:44.239976: step: 660/463, loss: 0.022564584389328957 2023-01-24 03:55:44.873113: step: 662/463, loss: 0.08565708994865417 2023-01-24 03:55:45.449205: step: 664/463, loss: 0.011327611282467842 2023-01-24 03:55:46.105055: step: 666/463, loss: 0.004622115753591061 2023-01-24 03:55:46.733686: step: 668/463, loss: 0.016787691041827202 2023-01-24 03:55:47.333201: step: 670/463, loss: 0.525429368019104 2023-01-24 03:55:47.998670: step: 672/463, loss: 0.02729513682425022 2023-01-24 03:55:48.648404: step: 674/463, loss: 0.012964976951479912 2023-01-24 03:55:49.267809: step: 676/463, loss: 0.004189207684248686 2023-01-24 03:55:49.926020: step: 678/463, loss: 0.005609723273664713 2023-01-24 03:55:50.534521: step: 680/463, loss: 0.3587227761745453 2023-01-24 03:55:51.209975: step: 682/463, loss: 0.0242922380566597 2023-01-24 03:55:51.884609: step: 684/463, loss: 0.0050307633355259895 2023-01-24 03:55:52.526915: step: 686/463, loss: 0.013518266379833221 2023-01-24 03:55:53.124549: step: 688/463, loss: 0.018836049363017082 2023-01-24 03:55:53.786448: step: 690/463, loss: 0.005632149055600166 2023-01-24 03:55:54.484802: step: 692/463, loss: 0.0006052825483493507 2023-01-24 03:55:55.095598: step: 694/463, loss: 0.011170556768774986 2023-01-24 03:55:55.722419: step: 696/463, loss: 0.03173106163740158 2023-01-24 03:55:56.374207: step: 698/463, loss: 0.018734842538833618 2023-01-24 03:55:57.009275: step: 700/463, loss: 0.0337497778236866 2023-01-24 03:55:57.582725: step: 702/463, loss: 0.0116070955991745 2023-01-24 03:55:58.179665: step: 704/463, loss: 0.0032084763515740633 2023-01-24 03:55:58.814803: step: 706/463, loss: 0.021473128348588943 2023-01-24 03:55:59.398577: step: 708/463, loss: 0.005380632821470499 2023-01-24 03:55:59.969031: step: 710/463, loss: 0.0428922176361084 2023-01-24 03:56:00.588868: step: 712/463, loss: 0.017252519726753235 2023-01-24 03:56:01.181300: step: 714/463, loss: 0.000857905950397253 2023-01-24 03:56:01.790257: step: 716/463, loss: 0.19404752552509308 2023-01-24 03:56:02.316245: step: 718/463, loss: 0.5824961066246033 2023-01-24 03:56:02.873843: step: 720/463, loss: 0.028750771656632423 2023-01-24 03:56:03.479309: step: 722/463, loss: 0.13830217719078064 2023-01-24 03:56:04.082990: step: 724/463, loss: 0.0005885143764317036 2023-01-24 03:56:04.665977: step: 726/463, loss: 0.15766999125480652 2023-01-24 03:56:05.284359: step: 728/463, loss: 0.01509685255587101 2023-01-24 03:56:05.911446: step: 730/463, loss: 0.005241991952061653 2023-01-24 03:56:06.476295: step: 732/463, loss: 0.050539541989564896 2023-01-24 03:56:07.078776: step: 734/463, loss: 0.025972770527005196 2023-01-24 03:56:07.687740: step: 736/463, loss: 0.008269667625427246 2023-01-24 03:56:08.307494: step: 738/463, loss: 0.0003923339245375246 2023-01-24 03:56:08.882427: step: 740/463, loss: 0.08304078876972198 2023-01-24 03:56:09.467087: step: 742/463, loss: 0.05594936013221741 2023-01-24 03:56:10.054221: step: 744/463, loss: 0.07069092988967896 2023-01-24 03:56:10.670988: step: 746/463, loss: 0.0008289951365441084 2023-01-24 03:56:11.244666: step: 748/463, loss: 0.000946294458117336 2023-01-24 03:56:11.871196: step: 750/463, loss: 0.005845469422638416 2023-01-24 03:56:12.473438: step: 752/463, loss: 0.001117986743338406 2023-01-24 03:56:13.014128: step: 754/463, loss: 0.012012851424515247 2023-01-24 03:56:13.558099: step: 756/463, loss: 0.001254445523954928 2023-01-24 03:56:14.138646: step: 758/463, loss: 0.005217529833316803 2023-01-24 03:56:14.792177: step: 760/463, loss: 0.04603040590882301 2023-01-24 03:56:15.329094: step: 762/463, loss: 0.0019567597191780806 2023-01-24 03:56:15.930392: step: 764/463, loss: 0.0506419837474823 2023-01-24 03:56:16.604485: step: 766/463, loss: 0.01793968677520752 2023-01-24 03:56:17.326692: step: 768/463, loss: 0.11406755447387695 2023-01-24 03:56:17.939017: step: 770/463, loss: 0.01673888973891735 2023-01-24 03:56:18.539017: step: 772/463, loss: 0.00026779406471177936 2023-01-24 03:56:19.163305: step: 774/463, loss: 0.004441849887371063 2023-01-24 03:56:19.791884: step: 776/463, loss: 0.028758076950907707 2023-01-24 03:56:20.414638: step: 778/463, loss: 0.021082861348986626 2023-01-24 03:56:21.012580: step: 780/463, loss: 0.0464310348033905 2023-01-24 03:56:21.625491: step: 782/463, loss: 0.08372507989406586 2023-01-24 03:56:22.232739: step: 784/463, loss: 0.03811129555106163 2023-01-24 03:56:22.939717: step: 786/463, loss: 0.0038397684693336487 2023-01-24 03:56:23.522151: step: 788/463, loss: 0.02345157042145729 2023-01-24 03:56:24.104403: step: 790/463, loss: 0.011639229021966457 2023-01-24 03:56:24.686134: step: 792/463, loss: 0.07394200563430786 2023-01-24 03:56:25.274048: step: 794/463, loss: 0.0069422414526343346 2023-01-24 03:56:25.821617: step: 796/463, loss: 0.016587136313319206 2023-01-24 03:56:26.417377: step: 798/463, loss: 0.009428348392248154 2023-01-24 03:56:27.023249: step: 800/463, loss: 0.0008990506175905466 2023-01-24 03:56:27.584699: step: 802/463, loss: 0.08483754843473434 2023-01-24 03:56:28.173567: step: 804/463, loss: 0.2546783685684204 2023-01-24 03:56:28.731866: step: 806/463, loss: 0.012691248208284378 2023-01-24 03:56:29.335191: step: 808/463, loss: 0.008671457879245281 2023-01-24 03:56:29.929003: step: 810/463, loss: 0.07310877740383148 2023-01-24 03:56:30.545087: step: 812/463, loss: 0.0009924436453729868 2023-01-24 03:56:31.127024: step: 814/463, loss: 0.0014428084250539541 2023-01-24 03:56:31.794899: step: 816/463, loss: 0.05713183432817459 2023-01-24 03:56:32.524761: step: 818/463, loss: 0.011410077102482319 2023-01-24 03:56:33.118289: step: 820/463, loss: 0.008633735589683056 2023-01-24 03:56:33.728666: step: 822/463, loss: 0.018710706382989883 2023-01-24 03:56:34.313815: step: 824/463, loss: 0.0023364704102277756 2023-01-24 03:56:34.924709: step: 826/463, loss: 0.04826956242322922 2023-01-24 03:56:35.532310: step: 828/463, loss: 0.028795450925827026 2023-01-24 03:56:36.223610: step: 830/463, loss: 3.061577320098877 2023-01-24 03:56:36.853466: step: 832/463, loss: 0.004696912597864866 2023-01-24 03:56:37.462624: step: 834/463, loss: 0.07544531673192978 2023-01-24 03:56:38.032049: step: 836/463, loss: 0.006717769429087639 2023-01-24 03:56:38.637121: step: 838/463, loss: 2.0155328456894495e-05 2023-01-24 03:56:39.251594: step: 840/463, loss: 0.008734667673707008 2023-01-24 03:56:39.892114: step: 842/463, loss: 0.06910417228937149 2023-01-24 03:56:40.492131: step: 844/463, loss: 0.6662358045578003 2023-01-24 03:56:41.044567: step: 846/463, loss: 0.060290176421403885 2023-01-24 03:56:41.659967: step: 848/463, loss: 0.0008736244635656476 2023-01-24 03:56:42.197341: step: 850/463, loss: 0.1463870108127594 2023-01-24 03:56:42.782324: step: 852/463, loss: 0.01935930922627449 2023-01-24 03:56:43.490723: step: 854/463, loss: 0.055295784026384354 2023-01-24 03:56:44.142431: step: 856/463, loss: 0.057409726083278656 2023-01-24 03:56:44.744505: step: 858/463, loss: 0.029954412952065468 2023-01-24 03:56:45.330931: step: 860/463, loss: 0.004219128284603357 2023-01-24 03:56:45.956559: step: 862/463, loss: 0.01118563488125801 2023-01-24 03:56:46.573949: step: 864/463, loss: 0.03922933340072632 2023-01-24 03:56:47.166858: step: 866/463, loss: 0.06932850927114487 2023-01-24 03:56:47.804153: step: 868/463, loss: 0.019711172208189964 2023-01-24 03:56:48.432383: step: 870/463, loss: 0.04873194172978401 2023-01-24 03:56:49.019366: step: 872/463, loss: 0.00083334551891312 2023-01-24 03:56:49.655412: step: 874/463, loss: 0.014355028048157692 2023-01-24 03:56:50.253872: step: 876/463, loss: 0.005506808869540691 2023-01-24 03:56:50.864860: step: 878/463, loss: 0.07173406332731247 2023-01-24 03:56:51.529183: step: 880/463, loss: 0.1292986422777176 2023-01-24 03:56:52.095921: step: 882/463, loss: 0.026829225942492485 2023-01-24 03:56:52.686336: step: 884/463, loss: 0.0101832440122962 2023-01-24 03:56:53.310387: step: 886/463, loss: 0.004178597126156092 2023-01-24 03:56:53.929627: step: 888/463, loss: 0.020512109622359276 2023-01-24 03:56:54.560176: step: 890/463, loss: 0.0017298809252679348 2023-01-24 03:56:55.129739: step: 892/463, loss: 0.04164180904626846 2023-01-24 03:56:55.760657: step: 894/463, loss: 0.06886038184165955 2023-01-24 03:56:56.341416: step: 896/463, loss: 0.007509952876716852 2023-01-24 03:56:56.967243: step: 898/463, loss: 0.00106125941965729 2023-01-24 03:56:57.556963: step: 900/463, loss: 0.004839407280087471 2023-01-24 03:56:58.128561: step: 902/463, loss: 0.029843060299754143 2023-01-24 03:56:58.718959: step: 904/463, loss: 0.07037293165922165 2023-01-24 03:56:59.325777: step: 906/463, loss: 0.03483103960752487 2023-01-24 03:56:59.954541: step: 908/463, loss: 0.11868391931056976 2023-01-24 03:57:00.589650: step: 910/463, loss: 0.023331904783844948 2023-01-24 03:57:01.214906: step: 912/463, loss: 0.004009230528026819 2023-01-24 03:57:01.801943: step: 914/463, loss: 0.0056771948002278805 2023-01-24 03:57:02.449401: step: 916/463, loss: 0.028860315680503845 2023-01-24 03:57:03.017647: step: 918/463, loss: 0.02672826685011387 2023-01-24 03:57:03.642446: step: 920/463, loss: 0.04568895697593689 2023-01-24 03:57:04.259870: step: 922/463, loss: 0.040201522409915924 2023-01-24 03:57:04.806560: step: 924/463, loss: 0.012073663994669914 2023-01-24 03:57:05.442321: step: 926/463, loss: 0.01778118498623371 ================================================== Loss: 0.080 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33459169920059484, 'r': 0.3104655425219941, 'f1': 0.3220774427344309}, 'combined': 0.23732022096221222, 'epoch': 30} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.36650109432152067, 'r': 0.31400661443616745, 'f1': 0.3382291378451663}, 'combined': 0.2379501472277552, 'epoch': 30} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3368659420289855, 'r': 0.3087405123339659, 'f1': 0.3221905940594059}, 'combined': 0.23740359562272015, 'epoch': 30} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36420316963724847, 'r': 0.311401662074119, 'f1': 0.33573908010816034}, 'combined': 0.23837474687679383, 'epoch': 30} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3561513143304898, 'r': 0.3196576312681626, 'f1': 0.3369191433566433}, 'combined': 0.24825621089436872, 'epoch': 30} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37526273684740985, 'r': 0.29562182413656213, 'f1': 0.3307151818625927}, 'combined': 0.2348077791224408, 'epoch': 30} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2403846153846154, 'r': 0.26785714285714285, 'f1': 0.25337837837837834}, 'combined': 0.1689189189189189, 'epoch': 30} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.3804347826086957, 'f1': 0.3431372549019608}, 'combined': 0.1715686274509804, 'epoch': 30} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45454545454545453, 'r': 0.1724137931034483, 'f1': 0.25000000000000006}, 'combined': 0.16666666666666669, 'epoch': 30} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3207567698259188, 'r': 0.3146703036053131, 'f1': 0.3176843869731801}, 'combined': 0.23408323250655372, 'epoch': 25} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3686692880345029, 'r': 0.31751883707683626, 'f1': 0.34118761114866797}, 'combined': 0.24224320391555423, 'epoch': 25} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3392857142857143, 'r': 0.41304347826086957, 'f1': 0.3725490196078432}, 'combined': 0.1862745098039216, 'epoch': 25} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3394620582120582, 'r': 0.3092447916666667, 'f1': 0.32364965312190286}, 'combined': 0.23847869177403366, 'epoch': 7} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36468931207169214, 'r': 0.2587193810770381, 'f1': 0.30269771384817323}, 'combined': 0.214915376832203, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 7} ****************************** Epoch: 31 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:59:36.978139: step: 2/463, loss: 0.00725026149302721 2023-01-24 03:59:37.594724: step: 4/463, loss: 0.0001363893097732216 2023-01-24 03:59:38.150891: step: 6/463, loss: 0.0029738969169557095 2023-01-24 03:59:38.870455: step: 8/463, loss: 0.13773195445537567 2023-01-24 03:59:39.439212: step: 10/463, loss: 0.009015035815536976 2023-01-24 03:59:40.049543: step: 12/463, loss: 0.03185706585645676 2023-01-24 03:59:40.675978: step: 14/463, loss: 0.006370891816914082 2023-01-24 03:59:41.289869: step: 16/463, loss: 0.015026927925646305 2023-01-24 03:59:41.814478: step: 18/463, loss: 0.006893111392855644 2023-01-24 03:59:42.459794: step: 20/463, loss: 0.010060859844088554 2023-01-24 03:59:43.070024: step: 22/463, loss: 0.014011325314640999 2023-01-24 03:59:43.670076: step: 24/463, loss: 0.006456129252910614 2023-01-24 03:59:44.306948: step: 26/463, loss: 0.012332580983638763 2023-01-24 03:59:44.855726: step: 28/463, loss: 0.027756234630942345 2023-01-24 03:59:45.555739: step: 30/463, loss: 0.009706638753414154 2023-01-24 03:59:46.211444: step: 32/463, loss: 0.004507745616137981 2023-01-24 03:59:46.799701: step: 34/463, loss: 0.0006223267409950495 2023-01-24 03:59:47.395204: step: 36/463, loss: 0.009652109816670418 2023-01-24 03:59:47.984792: step: 38/463, loss: 0.011093094013631344 2023-01-24 03:59:48.631716: step: 40/463, loss: 0.020224500447511673 2023-01-24 03:59:49.287704: step: 42/463, loss: 0.0020708031952381134 2023-01-24 03:59:49.894071: step: 44/463, loss: 0.0009130054386332631 2023-01-24 03:59:50.478361: step: 46/463, loss: 0.0054998574778437614 2023-01-24 03:59:51.033796: step: 48/463, loss: 0.00042001731344498694 2023-01-24 03:59:51.643853: step: 50/463, loss: 0.01372883003205061 2023-01-24 03:59:52.272537: step: 52/463, loss: 0.0011782310903072357 2023-01-24 03:59:52.811765: step: 54/463, loss: 0.026677841320633888 2023-01-24 03:59:53.416037: step: 56/463, loss: 0.0007169287418946624 2023-01-24 03:59:53.967221: step: 58/463, loss: 0.07843128591775894 2023-01-24 03:59:54.588199: step: 60/463, loss: 0.0029024684336036444 2023-01-24 03:59:55.193292: step: 62/463, loss: 0.013909272849559784 2023-01-24 03:59:55.858900: step: 64/463, loss: 0.0019254968501627445 2023-01-24 03:59:56.459519: step: 66/463, loss: 0.005651150364428759 2023-01-24 03:59:57.059675: step: 68/463, loss: 0.013490301556885242 2023-01-24 03:59:57.648468: step: 70/463, loss: 0.007879774086177349 2023-01-24 03:59:58.302287: step: 72/463, loss: 0.04683401435613632 2023-01-24 03:59:58.911140: step: 74/463, loss: 0.0306683499366045 2023-01-24 03:59:59.508240: step: 76/463, loss: 0.3676237165927887 2023-01-24 04:00:00.190797: step: 78/463, loss: 0.0015667150728404522 2023-01-24 04:00:00.845329: step: 80/463, loss: 0.015830207616090775 2023-01-24 04:00:01.480262: step: 82/463, loss: 0.010973169468343258 2023-01-24 04:00:02.087287: step: 84/463, loss: 0.041591107845306396 2023-01-24 04:00:02.727164: step: 86/463, loss: 0.19536875188350677 2023-01-24 04:00:03.362457: step: 88/463, loss: 0.011319223791360855 2023-01-24 04:00:03.961689: step: 90/463, loss: 4.463930054043885e-06 2023-01-24 04:00:04.530721: step: 92/463, loss: 0.007775151636451483 2023-01-24 04:00:05.110468: step: 94/463, loss: 0.0010762730380520225 2023-01-24 04:00:05.671072: step: 96/463, loss: 0.016633011400699615 2023-01-24 04:00:06.225717: step: 98/463, loss: 0.042592067271471024 2023-01-24 04:00:06.780390: step: 100/463, loss: 0.16869518160820007 2023-01-24 04:00:07.377036: step: 102/463, loss: 0.04562800005078316 2023-01-24 04:00:08.007388: step: 104/463, loss: 0.00037364126183092594 2023-01-24 04:00:08.586846: step: 106/463, loss: 0.03178171068429947 2023-01-24 04:00:09.163391: step: 108/463, loss: 0.001316481619141996 2023-01-24 04:00:09.727027: step: 110/463, loss: 0.017384584993124008 2023-01-24 04:00:10.276755: step: 112/463, loss: 0.006682189647108316 2023-01-24 04:00:10.917965: step: 114/463, loss: 0.004350430332124233 2023-01-24 04:00:11.600762: step: 116/463, loss: 0.04855617508292198 2023-01-24 04:00:12.227776: step: 118/463, loss: 0.13614965975284576 2023-01-24 04:00:12.783427: step: 120/463, loss: 0.002895102836191654 2023-01-24 04:00:13.383684: step: 122/463, loss: 0.016039488837122917 2023-01-24 04:00:14.005926: step: 124/463, loss: 0.030543366447091103 2023-01-24 04:00:14.689078: step: 126/463, loss: 0.00019572608289308846 2023-01-24 04:00:15.309031: step: 128/463, loss: 0.009491250850260258 2023-01-24 04:00:15.916756: step: 130/463, loss: 0.020830780267715454 2023-01-24 04:00:16.540529: step: 132/463, loss: 0.02602963335812092 2023-01-24 04:00:17.126037: step: 134/463, loss: 0.0052163442596793175 2023-01-24 04:00:17.665274: step: 136/463, loss: 9.89046020549722e-05 2023-01-24 04:00:18.258929: step: 138/463, loss: 0.0026122969575226307 2023-01-24 04:00:18.860331: step: 140/463, loss: 0.005483146756887436 2023-01-24 04:00:19.556615: step: 142/463, loss: 0.017223993316292763 2023-01-24 04:00:20.146928: step: 144/463, loss: 0.005342448595911264 2023-01-24 04:00:20.770924: step: 146/463, loss: 0.1071867048740387 2023-01-24 04:00:21.360025: step: 148/463, loss: 0.14484000205993652 2023-01-24 04:00:21.955542: step: 150/463, loss: 0.0067138043232262135 2023-01-24 04:00:22.554420: step: 152/463, loss: 0.16120043396949768 2023-01-24 04:00:23.154088: step: 154/463, loss: 0.003946268931031227 2023-01-24 04:00:23.785945: step: 156/463, loss: 0.005271818023175001 2023-01-24 04:00:24.367734: step: 158/463, loss: 0.00818425603210926 2023-01-24 04:00:24.958046: step: 160/463, loss: 0.021719109266996384 2023-01-24 04:00:25.535041: step: 162/463, loss: 0.0015534190461039543 2023-01-24 04:00:26.097167: step: 164/463, loss: 0.0013039576588198543 2023-01-24 04:00:26.773574: step: 166/463, loss: 0.023855723440647125 2023-01-24 04:00:27.427646: step: 168/463, loss: 0.01022611279040575 2023-01-24 04:00:28.076205: step: 170/463, loss: 0.012751995585858822 2023-01-24 04:00:28.697755: step: 172/463, loss: 0.004349060822278261 2023-01-24 04:00:29.348992: step: 174/463, loss: 0.005351799074560404 2023-01-24 04:00:29.933876: step: 176/463, loss: 0.010071488097310066 2023-01-24 04:00:30.525406: step: 178/463, loss: 0.004112984519451857 2023-01-24 04:00:31.202105: step: 180/463, loss: 0.00930499006062746 2023-01-24 04:00:31.777618: step: 182/463, loss: 0.007685250602662563 2023-01-24 04:00:32.376353: step: 184/463, loss: 7.922354416223243e-05 2023-01-24 04:00:32.959323: step: 186/463, loss: 0.03231342136859894 2023-01-24 04:00:33.529527: step: 188/463, loss: 0.02998088113963604 2023-01-24 04:00:34.121986: step: 190/463, loss: 0.0034571161959320307 2023-01-24 04:00:34.769731: step: 192/463, loss: 0.024584541097283363 2023-01-24 04:00:35.348453: step: 194/463, loss: 0.12980739772319794 2023-01-24 04:00:35.902364: step: 196/463, loss: 0.055669765919446945 2023-01-24 04:00:36.541965: step: 198/463, loss: 0.0012383840512484312 2023-01-24 04:00:37.154986: step: 200/463, loss: 0.007689193822443485 2023-01-24 04:00:37.762744: step: 202/463, loss: 0.08940066397190094 2023-01-24 04:00:38.427732: step: 204/463, loss: 0.01005682535469532 2023-01-24 04:00:38.999699: step: 206/463, loss: 0.004086892120540142 2023-01-24 04:00:39.568206: step: 208/463, loss: 0.0005765280802734196 2023-01-24 04:00:40.101660: step: 210/463, loss: 0.00358169456012547 2023-01-24 04:00:40.686899: step: 212/463, loss: 0.0009182443027384579 2023-01-24 04:00:41.286405: step: 214/463, loss: 0.004284856375306845 2023-01-24 04:00:41.852014: step: 216/463, loss: 0.01596018671989441 2023-01-24 04:00:42.457649: step: 218/463, loss: 0.0012788488529622555 2023-01-24 04:00:43.040399: step: 220/463, loss: 0.04990892857313156 2023-01-24 04:00:43.659361: step: 222/463, loss: 0.015364880673587322 2023-01-24 04:00:44.285584: step: 224/463, loss: 0.03153572231531143 2023-01-24 04:00:44.944703: step: 226/463, loss: 0.001673261751420796 2023-01-24 04:00:45.579041: step: 228/463, loss: 0.04643953964114189 2023-01-24 04:00:46.186403: step: 230/463, loss: 0.002543516457080841 2023-01-24 04:00:46.769949: step: 232/463, loss: 0.0018153395503759384 2023-01-24 04:00:47.391225: step: 234/463, loss: 0.005837084259837866 2023-01-24 04:00:47.984097: step: 236/463, loss: 0.01580464467406273 2023-01-24 04:00:48.638988: step: 238/463, loss: 0.016377022489905357 2023-01-24 04:00:49.326544: step: 240/463, loss: 0.010063891299068928 2023-01-24 04:00:49.937400: step: 242/463, loss: 0.07427211850881577 2023-01-24 04:00:50.551864: step: 244/463, loss: 0.015656745061278343 2023-01-24 04:00:51.162395: step: 246/463, loss: 0.02390866167843342 2023-01-24 04:00:51.816635: step: 248/463, loss: 0.29639890789985657 2023-01-24 04:00:52.430723: step: 250/463, loss: 0.0003697085485327989 2023-01-24 04:00:53.080638: step: 252/463, loss: 0.028861818835139275 2023-01-24 04:00:53.681674: step: 254/463, loss: 0.06549953669309616 2023-01-24 04:00:54.324494: step: 256/463, loss: 0.012257049791514874 2023-01-24 04:00:55.042702: step: 258/463, loss: 0.0016428704839199781 2023-01-24 04:00:55.623376: step: 260/463, loss: 0.0019410232780501246 2023-01-24 04:00:56.183499: step: 262/463, loss: 0.023514997214078903 2023-01-24 04:00:56.803333: step: 264/463, loss: 0.003023408818989992 2023-01-24 04:00:57.443390: step: 266/463, loss: 0.1949722319841385 2023-01-24 04:00:58.009277: step: 268/463, loss: 0.04643306881189346 2023-01-24 04:00:58.629144: step: 270/463, loss: 0.030390070751309395 2023-01-24 04:00:59.267297: step: 272/463, loss: 0.0058040861040353775 2023-01-24 04:00:59.894230: step: 274/463, loss: 0.002368512563407421 2023-01-24 04:01:00.504997: step: 276/463, loss: 0.05977480486035347 2023-01-24 04:01:01.139958: step: 278/463, loss: 0.038890399038791656 2023-01-24 04:01:01.714627: step: 280/463, loss: 0.0012865137541666627 2023-01-24 04:01:02.292659: step: 282/463, loss: 0.010569889098405838 2023-01-24 04:01:02.843198: step: 284/463, loss: 0.0021865293383598328 2023-01-24 04:01:03.497175: step: 286/463, loss: 0.029273446649312973 2023-01-24 04:01:04.124335: step: 288/463, loss: 0.022681208327412605 2023-01-24 04:01:04.710636: step: 290/463, loss: 0.07850952446460724 2023-01-24 04:01:05.274352: step: 292/463, loss: 0.025779077783226967 2023-01-24 04:01:05.860318: step: 294/463, loss: 0.001145223155617714 2023-01-24 04:01:06.455244: step: 296/463, loss: 0.0018018580740317702 2023-01-24 04:01:07.103908: step: 298/463, loss: 0.014939815737307072 2023-01-24 04:01:07.769528: step: 300/463, loss: 0.023720912635326385 2023-01-24 04:01:08.400660: step: 302/463, loss: 0.011312957853078842 2023-01-24 04:01:09.046844: step: 304/463, loss: 0.031415555626153946 2023-01-24 04:01:09.645753: step: 306/463, loss: 0.027801595628261566 2023-01-24 04:01:10.232687: step: 308/463, loss: 0.0024184619542211294 2023-01-24 04:01:10.758880: step: 310/463, loss: 0.0016299487324431539 2023-01-24 04:01:11.346749: step: 312/463, loss: 0.012374631129205227 2023-01-24 04:01:11.974072: step: 314/463, loss: 0.004865078255534172 2023-01-24 04:01:12.618801: step: 316/463, loss: 0.0003527867665980011 2023-01-24 04:01:13.220926: step: 318/463, loss: 0.07314730435609818 2023-01-24 04:01:13.854392: step: 320/463, loss: 0.0031003013718873262 2023-01-24 04:01:14.429041: step: 322/463, loss: 0.023870335891842842 2023-01-24 04:01:15.027991: step: 324/463, loss: 0.0003286516584921628 2023-01-24 04:01:15.665841: step: 326/463, loss: 0.0026390599086880684 2023-01-24 04:01:16.282553: step: 328/463, loss: 0.04541634023189545 2023-01-24 04:01:16.895292: step: 330/463, loss: 0.0460786409676075 2023-01-24 04:01:17.531583: step: 332/463, loss: 0.009213834069669247 2023-01-24 04:01:18.139962: step: 334/463, loss: 0.005137164611369371 2023-01-24 04:01:18.818321: step: 336/463, loss: 0.0341918058693409 2023-01-24 04:01:19.385389: step: 338/463, loss: 0.03992049768567085 2023-01-24 04:01:20.023964: step: 340/463, loss: 0.0005737691535614431 2023-01-24 04:01:20.715453: step: 342/463, loss: 0.02265383116900921 2023-01-24 04:01:21.340033: step: 344/463, loss: 0.05088517442345619 2023-01-24 04:01:21.944179: step: 346/463, loss: 0.0006356483208946884 2023-01-24 04:01:22.496196: step: 348/463, loss: 0.0008056862861849368 2023-01-24 04:01:23.036248: step: 350/463, loss: 0.003064524382352829 2023-01-24 04:01:23.636367: step: 352/463, loss: 0.00011712976265698671 2023-01-24 04:01:24.268705: step: 354/463, loss: 0.00039748940616846085 2023-01-24 04:01:24.848278: step: 356/463, loss: 0.0059139844961464405 2023-01-24 04:01:25.435297: step: 358/463, loss: 0.005912731401622295 2023-01-24 04:01:26.021655: step: 360/463, loss: 0.0026770024560391903 2023-01-24 04:01:26.606161: step: 362/463, loss: 0.004558705259114504 2023-01-24 04:01:27.175845: step: 364/463, loss: 0.00042951450450345874 2023-01-24 04:01:27.827077: step: 366/463, loss: 0.05039579048752785 2023-01-24 04:01:28.410499: step: 368/463, loss: 0.019948001950979233 2023-01-24 04:01:28.984279: step: 370/463, loss: 0.002043224172666669 2023-01-24 04:01:29.616733: step: 372/463, loss: 0.001063555246219039 2023-01-24 04:01:30.239690: step: 374/463, loss: 0.01840025931596756 2023-01-24 04:01:30.871792: step: 376/463, loss: 0.004129776265472174 2023-01-24 04:01:31.518297: step: 378/463, loss: 0.06632975488901138 2023-01-24 04:01:32.113929: step: 380/463, loss: 0.012461543083190918 2023-01-24 04:01:32.726254: step: 382/463, loss: 0.01947632245719433 2023-01-24 04:01:33.313881: step: 384/463, loss: 0.0013626560103148222 2023-01-24 04:01:33.858929: step: 386/463, loss: 0.00034974896698258817 2023-01-24 04:01:34.485182: step: 388/463, loss: 0.0016777660930529237 2023-01-24 04:01:35.122936: step: 390/463, loss: 0.019397951662540436 2023-01-24 04:01:35.781097: step: 392/463, loss: 0.005715191829949617 2023-01-24 04:01:36.457463: step: 394/463, loss: 0.003102952381595969 2023-01-24 04:01:37.078163: step: 396/463, loss: 0.10066412389278412 2023-01-24 04:01:37.709689: step: 398/463, loss: 0.004147304687649012 2023-01-24 04:01:38.351640: step: 400/463, loss: 0.00591952633112669 2023-01-24 04:01:38.959969: step: 402/463, loss: 0.028772516176104546 2023-01-24 04:01:39.616707: step: 404/463, loss: 0.0066604577004909515 2023-01-24 04:01:40.169617: step: 406/463, loss: 0.0007989301229827106 2023-01-24 04:01:40.785420: step: 408/463, loss: 0.03508681431412697 2023-01-24 04:01:41.447978: step: 410/463, loss: 0.09349997341632843 2023-01-24 04:01:42.044058: step: 412/463, loss: 0.04553740471601486 2023-01-24 04:01:42.633821: step: 414/463, loss: 0.020893126726150513 2023-01-24 04:01:43.235901: step: 416/463, loss: 0.006720874458551407 2023-01-24 04:01:43.857800: step: 418/463, loss: 0.030708542093634605 2023-01-24 04:01:44.416711: step: 420/463, loss: 6.32376249996014e-05 2023-01-24 04:01:45.119459: step: 422/463, loss: 0.08602461963891983 2023-01-24 04:01:45.672116: step: 424/463, loss: 0.01434249896556139 2023-01-24 04:01:46.228443: step: 426/463, loss: 0.005864245351403952 2023-01-24 04:01:46.789231: step: 428/463, loss: 0.002704811282455921 2023-01-24 04:01:47.371919: step: 430/463, loss: 0.1177762970328331 2023-01-24 04:01:47.951538: step: 432/463, loss: 0.03911645710468292 2023-01-24 04:01:48.489155: step: 434/463, loss: 0.0011838545324280858 2023-01-24 04:01:49.129027: step: 436/463, loss: 0.007203293032944202 2023-01-24 04:01:49.826586: step: 438/463, loss: 0.0008631915552541614 2023-01-24 04:01:50.416591: step: 440/463, loss: 0.22465580701828003 2023-01-24 04:01:51.032695: step: 442/463, loss: 0.0036286641843616962 2023-01-24 04:01:51.596881: step: 444/463, loss: 0.0014444930711761117 2023-01-24 04:01:52.197204: step: 446/463, loss: 0.01000501960515976 2023-01-24 04:01:52.868838: step: 448/463, loss: 0.0020021407399326563 2023-01-24 04:01:53.481693: step: 450/463, loss: 0.011914189904928207 2023-01-24 04:01:54.123668: step: 452/463, loss: 0.004677074961364269 2023-01-24 04:01:54.747100: step: 454/463, loss: 0.011585263535380363 2023-01-24 04:01:55.378547: step: 456/463, loss: 0.028917789459228516 2023-01-24 04:01:55.935924: step: 458/463, loss: 0.0019708839245140553 2023-01-24 04:01:56.515506: step: 460/463, loss: 0.005017781630158424 2023-01-24 04:01:57.126370: step: 462/463, loss: 0.004680877085775137 2023-01-24 04:01:57.702942: step: 464/463, loss: 5.8627803809940815e-05 2023-01-24 04:01:58.260035: step: 466/463, loss: 0.018524853512644768 2023-01-24 04:01:58.895488: step: 468/463, loss: 0.052745286375284195 2023-01-24 04:01:59.469128: step: 470/463, loss: 0.0019491834100335836 2023-01-24 04:02:00.031367: step: 472/463, loss: 0.005913922563195229 2023-01-24 04:02:00.671417: step: 474/463, loss: 0.012083961628377438 2023-01-24 04:02:01.326713: step: 476/463, loss: 0.00569996889680624 2023-01-24 04:02:01.901260: step: 478/463, loss: 0.004558409098535776 2023-01-24 04:02:02.486345: step: 480/463, loss: 0.02894248627126217 2023-01-24 04:02:03.101645: step: 482/463, loss: 0.004375309217721224 2023-01-24 04:02:03.716780: step: 484/463, loss: 0.027949832379817963 2023-01-24 04:02:04.293555: step: 486/463, loss: 0.017888184636831284 2023-01-24 04:02:04.863626: step: 488/463, loss: 0.044304318726062775 2023-01-24 04:02:05.607427: step: 490/463, loss: 0.01752992533147335 2023-01-24 04:02:06.130856: step: 492/463, loss: 0.018006280064582825 2023-01-24 04:02:06.769750: step: 494/463, loss: 0.012743828818202019 2023-01-24 04:02:07.441013: step: 496/463, loss: 0.024910010397434235 2023-01-24 04:02:08.039981: step: 498/463, loss: 0.02561298757791519 2023-01-24 04:02:08.578104: step: 500/463, loss: 0.02798738330602646 2023-01-24 04:02:09.221216: step: 502/463, loss: 0.005998285487294197 2023-01-24 04:02:09.812774: step: 504/463, loss: 0.01590811274945736 2023-01-24 04:02:10.378512: step: 506/463, loss: 0.006833244115114212 2023-01-24 04:02:10.958256: step: 508/463, loss: 0.06002255156636238 2023-01-24 04:02:11.507845: step: 510/463, loss: 0.03906403109431267 2023-01-24 04:02:12.111080: step: 512/463, loss: 0.0017979465192183852 2023-01-24 04:02:12.732509: step: 514/463, loss: 0.0028399177826941013 2023-01-24 04:02:13.344072: step: 516/463, loss: 0.029050398617982864 2023-01-24 04:02:14.019006: step: 518/463, loss: 0.021828724071383476 2023-01-24 04:02:14.660959: step: 520/463, loss: 0.005752418655902147 2023-01-24 04:02:15.250610: step: 522/463, loss: 0.008832084946334362 2023-01-24 04:02:15.872062: step: 524/463, loss: 0.09144224971532822 2023-01-24 04:02:16.493543: step: 526/463, loss: 0.024472521618008614 2023-01-24 04:02:17.072053: step: 528/463, loss: 0.017040187492966652 2023-01-24 04:02:17.649945: step: 530/463, loss: 8.080002589849755e-05 2023-01-24 04:02:18.301039: step: 532/463, loss: 0.01090080477297306 2023-01-24 04:02:18.958771: step: 534/463, loss: 0.02142927795648575 2023-01-24 04:02:19.579456: step: 536/463, loss: 0.008503993973135948 2023-01-24 04:02:20.197116: step: 538/463, loss: 0.0228914562612772 2023-01-24 04:02:20.785453: step: 540/463, loss: 0.002231088699772954 2023-01-24 04:02:21.345769: step: 542/463, loss: 0.02670999988913536 2023-01-24 04:02:21.967552: step: 544/463, loss: 0.0901108831167221 2023-01-24 04:02:22.570902: step: 546/463, loss: 0.02485244907438755 2023-01-24 04:02:23.153396: step: 548/463, loss: 0.00044756263378076255 2023-01-24 04:02:23.730844: step: 550/463, loss: 0.001110156998038292 2023-01-24 04:02:24.291265: step: 552/463, loss: 0.01374089252203703 2023-01-24 04:02:24.917623: step: 554/463, loss: 0.27272501587867737 2023-01-24 04:02:25.643900: step: 556/463, loss: 0.050288762897253036 2023-01-24 04:02:26.239435: step: 558/463, loss: 0.34021005034446716 2023-01-24 04:02:26.858734: step: 560/463, loss: 0.021084623411297798 2023-01-24 04:02:27.477650: step: 562/463, loss: 0.04831981658935547 2023-01-24 04:02:28.123954: step: 564/463, loss: 0.008745735511183739 2023-01-24 04:02:28.710000: step: 566/463, loss: 0.004962179809808731 2023-01-24 04:02:29.285428: step: 568/463, loss: 0.04365970939397812 2023-01-24 04:02:29.893539: step: 570/463, loss: 0.03384136036038399 2023-01-24 04:02:30.486987: step: 572/463, loss: 0.02389417588710785 2023-01-24 04:02:31.138560: step: 574/463, loss: 0.0049474504776299 2023-01-24 04:02:31.705174: step: 576/463, loss: 0.1513223797082901 2023-01-24 04:02:32.284822: step: 578/463, loss: 0.0028791793156415224 2023-01-24 04:02:32.872416: step: 580/463, loss: 0.01237533986568451 2023-01-24 04:02:33.449722: step: 582/463, loss: 0.0258727315813303 2023-01-24 04:02:34.101634: step: 584/463, loss: 0.02633424662053585 2023-01-24 04:02:34.623211: step: 586/463, loss: 0.11017702519893646 2023-01-24 04:02:35.227632: step: 588/463, loss: 0.004107676446437836 2023-01-24 04:02:35.902195: step: 590/463, loss: 0.008155887946486473 2023-01-24 04:02:36.551170: step: 592/463, loss: 0.0015238303458318114 2023-01-24 04:02:37.129713: step: 594/463, loss: 0.07830831408500671 2023-01-24 04:02:37.738114: step: 596/463, loss: 0.008800147101283073 2023-01-24 04:02:38.412855: step: 598/463, loss: 0.015675554051995277 2023-01-24 04:02:39.025540: step: 600/463, loss: 0.010863970033824444 2023-01-24 04:02:39.607944: step: 602/463, loss: 0.011105144396424294 2023-01-24 04:02:40.204473: step: 604/463, loss: 0.026077480986714363 2023-01-24 04:02:40.786246: step: 606/463, loss: 0.03320009633898735 2023-01-24 04:02:41.421217: step: 608/463, loss: 0.021250873804092407 2023-01-24 04:02:42.007350: step: 610/463, loss: 0.001374796498566866 2023-01-24 04:02:42.653183: step: 612/463, loss: 0.00414453586563468 2023-01-24 04:02:43.289581: step: 614/463, loss: 0.029450085014104843 2023-01-24 04:02:43.869415: step: 616/463, loss: 0.010704120621085167 2023-01-24 04:02:44.364413: step: 618/463, loss: 0.00018523633480072021 2023-01-24 04:02:44.996665: step: 620/463, loss: 0.04890349879860878 2023-01-24 04:02:45.552035: step: 622/463, loss: 0.002012216253206134 2023-01-24 04:02:46.120692: step: 624/463, loss: 0.0008049262687563896 2023-01-24 04:02:46.723961: step: 626/463, loss: 0.00030575424898415804 2023-01-24 04:02:47.268447: step: 628/463, loss: 0.015389290638267994 2023-01-24 04:02:47.851340: step: 630/463, loss: 0.001931900391355157 2023-01-24 04:02:48.579902: step: 632/463, loss: 0.004005905240774155 2023-01-24 04:02:49.261264: step: 634/463, loss: 0.010838436894118786 2023-01-24 04:02:49.820580: step: 636/463, loss: 0.03228865563869476 2023-01-24 04:02:50.419909: step: 638/463, loss: 0.024002322927117348 2023-01-24 04:02:51.010069: step: 640/463, loss: 0.015172014944255352 2023-01-24 04:02:51.634822: step: 642/463, loss: 0.005878330674022436 2023-01-24 04:02:52.238192: step: 644/463, loss: 0.008941175416111946 2023-01-24 04:02:52.930841: step: 646/463, loss: 0.009758789092302322 2023-01-24 04:02:53.533123: step: 648/463, loss: 0.005844649858772755 2023-01-24 04:02:54.175923: step: 650/463, loss: 0.01981349289417267 2023-01-24 04:02:54.817741: step: 652/463, loss: 0.008496775291860104 2023-01-24 04:02:55.381495: step: 654/463, loss: 0.011117861606180668 2023-01-24 04:02:56.073118: step: 656/463, loss: 0.0025481984484940767 2023-01-24 04:02:56.642347: step: 658/463, loss: 0.0017674986738711596 2023-01-24 04:02:57.249975: step: 660/463, loss: 0.01495400257408619 2023-01-24 04:02:57.878428: step: 662/463, loss: 0.0009722562972456217 2023-01-24 04:02:58.494851: step: 664/463, loss: 0.017547620460391045 2023-01-24 04:02:59.100309: step: 666/463, loss: 0.033522240817546844 2023-01-24 04:02:59.701338: step: 668/463, loss: 0.004980067722499371 2023-01-24 04:03:00.377564: step: 670/463, loss: 0.006512417923659086 2023-01-24 04:03:00.985558: step: 672/463, loss: 0.09302426129579544 2023-01-24 04:03:01.615265: step: 674/463, loss: 0.0005786706460639834 2023-01-24 04:03:02.285237: step: 676/463, loss: 0.0036979098804295063 2023-01-24 04:03:02.845576: step: 678/463, loss: 0.0069421036168932915 2023-01-24 04:03:03.417838: step: 680/463, loss: 0.006377502344548702 2023-01-24 04:03:04.046630: step: 682/463, loss: 0.00437382236123085 2023-01-24 04:03:04.633306: step: 684/463, loss: 0.047333355993032455 2023-01-24 04:03:05.247215: step: 686/463, loss: 0.029140764847397804 2023-01-24 04:03:05.928716: step: 688/463, loss: 0.0022368209902197123 2023-01-24 04:03:06.498492: step: 690/463, loss: 0.0036652071867138147 2023-01-24 04:03:07.059060: step: 692/463, loss: 0.00044778609299100935 2023-01-24 04:03:07.633062: step: 694/463, loss: 0.011457733809947968 2023-01-24 04:03:08.274548: step: 696/463, loss: 0.05908054858446121 2023-01-24 04:03:08.861571: step: 698/463, loss: 0.01540225651115179 2023-01-24 04:03:09.430691: step: 700/463, loss: 0.0369904562830925 2023-01-24 04:03:09.980058: step: 702/463, loss: 0.0019531333819031715 2023-01-24 04:03:10.552106: step: 704/463, loss: 0.027142250910401344 2023-01-24 04:03:11.215217: step: 706/463, loss: 0.02668718807399273 2023-01-24 04:03:11.835832: step: 708/463, loss: 0.07387158274650574 2023-01-24 04:03:12.467617: step: 710/463, loss: 0.0007678885012865067 2023-01-24 04:03:13.110949: step: 712/463, loss: 0.0005417651846073568 2023-01-24 04:03:13.707313: step: 714/463, loss: 2.712903733481653e-05 2023-01-24 04:03:14.309928: step: 716/463, loss: 0.01443419512361288 2023-01-24 04:03:14.864048: step: 718/463, loss: 0.010695182718336582 2023-01-24 04:03:15.502323: step: 720/463, loss: 0.018565524369478226 2023-01-24 04:03:16.110402: step: 722/463, loss: 0.06473232805728912 2023-01-24 04:03:16.726821: step: 724/463, loss: 0.02647439017891884 2023-01-24 04:03:17.306399: step: 726/463, loss: 0.020551705732941628 2023-01-24 04:03:17.911944: step: 728/463, loss: 0.040676601231098175 2023-01-24 04:03:18.483803: step: 730/463, loss: 0.00035846527316607535 2023-01-24 04:03:19.085749: step: 732/463, loss: 4.534011532086879e-05 2023-01-24 04:03:19.696637: step: 734/463, loss: 0.018328970298171043 2023-01-24 04:03:20.285683: step: 736/463, loss: 0.0021359403617680073 2023-01-24 04:03:20.881343: step: 738/463, loss: 0.006957944016903639 2023-01-24 04:03:21.454768: step: 740/463, loss: 0.041869524866342545 2023-01-24 04:03:22.050695: step: 742/463, loss: 0.00473732640966773 2023-01-24 04:03:22.616803: step: 744/463, loss: 0.009586895816028118 2023-01-24 04:03:23.185551: step: 746/463, loss: 0.01732269674539566 2023-01-24 04:03:23.742587: step: 748/463, loss: 0.010633905418217182 2023-01-24 04:03:24.369348: step: 750/463, loss: 0.003047639038413763 2023-01-24 04:03:24.956275: step: 752/463, loss: 0.1609930694103241 2023-01-24 04:03:25.579829: step: 754/463, loss: 0.008998725563287735 2023-01-24 04:03:26.167578: step: 756/463, loss: 0.05640096589922905 2023-01-24 04:03:26.812605: step: 758/463, loss: 0.015272996388375759 2023-01-24 04:03:27.403778: step: 760/463, loss: 0.020668920129537582 2023-01-24 04:03:28.036557: step: 762/463, loss: 0.0029049983713775873 2023-01-24 04:03:28.658383: step: 764/463, loss: 0.013802766799926758 2023-01-24 04:03:29.284573: step: 766/463, loss: 0.01632774993777275 2023-01-24 04:03:29.874901: step: 768/463, loss: 0.0029873461462557316 2023-01-24 04:03:30.531681: step: 770/463, loss: 0.04625753313302994 2023-01-24 04:03:31.099062: step: 772/463, loss: 0.010082759894430637 2023-01-24 04:03:31.691859: step: 774/463, loss: 0.015680672600865364 2023-01-24 04:03:32.291108: step: 776/463, loss: 0.01905141770839691 2023-01-24 04:03:32.857183: step: 778/463, loss: 0.027830949053168297 2023-01-24 04:03:33.421371: step: 780/463, loss: 0.2017330378293991 2023-01-24 04:03:34.033313: step: 782/463, loss: 0.010649469681084156 2023-01-24 04:03:34.813375: step: 784/463, loss: 0.0080515556037426 2023-01-24 04:03:35.379593: step: 786/463, loss: 0.013423859141767025 2023-01-24 04:03:35.880447: step: 788/463, loss: 0.004958702251315117 2023-01-24 04:03:36.476832: step: 790/463, loss: 0.0008512893109582365 2023-01-24 04:03:37.126141: step: 792/463, loss: 0.01572311297059059 2023-01-24 04:03:37.650769: step: 794/463, loss: 0.020107876509428024 2023-01-24 04:03:38.318922: step: 796/463, loss: 0.05576716735959053 2023-01-24 04:03:38.883692: step: 798/463, loss: 0.010191565379500389 2023-01-24 04:03:39.497382: step: 800/463, loss: 0.010433323681354523 2023-01-24 04:03:40.118667: step: 802/463, loss: 0.07402213662862778 2023-01-24 04:03:40.760780: step: 804/463, loss: 0.2019401639699936 2023-01-24 04:03:41.408686: step: 806/463, loss: 0.029602546244859695 2023-01-24 04:03:41.986142: step: 808/463, loss: 0.01775132119655609 2023-01-24 04:03:42.502538: step: 810/463, loss: 0.000788956182077527 2023-01-24 04:03:43.081400: step: 812/463, loss: 0.027141405269503593 2023-01-24 04:03:43.675824: step: 814/463, loss: 0.0005608610226772726 2023-01-24 04:03:44.292177: step: 816/463, loss: 0.00044557906221598387 2023-01-24 04:03:44.952907: step: 818/463, loss: 0.6101090908050537 2023-01-24 04:03:45.637069: step: 820/463, loss: 0.009533628821372986 2023-01-24 04:03:46.215819: step: 822/463, loss: 0.03025132045149803 2023-01-24 04:03:46.834880: step: 824/463, loss: 0.06425667554140091 2023-01-24 04:03:47.503971: step: 826/463, loss: 0.004169210325926542 2023-01-24 04:03:48.192601: step: 828/463, loss: 0.023449435830116272 2023-01-24 04:03:48.781506: step: 830/463, loss: 0.01126700360327959 2023-01-24 04:03:49.442492: step: 832/463, loss: 0.05990641191601753 2023-01-24 04:03:50.023697: step: 834/463, loss: 0.02726084738969803 2023-01-24 04:03:50.654493: step: 836/463, loss: 0.036090701818466187 2023-01-24 04:03:51.296902: step: 838/463, loss: 0.02891107089817524 2023-01-24 04:03:51.890143: step: 840/463, loss: 0.029088353738188744 2023-01-24 04:03:52.691841: step: 842/463, loss: 0.03414663299918175 2023-01-24 04:03:53.262340: step: 844/463, loss: 0.0036298446357250214 2023-01-24 04:03:53.890782: step: 846/463, loss: 0.023342851549386978 2023-01-24 04:03:54.543156: step: 848/463, loss: 0.1556846648454666 2023-01-24 04:03:55.135959: step: 850/463, loss: 0.003532565198838711 2023-01-24 04:03:55.780015: step: 852/463, loss: 0.026758458465337753 2023-01-24 04:03:56.385894: step: 854/463, loss: 0.010007509030401707 2023-01-24 04:03:56.954945: step: 856/463, loss: 0.044645048677921295 2023-01-24 04:03:57.568270: step: 858/463, loss: 0.08060014247894287 2023-01-24 04:03:58.204549: step: 860/463, loss: 0.19717563688755035 2023-01-24 04:03:58.726849: step: 862/463, loss: 0.0011181774316355586 2023-01-24 04:03:59.325448: step: 864/463, loss: 0.0004933009622618556 2023-01-24 04:03:59.941317: step: 866/463, loss: 0.00010603290138533339 2023-01-24 04:04:00.567861: step: 868/463, loss: 0.006303685717284679 2023-01-24 04:04:01.226296: step: 870/463, loss: 0.01584933139383793 2023-01-24 04:04:01.755530: step: 872/463, loss: 0.037185944616794586 2023-01-24 04:04:02.403248: step: 874/463, loss: 0.022174609825015068 2023-01-24 04:04:03.032930: step: 876/463, loss: 0.003561200574040413 2023-01-24 04:04:03.641752: step: 878/463, loss: 0.008033213205635548 2023-01-24 04:04:04.236013: step: 880/463, loss: 0.03912869095802307 2023-01-24 04:04:04.842668: step: 882/463, loss: 0.05260114371776581 2023-01-24 04:04:05.477096: step: 884/463, loss: 0.8567591309547424 2023-01-24 04:04:06.044604: step: 886/463, loss: 0.0029154145158827305 2023-01-24 04:04:06.675892: step: 888/463, loss: 0.00939836073666811 2023-01-24 04:04:07.277237: step: 890/463, loss: 0.03094368800520897 2023-01-24 04:04:07.914173: step: 892/463, loss: 0.011767975986003876 2023-01-24 04:04:08.515915: step: 894/463, loss: 0.00037097797030583024 2023-01-24 04:04:09.049281: step: 896/463, loss: 0.04142456874251366 2023-01-24 04:04:09.587331: step: 898/463, loss: 0.004181623458862305 2023-01-24 04:04:10.190924: step: 900/463, loss: 0.1254042387008667 2023-01-24 04:04:10.807624: step: 902/463, loss: 0.04262140765786171 2023-01-24 04:04:11.391855: step: 904/463, loss: 7.5566040322883055e-06 2023-01-24 04:04:12.007387: step: 906/463, loss: 0.030473776161670685 2023-01-24 04:04:12.659526: step: 908/463, loss: 0.024384384974837303 2023-01-24 04:04:13.290566: step: 910/463, loss: 0.0058188894763588905 2023-01-24 04:04:13.920835: step: 912/463, loss: 0.014894187450408936 2023-01-24 04:04:14.514840: step: 914/463, loss: 0.010063652880489826 2023-01-24 04:04:15.197077: step: 916/463, loss: 0.003647984005510807 2023-01-24 04:04:15.779232: step: 918/463, loss: 0.0026171591598540545 2023-01-24 04:04:16.293008: step: 920/463, loss: 9.662180673331022e-05 2023-01-24 04:04:16.882519: step: 922/463, loss: 0.03843434154987335 2023-01-24 04:04:17.464906: step: 924/463, loss: 0.0034490060061216354 2023-01-24 04:04:18.121357: step: 926/463, loss: 0.033105213195085526 ================================================== Loss: 0.029 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33540794313787836, 'r': 0.30931358703037737, 'f1': 0.32183269568609846}, 'combined': 0.237139881031862, 'epoch': 31} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3614500486037469, 'r': 0.3106260679703816, 'f1': 0.33411634366001597}, 'combined': 0.23505672418292584, 'epoch': 31} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3360140460776054, 'r': 0.3066845467994842, 'f1': 0.32068007175263524}, 'combined': 0.23629057918615226, 'epoch': 31} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.35898512678403977, 'r': 0.31321060406747225, 'f1': 0.33453931124744}, 'combined': 0.2375229109856824, 'epoch': 31} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34935139057900494, 'r': 0.3162060973551904, 'f1': 0.3319534129605286}, 'combined': 0.24459725165512633, 'epoch': 31} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3694542926879884, 'r': 0.302280784926536, 'f1': 0.3325088634191896}, 'combined': 0.23608129302762462, 'epoch': 31} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.22868217054263562, 'r': 0.2809523809523809, 'f1': 0.2521367521367521}, 'combined': 0.16809116809116803, 'epoch': 31} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2767857142857143, 'r': 0.33695652173913043, 'f1': 0.30392156862745096}, 'combined': 0.15196078431372548, 'epoch': 31} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4318181818181818, 'r': 0.16379310344827586, 'f1': 0.23750000000000002}, 'combined': 0.15833333333333333, 'epoch': 31} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3207567698259188, 'r': 0.3146703036053131, 'f1': 0.3176843869731801}, 'combined': 0.23408323250655372, 'epoch': 25} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3686692880345029, 'r': 0.31751883707683626, 'f1': 0.34118761114866797}, 'combined': 0.24224320391555423, 'epoch': 25} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3392857142857143, 'r': 0.41304347826086957, 'f1': 0.3725490196078432}, 'combined': 0.1862745098039216, 'epoch': 25} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3394620582120582, 'r': 0.3092447916666667, 'f1': 0.32364965312190286}, 'combined': 0.23847869177403366, 'epoch': 7} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36468931207169214, 'r': 0.2587193810770381, 'f1': 0.30269771384817323}, 'combined': 0.214915376832203, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 7} ****************************** Epoch: 32 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:06:49.073494: step: 2/463, loss: 0.0181606262922287 2023-01-24 04:06:49.692241: step: 4/463, loss: 0.001627701218239963 2023-01-24 04:06:50.381850: step: 6/463, loss: 0.007256168406456709 2023-01-24 04:06:50.998599: step: 8/463, loss: 0.048920098692178726 2023-01-24 04:06:51.600624: step: 10/463, loss: 4.261564254760742 2023-01-24 04:06:52.230151: step: 12/463, loss: 0.0021468738559633493 2023-01-24 04:06:52.851775: step: 14/463, loss: 0.04507168009877205 2023-01-24 04:06:53.469721: step: 16/463, loss: 0.0018095432315021753 2023-01-24 04:06:54.081457: step: 18/463, loss: 0.0018184571526944637 2023-01-24 04:06:54.657869: step: 20/463, loss: 0.0508790947496891 2023-01-24 04:06:55.298287: step: 22/463, loss: 0.001566702383570373 2023-01-24 04:06:55.927150: step: 24/463, loss: 0.0031056287698447704 2023-01-24 04:06:56.568016: step: 26/463, loss: 0.0013041511410847306 2023-01-24 04:06:57.202368: step: 28/463, loss: 0.04816519841551781 2023-01-24 04:06:57.816751: step: 30/463, loss: 0.004592801909893751 2023-01-24 04:06:58.405112: step: 32/463, loss: 0.014258313924074173 2023-01-24 04:06:59.165872: step: 34/463, loss: 0.006813503336161375 2023-01-24 04:06:59.758886: step: 36/463, loss: 0.0016586168203502893 2023-01-24 04:07:00.320199: step: 38/463, loss: 0.006019343622028828 2023-01-24 04:07:00.895758: step: 40/463, loss: 0.001291142893023789 2023-01-24 04:07:01.502358: step: 42/463, loss: 3.4471840858459473 2023-01-24 04:07:02.105252: step: 44/463, loss: 0.003044555429369211 2023-01-24 04:07:02.740051: step: 46/463, loss: 0.014145866967737675 2023-01-24 04:07:03.292651: step: 48/463, loss: 0.01567729189991951 2023-01-24 04:07:03.826118: step: 50/463, loss: 0.05486137047410011 2023-01-24 04:07:04.408330: step: 52/463, loss: 0.008107100613415241 2023-01-24 04:07:05.015777: step: 54/463, loss: 0.0040552690625190735 2023-01-24 04:07:05.648803: step: 56/463, loss: 0.26978880167007446 2023-01-24 04:07:06.175903: step: 58/463, loss: 0.24094001948833466 2023-01-24 04:07:06.805663: step: 60/463, loss: 0.03373754024505615 2023-01-24 04:07:07.411454: step: 62/463, loss: 0.0027700867503881454 2023-01-24 04:07:08.114390: step: 64/463, loss: 0.5279542803764343 2023-01-24 04:07:08.720349: step: 66/463, loss: 0.014293326996266842 2023-01-24 04:07:09.316309: step: 68/463, loss: 0.003106173826381564 2023-01-24 04:07:09.918816: step: 70/463, loss: 0.0013800394954159856 2023-01-24 04:07:10.545424: step: 72/463, loss: 0.0011457506334409118 2023-01-24 04:07:11.091726: step: 74/463, loss: 0.0035314864944666624 2023-01-24 04:07:11.661578: step: 76/463, loss: 0.0036101932637393475 2023-01-24 04:07:12.300582: step: 78/463, loss: 0.04046108201146126 2023-01-24 04:07:12.964969: step: 80/463, loss: 0.03248461335897446 2023-01-24 04:07:13.613498: step: 82/463, loss: 0.009222766384482384 2023-01-24 04:07:14.264514: step: 84/463, loss: 0.07355571538209915 2023-01-24 04:07:14.842033: step: 86/463, loss: 0.0044911932200193405 2023-01-24 04:07:15.452863: step: 88/463, loss: 0.019300516694784164 2023-01-24 04:07:16.049616: step: 90/463, loss: 0.05209234356880188 2023-01-24 04:07:16.659648: step: 92/463, loss: 0.0007125348201952875 2023-01-24 04:07:17.257269: step: 94/463, loss: 0.011067361570894718 2023-01-24 04:07:17.887441: step: 96/463, loss: 0.02435273304581642 2023-01-24 04:07:18.516735: step: 98/463, loss: 0.0356578566133976 2023-01-24 04:07:19.162666: step: 100/463, loss: 0.008644402027130127 2023-01-24 04:07:19.770024: step: 102/463, loss: 0.02453622967004776 2023-01-24 04:07:20.311882: step: 104/463, loss: 0.021075600758194923 2023-01-24 04:07:20.927965: step: 106/463, loss: 0.016346894204616547 2023-01-24 04:07:21.552020: step: 108/463, loss: 0.0015254128957167268 2023-01-24 04:07:22.168454: step: 110/463, loss: 0.11596240848302841 2023-01-24 04:07:22.784890: step: 112/463, loss: 0.005277351476252079 2023-01-24 04:07:23.450751: step: 114/463, loss: 0.02461421675980091 2023-01-24 04:07:24.103376: step: 116/463, loss: 0.16692639887332916 2023-01-24 04:07:24.688577: step: 118/463, loss: 0.023045014590024948 2023-01-24 04:07:25.302774: step: 120/463, loss: 0.35669681429862976 2023-01-24 04:07:25.929496: step: 122/463, loss: 0.05789782479405403 2023-01-24 04:07:26.627862: step: 124/463, loss: 0.17643237113952637 2023-01-24 04:07:27.258355: step: 126/463, loss: 0.001164703513495624 2023-01-24 04:07:27.852572: step: 128/463, loss: 0.040854960680007935 2023-01-24 04:07:28.535203: step: 130/463, loss: 0.008128301240503788 2023-01-24 04:07:29.170053: step: 132/463, loss: 0.001041875802911818 2023-01-24 04:07:29.771986: step: 134/463, loss: 0.025251230224967003 2023-01-24 04:07:30.321815: step: 136/463, loss: 0.009744648821651936 2023-01-24 04:07:30.845399: step: 138/463, loss: 0.02595999464392662 2023-01-24 04:07:31.467837: step: 140/463, loss: 0.01202879473567009 2023-01-24 04:07:32.051478: step: 142/463, loss: 0.017625033855438232 2023-01-24 04:07:32.632267: step: 144/463, loss: 0.005781098268926144 2023-01-24 04:07:33.296076: step: 146/463, loss: 0.009168743155896664 2023-01-24 04:07:33.872937: step: 148/463, loss: 0.02797919325530529 2023-01-24 04:07:34.462568: step: 150/463, loss: 0.005777680780738592 2023-01-24 04:07:35.079067: step: 152/463, loss: 0.012669871561229229 2023-01-24 04:07:35.696680: step: 154/463, loss: 0.000261423148913309 2023-01-24 04:07:36.301054: step: 156/463, loss: 0.08534269779920578 2023-01-24 04:07:36.898984: step: 158/463, loss: 0.0037384990137070417 2023-01-24 04:07:37.484933: step: 160/463, loss: 0.019089017063379288 2023-01-24 04:07:38.094388: step: 162/463, loss: 0.030589185655117035 2023-01-24 04:07:38.687550: step: 164/463, loss: 3.8167004585266113 2023-01-24 04:07:39.350130: step: 166/463, loss: 0.03901047632098198 2023-01-24 04:07:39.985785: step: 168/463, loss: 0.00219841580837965 2023-01-24 04:07:40.588971: step: 170/463, loss: 0.011181983165442944 2023-01-24 04:07:41.222529: step: 172/463, loss: 0.014016332104802132 2023-01-24 04:07:41.834338: step: 174/463, loss: 0.005151611752808094 2023-01-24 04:07:42.449149: step: 176/463, loss: 0.00413312716409564 2023-01-24 04:07:42.994728: step: 178/463, loss: 0.019537249580025673 2023-01-24 04:07:43.569943: step: 180/463, loss: 0.0007970586884766817 2023-01-24 04:07:44.194327: step: 182/463, loss: 0.0084525840356946 2023-01-24 04:07:44.789587: step: 184/463, loss: 0.0005667332443408668 2023-01-24 04:07:45.406682: step: 186/463, loss: 0.003875585738569498 2023-01-24 04:07:46.022544: step: 188/463, loss: 0.05626220256090164 2023-01-24 04:07:46.693920: step: 190/463, loss: 0.00048162552411668 2023-01-24 04:07:47.309708: step: 192/463, loss: 0.006511688232421875 2023-01-24 04:07:47.948345: step: 194/463, loss: 0.010253442451357841 2023-01-24 04:07:48.545661: step: 196/463, loss: 0.003974742256104946 2023-01-24 04:07:49.147530: step: 198/463, loss: 0.05738965794444084 2023-01-24 04:07:49.821575: step: 200/463, loss: 0.0017896610079333186 2023-01-24 04:07:50.411009: step: 202/463, loss: 2.061668634414673 2023-01-24 04:07:51.003236: step: 204/463, loss: 0.000910982140339911 2023-01-24 04:07:51.560719: step: 206/463, loss: 0.655641496181488 2023-01-24 04:07:52.108642: step: 208/463, loss: 0.03462628275156021 2023-01-24 04:07:52.743284: step: 210/463, loss: 0.009112349711358547 2023-01-24 04:07:53.359260: step: 212/463, loss: 0.52211993932724 2023-01-24 04:07:53.985886: step: 214/463, loss: 0.8688774108886719 2023-01-24 04:07:54.606552: step: 216/463, loss: 0.001534082693979144 2023-01-24 04:07:55.264887: step: 218/463, loss: 0.02225937508046627 2023-01-24 04:07:55.857390: step: 220/463, loss: 0.03636852279305458 2023-01-24 04:07:56.445750: step: 222/463, loss: 1.660961606830824e-05 2023-01-24 04:07:57.004601: step: 224/463, loss: 0.0029867065604776144 2023-01-24 04:07:57.623470: step: 226/463, loss: 0.004934343043714762 2023-01-24 04:07:58.256826: step: 228/463, loss: 0.006617359817028046 2023-01-24 04:07:58.826662: step: 230/463, loss: 0.0007778635481372476 2023-01-24 04:07:59.479471: step: 232/463, loss: 0.0023597916588187218 2023-01-24 04:08:00.085915: step: 234/463, loss: 0.024659525603055954 2023-01-24 04:08:00.648855: step: 236/463, loss: 0.005099315661936998 2023-01-24 04:08:01.265188: step: 238/463, loss: 0.015258288010954857 2023-01-24 04:08:01.895865: step: 240/463, loss: 0.0003689987934194505 2023-01-24 04:08:02.542561: step: 242/463, loss: 0.0004577186773531139 2023-01-24 04:08:03.138472: step: 244/463, loss: 0.0807991698384285 2023-01-24 04:08:03.741772: step: 246/463, loss: 0.00026435338077135384 2023-01-24 04:08:04.363888: step: 248/463, loss: 0.01753665693104267 2023-01-24 04:08:04.988865: step: 250/463, loss: 0.09141430258750916 2023-01-24 04:08:05.625959: step: 252/463, loss: 0.002938895719125867 2023-01-24 04:08:06.200155: step: 254/463, loss: 0.0032441923394799232 2023-01-24 04:08:06.878243: step: 256/463, loss: 0.04946107417345047 2023-01-24 04:08:07.553290: step: 258/463, loss: 0.0008468400337733328 2023-01-24 04:08:08.151554: step: 260/463, loss: 0.0006121476762928069 2023-01-24 04:08:08.747114: step: 262/463, loss: 0.005582297686487436 2023-01-24 04:08:09.354909: step: 264/463, loss: 0.007067762780934572 2023-01-24 04:08:09.953368: step: 266/463, loss: 0.012579495087265968 2023-01-24 04:08:10.540904: step: 268/463, loss: 0.01883220300078392 2023-01-24 04:08:11.202465: step: 270/463, loss: 0.002969000954180956 2023-01-24 04:08:11.877669: step: 272/463, loss: 0.013337090611457825 2023-01-24 04:08:12.500896: step: 274/463, loss: 0.025783322751522064 2023-01-24 04:08:13.091197: step: 276/463, loss: 0.002391841961070895 2023-01-24 04:08:13.689712: step: 278/463, loss: 0.026956550776958466 2023-01-24 04:08:14.313582: step: 280/463, loss: 0.28474295139312744 2023-01-24 04:08:14.913367: step: 282/463, loss: 0.36947813630104065 2023-01-24 04:08:15.496582: step: 284/463, loss: 0.0008390786242671311 2023-01-24 04:08:16.159703: step: 286/463, loss: 0.014901400543749332 2023-01-24 04:08:16.782314: step: 288/463, loss: 0.00010058927728096023 2023-01-24 04:08:17.316907: step: 290/463, loss: 0.002473019063472748 2023-01-24 04:08:18.039737: step: 292/463, loss: 0.034303102642297745 2023-01-24 04:08:18.606220: step: 294/463, loss: 0.0444025993347168 2023-01-24 04:08:19.245368: step: 296/463, loss: 0.8087300658226013 2023-01-24 04:08:19.905619: step: 298/463, loss: 0.014818630181252956 2023-01-24 04:08:20.492179: step: 300/463, loss: 0.0077395387925207615 2023-01-24 04:08:21.064574: step: 302/463, loss: 0.04093672335147858 2023-01-24 04:08:21.687558: step: 304/463, loss: 0.004246116615831852 2023-01-24 04:08:22.277479: step: 306/463, loss: 0.018555039539933205 2023-01-24 04:08:22.875969: step: 308/463, loss: 0.06111717224121094 2023-01-24 04:08:23.507359: step: 310/463, loss: 0.02484469674527645 2023-01-24 04:08:24.100641: step: 312/463, loss: 0.004752378910779953 2023-01-24 04:08:24.677799: step: 314/463, loss: 0.019758980721235275 2023-01-24 04:08:25.262139: step: 316/463, loss: 0.034170035272836685 2023-01-24 04:08:25.817854: step: 318/463, loss: 0.0023187012411653996 2023-01-24 04:08:26.522785: step: 320/463, loss: 0.0015280378283932805 2023-01-24 04:08:27.204935: step: 322/463, loss: 0.011928373016417027 2023-01-24 04:08:27.894127: step: 324/463, loss: 0.011216020211577415 2023-01-24 04:08:28.473669: step: 326/463, loss: 0.006065668538212776 2023-01-24 04:08:29.071652: step: 328/463, loss: 0.08085978776216507 2023-01-24 04:08:29.627630: step: 330/463, loss: 0.01336361002177 2023-01-24 04:08:30.233099: step: 332/463, loss: 0.002071402035653591 2023-01-24 04:08:30.887821: step: 334/463, loss: 0.02465335838496685 2023-01-24 04:08:31.480613: step: 336/463, loss: 0.012266107834875584 2023-01-24 04:08:32.056753: step: 338/463, loss: 0.04395830258727074 2023-01-24 04:08:32.655326: step: 340/463, loss: 0.03355079144239426 2023-01-24 04:08:33.236127: step: 342/463, loss: 0.0009584089857526124 2023-01-24 04:08:33.835325: step: 344/463, loss: 0.003572637215256691 2023-01-24 04:08:34.533928: step: 346/463, loss: 0.007430777885019779 2023-01-24 04:08:35.218638: step: 348/463, loss: 0.009962947107851505 2023-01-24 04:08:35.868857: step: 350/463, loss: 0.020123349502682686 2023-01-24 04:08:36.459461: step: 352/463, loss: 0.005423809867352247 2023-01-24 04:08:37.076236: step: 354/463, loss: 0.0011625710176303983 2023-01-24 04:08:37.711457: step: 356/463, loss: 0.003082401817664504 2023-01-24 04:08:38.230008: step: 358/463, loss: 0.013530966825783253 2023-01-24 04:08:38.814483: step: 360/463, loss: 0.025241289287805557 2023-01-24 04:08:39.381601: step: 362/463, loss: 0.056574441492557526 2023-01-24 04:08:40.028378: step: 364/463, loss: 0.025673476979136467 2023-01-24 04:08:40.612851: step: 366/463, loss: 0.003485635621473193 2023-01-24 04:08:41.267774: step: 368/463, loss: 0.0017653555842116475 2023-01-24 04:08:41.850116: step: 370/463, loss: 0.003503311425447464 2023-01-24 04:08:42.443909: step: 372/463, loss: 0.18487899005413055 2023-01-24 04:08:43.040301: step: 374/463, loss: 3.704572736751288e-05 2023-01-24 04:08:43.608866: step: 376/463, loss: 0.00028759316774085164 2023-01-24 04:08:44.195496: step: 378/463, loss: 0.0001019562769215554 2023-01-24 04:08:44.725103: step: 380/463, loss: 0.012977383099496365 2023-01-24 04:08:45.378558: step: 382/463, loss: 0.012966250069439411 2023-01-24 04:08:45.971017: step: 384/463, loss: 0.0012129429960623384 2023-01-24 04:08:46.548802: step: 386/463, loss: 0.0026221417356282473 2023-01-24 04:08:47.103614: step: 388/463, loss: 0.025549722835421562 2023-01-24 04:08:47.699305: step: 390/463, loss: 0.005611758679151535 2023-01-24 04:08:48.280582: step: 392/463, loss: 0.007849397137761116 2023-01-24 04:08:48.853537: step: 394/463, loss: 0.0046800971031188965 2023-01-24 04:08:49.411254: step: 396/463, loss: 0.0007445795345120132 2023-01-24 04:08:50.043300: step: 398/463, loss: 1.03985595703125 2023-01-24 04:08:50.669652: step: 400/463, loss: 0.0016480869380757213 2023-01-24 04:08:51.337150: step: 402/463, loss: 0.014422272332012653 2023-01-24 04:08:51.921636: step: 404/463, loss: 0.0009348697494715452 2023-01-24 04:08:52.554051: step: 406/463, loss: 0.003977763932198286 2023-01-24 04:08:53.146397: step: 408/463, loss: 0.12198872864246368 2023-01-24 04:08:53.747136: step: 410/463, loss: 0.08858168870210648 2023-01-24 04:08:54.346607: step: 412/463, loss: 0.10297659784555435 2023-01-24 04:08:54.973492: step: 414/463, loss: 0.0018301011295989156 2023-01-24 04:08:55.562392: step: 416/463, loss: 0.07184568792581558 2023-01-24 04:08:56.126303: step: 418/463, loss: 0.00019261198758613318 2023-01-24 04:08:56.675530: step: 420/463, loss: 0.003002261510118842 2023-01-24 04:08:57.302407: step: 422/463, loss: 0.00022051353880669922 2023-01-24 04:08:57.896350: step: 424/463, loss: 0.0013514729216694832 2023-01-24 04:08:58.418964: step: 426/463, loss: 0.002453508088365197 2023-01-24 04:08:59.028818: step: 428/463, loss: 0.025327682495117188 2023-01-24 04:08:59.666510: step: 430/463, loss: 0.0015153599670156837 2023-01-24 04:09:00.234770: step: 432/463, loss: 0.041157785803079605 2023-01-24 04:09:00.892336: step: 434/463, loss: 0.0019242753041908145 2023-01-24 04:09:01.485790: step: 436/463, loss: 0.017609497532248497 2023-01-24 04:09:02.066018: step: 438/463, loss: 0.014199520461261272 2023-01-24 04:09:02.688076: step: 440/463, loss: 0.11898516118526459 2023-01-24 04:09:03.262378: step: 442/463, loss: 0.7344368100166321 2023-01-24 04:09:03.810990: step: 444/463, loss: 0.00012431932555045933 2023-01-24 04:09:04.397127: step: 446/463, loss: 0.002610772615298629 2023-01-24 04:09:04.987610: step: 448/463, loss: 0.003034910885617137 2023-01-24 04:09:05.637679: step: 450/463, loss: 0.01071448065340519 2023-01-24 04:09:06.246317: step: 452/463, loss: 0.005033975932747126 2023-01-24 04:09:06.807006: step: 454/463, loss: 0.2656709849834442 2023-01-24 04:09:07.507817: step: 456/463, loss: 0.08654310554265976 2023-01-24 04:09:08.127667: step: 458/463, loss: 0.0661768689751625 2023-01-24 04:09:08.749808: step: 460/463, loss: 0.031137442216277122 2023-01-24 04:09:09.327813: step: 462/463, loss: 0.00028951550484634936 2023-01-24 04:09:09.976140: step: 464/463, loss: 0.11712165176868439 2023-01-24 04:09:10.644191: step: 466/463, loss: 0.03977241367101669 2023-01-24 04:09:11.191601: step: 468/463, loss: 0.010040998458862305 2023-01-24 04:09:11.839567: step: 470/463, loss: 0.046154413372278214 2023-01-24 04:09:12.592143: step: 472/463, loss: 0.009679949842393398 2023-01-24 04:09:13.240786: step: 474/463, loss: 0.004819505847990513 2023-01-24 04:09:13.799766: step: 476/463, loss: 0.00038883613888174295 2023-01-24 04:09:14.375268: step: 478/463, loss: 0.0045163254253566265 2023-01-24 04:09:14.918377: step: 480/463, loss: 0.00434342585504055 2023-01-24 04:09:15.482435: step: 482/463, loss: 0.021196944639086723 2023-01-24 04:09:16.130958: step: 484/463, loss: 0.10372253507375717 2023-01-24 04:09:16.721848: step: 486/463, loss: 0.023437362164258957 2023-01-24 04:09:17.305513: step: 488/463, loss: 0.02046622522175312 2023-01-24 04:09:17.993608: step: 490/463, loss: 0.0051236082799732685 2023-01-24 04:09:18.628093: step: 492/463, loss: 0.005093232728540897 2023-01-24 04:09:19.245707: step: 494/463, loss: 0.2517682909965515 2023-01-24 04:09:19.809114: step: 496/463, loss: 0.03450251370668411 2023-01-24 04:09:20.384424: step: 498/463, loss: 0.02413996122777462 2023-01-24 04:09:20.980185: step: 500/463, loss: 0.0031456011347472668 2023-01-24 04:09:22.232064: step: 502/463, loss: 0.000920072547160089 2023-01-24 04:09:22.849433: step: 504/463, loss: 0.002672564471140504 2023-01-24 04:09:23.486842: step: 506/463, loss: 0.20000502467155457 2023-01-24 04:09:24.100875: step: 508/463, loss: 0.0060021355748176575 2023-01-24 04:09:24.748153: step: 510/463, loss: 0.029658935964107513 2023-01-24 04:09:25.369841: step: 512/463, loss: 0.006748534273356199 2023-01-24 04:09:25.932195: step: 514/463, loss: 0.003728124313056469 2023-01-24 04:09:26.566224: step: 516/463, loss: 0.0038728744257241488 2023-01-24 04:09:27.167082: step: 518/463, loss: 0.05159010738134384 2023-01-24 04:09:27.784237: step: 520/463, loss: 0.026952482759952545 2023-01-24 04:09:28.397155: step: 522/463, loss: 0.22280287742614746 2023-01-24 04:09:29.016739: step: 524/463, loss: 0.00749147217720747 2023-01-24 04:09:29.601765: step: 526/463, loss: 0.009855619631707668 2023-01-24 04:09:30.182272: step: 528/463, loss: 0.060199037194252014 2023-01-24 04:09:30.785470: step: 530/463, loss: 0.02846071869134903 2023-01-24 04:09:31.354880: step: 532/463, loss: 0.017634257674217224 2023-01-24 04:09:31.935053: step: 534/463, loss: 0.0259077288210392 2023-01-24 04:09:32.555278: step: 536/463, loss: 0.003699155058711767 2023-01-24 04:09:33.222578: step: 538/463, loss: 0.04289009049534798 2023-01-24 04:09:33.750906: step: 540/463, loss: 0.022542990744113922 2023-01-24 04:09:34.413991: step: 542/463, loss: 0.08888021111488342 2023-01-24 04:09:35.025939: step: 544/463, loss: 0.03301907330751419 2023-01-24 04:09:35.640379: step: 546/463, loss: 0.03346843644976616 2023-01-24 04:09:36.250220: step: 548/463, loss: 0.0069305505603551865 2023-01-24 04:09:36.868302: step: 550/463, loss: 0.0054976907558739185 2023-01-24 04:09:37.519437: step: 552/463, loss: 0.0021201458293944597 2023-01-24 04:09:38.047596: step: 554/463, loss: 0.041732095181941986 2023-01-24 04:09:38.649385: step: 556/463, loss: 0.018743671476840973 2023-01-24 04:09:39.199383: step: 558/463, loss: 0.0014384971000254154 2023-01-24 04:09:39.867586: step: 560/463, loss: 0.024591274559497833 2023-01-24 04:09:40.509032: step: 562/463, loss: 0.0006790111656300724 2023-01-24 04:09:41.177520: step: 564/463, loss: 0.0038785107899457216 2023-01-24 04:09:41.766273: step: 566/463, loss: 0.0026493656914681196 2023-01-24 04:09:42.327723: step: 568/463, loss: 0.0033295247703790665 2023-01-24 04:09:42.905802: step: 570/463, loss: 0.02014324627816677 2023-01-24 04:09:43.527644: step: 572/463, loss: 0.009593944065272808 2023-01-24 04:09:44.211471: step: 574/463, loss: 0.026680314913392067 2023-01-24 04:09:44.760890: step: 576/463, loss: 0.0024374749045819044 2023-01-24 04:09:45.355049: step: 578/463, loss: 0.1531105488538742 2023-01-24 04:09:46.010548: step: 580/463, loss: 0.03962937742471695 2023-01-24 04:09:46.700831: step: 582/463, loss: 0.035365909337997437 2023-01-24 04:09:47.336269: step: 584/463, loss: 0.020350730046629906 2023-01-24 04:09:47.992388: step: 586/463, loss: 0.01611383818089962 2023-01-24 04:09:48.648233: step: 588/463, loss: 0.0008707508095540106 2023-01-24 04:09:49.244943: step: 590/463, loss: 0.004841223359107971 2023-01-24 04:09:49.856457: step: 592/463, loss: 0.5070900917053223 2023-01-24 04:09:50.438392: step: 594/463, loss: 0.0005800604703836143 2023-01-24 04:09:51.089809: step: 596/463, loss: 0.017423473298549652 2023-01-24 04:09:51.646681: step: 598/463, loss: 0.0005731906276196241 2023-01-24 04:09:52.215516: step: 600/463, loss: 0.002325799548998475 2023-01-24 04:09:52.797624: step: 602/463, loss: 0.00082722760271281 2023-01-24 04:09:53.392970: step: 604/463, loss: 0.10802621394395828 2023-01-24 04:09:53.975630: step: 606/463, loss: 0.052759476006031036 2023-01-24 04:09:54.633981: step: 608/463, loss: 0.023277411237359047 2023-01-24 04:09:55.271196: step: 610/463, loss: 0.0499635711312294 2023-01-24 04:09:55.941155: step: 612/463, loss: 0.0029287217184901237 2023-01-24 04:09:56.559364: step: 614/463, loss: 0.007659556344151497 2023-01-24 04:09:57.158713: step: 616/463, loss: 0.019356215372681618 2023-01-24 04:09:57.723617: step: 618/463, loss: 0.001633546082302928 2023-01-24 04:09:58.321125: step: 620/463, loss: 0.006532317027449608 2023-01-24 04:09:58.965207: step: 622/463, loss: 0.0012429840862751007 2023-01-24 04:09:59.490488: step: 624/463, loss: 0.0007024153019301593 2023-01-24 04:10:00.107174: step: 626/463, loss: 0.009118237532675266 2023-01-24 04:10:00.810414: step: 628/463, loss: 0.001394743798300624 2023-01-24 04:10:01.351923: step: 630/463, loss: 0.0021011694334447384 2023-01-24 04:10:02.001610: step: 632/463, loss: 0.015018503181636333 2023-01-24 04:10:02.771055: step: 634/463, loss: 0.003622051328420639 2023-01-24 04:10:03.352264: step: 636/463, loss: 0.018979400396347046 2023-01-24 04:10:03.965379: step: 638/463, loss: 0.0006759192328900099 2023-01-24 04:10:04.621269: step: 640/463, loss: 0.045064277946949005 2023-01-24 04:10:05.275007: step: 642/463, loss: 0.01152975857257843 2023-01-24 04:10:05.918690: step: 644/463, loss: 0.17232316732406616 2023-01-24 04:10:06.581543: step: 646/463, loss: 0.0094269048422575 2023-01-24 04:10:07.164713: step: 648/463, loss: 0.02903854474425316 2023-01-24 04:10:07.733417: step: 650/463, loss: 0.004598059691488743 2023-01-24 04:10:08.348994: step: 652/463, loss: 0.0011152077931910753 2023-01-24 04:10:08.931727: step: 654/463, loss: 0.00412494782358408 2023-01-24 04:10:09.557855: step: 656/463, loss: 0.007545419503003359 2023-01-24 04:10:10.140443: step: 658/463, loss: 0.01966894045472145 2023-01-24 04:10:10.747644: step: 660/463, loss: 0.005048603285104036 2023-01-24 04:10:11.353427: step: 662/463, loss: 0.0030583988409489393 2023-01-24 04:10:11.916845: step: 664/463, loss: 0.06218159943819046 2023-01-24 04:10:12.495110: step: 666/463, loss: 0.07868531346321106 2023-01-24 04:10:13.090512: step: 668/463, loss: 0.28304189443588257 2023-01-24 04:10:13.722447: step: 670/463, loss: 4.691996946348809e-05 2023-01-24 04:10:14.317663: step: 672/463, loss: 0.03258002921938896 2023-01-24 04:10:14.955497: step: 674/463, loss: 0.0032711983658373356 2023-01-24 04:10:15.559838: step: 676/463, loss: 0.0020057326182723045 2023-01-24 04:10:16.154605: step: 678/463, loss: 0.002122721169143915 2023-01-24 04:10:16.756302: step: 680/463, loss: 0.02505510114133358 2023-01-24 04:10:17.308123: step: 682/463, loss: 0.0011033754562959075 2023-01-24 04:10:17.935051: step: 684/463, loss: 0.000933489587623626 2023-01-24 04:10:18.525962: step: 686/463, loss: 0.00079430389450863 2023-01-24 04:10:19.199122: step: 688/463, loss: 0.012062291614711285 2023-01-24 04:10:19.866612: step: 690/463, loss: 0.08209416270256042 2023-01-24 04:10:20.456781: step: 692/463, loss: 0.04253535717725754 2023-01-24 04:10:21.030165: step: 694/463, loss: 0.00048671153490431607 2023-01-24 04:10:21.680230: step: 696/463, loss: 0.04332999512553215 2023-01-24 04:10:22.361562: step: 698/463, loss: 0.0014223200269043446 2023-01-24 04:10:22.960799: step: 700/463, loss: 0.03229007124900818 2023-01-24 04:10:23.553470: step: 702/463, loss: 0.016360638663172722 2023-01-24 04:10:24.188823: step: 704/463, loss: 0.047041378915309906 2023-01-24 04:10:24.776523: step: 706/463, loss: 0.0008586629992350936 2023-01-24 04:10:25.309673: step: 708/463, loss: 0.02213762328028679 2023-01-24 04:10:25.947086: step: 710/463, loss: 0.004487354774028063 2023-01-24 04:10:26.544189: step: 712/463, loss: 0.05226609483361244 2023-01-24 04:10:27.153065: step: 714/463, loss: 0.03615260869264603 2023-01-24 04:10:27.786394: step: 716/463, loss: 0.007002922706305981 2023-01-24 04:10:28.486393: step: 718/463, loss: 0.09840276092290878 2023-01-24 04:10:29.056020: step: 720/463, loss: 0.00012211498687975109 2023-01-24 04:10:29.635548: step: 722/463, loss: 0.03511885553598404 2023-01-24 04:10:30.313752: step: 724/463, loss: 0.020227808505296707 2023-01-24 04:10:30.902036: step: 726/463, loss: 0.06579276919364929 2023-01-24 04:10:31.480192: step: 728/463, loss: 0.13906344771385193 2023-01-24 04:10:32.094470: step: 730/463, loss: 0.001676154904998839 2023-01-24 04:10:32.714411: step: 732/463, loss: 0.014798545278608799 2023-01-24 04:10:33.257099: step: 734/463, loss: 0.00010226824088022113 2023-01-24 04:10:33.914470: step: 736/463, loss: 0.007279396988451481 2023-01-24 04:10:34.590530: step: 738/463, loss: 0.004478995688259602 2023-01-24 04:10:35.249820: step: 740/463, loss: 0.04144086688756943 2023-01-24 04:10:35.818610: step: 742/463, loss: 0.06496033817529678 2023-01-24 04:10:36.417786: step: 744/463, loss: 0.006775314919650555 2023-01-24 04:10:37.019400: step: 746/463, loss: 0.0015269364230334759 2023-01-24 04:10:37.613293: step: 748/463, loss: 0.5756934285163879 2023-01-24 04:10:38.144780: step: 750/463, loss: 0.0030371088068932295 2023-01-24 04:10:38.764629: step: 752/463, loss: 0.018589776009321213 2023-01-24 04:10:39.369493: step: 754/463, loss: 0.014166095294058323 2023-01-24 04:10:40.054487: step: 756/463, loss: 0.0016255276277661324 2023-01-24 04:10:40.577756: step: 758/463, loss: 0.0005802181549370289 2023-01-24 04:10:41.151724: step: 760/463, loss: 0.0010424678912386298 2023-01-24 04:10:41.752212: step: 762/463, loss: 0.0036991271190345287 2023-01-24 04:10:42.368371: step: 764/463, loss: 0.002380595076829195 2023-01-24 04:10:42.993414: step: 766/463, loss: 0.0012162536149844527 2023-01-24 04:10:43.596996: step: 768/463, loss: 0.000260368047747761 2023-01-24 04:10:44.208304: step: 770/463, loss: 0.006308032665401697 2023-01-24 04:10:44.848035: step: 772/463, loss: 0.039203885942697525 2023-01-24 04:10:45.442790: step: 774/463, loss: 0.0015957781579345465 2023-01-24 04:10:46.025168: step: 776/463, loss: 0.00032782068592496216 2023-01-24 04:10:46.620274: step: 778/463, loss: 0.08055504411458969 2023-01-24 04:10:47.229234: step: 780/463, loss: 0.000563858775421977 2023-01-24 04:10:47.825420: step: 782/463, loss: 0.000413000350818038 2023-01-24 04:10:48.457158: step: 784/463, loss: 0.0011832569725811481 2023-01-24 04:10:49.096362: step: 786/463, loss: 0.000800123147200793 2023-01-24 04:10:49.769151: step: 788/463, loss: 0.02994483709335327 2023-01-24 04:10:50.363691: step: 790/463, loss: 0.11393272876739502 2023-01-24 04:10:50.978533: step: 792/463, loss: 0.0008954140357673168 2023-01-24 04:10:51.594359: step: 794/463, loss: 0.01384236104786396 2023-01-24 04:10:52.143126: step: 796/463, loss: 0.02325369603931904 2023-01-24 04:10:52.689863: step: 798/463, loss: 0.11536335200071335 2023-01-24 04:10:53.302312: step: 800/463, loss: 0.05841157212853432 2023-01-24 04:10:53.920042: step: 802/463, loss: 0.020103054121136665 2023-01-24 04:10:54.509507: step: 804/463, loss: 0.0007817949517630041 2023-01-24 04:10:55.118323: step: 806/463, loss: 0.016514485701918602 2023-01-24 04:10:55.730623: step: 808/463, loss: 0.00011975892266491428 2023-01-24 04:10:56.352371: step: 810/463, loss: 0.001813625800423324 2023-01-24 04:10:57.030510: step: 812/463, loss: 0.004150128923356533 2023-01-24 04:10:57.614649: step: 814/463, loss: 0.9811153411865234 2023-01-24 04:10:58.178582: step: 816/463, loss: 0.0007879543118178844 2023-01-24 04:10:58.812907: step: 818/463, loss: 0.06918565183877945 2023-01-24 04:10:59.486199: step: 820/463, loss: 0.027260111644864082 2023-01-24 04:11:00.097988: step: 822/463, loss: 0.026670122519135475 2023-01-24 04:11:00.727235: step: 824/463, loss: 0.03385030850768089 2023-01-24 04:11:01.429408: step: 826/463, loss: 0.34042808413505554 2023-01-24 04:11:02.027439: step: 828/463, loss: 0.006516442168504 2023-01-24 04:11:02.681161: step: 830/463, loss: 0.0002889304014388472 2023-01-24 04:11:03.309844: step: 832/463, loss: 0.07425732165575027 2023-01-24 04:11:03.867920: step: 834/463, loss: 0.0762614905834198 2023-01-24 04:11:04.414610: step: 836/463, loss: 0.030965104699134827 2023-01-24 04:11:05.049816: step: 838/463, loss: 0.00872024241834879 2023-01-24 04:11:05.661397: step: 840/463, loss: 0.0067772516049444675 2023-01-24 04:11:06.223402: step: 842/463, loss: 0.012217678129673004 2023-01-24 04:11:06.820865: step: 844/463, loss: 0.054619017988443375 2023-01-24 04:11:07.440547: step: 846/463, loss: 0.02612578310072422 2023-01-24 04:11:08.000670: step: 848/463, loss: 0.012977249920368195 2023-01-24 04:11:08.562102: step: 850/463, loss: 0.06328490376472473 2023-01-24 04:11:09.248676: step: 852/463, loss: 0.0014958162792026997 2023-01-24 04:11:09.862691: step: 854/463, loss: 0.0016592261381447315 2023-01-24 04:11:10.465923: step: 856/463, loss: 0.005616058129817247 2023-01-24 04:11:11.014121: step: 858/463, loss: 0.003547705477103591 2023-01-24 04:11:11.634250: step: 860/463, loss: 0.0033139472361654043 2023-01-24 04:11:12.225953: step: 862/463, loss: 0.04407806694507599 2023-01-24 04:11:12.833823: step: 864/463, loss: 0.084138423204422 2023-01-24 04:11:13.406275: step: 866/463, loss: 0.008432947099208832 2023-01-24 04:11:14.026862: step: 868/463, loss: 0.053198426961898804 2023-01-24 04:11:14.655635: step: 870/463, loss: 1.3525458574295044 2023-01-24 04:11:15.365956: step: 872/463, loss: 0.03148134425282478 2023-01-24 04:11:15.960146: step: 874/463, loss: 0.018678434193134308 2023-01-24 04:11:16.553849: step: 876/463, loss: 0.01867004856467247 2023-01-24 04:11:17.147750: step: 878/463, loss: 0.0107154855504632 2023-01-24 04:11:17.712836: step: 880/463, loss: 0.006085630971938372 2023-01-24 04:11:18.385399: step: 882/463, loss: 0.001016081660054624 2023-01-24 04:11:19.016638: step: 884/463, loss: 0.03283504396677017 2023-01-24 04:11:19.619802: step: 886/463, loss: 0.004466425161808729 2023-01-24 04:11:20.202069: step: 888/463, loss: 0.0044245729222893715 2023-01-24 04:11:20.818871: step: 890/463, loss: 0.03785926103591919 2023-01-24 04:11:21.389124: step: 892/463, loss: 0.016917934641242027 2023-01-24 04:11:21.994015: step: 894/463, loss: 0.0007436272571794689 2023-01-24 04:11:22.653036: step: 896/463, loss: 0.00171443703584373 2023-01-24 04:11:23.217618: step: 898/463, loss: 0.0020551409106701612 2023-01-24 04:11:23.849486: step: 900/463, loss: 0.02761920355260372 2023-01-24 04:11:24.418772: step: 902/463, loss: 0.18716730177402496 2023-01-24 04:11:24.991919: step: 904/463, loss: 0.010687085799872875 2023-01-24 04:11:25.622035: step: 906/463, loss: 0.48349571228027344 2023-01-24 04:11:26.305561: step: 908/463, loss: 0.01711428537964821 2023-01-24 04:11:26.904842: step: 910/463, loss: 0.0013519119238480926 2023-01-24 04:11:27.441742: step: 912/463, loss: 0.021199705079197884 2023-01-24 04:11:28.143042: step: 914/463, loss: 0.10481961816549301 2023-01-24 04:11:28.682010: step: 916/463, loss: 0.003973302431404591 2023-01-24 04:11:29.295453: step: 918/463, loss: 0.05862942337989807 2023-01-24 04:11:29.889806: step: 920/463, loss: 0.00661077257245779 2023-01-24 04:11:30.444137: step: 922/463, loss: 0.03870907425880432 2023-01-24 04:11:30.972216: step: 924/463, loss: 0.05001901462674141 2023-01-24 04:11:31.571291: step: 926/463, loss: 0.01128501258790493 ================================================== Loss: 0.076 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3222281237644117, 'r': 0.3112430740906249, 'f1': 0.3166403528320808}, 'combined': 0.23331394419205953, 'epoch': 32} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3641621313717475, 'r': 0.31617916292747716, 'f1': 0.33847858077056403}, 'combined': 0.23812563471295964, 'epoch': 32} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32227629406896635, 'r': 0.30762737161128606, 'f1': 0.3147814965324788}, 'combined': 0.2319442606028791, 'epoch': 32} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3652830134078295, 'r': 0.31551519673392436, 'f1': 0.3385800377322806}, 'combined': 0.24039182678991924, 'epoch': 32} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3402690565836817, 'r': 0.32154457339406733, 'f1': 0.3306419320559482}, 'combined': 0.2436308973043829, 'epoch': 32} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3787295801458257, 'r': 0.3099297961542696, 'f1': 0.34089300345498436}, 'combined': 0.2420340324530389, 'epoch': 32} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32291666666666663, 'r': 0.2952380952380952, 'f1': 0.308457711442786}, 'combined': 0.20563847429519067, 'epoch': 32} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.28846153846153844, 'r': 0.32608695652173914, 'f1': 0.30612244897959184}, 'combined': 0.15306122448979592, 'epoch': 32} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.13793103448275862, 'f1': 0.20512820512820515}, 'combined': 0.13675213675213677, 'epoch': 32} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3207567698259188, 'r': 0.3146703036053131, 'f1': 0.3176843869731801}, 'combined': 0.23408323250655372, 'epoch': 25} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3686692880345029, 'r': 0.31751883707683626, 'f1': 0.34118761114866797}, 'combined': 0.24224320391555423, 'epoch': 25} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3392857142857143, 'r': 0.41304347826086957, 'f1': 0.3725490196078432}, 'combined': 0.1862745098039216, 'epoch': 25} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3394620582120582, 'r': 0.3092447916666667, 'f1': 0.32364965312190286}, 'combined': 0.23847869177403366, 'epoch': 7} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36468931207169214, 'r': 0.2587193810770381, 'f1': 0.30269771384817323}, 'combined': 0.214915376832203, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 7} ****************************** Epoch: 33 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:14:04.093241: step: 2/463, loss: 0.0005838728975504637 2023-01-24 04:14:04.746869: step: 4/463, loss: 0.0036796301137655973 2023-01-24 04:14:05.366787: step: 6/463, loss: 0.028553789481520653 2023-01-24 04:14:06.023921: step: 8/463, loss: 0.11518396437168121 2023-01-24 04:14:06.670481: step: 10/463, loss: 0.061361946165561676 2023-01-24 04:14:07.244012: step: 12/463, loss: 0.00021376917720772326 2023-01-24 04:14:07.855656: step: 14/463, loss: 0.001544314669445157 2023-01-24 04:14:08.401003: step: 16/463, loss: 0.0016198744997382164 2023-01-24 04:14:09.017021: step: 18/463, loss: 0.012311260215938091 2023-01-24 04:14:09.673258: step: 20/463, loss: 0.0001487911940785125 2023-01-24 04:14:10.251207: step: 22/463, loss: 0.12347698956727982 2023-01-24 04:14:10.763994: step: 24/463, loss: 0.004561249166727066 2023-01-24 04:14:11.399053: step: 26/463, loss: 0.009677656926214695 2023-01-24 04:14:12.009773: step: 28/463, loss: 0.031378380954265594 2023-01-24 04:14:12.674176: step: 30/463, loss: 0.029382258653640747 2023-01-24 04:14:13.240929: step: 32/463, loss: 0.004005856346338987 2023-01-24 04:14:13.884698: step: 34/463, loss: 0.00637076934799552 2023-01-24 04:14:14.465776: step: 36/463, loss: 0.02646203152835369 2023-01-24 04:14:15.045595: step: 38/463, loss: 0.022790322080254555 2023-01-24 04:14:15.638335: step: 40/463, loss: 0.016529332846403122 2023-01-24 04:14:16.292384: step: 42/463, loss: 0.21076831221580505 2023-01-24 04:14:16.925523: step: 44/463, loss: 0.009082441218197346 2023-01-24 04:14:17.511313: step: 46/463, loss: 0.010140952654182911 2023-01-24 04:14:18.134060: step: 48/463, loss: 0.004477259702980518 2023-01-24 04:14:18.730357: step: 50/463, loss: 0.026485251262784004 2023-01-24 04:14:19.295066: step: 52/463, loss: 0.009523769840598106 2023-01-24 04:14:19.860516: step: 54/463, loss: 0.08658444136381149 2023-01-24 04:14:20.410182: step: 56/463, loss: 0.0018306249985471368 2023-01-24 04:14:20.958049: step: 58/463, loss: 0.07603178173303604 2023-01-24 04:14:21.629533: step: 60/463, loss: 0.02289430797100067 2023-01-24 04:14:22.417348: step: 62/463, loss: 0.02469564974308014 2023-01-24 04:14:23.061529: step: 64/463, loss: 0.0027112350799143314 2023-01-24 04:14:23.623826: step: 66/463, loss: 0.00598236545920372 2023-01-24 04:14:24.239607: step: 68/463, loss: 0.0068175229243934155 2023-01-24 04:14:24.859009: step: 70/463, loss: 0.7199687957763672 2023-01-24 04:14:25.503585: step: 72/463, loss: 0.02131953462958336 2023-01-24 04:14:26.083439: step: 74/463, loss: 0.010990191251039505 2023-01-24 04:14:26.700633: step: 76/463, loss: 0.009139345958828926 2023-01-24 04:14:27.317748: step: 78/463, loss: 0.004103429615497589 2023-01-24 04:14:27.977731: step: 80/463, loss: 0.01168191060423851 2023-01-24 04:14:28.505667: step: 82/463, loss: 0.003918282687664032 2023-01-24 04:14:29.133618: step: 84/463, loss: 0.011452531442046165 2023-01-24 04:14:29.784541: step: 86/463, loss: 0.08862166851758957 2023-01-24 04:14:30.418868: step: 88/463, loss: 0.003478341968730092 2023-01-24 04:14:30.976397: step: 90/463, loss: 0.021763445809483528 2023-01-24 04:14:31.584644: step: 92/463, loss: 0.03426680713891983 2023-01-24 04:14:32.104945: step: 94/463, loss: 0.0011700113536790013 2023-01-24 04:14:32.686239: step: 96/463, loss: 0.02986243925988674 2023-01-24 04:14:33.319434: step: 98/463, loss: 0.0002916607481893152 2023-01-24 04:14:33.969703: step: 100/463, loss: 0.00420115003362298 2023-01-24 04:14:34.566878: step: 102/463, loss: 0.008330157957971096 2023-01-24 04:14:35.150168: step: 104/463, loss: 0.016884522512555122 2023-01-24 04:14:35.711406: step: 106/463, loss: 0.0009095568675547838 2023-01-24 04:14:36.313074: step: 108/463, loss: 0.012146411463618279 2023-01-24 04:14:36.906583: step: 110/463, loss: 0.04539710655808449 2023-01-24 04:14:37.514731: step: 112/463, loss: 0.003982507158070803 2023-01-24 04:14:38.142389: step: 114/463, loss: 0.015414956957101822 2023-01-24 04:14:38.723161: step: 116/463, loss: 0.032326217740774155 2023-01-24 04:14:39.293193: step: 118/463, loss: 0.04555743187665939 2023-01-24 04:14:39.914164: step: 120/463, loss: 0.00603244174271822 2023-01-24 04:14:40.492314: step: 122/463, loss: 0.04901004210114479 2023-01-24 04:14:41.054782: step: 124/463, loss: 0.0006416928372345865 2023-01-24 04:14:41.772211: step: 126/463, loss: 0.098506398499012 2023-01-24 04:14:42.370383: step: 128/463, loss: 0.009569530375301838 2023-01-24 04:14:42.959014: step: 130/463, loss: 0.028234215453267097 2023-01-24 04:14:43.500367: step: 132/463, loss: 0.0016172085888683796 2023-01-24 04:14:44.081608: step: 134/463, loss: 0.007912351749837399 2023-01-24 04:14:44.622679: step: 136/463, loss: 0.02441013604402542 2023-01-24 04:14:45.195554: step: 138/463, loss: 0.001291831023991108 2023-01-24 04:14:45.901251: step: 140/463, loss: 0.0021343007683753967 2023-01-24 04:14:46.531797: step: 142/463, loss: 0.015480690635740757 2023-01-24 04:14:47.112253: step: 144/463, loss: 0.016786731779575348 2023-01-24 04:14:47.687045: step: 146/463, loss: 0.0397174134850502 2023-01-24 04:14:48.261879: step: 148/463, loss: 0.009523576125502586 2023-01-24 04:14:48.856278: step: 150/463, loss: 0.03512715920805931 2023-01-24 04:14:49.547647: step: 152/463, loss: 0.04385817423462868 2023-01-24 04:14:50.156100: step: 154/463, loss: 0.11459621787071228 2023-01-24 04:14:50.691928: step: 156/463, loss: 0.0013194279745221138 2023-01-24 04:14:51.315459: step: 158/463, loss: 0.0011858544312417507 2023-01-24 04:14:51.940667: step: 160/463, loss: 0.012376347556710243 2023-01-24 04:14:52.556913: step: 162/463, loss: 0.002585452049970627 2023-01-24 04:14:53.221246: step: 164/463, loss: 0.009718937799334526 2023-01-24 04:14:53.835156: step: 166/463, loss: 0.015149606391787529 2023-01-24 04:14:54.430362: step: 168/463, loss: 0.07000407576560974 2023-01-24 04:14:55.000712: step: 170/463, loss: 0.0004015228769276291 2023-01-24 04:14:55.596132: step: 172/463, loss: 0.09126792848110199 2023-01-24 04:14:56.182512: step: 174/463, loss: 0.025713082402944565 2023-01-24 04:14:56.831811: step: 176/463, loss: 0.02312450483441353 2023-01-24 04:14:57.474583: step: 178/463, loss: 0.005459593143314123 2023-01-24 04:14:58.136952: step: 180/463, loss: 1.098474383354187 2023-01-24 04:14:58.738123: step: 182/463, loss: 0.010516777634620667 2023-01-24 04:14:59.336103: step: 184/463, loss: 0.007044765166938305 2023-01-24 04:15:00.019592: step: 186/463, loss: 0.025057977065443993 2023-01-24 04:15:00.653528: step: 188/463, loss: 0.18140362203121185 2023-01-24 04:15:01.278981: step: 190/463, loss: 0.005479200277477503 2023-01-24 04:15:01.898176: step: 192/463, loss: 0.003280727192759514 2023-01-24 04:15:02.573727: step: 194/463, loss: 0.12403030693531036 2023-01-24 04:15:03.207996: step: 196/463, loss: 0.008641340769827366 2023-01-24 04:15:03.794438: step: 198/463, loss: 0.001279389951378107 2023-01-24 04:15:04.393618: step: 200/463, loss: 0.00016722716100048274 2023-01-24 04:15:04.992524: step: 202/463, loss: 0.0009838627884164453 2023-01-24 04:15:05.611790: step: 204/463, loss: 0.001831158879213035 2023-01-24 04:15:06.149866: step: 206/463, loss: 0.06412768363952637 2023-01-24 04:15:06.734561: step: 208/463, loss: 0.0002927652676589787 2023-01-24 04:15:07.380774: step: 210/463, loss: 0.03261362761259079 2023-01-24 04:15:08.022165: step: 212/463, loss: 0.004331209231168032 2023-01-24 04:15:08.631572: step: 214/463, loss: 0.0008582014706917107 2023-01-24 04:15:09.245305: step: 216/463, loss: 0.0013344939798116684 2023-01-24 04:15:09.937669: step: 218/463, loss: 0.009280706755816936 2023-01-24 04:15:10.556949: step: 220/463, loss: 0.022709403187036514 2023-01-24 04:15:11.162237: step: 222/463, loss: 0.0003444275353103876 2023-01-24 04:15:11.699917: step: 224/463, loss: 0.008144727908074856 2023-01-24 04:15:12.272472: step: 226/463, loss: 0.06918017566204071 2023-01-24 04:15:12.874512: step: 228/463, loss: 0.003187294816598296 2023-01-24 04:15:13.488062: step: 230/463, loss: 0.02032507210969925 2023-01-24 04:15:14.071262: step: 232/463, loss: 0.07200367748737335 2023-01-24 04:15:14.677145: step: 234/463, loss: 0.05037083476781845 2023-01-24 04:15:15.297923: step: 236/463, loss: 0.0007847630186006427 2023-01-24 04:15:15.891425: step: 238/463, loss: 0.008002848364412785 2023-01-24 04:15:16.522664: step: 240/463, loss: 0.0011279461905360222 2023-01-24 04:15:17.094252: step: 242/463, loss: 0.02964147925376892 2023-01-24 04:15:17.718526: step: 244/463, loss: 0.008962658233940601 2023-01-24 04:15:18.341027: step: 246/463, loss: 0.020298093557357788 2023-01-24 04:15:18.915147: step: 248/463, loss: 0.022281860932707787 2023-01-24 04:15:19.524441: step: 250/463, loss: 0.021841777488589287 2023-01-24 04:15:20.089860: step: 252/463, loss: 0.029822684824466705 2023-01-24 04:15:20.752810: step: 254/463, loss: 0.21131698787212372 2023-01-24 04:15:21.373500: step: 256/463, loss: 0.02831444889307022 2023-01-24 04:15:21.916178: step: 258/463, loss: 8.886739669833332e-05 2023-01-24 04:15:22.427655: step: 260/463, loss: 0.003187543712556362 2023-01-24 04:15:23.024113: step: 262/463, loss: 0.017826393246650696 2023-01-24 04:15:23.601789: step: 264/463, loss: 0.0058879125863313675 2023-01-24 04:15:24.252249: step: 266/463, loss: 0.005320696625858545 2023-01-24 04:15:24.853807: step: 268/463, loss: 0.03186306729912758 2023-01-24 04:15:25.528047: step: 270/463, loss: 0.07570669054985046 2023-01-24 04:15:26.195394: step: 272/463, loss: 6.97263385518454e-05 2023-01-24 04:15:26.790786: step: 274/463, loss: 0.05834873020648956 2023-01-24 04:15:27.356178: step: 276/463, loss: 0.00018535653362050653 2023-01-24 04:15:27.940569: step: 278/463, loss: 0.04679768905043602 2023-01-24 04:15:28.514331: step: 280/463, loss: 0.0043772365897893906 2023-01-24 04:15:29.150830: step: 282/463, loss: 0.01356886699795723 2023-01-24 04:15:29.750562: step: 284/463, loss: 0.0028775909449905157 2023-01-24 04:15:30.383330: step: 286/463, loss: 0.0021698339842259884 2023-01-24 04:15:30.983275: step: 288/463, loss: 0.0012278464855626225 2023-01-24 04:15:31.628695: step: 290/463, loss: 0.11959181725978851 2023-01-24 04:15:32.214625: step: 292/463, loss: 0.004736327566206455 2023-01-24 04:15:32.855737: step: 294/463, loss: 0.00015975927817635238 2023-01-24 04:15:33.467330: step: 296/463, loss: 0.027367424219846725 2023-01-24 04:15:34.109362: step: 298/463, loss: 0.0012036832049489021 2023-01-24 04:15:34.737338: step: 300/463, loss: 0.03683348000049591 2023-01-24 04:15:35.377286: step: 302/463, loss: 0.0276046022772789 2023-01-24 04:15:36.021963: step: 304/463, loss: 4.63661017420236e-05 2023-01-24 04:15:36.679502: step: 306/463, loss: 0.03613134101033211 2023-01-24 04:15:37.245678: step: 308/463, loss: 0.09685475379228592 2023-01-24 04:15:37.888027: step: 310/463, loss: 0.01267706137150526 2023-01-24 04:15:38.433236: step: 312/463, loss: 0.00018830002227332443 2023-01-24 04:15:39.037747: step: 314/463, loss: 0.012970653362572193 2023-01-24 04:15:39.605462: step: 316/463, loss: 0.0019017593003809452 2023-01-24 04:15:40.276196: step: 318/463, loss: 0.00013035692973062396 2023-01-24 04:15:41.046643: step: 320/463, loss: 0.001292020664550364 2023-01-24 04:15:41.631155: step: 322/463, loss: 0.012800169177353382 2023-01-24 04:15:42.206728: step: 324/463, loss: 0.02842739410698414 2023-01-24 04:15:42.875246: step: 326/463, loss: 0.0073278117924928665 2023-01-24 04:15:43.510454: step: 328/463, loss: 0.006085410248488188 2023-01-24 04:15:44.099466: step: 330/463, loss: 0.04722003638744354 2023-01-24 04:15:44.627739: step: 332/463, loss: 0.003975823987275362 2023-01-24 04:15:45.216075: step: 334/463, loss: 0.0044944193214178085 2023-01-24 04:15:45.843992: step: 336/463, loss: 0.006745536811649799 2023-01-24 04:15:46.427992: step: 338/463, loss: 0.0035184926819056273 2023-01-24 04:15:47.100617: step: 340/463, loss: 0.006379464641213417 2023-01-24 04:15:47.664474: step: 342/463, loss: 0.0035234037786722183 2023-01-24 04:15:48.250783: step: 344/463, loss: 0.009157435037195683 2023-01-24 04:15:48.912941: step: 346/463, loss: 0.039795663207769394 2023-01-24 04:15:49.537374: step: 348/463, loss: 0.03033830225467682 2023-01-24 04:15:50.146522: step: 350/463, loss: 0.012800659984350204 2023-01-24 04:15:50.773521: step: 352/463, loss: 0.004381866194307804 2023-01-24 04:15:51.446161: step: 354/463, loss: 0.02245953306555748 2023-01-24 04:15:52.077707: step: 356/463, loss: 0.035399630665779114 2023-01-24 04:15:52.628226: step: 358/463, loss: 0.0006448173662647605 2023-01-24 04:15:53.271231: step: 360/463, loss: 0.02219400554895401 2023-01-24 04:15:53.910184: step: 362/463, loss: 0.0007372766849584877 2023-01-24 04:15:54.544222: step: 364/463, loss: 0.035487107932567596 2023-01-24 04:15:55.208932: step: 366/463, loss: 0.00020207747002132237 2023-01-24 04:15:55.817303: step: 368/463, loss: 0.009313790127635002 2023-01-24 04:15:56.463573: step: 370/463, loss: 0.013273043558001518 2023-01-24 04:15:57.094486: step: 372/463, loss: 0.0069611091166734695 2023-01-24 04:15:57.737246: step: 374/463, loss: 0.0461600087583065 2023-01-24 04:15:58.419115: step: 376/463, loss: 0.02742009051144123 2023-01-24 04:15:59.040888: step: 378/463, loss: 0.01643667370080948 2023-01-24 04:15:59.673278: step: 380/463, loss: 0.005410562735050917 2023-01-24 04:16:00.349714: step: 382/463, loss: 0.06586920469999313 2023-01-24 04:16:00.977637: step: 384/463, loss: 0.005287934560328722 2023-01-24 04:16:01.586424: step: 386/463, loss: 0.09115124493837357 2023-01-24 04:16:02.156211: step: 388/463, loss: 0.0028624183032661676 2023-01-24 04:16:02.799523: step: 390/463, loss: 0.17645984888076782 2023-01-24 04:16:03.327049: step: 392/463, loss: 0.009274193085730076 2023-01-24 04:16:03.870612: step: 394/463, loss: 0.0003609932027757168 2023-01-24 04:16:04.485225: step: 396/463, loss: 0.01095089502632618 2023-01-24 04:16:05.132117: step: 398/463, loss: 0.027245143428444862 2023-01-24 04:16:05.719485: step: 400/463, loss: 0.02114170975983143 2023-01-24 04:16:06.379373: step: 402/463, loss: 0.03707890585064888 2023-01-24 04:16:07.023690: step: 404/463, loss: 0.01049686037003994 2023-01-24 04:16:07.592289: step: 406/463, loss: 0.047573719173669815 2023-01-24 04:16:08.203036: step: 408/463, loss: 0.021048221737146378 2023-01-24 04:16:08.856245: step: 410/463, loss: 0.005747769959270954 2023-01-24 04:16:09.438018: step: 412/463, loss: 0.004609661176800728 2023-01-24 04:16:10.054861: step: 414/463, loss: 0.006279991939663887 2023-01-24 04:16:10.736377: step: 416/463, loss: 0.08118898421525955 2023-01-24 04:16:11.348901: step: 418/463, loss: 0.009789901785552502 2023-01-24 04:16:11.952158: step: 420/463, loss: 0.003455519210547209 2023-01-24 04:16:12.562818: step: 422/463, loss: 0.031343795359134674 2023-01-24 04:16:13.143278: step: 424/463, loss: 0.018997281789779663 2023-01-24 04:16:13.756939: step: 426/463, loss: 0.05251457914710045 2023-01-24 04:16:14.362328: step: 428/463, loss: 0.010864072479307652 2023-01-24 04:16:14.981723: step: 430/463, loss: 0.03742692619562149 2023-01-24 04:16:15.626809: step: 432/463, loss: 0.0031419347506016493 2023-01-24 04:16:16.337413: step: 434/463, loss: 0.031105373054742813 2023-01-24 04:16:16.885459: step: 436/463, loss: 0.00456908019259572 2023-01-24 04:16:17.548989: step: 438/463, loss: 0.005175802391022444 2023-01-24 04:16:18.157576: step: 440/463, loss: 0.06676509976387024 2023-01-24 04:16:18.820856: step: 442/463, loss: 0.05279666557908058 2023-01-24 04:16:19.448230: step: 444/463, loss: 6.657070480287075e-05 2023-01-24 04:16:20.054015: step: 446/463, loss: 0.006009325850754976 2023-01-24 04:16:20.696093: step: 448/463, loss: 0.21648679673671722 2023-01-24 04:16:21.292740: step: 450/463, loss: 0.04684234410524368 2023-01-24 04:16:21.876421: step: 452/463, loss: 0.012322206050157547 2023-01-24 04:16:22.429762: step: 454/463, loss: 0.02403104491531849 2023-01-24 04:16:23.028403: step: 456/463, loss: 0.004016124177724123 2023-01-24 04:16:23.607868: step: 458/463, loss: 0.012978221289813519 2023-01-24 04:16:24.170573: step: 460/463, loss: 0.01576056145131588 2023-01-24 04:16:24.749502: step: 462/463, loss: 0.014221275225281715 2023-01-24 04:16:25.420303: step: 464/463, loss: 0.011870700865983963 2023-01-24 04:16:26.115361: step: 466/463, loss: 0.0004337652353569865 2023-01-24 04:16:26.797576: step: 468/463, loss: 3.930867023882456e-05 2023-01-24 04:16:27.369088: step: 470/463, loss: 0.03720438480377197 2023-01-24 04:16:27.975525: step: 472/463, loss: 0.00041053217137232423 2023-01-24 04:16:28.601363: step: 474/463, loss: 0.0030586852226406336 2023-01-24 04:16:29.199209: step: 476/463, loss: 0.017206983640789986 2023-01-24 04:16:29.803999: step: 478/463, loss: 0.005409649573266506 2023-01-24 04:16:30.446285: step: 480/463, loss: 0.0010311121586710215 2023-01-24 04:16:31.085994: step: 482/463, loss: 0.0032840233761817217 2023-01-24 04:16:31.650745: step: 484/463, loss: 0.0017206562915816903 2023-01-24 04:16:32.228780: step: 486/463, loss: 0.1412554830312729 2023-01-24 04:16:32.838293: step: 488/463, loss: 0.009815244935452938 2023-01-24 04:16:33.453853: step: 490/463, loss: 0.0005244537605904043 2023-01-24 04:16:34.074283: step: 492/463, loss: 0.01462055929005146 2023-01-24 04:16:34.729273: step: 494/463, loss: 0.0033912458457052708 2023-01-24 04:16:35.312727: step: 496/463, loss: 0.0027820607647299767 2023-01-24 04:16:35.927500: step: 498/463, loss: 0.019720012322068214 2023-01-24 04:16:36.538329: step: 500/463, loss: 0.017409952357411385 2023-01-24 04:16:37.176530: step: 502/463, loss: 0.0034055879805237055 2023-01-24 04:16:37.798483: step: 504/463, loss: 0.0019153524190187454 2023-01-24 04:16:38.418242: step: 506/463, loss: 0.008466927334666252 2023-01-24 04:16:39.008924: step: 508/463, loss: 0.0380420945584774 2023-01-24 04:16:39.688794: step: 510/463, loss: 0.004329627379775047 2023-01-24 04:16:40.336303: step: 512/463, loss: 0.005321603734046221 2023-01-24 04:16:40.888012: step: 514/463, loss: 0.009746571071445942 2023-01-24 04:16:41.475958: step: 516/463, loss: 0.0038170150946825743 2023-01-24 04:16:42.085100: step: 518/463, loss: 0.001492805196903646 2023-01-24 04:16:42.716450: step: 520/463, loss: 0.05219261348247528 2023-01-24 04:16:43.346757: step: 522/463, loss: 0.004959399346262217 2023-01-24 04:16:43.946669: step: 524/463, loss: 0.025648441165685654 2023-01-24 04:16:44.471281: step: 526/463, loss: 0.032058265060186386 2023-01-24 04:16:45.040508: step: 528/463, loss: 0.00040168166742660105 2023-01-24 04:16:45.664002: step: 530/463, loss: 0.0010049269767478108 2023-01-24 04:16:46.205173: step: 532/463, loss: 0.009779085405170918 2023-01-24 04:16:46.785577: step: 534/463, loss: 0.0034142315853387117 2023-01-24 04:16:47.351836: step: 536/463, loss: 0.01920711062848568 2023-01-24 04:16:47.931144: step: 538/463, loss: 0.5397843718528748 2023-01-24 04:16:48.438697: step: 540/463, loss: 0.08125152438879013 2023-01-24 04:16:49.040259: step: 542/463, loss: 0.04342392832040787 2023-01-24 04:16:49.645463: step: 544/463, loss: 0.003960686270147562 2023-01-24 04:16:50.244802: step: 546/463, loss: 0.02034965343773365 2023-01-24 04:16:50.829792: step: 548/463, loss: 0.007116594351828098 2023-01-24 04:16:51.472360: step: 550/463, loss: 0.015324195846915245 2023-01-24 04:16:52.044236: step: 552/463, loss: 0.02763814851641655 2023-01-24 04:16:52.636624: step: 554/463, loss: 0.1192675307393074 2023-01-24 04:16:53.227696: step: 556/463, loss: 0.3865557909011841 2023-01-24 04:16:53.835233: step: 558/463, loss: 0.016509568318724632 2023-01-24 04:16:54.418648: step: 560/463, loss: 0.0031381144654005766 2023-01-24 04:16:55.043023: step: 562/463, loss: 0.016119441017508507 2023-01-24 04:16:55.720074: step: 564/463, loss: 0.004178203176707029 2023-01-24 04:16:56.319810: step: 566/463, loss: 0.003621061099693179 2023-01-24 04:16:56.900916: step: 568/463, loss: 0.021494220942258835 2023-01-24 04:16:57.603860: step: 570/463, loss: 0.022982075810432434 2023-01-24 04:16:58.152593: step: 572/463, loss: 0.0028020706959068775 2023-01-24 04:16:58.762541: step: 574/463, loss: 0.012720122002065182 2023-01-24 04:16:59.342303: step: 576/463, loss: 0.19362422823905945 2023-01-24 04:16:59.958345: step: 578/463, loss: 0.03292255103588104 2023-01-24 04:17:00.584915: step: 580/463, loss: 0.029395947232842445 2023-01-24 04:17:01.177528: step: 582/463, loss: 0.017274651676416397 2023-01-24 04:17:01.817531: step: 584/463, loss: 0.0035893472377210855 2023-01-24 04:17:02.398058: step: 586/463, loss: 0.03905964642763138 2023-01-24 04:17:03.014881: step: 588/463, loss: 0.0022449661046266556 2023-01-24 04:17:03.530165: step: 590/463, loss: 0.038851529359817505 2023-01-24 04:17:04.109019: step: 592/463, loss: 0.010086464695632458 2023-01-24 04:17:04.723771: step: 594/463, loss: 0.00311562092974782 2023-01-24 04:17:05.338627: step: 596/463, loss: 0.0116587458178401 2023-01-24 04:17:05.928134: step: 598/463, loss: 0.008730889298021793 2023-01-24 04:17:06.516526: step: 600/463, loss: 0.012512135319411755 2023-01-24 04:17:07.091797: step: 602/463, loss: 0.002097171498462558 2023-01-24 04:17:07.787352: step: 604/463, loss: 0.01029769703745842 2023-01-24 04:17:08.375844: step: 606/463, loss: 0.0010419189929962158 2023-01-24 04:17:08.964119: step: 608/463, loss: 0.00022850008099339902 2023-01-24 04:17:09.560655: step: 610/463, loss: 0.025674065575003624 2023-01-24 04:17:10.205289: step: 612/463, loss: 0.004144778475165367 2023-01-24 04:17:10.775677: step: 614/463, loss: 0.02449372410774231 2023-01-24 04:17:11.395805: step: 616/463, loss: 0.09946693480014801 2023-01-24 04:17:11.959987: step: 618/463, loss: 0.02570274844765663 2023-01-24 04:17:12.620161: step: 620/463, loss: 0.011625581420958042 2023-01-24 04:17:13.247988: step: 622/463, loss: 0.0017208412755280733 2023-01-24 04:17:13.831342: step: 624/463, loss: 0.01203273143619299 2023-01-24 04:17:14.448110: step: 626/463, loss: 0.027488011866807938 2023-01-24 04:17:15.010599: step: 628/463, loss: 0.003213544376194477 2023-01-24 04:17:15.603498: step: 630/463, loss: 0.00025309988996014 2023-01-24 04:17:16.282115: step: 632/463, loss: 0.06430121511220932 2023-01-24 04:17:16.938401: step: 634/463, loss: 0.0026590453926473856 2023-01-24 04:17:17.521541: step: 636/463, loss: 0.0043989322148263454 2023-01-24 04:17:18.163049: step: 638/463, loss: 0.002246115356683731 2023-01-24 04:17:18.800902: step: 640/463, loss: 0.4990261197090149 2023-01-24 04:17:19.488125: step: 642/463, loss: 0.02360442280769348 2023-01-24 04:17:20.035507: step: 644/463, loss: 0.0037420173175632954 2023-01-24 04:17:20.575806: step: 646/463, loss: 0.005335619673132896 2023-01-24 04:17:21.180692: step: 648/463, loss: 0.0038639490958303213 2023-01-24 04:17:21.775004: step: 650/463, loss: 0.03851168975234032 2023-01-24 04:17:22.369889: step: 652/463, loss: 0.00371488812379539 2023-01-24 04:17:23.027499: step: 654/463, loss: 0.017145732417702675 2023-01-24 04:17:23.649625: step: 656/463, loss: 0.035568736493587494 2023-01-24 04:17:24.216240: step: 658/463, loss: 0.00021081640443298966 2023-01-24 04:17:24.729141: step: 660/463, loss: 0.011847691610455513 2023-01-24 04:17:25.370233: step: 662/463, loss: 0.06191055476665497 2023-01-24 04:17:25.988564: step: 664/463, loss: 0.041682902723550797 2023-01-24 04:17:26.637082: step: 666/463, loss: 0.027384614571928978 2023-01-24 04:17:27.244758: step: 668/463, loss: 0.002008102834224701 2023-01-24 04:17:27.868561: step: 670/463, loss: 0.0504489503800869 2023-01-24 04:17:28.467182: step: 672/463, loss: 0.006112492643296719 2023-01-24 04:17:29.040053: step: 674/463, loss: 0.052975572645664215 2023-01-24 04:17:29.617691: step: 676/463, loss: 0.003929234109818935 2023-01-24 04:17:30.261533: step: 678/463, loss: 0.008626963943243027 2023-01-24 04:17:30.846058: step: 680/463, loss: 0.007996519096195698 2023-01-24 04:17:31.501037: step: 682/463, loss: 0.00557350181043148 2023-01-24 04:17:32.129119: step: 684/463, loss: 0.0032443313393741846 2023-01-24 04:17:32.735672: step: 686/463, loss: 0.00299658696167171 2023-01-24 04:17:33.323229: step: 688/463, loss: 0.06638474762439728 2023-01-24 04:17:33.909425: step: 690/463, loss: 0.0025290795601904392 2023-01-24 04:17:34.525797: step: 692/463, loss: 0.002020661486312747 2023-01-24 04:17:35.142262: step: 694/463, loss: 0.009624073281884193 2023-01-24 04:17:35.843477: step: 696/463, loss: 0.0036236043088138103 2023-01-24 04:17:36.548580: step: 698/463, loss: 0.012569190002977848 2023-01-24 04:17:37.121607: step: 700/463, loss: 0.012405741959810257 2023-01-24 04:17:37.693107: step: 702/463, loss: 0.015494809485971928 2023-01-24 04:17:38.227523: step: 704/463, loss: 0.2980192303657532 2023-01-24 04:17:38.846566: step: 706/463, loss: 0.0023888051509857178 2023-01-24 04:17:39.422801: step: 708/463, loss: 0.0007658750400878489 2023-01-24 04:17:40.044310: step: 710/463, loss: 0.009567346423864365 2023-01-24 04:17:40.617887: step: 712/463, loss: 0.0017552217468619347 2023-01-24 04:17:41.231518: step: 714/463, loss: 0.005241338163614273 2023-01-24 04:17:41.909098: step: 716/463, loss: 0.012667390517890453 2023-01-24 04:17:42.537720: step: 718/463, loss: 0.10796917229890823 2023-01-24 04:17:43.185763: step: 720/463, loss: 0.026891672983765602 2023-01-24 04:17:43.838023: step: 722/463, loss: 0.00013931158173363656 2023-01-24 04:17:44.431279: step: 724/463, loss: 0.004255113657563925 2023-01-24 04:17:44.981627: step: 726/463, loss: 0.0016220025718212128 2023-01-24 04:17:45.529891: step: 728/463, loss: 0.054670292884111404 2023-01-24 04:17:46.137350: step: 730/463, loss: 0.022167248651385307 2023-01-24 04:17:46.771985: step: 732/463, loss: 0.0018162551568821073 2023-01-24 04:17:47.359440: step: 734/463, loss: 0.007134732324630022 2023-01-24 04:17:47.959904: step: 736/463, loss: 0.005410294514149427 2023-01-24 04:17:48.555178: step: 738/463, loss: 0.22042253613471985 2023-01-24 04:17:49.177329: step: 740/463, loss: 0.023468226194381714 2023-01-24 04:17:49.735789: step: 742/463, loss: 0.00027941924054175615 2023-01-24 04:17:50.322753: step: 744/463, loss: 0.00024287303676828742 2023-01-24 04:17:50.996236: step: 746/463, loss: 0.017278704792261124 2023-01-24 04:17:51.612107: step: 748/463, loss: 0.03500797972083092 2023-01-24 04:17:52.269195: step: 750/463, loss: 0.796728789806366 2023-01-24 04:17:52.916632: step: 752/463, loss: 0.0005052015767432749 2023-01-24 04:17:53.451744: step: 754/463, loss: 0.006026283837854862 2023-01-24 04:17:54.105053: step: 756/463, loss: 0.006658461876213551 2023-01-24 04:17:54.741973: step: 758/463, loss: 0.0018847661558538675 2023-01-24 04:17:55.329277: step: 760/463, loss: 0.06354539841413498 2023-01-24 04:17:55.928578: step: 762/463, loss: 0.002102949656546116 2023-01-24 04:17:56.525319: step: 764/463, loss: 0.011750219389796257 2023-01-24 04:17:57.219107: step: 766/463, loss: 0.0011926926672458649 2023-01-24 04:17:57.821479: step: 768/463, loss: 0.008566496893763542 2023-01-24 04:17:58.433565: step: 770/463, loss: 0.014147879555821419 2023-01-24 04:17:59.009486: step: 772/463, loss: 0.0026623227167874575 2023-01-24 04:17:59.637725: step: 774/463, loss: 0.013729373924434185 2023-01-24 04:18:00.317189: step: 776/463, loss: 0.053739797323942184 2023-01-24 04:18:00.959035: step: 778/463, loss: 0.021200843155384064 2023-01-24 04:18:01.606907: step: 780/463, loss: 0.17194998264312744 2023-01-24 04:18:02.151021: step: 782/463, loss: 0.0033063823357224464 2023-01-24 04:18:02.744884: step: 784/463, loss: 0.023603692650794983 2023-01-24 04:18:03.345050: step: 786/463, loss: 0.024756046012043953 2023-01-24 04:18:03.963332: step: 788/463, loss: 0.07934191823005676 2023-01-24 04:18:04.639516: step: 790/463, loss: 0.00034389994107186794 2023-01-24 04:18:05.293197: step: 792/463, loss: 0.0025236739311367273 2023-01-24 04:18:05.871082: step: 794/463, loss: 0.027777913957834244 2023-01-24 04:18:06.459469: step: 796/463, loss: 0.026156937703490257 2023-01-24 04:18:07.032369: step: 798/463, loss: 0.10625739395618439 2023-01-24 04:18:07.709319: step: 800/463, loss: 0.007496539503335953 2023-01-24 04:18:08.288052: step: 802/463, loss: 0.008351575583219528 2023-01-24 04:18:08.847927: step: 804/463, loss: 0.0009183790534734726 2023-01-24 04:18:09.550793: step: 806/463, loss: 0.0338217131793499 2023-01-24 04:18:10.163571: step: 808/463, loss: 0.058468520641326904 2023-01-24 04:18:10.778936: step: 810/463, loss: 0.004259915091097355 2023-01-24 04:18:11.485813: step: 812/463, loss: 0.0012492173118516803 2023-01-24 04:18:12.115449: step: 814/463, loss: 0.20168477296829224 2023-01-24 04:18:12.814545: step: 816/463, loss: 0.026671335101127625 2023-01-24 04:18:13.367216: step: 818/463, loss: 0.00018686223484110087 2023-01-24 04:18:13.990386: step: 820/463, loss: 0.0009451408986933529 2023-01-24 04:18:14.630299: step: 822/463, loss: 0.0052192420698702335 2023-01-24 04:18:15.319968: step: 824/463, loss: 0.027328137308359146 2023-01-24 04:18:15.933470: step: 826/463, loss: 0.0033348642755299807 2023-01-24 04:18:16.503754: step: 828/463, loss: 0.0016838399460539222 2023-01-24 04:18:17.159180: step: 830/463, loss: 0.0015164094511419535 2023-01-24 04:18:17.800996: step: 832/463, loss: 0.00967051088809967 2023-01-24 04:18:18.375112: step: 834/463, loss: 0.08464853465557098 2023-01-24 04:18:18.997777: step: 836/463, loss: 0.0023542698472738266 2023-01-24 04:18:19.588374: step: 838/463, loss: 0.004748101811856031 2023-01-24 04:18:20.157635: step: 840/463, loss: 0.0015349648892879486 2023-01-24 04:18:20.781248: step: 842/463, loss: 0.1834559589624405 2023-01-24 04:18:21.315986: step: 844/463, loss: 0.0003381153510417789 2023-01-24 04:18:21.906166: step: 846/463, loss: 0.004294493701308966 2023-01-24 04:18:22.448542: step: 848/463, loss: 0.01123901642858982 2023-01-24 04:18:23.069105: step: 850/463, loss: 9.217923798132688e-05 2023-01-24 04:18:23.645720: step: 852/463, loss: 0.03025658056139946 2023-01-24 04:18:24.290979: step: 854/463, loss: 0.005184739828109741 2023-01-24 04:18:24.855812: step: 856/463, loss: 7.097554771462455e-05 2023-01-24 04:18:25.452754: step: 858/463, loss: 0.0007093837484717369 2023-01-24 04:18:26.103019: step: 860/463, loss: 0.013053419068455696 2023-01-24 04:18:26.708719: step: 862/463, loss: 0.010157177224755287 2023-01-24 04:18:27.378868: step: 864/463, loss: 0.001715026912279427 2023-01-24 04:18:27.987702: step: 866/463, loss: 0.1343306303024292 2023-01-24 04:18:28.596130: step: 868/463, loss: 0.007917540147900581 2023-01-24 04:18:29.249832: step: 870/463, loss: 0.08077823370695114 2023-01-24 04:18:29.840335: step: 872/463, loss: 0.003051335923373699 2023-01-24 04:18:30.406009: step: 874/463, loss: 0.24387234449386597 2023-01-24 04:18:30.980651: step: 876/463, loss: 0.01877758465707302 2023-01-24 04:18:31.585301: step: 878/463, loss: 0.18816280364990234 2023-01-24 04:18:32.140953: step: 880/463, loss: 0.011968160048127174 2023-01-24 04:18:32.749544: step: 882/463, loss: 0.028890138491988182 2023-01-24 04:18:33.364186: step: 884/463, loss: 0.020444074645638466 2023-01-24 04:18:34.000704: step: 886/463, loss: 0.002560785971581936 2023-01-24 04:18:34.601359: step: 888/463, loss: 0.002157779410481453 2023-01-24 04:18:35.182734: step: 890/463, loss: 0.04258251562714577 2023-01-24 04:18:35.763191: step: 892/463, loss: 0.0030910021159797907 2023-01-24 04:18:36.342613: step: 894/463, loss: 0.0004493285669013858 2023-01-24 04:18:37.016292: step: 896/463, loss: 0.031631145626306534 2023-01-24 04:18:37.636446: step: 898/463, loss: 0.005111501086503267 2023-01-24 04:18:38.252031: step: 900/463, loss: 0.17092856764793396 2023-01-24 04:18:38.834117: step: 902/463, loss: 0.0042287507094442844 2023-01-24 04:18:39.471346: step: 904/463, loss: 0.01966959983110428 2023-01-24 04:18:40.055370: step: 906/463, loss: 0.000968066742643714 2023-01-24 04:18:40.706076: step: 908/463, loss: 0.009539997205138206 2023-01-24 04:18:41.267009: step: 910/463, loss: 0.004551413934677839 2023-01-24 04:18:41.877876: step: 912/463, loss: 0.07617460191249847 2023-01-24 04:18:42.411753: step: 914/463, loss: 0.0019033390562981367 2023-01-24 04:18:43.000084: step: 916/463, loss: 0.001804685452952981 2023-01-24 04:18:43.582808: step: 918/463, loss: 0.0009741802350617945 2023-01-24 04:18:44.181946: step: 920/463, loss: 0.015898147597908974 2023-01-24 04:18:44.809983: step: 922/463, loss: 0.004721301142126322 2023-01-24 04:18:45.491353: step: 924/463, loss: 0.03736497089266777 2023-01-24 04:18:46.068753: step: 926/463, loss: 0.0020374306477606297 ================================================== Loss: 0.033 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3302065617913832, 'r': 0.3157952697207915, 'f1': 0.32284016904530966}, 'combined': 0.23788222982285973, 'epoch': 33} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.36751651277620545, 'r': 0.31423464280166385, 'f1': 0.3387934708172365}, 'combined': 0.23834716539906087, 'epoch': 33} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3298659759358289, 'r': 0.3129658215709952, 'f1': 0.32119374482553936}, 'combined': 0.23666907513460794, 'epoch': 33} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3677399230923137, 'r': 0.3108927908762966, 'f1': 0.336935395696507}, 'combined': 0.23922413094451994, 'epoch': 33} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3408056500589437, 'r': 0.32916522937381854, 'f1': 0.3348843163706609}, 'combined': 0.24675686469417119, 'epoch': 33} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37311192316289277, 'r': 0.29881540047194116, 'f1': 0.3318560946075389}, 'combined': 0.2356178271713526, 'epoch': 33} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.2952380952380952, 'f1': 0.3131313131313131}, 'combined': 0.20875420875420872, 'epoch': 33} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.33653846153846156, 'r': 0.3804347826086957, 'f1': 0.35714285714285715}, 'combined': 0.17857142857142858, 'epoch': 33} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36363636363636365, 'r': 0.13793103448275862, 'f1': 0.2}, 'combined': 0.13333333333333333, 'epoch': 33} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3207567698259188, 'r': 0.3146703036053131, 'f1': 0.3176843869731801}, 'combined': 0.23408323250655372, 'epoch': 25} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3686692880345029, 'r': 0.31751883707683626, 'f1': 0.34118761114866797}, 'combined': 0.24224320391555423, 'epoch': 25} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3392857142857143, 'r': 0.41304347826086957, 'f1': 0.3725490196078432}, 'combined': 0.1862745098039216, 'epoch': 25} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3394620582120582, 'r': 0.3092447916666667, 'f1': 0.32364965312190286}, 'combined': 0.23847869177403366, 'epoch': 7} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36468931207169214, 'r': 0.2587193810770381, 'f1': 0.30269771384817323}, 'combined': 0.214915376832203, 'epoch': 7} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 7} ****************************** Epoch: 34 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:21:17.894076: step: 2/463, loss: 0.029304951429367065 2023-01-24 04:21:18.508963: step: 4/463, loss: 0.007931295782327652 2023-01-24 04:21:19.083446: step: 6/463, loss: 0.0007992827449925244 2023-01-24 04:21:19.722287: step: 8/463, loss: 0.014037723653018475 2023-01-24 04:21:20.319653: step: 10/463, loss: 0.009433233179152012 2023-01-24 04:21:21.023564: step: 12/463, loss: 0.0995924100279808 2023-01-24 04:21:21.656490: step: 14/463, loss: 0.016255252063274384 2023-01-24 04:21:22.298308: step: 16/463, loss: 0.0031666599679738283 2023-01-24 04:21:22.922937: step: 18/463, loss: 0.002274454338476062 2023-01-24 04:21:23.538009: step: 20/463, loss: 0.013991070911288261 2023-01-24 04:21:24.107884: step: 22/463, loss: 0.00034361015423201025 2023-01-24 04:21:24.717702: step: 24/463, loss: 0.005209648050367832 2023-01-24 04:21:25.248961: step: 26/463, loss: 0.0010698563419282436 2023-01-24 04:21:25.820928: step: 28/463, loss: 0.0002590560761746019 2023-01-24 04:21:26.474809: step: 30/463, loss: 0.013892616145312786 2023-01-24 04:21:27.059383: step: 32/463, loss: 0.05545768886804581 2023-01-24 04:21:27.642660: step: 34/463, loss: 0.010394544340670109 2023-01-24 04:21:28.239665: step: 36/463, loss: 0.001028071390464902 2023-01-24 04:21:28.850844: step: 38/463, loss: 0.013471875339746475 2023-01-24 04:21:29.498319: step: 40/463, loss: 0.01951676420867443 2023-01-24 04:21:30.069726: step: 42/463, loss: 0.0008970889030024409 2023-01-24 04:21:30.635274: step: 44/463, loss: 0.009694176726043224 2023-01-24 04:21:31.240684: step: 46/463, loss: 0.0049149636179208755 2023-01-24 04:21:31.918838: step: 48/463, loss: 0.01297727506607771 2023-01-24 04:21:32.477819: step: 50/463, loss: 0.00391235901042819 2023-01-24 04:21:33.071858: step: 52/463, loss: 0.0001812854316085577 2023-01-24 04:21:33.684551: step: 54/463, loss: 0.0027035123202949762 2023-01-24 04:21:34.352570: step: 56/463, loss: 0.03743349760770798 2023-01-24 04:21:34.908964: step: 58/463, loss: 0.0017144337762147188 2023-01-24 04:21:35.473936: step: 60/463, loss: 21.491796493530273 2023-01-24 04:21:36.087289: step: 62/463, loss: 0.00410600146278739 2023-01-24 04:21:36.685315: step: 64/463, loss: 0.02777678146958351 2023-01-24 04:21:37.265740: step: 66/463, loss: 0.015230216085910797 2023-01-24 04:21:37.832657: step: 68/463, loss: 0.007852450013160706 2023-01-24 04:21:38.471109: step: 70/463, loss: 0.02570335939526558 2023-01-24 04:21:39.062361: step: 72/463, loss: 0.011742936447262764 2023-01-24 04:21:39.665375: step: 74/463, loss: 0.025805555284023285 2023-01-24 04:21:40.258547: step: 76/463, loss: 0.0020883132237941027 2023-01-24 04:21:40.848594: step: 78/463, loss: 0.0029377825558185577 2023-01-24 04:21:41.537520: step: 80/463, loss: 0.008978652767837048 2023-01-24 04:21:42.156393: step: 82/463, loss: 0.0670977532863617 2023-01-24 04:21:42.824360: step: 84/463, loss: 0.0019001472974196076 2023-01-24 04:21:43.395829: step: 86/463, loss: 0.000321685045491904 2023-01-24 04:21:43.975876: step: 88/463, loss: 0.01526445709168911 2023-01-24 04:21:44.539601: step: 90/463, loss: 0.007875067181885242 2023-01-24 04:21:45.170034: step: 92/463, loss: 0.018254505470395088 2023-01-24 04:21:45.776870: step: 94/463, loss: 0.015650613233447075 2023-01-24 04:21:46.320987: step: 96/463, loss: 0.010977067984640598 2023-01-24 04:21:46.869200: step: 98/463, loss: 0.009907637722790241 2023-01-24 04:21:47.427861: step: 100/463, loss: 0.0019805775955319405 2023-01-24 04:21:47.978957: step: 102/463, loss: 0.018707267940044403 2023-01-24 04:21:48.603779: step: 104/463, loss: 0.00346344499848783 2023-01-24 04:21:49.147965: step: 106/463, loss: 0.0015283458633348346 2023-01-24 04:21:49.718262: step: 108/463, loss: 0.04074486717581749 2023-01-24 04:21:50.393459: step: 110/463, loss: 0.012105356901884079 2023-01-24 04:21:51.027924: step: 112/463, loss: 0.18130572140216827 2023-01-24 04:21:51.606909: step: 114/463, loss: 0.002669182838872075 2023-01-24 04:21:52.207234: step: 116/463, loss: 0.001340729184448719 2023-01-24 04:21:52.831121: step: 118/463, loss: 0.00025550031568855047 2023-01-24 04:21:53.442828: step: 120/463, loss: 0.004472694359719753 2023-01-24 04:21:54.023574: step: 122/463, loss: 0.0009512307005934417 2023-01-24 04:21:54.628358: step: 124/463, loss: 0.10495840758085251 2023-01-24 04:21:55.259131: step: 126/463, loss: 0.004247522447258234 2023-01-24 04:21:55.898467: step: 128/463, loss: 0.001238059950992465 2023-01-24 04:21:56.478168: step: 130/463, loss: 0.07480993121862411 2023-01-24 04:21:57.100762: step: 132/463, loss: 0.010832366533577442 2023-01-24 04:21:57.733329: step: 134/463, loss: 0.010326452553272247 2023-01-24 04:21:58.325979: step: 136/463, loss: 3.73233669961337e-05 2023-01-24 04:21:58.926428: step: 138/463, loss: 0.013216571882367134 2023-01-24 04:21:59.605684: step: 140/463, loss: 0.006979175377637148 2023-01-24 04:22:00.171432: step: 142/463, loss: 0.008622732944786549 2023-01-24 04:22:00.769715: step: 144/463, loss: 0.0034023327752947807 2023-01-24 04:22:01.398059: step: 146/463, loss: 0.12339223921298981 2023-01-24 04:22:02.012134: step: 148/463, loss: 0.0018417502287775278 2023-01-24 04:22:02.661747: step: 150/463, loss: 0.026469113305211067 2023-01-24 04:22:03.231588: step: 152/463, loss: 0.00040981321944855154 2023-01-24 04:22:03.893273: step: 154/463, loss: 0.0009731424506753683 2023-01-24 04:22:04.473476: step: 156/463, loss: 0.004813824780285358 2023-01-24 04:22:05.083431: step: 158/463, loss: 0.0011529552284628153 2023-01-24 04:22:05.673306: step: 160/463, loss: 0.03348485380411148 2023-01-24 04:22:06.287704: step: 162/463, loss: 0.009393349289894104 2023-01-24 04:22:06.818805: step: 164/463, loss: 0.0023935572244226933 2023-01-24 04:22:07.471261: step: 166/463, loss: 0.0009037660784088075 2023-01-24 04:22:08.036423: step: 168/463, loss: 0.005582581274211407 2023-01-24 04:22:08.716927: step: 170/463, loss: 0.04677043482661247 2023-01-24 04:22:09.316978: step: 172/463, loss: 0.09852571785449982 2023-01-24 04:22:09.903856: step: 174/463, loss: 0.015726812183856964 2023-01-24 04:22:10.548742: step: 176/463, loss: 0.008652674965560436 2023-01-24 04:22:11.137094: step: 178/463, loss: 0.002285152906551957 2023-01-24 04:22:11.740442: step: 180/463, loss: 0.006512057036161423 2023-01-24 04:22:12.316768: step: 182/463, loss: 0.012659301050007343 2023-01-24 04:22:12.905935: step: 184/463, loss: 0.00985607411712408 2023-01-24 04:22:13.528164: step: 186/463, loss: 0.03183724731206894 2023-01-24 04:22:14.124460: step: 188/463, loss: 0.019817395135760307 2023-01-24 04:22:14.749707: step: 190/463, loss: 0.012516515329480171 2023-01-24 04:22:15.345813: step: 192/463, loss: 0.006831088103353977 2023-01-24 04:22:15.958819: step: 194/463, loss: 0.015963073819875717 2023-01-24 04:22:16.557649: step: 196/463, loss: 4.200536204734817e-05 2023-01-24 04:22:17.122560: step: 198/463, loss: 0.01954985223710537 2023-01-24 04:22:17.685999: step: 200/463, loss: 0.0001290324580622837 2023-01-24 04:22:18.324264: step: 202/463, loss: 0.0019248885801061988 2023-01-24 04:22:18.931995: step: 204/463, loss: 0.0005912953638471663 2023-01-24 04:22:19.532852: step: 206/463, loss: 0.0033509531058371067 2023-01-24 04:22:20.145430: step: 208/463, loss: 0.011447211727499962 2023-01-24 04:22:20.845264: step: 210/463, loss: 0.004402702208608389 2023-01-24 04:22:21.455320: step: 212/463, loss: 0.15103071928024292 2023-01-24 04:22:22.085539: step: 214/463, loss: 0.031743768602609634 2023-01-24 04:22:22.669967: step: 216/463, loss: 1.0620481967926025 2023-01-24 04:22:23.299656: step: 218/463, loss: 0.0011288271052762866 2023-01-24 04:22:23.884296: step: 220/463, loss: 0.015985960140824318 2023-01-24 04:22:24.516762: step: 222/463, loss: 0.0032427851110696793 2023-01-24 04:22:25.108796: step: 224/463, loss: 0.0692337229847908 2023-01-24 04:22:25.620406: step: 226/463, loss: 0.012289442121982574 2023-01-24 04:22:26.222002: step: 228/463, loss: 0.01814359799027443 2023-01-24 04:22:26.836235: step: 230/463, loss: 2.4756223865551874e-05 2023-01-24 04:22:27.505897: step: 232/463, loss: 0.04240674152970314 2023-01-24 04:22:28.167976: step: 234/463, loss: 1.0751752853393555 2023-01-24 04:22:28.758543: step: 236/463, loss: 0.0012131592957302928 2023-01-24 04:22:29.353447: step: 238/463, loss: 0.02139495313167572 2023-01-24 04:22:29.947013: step: 240/463, loss: 0.0005623931065201759 2023-01-24 04:22:30.539153: step: 242/463, loss: 0.035046253353357315 2023-01-24 04:22:31.106879: step: 244/463, loss: 0.0190215352922678 2023-01-24 04:22:31.738146: step: 246/463, loss: 0.006461307406425476 2023-01-24 04:22:32.397210: step: 248/463, loss: 0.0008459900855086744 2023-01-24 04:22:32.958676: step: 250/463, loss: 0.002626551315188408 2023-01-24 04:22:33.557040: step: 252/463, loss: 0.008953112177550793 2023-01-24 04:22:34.228734: step: 254/463, loss: 0.317119300365448 2023-01-24 04:22:34.883491: step: 256/463, loss: 0.00023721116303931922 2023-01-24 04:22:35.462381: step: 258/463, loss: 0.0025030833203345537 2023-01-24 04:22:36.050323: step: 260/463, loss: 0.027066366747021675 2023-01-24 04:22:36.607053: step: 262/463, loss: 0.07746951282024384 2023-01-24 04:22:37.193220: step: 264/463, loss: 0.013095933012664318 2023-01-24 04:22:37.866443: step: 266/463, loss: 0.00036936014657840133 2023-01-24 04:22:38.419311: step: 268/463, loss: 0.0029038519132882357 2023-01-24 04:22:39.095856: step: 270/463, loss: 0.06907950341701508 2023-01-24 04:22:39.698113: step: 272/463, loss: 0.005004499107599258 2023-01-24 04:22:40.363116: step: 274/463, loss: 0.14616289734840393 2023-01-24 04:22:40.993952: step: 276/463, loss: 0.019431130960583687 2023-01-24 04:22:41.577001: step: 278/463, loss: 0.0363524928689003 2023-01-24 04:22:42.172401: step: 280/463, loss: 0.12175170332193375 2023-01-24 04:22:42.761922: step: 282/463, loss: 0.012057164683938026 2023-01-24 04:22:43.385911: step: 284/463, loss: 3.346443918417208e-05 2023-01-24 04:22:44.021871: step: 286/463, loss: 0.02247983030974865 2023-01-24 04:22:44.620548: step: 288/463, loss: 0.004407068714499474 2023-01-24 04:22:45.205247: step: 290/463, loss: 0.013931292109191418 2023-01-24 04:22:45.813433: step: 292/463, loss: 0.01208361517637968 2023-01-24 04:22:46.414724: step: 294/463, loss: 0.015463906340301037 2023-01-24 04:22:47.007144: step: 296/463, loss: 0.00584445521235466 2023-01-24 04:22:47.590033: step: 298/463, loss: 0.024595266208052635 2023-01-24 04:22:48.229446: step: 300/463, loss: 0.01786055602133274 2023-01-24 04:22:49.062991: step: 302/463, loss: 0.22887186706066132 2023-01-24 04:22:49.758179: step: 304/463, loss: 0.002958871191367507 2023-01-24 04:22:50.377769: step: 306/463, loss: 0.17137330770492554 2023-01-24 04:22:50.930127: step: 308/463, loss: 0.0012828095350414515 2023-01-24 04:22:51.485389: step: 310/463, loss: 0.023614313453435898 2023-01-24 04:22:52.086173: step: 312/463, loss: 0.00012902781600132585 2023-01-24 04:22:52.672972: step: 314/463, loss: 0.007176273036748171 2023-01-24 04:22:53.240297: step: 316/463, loss: 0.0020926850847899914 2023-01-24 04:22:53.854457: step: 318/463, loss: 0.00315940217114985 2023-01-24 04:22:54.475557: step: 320/463, loss: 0.011579002253711224 2023-01-24 04:22:55.101519: step: 322/463, loss: 0.017291663214564323 2023-01-24 04:22:55.657580: step: 324/463, loss: 9.207376024278346e-06 2023-01-24 04:22:56.247000: step: 326/463, loss: 0.00482725165784359 2023-01-24 04:22:56.798303: step: 328/463, loss: 0.02736220695078373 2023-01-24 04:22:57.384805: step: 330/463, loss: 0.00038527837023139 2023-01-24 04:22:58.084007: step: 332/463, loss: 0.08366252481937408 2023-01-24 04:22:58.707221: step: 334/463, loss: 0.01787085086107254 2023-01-24 04:22:59.317607: step: 336/463, loss: 0.01859370619058609 2023-01-24 04:22:59.983876: step: 338/463, loss: 0.002084902720525861 2023-01-24 04:23:00.681857: step: 340/463, loss: 0.009957806207239628 2023-01-24 04:23:01.282213: step: 342/463, loss: 0.008032606914639473 2023-01-24 04:23:01.849384: step: 344/463, loss: 0.009560206905007362 2023-01-24 04:23:02.459033: step: 346/463, loss: 0.0031678732484579086 2023-01-24 04:23:03.061650: step: 348/463, loss: 0.015052815899252892 2023-01-24 04:23:03.652743: step: 350/463, loss: 0.043149806559085846 2023-01-24 04:23:04.211501: step: 352/463, loss: 0.004476632457226515 2023-01-24 04:23:04.827302: step: 354/463, loss: 0.14227645099163055 2023-01-24 04:23:05.386463: step: 356/463, loss: 0.009312931448221207 2023-01-24 04:23:05.937786: step: 358/463, loss: 0.013149126432836056 2023-01-24 04:23:06.607827: step: 360/463, loss: 0.0005433306214399636 2023-01-24 04:23:07.212040: step: 362/463, loss: 0.012858019210398197 2023-01-24 04:23:07.806223: step: 364/463, loss: 0.030376385897397995 2023-01-24 04:23:08.330590: step: 366/463, loss: 0.007295642979443073 2023-01-24 04:23:08.897215: step: 368/463, loss: 0.0022440587636083364 2023-01-24 04:23:09.470345: step: 370/463, loss: 0.005413179751485586 2023-01-24 04:23:10.100123: step: 372/463, loss: 0.010031335055828094 2023-01-24 04:23:10.649662: step: 374/463, loss: 4.012528370367363e-05 2023-01-24 04:23:11.205534: step: 376/463, loss: 0.00036025006556883454 2023-01-24 04:23:11.796953: step: 378/463, loss: 0.14191597700119019 2023-01-24 04:23:12.437062: step: 380/463, loss: 0.20364102721214294 2023-01-24 04:23:13.059163: step: 382/463, loss: 0.09716681391000748 2023-01-24 04:23:13.703397: step: 384/463, loss: 0.0019270686898380518 2023-01-24 04:23:14.304963: step: 386/463, loss: 0.03987650200724602 2023-01-24 04:23:14.929097: step: 388/463, loss: 0.003833697410300374 2023-01-24 04:23:15.543439: step: 390/463, loss: 4.997056385036558e-05 2023-01-24 04:23:16.226499: step: 392/463, loss: 0.060596466064453125 2023-01-24 04:23:16.822312: step: 394/463, loss: 0.03140886127948761 2023-01-24 04:23:17.445600: step: 396/463, loss: 0.02825196646153927 2023-01-24 04:23:18.045762: step: 398/463, loss: 0.0007397735607810318 2023-01-24 04:23:18.646811: step: 400/463, loss: 0.02667791210114956 2023-01-24 04:23:19.267066: step: 402/463, loss: 0.18926508724689484 2023-01-24 04:23:19.861507: step: 404/463, loss: 0.016279099509119987 2023-01-24 04:23:20.467613: step: 406/463, loss: 0.022808557376265526 2023-01-24 04:23:21.101787: step: 408/463, loss: 0.002932406961917877 2023-01-24 04:23:21.677803: step: 410/463, loss: 0.12233137339353561 2023-01-24 04:23:22.251550: step: 412/463, loss: 0.001009731786325574 2023-01-24 04:23:22.861025: step: 414/463, loss: 0.0023473717737942934 2023-01-24 04:23:23.420948: step: 416/463, loss: 0.009441317990422249 2023-01-24 04:23:23.987102: step: 418/463, loss: 0.0001928530546138063 2023-01-24 04:23:24.655878: step: 420/463, loss: 0.0003137665626127273 2023-01-24 04:23:25.274915: step: 422/463, loss: 0.0004363911575637758 2023-01-24 04:23:25.863989: step: 424/463, loss: 0.004309255629777908 2023-01-24 04:23:26.487258: step: 426/463, loss: 0.037607211619615555 2023-01-24 04:23:27.057971: step: 428/463, loss: 0.006302648689597845 2023-01-24 04:23:27.664085: step: 430/463, loss: 0.024068133905529976 2023-01-24 04:23:28.352392: step: 432/463, loss: 0.011388716287910938 2023-01-24 04:23:29.048836: step: 434/463, loss: 0.054872121661901474 2023-01-24 04:23:29.710461: step: 436/463, loss: 0.007197789382189512 2023-01-24 04:23:30.298272: step: 438/463, loss: 0.30809688568115234 2023-01-24 04:23:30.884759: step: 440/463, loss: 0.011834491044282913 2023-01-24 04:23:31.500520: step: 442/463, loss: 0.01220619585365057 2023-01-24 04:23:32.139195: step: 444/463, loss: 0.07357313483953476 2023-01-24 04:23:32.759704: step: 446/463, loss: 0.016274893656373024 2023-01-24 04:23:33.347612: step: 448/463, loss: 0.009248634800314903 2023-01-24 04:23:33.982159: step: 450/463, loss: 0.007376141380518675 2023-01-24 04:23:34.626126: step: 452/463, loss: 6.673377356491983e-05 2023-01-24 04:23:35.240644: step: 454/463, loss: 0.05586463212966919 2023-01-24 04:23:35.883284: step: 456/463, loss: 0.015556227415800095 2023-01-24 04:23:36.523233: step: 458/463, loss: 0.015799710527062416 2023-01-24 04:23:37.054071: step: 460/463, loss: 0.003802322782576084 2023-01-24 04:23:37.692346: step: 462/463, loss: 0.015119172632694244 2023-01-24 04:23:38.315173: step: 464/463, loss: 0.0004248555633239448 2023-01-24 04:23:38.888715: step: 466/463, loss: 0.0005464109126478434 2023-01-24 04:23:39.514637: step: 468/463, loss: 0.001078117173165083 2023-01-24 04:23:40.185148: step: 470/463, loss: 0.015052588656544685 2023-01-24 04:23:40.819600: step: 472/463, loss: 0.08557146042585373 2023-01-24 04:23:41.509700: step: 474/463, loss: 0.10655432194471359 2023-01-24 04:23:42.122565: step: 476/463, loss: 0.0012556456495076418 2023-01-24 04:23:42.710469: step: 478/463, loss: 0.020708682015538216 2023-01-24 04:23:43.324028: step: 480/463, loss: 0.006127460394054651 2023-01-24 04:23:43.978747: step: 482/463, loss: 0.27468377351760864 2023-01-24 04:23:44.545320: step: 484/463, loss: 0.017907043918967247 2023-01-24 04:23:45.106213: step: 486/463, loss: 0.0036532816011458635 2023-01-24 04:23:45.671856: step: 488/463, loss: 0.01151213888078928 2023-01-24 04:23:46.329434: step: 490/463, loss: 0.046640198677778244 2023-01-24 04:23:46.933150: step: 492/463, loss: 0.00032133155036717653 2023-01-24 04:23:47.557375: step: 494/463, loss: 0.004813730716705322 2023-01-24 04:23:48.186736: step: 496/463, loss: 0.00733962282538414 2023-01-24 04:23:48.777973: step: 498/463, loss: 0.00035385185037739575 2023-01-24 04:23:49.376939: step: 500/463, loss: 0.004682705271989107 2023-01-24 04:23:49.952402: step: 502/463, loss: 0.0029303315095603466 2023-01-24 04:23:50.533930: step: 504/463, loss: 0.007881103083491325 2023-01-24 04:23:51.128615: step: 506/463, loss: 0.0009046468767337501 2023-01-24 04:23:51.699494: step: 508/463, loss: 0.0065812962129712105 2023-01-24 04:23:52.277737: step: 510/463, loss: 0.026188207790255547 2023-01-24 04:23:52.893471: step: 512/463, loss: 0.058072272688150406 2023-01-24 04:23:53.564711: step: 514/463, loss: 0.08100173622369766 2023-01-24 04:23:54.107083: step: 516/463, loss: 0.004734456539154053 2023-01-24 04:23:54.750474: step: 518/463, loss: 0.007158897817134857 2023-01-24 04:23:55.328126: step: 520/463, loss: 0.030324876308441162 2023-01-24 04:23:55.977150: step: 522/463, loss: 0.024162674322724342 2023-01-24 04:23:56.562084: step: 524/463, loss: 0.12371625006198883 2023-01-24 04:23:57.125802: step: 526/463, loss: 0.0036596404388546944 2023-01-24 04:23:57.736999: step: 528/463, loss: 0.004540819674730301 2023-01-24 04:23:58.306385: step: 530/463, loss: 0.003917932976037264 2023-01-24 04:23:58.891760: step: 532/463, loss: 0.008182617835700512 2023-01-24 04:23:59.462582: step: 534/463, loss: 0.001537121832370758 2023-01-24 04:24:00.090167: step: 536/463, loss: 0.0002586818009149283 2023-01-24 04:24:00.711025: step: 538/463, loss: 0.0039536720141768456 2023-01-24 04:24:01.303675: step: 540/463, loss: 0.0004113368922844529 2023-01-24 04:24:01.893968: step: 542/463, loss: 9.025986219057813e-05 2023-01-24 04:24:02.477620: step: 544/463, loss: 0.004117557313293219 2023-01-24 04:24:03.178150: step: 546/463, loss: 0.08571869134902954 2023-01-24 04:24:03.832203: step: 548/463, loss: 0.06169676035642624 2023-01-24 04:24:04.448652: step: 550/463, loss: 0.022573435679078102 2023-01-24 04:24:05.173928: step: 552/463, loss: 0.021124515682458878 2023-01-24 04:24:05.779553: step: 554/463, loss: 0.0014513990608975291 2023-01-24 04:24:06.380115: step: 556/463, loss: 0.004664327949285507 2023-01-24 04:24:07.016812: step: 558/463, loss: 0.0023477845825254917 2023-01-24 04:24:07.635912: step: 560/463, loss: 0.1622641384601593 2023-01-24 04:24:08.250068: step: 562/463, loss: 0.8554898500442505 2023-01-24 04:24:08.862844: step: 564/463, loss: 0.012670737691223621 2023-01-24 04:24:09.644762: step: 566/463, loss: 0.006660348270088434 2023-01-24 04:24:10.313214: step: 568/463, loss: 0.06424423307180405 2023-01-24 04:24:11.041727: step: 570/463, loss: 2.2366831302642822 2023-01-24 04:24:11.641740: step: 572/463, loss: 0.2683389484882355 2023-01-24 04:24:12.291500: step: 574/463, loss: 0.015657979995012283 2023-01-24 04:24:12.951069: step: 576/463, loss: 0.17777566611766815 2023-01-24 04:24:13.607204: step: 578/463, loss: 0.17410486936569214 2023-01-24 04:24:14.233381: step: 580/463, loss: 0.036822330206632614 2023-01-24 04:24:14.820481: step: 582/463, loss: 0.001787062268704176 2023-01-24 04:24:15.439373: step: 584/463, loss: 0.0072268275544047356 2023-01-24 04:24:15.987257: step: 586/463, loss: 0.005326924845576286 2023-01-24 04:24:16.624756: step: 588/463, loss: 0.013666735030710697 2023-01-24 04:24:17.212982: step: 590/463, loss: 0.009873650968074799 2023-01-24 04:24:17.862271: step: 592/463, loss: 0.004724420141428709 2023-01-24 04:24:18.428192: step: 594/463, loss: 0.09126558899879456 2023-01-24 04:24:19.113767: step: 596/463, loss: 0.011129779741168022 2023-01-24 04:24:19.683094: step: 598/463, loss: 0.48119986057281494 2023-01-24 04:24:20.299717: step: 600/463, loss: 0.005791015923023224 2023-01-24 04:24:20.894030: step: 602/463, loss: 0.0018832847708836198 2023-01-24 04:24:21.498975: step: 604/463, loss: 0.02286267839372158 2023-01-24 04:24:22.067018: step: 606/463, loss: 0.005899330601096153 2023-01-24 04:24:22.672068: step: 608/463, loss: 0.0006679327925667167 2023-01-24 04:24:23.344956: step: 610/463, loss: 0.13543611764907837 2023-01-24 04:24:23.917608: step: 612/463, loss: 0.0009585145162418485 2023-01-24 04:24:24.448266: step: 614/463, loss: 1.3077997209620662e-05 2023-01-24 04:24:25.051343: step: 616/463, loss: 0.015752624720335007 2023-01-24 04:24:25.709370: step: 618/463, loss: 0.018556224182248116 2023-01-24 04:24:26.373827: step: 620/463, loss: 0.11773164570331573 2023-01-24 04:24:26.982978: step: 622/463, loss: 2.0934934616088867 2023-01-24 04:24:27.590116: step: 624/463, loss: 0.001118533662520349 2023-01-24 04:24:28.262521: step: 626/463, loss: 0.08965321630239487 2023-01-24 04:24:28.854212: step: 628/463, loss: 0.005063134711235762 2023-01-24 04:24:29.534303: step: 630/463, loss: 0.0180449727922678 2023-01-24 04:24:30.143193: step: 632/463, loss: 0.002079880330711603 2023-01-24 04:24:30.669426: step: 634/463, loss: 0.0003964028146583587 2023-01-24 04:24:31.295877: step: 636/463, loss: 0.0011587260523810983 2023-01-24 04:24:31.930551: step: 638/463, loss: 0.007864152081310749 2023-01-24 04:24:32.615159: step: 640/463, loss: 0.021486135199666023 2023-01-24 04:24:33.285129: step: 642/463, loss: 0.006428128574043512 2023-01-24 04:24:33.924293: step: 644/463, loss: 0.006357813719660044 2023-01-24 04:24:34.472494: step: 646/463, loss: 0.03759616240859032 2023-01-24 04:24:35.070585: step: 648/463, loss: 0.021960265934467316 2023-01-24 04:24:35.649509: step: 650/463, loss: 0.002491376828402281 2023-01-24 04:24:36.275226: step: 652/463, loss: 1.3410999599727802e-06 2023-01-24 04:24:36.835583: step: 654/463, loss: 0.004769804887473583 2023-01-24 04:24:37.440341: step: 656/463, loss: 0.009639819152653217 2023-01-24 04:24:38.037232: step: 658/463, loss: 0.029759405180811882 2023-01-24 04:24:38.597181: step: 660/463, loss: 0.020491939038038254 2023-01-24 04:24:39.200541: step: 662/463, loss: 0.04505288973450661 2023-01-24 04:24:39.806395: step: 664/463, loss: 0.004504587966948748 2023-01-24 04:24:40.407083: step: 666/463, loss: 0.002768989186733961 2023-01-24 04:24:40.969548: step: 668/463, loss: 0.05640977621078491 2023-01-24 04:24:41.538191: step: 670/463, loss: 0.00015957873256411403 2023-01-24 04:24:42.153529: step: 672/463, loss: 0.00310256564989686 2023-01-24 04:24:42.770245: step: 674/463, loss: 0.035309288650751114 2023-01-24 04:24:43.418231: step: 676/463, loss: 0.055911604315042496 2023-01-24 04:24:44.057192: step: 678/463, loss: 0.012881767004728317 2023-01-24 04:24:44.676987: step: 680/463, loss: 0.005857378710061312 2023-01-24 04:24:45.423045: step: 682/463, loss: 0.004019935615360737 2023-01-24 04:24:46.144722: step: 684/463, loss: 0.02304874174296856 2023-01-24 04:24:46.738120: step: 686/463, loss: 0.013506009243428707 2023-01-24 04:24:47.329827: step: 688/463, loss: 0.0015421916032209992 2023-01-24 04:24:47.905247: step: 690/463, loss: 0.011711434461176395 2023-01-24 04:24:48.456899: step: 692/463, loss: 0.04503439739346504 2023-01-24 04:24:49.048246: step: 694/463, loss: 0.0005720967892557383 2023-01-24 04:24:49.699284: step: 696/463, loss: 5.609537765849382e-06 2023-01-24 04:24:50.351785: step: 698/463, loss: 0.046297699213027954 2023-01-24 04:24:50.989014: step: 700/463, loss: 0.02118261717259884 2023-01-24 04:24:51.574050: step: 702/463, loss: 0.0011419239453971386 2023-01-24 04:24:52.314797: step: 704/463, loss: 0.046533890068531036 2023-01-24 04:24:52.966851: step: 706/463, loss: 0.012074626982212067 2023-01-24 04:24:53.522449: step: 708/463, loss: 0.41300857067108154 2023-01-24 04:24:54.106017: step: 710/463, loss: 0.03696899116039276 2023-01-24 04:24:54.702779: step: 712/463, loss: 0.02137652412056923 2023-01-24 04:24:55.365841: step: 714/463, loss: 0.0122491754591465 2023-01-24 04:24:55.946270: step: 716/463, loss: 0.004200242459774017 2023-01-24 04:24:56.575754: step: 718/463, loss: 0.030806398019194603 2023-01-24 04:24:57.225300: step: 720/463, loss: 0.004796880763024092 2023-01-24 04:24:57.833704: step: 722/463, loss: 0.010426010005176067 2023-01-24 04:24:58.362359: step: 724/463, loss: 0.013200612738728523 2023-01-24 04:24:58.960514: step: 726/463, loss: 0.001605497905984521 2023-01-24 04:24:59.515131: step: 728/463, loss: 0.006478451658040285 2023-01-24 04:25:00.106845: step: 730/463, loss: 0.01534070074558258 2023-01-24 04:25:00.730760: step: 732/463, loss: 0.13064607977867126 2023-01-24 04:25:01.368585: step: 734/463, loss: 0.1301373839378357 2023-01-24 04:25:02.024136: step: 736/463, loss: 0.10907932370901108 2023-01-24 04:25:02.629840: step: 738/463, loss: 0.001476506469771266 2023-01-24 04:25:03.222674: step: 740/463, loss: 0.03859546035528183 2023-01-24 04:25:03.776323: step: 742/463, loss: 0.0015190503327175975 2023-01-24 04:25:04.393483: step: 744/463, loss: 0.004878915846347809 2023-01-24 04:25:05.092601: step: 746/463, loss: 0.017407719045877457 2023-01-24 04:25:05.696158: step: 748/463, loss: 0.1556539684534073 2023-01-24 04:25:06.287742: step: 750/463, loss: 0.004552817903459072 2023-01-24 04:25:06.808548: step: 752/463, loss: 0.008616938255727291 2023-01-24 04:25:07.369504: step: 754/463, loss: 0.0489373654127121 2023-01-24 04:25:08.030275: step: 756/463, loss: 0.012599979527294636 2023-01-24 04:25:08.728001: step: 758/463, loss: 0.0023006293922662735 2023-01-24 04:25:09.319586: step: 760/463, loss: 0.0042320359498262405 2023-01-24 04:25:09.940937: step: 762/463, loss: 0.027670301496982574 2023-01-24 04:25:10.548700: step: 764/463, loss: 0.042554546147584915 2023-01-24 04:25:11.170272: step: 766/463, loss: 0.0015436145476996899 2023-01-24 04:25:11.760448: step: 768/463, loss: 0.00019147360580973327 2023-01-24 04:25:12.355326: step: 770/463, loss: 0.012080904096364975 2023-01-24 04:25:12.963666: step: 772/463, loss: 0.011079979129135609 2023-01-24 04:25:13.629227: step: 774/463, loss: 0.037638623267412186 2023-01-24 04:25:14.322210: step: 776/463, loss: 0.005234105978161097 2023-01-24 04:25:14.989701: step: 778/463, loss: 0.00018205822561867535 2023-01-24 04:25:15.565023: step: 780/463, loss: 0.0032096304930746555 2023-01-24 04:25:16.312443: step: 782/463, loss: 1.5367567539215088 2023-01-24 04:25:16.931193: step: 784/463, loss: 0.004942040890455246 2023-01-24 04:25:17.554626: step: 786/463, loss: 0.011632569134235382 2023-01-24 04:25:18.133259: step: 788/463, loss: 0.024039188399910927 2023-01-24 04:25:18.762752: step: 790/463, loss: 0.03575456142425537 2023-01-24 04:25:19.466830: step: 792/463, loss: 0.0038403149228543043 2023-01-24 04:25:20.047056: step: 794/463, loss: 0.001395332277752459 2023-01-24 04:25:20.679836: step: 796/463, loss: 0.001705832313746214 2023-01-24 04:25:21.317585: step: 798/463, loss: 0.02744349092245102 2023-01-24 04:25:21.949756: step: 800/463, loss: 0.001902124029584229 2023-01-24 04:25:22.535308: step: 802/463, loss: 0.008719549514353275 2023-01-24 04:25:23.112583: step: 804/463, loss: 0.009482142515480518 2023-01-24 04:25:23.699223: step: 806/463, loss: 0.0058181630447506905 2023-01-24 04:25:24.275564: step: 808/463, loss: 0.01328601036220789 2023-01-24 04:25:24.907087: step: 810/463, loss: 0.0021279442589730024 2023-01-24 04:25:25.472443: step: 812/463, loss: 0.01440698653459549 2023-01-24 04:25:26.106770: step: 814/463, loss: 5.5210919526871294e-05 2023-01-24 04:25:26.761586: step: 816/463, loss: 0.07560383528470993 2023-01-24 04:25:27.440124: step: 818/463, loss: 0.199522465467453 2023-01-24 04:25:28.033050: step: 820/463, loss: 0.0036417939700186253 2023-01-24 04:25:28.610624: step: 822/463, loss: 0.007410728372633457 2023-01-24 04:25:29.163831: step: 824/463, loss: 0.0014070122269913554 2023-01-24 04:25:29.796048: step: 826/463, loss: 0.03565063700079918 2023-01-24 04:25:30.416221: step: 828/463, loss: 0.007268161978572607 2023-01-24 04:25:31.053286: step: 830/463, loss: 0.010681518353521824 2023-01-24 04:25:31.675926: step: 832/463, loss: 0.020135777071118355 2023-01-24 04:25:32.333243: step: 834/463, loss: 0.0031248119194060564 2023-01-24 04:25:32.936627: step: 836/463, loss: 0.25638529658317566 2023-01-24 04:25:33.524198: step: 838/463, loss: 9.190763375954702e-05 2023-01-24 04:25:34.089619: step: 840/463, loss: 0.026254795491695404 2023-01-24 04:25:34.687007: step: 842/463, loss: 0.016840031370520592 2023-01-24 04:25:35.265121: step: 844/463, loss: 0.028639748692512512 2023-01-24 04:25:35.825372: step: 846/463, loss: 0.009047960862517357 2023-01-24 04:25:36.391859: step: 848/463, loss: 0.0038577434606850147 2023-01-24 04:25:37.076815: step: 850/463, loss: 0.021460607647895813 2023-01-24 04:25:37.643585: step: 852/463, loss: 0.01539433840662241 2023-01-24 04:25:38.223316: step: 854/463, loss: 0.0009205577662214637 2023-01-24 04:25:38.796888: step: 856/463, loss: 0.03710896149277687 2023-01-24 04:25:39.373616: step: 858/463, loss: 0.002890982199460268 2023-01-24 04:25:39.918100: step: 860/463, loss: 0.007611353415995836 2023-01-24 04:25:40.511690: step: 862/463, loss: 0.09223431348800659 2023-01-24 04:25:41.110202: step: 864/463, loss: 0.0028247686568647623 2023-01-24 04:25:41.690995: step: 866/463, loss: 0.0004354445554781705 2023-01-24 04:25:42.290269: step: 868/463, loss: 0.026444759219884872 2023-01-24 04:25:42.961599: step: 870/463, loss: 0.02717723697423935 2023-01-24 04:25:43.519696: step: 872/463, loss: 0.00314628635533154 2023-01-24 04:25:44.122034: step: 874/463, loss: 0.03820386901497841 2023-01-24 04:25:44.721174: step: 876/463, loss: 0.0016157119534909725 2023-01-24 04:25:45.330497: step: 878/463, loss: 0.017151914536952972 2023-01-24 04:25:45.925910: step: 880/463, loss: 0.0009344227146357298 2023-01-24 04:25:46.484876: step: 882/463, loss: 0.0005001876852475107 2023-01-24 04:25:47.092203: step: 884/463, loss: 0.006187606602907181 2023-01-24 04:25:47.714440: step: 886/463, loss: 0.00033557225833646953 2023-01-24 04:25:48.300261: step: 888/463, loss: 0.05378030240535736 2023-01-24 04:25:48.830038: step: 890/463, loss: 0.004444250836968422 2023-01-24 04:25:49.456826: step: 892/463, loss: 0.04814674332737923 2023-01-24 04:25:50.072601: step: 894/463, loss: 0.01687469705939293 2023-01-24 04:25:50.753103: step: 896/463, loss: 0.018378719687461853 2023-01-24 04:25:51.391095: step: 898/463, loss: 0.0008980545680969954 2023-01-24 04:25:52.049721: step: 900/463, loss: 7.355018897214904e-05 2023-01-24 04:25:52.655529: step: 902/463, loss: 0.054010361433029175 2023-01-24 04:25:53.238946: step: 904/463, loss: 0.0034497552551329136 2023-01-24 04:25:53.821440: step: 906/463, loss: 0.004787243902683258 2023-01-24 04:25:54.398617: step: 908/463, loss: 0.03872756287455559 2023-01-24 04:25:54.950595: step: 910/463, loss: 0.07719968259334564 2023-01-24 04:25:55.574504: step: 912/463, loss: 0.007337230257689953 2023-01-24 04:25:56.152007: step: 914/463, loss: 0.0024565935600548983 2023-01-24 04:25:56.727516: step: 916/463, loss: 0.05244877561926842 2023-01-24 04:25:57.311284: step: 918/463, loss: 0.001092995866201818 2023-01-24 04:25:57.881863: step: 920/463, loss: 0.00066555937519297 2023-01-24 04:25:58.518977: step: 922/463, loss: 0.06687462329864502 2023-01-24 04:25:59.184873: step: 924/463, loss: 0.02133883535861969 2023-01-24 04:25:59.805295: step: 926/463, loss: 0.005229978356510401 ================================================== Loss: 0.093 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3302675481691209, 'r': 0.3214938372120665, 'f1': 0.32582163886684423}, 'combined': 0.24007910232293783, 'epoch': 34} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3626883012603883, 'r': 0.3130006369515917, 'f1': 0.3360175456173528}, 'combined': 0.23639425319813767, 'epoch': 34} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3423764807298406, 'r': 0.3300327366428065, 'f1': 0.3360913086198242}, 'combined': 0.247646227404081, 'epoch': 34} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3667948252850706, 'r': 0.31874310144859846, 'f1': 0.3410849076249021}, 'combined': 0.2421702844136805, 'epoch': 34} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3565039176745541, 'r': 0.34365083525364987, 'f1': 0.3499594013114463}, 'combined': 0.25786482201896044, 'epoch': 34} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37789489678955906, 'r': 0.31320721140025465, 'f1': 0.34252364570514954}, 'combined': 0.24319178845065617, 'epoch': 34} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3017241379310345, 'r': 0.25, 'f1': 0.2734375}, 'combined': 0.18229166666666666, 'epoch': 34} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3017241379310345, 'r': 0.3804347826086957, 'f1': 0.3365384615384615}, 'combined': 0.16826923076923075, 'epoch': 34} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6388888888888888, 'r': 0.19827586206896552, 'f1': 0.3026315789473684}, 'combined': 0.20175438596491227, 'epoch': 34} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3207567698259188, 'r': 0.3146703036053131, 'f1': 0.3176843869731801}, 'combined': 0.23408323250655372, 'epoch': 25} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3686692880345029, 'r': 0.31751883707683626, 'f1': 0.34118761114866797}, 'combined': 0.24224320391555423, 'epoch': 25} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3392857142857143, 'r': 0.41304347826086957, 'f1': 0.3725490196078432}, 'combined': 0.1862745098039216, 'epoch': 25} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3565039176745541, 'r': 0.34365083525364987, 'f1': 0.3499594013114463}, 'combined': 0.25786482201896044, 'epoch': 34} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37789489678955906, 'r': 0.31320721140025465, 'f1': 0.34252364570514954}, 'combined': 0.24319178845065617, 'epoch': 34} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6388888888888888, 'r': 0.19827586206896552, 'f1': 0.3026315789473684}, 'combined': 0.20175438596491227, 'epoch': 34} ****************************** Epoch: 35 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:28:44.819273: step: 2/463, loss: 0.04822046682238579 2023-01-24 04:28:45.413936: step: 4/463, loss: 0.002158523304387927 2023-01-24 04:28:46.002020: step: 6/463, loss: 0.0013980388175696135 2023-01-24 04:28:46.626429: step: 8/463, loss: 0.009297413751482964 2023-01-24 04:28:47.216919: step: 10/463, loss: 0.003952270373702049 2023-01-24 04:28:47.769572: step: 12/463, loss: 0.0008725534426048398 2023-01-24 04:28:48.375861: step: 14/463, loss: 0.16605637967586517 2023-01-24 04:28:48.912918: step: 16/463, loss: 0.0010938210180029273 2023-01-24 04:28:49.501391: step: 18/463, loss: 0.0005218334845267236 2023-01-24 04:28:50.109629: step: 20/463, loss: 0.0625392496585846 2023-01-24 04:28:50.723368: step: 22/463, loss: 0.0038597893435508013 2023-01-24 04:28:51.338925: step: 24/463, loss: 0.000690078770276159 2023-01-24 04:28:51.905508: step: 26/463, loss: 0.0006426943582482636 2023-01-24 04:28:52.579635: step: 28/463, loss: 0.006238309200853109 2023-01-24 04:28:53.196206: step: 30/463, loss: 0.009806935675442219 2023-01-24 04:28:53.826216: step: 32/463, loss: 0.0013804049231112003 2023-01-24 04:28:54.384321: step: 34/463, loss: 0.03584982827305794 2023-01-24 04:28:54.907006: step: 36/463, loss: 0.004281509201973677 2023-01-24 04:28:55.521218: step: 38/463, loss: 0.021742837503552437 2023-01-24 04:28:56.126954: step: 40/463, loss: 0.00011590655776672065 2023-01-24 04:28:56.722002: step: 42/463, loss: 0.012302846647799015 2023-01-24 04:28:57.322461: step: 44/463, loss: 0.0006030056392773986 2023-01-24 04:28:57.991505: step: 46/463, loss: 0.0004393141425680369 2023-01-24 04:28:58.640153: step: 48/463, loss: 0.2774009108543396 2023-01-24 04:28:59.315334: step: 50/463, loss: 0.007367622572928667 2023-01-24 04:29:00.012572: step: 52/463, loss: 0.03183555603027344 2023-01-24 04:29:00.615223: step: 54/463, loss: 0.006588601972907782 2023-01-24 04:29:01.165936: step: 56/463, loss: 0.021664954721927643 2023-01-24 04:29:01.762484: step: 58/463, loss: 0.020707029849290848 2023-01-24 04:29:02.383472: step: 60/463, loss: 0.0005705213407054543 2023-01-24 04:29:03.054171: step: 62/463, loss: 0.010442380793392658 2023-01-24 04:29:03.730304: step: 64/463, loss: 0.0008257586159743369 2023-01-24 04:29:04.311719: step: 66/463, loss: 0.0013893481809645891 2023-01-24 04:29:04.908063: step: 68/463, loss: 0.00015084251936059445 2023-01-24 04:29:05.520569: step: 70/463, loss: 0.04242658242583275 2023-01-24 04:29:06.086738: step: 72/463, loss: 0.006558512803167105 2023-01-24 04:29:06.646358: step: 74/463, loss: 0.00017704968922771513 2023-01-24 04:29:07.273222: step: 76/463, loss: 0.000778281013481319 2023-01-24 04:29:07.890941: step: 78/463, loss: 0.0057979910634458065 2023-01-24 04:29:08.450273: step: 80/463, loss: 0.001166689908131957 2023-01-24 04:29:09.121494: step: 82/463, loss: 0.0012915873667225242 2023-01-24 04:29:09.688050: step: 84/463, loss: 0.05562686547636986 2023-01-24 04:29:10.264998: step: 86/463, loss: 0.001116053550504148 2023-01-24 04:29:10.865008: step: 88/463, loss: 0.004506578203290701 2023-01-24 04:29:11.497437: step: 90/463, loss: 0.002148211235180497 2023-01-24 04:29:12.096187: step: 92/463, loss: 0.008143628016114235 2023-01-24 04:29:12.690152: step: 94/463, loss: 0.06090735271573067 2023-01-24 04:29:13.263845: step: 96/463, loss: 0.007319955620914698 2023-01-24 04:29:13.873019: step: 98/463, loss: 0.0030701886862516403 2023-01-24 04:29:14.519265: step: 100/463, loss: 0.006018389482051134 2023-01-24 04:29:15.158350: step: 102/463, loss: 0.004597373306751251 2023-01-24 04:29:15.817695: step: 104/463, loss: 0.002303964225575328 2023-01-24 04:29:16.421999: step: 106/463, loss: 0.006760787218809128 2023-01-24 04:29:17.082936: step: 108/463, loss: 0.0004677173856180161 2023-01-24 04:29:17.759437: step: 110/463, loss: 0.0036391415633261204 2023-01-24 04:29:18.440382: step: 112/463, loss: 0.01895393431186676 2023-01-24 04:29:19.005350: step: 114/463, loss: 0.006875019986182451 2023-01-24 04:29:19.689738: step: 116/463, loss: 0.002898501930758357 2023-01-24 04:29:20.278017: step: 118/463, loss: 0.02936098538339138 2023-01-24 04:29:20.871152: step: 120/463, loss: 2.16602384170983e-05 2023-01-24 04:29:21.489655: step: 122/463, loss: 0.001396101200953126 2023-01-24 04:29:22.076762: step: 124/463, loss: 0.005346381105482578 2023-01-24 04:29:22.656323: step: 126/463, loss: 0.07028757780790329 2023-01-24 04:29:23.243598: step: 128/463, loss: 7.743483001831919e-05 2023-01-24 04:29:23.894234: step: 130/463, loss: 0.013012843206524849 2023-01-24 04:29:24.532768: step: 132/463, loss: 0.023345861583948135 2023-01-24 04:29:25.106741: step: 134/463, loss: 0.0029403914231806993 2023-01-24 04:29:25.683120: step: 136/463, loss: 0.077163927257061 2023-01-24 04:29:26.328764: step: 138/463, loss: 0.007124922703951597 2023-01-24 04:29:26.977800: step: 140/463, loss: 0.0036252187564969063 2023-01-24 04:29:27.624146: step: 142/463, loss: 0.0025374589022248983 2023-01-24 04:29:28.178752: step: 144/463, loss: 2.4825914806569926e-05 2023-01-24 04:29:28.771706: step: 146/463, loss: 0.03553524240851402 2023-01-24 04:29:29.389191: step: 148/463, loss: 0.038195207715034485 2023-01-24 04:29:29.979071: step: 150/463, loss: 0.13661576807498932 2023-01-24 04:29:30.587711: step: 152/463, loss: 0.00027044289163313806 2023-01-24 04:29:31.180858: step: 154/463, loss: 0.00021793687483295798 2023-01-24 04:29:31.848402: step: 156/463, loss: 0.006117647979408503 2023-01-24 04:29:32.455889: step: 158/463, loss: 0.008870885707437992 2023-01-24 04:29:33.075260: step: 160/463, loss: 0.00011546815221663564 2023-01-24 04:29:33.713197: step: 162/463, loss: 0.04062633216381073 2023-01-24 04:29:34.295303: step: 164/463, loss: 0.011970317922532558 2023-01-24 04:29:34.875585: step: 166/463, loss: 0.0005139735294505954 2023-01-24 04:29:35.410985: step: 168/463, loss: 0.007251867558807135 2023-01-24 04:29:35.984176: step: 170/463, loss: 0.0008014946943148971 2023-01-24 04:29:36.612703: step: 172/463, loss: 3.2538166124140844e-05 2023-01-24 04:29:37.165444: step: 174/463, loss: 0.012920981273055077 2023-01-24 04:29:37.786968: step: 176/463, loss: 0.00021379493409767747 2023-01-24 04:29:38.340918: step: 178/463, loss: 0.1730731725692749 2023-01-24 04:29:39.031411: step: 180/463, loss: 0.009500016458332539 2023-01-24 04:29:39.682744: step: 182/463, loss: 0.005619610659778118 2023-01-24 04:29:40.311335: step: 184/463, loss: 0.21767207980155945 2023-01-24 04:29:40.906679: step: 186/463, loss: 0.02148180641233921 2023-01-24 04:29:41.495172: step: 188/463, loss: 0.4801897406578064 2023-01-24 04:29:42.207500: step: 190/463, loss: 0.02611337974667549 2023-01-24 04:29:42.765311: step: 192/463, loss: 0.003988457378000021 2023-01-24 04:29:43.326628: step: 194/463, loss: 0.0008855963824316859 2023-01-24 04:29:43.954789: step: 196/463, loss: 0.009947630576789379 2023-01-24 04:29:44.556012: step: 198/463, loss: 0.006265044678002596 2023-01-24 04:29:45.119186: step: 200/463, loss: 0.0024271525908261538 2023-01-24 04:29:45.707825: step: 202/463, loss: 0.00040759460534900427 2023-01-24 04:29:46.290214: step: 204/463, loss: 0.005978697445243597 2023-01-24 04:29:46.880559: step: 206/463, loss: 0.01687988080084324 2023-01-24 04:29:47.449513: step: 208/463, loss: 0.0940324068069458 2023-01-24 04:29:48.084180: step: 210/463, loss: 0.05813317373394966 2023-01-24 04:29:48.677249: step: 212/463, loss: 3.7704983697040007e-05 2023-01-24 04:29:49.319506: step: 214/463, loss: 0.00020554148068185896 2023-01-24 04:29:49.941746: step: 216/463, loss: 0.016397764906287193 2023-01-24 04:29:50.596585: step: 218/463, loss: 0.021813957020640373 2023-01-24 04:29:51.226865: step: 220/463, loss: 0.006158279255032539 2023-01-24 04:29:51.825474: step: 222/463, loss: 0.005190558265894651 2023-01-24 04:29:52.436578: step: 224/463, loss: 0.022783830761909485 2023-01-24 04:29:53.036114: step: 226/463, loss: 0.026874296367168427 2023-01-24 04:29:53.585988: step: 228/463, loss: 0.0025953915901482105 2023-01-24 04:29:54.124456: step: 230/463, loss: 0.024667203426361084 2023-01-24 04:29:54.728400: step: 232/463, loss: 0.025035006925463676 2023-01-24 04:29:55.356103: step: 234/463, loss: 0.0037397989071905613 2023-01-24 04:29:55.963926: step: 236/463, loss: 0.04551269859075546 2023-01-24 04:29:56.533849: step: 238/463, loss: 0.007047099992632866 2023-01-24 04:29:57.118332: step: 240/463, loss: 0.022509906440973282 2023-01-24 04:29:57.684427: step: 242/463, loss: 0.000741275493055582 2023-01-24 04:29:58.281228: step: 244/463, loss: 0.000500971800647676 2023-01-24 04:29:58.902995: step: 246/463, loss: 0.0036605726927518845 2023-01-24 04:29:59.507981: step: 248/463, loss: 0.00798253808170557 2023-01-24 04:30:00.118280: step: 250/463, loss: 0.011122633703052998 2023-01-24 04:30:00.778046: step: 252/463, loss: 0.055964499711990356 2023-01-24 04:30:01.451610: step: 254/463, loss: 0.0007970133447088301 2023-01-24 04:30:02.033201: step: 256/463, loss: 2.6787713068188168e-05 2023-01-24 04:30:02.561317: step: 258/463, loss: 0.0002803103416226804 2023-01-24 04:30:03.154197: step: 260/463, loss: 0.0006095347343944013 2023-01-24 04:30:03.802144: step: 262/463, loss: 0.0009479480795562267 2023-01-24 04:30:04.411409: step: 264/463, loss: 0.0010543520329520106 2023-01-24 04:30:05.083975: step: 266/463, loss: 0.0027155810967087746 2023-01-24 04:30:05.691554: step: 268/463, loss: 7.256793975830078 2023-01-24 04:30:06.281518: step: 270/463, loss: 0.005046521313488483 2023-01-24 04:30:06.893442: step: 272/463, loss: 0.00236939312890172 2023-01-24 04:30:07.554849: step: 274/463, loss: 0.003081847680732608 2023-01-24 04:30:08.202048: step: 276/463, loss: 0.03386622667312622 2023-01-24 04:30:08.778407: step: 278/463, loss: 0.0018652734579518437 2023-01-24 04:30:09.371069: step: 280/463, loss: 0.003871675580739975 2023-01-24 04:30:09.968817: step: 282/463, loss: 0.0002147402847185731 2023-01-24 04:30:10.537679: step: 284/463, loss: 0.0031144635286182165 2023-01-24 04:30:11.135232: step: 286/463, loss: 0.03620857000350952 2023-01-24 04:30:11.735980: step: 288/463, loss: 3.42562489095144e-05 2023-01-24 04:30:12.373509: step: 290/463, loss: 0.03349597752094269 2023-01-24 04:30:12.954596: step: 292/463, loss: 0.002335303695872426 2023-01-24 04:30:13.518781: step: 294/463, loss: 0.01692109741270542 2023-01-24 04:30:14.074666: step: 296/463, loss: 0.0006516374414786696 2023-01-24 04:30:14.622180: step: 298/463, loss: 5.085577504360117e-05 2023-01-24 04:30:15.252398: step: 300/463, loss: 9.569768008077517e-05 2023-01-24 04:30:15.878035: step: 302/463, loss: 0.014764033257961273 2023-01-24 04:30:16.521150: step: 304/463, loss: 0.04688446223735809 2023-01-24 04:30:17.133693: step: 306/463, loss: 0.024546708911657333 2023-01-24 04:30:17.750079: step: 308/463, loss: 0.015640802681446075 2023-01-24 04:30:18.396717: step: 310/463, loss: 0.08678707480430603 2023-01-24 04:30:18.981375: step: 312/463, loss: 0.08490891754627228 2023-01-24 04:30:19.801213: step: 314/463, loss: 0.005431308876723051 2023-01-24 04:30:20.359807: step: 316/463, loss: 0.002540991175919771 2023-01-24 04:30:20.964781: step: 318/463, loss: 0.0249167550355196 2023-01-24 04:30:21.516055: step: 320/463, loss: 0.02033514529466629 2023-01-24 04:30:22.118056: step: 322/463, loss: 0.0018072357634082437 2023-01-24 04:30:22.675881: step: 324/463, loss: 0.00702770845964551 2023-01-24 04:30:23.270293: step: 326/463, loss: 0.016376325860619545 2023-01-24 04:30:23.828864: step: 328/463, loss: 0.008864479139447212 2023-01-24 04:30:24.498560: step: 330/463, loss: 0.008615607395768166 2023-01-24 04:30:25.100848: step: 332/463, loss: 0.006125255487859249 2023-01-24 04:30:25.659108: step: 334/463, loss: 0.0017233621329069138 2023-01-24 04:30:26.219031: step: 336/463, loss: 0.014164685271680355 2023-01-24 04:30:26.789757: step: 338/463, loss: 0.035829097032547 2023-01-24 04:30:27.362429: step: 340/463, loss: 0.0013827476650476456 2023-01-24 04:30:27.945397: step: 342/463, loss: 9.287288412451744e-05 2023-01-24 04:30:28.528773: step: 344/463, loss: 0.007758361287415028 2023-01-24 04:30:29.136758: step: 346/463, loss: 0.00985467154532671 2023-01-24 04:30:29.768925: step: 348/463, loss: 0.01450708881020546 2023-01-24 04:30:30.405358: step: 350/463, loss: 0.006206437945365906 2023-01-24 04:30:31.071555: step: 352/463, loss: 0.06110801920294762 2023-01-24 04:30:31.653261: step: 354/463, loss: 0.00960789155215025 2023-01-24 04:30:32.223743: step: 356/463, loss: 0.0004039070045109838 2023-01-24 04:30:32.853847: step: 358/463, loss: 0.03105982020497322 2023-01-24 04:30:33.483332: step: 360/463, loss: 0.0016352217644453049 2023-01-24 04:30:34.151011: step: 362/463, loss: 0.01712382398545742 2023-01-24 04:30:34.739337: step: 364/463, loss: 0.001710421871393919 2023-01-24 04:30:35.355548: step: 366/463, loss: 0.00014441710663959384 2023-01-24 04:30:36.005735: step: 368/463, loss: 0.0017902238760143518 2023-01-24 04:30:36.660477: step: 370/463, loss: 0.05283595621585846 2023-01-24 04:30:37.235093: step: 372/463, loss: 0.40565189719200134 2023-01-24 04:30:37.883701: step: 374/463, loss: 0.010585200041532516 2023-01-24 04:30:38.522979: step: 376/463, loss: 0.024601001292467117 2023-01-24 04:30:39.145838: step: 378/463, loss: 0.0008520757546648383 2023-01-24 04:30:39.836712: step: 380/463, loss: 0.029361318796873093 2023-01-24 04:30:40.434541: step: 382/463, loss: 0.03575719892978668 2023-01-24 04:30:41.048973: step: 384/463, loss: 0.0004181198892183602 2023-01-24 04:30:41.657377: step: 386/463, loss: 0.0009625989478081465 2023-01-24 04:30:42.184220: step: 388/463, loss: 0.0004738946445286274 2023-01-24 04:30:42.829636: step: 390/463, loss: 0.18986430764198303 2023-01-24 04:30:43.400306: step: 392/463, loss: 0.008693217299878597 2023-01-24 04:30:43.997163: step: 394/463, loss: 0.016747457906603813 2023-01-24 04:30:44.567744: step: 396/463, loss: 0.04488628730177879 2023-01-24 04:30:45.179957: step: 398/463, loss: 0.05225371941924095 2023-01-24 04:30:45.816207: step: 400/463, loss: 0.015386187471449375 2023-01-24 04:30:46.458471: step: 402/463, loss: 0.04722699522972107 2023-01-24 04:30:47.079555: step: 404/463, loss: 0.01991620846092701 2023-01-24 04:30:47.618816: step: 406/463, loss: 0.25877058506011963 2023-01-24 04:30:48.169918: step: 408/463, loss: 0.00028566044056788087 2023-01-24 04:30:48.769834: step: 410/463, loss: 0.013780470937490463 2023-01-24 04:30:49.446253: step: 412/463, loss: 0.01295408234000206 2023-01-24 04:30:50.020956: step: 414/463, loss: 0.004343291278928518 2023-01-24 04:30:50.632859: step: 416/463, loss: 0.1888481080532074 2023-01-24 04:30:51.277019: step: 418/463, loss: 0.0010249022161588073 2023-01-24 04:30:51.902373: step: 420/463, loss: 0.00026630135835148394 2023-01-24 04:30:52.532732: step: 422/463, loss: 0.0013685607118532062 2023-01-24 04:30:53.180304: step: 424/463, loss: 0.005707717500627041 2023-01-24 04:30:53.718517: step: 426/463, loss: 0.020445488393306732 2023-01-24 04:30:54.325095: step: 428/463, loss: 0.01968957670032978 2023-01-24 04:30:54.964169: step: 430/463, loss: 0.028790270909667015 2023-01-24 04:30:55.614527: step: 432/463, loss: 0.0002659510064404458 2023-01-24 04:30:56.235999: step: 434/463, loss: 0.0014418431092053652 2023-01-24 04:30:56.844019: step: 436/463, loss: 0.8340014815330505 2023-01-24 04:30:57.453536: step: 438/463, loss: 0.02282402478158474 2023-01-24 04:30:58.089849: step: 440/463, loss: 0.0045057847164571285 2023-01-24 04:30:58.686734: step: 442/463, loss: 0.17495182156562805 2023-01-24 04:30:59.353566: step: 444/463, loss: 0.012013299390673637 2023-01-24 04:31:00.031231: step: 446/463, loss: 0.008535245433449745 2023-01-24 04:31:00.644342: step: 448/463, loss: 0.08370690047740936 2023-01-24 04:31:01.239942: step: 450/463, loss: 0.0005721264751628041 2023-01-24 04:31:01.820932: step: 452/463, loss: 0.0030591320246458054 2023-01-24 04:31:02.470614: step: 454/463, loss: 0.0013202594127506018 2023-01-24 04:31:03.065358: step: 456/463, loss: 0.000509490491822362 2023-01-24 04:31:03.671915: step: 458/463, loss: 0.011060490272939205 2023-01-24 04:31:04.280655: step: 460/463, loss: 0.2408601939678192 2023-01-24 04:31:04.904331: step: 462/463, loss: 0.01825953647494316 2023-01-24 04:31:05.582041: step: 464/463, loss: 0.015857405960559845 2023-01-24 04:31:06.092474: step: 466/463, loss: 0.00021608831593766809 2023-01-24 04:31:06.706359: step: 468/463, loss: 0.001613238942809403 2023-01-24 04:31:07.382385: step: 470/463, loss: 0.00015454103413503617 2023-01-24 04:31:07.954639: step: 472/463, loss: 0.0008052645134739578 2023-01-24 04:31:08.559539: step: 474/463, loss: 0.008413552306592464 2023-01-24 04:31:09.296616: step: 476/463, loss: 0.14710381627082825 2023-01-24 04:31:09.853247: step: 478/463, loss: 0.013674519024789333 2023-01-24 04:31:10.432627: step: 480/463, loss: 2.565800241427496e-05 2023-01-24 04:31:11.053608: step: 482/463, loss: 0.00039630933315493166 2023-01-24 04:31:11.738299: step: 484/463, loss: 0.008131662383675575 2023-01-24 04:31:12.300419: step: 486/463, loss: 0.019274355843663216 2023-01-24 04:31:12.944542: step: 488/463, loss: 0.04618491977453232 2023-01-24 04:31:13.531446: step: 490/463, loss: 0.10491635650396347 2023-01-24 04:31:14.112049: step: 492/463, loss: 0.004210304468870163 2023-01-24 04:31:14.710140: step: 494/463, loss: 0.03869689255952835 2023-01-24 04:31:15.272668: step: 496/463, loss: 0.0026606637984514236 2023-01-24 04:31:15.908918: step: 498/463, loss: 0.011641241610050201 2023-01-24 04:31:16.476309: step: 500/463, loss: 0.0046510822139680386 2023-01-24 04:31:17.098733: step: 502/463, loss: 0.0016396655701100826 2023-01-24 04:31:17.718983: step: 504/463, loss: 0.0276095699518919 2023-01-24 04:31:18.336300: step: 506/463, loss: 0.20731523633003235 2023-01-24 04:31:18.950262: step: 508/463, loss: 0.013937252573668957 2023-01-24 04:31:19.587523: step: 510/463, loss: 0.0021260359790176153 2023-01-24 04:31:20.162718: step: 512/463, loss: 0.002873530611395836 2023-01-24 04:31:20.723594: step: 514/463, loss: 0.009035038761794567 2023-01-24 04:31:21.365901: step: 516/463, loss: 0.007814629934728146 2023-01-24 04:31:22.072728: step: 518/463, loss: 0.0022959900088608265 2023-01-24 04:31:22.666976: step: 520/463, loss: 0.06328759342432022 2023-01-24 04:31:23.263475: step: 522/463, loss: 0.003395489417016506 2023-01-24 04:31:23.875300: step: 524/463, loss: 0.01214947272092104 2023-01-24 04:31:24.518687: step: 526/463, loss: 0.4036450684070587 2023-01-24 04:31:25.122029: step: 528/463, loss: 0.03120172582566738 2023-01-24 04:31:25.754043: step: 530/463, loss: 0.003344734199345112 2023-01-24 04:31:26.309397: step: 532/463, loss: 0.0039536068215966225 2023-01-24 04:31:26.867511: step: 534/463, loss: 0.0061919367872178555 2023-01-24 04:31:27.437561: step: 536/463, loss: 0.06092844530940056 2023-01-24 04:31:28.101610: step: 538/463, loss: 0.25429683923721313 2023-01-24 04:31:28.683443: step: 540/463, loss: 0.01753782108426094 2023-01-24 04:31:29.215969: step: 542/463, loss: 0.0028878471348434687 2023-01-24 04:31:29.826310: step: 544/463, loss: 0.009843531996011734 2023-01-24 04:31:30.422229: step: 546/463, loss: 0.0038583590649068356 2023-01-24 04:31:31.103109: step: 548/463, loss: 0.00310632330365479 2023-01-24 04:31:31.722853: step: 550/463, loss: 0.024325275793671608 2023-01-24 04:31:32.333119: step: 552/463, loss: 0.00021839042892679572 2023-01-24 04:31:32.937528: step: 554/463, loss: 0.032278068363666534 2023-01-24 04:31:33.547647: step: 556/463, loss: 0.01529574766755104 2023-01-24 04:31:34.139593: step: 558/463, loss: 8.867478754837066e-05 2023-01-24 04:31:34.695917: step: 560/463, loss: 0.00016734552627895027 2023-01-24 04:31:35.369973: step: 562/463, loss: 0.007076086942106485 2023-01-24 04:31:35.929447: step: 564/463, loss: 0.019637173041701317 2023-01-24 04:31:36.509272: step: 566/463, loss: 0.0012049266370013356 2023-01-24 04:31:37.099249: step: 568/463, loss: 0.090630903840065 2023-01-24 04:31:37.758298: step: 570/463, loss: 0.0758119598031044 2023-01-24 04:31:38.300860: step: 572/463, loss: 0.01689886301755905 2023-01-24 04:31:38.886860: step: 574/463, loss: 0.09182406216859818 2023-01-24 04:31:39.522952: step: 576/463, loss: 0.00010060193744720891 2023-01-24 04:31:40.157538: step: 578/463, loss: 0.01154128834605217 2023-01-24 04:31:40.738352: step: 580/463, loss: 0.0015627071261405945 2023-01-24 04:31:41.291856: step: 582/463, loss: 0.010611708275973797 2023-01-24 04:31:41.865012: step: 584/463, loss: 0.0022549566347151995 2023-01-24 04:31:42.478057: step: 586/463, loss: 0.0025100510101765394 2023-01-24 04:31:43.103295: step: 588/463, loss: 0.005831606220453978 2023-01-24 04:31:43.701227: step: 590/463, loss: 0.010560649447143078 2023-01-24 04:31:44.297021: step: 592/463, loss: 0.005076198372989893 2023-01-24 04:31:44.896789: step: 594/463, loss: 0.20906363427639008 2023-01-24 04:31:45.487231: step: 596/463, loss: 0.0045676808804273605 2023-01-24 04:31:46.167470: step: 598/463, loss: 0.012653054669499397 2023-01-24 04:31:46.758648: step: 600/463, loss: 3.163800647598691e-05 2023-01-24 04:31:47.372422: step: 602/463, loss: 0.0009759700042195618 2023-01-24 04:31:48.008404: step: 604/463, loss: 0.052249323576688766 2023-01-24 04:31:48.655278: step: 606/463, loss: 0.012429279275238514 2023-01-24 04:31:49.290959: step: 608/463, loss: 0.22805052995681763 2023-01-24 04:31:49.888929: step: 610/463, loss: 0.019878454506397247 2023-01-24 04:31:50.495534: step: 612/463, loss: 0.005001762881875038 2023-01-24 04:31:51.060793: step: 614/463, loss: 0.00046702451072633266 2023-01-24 04:31:51.627010: step: 616/463, loss: 0.0038721521850675344 2023-01-24 04:31:52.206685: step: 618/463, loss: 0.009439660236239433 2023-01-24 04:31:52.798937: step: 620/463, loss: 0.05195963755249977 2023-01-24 04:31:53.413003: step: 622/463, loss: 0.005857803858816624 2023-01-24 04:31:54.034845: step: 624/463, loss: 0.0007752696983516216 2023-01-24 04:31:54.679736: step: 626/463, loss: 0.33164939284324646 2023-01-24 04:31:55.253299: step: 628/463, loss: 0.0017634157557040453 2023-01-24 04:31:55.925833: step: 630/463, loss: 0.0007176612853072584 2023-01-24 04:31:56.483455: step: 632/463, loss: 0.0018757733050733805 2023-01-24 04:31:57.171972: step: 634/463, loss: 0.009576070122420788 2023-01-24 04:31:57.800139: step: 636/463, loss: 0.00731218047440052 2023-01-24 04:31:58.504432: step: 638/463, loss: 0.017544841393828392 2023-01-24 04:31:59.086033: step: 640/463, loss: 0.0024372346233576536 2023-01-24 04:31:59.693743: step: 642/463, loss: 0.0033194420393556356 2023-01-24 04:32:00.268637: step: 644/463, loss: 0.021192798390984535 2023-01-24 04:32:00.889563: step: 646/463, loss: 0.006811568047851324 2023-01-24 04:32:01.491737: step: 648/463, loss: 0.02585878223180771 2023-01-24 04:32:02.104876: step: 650/463, loss: 0.22984056174755096 2023-01-24 04:32:02.755008: step: 652/463, loss: 0.01265551894903183 2023-01-24 04:32:03.366307: step: 654/463, loss: 0.001258872332982719 2023-01-24 04:32:03.991393: step: 656/463, loss: 0.03512102738022804 2023-01-24 04:32:04.602484: step: 658/463, loss: 0.048630088567733765 2023-01-24 04:32:05.234286: step: 660/463, loss: 0.005446343217045069 2023-01-24 04:32:05.821371: step: 662/463, loss: 0.006137234624475241 2023-01-24 04:32:06.448648: step: 664/463, loss: 0.14989976584911346 2023-01-24 04:32:07.041192: step: 666/463, loss: 0.00017364251834806055 2023-01-24 04:32:07.633267: step: 668/463, loss: 0.004856464918702841 2023-01-24 04:32:08.181979: step: 670/463, loss: 0.009872236289083958 2023-01-24 04:32:08.816870: step: 672/463, loss: 0.032780248671770096 2023-01-24 04:32:09.403189: step: 674/463, loss: 0.027597513049840927 2023-01-24 04:32:09.981742: step: 676/463, loss: 0.02038917876780033 2023-01-24 04:32:10.615530: step: 678/463, loss: 0.008558019995689392 2023-01-24 04:32:11.326265: step: 680/463, loss: 0.03265239670872688 2023-01-24 04:32:11.956728: step: 682/463, loss: 0.006859866436570883 2023-01-24 04:32:12.525249: step: 684/463, loss: 0.011900193989276886 2023-01-24 04:32:13.135616: step: 686/463, loss: 0.04232962802052498 2023-01-24 04:32:13.706001: step: 688/463, loss: 0.0061881705187261105 2023-01-24 04:32:14.333232: step: 690/463, loss: 0.16346432268619537 2023-01-24 04:32:14.987030: step: 692/463, loss: 0.01524968259036541 2023-01-24 04:32:15.599232: step: 694/463, loss: 0.040437716990709305 2023-01-24 04:32:16.215550: step: 696/463, loss: 0.014734717085957527 2023-01-24 04:32:16.828819: step: 698/463, loss: 0.024978233501315117 2023-01-24 04:32:17.458255: step: 700/463, loss: 0.001283841673284769 2023-01-24 04:32:18.064605: step: 702/463, loss: 0.024703670293092728 2023-01-24 04:32:18.683000: step: 704/463, loss: 0.500237762928009 2023-01-24 04:32:19.334211: step: 706/463, loss: 0.0014418258797377348 2023-01-24 04:32:19.925485: step: 708/463, loss: 0.015276388265192509 2023-01-24 04:32:20.504949: step: 710/463, loss: 0.0011005710111930966 2023-01-24 04:32:21.095258: step: 712/463, loss: 0.02783939242362976 2023-01-24 04:32:21.682470: step: 714/463, loss: 0.006485824938863516 2023-01-24 04:32:22.262440: step: 716/463, loss: 0.17663221061229706 2023-01-24 04:32:22.883938: step: 718/463, loss: 0.0077606141567230225 2023-01-24 04:32:23.564837: step: 720/463, loss: 0.031156230717897415 2023-01-24 04:32:24.129537: step: 722/463, loss: 0.008044115267693996 2023-01-24 04:32:24.763096: step: 724/463, loss: 0.009697213768959045 2023-01-24 04:32:25.427500: step: 726/463, loss: 0.021997151896357536 2023-01-24 04:32:26.117852: step: 728/463, loss: 0.008571336977183819 2023-01-24 04:32:26.753063: step: 730/463, loss: 0.06611441820859909 2023-01-24 04:32:27.364358: step: 732/463, loss: 0.0679442435503006 2023-01-24 04:32:28.014467: step: 734/463, loss: 0.00897188764065504 2023-01-24 04:32:28.664845: step: 736/463, loss: 0.019472964107990265 2023-01-24 04:32:29.223922: step: 738/463, loss: 0.00036485164309851825 2023-01-24 04:32:29.797970: step: 740/463, loss: 0.009724732488393784 2023-01-24 04:32:30.423370: step: 742/463, loss: 0.019409963861107826 2023-01-24 04:32:31.038593: step: 744/463, loss: 0.002699153730645776 2023-01-24 04:32:31.726064: step: 746/463, loss: 0.0312996469438076 2023-01-24 04:32:32.391070: step: 748/463, loss: 0.07789342850446701 2023-01-24 04:32:33.010453: step: 750/463, loss: 0.03073692135512829 2023-01-24 04:32:33.611571: step: 752/463, loss: 0.11415460705757141 2023-01-24 04:32:34.216249: step: 754/463, loss: 0.0037880348972976208 2023-01-24 04:32:34.851266: step: 756/463, loss: 0.015598006546497345 2023-01-24 04:32:35.532544: step: 758/463, loss: 0.012638247571885586 2023-01-24 04:32:36.159572: step: 760/463, loss: 0.003824661485850811 2023-01-24 04:32:36.814187: step: 762/463, loss: 0.010064736008644104 2023-01-24 04:32:37.396124: step: 764/463, loss: 0.02641286514699459 2023-01-24 04:32:38.000593: step: 766/463, loss: 0.017185913398861885 2023-01-24 04:32:38.609847: step: 768/463, loss: 0.07805419713258743 2023-01-24 04:32:39.201511: step: 770/463, loss: 0.004813907202333212 2023-01-24 04:32:39.802395: step: 772/463, loss: 0.02401822619140148 2023-01-24 04:32:40.412477: step: 774/463, loss: 0.0021618844475597143 2023-01-24 04:32:41.012873: step: 776/463, loss: 0.0506034754216671 2023-01-24 04:32:41.588657: step: 778/463, loss: 0.0011012544855475426 2023-01-24 04:32:42.200269: step: 780/463, loss: 0.00724897300824523 2023-01-24 04:32:42.808159: step: 782/463, loss: 0.021594876423478127 2023-01-24 04:32:43.379969: step: 784/463, loss: 0.002545403316617012 2023-01-24 04:32:43.950364: step: 786/463, loss: 0.008968256413936615 2023-01-24 04:32:44.576034: step: 788/463, loss: 0.00398713955655694 2023-01-24 04:32:45.228140: step: 790/463, loss: 0.0958980843424797 2023-01-24 04:32:45.832133: step: 792/463, loss: 0.2020694762468338 2023-01-24 04:32:46.400197: step: 794/463, loss: 0.004452807828783989 2023-01-24 04:32:46.988827: step: 796/463, loss: 0.0012410454219207168 2023-01-24 04:32:47.593117: step: 798/463, loss: 0.00015894418174866587 2023-01-24 04:32:48.233314: step: 800/463, loss: 0.002304812427610159 2023-01-24 04:32:48.848506: step: 802/463, loss: 0.019447730854153633 2023-01-24 04:32:49.543899: step: 804/463, loss: 0.027198130264878273 2023-01-24 04:32:50.291174: step: 806/463, loss: 0.001086218049749732 2023-01-24 04:32:50.963380: step: 808/463, loss: 0.05643834173679352 2023-01-24 04:32:51.604623: step: 810/463, loss: 0.13401424884796143 2023-01-24 04:32:52.218182: step: 812/463, loss: 0.006456875707954168 2023-01-24 04:32:52.903687: step: 814/463, loss: 0.004524945747107267 2023-01-24 04:32:53.498557: step: 816/463, loss: 0.0010863632196560502 2023-01-24 04:32:54.113449: step: 818/463, loss: 0.09103409945964813 2023-01-24 04:32:54.760419: step: 820/463, loss: 0.00195170973893255 2023-01-24 04:32:55.429352: step: 822/463, loss: 0.002164292149245739 2023-01-24 04:32:56.115410: step: 824/463, loss: 0.010089936666190624 2023-01-24 04:32:56.688140: step: 826/463, loss: 0.08381212502717972 2023-01-24 04:32:57.237100: step: 828/463, loss: 0.01248124334961176 2023-01-24 04:32:57.819918: step: 830/463, loss: 5.2758427045773715e-05 2023-01-24 04:32:58.473309: step: 832/463, loss: 0.019575046375393867 2023-01-24 04:32:59.095131: step: 834/463, loss: 0.016337310895323753 2023-01-24 04:32:59.719520: step: 836/463, loss: 0.21187326312065125 2023-01-24 04:33:00.277146: step: 838/463, loss: 0.0011931948829442263 2023-01-24 04:33:00.947620: step: 840/463, loss: 0.004035073332488537 2023-01-24 04:33:01.543939: step: 842/463, loss: 0.004755925387144089 2023-01-24 04:33:02.217613: step: 844/463, loss: 0.03387362137436867 2023-01-24 04:33:02.806471: step: 846/463, loss: 0.002419437514618039 2023-01-24 04:33:03.431366: step: 848/463, loss: 0.05277043581008911 2023-01-24 04:33:03.994076: step: 850/463, loss: 0.0017022585961967707 2023-01-24 04:33:04.597223: step: 852/463, loss: 0.0021178354509174824 2023-01-24 04:33:05.199476: step: 854/463, loss: 2.7630369663238525 2023-01-24 04:33:05.797362: step: 856/463, loss: 0.019111981615424156 2023-01-24 04:33:06.391055: step: 858/463, loss: 0.0008478990639559925 2023-01-24 04:33:06.994500: step: 860/463, loss: 0.012525713071227074 2023-01-24 04:33:07.538181: step: 862/463, loss: 0.0038202477153390646 2023-01-24 04:33:08.056443: step: 864/463, loss: 0.029214056208729744 2023-01-24 04:33:08.596250: step: 866/463, loss: 0.0018202961655333638 2023-01-24 04:33:09.182314: step: 868/463, loss: 0.0011077336966991425 2023-01-24 04:33:09.853316: step: 870/463, loss: 0.015170770697295666 2023-01-24 04:33:10.448779: step: 872/463, loss: 0.07138610631227493 2023-01-24 04:33:11.065371: step: 874/463, loss: 0.02452378161251545 2023-01-24 04:33:11.602825: step: 876/463, loss: 0.00027142511680722237 2023-01-24 04:33:12.158017: step: 878/463, loss: 0.0003249652509111911 2023-01-24 04:33:12.791168: step: 880/463, loss: 0.0007309382199309766 2023-01-24 04:33:13.346579: step: 882/463, loss: 0.0014013121835887432 2023-01-24 04:33:13.936860: step: 884/463, loss: 0.004780528135597706 2023-01-24 04:33:14.497554: step: 886/463, loss: 0.003030294319614768 2023-01-24 04:33:15.199987: step: 888/463, loss: 0.020917950198054314 2023-01-24 04:33:15.799644: step: 890/463, loss: 0.02841830626130104 2023-01-24 04:33:16.418497: step: 892/463, loss: 0.0020834861788898706 2023-01-24 04:33:17.077231: step: 894/463, loss: 0.00016452171257697046 2023-01-24 04:33:17.733256: step: 896/463, loss: 0.0148799829185009 2023-01-24 04:33:18.369060: step: 898/463, loss: 0.003282776800915599 2023-01-24 04:33:18.971267: step: 900/463, loss: 0.24687686562538147 2023-01-24 04:33:19.494620: step: 902/463, loss: 0.00020793720614165068 2023-01-24 04:33:20.151886: step: 904/463, loss: 0.013971228152513504 2023-01-24 04:33:20.800424: step: 906/463, loss: 0.03148830309510231 2023-01-24 04:33:21.384891: step: 908/463, loss: 0.001749809249304235 2023-01-24 04:33:22.073908: step: 910/463, loss: 0.007790137082338333 2023-01-24 04:33:22.697279: step: 912/463, loss: 0.19922320544719696 2023-01-24 04:33:23.292816: step: 914/463, loss: 0.00036929320776835084 2023-01-24 04:33:23.844174: step: 916/463, loss: 0.0013819461455568671 2023-01-24 04:33:24.420139: step: 918/463, loss: 0.060713041573762894 2023-01-24 04:33:25.010059: step: 920/463, loss: 0.0022731327917426825 2023-01-24 04:33:25.629588: step: 922/463, loss: 0.000688487954903394 2023-01-24 04:33:26.250510: step: 924/463, loss: 0.0033166767098009586 2023-01-24 04:33:26.865764: step: 926/463, loss: 0.0226771030575037 ================================================== Loss: 0.052 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3319014582820576, 'r': 0.3010415503962496, 'f1': 0.315719198127012}, 'combined': 0.23263519861990356, 'epoch': 35} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.37128241588739996, 'r': 0.30551423925115023, 'f1': 0.33520279385525914}, 'combined': 0.23582106100369993, 'epoch': 35} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3340088912133891, 'r': 0.30295303605313095, 'f1': 0.3177238805970149}, 'combined': 0.23411233307148466, 'epoch': 35} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.375364172269317, 'r': 0.30559753292083136, 'f1': 0.33690694827058465}, 'combined': 0.2392039332721151, 'epoch': 35} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35979490056641533, 'r': 0.32292787090685854, 'f1': 0.3403659759358289}, 'combined': 0.25079598226850547, 'epoch': 35} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3897197992100066, 'r': 0.29518044128646226, 'f1': 0.33592530855440494}, 'combined': 0.2385069690736275, 'epoch': 35} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4133333333333333, 'r': 0.2952380952380952, 'f1': 0.3444444444444444}, 'combined': 0.22962962962962957, 'epoch': 35} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.34, 'r': 0.3695652173913043, 'f1': 0.3541666666666667}, 'combined': 0.17708333333333334, 'epoch': 35} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4444444444444444, 'r': 0.13793103448275862, 'f1': 0.21052631578947367}, 'combined': 0.14035087719298245, 'epoch': 35} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3207567698259188, 'r': 0.3146703036053131, 'f1': 0.3176843869731801}, 'combined': 0.23408323250655372, 'epoch': 25} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3686692880345029, 'r': 0.31751883707683626, 'f1': 0.34118761114866797}, 'combined': 0.24224320391555423, 'epoch': 25} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3392857142857143, 'r': 0.41304347826086957, 'f1': 0.3725490196078432}, 'combined': 0.1862745098039216, 'epoch': 25} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3565039176745541, 'r': 0.34365083525364987, 'f1': 0.3499594013114463}, 'combined': 0.25786482201896044, 'epoch': 34} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37789489678955906, 'r': 0.31320721140025465, 'f1': 0.34252364570514954}, 'combined': 0.24319178845065617, 'epoch': 34} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6388888888888888, 'r': 0.19827586206896552, 'f1': 0.3026315789473684}, 'combined': 0.20175438596491227, 'epoch': 34} ****************************** Epoch: 36 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:35:58.024713: step: 2/463, loss: 0.00018477975390851498 2023-01-24 04:35:58.601585: step: 4/463, loss: 0.002586573362350464 2023-01-24 04:35:59.161266: step: 6/463, loss: 0.01254162285476923 2023-01-24 04:35:59.840747: step: 8/463, loss: 0.0005560036515817046 2023-01-24 04:36:00.393811: step: 10/463, loss: 3.6719611671287566e-06 2023-01-24 04:36:01.032637: step: 12/463, loss: 0.010653184726834297 2023-01-24 04:36:01.621061: step: 14/463, loss: 0.06212904676795006 2023-01-24 04:36:02.254891: step: 16/463, loss: 0.08517590165138245 2023-01-24 04:36:02.891693: step: 18/463, loss: 0.0006613713339902461 2023-01-24 04:36:03.518543: step: 20/463, loss: 0.024909690022468567 2023-01-24 04:36:04.121292: step: 22/463, loss: 0.027977725490927696 2023-01-24 04:36:04.894168: step: 24/463, loss: 0.0031869355589151382 2023-01-24 04:36:05.493177: step: 26/463, loss: 0.007166872266680002 2023-01-24 04:36:06.096871: step: 28/463, loss: 0.0001139644009526819 2023-01-24 04:36:06.658255: step: 30/463, loss: 0.007633818779140711 2023-01-24 04:36:07.276317: step: 32/463, loss: 0.0008716383599676192 2023-01-24 04:36:07.897162: step: 34/463, loss: 6.9840629294049e-05 2023-01-24 04:36:08.512868: step: 36/463, loss: 0.0006006468320265412 2023-01-24 04:36:09.104442: step: 38/463, loss: 0.02739059552550316 2023-01-24 04:36:09.732368: step: 40/463, loss: 0.00044425096712075174 2023-01-24 04:36:10.329620: step: 42/463, loss: 0.021930716931819916 2023-01-24 04:36:10.863481: step: 44/463, loss: 0.0034758197143673897 2023-01-24 04:36:11.499829: step: 46/463, loss: 0.0358794666826725 2023-01-24 04:36:12.058429: step: 48/463, loss: 0.0013835822464898229 2023-01-24 04:36:12.609535: step: 50/463, loss: 0.0036755732726305723 2023-01-24 04:36:13.210591: step: 52/463, loss: 0.006308030337095261 2023-01-24 04:36:13.796193: step: 54/463, loss: 0.0009722005925141275 2023-01-24 04:36:14.393846: step: 56/463, loss: 0.055911727249622345 2023-01-24 04:36:14.965543: step: 58/463, loss: 0.00021044843015260994 2023-01-24 04:36:15.560889: step: 60/463, loss: 0.012967073358595371 2023-01-24 04:36:16.136519: step: 62/463, loss: 0.02605103887617588 2023-01-24 04:36:16.729402: step: 64/463, loss: 0.20223182439804077 2023-01-24 04:36:17.355752: step: 66/463, loss: 0.03920434042811394 2023-01-24 04:36:18.014307: step: 68/463, loss: 0.009423425421118736 2023-01-24 04:36:18.645961: step: 70/463, loss: 1.7877624034881592 2023-01-24 04:36:19.230736: step: 72/463, loss: 0.0028661396354436874 2023-01-24 04:36:19.886607: step: 74/463, loss: 0.0067941271699965 2023-01-24 04:36:20.496187: step: 76/463, loss: 0.048051148653030396 2023-01-24 04:36:21.177495: step: 78/463, loss: 0.004888043273240328 2023-01-24 04:36:21.702679: step: 80/463, loss: 0.0003228523419238627 2023-01-24 04:36:22.244176: step: 82/463, loss: 0.014602411538362503 2023-01-24 04:36:22.940596: step: 84/463, loss: 0.1567573994398117 2023-01-24 04:36:23.500660: step: 86/463, loss: 0.008259402588009834 2023-01-24 04:36:24.129005: step: 88/463, loss: 0.0006440196884796023 2023-01-24 04:36:24.696103: step: 90/463, loss: 0.003881461452692747 2023-01-24 04:36:25.349075: step: 92/463, loss: 0.009441063739359379 2023-01-24 04:36:25.945563: step: 94/463, loss: 8.461230754619464e-05 2023-01-24 04:36:26.600582: step: 96/463, loss: 0.00022234415519051254 2023-01-24 04:36:27.227526: step: 98/463, loss: 0.0015088652726262808 2023-01-24 04:36:27.851082: step: 100/463, loss: 0.011041313409805298 2023-01-24 04:36:28.420446: step: 102/463, loss: 0.0006771545158699155 2023-01-24 04:36:29.012199: step: 104/463, loss: 0.00222778576426208 2023-01-24 04:36:29.650563: step: 106/463, loss: 0.032126180827617645 2023-01-24 04:36:30.260662: step: 108/463, loss: 0.05877506360411644 2023-01-24 04:36:30.846829: step: 110/463, loss: 0.05099492147564888 2023-01-24 04:36:31.429996: step: 112/463, loss: 2.119067357853055e-05 2023-01-24 04:36:32.141717: step: 114/463, loss: 0.004613206256181002 2023-01-24 04:36:32.699518: step: 116/463, loss: 0.0032659436110407114 2023-01-24 04:36:33.271265: step: 118/463, loss: 0.005213357973843813 2023-01-24 04:36:33.887866: step: 120/463, loss: 1.3453845977783203 2023-01-24 04:36:34.477209: step: 122/463, loss: 0.00042691596900112927 2023-01-24 04:36:35.072185: step: 124/463, loss: 0.0015211913269013166 2023-01-24 04:36:35.686671: step: 126/463, loss: 0.012985821813344955 2023-01-24 04:36:36.335368: step: 128/463, loss: 0.0011606249026954174 2023-01-24 04:36:36.993058: step: 130/463, loss: 0.012957274913787842 2023-01-24 04:36:37.643099: step: 132/463, loss: 0.0050052618607878685 2023-01-24 04:36:38.216215: step: 134/463, loss: 0.06463655084371567 2023-01-24 04:36:38.789308: step: 136/463, loss: 0.0008031946490518749 2023-01-24 04:36:39.431881: step: 138/463, loss: 0.0011434725020080805 2023-01-24 04:36:40.020278: step: 140/463, loss: 0.003745359368622303 2023-01-24 04:36:40.571559: step: 142/463, loss: 0.0015434781089425087 2023-01-24 04:36:41.186955: step: 144/463, loss: 0.02998007833957672 2023-01-24 04:36:41.780978: step: 146/463, loss: 0.0001813816634239629 2023-01-24 04:36:42.403944: step: 148/463, loss: 0.006158629432320595 2023-01-24 04:36:43.039287: step: 150/463, loss: 0.000812047510407865 2023-01-24 04:36:43.654835: step: 152/463, loss: 0.001170398318208754 2023-01-24 04:36:44.244629: step: 154/463, loss: 0.023130126297473907 2023-01-24 04:36:44.834807: step: 156/463, loss: 0.001461489126086235 2023-01-24 04:36:45.414409: step: 158/463, loss: 0.0025803425814956427 2023-01-24 04:36:46.031905: step: 160/463, loss: 0.00015224525122903287 2023-01-24 04:36:46.634030: step: 162/463, loss: 0.0017454553162679076 2023-01-24 04:36:47.302970: step: 164/463, loss: 0.01290972251445055 2023-01-24 04:36:47.928837: step: 166/463, loss: 0.012948507443070412 2023-01-24 04:36:48.500690: step: 168/463, loss: 0.059472475200891495 2023-01-24 04:36:49.086080: step: 170/463, loss: 0.008713082410395145 2023-01-24 04:36:49.654840: step: 172/463, loss: 0.3101966679096222 2023-01-24 04:36:50.259968: step: 174/463, loss: 0.01842559315264225 2023-01-24 04:36:50.818697: step: 176/463, loss: 0.027530785650014877 2023-01-24 04:36:51.398686: step: 178/463, loss: 0.19989630579948425 2023-01-24 04:36:51.991503: step: 180/463, loss: 0.023694457486271858 2023-01-24 04:36:52.595158: step: 182/463, loss: 0.002713697263970971 2023-01-24 04:36:53.221929: step: 184/463, loss: 0.006852052174508572 2023-01-24 04:36:53.847558: step: 186/463, loss: 0.00852872896939516 2023-01-24 04:36:54.418854: step: 188/463, loss: 0.0018891674699261785 2023-01-24 04:36:55.044883: step: 190/463, loss: 0.0029637375846505165 2023-01-24 04:36:55.640998: step: 192/463, loss: 0.009908498264849186 2023-01-24 04:36:56.181053: step: 194/463, loss: 4.746121406555176 2023-01-24 04:36:56.814169: step: 196/463, loss: 0.00039524302701465786 2023-01-24 04:36:57.423437: step: 198/463, loss: 0.006018438376486301 2023-01-24 04:36:58.035649: step: 200/463, loss: 0.07183034718036652 2023-01-24 04:36:58.628777: step: 202/463, loss: 0.006683429703116417 2023-01-24 04:36:59.191704: step: 204/463, loss: 3.561225094017573e-05 2023-01-24 04:36:59.799880: step: 206/463, loss: 0.08714350312948227 2023-01-24 04:37:00.417883: step: 208/463, loss: 0.2385743260383606 2023-01-24 04:37:01.033653: step: 210/463, loss: 0.0047097462229430676 2023-01-24 04:37:01.638050: step: 212/463, loss: 0.009119530208408833 2023-01-24 04:37:02.270761: step: 214/463, loss: 4.668133988161571e-05 2023-01-24 04:37:02.939724: step: 216/463, loss: 0.015168114565312862 2023-01-24 04:37:03.604486: step: 218/463, loss: 0.01135489996522665 2023-01-24 04:37:04.176020: step: 220/463, loss: 0.00355769251473248 2023-01-24 04:37:04.821698: step: 222/463, loss: 0.0008655776618979871 2023-01-24 04:37:05.405496: step: 224/463, loss: 0.01830996200442314 2023-01-24 04:37:06.005418: step: 226/463, loss: 0.0008322811918333173 2023-01-24 04:37:06.592917: step: 228/463, loss: 0.005375024396926165 2023-01-24 04:37:07.161795: step: 230/463, loss: 0.010820578783750534 2023-01-24 04:37:07.732393: step: 232/463, loss: 0.0005042811972089112 2023-01-24 04:37:08.314981: step: 234/463, loss: 0.019505063071846962 2023-01-24 04:37:08.957982: step: 236/463, loss: 0.003925623372197151 2023-01-24 04:37:09.559836: step: 238/463, loss: 0.006843195762485266 2023-01-24 04:37:10.193476: step: 240/463, loss: 0.003378171008080244 2023-01-24 04:37:10.859164: step: 242/463, loss: 0.0008172544767148793 2023-01-24 04:37:11.461573: step: 244/463, loss: 0.0008558965637348592 2023-01-24 04:37:12.044410: step: 246/463, loss: 0.015266952104866505 2023-01-24 04:37:12.668584: step: 248/463, loss: 0.00018558491137810051 2023-01-24 04:37:13.279517: step: 250/463, loss: 0.00020397448679432273 2023-01-24 04:37:13.841985: step: 252/463, loss: 0.0002503470459487289 2023-01-24 04:37:14.431320: step: 254/463, loss: 0.0014185481704771519 2023-01-24 04:37:15.016172: step: 256/463, loss: 0.001482093008235097 2023-01-24 04:37:15.650155: step: 258/463, loss: 0.008275902830064297 2023-01-24 04:37:16.258771: step: 260/463, loss: 0.005905026104301214 2023-01-24 04:37:16.868210: step: 262/463, loss: 0.00762594398111105 2023-01-24 04:37:17.466146: step: 264/463, loss: 0.022281507030129433 2023-01-24 04:37:18.043775: step: 266/463, loss: 0.006242890376597643 2023-01-24 04:37:18.647768: step: 268/463, loss: 0.00939255952835083 2023-01-24 04:37:19.178434: step: 270/463, loss: 0.020396443083882332 2023-01-24 04:37:19.781204: step: 272/463, loss: 0.009728401899337769 2023-01-24 04:37:20.411813: step: 274/463, loss: 0.0002565548929851502 2023-01-24 04:37:21.028656: step: 276/463, loss: 0.004107839427888393 2023-01-24 04:37:21.645696: step: 278/463, loss: 0.02013067901134491 2023-01-24 04:37:22.216224: step: 280/463, loss: 0.00020316324662417173 2023-01-24 04:37:22.840743: step: 282/463, loss: 0.012393047101795673 2023-01-24 04:37:23.427514: step: 284/463, loss: 0.000367345055565238 2023-01-24 04:37:24.069729: step: 286/463, loss: 0.00029526956495828927 2023-01-24 04:37:24.655835: step: 288/463, loss: 0.0093417102470994 2023-01-24 04:37:25.355621: step: 290/463, loss: 0.18198327720165253 2023-01-24 04:37:25.948616: step: 292/463, loss: 0.013032200746238232 2023-01-24 04:37:26.512868: step: 294/463, loss: 0.00010384383494965732 2023-01-24 04:37:27.110437: step: 296/463, loss: 0.005056554451584816 2023-01-24 04:37:27.794820: step: 298/463, loss: 0.006475057452917099 2023-01-24 04:37:28.406765: step: 300/463, loss: 0.044289737939834595 2023-01-24 04:37:29.037987: step: 302/463, loss: 0.0011133004445582628 2023-01-24 04:37:29.632182: step: 304/463, loss: 0.0002841418900061399 2023-01-24 04:37:30.202070: step: 306/463, loss: 0.03293266519904137 2023-01-24 04:37:30.729700: step: 308/463, loss: 5.08477351104375e-05 2023-01-24 04:37:31.297896: step: 310/463, loss: 1.5656491086701863e-05 2023-01-24 04:37:31.865348: step: 312/463, loss: 0.020989621058106422 2023-01-24 04:37:32.521749: step: 314/463, loss: 0.0008404700784012675 2023-01-24 04:37:33.178587: step: 316/463, loss: 0.012840775772929192 2023-01-24 04:37:33.756067: step: 318/463, loss: 5.4942534006841015e-06 2023-01-24 04:37:34.332591: step: 320/463, loss: 0.009245647117495537 2023-01-24 04:37:34.938105: step: 322/463, loss: 0.002585381269454956 2023-01-24 04:37:35.550460: step: 324/463, loss: 0.0030975088011473417 2023-01-24 04:37:36.144792: step: 326/463, loss: 0.018508590757846832 2023-01-24 04:37:36.841871: step: 328/463, loss: 0.06864801049232483 2023-01-24 04:37:37.475730: step: 330/463, loss: 0.039617009460926056 2023-01-24 04:37:38.089413: step: 332/463, loss: 0.0046293162740767 2023-01-24 04:37:38.697047: step: 334/463, loss: 0.07547248154878616 2023-01-24 04:37:39.371293: step: 336/463, loss: 0.012146884575486183 2023-01-24 04:37:39.988659: step: 338/463, loss: 0.2809365391731262 2023-01-24 04:37:40.585536: step: 340/463, loss: 0.07777852565050125 2023-01-24 04:37:41.195833: step: 342/463, loss: 0.0006456745322793722 2023-01-24 04:37:41.875965: step: 344/463, loss: 0.0022031529806554317 2023-01-24 04:37:42.480315: step: 346/463, loss: 0.004379592835903168 2023-01-24 04:37:43.063236: step: 348/463, loss: 0.0016600445378571749 2023-01-24 04:37:43.665249: step: 350/463, loss: 0.040337592363357544 2023-01-24 04:37:44.234271: step: 352/463, loss: 0.010544832795858383 2023-01-24 04:37:44.833721: step: 354/463, loss: 0.0007203896529972553 2023-01-24 04:37:45.471356: step: 356/463, loss: 0.005806326400488615 2023-01-24 04:37:46.106520: step: 358/463, loss: 0.04795518144965172 2023-01-24 04:37:46.641237: step: 360/463, loss: 0.49848684668540955 2023-01-24 04:37:47.193874: step: 362/463, loss: 0.0017186783952638507 2023-01-24 04:37:47.797595: step: 364/463, loss: 0.0618063360452652 2023-01-24 04:37:48.373057: step: 366/463, loss: 0.015333887189626694 2023-01-24 04:37:49.056856: step: 368/463, loss: 0.030861306935548782 2023-01-24 04:37:49.660224: step: 370/463, loss: 0.011862830258905888 2023-01-24 04:37:50.219124: step: 372/463, loss: 0.00023576055536977947 2023-01-24 04:37:50.831410: step: 374/463, loss: 0.03927438706159592 2023-01-24 04:37:51.390437: step: 376/463, loss: 0.012882445938885212 2023-01-24 04:37:51.992638: step: 378/463, loss: 0.014295519329607487 2023-01-24 04:37:52.603868: step: 380/463, loss: 6.978096644161269e-05 2023-01-24 04:37:53.143374: step: 382/463, loss: 0.014438939280807972 2023-01-24 04:37:53.852526: step: 384/463, loss: 0.017670677974820137 2023-01-24 04:37:54.496469: step: 386/463, loss: 0.028395643457770348 2023-01-24 04:37:55.092784: step: 388/463, loss: 0.0077108535915613174 2023-01-24 04:37:55.690882: step: 390/463, loss: 0.0006085914792492986 2023-01-24 04:37:56.226394: step: 392/463, loss: 0.0011646468192338943 2023-01-24 04:37:56.833731: step: 394/463, loss: 0.02546307072043419 2023-01-24 04:37:57.442748: step: 396/463, loss: 0.004215105436742306 2023-01-24 04:37:58.050175: step: 398/463, loss: 0.36631548404693604 2023-01-24 04:37:58.644835: step: 400/463, loss: 7.981716771610081e-05 2023-01-24 04:37:59.192095: step: 402/463, loss: 0.0010229367762804031 2023-01-24 04:37:59.837935: step: 404/463, loss: 1.92062509059906 2023-01-24 04:38:00.411262: step: 406/463, loss: 0.007291493937373161 2023-01-24 04:38:01.059429: step: 408/463, loss: 0.09611520916223526 2023-01-24 04:38:01.648333: step: 410/463, loss: 0.006677689962089062 2023-01-24 04:38:02.258879: step: 412/463, loss: 0.4269252419471741 2023-01-24 04:38:02.947968: step: 414/463, loss: 0.024571621790528297 2023-01-24 04:38:03.594199: step: 416/463, loss: 2.9635182727361098e-05 2023-01-24 04:38:04.214061: step: 418/463, loss: 0.021223362535238266 2023-01-24 04:38:04.861372: step: 420/463, loss: 0.013204047456383705 2023-01-24 04:38:05.446647: step: 422/463, loss: 0.022228669375181198 2023-01-24 04:38:06.061541: step: 424/463, loss: 0.005649225786328316 2023-01-24 04:38:06.668181: step: 426/463, loss: 0.0006842428701929748 2023-01-24 04:38:07.228150: step: 428/463, loss: 0.000252889993134886 2023-01-24 04:38:07.811901: step: 430/463, loss: 0.008655249141156673 2023-01-24 04:38:08.412948: step: 432/463, loss: 0.0026644503232091665 2023-01-24 04:38:08.997227: step: 434/463, loss: 0.00012206823157612234 2023-01-24 04:38:09.597260: step: 436/463, loss: 0.002764018252491951 2023-01-24 04:38:10.227562: step: 438/463, loss: 0.00846618227660656 2023-01-24 04:38:10.775756: step: 440/463, loss: 0.0042661637999117374 2023-01-24 04:38:11.306131: step: 442/463, loss: 0.00772404158487916 2023-01-24 04:38:11.973651: step: 444/463, loss: 0.008310729637742043 2023-01-24 04:38:12.625732: step: 446/463, loss: 0.021370723843574524 2023-01-24 04:38:13.285705: step: 448/463, loss: 0.014346071518957615 2023-01-24 04:38:13.871792: step: 450/463, loss: 0.001953733153641224 2023-01-24 04:38:14.527720: step: 452/463, loss: 0.0007094720494933426 2023-01-24 04:38:15.063364: step: 454/463, loss: 0.0803772434592247 2023-01-24 04:38:15.677051: step: 456/463, loss: 0.0012401383137330413 2023-01-24 04:38:16.280975: step: 458/463, loss: 0.08720210194587708 2023-01-24 04:38:16.851583: step: 460/463, loss: 0.010531988926231861 2023-01-24 04:38:17.467998: step: 462/463, loss: 0.0010227502789348364 2023-01-24 04:38:18.205926: step: 464/463, loss: 0.004808021709322929 2023-01-24 04:38:18.792150: step: 466/463, loss: 0.0013550515286624432 2023-01-24 04:38:19.412925: step: 468/463, loss: 0.009388318285346031 2023-01-24 04:38:20.030018: step: 470/463, loss: 0.038546156138181686 2023-01-24 04:38:20.627020: step: 472/463, loss: 0.04721713066101074 2023-01-24 04:38:21.242028: step: 474/463, loss: 0.0014991264324635267 2023-01-24 04:38:21.910655: step: 476/463, loss: 0.000984358717687428 2023-01-24 04:38:22.592929: step: 478/463, loss: 0.14098307490348816 2023-01-24 04:38:23.185848: step: 480/463, loss: 0.04212009534239769 2023-01-24 04:38:23.841938: step: 482/463, loss: 0.002332093194127083 2023-01-24 04:38:24.696032: step: 484/463, loss: 0.09824497997760773 2023-01-24 04:38:25.311565: step: 486/463, loss: 0.004004096612334251 2023-01-24 04:38:25.911169: step: 488/463, loss: 0.015998590737581253 2023-01-24 04:38:26.490044: step: 490/463, loss: 0.0007407974917441607 2023-01-24 04:38:27.070435: step: 492/463, loss: 0.003984102047979832 2023-01-24 04:38:27.698217: step: 494/463, loss: 0.0003921253082808107 2023-01-24 04:38:28.314982: step: 496/463, loss: 0.020443357527256012 2023-01-24 04:38:28.991178: step: 498/463, loss: 0.019573738798499107 2023-01-24 04:38:29.610934: step: 500/463, loss: 0.0028591034933924675 2023-01-24 04:38:30.262068: step: 502/463, loss: 0.0031713892240077257 2023-01-24 04:38:30.841699: step: 504/463, loss: 0.007966415025293827 2023-01-24 04:38:31.456950: step: 506/463, loss: 0.00025052065029740334 2023-01-24 04:38:32.121935: step: 508/463, loss: 0.03500470519065857 2023-01-24 04:38:32.785415: step: 510/463, loss: 0.003998374100774527 2023-01-24 04:38:33.378194: step: 512/463, loss: 0.0654691532254219 2023-01-24 04:38:33.969663: step: 514/463, loss: 0.005838135723024607 2023-01-24 04:38:34.554277: step: 516/463, loss: 0.005801546387374401 2023-01-24 04:38:35.167332: step: 518/463, loss: 0.04985227808356285 2023-01-24 04:38:35.831274: step: 520/463, loss: 0.014603681862354279 2023-01-24 04:38:36.431432: step: 522/463, loss: 0.006826764438301325 2023-01-24 04:38:37.057272: step: 524/463, loss: 0.004902217071503401 2023-01-24 04:38:37.693619: step: 526/463, loss: 0.0010066617978736758 2023-01-24 04:38:38.334184: step: 528/463, loss: 0.0031879793386906385 2023-01-24 04:38:38.932978: step: 530/463, loss: 0.019291529431939125 2023-01-24 04:38:39.527275: step: 532/463, loss: 0.06669043004512787 2023-01-24 04:38:40.223124: step: 534/463, loss: 0.011341448873281479 2023-01-24 04:38:40.795962: step: 536/463, loss: 0.023887408897280693 2023-01-24 04:38:41.394749: step: 538/463, loss: 0.00034027238143607974 2023-01-24 04:38:42.034922: step: 540/463, loss: 0.0003938330919481814 2023-01-24 04:38:42.650545: step: 542/463, loss: 0.01596011407673359 2023-01-24 04:38:43.238855: step: 544/463, loss: 0.04258929193019867 2023-01-24 04:38:43.778836: step: 546/463, loss: 0.007682508323341608 2023-01-24 04:38:44.344184: step: 548/463, loss: 0.024547051638364792 2023-01-24 04:38:44.983009: step: 550/463, loss: 0.010762165300548077 2023-01-24 04:38:45.610477: step: 552/463, loss: 0.025260981172323227 2023-01-24 04:38:46.211531: step: 554/463, loss: 0.18682165443897247 2023-01-24 04:38:46.833014: step: 556/463, loss: 0.08230628818273544 2023-01-24 04:38:47.368494: step: 558/463, loss: 0.0010263995500281453 2023-01-24 04:38:48.045532: step: 560/463, loss: 0.0006909972871653736 2023-01-24 04:38:48.715943: step: 562/463, loss: 0.001220092992298305 2023-01-24 04:38:49.322492: step: 564/463, loss: 0.00035275923437438905 2023-01-24 04:38:49.932799: step: 566/463, loss: 0.0027179240714758635 2023-01-24 04:38:50.575876: step: 568/463, loss: 0.004040037281811237 2023-01-24 04:38:51.265029: step: 570/463, loss: 0.0043217698112130165 2023-01-24 04:38:51.851868: step: 572/463, loss: 0.0020980723202228546 2023-01-24 04:38:52.429533: step: 574/463, loss: 0.0349947027862072 2023-01-24 04:38:53.112059: step: 576/463, loss: 0.006946204695850611 2023-01-24 04:38:53.670193: step: 578/463, loss: 0.002458976348862052 2023-01-24 04:38:54.239675: step: 580/463, loss: 2.2669863028568216e-05 2023-01-24 04:38:54.838485: step: 582/463, loss: 0.004590875003486872 2023-01-24 04:38:55.443631: step: 584/463, loss: 0.02159574255347252 2023-01-24 04:38:56.022423: step: 586/463, loss: 0.0444759838283062 2023-01-24 04:38:56.649777: step: 588/463, loss: 0.640264630317688 2023-01-24 04:38:57.276303: step: 590/463, loss: 0.01380386296659708 2023-01-24 04:38:57.894493: step: 592/463, loss: 0.00554287526756525 2023-01-24 04:38:58.482016: step: 594/463, loss: 0.02985914796590805 2023-01-24 04:38:59.104031: step: 596/463, loss: 0.03196682780981064 2023-01-24 04:38:59.709793: step: 598/463, loss: 0.03797312080860138 2023-01-24 04:39:00.335768: step: 600/463, loss: 0.013108941726386547 2023-01-24 04:39:00.908729: step: 602/463, loss: 0.0034993926528841257 2023-01-24 04:39:01.590238: step: 604/463, loss: 0.017693879082798958 2023-01-24 04:39:02.182451: step: 606/463, loss: 0.0014404625399038196 2023-01-24 04:39:02.809229: step: 608/463, loss: 0.003694535233080387 2023-01-24 04:39:03.415174: step: 610/463, loss: 4.360695311333984e-05 2023-01-24 04:39:04.149842: step: 612/463, loss: 0.0003853631787933409 2023-01-24 04:39:04.764855: step: 614/463, loss: 0.00024459275300614536 2023-01-24 04:39:05.389824: step: 616/463, loss: 0.028303908184170723 2023-01-24 04:39:06.017621: step: 618/463, loss: 0.0005941252456977963 2023-01-24 04:39:06.644809: step: 620/463, loss: 0.016746044158935547 2023-01-24 04:39:07.238451: step: 622/463, loss: 0.0003903472679667175 2023-01-24 04:39:07.824357: step: 624/463, loss: 0.024911433458328247 2023-01-24 04:39:08.390189: step: 626/463, loss: 0.041521310806274414 2023-01-24 04:39:09.011775: step: 628/463, loss: 0.044566236436367035 2023-01-24 04:39:09.628353: step: 630/463, loss: 0.009688147343695164 2023-01-24 04:39:10.303674: step: 632/463, loss: 0.10730026662349701 2023-01-24 04:39:10.852846: step: 634/463, loss: 0.027705391868948936 2023-01-24 04:39:11.462015: step: 636/463, loss: 0.005960948299616575 2023-01-24 04:39:12.054432: step: 638/463, loss: 0.004408666864037514 2023-01-24 04:39:12.703476: step: 640/463, loss: 0.04632148891687393 2023-01-24 04:39:13.261760: step: 642/463, loss: 0.005062588024884462 2023-01-24 04:39:13.874398: step: 644/463, loss: 0.00033174885902553797 2023-01-24 04:39:14.496342: step: 646/463, loss: 0.00787564367055893 2023-01-24 04:39:15.156894: step: 648/463, loss: 0.07679491490125656 2023-01-24 04:39:15.759611: step: 650/463, loss: 0.017134273424744606 2023-01-24 04:39:16.381655: step: 652/463, loss: 0.00013606299762614071 2023-01-24 04:39:16.975550: step: 654/463, loss: 0.002743704477325082 2023-01-24 04:39:17.562955: step: 656/463, loss: 0.042979560792446136 2023-01-24 04:39:18.147276: step: 658/463, loss: 0.006811094470322132 2023-01-24 04:39:18.733924: step: 660/463, loss: 0.006995844189077616 2023-01-24 04:39:19.296493: step: 662/463, loss: 0.1619366705417633 2023-01-24 04:39:19.947536: step: 664/463, loss: 0.0009379011462442577 2023-01-24 04:39:20.548802: step: 666/463, loss: 0.1486087590456009 2023-01-24 04:39:21.149657: step: 668/463, loss: 0.0006898125284351408 2023-01-24 04:39:21.746385: step: 670/463, loss: 0.0007817331934347749 2023-01-24 04:39:22.323454: step: 672/463, loss: 0.01785426028072834 2023-01-24 04:39:22.985394: step: 674/463, loss: 0.005564748775213957 2023-01-24 04:39:23.612579: step: 676/463, loss: 0.009939735755324364 2023-01-24 04:39:24.203460: step: 678/463, loss: 0.007601333782076836 2023-01-24 04:39:24.793290: step: 680/463, loss: 0.01108794379979372 2023-01-24 04:39:25.402559: step: 682/463, loss: 0.017001325264573097 2023-01-24 04:39:25.957627: step: 684/463, loss: 0.025107501074671745 2023-01-24 04:39:26.557328: step: 686/463, loss: 0.0035102497786283493 2023-01-24 04:39:27.181438: step: 688/463, loss: 0.004108706954866648 2023-01-24 04:39:27.689644: step: 690/463, loss: 0.000315919634886086 2023-01-24 04:39:28.303302: step: 692/463, loss: 0.0011977856047451496 2023-01-24 04:39:28.871767: step: 694/463, loss: 7.98964683781378e-05 2023-01-24 04:39:29.461191: step: 696/463, loss: 0.0005440947134047747 2023-01-24 04:39:30.088840: step: 698/463, loss: 0.034743938595056534 2023-01-24 04:39:30.725738: step: 700/463, loss: 0.006214173045009375 2023-01-24 04:39:31.334020: step: 702/463, loss: 0.00016585469711571932 2023-01-24 04:39:31.904616: step: 704/463, loss: 0.057825587689876556 2023-01-24 04:39:32.533795: step: 706/463, loss: 0.016981564462184906 2023-01-24 04:39:33.121461: step: 708/463, loss: 0.030388062819838524 2023-01-24 04:39:33.716422: step: 710/463, loss: 0.00936510507017374 2023-01-24 04:39:34.294002: step: 712/463, loss: 0.011664063669741154 2023-01-24 04:39:34.873060: step: 714/463, loss: 0.010398375801742077 2023-01-24 04:39:35.467800: step: 716/463, loss: 0.001023126533254981 2023-01-24 04:39:36.045836: step: 718/463, loss: 0.07717477530241013 2023-01-24 04:39:36.534658: step: 720/463, loss: 0.0017669686349108815 2023-01-24 04:39:37.135416: step: 722/463, loss: 0.0054135811515152454 2023-01-24 04:39:37.820196: step: 724/463, loss: 8.840007649268955e-05 2023-01-24 04:39:38.409753: step: 726/463, loss: 0.0014150857459753752 2023-01-24 04:39:38.922936: step: 728/463, loss: 0.0014516629744321108 2023-01-24 04:39:39.548492: step: 730/463, loss: 0.10982697457075119 2023-01-24 04:39:40.200706: step: 732/463, loss: 0.07614944875240326 2023-01-24 04:39:40.736701: step: 734/463, loss: 0.019077781587839127 2023-01-24 04:39:41.337112: step: 736/463, loss: 0.0009775303769856691 2023-01-24 04:39:42.017836: step: 738/463, loss: 0.016003990545868874 2023-01-24 04:39:42.614210: step: 740/463, loss: 0.002546560950577259 2023-01-24 04:39:43.204731: step: 742/463, loss: 0.00016295938985422254 2023-01-24 04:39:43.806879: step: 744/463, loss: 0.0030551485251635313 2023-01-24 04:39:44.401051: step: 746/463, loss: 0.05976440757513046 2023-01-24 04:39:45.075987: step: 748/463, loss: 0.005893585737794638 2023-01-24 04:39:45.713742: step: 750/463, loss: 0.018692543730139732 2023-01-24 04:39:46.328602: step: 752/463, loss: 0.043402936309576035 2023-01-24 04:39:46.977452: step: 754/463, loss: 0.08064614981412888 2023-01-24 04:39:47.582050: step: 756/463, loss: 0.006998608820140362 2023-01-24 04:39:48.224719: step: 758/463, loss: 0.07806695997714996 2023-01-24 04:39:48.878194: step: 760/463, loss: 0.013026420027017593 2023-01-24 04:39:49.437878: step: 762/463, loss: 0.00034567678812891245 2023-01-24 04:39:50.104235: step: 764/463, loss: 0.0004127187130507082 2023-01-24 04:39:50.789850: step: 766/463, loss: 0.026834925636649132 2023-01-24 04:39:51.377043: step: 768/463, loss: 0.0027298880741000175 2023-01-24 04:39:51.955737: step: 770/463, loss: 0.02942761220037937 2023-01-24 04:39:52.580167: step: 772/463, loss: 0.0025796496775001287 2023-01-24 04:39:53.269543: step: 774/463, loss: 0.003045035060495138 2023-01-24 04:39:53.856574: step: 776/463, loss: 0.005927689839154482 2023-01-24 04:39:54.445577: step: 778/463, loss: 0.003598433919250965 2023-01-24 04:39:54.987998: step: 780/463, loss: 0.1727404147386551 2023-01-24 04:39:55.556375: step: 782/463, loss: 4.6617791667813435e-05 2023-01-24 04:39:56.185311: step: 784/463, loss: 0.020961705595254898 2023-01-24 04:39:56.779042: step: 786/463, loss: 0.18897892534732819 2023-01-24 04:39:57.380216: step: 788/463, loss: 0.006640390492975712 2023-01-24 04:39:57.978617: step: 790/463, loss: 0.021562987938523293 2023-01-24 04:39:58.656678: step: 792/463, loss: 0.012017210945487022 2023-01-24 04:39:59.244102: step: 794/463, loss: 0.004005698952823877 2023-01-24 04:39:59.923156: step: 796/463, loss: 0.026791753247380257 2023-01-24 04:40:00.537481: step: 798/463, loss: 0.0022525296080857515 2023-01-24 04:40:01.113069: step: 800/463, loss: 0.004900086671113968 2023-01-24 04:40:01.802910: step: 802/463, loss: 0.09184303879737854 2023-01-24 04:40:02.437649: step: 804/463, loss: 0.009676571935415268 2023-01-24 04:40:03.152188: step: 806/463, loss: 0.0026059786323457956 2023-01-24 04:40:03.743074: step: 808/463, loss: 0.04989391937851906 2023-01-24 04:40:04.278363: step: 810/463, loss: 0.009931264445185661 2023-01-24 04:40:04.897741: step: 812/463, loss: 0.026024105027318 2023-01-24 04:40:05.541536: step: 814/463, loss: 0.0005905491998419166 2023-01-24 04:40:06.198240: step: 816/463, loss: 0.008228331804275513 2023-01-24 04:40:06.831783: step: 818/463, loss: 0.0018181417835876346 2023-01-24 04:40:07.401043: step: 820/463, loss: 0.006443162448704243 2023-01-24 04:40:08.056305: step: 822/463, loss: 0.00018859248666558415 2023-01-24 04:40:08.659943: step: 824/463, loss: 0.0030499440617859364 2023-01-24 04:40:09.341598: step: 826/463, loss: 0.02094387076795101 2023-01-24 04:40:09.912279: step: 828/463, loss: 0.04119623079895973 2023-01-24 04:40:10.498490: step: 830/463, loss: 0.0035908017307519913 2023-01-24 04:40:11.045491: step: 832/463, loss: 0.001964488998055458 2023-01-24 04:40:11.634459: step: 834/463, loss: 0.0008214334375225008 2023-01-24 04:40:12.244944: step: 836/463, loss: 0.04633745923638344 2023-01-24 04:40:12.806217: step: 838/463, loss: 3.5378096072236076e-05 2023-01-24 04:40:13.410609: step: 840/463, loss: 0.00015187349345069379 2023-01-24 04:40:14.122734: step: 842/463, loss: 0.0011101494310423732 2023-01-24 04:40:14.746791: step: 844/463, loss: 0.020022006705403328 2023-01-24 04:40:15.366151: step: 846/463, loss: 0.001741610700264573 2023-01-24 04:40:15.958174: step: 848/463, loss: 0.0032914618495851755 2023-01-24 04:40:16.556477: step: 850/463, loss: 0.011812699027359486 2023-01-24 04:40:17.189787: step: 852/463, loss: 0.08439243584871292 2023-01-24 04:40:17.743707: step: 854/463, loss: 0.0011140432907268405 2023-01-24 04:40:18.415327: step: 856/463, loss: 0.019256513565778732 2023-01-24 04:40:19.068855: step: 858/463, loss: 0.170555979013443 2023-01-24 04:40:19.707218: step: 860/463, loss: 0.014372684061527252 2023-01-24 04:40:20.364148: step: 862/463, loss: 0.005521410144865513 2023-01-24 04:40:20.948398: step: 864/463, loss: 0.0008696223958395422 2023-01-24 04:40:21.598393: step: 866/463, loss: 0.010795632377266884 2023-01-24 04:40:22.226061: step: 868/463, loss: 0.0071609229780733585 2023-01-24 04:40:22.913485: step: 870/463, loss: 0.0010295568499714136 2023-01-24 04:40:23.475173: step: 872/463, loss: 0.6849566102027893 2023-01-24 04:40:24.061766: step: 874/463, loss: 0.000346811197232455 2023-01-24 04:40:24.719051: step: 876/463, loss: 0.027410496026277542 2023-01-24 04:40:25.323704: step: 878/463, loss: 0.0003196878533344716 2023-01-24 04:40:25.973963: step: 880/463, loss: 0.004681325517594814 2023-01-24 04:40:26.597694: step: 882/463, loss: 0.014082439243793488 2023-01-24 04:40:27.159228: step: 884/463, loss: 0.0005842610262334347 2023-01-24 04:40:27.742235: step: 886/463, loss: 0.11893736571073532 2023-01-24 04:40:28.365888: step: 888/463, loss: 0.019334862008690834 2023-01-24 04:40:28.951178: step: 890/463, loss: 1.9350363800185733e-05 2023-01-24 04:40:29.531749: step: 892/463, loss: 0.0038684175815433264 2023-01-24 04:40:30.089938: step: 894/463, loss: 0.23018212616443634 2023-01-24 04:40:30.719052: step: 896/463, loss: 0.05963263288140297 2023-01-24 04:40:31.355119: step: 898/463, loss: 0.015097756870090961 2023-01-24 04:40:32.014231: step: 900/463, loss: 0.001594790956005454 2023-01-24 04:40:32.605866: step: 902/463, loss: 0.005346574354916811 2023-01-24 04:40:33.132558: step: 904/463, loss: 0.00048248874372802675 2023-01-24 04:40:33.727559: step: 906/463, loss: 0.0010599014349281788 2023-01-24 04:40:34.309962: step: 908/463, loss: 0.005658330861479044 2023-01-24 04:40:34.914850: step: 910/463, loss: 0.006876115687191486 2023-01-24 04:40:35.541751: step: 912/463, loss: 0.003231428563594818 2023-01-24 04:40:36.147550: step: 914/463, loss: 0.002709886059165001 2023-01-24 04:40:36.703137: step: 916/463, loss: 0.0007764467154629529 2023-01-24 04:40:37.271418: step: 918/463, loss: 0.001592060667462647 2023-01-24 04:40:37.955160: step: 920/463, loss: 0.0035088066942989826 2023-01-24 04:40:38.564524: step: 922/463, loss: 0.05347345769405365 2023-01-24 04:40:39.180472: step: 924/463, loss: 0.001194766373373568 2023-01-24 04:40:39.737779: step: 926/463, loss: 0.00010651007323758677 ================================================== Loss: 0.047 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32125, 'r': 0.320030834914611, 'f1': 0.3206392585551331}, 'combined': 0.23626050630378226, 'epoch': 36} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.38002154757318957, 'r': 0.31303694669205145, 'f1': 0.3432921922575033}, 'combined': 0.24151209505552998, 'epoch': 36} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3162643403441683, 'r': 0.31386385199240985, 'f1': 0.31505952380952384}, 'combined': 0.23214912280701755, 'epoch': 36} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37447980370599415, 'r': 0.3087987910141051, 'f1': 0.3384824624602243}, 'combined': 0.24032254834675923, 'epoch': 36} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33566391460177586, 'r': 0.3305684472074415, 'f1': 0.3330966953696399}, 'combined': 0.2454396702723662, 'epoch': 36} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38521738278187234, 'r': 0.30649173424828446, 'f1': 0.3413745483602001}, 'combined': 0.24237592933574206, 'epoch': 36} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2703488372093023, 'r': 0.33214285714285713, 'f1': 0.2980769230769231}, 'combined': 0.1987179487179487, 'epoch': 36} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.26785714285714285, 'r': 0.32608695652173914, 'f1': 0.29411764705882354}, 'combined': 0.14705882352941177, 'epoch': 36} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.13793103448275862, 'f1': 0.20512820512820515}, 'combined': 0.13675213675213677, 'epoch': 36} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3207567698259188, 'r': 0.3146703036053131, 'f1': 0.3176843869731801}, 'combined': 0.23408323250655372, 'epoch': 25} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3686692880345029, 'r': 0.31751883707683626, 'f1': 0.34118761114866797}, 'combined': 0.24224320391555423, 'epoch': 25} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3392857142857143, 'r': 0.41304347826086957, 'f1': 0.3725490196078432}, 'combined': 0.1862745098039216, 'epoch': 25} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3565039176745541, 'r': 0.34365083525364987, 'f1': 0.3499594013114463}, 'combined': 0.25786482201896044, 'epoch': 34} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37789489678955906, 'r': 0.31320721140025465, 'f1': 0.34252364570514954}, 'combined': 0.24319178845065617, 'epoch': 34} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6388888888888888, 'r': 0.19827586206896552, 'f1': 0.3026315789473684}, 'combined': 0.20175438596491227, 'epoch': 34} ****************************** Epoch: 37 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:43:13.258504: step: 2/463, loss: 0.015925686806440353 2023-01-24 04:43:13.831404: step: 4/463, loss: 0.1284734010696411 2023-01-24 04:43:14.471517: step: 6/463, loss: 0.01938653364777565 2023-01-24 04:43:15.005768: step: 8/463, loss: 0.0006967397057451308 2023-01-24 04:43:15.666473: step: 10/463, loss: 0.00036833647754974663 2023-01-24 04:43:16.250799: step: 12/463, loss: 0.00921196024864912 2023-01-24 04:43:16.845957: step: 14/463, loss: 5.457161751110107e-05 2023-01-24 04:43:17.470577: step: 16/463, loss: 2.5349106788635254 2023-01-24 04:43:18.087869: step: 18/463, loss: 0.014386044815182686 2023-01-24 04:43:18.647867: step: 20/463, loss: 0.0058361138217151165 2023-01-24 04:43:19.287857: step: 22/463, loss: 0.04466794431209564 2023-01-24 04:43:19.829222: step: 24/463, loss: 0.014767187647521496 2023-01-24 04:43:20.620674: step: 26/463, loss: 0.009759888052940369 2023-01-24 04:43:21.307136: step: 28/463, loss: 0.10354077816009521 2023-01-24 04:43:21.938447: step: 30/463, loss: 0.00030505500035360456 2023-01-24 04:43:22.505321: step: 32/463, loss: 0.015149957500398159 2023-01-24 04:43:23.150923: step: 34/463, loss: 0.005501517094671726 2023-01-24 04:43:23.765297: step: 36/463, loss: 0.005676018539816141 2023-01-24 04:43:24.396483: step: 38/463, loss: 0.012289422564208508 2023-01-24 04:43:25.073952: step: 40/463, loss: 0.11439238488674164 2023-01-24 04:43:25.717122: step: 42/463, loss: 0.01677403412759304 2023-01-24 04:43:26.390288: step: 44/463, loss: 0.03556295484304428 2023-01-24 04:43:26.976359: step: 46/463, loss: 0.00027532782405614853 2023-01-24 04:43:27.546409: step: 48/463, loss: 0.09015578031539917 2023-01-24 04:43:28.176874: step: 50/463, loss: 0.006938501261174679 2023-01-24 04:43:28.803808: step: 52/463, loss: 0.004936607554554939 2023-01-24 04:43:29.414661: step: 54/463, loss: 0.009095422923564911 2023-01-24 04:43:30.020459: step: 56/463, loss: 0.0012269522994756699 2023-01-24 04:43:30.616898: step: 58/463, loss: 0.002221311442553997 2023-01-24 04:43:31.296763: step: 60/463, loss: 0.008022718131542206 2023-01-24 04:43:31.912871: step: 62/463, loss: 0.18175439536571503 2023-01-24 04:43:32.534002: step: 64/463, loss: 0.010690509341657162 2023-01-24 04:43:33.169260: step: 66/463, loss: 0.005187695845961571 2023-01-24 04:43:33.902073: step: 68/463, loss: 0.0065385811030864716 2023-01-24 04:43:34.482987: step: 70/463, loss: 0.00010391232353867963 2023-01-24 04:43:35.180759: step: 72/463, loss: 0.01977268047630787 2023-01-24 04:43:35.779921: step: 74/463, loss: 0.000464979384560138 2023-01-24 04:43:36.412879: step: 76/463, loss: 5.4299212933983654e-05 2023-01-24 04:43:37.043739: step: 78/463, loss: 0.001807558350265026 2023-01-24 04:43:37.623852: step: 80/463, loss: 0.00015654343587812036 2023-01-24 04:43:38.191525: step: 82/463, loss: 0.002695757197216153 2023-01-24 04:43:38.786896: step: 84/463, loss: 0.0004905984969809651 2023-01-24 04:43:39.397716: step: 86/463, loss: 6.309442687779665e-05 2023-01-24 04:43:39.947733: step: 88/463, loss: 0.0002688544918783009 2023-01-24 04:43:40.626323: step: 90/463, loss: 0.16342560946941376 2023-01-24 04:43:41.289506: step: 92/463, loss: 0.0018617324531078339 2023-01-24 04:43:41.883435: step: 94/463, loss: 0.04855068400502205 2023-01-24 04:43:42.459271: step: 96/463, loss: 0.00017389397544320673 2023-01-24 04:43:43.180762: step: 98/463, loss: 0.014723521657288074 2023-01-24 04:43:43.821347: step: 100/463, loss: 0.0028962327633053064 2023-01-24 04:43:44.409936: step: 102/463, loss: 0.000671829329803586 2023-01-24 04:43:45.039146: step: 104/463, loss: 0.002442521508783102 2023-01-24 04:43:45.699326: step: 106/463, loss: 0.0005844722036272287 2023-01-24 04:43:46.373788: step: 108/463, loss: 0.45019006729125977 2023-01-24 04:43:46.976819: step: 110/463, loss: 0.0014309384860098362 2023-01-24 04:43:47.614911: step: 112/463, loss: 0.00034239585511386395 2023-01-24 04:43:48.163996: step: 114/463, loss: 0.0037274265196174383 2023-01-24 04:43:48.740845: step: 116/463, loss: 0.003234335221350193 2023-01-24 04:43:49.334128: step: 118/463, loss: 0.0004548080323729664 2023-01-24 04:43:49.982628: step: 120/463, loss: 0.0017193283420056105 2023-01-24 04:43:50.609607: step: 122/463, loss: 0.00017995692905969918 2023-01-24 04:43:51.272654: step: 124/463, loss: 0.0038415221497416496 2023-01-24 04:43:51.943827: step: 126/463, loss: 0.00393354007974267 2023-01-24 04:43:52.525807: step: 128/463, loss: 0.0006179808406159282 2023-01-24 04:43:53.166501: step: 130/463, loss: 0.001107672811485827 2023-01-24 04:43:53.713295: step: 132/463, loss: 0.0016522861551493406 2023-01-24 04:43:54.315899: step: 134/463, loss: 0.004426448605954647 2023-01-24 04:43:54.884373: step: 136/463, loss: 0.00022677789092995226 2023-01-24 04:43:55.474192: step: 138/463, loss: 0.002778515685349703 2023-01-24 04:43:56.126258: step: 140/463, loss: 0.0012254421599209309 2023-01-24 04:43:56.706623: step: 142/463, loss: 0.00047467651893384755 2023-01-24 04:43:57.282926: step: 144/463, loss: 0.38797128200531006 2023-01-24 04:43:57.866669: step: 146/463, loss: 0.004194560460746288 2023-01-24 04:43:58.462907: step: 148/463, loss: 0.03994058072566986 2023-01-24 04:43:59.100633: step: 150/463, loss: 0.0035859174095094204 2023-01-24 04:43:59.705183: step: 152/463, loss: 0.03405928611755371 2023-01-24 04:44:00.321537: step: 154/463, loss: 0.0033042747527360916 2023-01-24 04:44:00.893443: step: 156/463, loss: 0.00043253094190731645 2023-01-24 04:44:01.470431: step: 158/463, loss: 0.01293222140520811 2023-01-24 04:44:02.131516: step: 160/463, loss: 0.004078219179064035 2023-01-24 04:44:02.756311: step: 162/463, loss: 7.907477993285283e-05 2023-01-24 04:44:03.385434: step: 164/463, loss: 0.0002572746598161757 2023-01-24 04:44:03.975905: step: 166/463, loss: 0.0017893232870846987 2023-01-24 04:44:04.626468: step: 168/463, loss: 0.00020371029677335173 2023-01-24 04:44:05.300446: step: 170/463, loss: 0.011961580254137516 2023-01-24 04:44:05.929629: step: 172/463, loss: 0.0008393466123379767 2023-01-24 04:44:06.491523: step: 174/463, loss: 0.003518938785418868 2023-01-24 04:44:07.076850: step: 176/463, loss: 1.3841745385434479e-05 2023-01-24 04:44:07.686707: step: 178/463, loss: 0.003632677486166358 2023-01-24 04:44:08.337724: step: 180/463, loss: 0.0005513997166417539 2023-01-24 04:44:08.900652: step: 182/463, loss: 0.0026229764334857464 2023-01-24 04:44:09.481684: step: 184/463, loss: 0.008434385061264038 2023-01-24 04:44:10.040081: step: 186/463, loss: 0.4827212691307068 2023-01-24 04:44:10.583097: step: 188/463, loss: 0.0035966832656413317 2023-01-24 04:44:11.204097: step: 190/463, loss: 0.0018845020094886422 2023-01-24 04:44:11.829964: step: 192/463, loss: 0.002707409206777811 2023-01-24 04:44:12.443596: step: 194/463, loss: 0.0011578865814954042 2023-01-24 04:44:13.062371: step: 196/463, loss: 0.001966217067092657 2023-01-24 04:44:13.586852: step: 198/463, loss: 0.0005960768321529031 2023-01-24 04:44:14.186539: step: 200/463, loss: 0.32067203521728516 2023-01-24 04:44:14.761031: step: 202/463, loss: 0.015540058724582195 2023-01-24 04:44:15.361192: step: 204/463, loss: 0.002846715273335576 2023-01-24 04:44:15.985767: step: 206/463, loss: 0.004527293145656586 2023-01-24 04:44:16.591851: step: 208/463, loss: 0.0106010427698493 2023-01-24 04:44:17.170598: step: 210/463, loss: 0.00639278395101428 2023-01-24 04:44:17.786486: step: 212/463, loss: 0.03609781339764595 2023-01-24 04:44:18.438831: step: 214/463, loss: 0.013216461054980755 2023-01-24 04:44:19.057752: step: 216/463, loss: 0.012052077800035477 2023-01-24 04:44:19.697370: step: 218/463, loss: 0.06721899658441544 2023-01-24 04:44:20.308392: step: 220/463, loss: 0.007395126856863499 2023-01-24 04:44:21.002339: step: 222/463, loss: 0.014693491160869598 2023-01-24 04:44:21.566195: step: 224/463, loss: 0.000899296544957906 2023-01-24 04:44:22.171332: step: 226/463, loss: 0.02759615331888199 2023-01-24 04:44:22.770005: step: 228/463, loss: 0.0966109111905098 2023-01-24 04:44:23.377826: step: 230/463, loss: 0.016918528825044632 2023-01-24 04:44:24.002169: step: 232/463, loss: 0.022550635039806366 2023-01-24 04:44:24.651211: step: 234/463, loss: 0.00030698030604980886 2023-01-24 04:44:25.258843: step: 236/463, loss: 0.02659011259675026 2023-01-24 04:44:25.813897: step: 238/463, loss: 0.02552046626806259 2023-01-24 04:44:26.474360: step: 240/463, loss: 3.1026546955108643 2023-01-24 04:44:27.157406: step: 242/463, loss: 0.013677166774868965 2023-01-24 04:44:27.785004: step: 244/463, loss: 0.13480009138584137 2023-01-24 04:44:28.384532: step: 246/463, loss: 0.016980769112706184 2023-01-24 04:44:29.095284: step: 248/463, loss: 0.0002069400652544573 2023-01-24 04:44:29.747018: step: 250/463, loss: 0.011860392056405544 2023-01-24 04:44:30.326952: step: 252/463, loss: 0.016221310943365097 2023-01-24 04:44:30.882791: step: 254/463, loss: 0.005877818912267685 2023-01-24 04:44:31.466888: step: 256/463, loss: 0.01959020085632801 2023-01-24 04:44:32.121059: step: 258/463, loss: 0.002157642040401697 2023-01-24 04:44:32.749807: step: 260/463, loss: 0.001956917578354478 2023-01-24 04:44:33.347595: step: 262/463, loss: 0.015780068933963776 2023-01-24 04:44:33.987796: step: 264/463, loss: 0.008614979684352875 2023-01-24 04:44:34.638457: step: 266/463, loss: 0.013734927400946617 2023-01-24 04:44:35.212398: step: 268/463, loss: 0.0008622343884781003 2023-01-24 04:44:35.797175: step: 270/463, loss: 0.06798648089170456 2023-01-24 04:44:36.382974: step: 272/463, loss: 0.0009369360050186515 2023-01-24 04:44:37.026731: step: 274/463, loss: 0.035179074853658676 2023-01-24 04:44:37.633458: step: 276/463, loss: 0.02295459620654583 2023-01-24 04:44:38.280721: step: 278/463, loss: 0.0066400207579135895 2023-01-24 04:44:38.863519: step: 280/463, loss: 0.09455142915248871 2023-01-24 04:44:39.604005: step: 282/463, loss: 0.059199314564466476 2023-01-24 04:44:40.252435: step: 284/463, loss: 0.01033067237585783 2023-01-24 04:44:40.838382: step: 286/463, loss: 0.5607481598854065 2023-01-24 04:44:41.397782: step: 288/463, loss: 0.006166654638946056 2023-01-24 04:44:42.021993: step: 290/463, loss: 0.007604492362588644 2023-01-24 04:44:42.662204: step: 292/463, loss: 0.01320961769670248 2023-01-24 04:44:43.276366: step: 294/463, loss: 0.051836952567100525 2023-01-24 04:44:43.883379: step: 296/463, loss: 0.03406181558966637 2023-01-24 04:44:44.490135: step: 298/463, loss: 0.001170820090919733 2023-01-24 04:44:45.078770: step: 300/463, loss: 0.012097871862351894 2023-01-24 04:44:45.632572: step: 302/463, loss: 0.03724971041083336 2023-01-24 04:44:46.244921: step: 304/463, loss: 0.030470777302980423 2023-01-24 04:44:46.829626: step: 306/463, loss: 0.011723075062036514 2023-01-24 04:44:47.425680: step: 308/463, loss: 0.0034146239049732685 2023-01-24 04:44:48.053504: step: 310/463, loss: 0.0016246286686509848 2023-01-24 04:44:48.623038: step: 312/463, loss: 0.0026414867024868727 2023-01-24 04:44:49.263367: step: 314/463, loss: 0.0037011061795055866 2023-01-24 04:44:49.892909: step: 316/463, loss: 0.008898429572582245 2023-01-24 04:44:50.562861: step: 318/463, loss: 0.000204692711122334 2023-01-24 04:44:51.193561: step: 320/463, loss: 0.00016486746608279645 2023-01-24 04:44:51.737527: step: 322/463, loss: 0.0003319421666674316 2023-01-24 04:44:52.331713: step: 324/463, loss: 0.04314819723367691 2023-01-24 04:44:52.884656: step: 326/463, loss: 0.007910536602139473 2023-01-24 04:44:53.498338: step: 328/463, loss: 0.012681147083640099 2023-01-24 04:44:54.094579: step: 330/463, loss: 0.18922637403011322 2023-01-24 04:44:54.724095: step: 332/463, loss: 0.06726793944835663 2023-01-24 04:44:55.269147: step: 334/463, loss: 0.004655348137021065 2023-01-24 04:44:55.842423: step: 336/463, loss: 0.004608979914337397 2023-01-24 04:44:56.416905: step: 338/463, loss: 0.024997835978865623 2023-01-24 04:44:57.046146: step: 340/463, loss: 0.015666527673602104 2023-01-24 04:44:57.608202: step: 342/463, loss: 0.00251259608194232 2023-01-24 04:44:58.190222: step: 344/463, loss: 0.0011087666498497128 2023-01-24 04:44:58.815461: step: 346/463, loss: 0.001653894898481667 2023-01-24 04:44:59.430076: step: 348/463, loss: 0.42081642150878906 2023-01-24 04:45:00.072232: step: 350/463, loss: 0.09854322671890259 2023-01-24 04:45:00.700061: step: 352/463, loss: 0.0139821358025074 2023-01-24 04:45:01.335132: step: 354/463, loss: 0.02077757939696312 2023-01-24 04:45:01.932332: step: 356/463, loss: 0.004520373418927193 2023-01-24 04:45:02.494024: step: 358/463, loss: 0.0021381170954555273 2023-01-24 04:45:03.049351: step: 360/463, loss: 0.011525973677635193 2023-01-24 04:45:03.660327: step: 362/463, loss: 0.00524140102788806 2023-01-24 04:45:04.240447: step: 364/463, loss: 0.002586216200143099 2023-01-24 04:45:04.814107: step: 366/463, loss: 0.016752595081925392 2023-01-24 04:45:05.401252: step: 368/463, loss: 0.00037584544043056667 2023-01-24 04:45:05.933978: step: 370/463, loss: 0.05501309782266617 2023-01-24 04:45:06.507381: step: 372/463, loss: 0.010416027158498764 2023-01-24 04:45:07.075108: step: 374/463, loss: 0.013579539023339748 2023-01-24 04:45:07.701264: step: 376/463, loss: 0.01982123591005802 2023-01-24 04:45:08.360137: step: 378/463, loss: 0.01334003359079361 2023-01-24 04:45:08.976308: step: 380/463, loss: 0.03021981008350849 2023-01-24 04:45:09.541444: step: 382/463, loss: 0.012180507183074951 2023-01-24 04:45:10.106472: step: 384/463, loss: 0.003944627474993467 2023-01-24 04:45:10.729226: step: 386/463, loss: 0.008726648055016994 2023-01-24 04:45:11.303269: step: 388/463, loss: 0.0018098487053066492 2023-01-24 04:45:11.883764: step: 390/463, loss: 0.012420181185007095 2023-01-24 04:45:12.512017: step: 392/463, loss: 0.05934207886457443 2023-01-24 04:45:13.158162: step: 394/463, loss: 0.022653451189398766 2023-01-24 04:45:13.721699: step: 396/463, loss: 0.0001656046079006046 2023-01-24 04:45:14.334336: step: 398/463, loss: 0.0014117197133600712 2023-01-24 04:45:14.961831: step: 400/463, loss: 0.012813691981136799 2023-01-24 04:45:15.527670: step: 402/463, loss: 0.00034777450491674244 2023-01-24 04:45:16.132952: step: 404/463, loss: 0.0010485026286914945 2023-01-24 04:45:16.709920: step: 406/463, loss: 0.00021904638560954481 2023-01-24 04:45:17.251721: step: 408/463, loss: 0.0006512750405818224 2023-01-24 04:45:17.897931: step: 410/463, loss: 0.004915330093353987 2023-01-24 04:45:18.530407: step: 412/463, loss: 0.0008318633772432804 2023-01-24 04:45:19.123289: step: 414/463, loss: 0.0019312752410769463 2023-01-24 04:45:19.807917: step: 416/463, loss: 0.18539226055145264 2023-01-24 04:45:20.436072: step: 418/463, loss: 0.05309020355343819 2023-01-24 04:45:20.995743: step: 420/463, loss: 0.0025338686536997557 2023-01-24 04:45:21.616173: step: 422/463, loss: 0.012110315263271332 2023-01-24 04:45:22.278088: step: 424/463, loss: 0.16335336863994598 2023-01-24 04:45:22.910468: step: 426/463, loss: 0.00011610922956606373 2023-01-24 04:45:23.579880: step: 428/463, loss: 0.009560694918036461 2023-01-24 04:45:24.205585: step: 430/463, loss: 0.04147922620177269 2023-01-24 04:45:24.828471: step: 432/463, loss: 0.02366034686565399 2023-01-24 04:45:25.452835: step: 434/463, loss: 0.021077027544379234 2023-01-24 04:45:26.132072: step: 436/463, loss: 0.03035559505224228 2023-01-24 04:45:26.711694: step: 438/463, loss: 0.007190102711319923 2023-01-24 04:45:27.304034: step: 440/463, loss: 7.001210178714246e-05 2023-01-24 04:45:27.841486: step: 442/463, loss: 0.0017333229770883918 2023-01-24 04:45:28.444847: step: 444/463, loss: 0.0016283588483929634 2023-01-24 04:45:29.050076: step: 446/463, loss: 0.02017482928931713 2023-01-24 04:45:29.697472: step: 448/463, loss: 0.0006280511734075844 2023-01-24 04:45:30.260241: step: 450/463, loss: 0.056460149586200714 2023-01-24 04:45:30.842402: step: 452/463, loss: 0.0014573701191693544 2023-01-24 04:45:31.411075: step: 454/463, loss: 0.0014028857694938779 2023-01-24 04:45:31.971687: step: 456/463, loss: 0.16121962666511536 2023-01-24 04:45:32.551103: step: 458/463, loss: 0.17215877771377563 2023-01-24 04:45:33.142159: step: 460/463, loss: 0.0054393489845097065 2023-01-24 04:45:33.729234: step: 462/463, loss: 0.444570928812027 2023-01-24 04:45:34.411117: step: 464/463, loss: 0.0015637845499441028 2023-01-24 04:45:35.042386: step: 466/463, loss: 0.10342828929424286 2023-01-24 04:45:35.593954: step: 468/463, loss: 0.001159918843768537 2023-01-24 04:45:36.123891: step: 470/463, loss: 0.003532806411385536 2023-01-24 04:45:36.692858: step: 472/463, loss: 0.0076744272373616695 2023-01-24 04:45:37.313996: step: 474/463, loss: 0.00014217857096809894 2023-01-24 04:45:37.878915: step: 476/463, loss: 0.002200124319642782 2023-01-24 04:45:38.551646: step: 478/463, loss: 0.0032019633799791336 2023-01-24 04:45:39.134805: step: 480/463, loss: 0.018552714958786964 2023-01-24 04:45:39.741695: step: 482/463, loss: 0.020878735929727554 2023-01-24 04:45:40.395463: step: 484/463, loss: 0.018028713762760162 2023-01-24 04:45:40.954826: step: 486/463, loss: 0.025008901953697205 2023-01-24 04:45:41.590754: step: 488/463, loss: 0.007390551269054413 2023-01-24 04:45:42.167292: step: 490/463, loss: 0.0012768340529873967 2023-01-24 04:45:42.818568: step: 492/463, loss: 0.0003434084355831146 2023-01-24 04:45:43.463640: step: 494/463, loss: 0.04078767076134682 2023-01-24 04:45:44.105327: step: 496/463, loss: 0.00483100488781929 2023-01-24 04:45:44.701761: step: 498/463, loss: 0.02777959778904915 2023-01-24 04:45:45.325843: step: 500/463, loss: 0.008002035319805145 2023-01-24 04:45:45.885534: step: 502/463, loss: 0.00014723942149430513 2023-01-24 04:45:46.500625: step: 504/463, loss: 0.022711673751473427 2023-01-24 04:45:47.141831: step: 506/463, loss: 0.012040197849273682 2023-01-24 04:45:47.811765: step: 508/463, loss: 0.0013332802336663008 2023-01-24 04:45:48.420994: step: 510/463, loss: 0.010531298816204071 2023-01-24 04:45:48.996666: step: 512/463, loss: 0.0006033892859704792 2023-01-24 04:45:49.590819: step: 514/463, loss: 1.1693079613905866e-05 2023-01-24 04:45:50.143244: step: 516/463, loss: 0.0045654308050870895 2023-01-24 04:45:50.731878: step: 518/463, loss: 0.0008171007502824068 2023-01-24 04:45:51.300993: step: 520/463, loss: 0.03508278355002403 2023-01-24 04:45:51.901331: step: 522/463, loss: 0.0011541288113221526 2023-01-24 04:45:52.424343: step: 524/463, loss: 0.0029247431084513664 2023-01-24 04:45:53.002220: step: 526/463, loss: 0.003104378003627062 2023-01-24 04:45:53.596169: step: 528/463, loss: 0.04946824535727501 2023-01-24 04:45:54.210810: step: 530/463, loss: 0.0030297490302473307 2023-01-24 04:45:54.783745: step: 532/463, loss: 0.006193089764565229 2023-01-24 04:45:55.341151: step: 534/463, loss: 0.0049389139749109745 2023-01-24 04:45:55.910245: step: 536/463, loss: 0.000421927310526371 2023-01-24 04:45:56.456585: step: 538/463, loss: 8.814280590740964e-05 2023-01-24 04:45:57.157371: step: 540/463, loss: 0.00819741003215313 2023-01-24 04:45:57.747644: step: 542/463, loss: 0.0002509946352802217 2023-01-24 04:45:58.448519: step: 544/463, loss: 0.018666474148631096 2023-01-24 04:45:59.194067: step: 546/463, loss: 0.0007343461620621383 2023-01-24 04:45:59.798404: step: 548/463, loss: 0.0583709217607975 2023-01-24 04:46:00.324571: step: 550/463, loss: 0.0001603500422788784 2023-01-24 04:46:00.914155: step: 552/463, loss: 0.000939822755753994 2023-01-24 04:46:01.544825: step: 554/463, loss: 6.452210800489411e-05 2023-01-24 04:46:02.128963: step: 556/463, loss: 0.013769018463790417 2023-01-24 04:46:02.766840: step: 558/463, loss: 0.048532430082559586 2023-01-24 04:46:03.339628: step: 560/463, loss: 0.0027629502583295107 2023-01-24 04:46:04.046917: step: 562/463, loss: 0.20478509366512299 2023-01-24 04:46:04.681729: step: 564/463, loss: 0.02836715616285801 2023-01-24 04:46:05.341326: step: 566/463, loss: 0.020669756457209587 2023-01-24 04:46:05.996564: step: 568/463, loss: 0.01727297157049179 2023-01-24 04:46:06.636644: step: 570/463, loss: 6.225648403167725 2023-01-24 04:46:07.259742: step: 572/463, loss: 0.004517556168138981 2023-01-24 04:46:07.834003: step: 574/463, loss: 0.11431720107793808 2023-01-24 04:46:08.395319: step: 576/463, loss: 0.018515853211283684 2023-01-24 04:46:09.013255: step: 578/463, loss: 0.013264975510537624 2023-01-24 04:46:09.725161: step: 580/463, loss: 0.030864093452692032 2023-01-24 04:46:10.308366: step: 582/463, loss: 0.008248608559370041 2023-01-24 04:46:10.904165: step: 584/463, loss: 0.00031893333652988076 2023-01-24 04:46:11.501370: step: 586/463, loss: 0.0016082542715594172 2023-01-24 04:46:12.050451: step: 588/463, loss: 0.0006486453930847347 2023-01-24 04:46:12.678791: step: 590/463, loss: 0.01677672006189823 2023-01-24 04:46:13.247796: step: 592/463, loss: 0.001418036175891757 2023-01-24 04:46:13.882228: step: 594/463, loss: 0.13934968411922455 2023-01-24 04:46:14.434343: step: 596/463, loss: 0.0006156499148346484 2023-01-24 04:46:15.075300: step: 598/463, loss: 0.003790837014093995 2023-01-24 04:46:15.678297: step: 600/463, loss: 0.5827410221099854 2023-01-24 04:46:16.247512: step: 602/463, loss: 0.0020878047216683626 2023-01-24 04:46:16.867681: step: 604/463, loss: 0.03782229498028755 2023-01-24 04:46:17.471896: step: 606/463, loss: 0.0008112310315482318 2023-01-24 04:46:18.099016: step: 608/463, loss: 0.09476321935653687 2023-01-24 04:46:18.764717: step: 610/463, loss: 3.6237805034033954e-05 2023-01-24 04:46:19.317086: step: 612/463, loss: 7.480369822587818e-05 2023-01-24 04:46:19.882725: step: 614/463, loss: 0.0022156850900501013 2023-01-24 04:46:20.444818: step: 616/463, loss: 2.512464561732486e-05 2023-01-24 04:46:21.055684: step: 618/463, loss: 0.003231775714084506 2023-01-24 04:46:21.704435: step: 620/463, loss: 0.011265301145613194 2023-01-24 04:46:22.349704: step: 622/463, loss: 0.0040325140580534935 2023-01-24 04:46:22.925023: step: 624/463, loss: 0.00043433421524241567 2023-01-24 04:46:23.522423: step: 626/463, loss: 0.01722455769777298 2023-01-24 04:46:24.136339: step: 628/463, loss: 0.00014486753207165748 2023-01-24 04:46:24.730124: step: 630/463, loss: 0.0007838544552214444 2023-01-24 04:46:25.296238: step: 632/463, loss: 0.012911086902022362 2023-01-24 04:46:25.841318: step: 634/463, loss: 0.004618994891643524 2023-01-24 04:46:26.415421: step: 636/463, loss: 0.025439461693167686 2023-01-24 04:46:27.012585: step: 638/463, loss: 0.003372404258698225 2023-01-24 04:46:27.595368: step: 640/463, loss: 0.03195835277438164 2023-01-24 04:46:28.242239: step: 642/463, loss: 0.03204534575343132 2023-01-24 04:46:28.909361: step: 644/463, loss: 0.16264154016971588 2023-01-24 04:46:29.501565: step: 646/463, loss: 0.007832168601453304 2023-01-24 04:46:30.089539: step: 648/463, loss: 0.08645153790712357 2023-01-24 04:46:30.679979: step: 650/463, loss: 0.00018185481894761324 2023-01-24 04:46:31.256362: step: 652/463, loss: 0.025547439232468605 2023-01-24 04:46:31.825313: step: 654/463, loss: 0.0037446673959493637 2023-01-24 04:46:32.391231: step: 656/463, loss: 0.2571095824241638 2023-01-24 04:46:33.009570: step: 658/463, loss: 0.012654936872422695 2023-01-24 04:46:33.674448: step: 660/463, loss: 0.024374214932322502 2023-01-24 04:46:34.260917: step: 662/463, loss: 0.004268074873834848 2023-01-24 04:46:34.884138: step: 664/463, loss: 0.0020136188250035048 2023-01-24 04:46:35.500418: step: 666/463, loss: 0.05725840479135513 2023-01-24 04:46:36.104537: step: 668/463, loss: 8.681348117534071e-05 2023-01-24 04:46:36.673911: step: 670/463, loss: 0.0028000895399600267 2023-01-24 04:46:37.329931: step: 672/463, loss: 0.009713009931147099 2023-01-24 04:46:37.962323: step: 674/463, loss: 0.005357592832297087 2023-01-24 04:46:38.635348: step: 676/463, loss: 0.0007312570814974606 2023-01-24 04:46:39.273057: step: 678/463, loss: 0.022476792335510254 2023-01-24 04:46:39.920688: step: 680/463, loss: 0.008066995069384575 2023-01-24 04:46:40.525154: step: 682/463, loss: 0.0018116147257387638 2023-01-24 04:46:41.126523: step: 684/463, loss: 0.0038374296855181456 2023-01-24 04:46:41.768248: step: 686/463, loss: 0.004355157725512981 2023-01-24 04:46:42.337270: step: 688/463, loss: 0.0019263519207015634 2023-01-24 04:46:43.084166: step: 690/463, loss: 0.0005903561832383275 2023-01-24 04:46:43.689993: step: 692/463, loss: 0.22368377447128296 2023-01-24 04:46:44.297657: step: 694/463, loss: 0.015418034046888351 2023-01-24 04:46:44.939134: step: 696/463, loss: 0.00011264804197708145 2023-01-24 04:46:45.532122: step: 698/463, loss: 0.15834298729896545 2023-01-24 04:46:46.112403: step: 700/463, loss: 0.004373855888843536 2023-01-24 04:46:46.700703: step: 702/463, loss: 0.0008607544004917145 2023-01-24 04:46:47.265770: step: 704/463, loss: 0.011030750349164009 2023-01-24 04:46:47.843168: step: 706/463, loss: 0.0029105967842042446 2023-01-24 04:46:48.437478: step: 708/463, loss: 0.4664709270000458 2023-01-24 04:46:49.054331: step: 710/463, loss: 0.001995642436668277 2023-01-24 04:46:49.684776: step: 712/463, loss: 0.010487283580005169 2023-01-24 04:46:50.310550: step: 714/463, loss: 0.009157008491456509 2023-01-24 04:46:50.885337: step: 716/463, loss: 0.0016009201062843204 2023-01-24 04:46:51.501269: step: 718/463, loss: 0.003716432023793459 2023-01-24 04:46:52.080557: step: 720/463, loss: 0.0014902635011821985 2023-01-24 04:46:52.747533: step: 722/463, loss: 0.009615927003324032 2023-01-24 04:46:53.319178: step: 724/463, loss: 0.006865447387099266 2023-01-24 04:46:53.978785: step: 726/463, loss: 0.0001768609945429489 2023-01-24 04:46:54.609584: step: 728/463, loss: 0.0008733849390409887 2023-01-24 04:46:55.272849: step: 730/463, loss: 0.018005194142460823 2023-01-24 04:46:55.901375: step: 732/463, loss: 0.019280152395367622 2023-01-24 04:46:56.516687: step: 734/463, loss: 0.00034078213502652943 2023-01-24 04:46:57.152007: step: 736/463, loss: 0.015452605672180653 2023-01-24 04:46:57.762999: step: 738/463, loss: 0.315398633480072 2023-01-24 04:46:58.339231: step: 740/463, loss: 0.00023309765674639493 2023-01-24 04:46:59.048576: step: 742/463, loss: 0.0018646889366209507 2023-01-24 04:46:59.587770: step: 744/463, loss: 0.003130537224933505 2023-01-24 04:47:00.195185: step: 746/463, loss: 0.022479839622974396 2023-01-24 04:47:00.802121: step: 748/463, loss: 0.07461729645729065 2023-01-24 04:47:01.407917: step: 750/463, loss: 0.028300816193223 2023-01-24 04:47:01.973297: step: 752/463, loss: 0.027221383526921272 2023-01-24 04:47:02.569028: step: 754/463, loss: 0.00922930333763361 2023-01-24 04:47:03.153643: step: 756/463, loss: 0.04774511605501175 2023-01-24 04:47:03.767588: step: 758/463, loss: 0.001095455139875412 2023-01-24 04:47:04.383623: step: 760/463, loss: 0.015710918232798576 2023-01-24 04:47:04.979806: step: 762/463, loss: 0.04717395827174187 2023-01-24 04:47:05.595276: step: 764/463, loss: 0.016485104337334633 2023-01-24 04:47:06.351328: step: 766/463, loss: 0.00036186681245453656 2023-01-24 04:47:07.016945: step: 768/463, loss: 0.020466728135943413 2023-01-24 04:47:07.657139: step: 770/463, loss: 0.04494362324476242 2023-01-24 04:47:08.288708: step: 772/463, loss: 0.008679266087710857 2023-01-24 04:47:08.884792: step: 774/463, loss: 0.0013050142442807555 2023-01-24 04:47:09.485811: step: 776/463, loss: 0.021732358261942863 2023-01-24 04:47:10.069886: step: 778/463, loss: 0.0001534092443762347 2023-01-24 04:47:10.655031: step: 780/463, loss: 0.014414872974157333 2023-01-24 04:47:11.229368: step: 782/463, loss: 0.021851930767297745 2023-01-24 04:47:11.836669: step: 784/463, loss: 0.013899555429816246 2023-01-24 04:47:12.468975: step: 786/463, loss: 0.006331122480332851 2023-01-24 04:47:13.064946: step: 788/463, loss: 0.007858030498027802 2023-01-24 04:47:13.676879: step: 790/463, loss: 0.0008617473067715764 2023-01-24 04:47:14.290820: step: 792/463, loss: 0.06828677654266357 2023-01-24 04:47:14.889529: step: 794/463, loss: 0.012816148810088634 2023-01-24 04:47:15.457362: step: 796/463, loss: 0.02845672518014908 2023-01-24 04:47:16.055359: step: 798/463, loss: 0.03094632551074028 2023-01-24 04:47:16.671865: step: 800/463, loss: 0.0015459490241482854 2023-01-24 04:47:17.229720: step: 802/463, loss: 0.0016645047580823302 2023-01-24 04:47:17.876147: step: 804/463, loss: 0.015947535634040833 2023-01-24 04:47:18.470005: step: 806/463, loss: 0.0006057674181647599 2023-01-24 04:47:19.038046: step: 808/463, loss: 0.0002544317103456706 2023-01-24 04:47:19.692156: step: 810/463, loss: 0.005968735087662935 2023-01-24 04:47:20.304191: step: 812/463, loss: 0.06271520256996155 2023-01-24 04:47:20.948367: step: 814/463, loss: 0.0009698488865979016 2023-01-24 04:47:21.516233: step: 816/463, loss: 0.006797236390411854 2023-01-24 04:47:22.088574: step: 818/463, loss: 0.059739578515291214 2023-01-24 04:47:22.675588: step: 820/463, loss: 0.00011577414989005774 2023-01-24 04:47:23.300813: step: 822/463, loss: 0.02838192693889141 2023-01-24 04:47:23.895757: step: 824/463, loss: 0.002802395261824131 2023-01-24 04:47:24.480001: step: 826/463, loss: 0.001066510914824903 2023-01-24 04:47:25.116925: step: 828/463, loss: 0.0030788287986069918 2023-01-24 04:47:25.695518: step: 830/463, loss: 0.0027202710043638945 2023-01-24 04:47:26.184290: step: 832/463, loss: 0.002739021321758628 2023-01-24 04:47:26.744171: step: 834/463, loss: 0.0006961432518437505 2023-01-24 04:47:27.367258: step: 836/463, loss: 0.005906557664275169 2023-01-24 04:47:27.950503: step: 838/463, loss: 0.019100988283753395 2023-01-24 04:47:28.596319: step: 840/463, loss: 0.00019169147708453238 2023-01-24 04:47:29.213461: step: 842/463, loss: 0.07370392978191376 2023-01-24 04:47:29.890105: step: 844/463, loss: 0.005135790444910526 2023-01-24 04:47:30.504199: step: 846/463, loss: 0.04300599917769432 2023-01-24 04:47:31.091515: step: 848/463, loss: 0.013606134802103043 2023-01-24 04:47:31.669833: step: 850/463, loss: 0.0028233109042048454 2023-01-24 04:47:32.257764: step: 852/463, loss: 0.0009595666779205203 2023-01-24 04:47:32.818926: step: 854/463, loss: 0.0019446543883532286 2023-01-24 04:47:33.456658: step: 856/463, loss: 0.03213897719979286 2023-01-24 04:47:34.067290: step: 858/463, loss: 0.017805714160203934 2023-01-24 04:47:34.667396: step: 860/463, loss: 0.001443573972210288 2023-01-24 04:47:35.288056: step: 862/463, loss: 1.3227494491729885e-05 2023-01-24 04:47:35.821051: step: 864/463, loss: 0.00019468077516648918 2023-01-24 04:47:36.371259: step: 866/463, loss: 0.000777948647737503 2023-01-24 04:47:36.971950: step: 868/463, loss: 0.19713211059570312 2023-01-24 04:47:37.671871: step: 870/463, loss: 0.013608434237539768 2023-01-24 04:47:38.304828: step: 872/463, loss: 0.0038138467352837324 2023-01-24 04:47:38.894223: step: 874/463, loss: 0.0017072767950594425 2023-01-24 04:47:39.488029: step: 876/463, loss: 0.003981195855885744 2023-01-24 04:47:40.086699: step: 878/463, loss: 0.009307857602834702 2023-01-24 04:47:40.750603: step: 880/463, loss: 0.0042716762982308865 2023-01-24 04:47:41.369547: step: 882/463, loss: 0.0029011103324592113 2023-01-24 04:47:42.047663: step: 884/463, loss: 0.0013269209302961826 2023-01-24 04:47:42.565020: step: 886/463, loss: 6.111144466558471e-05 2023-01-24 04:47:43.184496: step: 888/463, loss: 0.0015109108062461019 2023-01-24 04:47:43.792043: step: 890/463, loss: 0.2621576488018036 2023-01-24 04:47:44.374373: step: 892/463, loss: 0.0005963873118162155 2023-01-24 04:47:45.012428: step: 894/463, loss: 0.011405606754124165 2023-01-24 04:47:45.653451: step: 896/463, loss: 0.0036344542168080807 2023-01-24 04:47:46.305152: step: 898/463, loss: 0.003444177098572254 2023-01-24 04:47:47.038605: step: 900/463, loss: 0.01775944232940674 2023-01-24 04:47:47.626326: step: 902/463, loss: 0.00021660549100488424 2023-01-24 04:47:48.232335: step: 904/463, loss: 0.004816310480237007 2023-01-24 04:47:48.808625: step: 906/463, loss: 0.026279477402567863 2023-01-24 04:47:49.367548: step: 908/463, loss: 5.3743104217574e-05 2023-01-24 04:47:50.032189: step: 910/463, loss: 0.0037809687200933695 2023-01-24 04:47:50.641150: step: 912/463, loss: 0.029408367350697517 2023-01-24 04:47:51.234952: step: 914/463, loss: 0.0034847259521484375 2023-01-24 04:47:51.887847: step: 916/463, loss: 0.014967055059969425 2023-01-24 04:47:52.513977: step: 918/463, loss: 0.005702062975615263 2023-01-24 04:47:53.081214: step: 920/463, loss: 0.020544935017824173 2023-01-24 04:47:53.687517: step: 922/463, loss: 0.18266339600086212 2023-01-24 04:47:54.367137: step: 924/463, loss: 0.2094862163066864 2023-01-24 04:47:54.976239: step: 926/463, loss: 0.015264543704688549 ================================================== Loss: 0.055 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3194635440053884, 'r': 0.3176449659560219, 'f1': 0.31855165948396486}, 'combined': 0.23472227540923724, 'epoch': 37} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.370411068543202, 'r': 0.32512063221477555, 'f1': 0.3462912780334121}, 'combined': 0.2436220046466216, 'epoch': 37} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31122076023391815, 'r': 0.30295303605313095, 'f1': 0.30703125000000003}, 'combined': 0.22623355263157896, 'epoch': 37} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.36484217690067056, 'r': 0.31959537417587125, 'f1': 0.3407231875524884}, 'combined': 0.24191346316226678, 'epoch': 37} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3265663442182957, 'r': 0.325946673735908, 'f1': 0.3262562147366069}, 'combined': 0.2403993161217103, 'epoch': 37} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37505418612716074, 'r': 0.3085017387636873, 'f1': 0.33853811908456427}, 'combined': 0.2403620645500406, 'epoch': 37} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3873456790123456, 'r': 0.2988095238095238, 'f1': 0.33736559139784944}, 'combined': 0.22491039426523296, 'epoch': 37} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2672413793103448, 'r': 0.33695652173913043, 'f1': 0.2980769230769231}, 'combined': 0.14903846153846154, 'epoch': 37} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2727272727272727, 'r': 0.10344827586206896, 'f1': 0.15}, 'combined': 0.09999999999999999, 'epoch': 37} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3207567698259188, 'r': 0.3146703036053131, 'f1': 0.3176843869731801}, 'combined': 0.23408323250655372, 'epoch': 25} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3686692880345029, 'r': 0.31751883707683626, 'f1': 0.34118761114866797}, 'combined': 0.24224320391555423, 'epoch': 25} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3392857142857143, 'r': 0.41304347826086957, 'f1': 0.3725490196078432}, 'combined': 0.1862745098039216, 'epoch': 25} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3565039176745541, 'r': 0.34365083525364987, 'f1': 0.3499594013114463}, 'combined': 0.25786482201896044, 'epoch': 34} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37789489678955906, 'r': 0.31320721140025465, 'f1': 0.34252364570514954}, 'combined': 0.24319178845065617, 'epoch': 34} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6388888888888888, 'r': 0.19827586206896552, 'f1': 0.3026315789473684}, 'combined': 0.20175438596491227, 'epoch': 34} ****************************** Epoch: 38 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:50:27.030426: step: 2/463, loss: 0.0012436248362064362 2023-01-24 04:50:27.680729: step: 4/463, loss: 0.009536056779325008 2023-01-24 04:50:28.307617: step: 6/463, loss: 0.0030269906856119633 2023-01-24 04:50:28.937555: step: 8/463, loss: 0.00531970988959074 2023-01-24 04:50:29.548744: step: 10/463, loss: 0.011419760063290596 2023-01-24 04:50:30.131745: step: 12/463, loss: 0.00027438392862677574 2023-01-24 04:50:30.760377: step: 14/463, loss: 0.006549145560711622 2023-01-24 04:50:31.351272: step: 16/463, loss: 0.00679399399086833 2023-01-24 04:50:32.001981: step: 18/463, loss: 0.013158856891095638 2023-01-24 04:50:32.567054: step: 20/463, loss: 0.001251760870218277 2023-01-24 04:50:33.100558: step: 22/463, loss: 0.004471635911613703 2023-01-24 04:50:33.702807: step: 24/463, loss: 0.001364552415907383 2023-01-24 04:50:34.317002: step: 26/463, loss: 0.02498546801507473 2023-01-24 04:50:34.861661: step: 28/463, loss: 0.0021825702860951424 2023-01-24 04:50:35.524365: step: 30/463, loss: 0.040162958204746246 2023-01-24 04:50:36.104212: step: 32/463, loss: 0.00015838131366763264 2023-01-24 04:50:36.719293: step: 34/463, loss: 0.003412335179746151 2023-01-24 04:50:37.314447: step: 36/463, loss: 0.011038684286177158 2023-01-24 04:50:37.983339: step: 38/463, loss: 0.02205650694668293 2023-01-24 04:50:38.621508: step: 40/463, loss: 0.006831469014286995 2023-01-24 04:50:39.214641: step: 42/463, loss: 0.00953890010714531 2023-01-24 04:50:39.869723: step: 44/463, loss: 0.0023999372497200966 2023-01-24 04:50:40.427995: step: 46/463, loss: 0.011095751076936722 2023-01-24 04:50:41.036255: step: 48/463, loss: 0.016051126644015312 2023-01-24 04:50:41.659702: step: 50/463, loss: 0.001529894070699811 2023-01-24 04:50:42.248421: step: 52/463, loss: 0.0017153897788375616 2023-01-24 04:50:42.861470: step: 54/463, loss: 0.013600295409560204 2023-01-24 04:50:43.434866: step: 56/463, loss: 0.00011572736548259854 2023-01-24 04:50:44.059834: step: 58/463, loss: 0.016052721068263054 2023-01-24 04:50:44.705333: step: 60/463, loss: 0.0008754162699915469 2023-01-24 04:50:45.313295: step: 62/463, loss: 0.08315640687942505 2023-01-24 04:50:45.866580: step: 64/463, loss: 0.16951480507850647 2023-01-24 04:50:46.486640: step: 66/463, loss: 0.0036771870218217373 2023-01-24 04:50:47.051689: step: 68/463, loss: 0.0012806517770513892 2023-01-24 04:50:47.672926: step: 70/463, loss: 0.020189383998513222 2023-01-24 04:50:48.246667: step: 72/463, loss: 0.008044351823627949 2023-01-24 04:50:48.838176: step: 74/463, loss: 0.006305080372840166 2023-01-24 04:50:49.492569: step: 76/463, loss: 0.020282555371522903 2023-01-24 04:50:50.080761: step: 78/463, loss: 0.0006125650252215564 2023-01-24 04:50:50.611052: step: 80/463, loss: 0.01552034541964531 2023-01-24 04:50:51.165570: step: 82/463, loss: 0.0009065732010640204 2023-01-24 04:50:51.738624: step: 84/463, loss: 0.0031200575176626444 2023-01-24 04:50:52.364222: step: 86/463, loss: 0.1452089548110962 2023-01-24 04:50:52.978462: step: 88/463, loss: 0.05501601845026016 2023-01-24 04:50:53.562995: step: 90/463, loss: 0.00021862445282749832 2023-01-24 04:50:54.231138: step: 92/463, loss: 0.005238418001681566 2023-01-24 04:50:54.816285: step: 94/463, loss: 1.3463797586155124e-05 2023-01-24 04:50:55.423518: step: 96/463, loss: 0.15899617969989777 2023-01-24 04:50:55.995201: step: 98/463, loss: 0.024052709341049194 2023-01-24 04:50:56.704761: step: 100/463, loss: 0.021613791584968567 2023-01-24 04:50:57.381515: step: 102/463, loss: 0.0321638286113739 2023-01-24 04:50:57.971773: step: 104/463, loss: 1.9976574182510376 2023-01-24 04:50:58.733816: step: 106/463, loss: 0.00013110166764818132 2023-01-24 04:50:59.338030: step: 108/463, loss: 0.004007038660347462 2023-01-24 04:50:59.977972: step: 110/463, loss: 0.008510821498930454 2023-01-24 04:51:00.526374: step: 112/463, loss: 0.00018656335305422544 2023-01-24 04:51:01.114586: step: 114/463, loss: 0.47944924235343933 2023-01-24 04:51:01.696871: step: 116/463, loss: 0.016631120815873146 2023-01-24 04:51:02.241876: step: 118/463, loss: 0.008215327747166157 2023-01-24 04:51:02.877424: step: 120/463, loss: 0.027954351156949997 2023-01-24 04:51:03.400720: step: 122/463, loss: 0.008999533019959927 2023-01-24 04:51:04.049270: step: 124/463, loss: 0.6711862683296204 2023-01-24 04:51:04.760670: step: 126/463, loss: 0.14145392179489136 2023-01-24 04:51:05.326560: step: 128/463, loss: 0.007713482715189457 2023-01-24 04:51:05.915335: step: 130/463, loss: 0.006141917314380407 2023-01-24 04:51:06.574711: step: 132/463, loss: 0.026528455317020416 2023-01-24 04:51:07.213952: step: 134/463, loss: 0.002023426815867424 2023-01-24 04:51:07.841015: step: 136/463, loss: 0.011657417751848698 2023-01-24 04:51:08.403672: step: 138/463, loss: 0.0034805636387318373 2023-01-24 04:51:08.988193: step: 140/463, loss: 0.010879567824304104 2023-01-24 04:51:09.518912: step: 142/463, loss: 0.000863026303704828 2023-01-24 04:51:10.097505: step: 144/463, loss: 0.06926724314689636 2023-01-24 04:51:10.654551: step: 146/463, loss: 0.0015102796023711562 2023-01-24 04:51:11.269769: step: 148/463, loss: 0.004709719680249691 2023-01-24 04:51:11.825183: step: 150/463, loss: 0.006508533842861652 2023-01-24 04:51:12.447390: step: 152/463, loss: 0.14650706946849823 2023-01-24 04:51:13.036064: step: 154/463, loss: 0.016693949699401855 2023-01-24 04:51:13.663762: step: 156/463, loss: 0.06905568391084671 2023-01-24 04:51:14.228792: step: 158/463, loss: 0.0007474282756447792 2023-01-24 04:51:14.782477: step: 160/463, loss: 0.0040927608497440815 2023-01-24 04:51:15.381273: step: 162/463, loss: 0.001862898119725287 2023-01-24 04:51:15.974727: step: 164/463, loss: 0.04858764633536339 2023-01-24 04:51:16.560785: step: 166/463, loss: 0.0016350791556760669 2023-01-24 04:51:17.116989: step: 168/463, loss: 0.002489015692844987 2023-01-24 04:51:17.736726: step: 170/463, loss: 0.0018068765057250857 2023-01-24 04:51:18.306133: step: 172/463, loss: 0.005800067447125912 2023-01-24 04:51:18.888737: step: 174/463, loss: 0.04059125483036041 2023-01-24 04:51:19.582865: step: 176/463, loss: 0.008285727351903915 2023-01-24 04:51:20.196218: step: 178/463, loss: 0.0009633456938900054 2023-01-24 04:51:20.816400: step: 180/463, loss: 0.003748510032892227 2023-01-24 04:51:21.466140: step: 182/463, loss: 0.0009541108156554401 2023-01-24 04:51:22.065792: step: 184/463, loss: 0.0020775371231138706 2023-01-24 04:51:22.672711: step: 186/463, loss: 0.0008627814240753651 2023-01-24 04:51:23.295515: step: 188/463, loss: 0.009890980087220669 2023-01-24 04:51:23.810691: step: 190/463, loss: 0.005685671232640743 2023-01-24 04:51:24.422525: step: 192/463, loss: 0.001466565765440464 2023-01-24 04:51:24.994481: step: 194/463, loss: 0.008458053693175316 2023-01-24 04:51:25.574526: step: 196/463, loss: 0.001899332506582141 2023-01-24 04:51:26.183049: step: 198/463, loss: 0.0009588718530721962 2023-01-24 04:51:26.748362: step: 200/463, loss: 0.010436074808239937 2023-01-24 04:51:27.330952: step: 202/463, loss: 0.02657500095665455 2023-01-24 04:51:27.926416: step: 204/463, loss: 0.0027682885993272066 2023-01-24 04:51:28.523297: step: 206/463, loss: 6.13752199569717e-05 2023-01-24 04:51:29.109410: step: 208/463, loss: 0.00022691786580253392 2023-01-24 04:51:29.676077: step: 210/463, loss: 0.006610346492379904 2023-01-24 04:51:30.293991: step: 212/463, loss: 0.03686845302581787 2023-01-24 04:51:30.886977: step: 214/463, loss: 0.014093350619077682 2023-01-24 04:51:31.447193: step: 216/463, loss: 0.0014373041922226548 2023-01-24 04:51:32.057996: step: 218/463, loss: 0.011702694930136204 2023-01-24 04:51:32.719072: step: 220/463, loss: 0.009712752886116505 2023-01-24 04:51:33.307297: step: 222/463, loss: 0.06862057000398636 2023-01-24 04:51:33.912650: step: 224/463, loss: 1.182347887151991e-06 2023-01-24 04:51:34.593343: step: 226/463, loss: 0.0016625438584014773 2023-01-24 04:51:35.195329: step: 228/463, loss: 0.0012834618100896478 2023-01-24 04:51:35.772920: step: 230/463, loss: 0.6405066251754761 2023-01-24 04:51:36.360735: step: 232/463, loss: 0.003513868199661374 2023-01-24 04:51:37.008402: step: 234/463, loss: 0.054871052503585815 2023-01-24 04:51:37.657162: step: 236/463, loss: 0.000961743644438684 2023-01-24 04:51:38.205114: step: 238/463, loss: 0.008484777063131332 2023-01-24 04:51:38.804935: step: 240/463, loss: 0.42815542221069336 2023-01-24 04:51:39.452155: step: 242/463, loss: 0.07419165968894958 2023-01-24 04:51:40.085986: step: 244/463, loss: 0.05807202309370041 2023-01-24 04:51:40.704073: step: 246/463, loss: 0.0710531622171402 2023-01-24 04:51:41.305114: step: 248/463, loss: 0.0077949282713234425 2023-01-24 04:51:41.924341: step: 250/463, loss: 0.027563894167542458 2023-01-24 04:51:42.538992: step: 252/463, loss: 0.004572470672428608 2023-01-24 04:51:43.338559: step: 254/463, loss: 0.0010085658868774772 2023-01-24 04:51:43.940600: step: 256/463, loss: 0.000302998349070549 2023-01-24 04:51:44.542535: step: 258/463, loss: 0.01577235385775566 2023-01-24 04:51:45.178330: step: 260/463, loss: 0.047634389251470566 2023-01-24 04:51:45.797898: step: 262/463, loss: 0.0002662766491994262 2023-01-24 04:51:46.424893: step: 264/463, loss: 0.022950002923607826 2023-01-24 04:51:46.994436: step: 266/463, loss: 0.002695318777114153 2023-01-24 04:51:47.549060: step: 268/463, loss: 0.0003984082431998104 2023-01-24 04:51:48.153423: step: 270/463, loss: 0.006457486655563116 2023-01-24 04:51:48.809749: step: 272/463, loss: 0.020373571664094925 2023-01-24 04:51:49.441990: step: 274/463, loss: 2.1939616999588907e-05 2023-01-24 04:51:50.009455: step: 276/463, loss: 1.9552831872715615e-05 2023-01-24 04:51:50.613549: step: 278/463, loss: 0.018061438575387 2023-01-24 04:51:51.211919: step: 280/463, loss: 0.1333039551973343 2023-01-24 04:51:51.784891: step: 282/463, loss: 0.005105483345687389 2023-01-24 04:51:52.396528: step: 284/463, loss: 0.0063661374151706696 2023-01-24 04:51:52.951220: step: 286/463, loss: 0.000278616527793929 2023-01-24 04:51:53.591860: step: 288/463, loss: 0.024965964257717133 2023-01-24 04:51:54.252751: step: 290/463, loss: 0.0035448018461465836 2023-01-24 04:51:54.869971: step: 292/463, loss: 0.4215100109577179 2023-01-24 04:51:55.508913: step: 294/463, loss: 0.015466306358575821 2023-01-24 04:51:56.045631: step: 296/463, loss: 0.002688090316951275 2023-01-24 04:51:56.630063: step: 298/463, loss: 0.13854721188545227 2023-01-24 04:51:57.262432: step: 300/463, loss: 0.011023270897567272 2023-01-24 04:51:57.925600: step: 302/463, loss: 0.021014275029301643 2023-01-24 04:51:58.548038: step: 304/463, loss: 0.12189171463251114 2023-01-24 04:51:59.208224: step: 306/463, loss: 0.004628423601388931 2023-01-24 04:51:59.800032: step: 308/463, loss: 0.013151315040886402 2023-01-24 04:52:00.387886: step: 310/463, loss: 0.0024568557273596525 2023-01-24 04:52:00.953146: step: 312/463, loss: 5.861556564923376e-05 2023-01-24 04:52:01.550911: step: 314/463, loss: 0.002008710987865925 2023-01-24 04:52:02.148594: step: 316/463, loss: 0.010991289280354977 2023-01-24 04:52:02.684051: step: 318/463, loss: 0.0015907550696283579 2023-01-24 04:52:03.278719: step: 320/463, loss: 0.001014339504763484 2023-01-24 04:52:03.911604: step: 322/463, loss: 0.005415994208306074 2023-01-24 04:52:04.541746: step: 324/463, loss: 0.0096944160759449 2023-01-24 04:52:05.208756: step: 326/463, loss: 0.022902080789208412 2023-01-24 04:52:05.807918: step: 328/463, loss: 3.4492390155792236 2023-01-24 04:52:06.388030: step: 330/463, loss: 0.004062039777636528 2023-01-24 04:52:07.014202: step: 332/463, loss: 0.0011856693308800459 2023-01-24 04:52:07.679703: step: 334/463, loss: 0.010850033722817898 2023-01-24 04:52:08.315331: step: 336/463, loss: 0.0007012889836914837 2023-01-24 04:52:08.919135: step: 338/463, loss: 0.00310707394964993 2023-01-24 04:52:09.568493: step: 340/463, loss: 8.078169776126742e-05 2023-01-24 04:52:10.184359: step: 342/463, loss: 0.006123453378677368 2023-01-24 04:52:10.740130: step: 344/463, loss: 0.001537897507660091 2023-01-24 04:52:11.354870: step: 346/463, loss: 0.0008189817890524864 2023-01-24 04:52:11.956009: step: 348/463, loss: 0.002744012977927923 2023-01-24 04:52:12.510196: step: 350/463, loss: 0.0010747985215857625 2023-01-24 04:52:13.193483: step: 352/463, loss: 0.00045564997708424926 2023-01-24 04:52:13.791144: step: 354/463, loss: 0.0005814498872496188 2023-01-24 04:52:14.396594: step: 356/463, loss: 0.7390366792678833 2023-01-24 04:52:14.986361: step: 358/463, loss: 0.0004115276678930968 2023-01-24 04:52:15.588387: step: 360/463, loss: 0.0019255608785897493 2023-01-24 04:52:16.214687: step: 362/463, loss: 0.026458287611603737 2023-01-24 04:52:16.840307: step: 364/463, loss: 0.013876879587769508 2023-01-24 04:52:17.402540: step: 366/463, loss: 0.0001612931228009984 2023-01-24 04:52:18.019681: step: 368/463, loss: 0.04030061140656471 2023-01-24 04:52:18.615993: step: 370/463, loss: 0.019329695031046867 2023-01-24 04:52:19.224130: step: 372/463, loss: 0.017482975497841835 2023-01-24 04:52:19.910525: step: 374/463, loss: 0.0036155155394226313 2023-01-24 04:52:20.542257: step: 376/463, loss: 0.04398258030414581 2023-01-24 04:52:21.198524: step: 378/463, loss: 1.8989574527950026e-05 2023-01-24 04:52:21.778730: step: 380/463, loss: 0.039677660912275314 2023-01-24 04:52:22.467873: step: 382/463, loss: 0.0057331351563334465 2023-01-24 04:52:23.108794: step: 384/463, loss: 0.00012831873027607799 2023-01-24 04:52:23.730101: step: 386/463, loss: 0.009099327959120274 2023-01-24 04:52:24.309156: step: 388/463, loss: 0.001493956078775227 2023-01-24 04:52:24.962491: step: 390/463, loss: 0.018317103385925293 2023-01-24 04:52:25.615446: step: 392/463, loss: 0.0030143894255161285 2023-01-24 04:52:26.228931: step: 394/463, loss: 0.017064927145838737 2023-01-24 04:52:26.798587: step: 396/463, loss: 0.004771350417286158 2023-01-24 04:52:27.434871: step: 398/463, loss: 0.0003665891126729548 2023-01-24 04:52:28.100774: step: 400/463, loss: 2.8716200176859275e-05 2023-01-24 04:52:28.672595: step: 402/463, loss: 0.00017528205353301018 2023-01-24 04:52:29.391902: step: 404/463, loss: 0.004325446672737598 2023-01-24 04:52:29.982959: step: 406/463, loss: 0.00030578047153539956 2023-01-24 04:52:30.584456: step: 408/463, loss: 0.006052469834685326 2023-01-24 04:52:31.124930: step: 410/463, loss: 0.006910121068358421 2023-01-24 04:52:31.749690: step: 412/463, loss: 0.0337267704308033 2023-01-24 04:52:32.326769: step: 414/463, loss: 0.0016942259389907122 2023-01-24 04:52:32.933427: step: 416/463, loss: 4.6234159526648e-05 2023-01-24 04:52:33.579104: step: 418/463, loss: 0.00013838573067914695 2023-01-24 04:52:34.197945: step: 420/463, loss: 0.03261140361428261 2023-01-24 04:52:34.825832: step: 422/463, loss: 0.006685355678200722 2023-01-24 04:52:35.410225: step: 424/463, loss: 0.0008961635176092386 2023-01-24 04:52:36.046144: step: 426/463, loss: 0.003007943509146571 2023-01-24 04:52:36.628309: step: 428/463, loss: 0.034754153341054916 2023-01-24 04:52:37.269418: step: 430/463, loss: 0.0077103073708713055 2023-01-24 04:52:37.894783: step: 432/463, loss: 0.009218933992087841 2023-01-24 04:52:38.492003: step: 434/463, loss: 0.0003791186318267137 2023-01-24 04:52:39.081162: step: 436/463, loss: 0.007262576371431351 2023-01-24 04:52:39.640832: step: 438/463, loss: 0.0027627763338387012 2023-01-24 04:52:40.210043: step: 440/463, loss: 0.0007418758468702435 2023-01-24 04:52:40.776881: step: 442/463, loss: 0.025758802890777588 2023-01-24 04:52:41.353761: step: 444/463, loss: 0.001047442201524973 2023-01-24 04:52:41.923484: step: 446/463, loss: 0.4799138903617859 2023-01-24 04:52:42.516234: step: 448/463, loss: 0.012457357719540596 2023-01-24 04:52:43.088251: step: 450/463, loss: 0.002791299019008875 2023-01-24 04:52:43.705026: step: 452/463, loss: 0.005174938123673201 2023-01-24 04:52:44.424792: step: 454/463, loss: 0.0059830849058926105 2023-01-24 04:52:45.028023: step: 456/463, loss: 0.04256100207567215 2023-01-24 04:52:45.670689: step: 458/463, loss: 0.03003384731709957 2023-01-24 04:52:46.260668: step: 460/463, loss: 0.0011640912853181362 2023-01-24 04:52:46.774032: step: 462/463, loss: 0.007371848914772272 2023-01-24 04:52:47.343189: step: 464/463, loss: 0.002341917948797345 2023-01-24 04:52:47.990310: step: 466/463, loss: 0.002676863456144929 2023-01-24 04:52:48.616007: step: 468/463, loss: 0.0007488157716579735 2023-01-24 04:52:49.178191: step: 470/463, loss: 0.0043480475433170795 2023-01-24 04:52:49.781646: step: 472/463, loss: 0.003777798032388091 2023-01-24 04:52:50.416344: step: 474/463, loss: 0.01630716770887375 2023-01-24 04:52:51.031492: step: 476/463, loss: 0.01438457053154707 2023-01-24 04:52:51.593106: step: 478/463, loss: 0.2786991000175476 2023-01-24 04:52:52.169605: step: 480/463, loss: 0.009714446030557156 2023-01-24 04:52:52.725199: step: 482/463, loss: 5.3390169341582805e-05 2023-01-24 04:52:53.237379: step: 484/463, loss: 0.0002385147090535611 2023-01-24 04:52:53.827269: step: 486/463, loss: 0.001468317350372672 2023-01-24 04:52:54.429819: step: 488/463, loss: 0.0033643541391938925 2023-01-24 04:52:55.105079: step: 490/463, loss: 0.000591582793276757 2023-01-24 04:52:55.726974: step: 492/463, loss: 0.021628499031066895 2023-01-24 04:52:56.312075: step: 494/463, loss: 0.0023273890838027 2023-01-24 04:52:56.964750: step: 496/463, loss: 0.0009984226198866963 2023-01-24 04:52:57.590416: step: 498/463, loss: 0.0006210359861142933 2023-01-24 04:52:58.220004: step: 500/463, loss: 0.0026355432346463203 2023-01-24 04:52:58.865383: step: 502/463, loss: 0.08143535256385803 2023-01-24 04:52:59.490629: step: 504/463, loss: 0.008487794548273087 2023-01-24 04:53:00.107506: step: 506/463, loss: 0.017110588029026985 2023-01-24 04:53:00.731766: step: 508/463, loss: 4.756453927257098e-05 2023-01-24 04:53:01.334275: step: 510/463, loss: 4.7229656047420576e-05 2023-01-24 04:53:01.894438: step: 512/463, loss: 0.010466350242495537 2023-01-24 04:53:02.489052: step: 514/463, loss: 0.01220026146620512 2023-01-24 04:53:03.101799: step: 516/463, loss: 0.001194533659145236 2023-01-24 04:53:03.722132: step: 518/463, loss: 0.0004766391939483583 2023-01-24 04:53:04.232458: step: 520/463, loss: 8.050166798057035e-05 2023-01-24 04:53:04.811446: step: 522/463, loss: 0.0013772303937003016 2023-01-24 04:53:05.377026: step: 524/463, loss: 0.036324746906757355 2023-01-24 04:53:05.971204: step: 526/463, loss: 0.005151208024471998 2023-01-24 04:53:06.582629: step: 528/463, loss: 9.46258569456404e-06 2023-01-24 04:53:07.185905: step: 530/463, loss: 0.0027975905686616898 2023-01-24 04:53:07.808939: step: 532/463, loss: 0.0002629204245749861 2023-01-24 04:53:08.437874: step: 534/463, loss: 2.0079292880836874e-05 2023-01-24 04:53:09.023390: step: 536/463, loss: 0.0005018825177103281 2023-01-24 04:53:09.694057: step: 538/463, loss: 0.03579973801970482 2023-01-24 04:53:10.245210: step: 540/463, loss: 0.002988085150718689 2023-01-24 04:53:10.896606: step: 542/463, loss: 0.006085410248488188 2023-01-24 04:53:11.504290: step: 544/463, loss: 0.02955782040953636 2023-01-24 04:53:12.113674: step: 546/463, loss: 0.001141253625974059 2023-01-24 04:53:12.755891: step: 548/463, loss: 0.002093150047585368 2023-01-24 04:53:13.355171: step: 550/463, loss: 0.0001853800640674308 2023-01-24 04:53:14.042115: step: 552/463, loss: 0.0019263820722699165 2023-01-24 04:53:14.690677: step: 554/463, loss: 0.0010252709034830332 2023-01-24 04:53:15.292222: step: 556/463, loss: 0.002662135986611247 2023-01-24 04:53:15.936980: step: 558/463, loss: 0.007904266007244587 2023-01-24 04:53:16.516330: step: 560/463, loss: 0.000580427935346961 2023-01-24 04:53:17.130045: step: 562/463, loss: 0.026549186557531357 2023-01-24 04:53:17.728911: step: 564/463, loss: 0.0010114682372659445 2023-01-24 04:53:18.285009: step: 566/463, loss: 0.004462169948965311 2023-01-24 04:53:18.936741: step: 568/463, loss: 0.036931928247213364 2023-01-24 04:53:19.524318: step: 570/463, loss: 0.000304520275676623 2023-01-24 04:53:20.155902: step: 572/463, loss: 0.0027706429827958345 2023-01-24 04:53:20.704799: step: 574/463, loss: 0.29532575607299805 2023-01-24 04:53:21.314419: step: 576/463, loss: 0.002659860998392105 2023-01-24 04:53:21.914568: step: 578/463, loss: 0.12993448972702026 2023-01-24 04:53:22.560454: step: 580/463, loss: 0.013590283691883087 2023-01-24 04:53:23.289009: step: 582/463, loss: 0.5067077279090881 2023-01-24 04:53:23.900859: step: 584/463, loss: 0.0802796483039856 2023-01-24 04:53:24.546623: step: 586/463, loss: 0.003285561455413699 2023-01-24 04:53:25.164729: step: 588/463, loss: 0.000281439017271623 2023-01-24 04:53:25.788739: step: 590/463, loss: 1.3296985343913548e-05 2023-01-24 04:53:26.334116: step: 592/463, loss: 0.10244598984718323 2023-01-24 04:53:26.984496: step: 594/463, loss: 0.0059324707835912704 2023-01-24 04:53:27.592838: step: 596/463, loss: 6.460685654019471e-06 2023-01-24 04:53:28.206424: step: 598/463, loss: 1.3498259932021028e-06 2023-01-24 04:53:28.814589: step: 600/463, loss: 0.0024578121956437826 2023-01-24 04:53:29.440069: step: 602/463, loss: 0.007364883087575436 2023-01-24 04:53:30.053192: step: 604/463, loss: 0.04647441953420639 2023-01-24 04:53:30.653038: step: 606/463, loss: 0.0013182725524529815 2023-01-24 04:53:31.237842: step: 608/463, loss: 0.024495495483279228 2023-01-24 04:53:31.857884: step: 610/463, loss: 0.005211158189922571 2023-01-24 04:53:32.502771: step: 612/463, loss: 0.0006703778053633869 2023-01-24 04:53:33.101264: step: 614/463, loss: 3.73500952264294e-05 2023-01-24 04:53:33.722383: step: 616/463, loss: 1.0058848857879639 2023-01-24 04:53:34.316556: step: 618/463, loss: 0.00894689466804266 2023-01-24 04:53:34.899457: step: 620/463, loss: 0.00570622319355607 2023-01-24 04:53:35.492553: step: 622/463, loss: 0.005861806217581034 2023-01-24 04:53:36.148933: step: 624/463, loss: 0.01798233762383461 2023-01-24 04:53:36.830806: step: 626/463, loss: 0.005262843798846006 2023-01-24 04:53:37.446375: step: 628/463, loss: 0.0014576544053852558 2023-01-24 04:53:38.066467: step: 630/463, loss: 0.00020095094805583358 2023-01-24 04:53:38.665347: step: 632/463, loss: 0.00045298977056518197 2023-01-24 04:53:39.374885: step: 634/463, loss: 0.0011825087713077664 2023-01-24 04:53:39.988899: step: 636/463, loss: 0.0068921674974262714 2023-01-24 04:53:40.557452: step: 638/463, loss: 0.000646319065708667 2023-01-24 04:53:41.198361: step: 640/463, loss: 0.0013806624338030815 2023-01-24 04:53:41.800703: step: 642/463, loss: 0.2763567566871643 2023-01-24 04:53:42.438011: step: 644/463, loss: 0.02062813751399517 2023-01-24 04:53:43.064546: step: 646/463, loss: 0.0033027585595846176 2023-01-24 04:53:43.628806: step: 648/463, loss: 0.00041194845107384026 2023-01-24 04:53:44.279072: step: 650/463, loss: 0.019701838493347168 2023-01-24 04:53:44.894960: step: 652/463, loss: 0.08705192804336548 2023-01-24 04:53:45.548430: step: 654/463, loss: 5.881166725885123e-05 2023-01-24 04:53:46.174414: step: 656/463, loss: 0.0002002080000238493 2023-01-24 04:53:46.707592: step: 658/463, loss: 0.005479239393025637 2023-01-24 04:53:47.303726: step: 660/463, loss: 0.002251916565001011 2023-01-24 04:53:47.961361: step: 662/463, loss: 0.0013354020193219185 2023-01-24 04:53:48.557551: step: 664/463, loss: 0.01622617058455944 2023-01-24 04:53:49.150919: step: 666/463, loss: 0.11061631888151169 2023-01-24 04:53:49.765591: step: 668/463, loss: 0.03832659497857094 2023-01-24 04:53:50.329467: step: 670/463, loss: 0.000304988119751215 2023-01-24 04:53:50.955422: step: 672/463, loss: 0.030959883704781532 2023-01-24 04:53:51.510344: step: 674/463, loss: 0.0008939355611801147 2023-01-24 04:53:52.199693: step: 676/463, loss: 0.0015217280015349388 2023-01-24 04:53:52.824385: step: 678/463, loss: 0.048933230340480804 2023-01-24 04:53:53.440077: step: 680/463, loss: 0.003050033235922456 2023-01-24 04:53:54.098437: step: 682/463, loss: 0.0009454403771087527 2023-01-24 04:53:54.721599: step: 684/463, loss: 0.0008841116796247661 2023-01-24 04:53:55.276522: step: 686/463, loss: 0.01656637154519558 2023-01-24 04:53:55.818202: step: 688/463, loss: 0.026902878656983376 2023-01-24 04:53:56.342387: step: 690/463, loss: 0.0024852976202964783 2023-01-24 04:53:56.956197: step: 692/463, loss: 0.0064402613788843155 2023-01-24 04:53:57.484474: step: 694/463, loss: 0.006673467345535755 2023-01-24 04:53:58.060106: step: 696/463, loss: 0.011143344454467297 2023-01-24 04:53:58.672327: step: 698/463, loss: 0.011860599741339684 2023-01-24 04:53:59.235391: step: 700/463, loss: 0.01143506821244955 2023-01-24 04:53:59.860223: step: 702/463, loss: 0.00802832841873169 2023-01-24 04:54:00.451744: step: 704/463, loss: 0.04851142689585686 2023-01-24 04:54:01.069896: step: 706/463, loss: 0.010513108223676682 2023-01-24 04:54:01.716955: step: 708/463, loss: 0.004322772845625877 2023-01-24 04:54:02.398540: step: 710/463, loss: 0.002341790124773979 2023-01-24 04:54:03.039348: step: 712/463, loss: 0.00030603326740674675 2023-01-24 04:54:03.629709: step: 714/463, loss: 0.0009508777875453234 2023-01-24 04:54:04.294158: step: 716/463, loss: 0.003192570758983493 2023-01-24 04:54:04.858181: step: 718/463, loss: 0.0001343812618870288 2023-01-24 04:54:05.435508: step: 720/463, loss: 3.262168320361525e-05 2023-01-24 04:54:06.045694: step: 722/463, loss: 0.5415108799934387 2023-01-24 04:54:06.659494: step: 724/463, loss: 0.0003250018635299057 2023-01-24 04:54:07.247823: step: 726/463, loss: 0.016718462109565735 2023-01-24 04:54:07.825053: step: 728/463, loss: 0.003103954019024968 2023-01-24 04:54:08.407003: step: 730/463, loss: 0.004718291573226452 2023-01-24 04:54:08.980482: step: 732/463, loss: 3.248118810006417e-05 2023-01-24 04:54:09.583168: step: 734/463, loss: 8.582418558944482e-06 2023-01-24 04:54:10.149540: step: 736/463, loss: 0.0024089592043310404 2023-01-24 04:54:10.767484: step: 738/463, loss: 0.02409784309566021 2023-01-24 04:54:11.463064: step: 740/463, loss: 0.0018662582151591778 2023-01-24 04:54:12.074492: step: 742/463, loss: 0.0016620608512312174 2023-01-24 04:54:12.665946: step: 744/463, loss: 0.012391077354550362 2023-01-24 04:54:13.291159: step: 746/463, loss: 0.016069089993834496 2023-01-24 04:54:14.041097: step: 748/463, loss: 0.010884617455303669 2023-01-24 04:54:14.616197: step: 750/463, loss: 3.131260018562898e-05 2023-01-24 04:54:15.195106: step: 752/463, loss: 0.0005380554939620197 2023-01-24 04:54:15.757978: step: 754/463, loss: 0.0008161910809576511 2023-01-24 04:54:16.355702: step: 756/463, loss: 0.0315014012157917 2023-01-24 04:54:16.956905: step: 758/463, loss: 3.1650739401811734e-05 2023-01-24 04:54:17.547810: step: 760/463, loss: 0.0009994369465857744 2023-01-24 04:54:18.174132: step: 762/463, loss: 0.014293679036200047 2023-01-24 04:54:18.783792: step: 764/463, loss: 0.01008051447570324 2023-01-24 04:54:19.361671: step: 766/463, loss: 8.05742820375599e-05 2023-01-24 04:54:20.006210: step: 768/463, loss: 0.005147550255060196 2023-01-24 04:54:20.651475: step: 770/463, loss: 0.0011184957111254334 2023-01-24 04:54:21.311196: step: 772/463, loss: 0.042014170438051224 2023-01-24 04:54:21.994454: step: 774/463, loss: 0.0013277383986860514 2023-01-24 04:54:22.540339: step: 776/463, loss: 1.1375059330021031e-05 2023-01-24 04:54:23.169255: step: 778/463, loss: 0.006540678907185793 2023-01-24 04:54:23.742887: step: 780/463, loss: 0.0008439377415925264 2023-01-24 04:54:24.434948: step: 782/463, loss: 0.3813895881175995 2023-01-24 04:54:24.992654: step: 784/463, loss: 2.372957396801212e-06 2023-01-24 04:54:25.626608: step: 786/463, loss: 0.001018224167637527 2023-01-24 04:54:26.189973: step: 788/463, loss: 5.20453104400076e-05 2023-01-24 04:54:26.730209: step: 790/463, loss: 2.885570029320661e-05 2023-01-24 04:54:27.324762: step: 792/463, loss: 0.0027016163803637028 2023-01-24 04:54:27.955152: step: 794/463, loss: 0.023050284013152122 2023-01-24 04:54:28.532637: step: 796/463, loss: 0.01211242750287056 2023-01-24 04:54:29.125708: step: 798/463, loss: 8.883810369297862e-05 2023-01-24 04:54:29.738145: step: 800/463, loss: 5.96759891777765e-05 2023-01-24 04:54:30.380960: step: 802/463, loss: 0.004856074694544077 2023-01-24 04:54:30.997696: step: 804/463, loss: 0.1481492966413498 2023-01-24 04:54:31.682546: step: 806/463, loss: 0.0023376387543976307 2023-01-24 04:54:32.288833: step: 808/463, loss: 0.0012283219257369637 2023-01-24 04:54:32.927532: step: 810/463, loss: 0.013612718321383 2023-01-24 04:54:33.545460: step: 812/463, loss: 0.006563084200024605 2023-01-24 04:54:34.117459: step: 814/463, loss: 0.004529244266450405 2023-01-24 04:54:34.641405: step: 816/463, loss: 0.027440045028924942 2023-01-24 04:54:35.292016: step: 818/463, loss: 0.0020706176292151213 2023-01-24 04:54:35.951713: step: 820/463, loss: 0.002271221950650215 2023-01-24 04:54:36.631540: step: 822/463, loss: 0.4277569651603699 2023-01-24 04:54:37.220799: step: 824/463, loss: 0.0021536508575081825 2023-01-24 04:54:37.769474: step: 826/463, loss: 0.0002154986432287842 2023-01-24 04:54:38.362474: step: 828/463, loss: 0.015550222247838974 2023-01-24 04:54:38.951073: step: 830/463, loss: 0.0030808739829808474 2023-01-24 04:54:39.562316: step: 832/463, loss: 0.030599404126405716 2023-01-24 04:54:40.246596: step: 834/463, loss: 0.12417865544557571 2023-01-24 04:54:40.872241: step: 836/463, loss: 0.028010519221425056 2023-01-24 04:54:41.512877: step: 838/463, loss: 0.006157314870506525 2023-01-24 04:54:42.159464: step: 840/463, loss: 0.01115452591329813 2023-01-24 04:54:42.785243: step: 842/463, loss: 5.826322012580931e-05 2023-01-24 04:54:43.468700: step: 844/463, loss: 0.023668037727475166 2023-01-24 04:54:44.108947: step: 846/463, loss: 0.009101040661334991 2023-01-24 04:54:44.811505: step: 848/463, loss: 0.040552251040935516 2023-01-24 04:54:45.393009: step: 850/463, loss: 0.011479474604129791 2023-01-24 04:54:46.055179: step: 852/463, loss: 0.1683546006679535 2023-01-24 04:54:46.641210: step: 854/463, loss: 0.0005447586299851537 2023-01-24 04:54:47.221843: step: 856/463, loss: 0.012879546731710434 2023-01-24 04:54:47.807427: step: 858/463, loss: 0.009412097744643688 2023-01-24 04:54:48.428587: step: 860/463, loss: 0.0013936893083155155 2023-01-24 04:54:49.041158: step: 862/463, loss: 0.0001515495969215408 2023-01-24 04:54:49.692139: step: 864/463, loss: 0.000497212226036936 2023-01-24 04:54:50.290016: step: 866/463, loss: 0.01581425778567791 2023-01-24 04:54:50.876390: step: 868/463, loss: 0.007990370504558086 2023-01-24 04:54:51.477052: step: 870/463, loss: 0.0020016953349113464 2023-01-24 04:54:52.169761: step: 872/463, loss: 0.048861630260944366 2023-01-24 04:54:52.818604: step: 874/463, loss: 0.027013525366783142 2023-01-24 04:54:53.468255: step: 876/463, loss: 0.006637623533606529 2023-01-24 04:54:54.070441: step: 878/463, loss: 0.0023588091135025024 2023-01-24 04:54:54.664048: step: 880/463, loss: 1.8379653283773223e-06 2023-01-24 04:54:55.254369: step: 882/463, loss: 0.00465978030115366 2023-01-24 04:54:55.866776: step: 884/463, loss: 0.0015332743059843779 2023-01-24 04:54:56.468148: step: 886/463, loss: 0.0014408087590709329 2023-01-24 04:54:57.136314: step: 888/463, loss: 0.0009952731197699904 2023-01-24 04:54:57.723100: step: 890/463, loss: 0.015238079242408276 2023-01-24 04:54:58.333111: step: 892/463, loss: 0.0023763244971632957 2023-01-24 04:54:58.926831: step: 894/463, loss: 0.23238185048103333 2023-01-24 04:54:59.534603: step: 896/463, loss: 0.00034790142672136426 2023-01-24 04:55:00.139431: step: 898/463, loss: 0.00016745791072025895 2023-01-24 04:55:00.708632: step: 900/463, loss: 3.866072802338749e-05 2023-01-24 04:55:01.355204: step: 902/463, loss: 0.03962722048163414 2023-01-24 04:55:01.940406: step: 904/463, loss: 0.009928912855684757 2023-01-24 04:55:02.513816: step: 906/463, loss: 7.201062544481829e-05 2023-01-24 04:55:03.096627: step: 908/463, loss: 0.00030734733445569873 2023-01-24 04:55:03.725343: step: 910/463, loss: 0.0022943688090890646 2023-01-24 04:55:04.391185: step: 912/463, loss: 0.04443220794200897 2023-01-24 04:55:04.930518: step: 914/463, loss: 0.025414040312170982 2023-01-24 04:55:05.546485: step: 916/463, loss: 0.007347363047301769 2023-01-24 04:55:06.179913: step: 918/463, loss: 0.0052402387373149395 2023-01-24 04:55:06.747803: step: 920/463, loss: 0.0025292327627539635 2023-01-24 04:55:07.369617: step: 922/463, loss: 0.00011297204036964104 2023-01-24 04:55:08.004698: step: 924/463, loss: 0.000854483456350863 2023-01-24 04:55:08.615438: step: 926/463, loss: 0.05471451207995415 ================================================== Loss: 0.042 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32587675350701406, 'r': 0.30856261859582546, 'f1': 0.3169834307992203}, 'combined': 0.23356673848363602, 'epoch': 38} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3812140345268542, 'r': 0.3060356996201622, 'f1': 0.33951298331530094}, 'combined': 0.23885335509619163, 'epoch': 38} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3258546109240553, 'r': 0.30050349318613073, 'f1': 0.31266602351251904}, 'combined': 0.23038549100922454, 'epoch': 38} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.38409105867881177, 'r': 0.30666956255769, 'f1': 0.34104155137419967}, 'combined': 0.24213950147568175, 'epoch': 38} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.341658215010142, 'r': 0.3196157495256167, 'f1': 0.33026960784313725}, 'combined': 0.2433565531475748, 'epoch': 38} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3891060791835337, 'r': 0.2864772268573964, 'f1': 0.32999640317074336}, 'combined': 0.23429744625122778, 'epoch': 38} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.27314814814814814, 'r': 0.2809523809523809, 'f1': 0.27699530516431925}, 'combined': 0.18466353677621283, 'epoch': 38} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2672413793103448, 'r': 0.33695652173913043, 'f1': 0.2980769230769231}, 'combined': 0.14903846153846154, 'epoch': 38} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.475, 'r': 0.16379310344827586, 'f1': 0.24358974358974356}, 'combined': 0.16239316239316237, 'epoch': 38} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3207567698259188, 'r': 0.3146703036053131, 'f1': 0.3176843869731801}, 'combined': 0.23408323250655372, 'epoch': 25} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3686692880345029, 'r': 0.31751883707683626, 'f1': 0.34118761114866797}, 'combined': 0.24224320391555423, 'epoch': 25} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3392857142857143, 'r': 0.41304347826086957, 'f1': 0.3725490196078432}, 'combined': 0.1862745098039216, 'epoch': 25} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3565039176745541, 'r': 0.34365083525364987, 'f1': 0.3499594013114463}, 'combined': 0.25786482201896044, 'epoch': 34} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37789489678955906, 'r': 0.31320721140025465, 'f1': 0.34252364570514954}, 'combined': 0.24319178845065617, 'epoch': 34} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6388888888888888, 'r': 0.19827586206896552, 'f1': 0.3026315789473684}, 'combined': 0.20175438596491227, 'epoch': 34} ****************************** Epoch: 39 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:57:40.704229: step: 2/463, loss: 0.0029536874499171972 2023-01-24 04:57:41.282012: step: 4/463, loss: 0.0017862978857010603 2023-01-24 04:57:41.859237: step: 6/463, loss: 0.06362243741750717 2023-01-24 04:57:42.500607: step: 8/463, loss: 0.00902712345123291 2023-01-24 04:57:43.102578: step: 10/463, loss: 0.0014417126076295972 2023-01-24 04:57:43.749231: step: 12/463, loss: 0.008371115662157536 2023-01-24 04:57:44.328353: step: 14/463, loss: 0.0010603920090943575 2023-01-24 04:57:45.086201: step: 16/463, loss: 0.0037245957646518946 2023-01-24 04:57:45.639386: step: 18/463, loss: 0.1742437779903412 2023-01-24 04:57:46.264630: step: 20/463, loss: 0.002839495427906513 2023-01-24 04:57:46.915265: step: 22/463, loss: 0.00021085584012325853 2023-01-24 04:57:47.484263: step: 24/463, loss: 6.279780063778162e-05 2023-01-24 04:57:48.129794: step: 26/463, loss: 7.870018453104421e-05 2023-01-24 04:57:48.797700: step: 28/463, loss: 0.08939316123723984 2023-01-24 04:57:49.404629: step: 30/463, loss: 0.0006622433429583907 2023-01-24 04:57:49.971178: step: 32/463, loss: 0.003791895229369402 2023-01-24 04:57:50.612333: step: 34/463, loss: 0.042171746492385864 2023-01-24 04:57:51.225193: step: 36/463, loss: 0.00014305461081676185 2023-01-24 04:57:51.788657: step: 38/463, loss: 0.021107910200953484 2023-01-24 04:57:52.319778: step: 40/463, loss: 0.020428108051419258 2023-01-24 04:57:52.912718: step: 42/463, loss: 0.006064204964786768 2023-01-24 04:57:53.514484: step: 44/463, loss: 0.00832943432033062 2023-01-24 04:57:54.146676: step: 46/463, loss: 0.0002912107156589627 2023-01-24 04:57:54.711277: step: 48/463, loss: 0.04078163951635361 2023-01-24 04:57:55.311991: step: 50/463, loss: 0.0036601503379642963 2023-01-24 04:57:55.908513: step: 52/463, loss: 0.031323257833719254 2023-01-24 04:57:56.658635: step: 54/463, loss: 0.002734039444476366 2023-01-24 04:57:57.249158: step: 56/463, loss: 0.00019318339764140546 2023-01-24 04:57:57.881266: step: 58/463, loss: 0.0019674694631248713 2023-01-24 04:57:58.451300: step: 60/463, loss: 0.0012423095759004354 2023-01-24 04:57:59.047848: step: 62/463, loss: 0.0006529511883854866 2023-01-24 04:57:59.700165: step: 64/463, loss: 0.002207188867032528 2023-01-24 04:58:00.335341: step: 66/463, loss: 0.02068692073225975 2023-01-24 04:58:00.924923: step: 68/463, loss: 0.0024868135806173086 2023-01-24 04:58:01.459234: step: 70/463, loss: 0.0005751469288952649 2023-01-24 04:58:02.047275: step: 72/463, loss: 0.01519959606230259 2023-01-24 04:58:02.631102: step: 74/463, loss: 1.536778836452868e-05 2023-01-24 04:58:03.199143: step: 76/463, loss: 0.013179419562220573 2023-01-24 04:58:03.811799: step: 78/463, loss: 0.0014368161791935563 2023-01-24 04:58:04.508731: step: 80/463, loss: 0.05410948768258095 2023-01-24 04:58:05.121227: step: 82/463, loss: 0.00173193437512964 2023-01-24 04:58:05.744910: step: 84/463, loss: 0.03508748859167099 2023-01-24 04:58:06.390344: step: 86/463, loss: 0.00010747952910605818 2023-01-24 04:58:06.980908: step: 88/463, loss: 0.0006502188625745475 2023-01-24 04:58:07.584780: step: 90/463, loss: 0.0039642066694796085 2023-01-24 04:58:08.185454: step: 92/463, loss: 1.873634934425354 2023-01-24 04:58:08.813050: step: 94/463, loss: 0.029093123972415924 2023-01-24 04:58:09.412679: step: 96/463, loss: 0.005717658903449774 2023-01-24 04:58:09.976103: step: 98/463, loss: 0.0007634948706254363 2023-01-24 04:58:10.509685: step: 100/463, loss: 0.0023043924011290073 2023-01-24 04:58:11.102341: step: 102/463, loss: 0.06474603712558746 2023-01-24 04:58:11.696704: step: 104/463, loss: 0.00193217268679291 2023-01-24 04:58:12.264024: step: 106/463, loss: 0.0002613132819533348 2023-01-24 04:58:12.887392: step: 108/463, loss: 0.004165544640272856 2023-01-24 04:58:13.541636: step: 110/463, loss: 0.0028182021342217922 2023-01-24 04:58:14.173632: step: 112/463, loss: 0.00564122898504138 2023-01-24 04:58:14.771964: step: 114/463, loss: 0.00029509453452192247 2023-01-24 04:58:15.393768: step: 116/463, loss: 0.005046121776103973 2023-01-24 04:58:15.959393: step: 118/463, loss: 0.012981036677956581 2023-01-24 04:58:16.503644: step: 120/463, loss: 0.00011047240695916116 2023-01-24 04:58:17.058671: step: 122/463, loss: 0.16296428442001343 2023-01-24 04:58:17.615783: step: 124/463, loss: 0.006037072744220495 2023-01-24 04:58:18.258309: step: 126/463, loss: 0.006804350297898054 2023-01-24 04:58:18.809738: step: 128/463, loss: 0.004623854998499155 2023-01-24 04:58:19.439054: step: 130/463, loss: 0.0015553681878373027 2023-01-24 04:58:20.088671: step: 132/463, loss: 0.015948252752423286 2023-01-24 04:58:20.752009: step: 134/463, loss: 0.0013947018887847662 2023-01-24 04:58:21.347717: step: 136/463, loss: 0.03129919618368149 2023-01-24 04:58:21.942441: step: 138/463, loss: 0.015052997507154942 2023-01-24 04:58:22.621643: step: 140/463, loss: 0.0017096189549192786 2023-01-24 04:58:23.222746: step: 142/463, loss: 0.011871411465108395 2023-01-24 04:58:23.843785: step: 144/463, loss: 0.01735660620033741 2023-01-24 04:58:24.465081: step: 146/463, loss: 0.00023252959363162518 2023-01-24 04:58:25.120218: step: 148/463, loss: 0.19987711310386658 2023-01-24 04:58:25.790594: step: 150/463, loss: 0.005841193720698357 2023-01-24 04:58:26.486034: step: 152/463, loss: 0.014544885605573654 2023-01-24 04:58:27.029913: step: 154/463, loss: 0.0006683548563160002 2023-01-24 04:58:27.689180: step: 156/463, loss: 0.0003412170917727053 2023-01-24 04:58:28.300482: step: 158/463, loss: 0.0003820214478764683 2023-01-24 04:58:28.953024: step: 160/463, loss: 0.003583492012694478 2023-01-24 04:58:29.558888: step: 162/463, loss: 0.00123410124797374 2023-01-24 04:58:30.452261: step: 164/463, loss: 0.00035919432411901653 2023-01-24 04:58:31.042691: step: 166/463, loss: 0.06683976203203201 2023-01-24 04:58:31.619931: step: 168/463, loss: 0.0009309444576501846 2023-01-24 04:58:32.169079: step: 170/463, loss: 0.013543528504669666 2023-01-24 04:58:32.765432: step: 172/463, loss: 0.0034565827809274197 2023-01-24 04:58:33.372228: step: 174/463, loss: 1.769718983268831e-05 2023-01-24 04:58:33.967265: step: 176/463, loss: 0.006950953975319862 2023-01-24 04:58:34.551197: step: 178/463, loss: 0.004042952787131071 2023-01-24 04:58:35.180333: step: 180/463, loss: 0.00037881743628531694 2023-01-24 04:58:35.785812: step: 182/463, loss: 0.0007352713728323579 2023-01-24 04:58:36.436281: step: 184/463, loss: 0.0075101545080542564 2023-01-24 04:58:37.034234: step: 186/463, loss: 0.0003324766585137695 2023-01-24 04:58:37.642273: step: 188/463, loss: 0.026404688134789467 2023-01-24 04:58:38.245589: step: 190/463, loss: 0.0004706969193648547 2023-01-24 04:58:38.775647: step: 192/463, loss: 0.013216906227171421 2023-01-24 04:58:39.372793: step: 194/463, loss: 0.002517499029636383 2023-01-24 04:58:39.999903: step: 196/463, loss: 0.008398378267884254 2023-01-24 04:58:40.615098: step: 198/463, loss: 0.04521096870303154 2023-01-24 04:58:41.167699: step: 200/463, loss: 0.0752374604344368 2023-01-24 04:58:41.706124: step: 202/463, loss: 0.0006386119057424366 2023-01-24 04:58:42.281628: step: 204/463, loss: 0.00042849310557357967 2023-01-24 04:58:42.903786: step: 206/463, loss: 0.004269593395292759 2023-01-24 04:58:43.497702: step: 208/463, loss: 0.0003097612934652716 2023-01-24 04:58:44.132760: step: 210/463, loss: 0.007841651327908039 2023-01-24 04:58:44.746146: step: 212/463, loss: 0.09884548932313919 2023-01-24 04:58:45.420010: step: 214/463, loss: 0.0006907930364832282 2023-01-24 04:58:46.035990: step: 216/463, loss: 0.0013129246653988957 2023-01-24 04:58:46.638789: step: 218/463, loss: 0.00905354879796505 2023-01-24 04:58:47.274597: step: 220/463, loss: 0.004721953999251127 2023-01-24 04:58:47.834900: step: 222/463, loss: 1.682779725342698e-06 2023-01-24 04:58:48.425799: step: 224/463, loss: 2.3083714040694758e-05 2023-01-24 04:58:49.023854: step: 226/463, loss: 0.0033613392151892185 2023-01-24 04:58:49.628527: step: 228/463, loss: 0.003769200062379241 2023-01-24 04:58:50.326269: step: 230/463, loss: 0.0008723926148377359 2023-01-24 04:58:50.967013: step: 232/463, loss: 0.0016435619909316301 2023-01-24 04:58:51.558922: step: 234/463, loss: 0.018162447959184647 2023-01-24 04:58:52.193192: step: 236/463, loss: 6.905943882884458e-05 2023-01-24 04:58:52.809180: step: 238/463, loss: 0.00010352791287004948 2023-01-24 04:58:53.442388: step: 240/463, loss: 0.0003216811455786228 2023-01-24 04:58:54.128517: step: 242/463, loss: 0.00011421266390243545 2023-01-24 04:58:54.728530: step: 244/463, loss: 0.0008253664709627628 2023-01-24 04:58:55.384029: step: 246/463, loss: 0.0003772302297875285 2023-01-24 04:58:56.040693: step: 248/463, loss: 0.04567756503820419 2023-01-24 04:58:56.680164: step: 250/463, loss: 0.0017905785934999585 2023-01-24 04:58:57.287649: step: 252/463, loss: 0.0014611751539632678 2023-01-24 04:58:57.898280: step: 254/463, loss: 0.0005721771158277988 2023-01-24 04:58:58.440713: step: 256/463, loss: 1.7098172903060913 2023-01-24 04:58:59.064958: step: 258/463, loss: 0.0011241431348025799 2023-01-24 04:58:59.643680: step: 260/463, loss: 0.0024045256432145834 2023-01-24 04:59:00.289194: step: 262/463, loss: 0.0010758880525827408 2023-01-24 04:59:00.937352: step: 264/463, loss: 0.03560807183384895 2023-01-24 04:59:01.484678: step: 266/463, loss: 0.005182648077607155 2023-01-24 04:59:02.115545: step: 268/463, loss: 0.004605799913406372 2023-01-24 04:59:02.782943: step: 270/463, loss: 0.004253553692251444 2023-01-24 04:59:03.351741: step: 272/463, loss: 0.008605340495705605 2023-01-24 04:59:03.955674: step: 274/463, loss: 0.003051062813028693 2023-01-24 04:59:04.566954: step: 276/463, loss: 0.008436755277216434 2023-01-24 04:59:05.144831: step: 278/463, loss: 0.004038630984723568 2023-01-24 04:59:05.787859: step: 280/463, loss: 0.002257554791867733 2023-01-24 04:59:06.439480: step: 282/463, loss: 0.020271720364689827 2023-01-24 04:59:07.049064: step: 284/463, loss: 0.000376762734958902 2023-01-24 04:59:07.690087: step: 286/463, loss: 0.019865255802869797 2023-01-24 04:59:08.301478: step: 288/463, loss: 0.008101929910480976 2023-01-24 04:59:08.910150: step: 290/463, loss: 0.0006391589995473623 2023-01-24 04:59:09.522578: step: 292/463, loss: 0.007158253807574511 2023-01-24 04:59:10.060401: step: 294/463, loss: 7.411217666231096e-06 2023-01-24 04:59:10.654994: step: 296/463, loss: 0.0005703741917386651 2023-01-24 04:59:11.260709: step: 298/463, loss: 0.00976228155195713 2023-01-24 04:59:11.892487: step: 300/463, loss: 0.05397902801632881 2023-01-24 04:59:12.501779: step: 302/463, loss: 0.0020465925335884094 2023-01-24 04:59:13.133426: step: 304/463, loss: 0.0010628512827679515 2023-01-24 04:59:13.793429: step: 306/463, loss: 0.011069978587329388 2023-01-24 04:59:14.460723: step: 308/463, loss: 0.00022986774274613708 2023-01-24 04:59:15.023647: step: 310/463, loss: 1.7668295186012983e-05 2023-01-24 04:59:15.630295: step: 312/463, loss: 0.0023194581735879183 2023-01-24 04:59:16.229433: step: 314/463, loss: 3.2315667340299115e-05 2023-01-24 04:59:16.858892: step: 316/463, loss: 0.002089599147439003 2023-01-24 04:59:17.510015: step: 318/463, loss: 0.0008662441396154463 2023-01-24 04:59:18.098108: step: 320/463, loss: 0.05668460577726364 2023-01-24 04:59:18.669188: step: 322/463, loss: 0.005523565225303173 2023-01-24 04:59:19.414003: step: 324/463, loss: 0.011488648131489754 2023-01-24 04:59:19.997831: step: 326/463, loss: 0.014971844851970673 2023-01-24 04:59:20.597328: step: 328/463, loss: 0.00040880334563553333 2023-01-24 04:59:21.298293: step: 330/463, loss: 0.015613595955073833 2023-01-24 04:59:21.987298: step: 332/463, loss: 0.007749130483716726 2023-01-24 04:59:22.578280: step: 334/463, loss: 0.0005654449923895299 2023-01-24 04:59:23.160844: step: 336/463, loss: 0.0017494824714958668 2023-01-24 04:59:23.786899: step: 338/463, loss: 0.029839621856808662 2023-01-24 04:59:24.378336: step: 340/463, loss: 0.059249069541692734 2023-01-24 04:59:24.966742: step: 342/463, loss: 0.0010383508633822203 2023-01-24 04:59:25.594016: step: 344/463, loss: 0.005247156601399183 2023-01-24 04:59:26.229331: step: 346/463, loss: 0.0028701440896838903 2023-01-24 04:59:26.873165: step: 348/463, loss: 8.490162144880742e-05 2023-01-24 04:59:27.464340: step: 350/463, loss: 0.0016751722432672977 2023-01-24 04:59:28.077606: step: 352/463, loss: 0.0004033385484945029 2023-01-24 04:59:28.617324: step: 354/463, loss: 0.001830022782087326 2023-01-24 04:59:29.174543: step: 356/463, loss: 0.00018757030193228275 2023-01-24 04:59:29.787322: step: 358/463, loss: 0.006735657341778278 2023-01-24 04:59:30.481595: step: 360/463, loss: 0.0017203419702127576 2023-01-24 04:59:31.147633: step: 362/463, loss: 0.0030367407016456127 2023-01-24 04:59:31.713477: step: 364/463, loss: 0.00016087255789898336 2023-01-24 04:59:32.507968: step: 366/463, loss: 0.005635458510369062 2023-01-24 04:59:33.100676: step: 368/463, loss: 3.779600956477225e-05 2023-01-24 04:59:33.749403: step: 370/463, loss: 0.0011224321788176894 2023-01-24 04:59:34.362534: step: 372/463, loss: 0.0007173219928517938 2023-01-24 04:59:34.993300: step: 374/463, loss: 0.013019929639995098 2023-01-24 04:59:35.601799: step: 376/463, loss: 3.337765883770771e-05 2023-01-24 04:59:36.180416: step: 378/463, loss: 0.08909279108047485 2023-01-24 04:59:36.744736: step: 380/463, loss: 0.00020109888282604516 2023-01-24 04:59:37.369238: step: 382/463, loss: 0.01129963994026184 2023-01-24 04:59:37.965454: step: 384/463, loss: 0.05421024560928345 2023-01-24 04:59:38.546067: step: 386/463, loss: 0.017056381329894066 2023-01-24 04:59:39.167381: step: 388/463, loss: 0.0013046949170529842 2023-01-24 04:59:39.783791: step: 390/463, loss: 0.3122487962245941 2023-01-24 04:59:40.363457: step: 392/463, loss: 0.0004977398202754557 2023-01-24 04:59:40.947691: step: 394/463, loss: 0.01725260727107525 2023-01-24 04:59:41.588414: step: 396/463, loss: 0.00030762061942368746 2023-01-24 04:59:42.117016: step: 398/463, loss: 0.04174308106303215 2023-01-24 04:59:42.730441: step: 400/463, loss: 0.00011562365398276597 2023-01-24 04:59:43.381410: step: 402/463, loss: 0.008157877251505852 2023-01-24 04:59:44.157834: step: 404/463, loss: 0.011425844393670559 2023-01-24 04:59:44.759583: step: 406/463, loss: 0.00044775003334507346 2023-01-24 04:59:45.393840: step: 408/463, loss: 0.010199598968029022 2023-01-24 04:59:46.031805: step: 410/463, loss: 0.07790151983499527 2023-01-24 04:59:46.706402: step: 412/463, loss: 0.18144969642162323 2023-01-24 04:59:47.353719: step: 414/463, loss: 0.016136107966303825 2023-01-24 04:59:47.962279: step: 416/463, loss: 0.00035462743835523725 2023-01-24 04:59:48.595712: step: 418/463, loss: 0.0033993625547736883 2023-01-24 04:59:49.260965: step: 420/463, loss: 0.017124848440289497 2023-01-24 04:59:49.822910: step: 422/463, loss: 0.0018702426459640265 2023-01-24 04:59:50.423430: step: 424/463, loss: 0.012961570173501968 2023-01-24 04:59:51.047198: step: 426/463, loss: 0.39622244238853455 2023-01-24 04:59:51.670224: step: 428/463, loss: 0.005168942268937826 2023-01-24 04:59:52.409803: step: 430/463, loss: 0.04930108040571213 2023-01-24 04:59:53.016585: step: 432/463, loss: 0.0007135437335819006 2023-01-24 04:59:53.583944: step: 434/463, loss: 0.0027008457109332085 2023-01-24 04:59:54.201660: step: 436/463, loss: 0.005047690588980913 2023-01-24 04:59:54.826134: step: 438/463, loss: 0.007116024382412434 2023-01-24 04:59:55.466639: step: 440/463, loss: 0.0011118858819827437 2023-01-24 04:59:56.045220: step: 442/463, loss: 0.0013045461382716894 2023-01-24 04:59:56.612193: step: 444/463, loss: 0.004540354013442993 2023-01-24 04:59:57.268315: step: 446/463, loss: 0.002491620136424899 2023-01-24 04:59:57.836274: step: 448/463, loss: 0.0003016537521034479 2023-01-24 04:59:58.451675: step: 450/463, loss: 0.0010271563660353422 2023-01-24 04:59:59.089836: step: 452/463, loss: 0.006951835472136736 2023-01-24 04:59:59.673645: step: 454/463, loss: 0.005016373936086893 2023-01-24 05:00:00.214687: step: 456/463, loss: 0.013295596465468407 2023-01-24 05:00:00.784858: step: 458/463, loss: 0.00019500336202327162 2023-01-24 05:00:01.411383: step: 460/463, loss: 0.0025354879908263683 2023-01-24 05:00:02.050775: step: 462/463, loss: 0.008653384633362293 2023-01-24 05:00:02.595569: step: 464/463, loss: 0.008447062224149704 2023-01-24 05:00:03.151807: step: 466/463, loss: 0.016473259776830673 2023-01-24 05:00:03.750454: step: 468/463, loss: 0.003759447718039155 2023-01-24 05:00:04.364898: step: 470/463, loss: 0.007101527415215969 2023-01-24 05:00:05.016904: step: 472/463, loss: 0.02432686649262905 2023-01-24 05:00:05.617160: step: 474/463, loss: 0.0009364105644635856 2023-01-24 05:00:06.254197: step: 476/463, loss: 0.06688988208770752 2023-01-24 05:00:06.840631: step: 478/463, loss: 0.00013121383381076157 2023-01-24 05:00:07.456259: step: 480/463, loss: 0.1822829395532608 2023-01-24 05:00:08.072052: step: 482/463, loss: 0.00043167173862457275 2023-01-24 05:00:08.698588: step: 484/463, loss: 0.019871875643730164 2023-01-24 05:00:09.308667: step: 486/463, loss: 0.003996856510639191 2023-01-24 05:00:09.917694: step: 488/463, loss: 0.008730200119316578 2023-01-24 05:00:10.516606: step: 490/463, loss: 0.05880379304289818 2023-01-24 05:00:11.081088: step: 492/463, loss: 0.0002280681364936754 2023-01-24 05:00:11.746883: step: 494/463, loss: 0.08552487939596176 2023-01-24 05:00:12.355385: step: 496/463, loss: 0.005882505793124437 2023-01-24 05:00:12.897063: step: 498/463, loss: 0.002381096128374338 2023-01-24 05:00:13.471038: step: 500/463, loss: 0.0008325762464664876 2023-01-24 05:00:14.145035: step: 502/463, loss: 0.004553348757326603 2023-01-24 05:00:14.697230: step: 504/463, loss: 0.008471532724797726 2023-01-24 05:00:15.270125: step: 506/463, loss: 0.0008587770280428231 2023-01-24 05:00:15.852891: step: 508/463, loss: 0.0008038969826884568 2023-01-24 05:00:16.463773: step: 510/463, loss: 0.024887923151254654 2023-01-24 05:00:17.098335: step: 512/463, loss: 2.6092739062733017e-06 2023-01-24 05:00:17.720869: step: 514/463, loss: 0.07521632313728333 2023-01-24 05:00:18.336024: step: 516/463, loss: 0.060318298637866974 2023-01-24 05:00:18.979649: step: 518/463, loss: 0.0005530479247681797 2023-01-24 05:00:19.577755: step: 520/463, loss: 0.006105293985456228 2023-01-24 05:00:20.125953: step: 522/463, loss: 0.00010918935731751844 2023-01-24 05:00:20.735692: step: 524/463, loss: 0.00030394140048883855 2023-01-24 05:00:21.403573: step: 526/463, loss: 0.0029401849023997784 2023-01-24 05:00:22.017839: step: 528/463, loss: 0.02905370108783245 2023-01-24 05:00:22.596273: step: 530/463, loss: 0.09340333938598633 2023-01-24 05:00:23.154288: step: 532/463, loss: 0.005649177357554436 2023-01-24 05:00:23.808404: step: 534/463, loss: 0.00477104727178812 2023-01-24 05:00:24.416212: step: 536/463, loss: 0.013852742500603199 2023-01-24 05:00:25.163488: step: 538/463, loss: 7.255268428707495e-05 2023-01-24 05:00:25.769642: step: 540/463, loss: 0.009799850173294544 2023-01-24 05:00:26.423374: step: 542/463, loss: 0.003284356091171503 2023-01-24 05:00:27.009197: step: 544/463, loss: 0.017891548573970795 2023-01-24 05:00:27.610354: step: 546/463, loss: 0.0003199272614438087 2023-01-24 05:00:28.180937: step: 548/463, loss: 0.0006704149418510497 2023-01-24 05:00:28.787903: step: 550/463, loss: 0.004221735987812281 2023-01-24 05:00:29.385602: step: 552/463, loss: 0.004358639474958181 2023-01-24 05:00:29.952939: step: 554/463, loss: 0.0001514908653916791 2023-01-24 05:00:30.613558: step: 556/463, loss: 0.008513999171555042 2023-01-24 05:00:31.132778: step: 558/463, loss: 0.00355542846955359 2023-01-24 05:00:31.812009: step: 560/463, loss: 0.056284334510564804 2023-01-24 05:00:32.409465: step: 562/463, loss: 0.005297726485878229 2023-01-24 05:00:33.026791: step: 564/463, loss: 0.00016527307161595672 2023-01-24 05:00:33.609519: step: 566/463, loss: 0.0008678355370648205 2023-01-24 05:00:34.204101: step: 568/463, loss: 0.01331397145986557 2023-01-24 05:00:34.793444: step: 570/463, loss: 0.0013124377001076937 2023-01-24 05:00:35.389861: step: 572/463, loss: 5.893642810406163e-05 2023-01-24 05:00:35.962673: step: 574/463, loss: 0.006040989886969328 2023-01-24 05:00:36.642220: step: 576/463, loss: 0.30151134729385376 2023-01-24 05:00:37.291852: step: 578/463, loss: 0.0006265403935685754 2023-01-24 05:00:37.932941: step: 580/463, loss: 0.005605383310467005 2023-01-24 05:00:38.486200: step: 582/463, loss: 0.00016792109818197787 2023-01-24 05:00:39.012228: step: 584/463, loss: 0.004918633494526148 2023-01-24 05:00:39.633770: step: 586/463, loss: 0.004094654694199562 2023-01-24 05:00:40.265633: step: 588/463, loss: 0.0010497398907318711 2023-01-24 05:00:40.895948: step: 590/463, loss: 0.001240387442521751 2023-01-24 05:00:41.516197: step: 592/463, loss: 0.0012299242662265897 2023-01-24 05:00:42.108068: step: 594/463, loss: 0.0020875621121376753 2023-01-24 05:00:42.713195: step: 596/463, loss: 0.006376017816364765 2023-01-24 05:00:43.334058: step: 598/463, loss: 0.0007959812646731734 2023-01-24 05:00:43.900621: step: 600/463, loss: 0.010854181833565235 2023-01-24 05:00:44.481431: step: 602/463, loss: 0.0012666585389524698 2023-01-24 05:00:45.055308: step: 604/463, loss: 0.0005750849959440529 2023-01-24 05:00:45.676431: step: 606/463, loss: 0.0013638209784403443 2023-01-24 05:00:46.342442: step: 608/463, loss: 1.788135932656587e-07 2023-01-24 05:00:46.945686: step: 610/463, loss: 0.021375054493546486 2023-01-24 05:00:47.587597: step: 612/463, loss: 0.008225424215197563 2023-01-24 05:00:48.239584: step: 614/463, loss: 0.0031556535977870226 2023-01-24 05:00:48.854136: step: 616/463, loss: 0.006158936768770218 2023-01-24 05:00:49.431926: step: 618/463, loss: 0.4602362811565399 2023-01-24 05:00:50.007467: step: 620/463, loss: 0.0023817699402570724 2023-01-24 05:00:50.572947: step: 622/463, loss: 0.002229909412562847 2023-01-24 05:00:51.209174: step: 624/463, loss: 0.00012015277025057003 2023-01-24 05:00:51.840985: step: 626/463, loss: 0.031846918165683746 2023-01-24 05:00:52.468811: step: 628/463, loss: 0.02883807197213173 2023-01-24 05:00:53.062861: step: 630/463, loss: 0.0005970646161586046 2023-01-24 05:00:53.727780: step: 632/463, loss: 0.0010542309610173106 2023-01-24 05:00:54.333054: step: 634/463, loss: 0.02164854295551777 2023-01-24 05:00:54.892844: step: 636/463, loss: 0.026588352397084236 2023-01-24 05:00:55.546958: step: 638/463, loss: 0.0026291690301150084 2023-01-24 05:00:56.174860: step: 640/463, loss: 0.00013516045873984694 2023-01-24 05:00:56.777370: step: 642/463, loss: 0.05543491616845131 2023-01-24 05:00:57.357764: step: 644/463, loss: 0.0002271700941491872 2023-01-24 05:00:57.911463: step: 646/463, loss: 1.0644317626429256e-05 2023-01-24 05:00:58.474992: step: 648/463, loss: 7.628042658325285e-05 2023-01-24 05:00:59.120574: step: 650/463, loss: 0.037397388368844986 2023-01-24 05:00:59.777414: step: 652/463, loss: 0.055204834789037704 2023-01-24 05:01:00.458716: step: 654/463, loss: 0.11422988027334213 2023-01-24 05:01:01.136591: step: 656/463, loss: 0.016001403331756592 2023-01-24 05:01:01.685429: step: 658/463, loss: 0.0004762106982525438 2023-01-24 05:01:02.304416: step: 660/463, loss: 0.0004457059549167752 2023-01-24 05:01:02.934514: step: 662/463, loss: 0.2303406298160553 2023-01-24 05:01:03.522254: step: 664/463, loss: 3.318985545774922e-05 2023-01-24 05:01:04.165222: step: 666/463, loss: 0.006047721952199936 2023-01-24 05:01:04.727871: step: 668/463, loss: 0.0027455417439341545 2023-01-24 05:01:05.272466: step: 670/463, loss: 0.0025245281867682934 2023-01-24 05:01:05.849721: step: 672/463, loss: 0.02166556939482689 2023-01-24 05:01:06.436410: step: 674/463, loss: 0.017267046496272087 2023-01-24 05:01:06.975777: step: 676/463, loss: 0.00014559221745003015 2023-01-24 05:01:07.545925: step: 678/463, loss: 0.00634402222931385 2023-01-24 05:01:08.158061: step: 680/463, loss: 0.0032267258502542973 2023-01-24 05:01:08.687291: step: 682/463, loss: 0.0032526233699172735 2023-01-24 05:01:09.345776: step: 684/463, loss: 0.054434701800346375 2023-01-24 05:01:09.892029: step: 686/463, loss: 0.015999989584088326 2023-01-24 05:01:10.502005: step: 688/463, loss: 0.0003327361773699522 2023-01-24 05:01:10.989377: step: 690/463, loss: 0.003887621685862541 2023-01-24 05:01:11.630594: step: 692/463, loss: 0.0006905045011080801 2023-01-24 05:01:12.254174: step: 694/463, loss: 0.36939725279808044 2023-01-24 05:01:12.869969: step: 696/463, loss: 0.013163231313228607 2023-01-24 05:01:13.475264: step: 698/463, loss: 0.003509597619995475 2023-01-24 05:01:14.096842: step: 700/463, loss: 0.0004136463685426861 2023-01-24 05:01:14.687791: step: 702/463, loss: 0.03307529166340828 2023-01-24 05:01:15.347103: step: 704/463, loss: 0.0003018724964931607 2023-01-24 05:01:15.909188: step: 706/463, loss: 0.008670144714415073 2023-01-24 05:01:16.474469: step: 708/463, loss: 0.022026104852557182 2023-01-24 05:01:17.084206: step: 710/463, loss: 0.008024441078305244 2023-01-24 05:01:17.702087: step: 712/463, loss: 0.0021652805153280497 2023-01-24 05:01:18.277307: step: 714/463, loss: 0.0011814641766250134 2023-01-24 05:01:18.854436: step: 716/463, loss: 0.003586155828088522 2023-01-24 05:01:19.454608: step: 718/463, loss: 0.0037679339293390512 2023-01-24 05:01:20.116490: step: 720/463, loss: 0.5030397772789001 2023-01-24 05:01:20.724929: step: 722/463, loss: 0.22961629927158356 2023-01-24 05:01:21.354858: step: 724/463, loss: 0.006679709535092115 2023-01-24 05:01:21.958997: step: 726/463, loss: 0.0050551095046103 2023-01-24 05:01:22.561164: step: 728/463, loss: 0.00042673206189647317 2023-01-24 05:01:23.167368: step: 730/463, loss: 0.027647657319903374 2023-01-24 05:01:23.773904: step: 732/463, loss: 0.18176552653312683 2023-01-24 05:01:24.375685: step: 734/463, loss: 0.002313205972313881 2023-01-24 05:01:24.984489: step: 736/463, loss: 0.002246041316539049 2023-01-24 05:01:25.591947: step: 738/463, loss: 0.0002258759195683524 2023-01-24 05:01:26.202227: step: 740/463, loss: 2.485910226823762e-05 2023-01-24 05:01:26.799923: step: 742/463, loss: 0.011985654942691326 2023-01-24 05:01:27.365198: step: 744/463, loss: 0.018923815339803696 2023-01-24 05:01:27.901597: step: 746/463, loss: 0.01808539219200611 2023-01-24 05:01:28.587408: step: 748/463, loss: 0.0224132277071476 2023-01-24 05:01:29.220207: step: 750/463, loss: 0.11625774949789047 2023-01-24 05:01:29.819983: step: 752/463, loss: 0.008917507715523243 2023-01-24 05:01:30.400304: step: 754/463, loss: 0.03607790172100067 2023-01-24 05:01:31.007821: step: 756/463, loss: 0.0061187478713691235 2023-01-24 05:01:31.605270: step: 758/463, loss: 0.0016714216908439994 2023-01-24 05:01:32.182685: step: 760/463, loss: 0.9870973229408264 2023-01-24 05:01:32.751332: step: 762/463, loss: 0.0002732712309807539 2023-01-24 05:01:33.411710: step: 764/463, loss: 0.00562549801543355 2023-01-24 05:01:34.052926: step: 766/463, loss: 0.00650402856990695 2023-01-24 05:01:34.605264: step: 768/463, loss: 0.0035541339311748743 2023-01-24 05:01:35.189093: step: 770/463, loss: 0.004167993552982807 2023-01-24 05:01:35.740578: step: 772/463, loss: 0.08935530483722687 2023-01-24 05:01:36.373162: step: 774/463, loss: 0.0015259721549227834 2023-01-24 05:01:37.013540: step: 776/463, loss: 0.039182282984256744 2023-01-24 05:01:37.640293: step: 778/463, loss: 0.02477927692234516 2023-01-24 05:01:38.330804: step: 780/463, loss: 0.17088502645492554 2023-01-24 05:01:39.005510: step: 782/463, loss: 0.002818054985255003 2023-01-24 05:01:39.566746: step: 784/463, loss: 0.002836170606315136 2023-01-24 05:01:40.150070: step: 786/463, loss: 0.0011067213490605354 2023-01-24 05:01:40.781931: step: 788/463, loss: 0.0025767995975911617 2023-01-24 05:01:41.358054: step: 790/463, loss: 0.01118152029812336 2023-01-24 05:01:41.946600: step: 792/463, loss: 0.010098408907651901 2023-01-24 05:01:42.545725: step: 794/463, loss: 0.00035823159851133823 2023-01-24 05:01:43.197222: step: 796/463, loss: 0.040988143533468246 2023-01-24 05:01:43.845682: step: 798/463, loss: 0.0013866774970665574 2023-01-24 05:01:44.576139: step: 800/463, loss: 0.0021817174274474382 2023-01-24 05:01:45.173796: step: 802/463, loss: 0.005736764054745436 2023-01-24 05:01:45.731053: step: 804/463, loss: 0.003269749227911234 2023-01-24 05:01:46.302517: step: 806/463, loss: 0.0008220816380344331 2023-01-24 05:01:46.949075: step: 808/463, loss: 0.00043689514859579504 2023-01-24 05:01:47.552187: step: 810/463, loss: 0.023691652342677116 2023-01-24 05:01:48.113888: step: 812/463, loss: 0.00047944829566404223 2023-01-24 05:01:48.739688: step: 814/463, loss: 0.040619369596242905 2023-01-24 05:01:49.409531: step: 816/463, loss: 0.0037286996375769377 2023-01-24 05:01:50.029214: step: 818/463, loss: 0.011137083172798157 2023-01-24 05:01:50.613401: step: 820/463, loss: 0.005711351055651903 2023-01-24 05:01:51.207714: step: 822/463, loss: 0.0019473850261420012 2023-01-24 05:01:51.780256: step: 824/463, loss: 0.001603243756107986 2023-01-24 05:01:52.432140: step: 826/463, loss: 7.72262501413934e-05 2023-01-24 05:01:53.157119: step: 828/463, loss: 0.007704382762312889 2023-01-24 05:01:53.776382: step: 830/463, loss: 0.01583324745297432 2023-01-24 05:01:54.403498: step: 832/463, loss: 0.0035269339568912983 2023-01-24 05:01:55.017527: step: 834/463, loss: 0.003679445246234536 2023-01-24 05:01:55.583335: step: 836/463, loss: 0.01683279499411583 2023-01-24 05:01:56.185254: step: 838/463, loss: 3.340171679155901e-05 2023-01-24 05:01:56.750562: step: 840/463, loss: 0.15346944332122803 2023-01-24 05:01:57.330198: step: 842/463, loss: 0.0015025633620098233 2023-01-24 05:01:58.080164: step: 844/463, loss: 0.028546521440148354 2023-01-24 05:01:58.669962: step: 846/463, loss: 0.07196623831987381 2023-01-24 05:01:59.257053: step: 848/463, loss: 0.0040294816717505455 2023-01-24 05:01:59.841880: step: 850/463, loss: 0.00495159113779664 2023-01-24 05:02:00.417989: step: 852/463, loss: 0.002748672617599368 2023-01-24 05:02:01.091343: step: 854/463, loss: 0.00038506975397467613 2023-01-24 05:02:01.768382: step: 856/463, loss: 0.0020835120230913162 2023-01-24 05:02:02.470997: step: 858/463, loss: 0.008103063330054283 2023-01-24 05:02:03.056110: step: 860/463, loss: 0.0011219581356272101 2023-01-24 05:02:03.628713: step: 862/463, loss: 0.043672967702150345 2023-01-24 05:02:04.176216: step: 864/463, loss: 0.08592807501554489 2023-01-24 05:02:04.780191: step: 866/463, loss: 0.0010538650676608086 2023-01-24 05:02:05.392258: step: 868/463, loss: 0.001713110483251512 2023-01-24 05:02:06.020888: step: 870/463, loss: 0.006746864411979914 2023-01-24 05:02:06.615986: step: 872/463, loss: 0.010338060557842255 2023-01-24 05:02:07.283226: step: 874/463, loss: 0.0021943894680589437 2023-01-24 05:02:07.884696: step: 876/463, loss: 0.0020176905672997236 2023-01-24 05:02:08.526676: step: 878/463, loss: 0.0029604544397443533 2023-01-24 05:02:09.059070: step: 880/463, loss: 0.005362091585993767 2023-01-24 05:02:09.628742: step: 882/463, loss: 0.00253876019269228 2023-01-24 05:02:10.259747: step: 884/463, loss: 0.0010475211311131716 2023-01-24 05:02:10.884466: step: 886/463, loss: 0.05512314662337303 2023-01-24 05:02:11.543751: step: 888/463, loss: 0.3710273206233978 2023-01-24 05:02:12.219267: step: 890/463, loss: 0.04898376762866974 2023-01-24 05:02:12.883604: step: 892/463, loss: 0.005064226221293211 2023-01-24 05:02:13.446887: step: 894/463, loss: 0.037696752697229385 2023-01-24 05:02:14.044028: step: 896/463, loss: 0.004992349538952112 2023-01-24 05:02:14.671272: step: 898/463, loss: 0.007645969279110432 2023-01-24 05:02:15.254587: step: 900/463, loss: 0.0005049493629485369 2023-01-24 05:02:16.016836: step: 902/463, loss: 0.011177973821759224 2023-01-24 05:02:16.639381: step: 904/463, loss: 0.006005801260471344 2023-01-24 05:02:17.235617: step: 906/463, loss: 0.00042458303505554795 2023-01-24 05:02:17.881012: step: 908/463, loss: 0.02222486026585102 2023-01-24 05:02:18.488178: step: 910/463, loss: 0.00013914224109612405 2023-01-24 05:02:19.052999: step: 912/463, loss: 0.0006213517626747489 2023-01-24 05:02:19.702821: step: 914/463, loss: 0.00032655560062266886 2023-01-24 05:02:20.282846: step: 916/463, loss: 0.0005051406333222985 2023-01-24 05:02:20.877972: step: 918/463, loss: 0.0025932895950973034 2023-01-24 05:02:21.468518: step: 920/463, loss: 0.002323127817362547 2023-01-24 05:02:22.106860: step: 922/463, loss: 0.0009428197517991066 2023-01-24 05:02:22.709892: step: 924/463, loss: 0.011570645496249199 2023-01-24 05:02:23.347984: step: 926/463, loss: 0.06112077459692955 ================================================== Loss: 0.031 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.309421301415303, 'r': 0.3123569873869852, 'f1': 0.3108822140754319}, 'combined': 0.22907110510821294, 'epoch': 39} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.3611447743797041, 'r': 0.31797127168335204, 'f1': 0.33818568663491555}, 'combined': 0.23791957853712653, 'epoch': 39} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3139935661764706, 'r': 0.3145893793950218, 'f1': 0.31429119040981324}, 'combined': 0.2315829824072308, 'epoch': 39} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3627059509436324, 'r': 0.31681383673174174, 'f1': 0.33821020670198043}, 'combined': 0.2401292467584061, 'epoch': 39} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.326921046854995, 'r': 0.3294024210246724, 'f1': 0.32815704325142214}, 'combined': 0.24179992660631103, 'epoch': 39} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3753204671699188, 'r': 0.30916450349773417, 'f1': 0.33904547464918977}, 'combined': 0.2407222870009247, 'epoch': 39} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31862745098039214, 'r': 0.3095238095238095, 'f1': 0.31400966183574874}, 'combined': 0.20933977455716582, 'epoch': 39} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.30833333333333335, 'r': 0.40217391304347827, 'f1': 0.34905660377358494}, 'combined': 0.17452830188679247, 'epoch': 39} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.1724137931034483, 'f1': 0.22727272727272724}, 'combined': 0.1515151515151515, 'epoch': 39} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29975747652001977, 'r': 0.32876626457034425, 'f1': 0.3135924369747899}, 'combined': 0.23106811145510833, 'epoch': 8} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.343487220100871, 'r': 0.2997270681508473, 'f1': 0.32011856486567664}, 'combined': 0.22520904060901875, 'epoch': 8} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3270833333333333, 'r': 0.37380952380952376, 'f1': 0.34888888888888886}, 'combined': 0.23259259259259257, 'epoch': 8} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3207567698259188, 'r': 0.3146703036053131, 'f1': 0.3176843869731801}, 'combined': 0.23408323250655372, 'epoch': 25} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.3686692880345029, 'r': 0.31751883707683626, 'f1': 0.34118761114866797}, 'combined': 0.24224320391555423, 'epoch': 25} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3392857142857143, 'r': 0.41304347826086957, 'f1': 0.3725490196078432}, 'combined': 0.1862745098039216, 'epoch': 25} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3565039176745541, 'r': 0.34365083525364987, 'f1': 0.3499594013114463}, 'combined': 0.25786482201896044, 'epoch': 34} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.37789489678955906, 'r': 0.31320721140025465, 'f1': 0.34252364570514954}, 'combined': 0.24319178845065617, 'epoch': 34} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6388888888888888, 'r': 0.19827586206896552, 'f1': 0.3026315789473684}, 'combined': 0.20175438596491227, 'epoch': 34}