Command that produces this log: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 ---------------------------------------------------------------------------------------------------- > trainable params: >>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024]) >>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024]) >>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024]) >>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024]) >>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.1.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.1.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.1.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.2.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.2.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.2.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.3.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.3.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.3.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.4.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.4.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.4.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.5.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.5.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.5.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.6.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.6.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.6.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.7.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.7.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.7.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.8.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.8.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.8.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.9.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.9.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.9.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.10.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.10.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.10.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.11.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.11.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.11.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.12.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.12.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.12.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.13.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.13.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.13.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.14.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.14.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.14.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.15.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.15.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.15.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.16.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.16.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.16.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.17.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.17.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.17.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.18.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.18.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.18.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.19.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.19.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.19.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.20.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.20.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.20.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.21.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.21.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.21.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.22.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.22.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.22.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.23.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.23.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.23.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.pooler.dense.weight: torch.Size([1024, 1024]) >>> xlmr.pooler.dense.bias: torch.Size([1024]) >>> basic_gcn.T_T.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.0.bias: torch.Size([1024]) >>> basic_gcn.T_T.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.1.bias: torch.Size([1024]) >>> basic_gcn.T_T.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.2.bias: torch.Size([1024]) >>> basic_gcn.T_E.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.0.bias: torch.Size([1024]) >>> basic_gcn.T_E.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.1.bias: torch.Size([1024]) >>> basic_gcn.T_E.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.2.bias: torch.Size([1024]) >>> basic_gcn.E_T.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.0.bias: torch.Size([1024]) >>> basic_gcn.E_T.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.1.bias: torch.Size([1024]) >>> basic_gcn.E_T.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.2.bias: torch.Size([1024]) >>> basic_gcn.E_E.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.0.bias: torch.Size([1024]) >>> basic_gcn.E_E.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.1.bias: torch.Size([1024]) >>> basic_gcn.E_E.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.2.bias: torch.Size([1024]) >>> basic_gcn.f_t.0.weight: torch.Size([1024, 2048]) >>> basic_gcn.f_t.0.bias: torch.Size([1024]) >>> basic_gcn.f_e.0.weight: torch.Size([1024, 2048]) >>> basic_gcn.f_e.0.bias: torch.Size([1024]) >>> name2classifier.occupy-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.occupy-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.occupy-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.occupy-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.outcome-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.outcome-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.outcome-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.outcome-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.when-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.when-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.when-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.when-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.where-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.where-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.where-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.where-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.who-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.who-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.who-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.who-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-against-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-against-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-against-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-against-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-for-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-for-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-for-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-for-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.organizer-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.organizer-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.organizer-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.organizer-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.wounded-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.wounded-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.wounded-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.wounded-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.arrested-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.arrested-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.arrested-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.arrested-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.imprisoned-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.imprisoned-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.imprisoned-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.imprisoned-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.corrupt-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.corrupt-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.corrupt-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.corrupt-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.judicial-actions-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.judicial-actions-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.judicial-actions-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.judicial-actions-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.charged-with-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.charged-with-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.charged-with-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.charged-with-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.prison-term-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.prison-term-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.prison-term-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.prison-term-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.fine-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.fine-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.fine-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.fine-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.npi-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.npi-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.npi-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.npi-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.disease-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.disease-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.disease-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.disease-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.outbreak-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.outbreak-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.outbreak-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.outbreak-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.blamed-by-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.blamed-by-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.blamed-by-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.blamed-by-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.claimed-by-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.claimed-by-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.claimed-by-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.claimed-by-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.terror-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.terror-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.terror-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.terror-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.kidnapped-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.kidnapped-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.kidnapped-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.kidnapped-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-perp-org-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-perp-org-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-perp-org-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-perp-org-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-perp-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-perp-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-perp-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-perp-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.target-physical-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.target-physical-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.target-physical-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.target-physical-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-killed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-killed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-killed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-killed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.target-human-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.target-human-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.target-human-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.target-human-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-captured-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-captured-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-captured-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-captured-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-objective-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-objective-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-objective-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-objective-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.weapon-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.weapon-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.weapon-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.weapon-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-organizer-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-organizer-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-organizer-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-organizer-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.affected-cumulative-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.affected-cumulative-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.affected-cumulative-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.affected-cumulative-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.damage-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.damage-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.damage-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.damage-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.human-displacement-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.human-displacement-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.human-displacement-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.human-displacement-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.major-disaster-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.major-disaster-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.major-disaster-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.major-disaster-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.related-natural-phenomena-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.related-natural-phenomena-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.related-natural-phenomena-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.related-natural-phenomena-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.responders-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.responders-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.responders-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.responders-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.assistance-provided-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.assistance-provided-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.assistance-provided-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.assistance-provided-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.individuals-affected-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.individuals-affected-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.individuals-affected-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.individuals-affected-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.missing-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.missing-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.missing-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.missing-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.injured-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.injured-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.injured-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.injured-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.assistance-needed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.assistance-needed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.assistance-needed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.assistance-needed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.rescue-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.rescue-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.rescue-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.rescue-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.rescued-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.rescued-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.rescued-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.rescued-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.repair-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.repair-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.repair-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.repair-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.declare-emergency-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.declare-emergency-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.declare-emergency-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.declare-emergency-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.announce-disaster-warnings-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.announce-disaster-warnings-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.announce-disaster-warnings-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.announce-disaster-warnings-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.disease-outbreak-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.disease-outbreak-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.disease-outbreak-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.disease-outbreak-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.current-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.current-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.current-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.current-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.group-identity-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.group-identity-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.group-identity-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.group-identity-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.human-displacement-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.human-displacement-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.human-displacement-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.human-displacement-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.origin-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.origin-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.origin-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.origin-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.total-displaced-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.total-displaced-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.total-displaced-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.total-displaced-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.transitory-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.transitory-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.transitory-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.transitory-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.destination-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.destination-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.destination-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.destination-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.transiting-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.transiting-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.transiting-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.transiting-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.detained-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.detained-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.detained-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.detained-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.blocked-migration-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.blocked-migration-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.blocked-migration-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.blocked-migration-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.cybercrime-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.cybercrime-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.cybercrime-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.cybercrime-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perpetrator-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perpetrator-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perpetrator-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perpetrator-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.victim-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.victim-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.victim-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.victim-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.response-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.response-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.response-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.response-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.information-stolen-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.information-stolen-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.information-stolen-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.information-stolen-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.related-crimes-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.related-crimes-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.related-crimes-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.related-crimes-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.victim-impact-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.victim-impact-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.victim-impact-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.victim-impact-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-amount-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-amount-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-amount-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-amount-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.etip-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.etip-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.etip-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.etip-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.project-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.project-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.project-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.project-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.project-name-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.project-name-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.project-name-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.project-name-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.signatories-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.signatories-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.signatories-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.signatories-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-awardee-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-awardee-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-awardee-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-awardee-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.overall-project-value-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.overall-project-value-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.overall-project-value-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.overall-project-value-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-amount-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-amount-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-amount-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-amount-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-recipient-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-recipient-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-recipient-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-recipient-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-source-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-source-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-source-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-source-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-awarder-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-awarder-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-awarder-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-awarder-ffn.layers.1.bias: torch.Size([2]) >>> irrealis_classifier.layers.0.weight: torch.Size([350, 1127]) >>> irrealis_classifier.layers.0.bias: torch.Size([350]) >>> irrealis_classifier.layers.1.weight: torch.Size([7, 350]) >>> irrealis_classifier.layers.1.bias: torch.Size([7]) n_trainable_params: 613743345, n_nontrainable_params: 0 ---------------------------------------------------------------------------------------------------- ****************************** Epoch: 0 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-23 23:56:51.005149: step: 2/463, loss: 5.602298259735107 2023-01-23 23:56:51.587074: step: 4/463, loss: 8.179482460021973 2023-01-23 23:56:52.290674: step: 6/463, loss: 22.52622413635254 2023-01-23 23:56:52.942132: step: 8/463, loss: 17.946720123291016 2023-01-23 23:56:53.566538: step: 10/463, loss: 13.71105670928955 2023-01-23 23:56:54.296786: step: 12/463, loss: 14.199092864990234 2023-01-23 23:56:54.942820: step: 14/463, loss: 13.36213493347168 2023-01-23 23:56:55.584031: step: 16/463, loss: 7.744307041168213 2023-01-23 23:56:56.260668: step: 18/463, loss: 6.063961982727051 2023-01-23 23:56:56.865882: step: 20/463, loss: 13.85445785522461 2023-01-23 23:56:57.462219: step: 22/463, loss: 5.830394268035889 2023-01-23 23:56:58.104456: step: 24/463, loss: 10.697946548461914 2023-01-23 23:56:58.693797: step: 26/463, loss: 17.892301559448242 2023-01-23 23:56:59.307175: step: 28/463, loss: 21.350595474243164 2023-01-23 23:56:59.816672: step: 30/463, loss: 7.015264987945557 2023-01-23 23:57:00.470129: step: 32/463, loss: 11.877845764160156 2023-01-23 23:57:01.129823: step: 34/463, loss: 11.74675464630127 2023-01-23 23:57:01.735548: step: 36/463, loss: 12.148698806762695 2023-01-23 23:57:02.363355: step: 38/463, loss: 18.386337280273438 2023-01-23 23:57:02.956660: step: 40/463, loss: 9.049979209899902 2023-01-23 23:57:03.574424: step: 42/463, loss: 10.876665115356445 2023-01-23 23:57:04.153419: step: 44/463, loss: 18.010337829589844 2023-01-23 23:57:04.738638: step: 46/463, loss: 15.44178295135498 2023-01-23 23:57:05.388850: step: 48/463, loss: 12.177515983581543 2023-01-23 23:57:06.042801: step: 50/463, loss: 15.834332466125488 2023-01-23 23:57:06.677923: step: 52/463, loss: 26.31072998046875 2023-01-23 23:57:07.353155: step: 54/463, loss: 18.291120529174805 2023-01-23 23:57:07.966959: step: 56/463, loss: 6.603659629821777 2023-01-23 23:57:08.540812: step: 58/463, loss: 20.18153190612793 2023-01-23 23:57:09.161628: step: 60/463, loss: 17.090713500976562 2023-01-23 23:57:09.778176: step: 62/463, loss: 11.084619522094727 2023-01-23 23:57:10.524541: step: 64/463, loss: 12.460599899291992 2023-01-23 23:57:11.153087: step: 66/463, loss: 14.595062255859375 2023-01-23 23:57:11.812470: step: 68/463, loss: 8.258983612060547 2023-01-23 23:57:12.431812: step: 70/463, loss: 24.9277286529541 2023-01-23 23:57:13.043835: step: 72/463, loss: 13.338602066040039 2023-01-23 23:57:13.660867: step: 74/463, loss: 18.1767635345459 2023-01-23 23:57:14.274256: step: 76/463, loss: 14.119132995605469 2023-01-23 23:57:14.845002: step: 78/463, loss: 21.10388946533203 2023-01-23 23:57:15.447292: step: 80/463, loss: 10.985973358154297 2023-01-23 23:57:16.118383: step: 82/463, loss: 16.187604904174805 2023-01-23 23:57:16.778727: step: 84/463, loss: 14.409175872802734 2023-01-23 23:57:17.418898: step: 86/463, loss: 14.989502906799316 2023-01-23 23:57:18.068206: step: 88/463, loss: 16.827714920043945 2023-01-23 23:57:18.716708: step: 90/463, loss: 6.5476298332214355 2023-01-23 23:57:19.310040: step: 92/463, loss: 19.470623016357422 2023-01-23 23:57:19.937107: step: 94/463, loss: 7.303631782531738 2023-01-23 23:57:20.580631: step: 96/463, loss: 12.505188941955566 2023-01-23 23:57:21.169946: step: 98/463, loss: 13.444581031799316 2023-01-23 23:57:21.814085: step: 100/463, loss: 7.426078796386719 2023-01-23 23:57:22.425708: step: 102/463, loss: 9.059783935546875 2023-01-23 23:57:23.040426: step: 104/463, loss: 14.105218887329102 2023-01-23 23:57:23.684114: step: 106/463, loss: 13.249429702758789 2023-01-23 23:57:24.353267: step: 108/463, loss: 15.53437614440918 2023-01-23 23:57:24.982180: step: 110/463, loss: 7.691650390625 2023-01-23 23:57:25.534205: step: 112/463, loss: 23.485111236572266 2023-01-23 23:57:26.128614: step: 114/463, loss: 4.0248260498046875 2023-01-23 23:57:26.747074: step: 116/463, loss: 6.678998947143555 2023-01-23 23:57:27.382409: step: 118/463, loss: 9.573973655700684 2023-01-23 23:57:28.037078: step: 120/463, loss: 16.726200103759766 2023-01-23 23:57:28.672139: step: 122/463, loss: 10.041594505310059 2023-01-23 23:57:29.318759: step: 124/463, loss: 12.505385398864746 2023-01-23 23:57:29.954145: step: 126/463, loss: 11.793069839477539 2023-01-23 23:57:30.554296: step: 128/463, loss: 6.807032585144043 2023-01-23 23:57:31.156360: step: 130/463, loss: 17.378379821777344 2023-01-23 23:57:31.800880: step: 132/463, loss: 15.738119125366211 2023-01-23 23:57:32.395203: step: 134/463, loss: 13.53643798828125 2023-01-23 23:57:33.022282: step: 136/463, loss: 6.586299896240234 2023-01-23 23:57:33.639614: step: 138/463, loss: 5.258662223815918 2023-01-23 23:57:34.430473: step: 140/463, loss: 4.619546413421631 2023-01-23 23:57:35.051489: step: 142/463, loss: 15.715588569641113 2023-01-23 23:57:35.688596: step: 144/463, loss: 4.4310431480407715 2023-01-23 23:57:36.382931: step: 146/463, loss: 23.154541015625 2023-01-23 23:57:37.098332: step: 148/463, loss: 9.951217651367188 2023-01-23 23:57:37.774110: step: 150/463, loss: 8.590143203735352 2023-01-23 23:57:38.393738: step: 152/463, loss: 9.71445369720459 2023-01-23 23:57:39.037054: step: 154/463, loss: 8.005531311035156 2023-01-23 23:57:39.679903: step: 156/463, loss: 13.424943923950195 2023-01-23 23:57:40.275367: step: 158/463, loss: 7.106303691864014 2023-01-23 23:57:40.926508: step: 160/463, loss: 8.364706993103027 2023-01-23 23:57:41.566062: step: 162/463, loss: 8.953907012939453 2023-01-23 23:57:42.155302: step: 164/463, loss: 3.3302693367004395 2023-01-23 23:57:42.823650: step: 166/463, loss: 16.106121063232422 2023-01-23 23:57:43.462792: step: 168/463, loss: 5.725656986236572 2023-01-23 23:57:44.129837: step: 170/463, loss: 6.649268627166748 2023-01-23 23:57:44.730197: step: 172/463, loss: 9.520486831665039 2023-01-23 23:57:45.310358: step: 174/463, loss: 8.203156471252441 2023-01-23 23:57:45.944708: step: 176/463, loss: 11.545543670654297 2023-01-23 23:57:46.642370: step: 178/463, loss: 4.302837371826172 2023-01-23 23:57:47.271687: step: 180/463, loss: 4.450910568237305 2023-01-23 23:57:47.809022: step: 182/463, loss: 6.10361909866333 2023-01-23 23:57:48.388049: step: 184/463, loss: 8.916027069091797 2023-01-23 23:57:49.026693: step: 186/463, loss: 8.522967338562012 2023-01-23 23:57:49.699308: step: 188/463, loss: 8.79146671295166 2023-01-23 23:57:50.317452: step: 190/463, loss: 8.949901580810547 2023-01-23 23:57:50.898272: step: 192/463, loss: 8.222185134887695 2023-01-23 23:57:51.485424: step: 194/463, loss: 4.042442321777344 2023-01-23 23:57:52.094013: step: 196/463, loss: 17.776769638061523 2023-01-23 23:57:52.731786: step: 198/463, loss: 5.129105091094971 2023-01-23 23:57:53.362483: step: 200/463, loss: 8.442646980285645 2023-01-23 23:57:53.948979: step: 202/463, loss: 7.674983501434326 2023-01-23 23:57:54.558102: step: 204/463, loss: 15.125864028930664 2023-01-23 23:57:55.218652: step: 206/463, loss: 8.169317245483398 2023-01-23 23:57:55.879505: step: 208/463, loss: 10.881017684936523 2023-01-23 23:57:56.508885: step: 210/463, loss: 8.816551208496094 2023-01-23 23:57:57.103373: step: 212/463, loss: 10.203678131103516 2023-01-23 23:57:57.613223: step: 214/463, loss: 7.142978191375732 2023-01-23 23:57:58.207392: step: 216/463, loss: 5.470901966094971 2023-01-23 23:57:58.827806: step: 218/463, loss: 5.4315619468688965 2023-01-23 23:57:59.435686: step: 220/463, loss: 12.305489540100098 2023-01-23 23:58:00.029898: step: 222/463, loss: 9.573036193847656 2023-01-23 23:58:00.596423: step: 224/463, loss: 4.671309947967529 2023-01-23 23:58:01.257753: step: 226/463, loss: 14.168991088867188 2023-01-23 23:58:01.832059: step: 228/463, loss: 3.745619535446167 2023-01-23 23:58:02.447518: step: 230/463, loss: 12.521639823913574 2023-01-23 23:58:03.056396: step: 232/463, loss: 5.382946968078613 2023-01-23 23:58:03.655003: step: 234/463, loss: 2.7595434188842773 2023-01-23 23:58:04.264999: step: 236/463, loss: 11.03259563446045 2023-01-23 23:58:04.818537: step: 238/463, loss: 5.2375407218933105 2023-01-23 23:58:05.442725: step: 240/463, loss: 6.666964530944824 2023-01-23 23:58:06.056152: step: 242/463, loss: 8.853500366210938 2023-01-23 23:58:06.696701: step: 244/463, loss: 11.538705825805664 2023-01-23 23:58:07.390538: step: 246/463, loss: 4.874959945678711 2023-01-23 23:58:08.005214: step: 248/463, loss: 2.589587688446045 2023-01-23 23:58:08.622560: step: 250/463, loss: 5.342429161071777 2023-01-23 23:58:09.334485: step: 252/463, loss: 14.176994323730469 2023-01-23 23:58:09.983483: step: 254/463, loss: 6.511935234069824 2023-01-23 23:58:10.643889: step: 256/463, loss: 7.1047844886779785 2023-01-23 23:58:11.283175: step: 258/463, loss: 6.963000774383545 2023-01-23 23:58:11.906783: step: 260/463, loss: 6.797969341278076 2023-01-23 23:58:12.479343: step: 262/463, loss: 9.047029495239258 2023-01-23 23:58:13.013127: step: 264/463, loss: 4.434392929077148 2023-01-23 23:58:13.593438: step: 266/463, loss: 13.79139232635498 2023-01-23 23:58:14.235948: step: 268/463, loss: 8.185296058654785 2023-01-23 23:58:14.896495: step: 270/463, loss: 6.778559684753418 2023-01-23 23:58:15.517296: step: 272/463, loss: 11.998456954956055 2023-01-23 23:58:16.120257: step: 274/463, loss: 8.637171745300293 2023-01-23 23:58:16.712120: step: 276/463, loss: 2.9375 2023-01-23 23:58:17.370937: step: 278/463, loss: 8.361226081848145 2023-01-23 23:58:17.994789: step: 280/463, loss: 2.1594314575195312 2023-01-23 23:58:18.555927: step: 282/463, loss: 4.514021396636963 2023-01-23 23:58:19.126866: step: 284/463, loss: 18.290882110595703 2023-01-23 23:58:19.709750: step: 286/463, loss: 6.228346824645996 2023-01-23 23:58:20.357677: step: 288/463, loss: 6.313811302185059 2023-01-23 23:58:21.010016: step: 290/463, loss: 9.055527687072754 2023-01-23 23:58:21.638192: step: 292/463, loss: 21.097455978393555 2023-01-23 23:58:22.257617: step: 294/463, loss: 9.765172004699707 2023-01-23 23:58:22.835565: step: 296/463, loss: 9.060818672180176 2023-01-23 23:58:23.490524: step: 298/463, loss: 6.205001354217529 2023-01-23 23:58:24.138205: step: 300/463, loss: 14.02237319946289 2023-01-23 23:58:24.686831: step: 302/463, loss: 6.1169891357421875 2023-01-23 23:58:25.271622: step: 304/463, loss: 5.814948081970215 2023-01-23 23:58:25.976542: step: 306/463, loss: 4.788305282592773 2023-01-23 23:58:26.597343: step: 308/463, loss: 7.753364562988281 2023-01-23 23:58:27.167684: step: 310/463, loss: 3.05171275138855 2023-01-23 23:58:27.796158: step: 312/463, loss: 6.9203267097473145 2023-01-23 23:58:28.414977: step: 314/463, loss: 6.376954078674316 2023-01-23 23:58:29.056480: step: 316/463, loss: 16.378690719604492 2023-01-23 23:58:29.621518: step: 318/463, loss: 12.88469123840332 2023-01-23 23:58:30.234655: step: 320/463, loss: 2.2646515369415283 2023-01-23 23:58:30.894433: step: 322/463, loss: 9.245916366577148 2023-01-23 23:58:31.501577: step: 324/463, loss: 3.8333253860473633 2023-01-23 23:58:32.132499: step: 326/463, loss: 14.657800674438477 2023-01-23 23:58:32.766727: step: 328/463, loss: 5.927628040313721 2023-01-23 23:58:33.398938: step: 330/463, loss: 4.4432477951049805 2023-01-23 23:58:33.943055: step: 332/463, loss: 6.40554141998291 2023-01-23 23:58:34.582811: step: 334/463, loss: 4.330059051513672 2023-01-23 23:58:35.185503: step: 336/463, loss: 10.429497718811035 2023-01-23 23:58:35.831269: step: 338/463, loss: 10.983132362365723 2023-01-23 23:58:36.470334: step: 340/463, loss: 7.257326602935791 2023-01-23 23:58:37.113481: step: 342/463, loss: 7.895818710327148 2023-01-23 23:58:37.728632: step: 344/463, loss: 10.706269264221191 2023-01-23 23:58:38.372815: step: 346/463, loss: 5.919206619262695 2023-01-23 23:58:38.955289: step: 348/463, loss: 5.22928524017334 2023-01-23 23:58:39.535558: step: 350/463, loss: 2.5848395824432373 2023-01-23 23:58:40.251464: step: 352/463, loss: 10.033931732177734 2023-01-23 23:58:40.951026: step: 354/463, loss: 4.048956394195557 2023-01-23 23:58:41.516949: step: 356/463, loss: 4.320642471313477 2023-01-23 23:58:42.152537: step: 358/463, loss: 8.659322738647461 2023-01-23 23:58:42.762972: step: 360/463, loss: 2.1166248321533203 2023-01-23 23:58:43.395578: step: 362/463, loss: 5.4784088134765625 2023-01-23 23:58:44.051235: step: 364/463, loss: 7.528636455535889 2023-01-23 23:58:44.639278: step: 366/463, loss: 2.7859270572662354 2023-01-23 23:58:45.283995: step: 368/463, loss: 4.643281936645508 2023-01-23 23:58:45.913266: step: 370/463, loss: 3.4890964031219482 2023-01-23 23:58:46.764963: step: 372/463, loss: 7.304950714111328 2023-01-23 23:58:47.450187: step: 374/463, loss: 5.0774688720703125 2023-01-23 23:58:48.063558: step: 376/463, loss: 1.6356875896453857 2023-01-23 23:58:48.745542: step: 378/463, loss: 4.603309154510498 2023-01-23 23:58:49.401823: step: 380/463, loss: 1.0375280380249023 2023-01-23 23:58:50.002602: step: 382/463, loss: 9.787210464477539 2023-01-23 23:58:50.659127: step: 384/463, loss: 2.499837636947632 2023-01-23 23:58:51.347240: step: 386/463, loss: 4.796648979187012 2023-01-23 23:58:52.009975: step: 388/463, loss: 1.0846575498580933 2023-01-23 23:58:52.679999: step: 390/463, loss: 9.04771900177002 2023-01-23 23:58:53.304876: step: 392/463, loss: 5.604578018188477 2023-01-23 23:58:53.897773: step: 394/463, loss: 5.01338005065918 2023-01-23 23:58:54.495745: step: 396/463, loss: 4.743184566497803 2023-01-23 23:58:55.144781: step: 398/463, loss: 3.104649543762207 2023-01-23 23:58:55.739823: step: 400/463, loss: 3.990280866622925 2023-01-23 23:58:56.495383: step: 402/463, loss: 1.2699460983276367 2023-01-23 23:58:57.055336: step: 404/463, loss: 1.7220298051834106 2023-01-23 23:58:57.634838: step: 406/463, loss: 2.5336601734161377 2023-01-23 23:58:58.260220: step: 408/463, loss: 2.748413324356079 2023-01-23 23:58:58.799605: step: 410/463, loss: 0.7016493082046509 2023-01-23 23:58:59.376406: step: 412/463, loss: 1.9688310623168945 2023-01-23 23:59:00.020302: step: 414/463, loss: 2.25248122215271 2023-01-23 23:59:00.638471: step: 416/463, loss: 1.93528151512146 2023-01-23 23:59:01.238470: step: 418/463, loss: 8.215465545654297 2023-01-23 23:59:01.820561: step: 420/463, loss: 1.888968586921692 2023-01-23 23:59:02.531933: step: 422/463, loss: 2.4234135150909424 2023-01-23 23:59:03.178615: step: 424/463, loss: 6.287452697753906 2023-01-23 23:59:03.828775: step: 426/463, loss: 2.3424036502838135 2023-01-23 23:59:04.372555: step: 428/463, loss: 1.1790132522583008 2023-01-23 23:59:04.955708: step: 430/463, loss: 1.5993249416351318 2023-01-23 23:59:05.525784: step: 432/463, loss: 3.9855756759643555 2023-01-23 23:59:06.150969: step: 434/463, loss: 2.207996129989624 2023-01-23 23:59:06.844631: step: 436/463, loss: 2.2234787940979004 2023-01-23 23:59:07.611599: step: 438/463, loss: 5.171801567077637 2023-01-23 23:59:08.249853: step: 440/463, loss: 2.545515537261963 2023-01-23 23:59:08.961954: step: 442/463, loss: 1.1477330923080444 2023-01-23 23:59:09.579001: step: 444/463, loss: 1.6340665817260742 2023-01-23 23:59:10.175364: step: 446/463, loss: 9.209417343139648 2023-01-23 23:59:10.822318: step: 448/463, loss: 3.4987401962280273 2023-01-23 23:59:11.446978: step: 450/463, loss: 1.4805747270584106 2023-01-23 23:59:12.082185: step: 452/463, loss: 3.5288729667663574 2023-01-23 23:59:12.726765: step: 454/463, loss: 1.6774455308914185 2023-01-23 23:59:13.269132: step: 456/463, loss: 2.0962767601013184 2023-01-23 23:59:13.914835: step: 458/463, loss: 2.5396194458007812 2023-01-23 23:59:14.556704: step: 460/463, loss: 2.4079654216766357 2023-01-23 23:59:15.254859: step: 462/463, loss: 6.29324197769165 2023-01-23 23:59:15.916587: step: 464/463, loss: 0.45623210072517395 2023-01-23 23:59:16.551631: step: 466/463, loss: 1.614783763885498 2023-01-23 23:59:17.207640: step: 468/463, loss: 5.5748395919799805 2023-01-23 23:59:17.754085: step: 470/463, loss: 1.3125004768371582 2023-01-23 23:59:18.422779: step: 472/463, loss: 5.226147651672363 2023-01-23 23:59:19.120019: step: 474/463, loss: 0.5349715948104858 2023-01-23 23:59:19.737796: step: 476/463, loss: 1.50754976272583 2023-01-23 23:59:20.318636: step: 478/463, loss: 0.5347693562507629 2023-01-23 23:59:20.898977: step: 480/463, loss: 0.5113075971603394 2023-01-23 23:59:21.557387: step: 482/463, loss: 9.503129959106445 2023-01-23 23:59:22.213323: step: 484/463, loss: 2.62331485748291 2023-01-23 23:59:22.832771: step: 486/463, loss: 3.452773094177246 2023-01-23 23:59:23.444818: step: 488/463, loss: 2.027191162109375 2023-01-23 23:59:24.037282: step: 490/463, loss: 1.7128016948699951 2023-01-23 23:59:24.614826: step: 492/463, loss: 4.352555274963379 2023-01-23 23:59:25.192525: step: 494/463, loss: 1.2832002639770508 2023-01-23 23:59:25.815844: step: 496/463, loss: 9.903707504272461 2023-01-23 23:59:26.460529: step: 498/463, loss: 5.700617790222168 2023-01-23 23:59:27.117706: step: 500/463, loss: 1.1571046113967896 2023-01-23 23:59:27.794627: step: 502/463, loss: 0.5186383128166199 2023-01-23 23:59:28.424207: step: 504/463, loss: 0.4617786407470703 2023-01-23 23:59:29.071018: step: 506/463, loss: 1.0404834747314453 2023-01-23 23:59:29.767931: step: 508/463, loss: 2.2752439975738525 2023-01-23 23:59:30.462699: step: 510/463, loss: 1.2502347230911255 2023-01-23 23:59:31.169850: step: 512/463, loss: 2.931732177734375 2023-01-23 23:59:31.843974: step: 514/463, loss: 4.332589626312256 2023-01-23 23:59:32.483400: step: 516/463, loss: 9.145713806152344 2023-01-23 23:59:33.115000: step: 518/463, loss: 1.687113642692566 2023-01-23 23:59:33.750620: step: 520/463, loss: 4.266299247741699 2023-01-23 23:59:34.390483: step: 522/463, loss: 1.3389843702316284 2023-01-23 23:59:35.066496: step: 524/463, loss: 4.764490604400635 2023-01-23 23:59:35.688983: step: 526/463, loss: 4.347450256347656 2023-01-23 23:59:36.330124: step: 528/463, loss: 5.252623081207275 2023-01-23 23:59:36.939415: step: 530/463, loss: 0.6236923933029175 2023-01-23 23:59:37.552939: step: 532/463, loss: 2.587319850921631 2023-01-23 23:59:38.196754: step: 534/463, loss: 3.3823764324188232 2023-01-23 23:59:38.811798: step: 536/463, loss: 1.5561316013336182 2023-01-23 23:59:39.430197: step: 538/463, loss: 2.1903076171875 2023-01-23 23:59:40.053253: step: 540/463, loss: 1.647963523864746 2023-01-23 23:59:40.609576: step: 542/463, loss: 1.153571367263794 2023-01-23 23:59:41.250395: step: 544/463, loss: 1.7215423583984375 2023-01-23 23:59:41.835928: step: 546/463, loss: 2.0538575649261475 2023-01-23 23:59:42.455014: step: 548/463, loss: 1.6106829643249512 2023-01-23 23:59:43.082519: step: 550/463, loss: 1.2971807718276978 2023-01-23 23:59:43.778084: step: 552/463, loss: 5.6439619064331055 2023-01-23 23:59:44.353025: step: 554/463, loss: 2.8362927436828613 2023-01-23 23:59:45.006733: step: 556/463, loss: 2.1298794746398926 2023-01-23 23:59:45.638925: step: 558/463, loss: 1.85184907913208 2023-01-23 23:59:46.293324: step: 560/463, loss: 4.955857276916504 2023-01-23 23:59:46.956440: step: 562/463, loss: 2.6816630363464355 2023-01-23 23:59:47.504417: step: 564/463, loss: 1.0311378240585327 2023-01-23 23:59:48.130818: step: 566/463, loss: 1.1304336786270142 2023-01-23 23:59:48.737081: step: 568/463, loss: 1.2872354984283447 2023-01-23 23:59:49.381642: step: 570/463, loss: 0.9479402899742126 2023-01-23 23:59:50.055922: step: 572/463, loss: 3.5955305099487305 2023-01-23 23:59:50.713941: step: 574/463, loss: 17.737064361572266 2023-01-23 23:59:51.366495: step: 576/463, loss: 0.387450248003006 2023-01-23 23:59:52.017584: step: 578/463, loss: 2.014277696609497 2023-01-23 23:59:52.611095: step: 580/463, loss: 0.8113519549369812 2023-01-23 23:59:53.215584: step: 582/463, loss: 1.2146477699279785 2023-01-23 23:59:53.805377: step: 584/463, loss: 1.0005701780319214 2023-01-23 23:59:54.430418: step: 586/463, loss: 3.106804132461548 2023-01-23 23:59:55.130958: step: 588/463, loss: 1.2648484706878662 2023-01-23 23:59:55.773769: step: 590/463, loss: 1.6307803392410278 2023-01-23 23:59:56.343624: step: 592/463, loss: 0.7805302739143372 2023-01-23 23:59:56.979596: step: 594/463, loss: 2.2618353366851807 2023-01-23 23:59:57.632420: step: 596/463, loss: 1.5117416381835938 2023-01-23 23:59:58.326283: step: 598/463, loss: 0.5041603446006775 2023-01-23 23:59:58.982279: step: 600/463, loss: 1.2836923599243164 2023-01-23 23:59:59.559362: step: 602/463, loss: 1.596165657043457 2023-01-24 00:00:00.279250: step: 604/463, loss: 2.5285661220550537 2023-01-24 00:00:00.964193: step: 606/463, loss: 0.4201674461364746 2023-01-24 00:00:01.563514: step: 608/463, loss: 1.2373557090759277 2023-01-24 00:00:02.220809: step: 610/463, loss: 7.557168960571289 2023-01-24 00:00:02.817703: step: 612/463, loss: 1.9671400785446167 2023-01-24 00:00:03.535138: step: 614/463, loss: 1.1954658031463623 2023-01-24 00:00:04.146200: step: 616/463, loss: 0.8409519791603088 2023-01-24 00:00:04.853820: step: 618/463, loss: 1.5823054313659668 2023-01-24 00:00:05.517131: step: 620/463, loss: 0.37346699833869934 2023-01-24 00:00:06.270083: step: 622/463, loss: 1.5253220796585083 2023-01-24 00:00:06.875955: step: 624/463, loss: 2.8737540245056152 2023-01-24 00:00:07.552686: step: 626/463, loss: 1.2973134517669678 2023-01-24 00:00:08.143213: step: 628/463, loss: 1.4284980297088623 2023-01-24 00:00:08.797255: step: 630/463, loss: 2.9359917640686035 2023-01-24 00:00:09.489446: step: 632/463, loss: 4.4256815910339355 2023-01-24 00:00:10.077421: step: 634/463, loss: 1.11931574344635 2023-01-24 00:00:10.715632: step: 636/463, loss: 7.506770133972168 2023-01-24 00:00:11.317575: step: 638/463, loss: 0.7412253618240356 2023-01-24 00:00:11.971508: step: 640/463, loss: 4.581730842590332 2023-01-24 00:00:12.601210: step: 642/463, loss: 1.089477777481079 2023-01-24 00:00:13.195068: step: 644/463, loss: 1.1849415302276611 2023-01-24 00:00:13.847904: step: 646/463, loss: 1.2748390436172485 2023-01-24 00:00:14.480654: step: 648/463, loss: 0.7718403339385986 2023-01-24 00:00:15.141782: step: 650/463, loss: 0.7421875596046448 2023-01-24 00:00:15.765277: step: 652/463, loss: 1.2299742698669434 2023-01-24 00:00:16.465913: step: 654/463, loss: 0.8530952334403992 2023-01-24 00:00:17.114274: step: 656/463, loss: 1.1307525634765625 2023-01-24 00:00:17.725416: step: 658/463, loss: 5.466414451599121 2023-01-24 00:00:18.371437: step: 660/463, loss: 8.384843826293945 2023-01-24 00:00:18.955292: step: 662/463, loss: 0.6394960880279541 2023-01-24 00:00:19.632826: step: 664/463, loss: 6.0730204582214355 2023-01-24 00:00:20.322417: step: 666/463, loss: 2.0828628540039062 2023-01-24 00:00:20.956876: step: 668/463, loss: 0.44130241870880127 2023-01-24 00:00:21.643702: step: 670/463, loss: 2.534024238586426 2023-01-24 00:00:22.180381: step: 672/463, loss: 2.019125461578369 2023-01-24 00:00:22.803556: step: 674/463, loss: 2.689220905303955 2023-01-24 00:00:23.479471: step: 676/463, loss: 3.6442699432373047 2023-01-24 00:00:24.171519: step: 678/463, loss: 4.048669815063477 2023-01-24 00:00:24.775470: step: 680/463, loss: 0.5916732549667358 2023-01-24 00:00:25.381016: step: 682/463, loss: 1.1659475564956665 2023-01-24 00:00:26.013655: step: 684/463, loss: 0.9714545607566833 2023-01-24 00:00:26.731176: step: 686/463, loss: 7.451725006103516 2023-01-24 00:00:27.330564: step: 688/463, loss: 3.6644105911254883 2023-01-24 00:00:27.929315: step: 690/463, loss: 0.7021387815475464 2023-01-24 00:00:28.543933: step: 692/463, loss: 4.626512050628662 2023-01-24 00:00:29.201817: step: 694/463, loss: 2.7556650638580322 2023-01-24 00:00:29.788740: step: 696/463, loss: 1.2383744716644287 2023-01-24 00:00:30.429782: step: 698/463, loss: 1.3536591529846191 2023-01-24 00:00:31.020512: step: 700/463, loss: 0.46849948167800903 2023-01-24 00:00:31.704209: step: 702/463, loss: 3.882406711578369 2023-01-24 00:00:32.341952: step: 704/463, loss: 1.7366465330123901 2023-01-24 00:00:33.040974: step: 706/463, loss: 0.6374361515045166 2023-01-24 00:00:33.620250: step: 708/463, loss: 1.248744010925293 2023-01-24 00:00:34.163026: step: 710/463, loss: 1.792894959449768 2023-01-24 00:00:34.770812: step: 712/463, loss: 2.7405524253845215 2023-01-24 00:00:35.408012: step: 714/463, loss: 6.932621955871582 2023-01-24 00:00:36.022107: step: 716/463, loss: 9.49526596069336 2023-01-24 00:00:36.638464: step: 718/463, loss: 1.006314992904663 2023-01-24 00:00:37.306099: step: 720/463, loss: 2.450564384460449 2023-01-24 00:00:37.930662: step: 722/463, loss: 3.009561061859131 2023-01-24 00:00:38.537064: step: 724/463, loss: 1.802248239517212 2023-01-24 00:00:39.137879: step: 726/463, loss: 0.5707215666770935 2023-01-24 00:00:39.718085: step: 728/463, loss: 1.8768552541732788 2023-01-24 00:00:40.324042: step: 730/463, loss: 2.089775323867798 2023-01-24 00:00:40.960590: step: 732/463, loss: 2.413895606994629 2023-01-24 00:00:41.547988: step: 734/463, loss: 1.5247026681900024 2023-01-24 00:00:42.182713: step: 736/463, loss: 1.6237387657165527 2023-01-24 00:00:42.830106: step: 738/463, loss: 1.133504867553711 2023-01-24 00:00:43.523512: step: 740/463, loss: 3.4109933376312256 2023-01-24 00:00:44.136227: step: 742/463, loss: 0.524638295173645 2023-01-24 00:00:44.771391: step: 744/463, loss: 1.4392170906066895 2023-01-24 00:00:45.402741: step: 746/463, loss: 0.466078519821167 2023-01-24 00:00:45.985531: step: 748/463, loss: 1.4921762943267822 2023-01-24 00:00:46.619724: step: 750/463, loss: 0.9424622058868408 2023-01-24 00:00:47.214920: step: 752/463, loss: 1.864139199256897 2023-01-24 00:00:47.839747: step: 754/463, loss: 1.7786788940429688 2023-01-24 00:00:48.485510: step: 756/463, loss: 0.4550171494483948 2023-01-24 00:00:49.080048: step: 758/463, loss: 0.8722974061965942 2023-01-24 00:00:49.656223: step: 760/463, loss: 1.4529714584350586 2023-01-24 00:00:50.229835: step: 762/463, loss: 1.4644681215286255 2023-01-24 00:00:50.860658: step: 764/463, loss: 0.9574808478355408 2023-01-24 00:00:51.502880: step: 766/463, loss: 1.4590986967086792 2023-01-24 00:00:52.121633: step: 768/463, loss: 1.6554760932922363 2023-01-24 00:00:52.742985: step: 770/463, loss: 0.768781304359436 2023-01-24 00:00:53.404145: step: 772/463, loss: 3.460744619369507 2023-01-24 00:00:54.054471: step: 774/463, loss: 5.019446849822998 2023-01-24 00:00:54.695851: step: 776/463, loss: 1.7518839836120605 2023-01-24 00:00:55.327026: step: 778/463, loss: 0.7793686985969543 2023-01-24 00:00:55.927867: step: 780/463, loss: 5.875916004180908 2023-01-24 00:00:56.473794: step: 782/463, loss: 3.337308168411255 2023-01-24 00:00:57.202377: step: 784/463, loss: 2.53031849861145 2023-01-24 00:00:57.867276: step: 786/463, loss: 0.6848602890968323 2023-01-24 00:00:58.419953: step: 788/463, loss: 1.8918633460998535 2023-01-24 00:00:59.051800: step: 790/463, loss: 2.227695941925049 2023-01-24 00:00:59.719920: step: 792/463, loss: 0.4642110764980316 2023-01-24 00:01:00.399008: step: 794/463, loss: 9.37708854675293 2023-01-24 00:01:01.042313: step: 796/463, loss: 6.848600387573242 2023-01-24 00:01:01.689616: step: 798/463, loss: 4.573422908782959 2023-01-24 00:01:02.285870: step: 800/463, loss: 1.1996718645095825 2023-01-24 00:01:02.966830: step: 802/463, loss: 1.856066346168518 2023-01-24 00:01:03.615218: step: 804/463, loss: 1.0040066242218018 2023-01-24 00:01:04.267341: step: 806/463, loss: 3.0978808403015137 2023-01-24 00:01:04.924143: step: 808/463, loss: 1.158308744430542 2023-01-24 00:01:05.546216: step: 810/463, loss: 2.584512710571289 2023-01-24 00:01:06.154113: step: 812/463, loss: 2.0479159355163574 2023-01-24 00:01:06.746704: step: 814/463, loss: 0.7239602208137512 2023-01-24 00:01:07.380958: step: 816/463, loss: 2.8600265979766846 2023-01-24 00:01:07.956640: step: 818/463, loss: 3.3635849952697754 2023-01-24 00:01:08.575855: step: 820/463, loss: 3.387791156768799 2023-01-24 00:01:09.230883: step: 822/463, loss: 2.9429941177368164 2023-01-24 00:01:09.876128: step: 824/463, loss: 0.7158240079879761 2023-01-24 00:01:10.480139: step: 826/463, loss: 1.6996464729309082 2023-01-24 00:01:11.095175: step: 828/463, loss: 1.3286644220352173 2023-01-24 00:01:11.735300: step: 830/463, loss: 1.5376002788543701 2023-01-24 00:01:12.317267: step: 832/463, loss: 15.429784774780273 2023-01-24 00:01:12.913877: step: 834/463, loss: 6.225384712219238 2023-01-24 00:01:13.561135: step: 836/463, loss: 0.42232662439346313 2023-01-24 00:01:14.202490: step: 838/463, loss: 0.8103923797607422 2023-01-24 00:01:14.801273: step: 840/463, loss: 5.468482971191406 2023-01-24 00:01:15.432679: step: 842/463, loss: 0.8557579517364502 2023-01-24 00:01:16.014693: step: 844/463, loss: 0.682935893535614 2023-01-24 00:01:16.590406: step: 846/463, loss: 1.533571720123291 2023-01-24 00:01:17.225365: step: 848/463, loss: 1.630355954170227 2023-01-24 00:01:17.779695: step: 850/463, loss: 1.689854621887207 2023-01-24 00:01:18.412928: step: 852/463, loss: 1.1244410276412964 2023-01-24 00:01:19.024681: step: 854/463, loss: 5.726593971252441 2023-01-24 00:01:19.699963: step: 856/463, loss: 2.025653839111328 2023-01-24 00:01:20.356759: step: 858/463, loss: 4.137726783752441 2023-01-24 00:01:20.992911: step: 860/463, loss: 8.090264320373535 2023-01-24 00:01:21.644181: step: 862/463, loss: 1.6618350744247437 2023-01-24 00:01:22.293893: step: 864/463, loss: 1.0811971426010132 2023-01-24 00:01:23.073865: step: 866/463, loss: 2.1779613494873047 2023-01-24 00:01:23.747850: step: 868/463, loss: 3.202235221862793 2023-01-24 00:01:24.473767: step: 870/463, loss: 2.495041608810425 2023-01-24 00:01:25.079087: step: 872/463, loss: 4.0412445068359375 2023-01-24 00:01:25.653812: step: 874/463, loss: 0.5530418753623962 2023-01-24 00:01:26.318886: step: 876/463, loss: 1.8525749444961548 2023-01-24 00:01:26.939842: step: 878/463, loss: 6.692317962646484 2023-01-24 00:01:27.565359: step: 880/463, loss: 1.459074854850769 2023-01-24 00:01:28.293400: step: 882/463, loss: 0.6565572023391724 2023-01-24 00:01:28.911096: step: 884/463, loss: 2.0492184162139893 2023-01-24 00:01:29.513542: step: 886/463, loss: 6.110342979431152 2023-01-24 00:01:30.141724: step: 888/463, loss: 0.8213084936141968 2023-01-24 00:01:30.821027: step: 890/463, loss: 1.5049846172332764 2023-01-24 00:01:31.478386: step: 892/463, loss: 2.3778645992279053 2023-01-24 00:01:32.103154: step: 894/463, loss: 2.0398316383361816 2023-01-24 00:01:32.728609: step: 896/463, loss: 3.0210647583007812 2023-01-24 00:01:33.359878: step: 898/463, loss: 1.910044550895691 2023-01-24 00:01:34.017891: step: 900/463, loss: 2.312530994415283 2023-01-24 00:01:34.580781: step: 902/463, loss: 2.947416305541992 2023-01-24 00:01:35.218188: step: 904/463, loss: 2.189818859100342 2023-01-24 00:01:35.825271: step: 906/463, loss: 1.1345771551132202 2023-01-24 00:01:36.426246: step: 908/463, loss: 1.2360376119613647 2023-01-24 00:01:37.037637: step: 910/463, loss: 1.3576138019561768 2023-01-24 00:01:37.659429: step: 912/463, loss: 2.997972011566162 2023-01-24 00:01:38.261527: step: 914/463, loss: 0.937995195388794 2023-01-24 00:01:38.910396: step: 916/463, loss: 4.045863628387451 2023-01-24 00:01:39.561453: step: 918/463, loss: 2.332854747772217 2023-01-24 00:01:40.168097: step: 920/463, loss: 1.0578027963638306 2023-01-24 00:01:40.850253: step: 922/463, loss: 3.0752735137939453 2023-01-24 00:01:41.505297: step: 924/463, loss: 8.361388206481934 2023-01-24 00:01:42.195658: step: 926/463, loss: 2.755699634552002 ================================================== Loss: 5.605 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.37850467289719625, 'r': 0.07655954631379962, 'f1': 0.12735849056603774}, 'combined': 0.09384309831181728, 'epoch': 0} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3574561403508772, 'r': 0.03711293260473588, 'f1': 0.06724422442244225}, 'combined': 0.052122317494907394, 'epoch': 0} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34733893557422973, 'r': 0.07813484562066793, 'f1': 0.12757201646090535}, 'combined': 0.09400043318171973, 'epoch': 0} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3389830508474576, 'r': 0.03642987249544627, 'f1': 0.06578947368421052}, 'combined': 0.0509947116595316, 'epoch': 0} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3571120689655172, 'r': 0.07830812854442344, 'f1': 0.12844961240310077}, 'combined': 0.0946470828233374, 'epoch': 0} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.334070796460177, 'r': 0.034380692167577415, 'f1': 0.06234516928158546}, 'combined': 0.0483249637493629, 'epoch': 0} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.14285714285714285, 'r': 0.02857142857142857, 'f1': 0.04761904761904761}, 'combined': 0.03174603174603174, 'epoch': 0} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35, 'r': 0.0603448275862069, 'f1': 0.10294117647058824}, 'combined': 0.06862745098039216, 'epoch': 0} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.37850467289719625, 'r': 0.07655954631379962, 'f1': 0.12735849056603774}, 'combined': 0.09384309831181728, 'epoch': 0} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3574561403508772, 'r': 0.03711293260473588, 'f1': 0.06724422442244225}, 'combined': 0.052122317494907394, 'epoch': 0} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.14285714285714285, 'r': 0.02857142857142857, 'f1': 0.04761904761904761}, 'combined': 0.03174603174603174, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34733893557422973, 'r': 0.07813484562066793, 'f1': 0.12757201646090535}, 'combined': 0.09400043318171973, 'epoch': 0} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3389830508474576, 'r': 0.03642987249544627, 'f1': 0.06578947368421052}, 'combined': 0.0509947116595316, 'epoch': 0} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3571120689655172, 'r': 0.07830812854442344, 'f1': 0.12844961240310077}, 'combined': 0.0946470828233374, 'epoch': 0} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.334070796460177, 'r': 0.034380692167577415, 'f1': 0.06234516928158546}, 'combined': 0.0483249637493629, 'epoch': 0} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35, 'r': 0.0603448275862069, 'f1': 0.10294117647058824}, 'combined': 0.06862745098039216, 'epoch': 0} ****************************** Epoch: 1 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:04:26.845162: step: 2/463, loss: 0.6685802340507507 2023-01-24 00:04:27.487337: step: 4/463, loss: 3.815256357192993 2023-01-24 00:04:28.130697: step: 6/463, loss: 5.122040271759033 2023-01-24 00:04:28.809108: step: 8/463, loss: 0.5858937501907349 2023-01-24 00:04:29.470353: step: 10/463, loss: 3.097500801086426 2023-01-24 00:04:30.058577: step: 12/463, loss: 2.622636556625366 2023-01-24 00:04:30.681381: step: 14/463, loss: 0.7561248540878296 2023-01-24 00:04:31.283494: step: 16/463, loss: 4.578721523284912 2023-01-24 00:04:31.975708: step: 18/463, loss: 1.684999704360962 2023-01-24 00:04:32.656040: step: 20/463, loss: 1.2447631359100342 2023-01-24 00:04:33.198432: step: 22/463, loss: 2.5334525108337402 2023-01-24 00:04:33.844709: step: 24/463, loss: 5.395010948181152 2023-01-24 00:04:34.437852: step: 26/463, loss: 0.6044031381607056 2023-01-24 00:04:35.024776: step: 28/463, loss: 2.5535888671875 2023-01-24 00:04:35.649890: step: 30/463, loss: 2.1872472763061523 2023-01-24 00:04:36.256014: step: 32/463, loss: 1.477719783782959 2023-01-24 00:04:36.888264: step: 34/463, loss: 1.7267385721206665 2023-01-24 00:04:37.580062: step: 36/463, loss: 1.0844117403030396 2023-01-24 00:04:38.195609: step: 38/463, loss: 0.5043696165084839 2023-01-24 00:04:38.834924: step: 40/463, loss: 0.7670609951019287 2023-01-24 00:04:39.505491: step: 42/463, loss: 1.944401741027832 2023-01-24 00:04:40.122236: step: 44/463, loss: 1.4977705478668213 2023-01-24 00:04:40.766622: step: 46/463, loss: 5.696686267852783 2023-01-24 00:04:41.365162: step: 48/463, loss: 1.3773748874664307 2023-01-24 00:04:42.004985: step: 50/463, loss: 1.2749525308609009 2023-01-24 00:04:42.623335: step: 52/463, loss: 1.9650002717971802 2023-01-24 00:04:43.267977: step: 54/463, loss: 9.730448722839355 2023-01-24 00:04:43.975108: step: 56/463, loss: 2.6587979793548584 2023-01-24 00:04:44.558684: step: 58/463, loss: 1.1504578590393066 2023-01-24 00:04:45.220799: step: 60/463, loss: 2.79347562789917 2023-01-24 00:04:45.860294: step: 62/463, loss: 4.746162414550781 2023-01-24 00:04:46.487596: step: 64/463, loss: 6.020909786224365 2023-01-24 00:04:47.116080: step: 66/463, loss: 4.420544624328613 2023-01-24 00:04:47.674589: step: 68/463, loss: 0.9461444616317749 2023-01-24 00:04:48.264357: step: 70/463, loss: 4.956018447875977 2023-01-24 00:04:48.912611: step: 72/463, loss: 6.183978080749512 2023-01-24 00:04:49.519732: step: 74/463, loss: 3.3069844245910645 2023-01-24 00:04:50.143981: step: 76/463, loss: 0.9955776929855347 2023-01-24 00:04:50.772224: step: 78/463, loss: 4.352029800415039 2023-01-24 00:04:51.348858: step: 80/463, loss: 0.9474515914916992 2023-01-24 00:04:51.934103: step: 82/463, loss: 1.5701227188110352 2023-01-24 00:04:52.532187: step: 84/463, loss: 2.8134660720825195 2023-01-24 00:04:53.135661: step: 86/463, loss: 7.871100425720215 2023-01-24 00:04:53.757514: step: 88/463, loss: 2.4787893295288086 2023-01-24 00:04:54.338614: step: 90/463, loss: 2.772219181060791 2023-01-24 00:04:55.002174: step: 92/463, loss: 3.449888229370117 2023-01-24 00:04:55.549123: step: 94/463, loss: 3.2907216548919678 2023-01-24 00:04:56.192794: step: 96/463, loss: 1.6990973949432373 2023-01-24 00:04:56.853379: step: 98/463, loss: 2.050144910812378 2023-01-24 00:04:57.501840: step: 100/463, loss: 1.248523235321045 2023-01-24 00:04:58.078876: step: 102/463, loss: 1.1855612993240356 2023-01-24 00:04:58.665914: step: 104/463, loss: 1.0870994329452515 2023-01-24 00:04:59.247980: step: 106/463, loss: 2.6335551738739014 2023-01-24 00:04:59.921704: step: 108/463, loss: 1.172735571861267 2023-01-24 00:05:00.544673: step: 110/463, loss: 1.4004729986190796 2023-01-24 00:05:01.196539: step: 112/463, loss: 0.9719650149345398 2023-01-24 00:05:01.926909: step: 114/463, loss: 1.6797430515289307 2023-01-24 00:05:02.534279: step: 116/463, loss: 3.8506879806518555 2023-01-24 00:05:03.213927: step: 118/463, loss: 0.6834520697593689 2023-01-24 00:05:03.863053: step: 120/463, loss: 0.8853448629379272 2023-01-24 00:05:04.516224: step: 122/463, loss: 5.188709735870361 2023-01-24 00:05:05.274871: step: 124/463, loss: 1.7113059759140015 2023-01-24 00:05:05.910963: step: 126/463, loss: 1.7622789144515991 2023-01-24 00:05:06.478410: step: 128/463, loss: 0.5963531732559204 2023-01-24 00:05:07.099733: step: 130/463, loss: 0.6164882183074951 2023-01-24 00:05:07.633070: step: 132/463, loss: 0.5139610767364502 2023-01-24 00:05:08.268325: step: 134/463, loss: 6.962419509887695 2023-01-24 00:05:08.919409: step: 136/463, loss: 6.858845233917236 2023-01-24 00:05:09.555105: step: 138/463, loss: 0.9637402296066284 2023-01-24 00:05:10.146980: step: 140/463, loss: 1.5021545886993408 2023-01-24 00:05:10.702470: step: 142/463, loss: 11.442215919494629 2023-01-24 00:05:11.284496: step: 144/463, loss: 1.0192979574203491 2023-01-24 00:05:11.900855: step: 146/463, loss: 0.937065601348877 2023-01-24 00:05:12.529151: step: 148/463, loss: 0.5950444936752319 2023-01-24 00:05:13.178540: step: 150/463, loss: 2.0465996265411377 2023-01-24 00:05:13.833498: step: 152/463, loss: 1.736748456954956 2023-01-24 00:05:14.505086: step: 154/463, loss: 1.2617604732513428 2023-01-24 00:05:15.137091: step: 156/463, loss: 0.35340410470962524 2023-01-24 00:05:15.706637: step: 158/463, loss: 6.960509300231934 2023-01-24 00:05:16.348497: step: 160/463, loss: 1.908158302307129 2023-01-24 00:05:17.010361: step: 162/463, loss: 0.7639504671096802 2023-01-24 00:05:17.714720: step: 164/463, loss: 0.793492317199707 2023-01-24 00:05:18.354039: step: 166/463, loss: 0.5547686815261841 2023-01-24 00:05:18.922300: step: 168/463, loss: 1.7488555908203125 2023-01-24 00:05:19.577717: step: 170/463, loss: 1.4251786470413208 2023-01-24 00:05:20.278930: step: 172/463, loss: 1.2899792194366455 2023-01-24 00:05:20.880596: step: 174/463, loss: 1.2221816778182983 2023-01-24 00:05:21.546152: step: 176/463, loss: 1.1296358108520508 2023-01-24 00:05:22.173473: step: 178/463, loss: 0.9121953248977661 2023-01-24 00:05:22.768079: step: 180/463, loss: 4.790550708770752 2023-01-24 00:05:23.417119: step: 182/463, loss: 0.37892529368400574 2023-01-24 00:05:24.106560: step: 184/463, loss: 1.215133786201477 2023-01-24 00:05:24.716909: step: 186/463, loss: 2.994995355606079 2023-01-24 00:05:25.321040: step: 188/463, loss: 1.41506826877594 2023-01-24 00:05:25.925807: step: 190/463, loss: 1.048855185508728 2023-01-24 00:05:26.560254: step: 192/463, loss: 0.6058880090713501 2023-01-24 00:05:27.205643: step: 194/463, loss: 3.981769561767578 2023-01-24 00:05:27.739042: step: 196/463, loss: 0.8736894726753235 2023-01-24 00:05:28.355508: step: 198/463, loss: 0.5024668574333191 2023-01-24 00:05:28.994399: step: 200/463, loss: 4.32876443862915 2023-01-24 00:05:29.621315: step: 202/463, loss: 0.6034828424453735 2023-01-24 00:05:30.248511: step: 204/463, loss: 0.7853680849075317 2023-01-24 00:05:30.841719: step: 206/463, loss: 2.123030185699463 2023-01-24 00:05:31.458286: step: 208/463, loss: 0.46757593750953674 2023-01-24 00:05:32.068941: step: 210/463, loss: 13.134532928466797 2023-01-24 00:05:32.731900: step: 212/463, loss: 0.7102795839309692 2023-01-24 00:05:33.317444: step: 214/463, loss: 1.3386493921279907 2023-01-24 00:05:33.966984: step: 216/463, loss: 0.4966233968734741 2023-01-24 00:05:34.613856: step: 218/463, loss: 0.7630448341369629 2023-01-24 00:05:35.299070: step: 220/463, loss: 2.044830322265625 2023-01-24 00:05:35.953709: step: 222/463, loss: 5.183900833129883 2023-01-24 00:05:36.560853: step: 224/463, loss: 4.236065864562988 2023-01-24 00:05:37.182338: step: 226/463, loss: 0.9535512924194336 2023-01-24 00:05:37.822136: step: 228/463, loss: 8.569318771362305 2023-01-24 00:05:38.446474: step: 230/463, loss: 0.725615382194519 2023-01-24 00:05:39.041016: step: 232/463, loss: 1.22381591796875 2023-01-24 00:05:39.668017: step: 234/463, loss: 3.0906450748443604 2023-01-24 00:05:40.345532: step: 236/463, loss: 1.2112234830856323 2023-01-24 00:05:40.911238: step: 238/463, loss: 0.6695554852485657 2023-01-24 00:05:41.557269: step: 240/463, loss: 1.61887526512146 2023-01-24 00:05:42.139577: step: 242/463, loss: 2.4645869731903076 2023-01-24 00:05:42.724540: step: 244/463, loss: 0.8892167806625366 2023-01-24 00:05:43.324214: step: 246/463, loss: 2.0332322120666504 2023-01-24 00:05:43.983083: step: 248/463, loss: 2.7730774879455566 2023-01-24 00:05:44.597911: step: 250/463, loss: 2.852320909500122 2023-01-24 00:05:45.252318: step: 252/463, loss: 2.1586720943450928 2023-01-24 00:05:45.893606: step: 254/463, loss: 2.050269842147827 2023-01-24 00:05:46.547079: step: 256/463, loss: 1.6949431896209717 2023-01-24 00:05:47.146704: step: 258/463, loss: 1.2738231420516968 2023-01-24 00:05:47.768840: step: 260/463, loss: 4.912646293640137 2023-01-24 00:05:48.339591: step: 262/463, loss: 1.6960387229919434 2023-01-24 00:05:48.991777: step: 264/463, loss: 1.8729029893875122 2023-01-24 00:05:49.595523: step: 266/463, loss: 0.477815717458725 2023-01-24 00:05:50.180177: step: 268/463, loss: 0.39634814858436584 2023-01-24 00:05:50.817535: step: 270/463, loss: 0.8920003175735474 2023-01-24 00:05:51.476009: step: 272/463, loss: 1.7977694272994995 2023-01-24 00:05:52.139526: step: 274/463, loss: 0.8314753770828247 2023-01-24 00:05:52.744175: step: 276/463, loss: 5.017110347747803 2023-01-24 00:05:53.359310: step: 278/463, loss: 1.5411887168884277 2023-01-24 00:05:53.949115: step: 280/463, loss: 2.2009634971618652 2023-01-24 00:05:54.499101: step: 282/463, loss: 0.9728631377220154 2023-01-24 00:05:55.114310: step: 284/463, loss: 5.722582817077637 2023-01-24 00:05:55.749985: step: 286/463, loss: 6.1283159255981445 2023-01-24 00:05:56.415622: step: 288/463, loss: 1.30561363697052 2023-01-24 00:05:56.983803: step: 290/463, loss: 4.328171253204346 2023-01-24 00:05:57.594090: step: 292/463, loss: 0.4708421230316162 2023-01-24 00:05:58.202548: step: 294/463, loss: 4.787020683288574 2023-01-24 00:05:58.853371: step: 296/463, loss: 3.3478779792785645 2023-01-24 00:05:59.492151: step: 298/463, loss: 3.621676445007324 2023-01-24 00:06:00.096628: step: 300/463, loss: 4.760943412780762 2023-01-24 00:06:00.685492: step: 302/463, loss: 7.1631598472595215 2023-01-24 00:06:01.259349: step: 304/463, loss: 1.246717929840088 2023-01-24 00:06:01.866869: step: 306/463, loss: 0.9490251541137695 2023-01-24 00:06:02.535557: step: 308/463, loss: 1.1441110372543335 2023-01-24 00:06:03.277275: step: 310/463, loss: 2.801241397857666 2023-01-24 00:06:03.858769: step: 312/463, loss: 6.124081611633301 2023-01-24 00:06:04.491646: step: 314/463, loss: 3.1647603511810303 2023-01-24 00:06:05.133700: step: 316/463, loss: 2.703418731689453 2023-01-24 00:06:05.734042: step: 318/463, loss: 5.265385150909424 2023-01-24 00:06:06.396341: step: 320/463, loss: 1.3631547689437866 2023-01-24 00:06:07.009177: step: 322/463, loss: 0.37927454710006714 2023-01-24 00:06:07.593927: step: 324/463, loss: 0.5200359225273132 2023-01-24 00:06:08.244199: step: 326/463, loss: 1.185221791267395 2023-01-24 00:06:08.827744: step: 328/463, loss: 3.7239699363708496 2023-01-24 00:06:09.452351: step: 330/463, loss: 1.7734441757202148 2023-01-24 00:06:10.148494: step: 332/463, loss: 1.6176515817642212 2023-01-24 00:06:10.822223: step: 334/463, loss: 0.6846375465393066 2023-01-24 00:06:11.531895: step: 336/463, loss: 4.7630205154418945 2023-01-24 00:06:12.148440: step: 338/463, loss: 4.955362319946289 2023-01-24 00:06:12.794484: step: 340/463, loss: 1.6540472507476807 2023-01-24 00:06:13.416636: step: 342/463, loss: 1.7042542695999146 2023-01-24 00:06:14.029526: step: 344/463, loss: 4.431768417358398 2023-01-24 00:06:14.647279: step: 346/463, loss: 0.724765419960022 2023-01-24 00:06:15.348709: step: 348/463, loss: 0.8880772590637207 2023-01-24 00:06:16.002928: step: 350/463, loss: 2.6019840240478516 2023-01-24 00:06:16.726780: step: 352/463, loss: 2.2807857990264893 2023-01-24 00:06:17.390428: step: 354/463, loss: 1.6729512214660645 2023-01-24 00:06:18.054421: step: 356/463, loss: 0.29523590207099915 2023-01-24 00:06:18.664384: step: 358/463, loss: 1.171804428100586 2023-01-24 00:06:19.238232: step: 360/463, loss: 1.4620466232299805 2023-01-24 00:06:19.857643: step: 362/463, loss: 1.2754545211791992 2023-01-24 00:06:20.495192: step: 364/463, loss: 4.919126033782959 2023-01-24 00:06:21.144643: step: 366/463, loss: 1.9732675552368164 2023-01-24 00:06:21.715661: step: 368/463, loss: 2.2051146030426025 2023-01-24 00:06:22.273784: step: 370/463, loss: 1.236556887626648 2023-01-24 00:06:22.912560: step: 372/463, loss: 0.9393231868743896 2023-01-24 00:06:23.599791: step: 374/463, loss: 3.647397994995117 2023-01-24 00:06:24.203207: step: 376/463, loss: 1.0917147397994995 2023-01-24 00:06:24.865770: step: 378/463, loss: 1.8892124891281128 2023-01-24 00:06:25.499732: step: 380/463, loss: 0.615264892578125 2023-01-24 00:06:26.193709: step: 382/463, loss: 0.5577064752578735 2023-01-24 00:06:26.791464: step: 384/463, loss: 0.3654223680496216 2023-01-24 00:06:27.470659: step: 386/463, loss: 2.1288323402404785 2023-01-24 00:06:28.099007: step: 388/463, loss: 0.46888911724090576 2023-01-24 00:06:28.699245: step: 390/463, loss: 6.196017265319824 2023-01-24 00:06:29.384752: step: 392/463, loss: 0.3807101249694824 2023-01-24 00:06:30.046591: step: 394/463, loss: 1.2807503938674927 2023-01-24 00:06:30.671753: step: 396/463, loss: 0.8407572507858276 2023-01-24 00:06:31.285840: step: 398/463, loss: 3.6059751510620117 2023-01-24 00:06:31.933226: step: 400/463, loss: 0.7967534065246582 2023-01-24 00:06:32.499564: step: 402/463, loss: 1.070436954498291 2023-01-24 00:06:33.179967: step: 404/463, loss: 0.9442594051361084 2023-01-24 00:06:33.848619: step: 406/463, loss: 1.3431742191314697 2023-01-24 00:06:34.487006: step: 408/463, loss: 1.3841991424560547 2023-01-24 00:06:35.126026: step: 410/463, loss: 1.2273149490356445 2023-01-24 00:06:35.770554: step: 412/463, loss: 1.0216115713119507 2023-01-24 00:06:36.401535: step: 414/463, loss: 2.7380475997924805 2023-01-24 00:06:37.026285: step: 416/463, loss: 0.5786815285682678 2023-01-24 00:06:37.663814: step: 418/463, loss: 0.3677418529987335 2023-01-24 00:06:38.291587: step: 420/463, loss: 4.857378005981445 2023-01-24 00:06:38.914847: step: 422/463, loss: 0.6248304843902588 2023-01-24 00:06:39.512805: step: 424/463, loss: 1.3008418083190918 2023-01-24 00:06:40.107693: step: 426/463, loss: 4.184779167175293 2023-01-24 00:06:40.774648: step: 428/463, loss: 4.474721908569336 2023-01-24 00:06:41.417006: step: 430/463, loss: 0.7341389060020447 2023-01-24 00:06:42.058895: step: 432/463, loss: 8.08967113494873 2023-01-24 00:06:42.703614: step: 434/463, loss: 10.318756103515625 2023-01-24 00:06:43.283032: step: 436/463, loss: 0.5290687084197998 2023-01-24 00:06:43.846638: step: 438/463, loss: 0.9555424451828003 2023-01-24 00:06:44.436344: step: 440/463, loss: 4.081544876098633 2023-01-24 00:06:45.057477: step: 442/463, loss: 0.6440564393997192 2023-01-24 00:06:45.658560: step: 444/463, loss: 3.0376334190368652 2023-01-24 00:06:46.305349: step: 446/463, loss: 0.6823337078094482 2023-01-24 00:06:46.904691: step: 448/463, loss: 0.9174196720123291 2023-01-24 00:06:47.586188: step: 450/463, loss: 2.0081372261047363 2023-01-24 00:06:48.209401: step: 452/463, loss: 0.7903839945793152 2023-01-24 00:06:48.830015: step: 454/463, loss: 0.4242723882198334 2023-01-24 00:06:49.426037: step: 456/463, loss: 0.7555660009384155 2023-01-24 00:06:50.034484: step: 458/463, loss: 5.849801063537598 2023-01-24 00:06:50.711869: step: 460/463, loss: 0.9508663415908813 2023-01-24 00:06:51.375755: step: 462/463, loss: 0.8560835719108582 2023-01-24 00:06:52.052498: step: 464/463, loss: 1.0714929103851318 2023-01-24 00:06:52.717420: step: 466/463, loss: 0.48916831612586975 2023-01-24 00:06:53.402648: step: 468/463, loss: 1.1483577489852905 2023-01-24 00:06:54.030289: step: 470/463, loss: 1.8613275289535522 2023-01-24 00:06:54.645274: step: 472/463, loss: 0.6832380294799805 2023-01-24 00:06:55.357135: step: 474/463, loss: 2.1384215354919434 2023-01-24 00:06:55.993534: step: 476/463, loss: 1.636368989944458 2023-01-24 00:06:56.605094: step: 478/463, loss: 1.5405359268188477 2023-01-24 00:06:57.235331: step: 480/463, loss: 2.874466896057129 2023-01-24 00:06:57.916602: step: 482/463, loss: 1.4746910333633423 2023-01-24 00:06:58.530090: step: 484/463, loss: 2.5639333724975586 2023-01-24 00:06:59.134167: step: 486/463, loss: 1.324280858039856 2023-01-24 00:06:59.830924: step: 488/463, loss: 1.0978533029556274 2023-01-24 00:07:00.547118: step: 490/463, loss: 23.97795295715332 2023-01-24 00:07:01.158355: step: 492/463, loss: 2.39566707611084 2023-01-24 00:07:01.774480: step: 494/463, loss: 2.2220065593719482 2023-01-24 00:07:02.370481: step: 496/463, loss: 7.3298020362854 2023-01-24 00:07:03.098482: step: 498/463, loss: 1.9549223184585571 2023-01-24 00:07:03.696267: step: 500/463, loss: 0.5419197082519531 2023-01-24 00:07:04.549850: step: 502/463, loss: 0.749390721321106 2023-01-24 00:07:05.162193: step: 504/463, loss: 1.9556846618652344 2023-01-24 00:07:05.743946: step: 506/463, loss: 2.056826114654541 2023-01-24 00:07:06.383376: step: 508/463, loss: 1.2158986330032349 2023-01-24 00:07:07.023230: step: 510/463, loss: 0.4667484760284424 2023-01-24 00:07:07.749333: step: 512/463, loss: 5.003033638000488 2023-01-24 00:07:08.380698: step: 514/463, loss: 0.5443268418312073 2023-01-24 00:07:09.074995: step: 516/463, loss: 2.5313668251037598 2023-01-24 00:07:09.694091: step: 518/463, loss: 2.405019760131836 2023-01-24 00:07:10.368249: step: 520/463, loss: 8.626433372497559 2023-01-24 00:07:10.974826: step: 522/463, loss: 1.2849178314208984 2023-01-24 00:07:11.613188: step: 524/463, loss: 7.990692615509033 2023-01-24 00:07:12.203255: step: 526/463, loss: 1.7502684593200684 2023-01-24 00:07:12.825393: step: 528/463, loss: 1.3626116514205933 2023-01-24 00:07:13.569269: step: 530/463, loss: 2.245612859725952 2023-01-24 00:07:14.197282: step: 532/463, loss: 3.4241137504577637 2023-01-24 00:07:14.832274: step: 534/463, loss: 0.7420649528503418 2023-01-24 00:07:15.444843: step: 536/463, loss: 0.5043743848800659 2023-01-24 00:07:16.040076: step: 538/463, loss: 2.1114377975463867 2023-01-24 00:07:16.665862: step: 540/463, loss: 0.8497910499572754 2023-01-24 00:07:17.277165: step: 542/463, loss: 1.3439457416534424 2023-01-24 00:07:17.881161: step: 544/463, loss: 3.4550399780273438 2023-01-24 00:07:18.555533: step: 546/463, loss: 0.5475401878356934 2023-01-24 00:07:19.161137: step: 548/463, loss: 0.849360466003418 2023-01-24 00:07:19.730017: step: 550/463, loss: 17.204517364501953 2023-01-24 00:07:20.393144: step: 552/463, loss: 2.0592710971832275 2023-01-24 00:07:21.035617: step: 554/463, loss: 0.8619705438613892 2023-01-24 00:07:21.614952: step: 556/463, loss: 6.703515529632568 2023-01-24 00:07:22.354547: step: 558/463, loss: 0.493984192609787 2023-01-24 00:07:22.982483: step: 560/463, loss: 1.1164534091949463 2023-01-24 00:07:23.644462: step: 562/463, loss: 0.35726404190063477 2023-01-24 00:07:24.255686: step: 564/463, loss: 0.6496716737747192 2023-01-24 00:07:24.956726: step: 566/463, loss: 0.9316614866256714 2023-01-24 00:07:25.615215: step: 568/463, loss: 0.791114091873169 2023-01-24 00:07:26.263530: step: 570/463, loss: 1.144370675086975 2023-01-24 00:07:26.948660: step: 572/463, loss: 0.7868590950965881 2023-01-24 00:07:27.599341: step: 574/463, loss: 1.517804741859436 2023-01-24 00:07:28.331866: step: 576/463, loss: 1.369206190109253 2023-01-24 00:07:28.982915: step: 578/463, loss: 0.7141205072402954 2023-01-24 00:07:29.602240: step: 580/463, loss: 0.3427852988243103 2023-01-24 00:07:30.191201: step: 582/463, loss: 0.9353724718093872 2023-01-24 00:07:30.834911: step: 584/463, loss: 2.5886993408203125 2023-01-24 00:07:31.481445: step: 586/463, loss: 2.3710124492645264 2023-01-24 00:07:32.188095: step: 588/463, loss: 0.9547492265701294 2023-01-24 00:07:32.792001: step: 590/463, loss: 0.7733676433563232 2023-01-24 00:07:33.445063: step: 592/463, loss: 1.1091457605361938 2023-01-24 00:07:34.094254: step: 594/463, loss: 1.0487369298934937 2023-01-24 00:07:34.823391: step: 596/463, loss: 0.9992524981498718 2023-01-24 00:07:35.404302: step: 598/463, loss: 0.40794116258621216 2023-01-24 00:07:35.975463: step: 600/463, loss: 3.844391345977783 2023-01-24 00:07:36.564467: step: 602/463, loss: 1.3142657279968262 2023-01-24 00:07:37.199662: step: 604/463, loss: 1.470659613609314 2023-01-24 00:07:37.839182: step: 606/463, loss: 0.4712180495262146 2023-01-24 00:07:38.480400: step: 608/463, loss: 1.1896867752075195 2023-01-24 00:07:39.083115: step: 610/463, loss: 1.4044132232666016 2023-01-24 00:07:39.714481: step: 612/463, loss: 12.41322135925293 2023-01-24 00:07:40.360280: step: 614/463, loss: 1.7171419858932495 2023-01-24 00:07:40.996537: step: 616/463, loss: 0.3041672110557556 2023-01-24 00:07:41.567949: step: 618/463, loss: 2.610074281692505 2023-01-24 00:07:42.236212: step: 620/463, loss: 0.8648314476013184 2023-01-24 00:07:42.823189: step: 622/463, loss: 1.521160364151001 2023-01-24 00:07:43.403361: step: 624/463, loss: 1.4403678178787231 2023-01-24 00:07:44.051475: step: 626/463, loss: 0.3804192543029785 2023-01-24 00:07:44.657269: step: 628/463, loss: 0.2660582363605499 2023-01-24 00:07:45.377646: step: 630/463, loss: 1.0609209537506104 2023-01-24 00:07:46.086810: step: 632/463, loss: 1.5146912336349487 2023-01-24 00:07:46.709692: step: 634/463, loss: 1.0184211730957031 2023-01-24 00:07:47.275682: step: 636/463, loss: 3.6134836673736572 2023-01-24 00:07:47.933723: step: 638/463, loss: 5.694892883300781 2023-01-24 00:07:48.598141: step: 640/463, loss: 1.4821346998214722 2023-01-24 00:07:49.203444: step: 642/463, loss: 1.0457806587219238 2023-01-24 00:07:49.830679: step: 644/463, loss: 1.15732741355896 2023-01-24 00:07:50.438347: step: 646/463, loss: 0.8805398344993591 2023-01-24 00:07:51.087969: step: 648/463, loss: 1.3210361003875732 2023-01-24 00:07:51.723572: step: 650/463, loss: 2.356326103210449 2023-01-24 00:07:52.319107: step: 652/463, loss: 0.37767118215560913 2023-01-24 00:07:52.901854: step: 654/463, loss: 1.0555232763290405 2023-01-24 00:07:53.523473: step: 656/463, loss: 1.132493257522583 2023-01-24 00:07:54.126637: step: 658/463, loss: 0.3220830261707306 2023-01-24 00:07:54.718019: step: 660/463, loss: 1.618369698524475 2023-01-24 00:07:55.349017: step: 662/463, loss: 2.3216099739074707 2023-01-24 00:07:55.960236: step: 664/463, loss: 0.3483749032020569 2023-01-24 00:07:56.573362: step: 666/463, loss: 0.4026816189289093 2023-01-24 00:07:57.116446: step: 668/463, loss: 0.9321635961532593 2023-01-24 00:07:57.674716: step: 670/463, loss: 3.0314548015594482 2023-01-24 00:07:58.287949: step: 672/463, loss: 1.1623146533966064 2023-01-24 00:07:58.864672: step: 674/463, loss: 1.567813754081726 2023-01-24 00:07:59.440986: step: 676/463, loss: 1.1469882726669312 2023-01-24 00:08:00.006463: step: 678/463, loss: 1.4488227367401123 2023-01-24 00:08:00.657960: step: 680/463, loss: 5.3399763107299805 2023-01-24 00:08:01.372870: step: 682/463, loss: 2.2437868118286133 2023-01-24 00:08:01.962154: step: 684/463, loss: 1.210664987564087 2023-01-24 00:08:02.591945: step: 686/463, loss: 0.20591044425964355 2023-01-24 00:08:03.244664: step: 688/463, loss: 1.8625613451004028 2023-01-24 00:08:03.869606: step: 690/463, loss: 3.4302992820739746 2023-01-24 00:08:04.489253: step: 692/463, loss: 1.37776780128479 2023-01-24 00:08:05.172822: step: 694/463, loss: 2.2194693088531494 2023-01-24 00:08:05.777764: step: 696/463, loss: 2.1454403400421143 2023-01-24 00:08:06.395878: step: 698/463, loss: 3.7571959495544434 2023-01-24 00:08:06.997680: step: 700/463, loss: 1.7427443265914917 2023-01-24 00:08:07.670874: step: 702/463, loss: 0.44627776741981506 2023-01-24 00:08:08.352022: step: 704/463, loss: 2.0296101570129395 2023-01-24 00:08:08.994007: step: 706/463, loss: 3.0709877014160156 2023-01-24 00:08:09.599005: step: 708/463, loss: 1.7025601863861084 2023-01-24 00:08:10.166699: step: 710/463, loss: 0.8691942691802979 2023-01-24 00:08:10.792928: step: 712/463, loss: 0.6644496917724609 2023-01-24 00:08:11.445069: step: 714/463, loss: 1.6537455320358276 2023-01-24 00:08:12.052822: step: 716/463, loss: 0.4561287760734558 2023-01-24 00:08:12.700823: step: 718/463, loss: 4.355337142944336 2023-01-24 00:08:13.323183: step: 720/463, loss: 0.2932380437850952 2023-01-24 00:08:13.963092: step: 722/463, loss: 1.7480332851409912 2023-01-24 00:08:14.540281: step: 724/463, loss: 0.6260095238685608 2023-01-24 00:08:15.108852: step: 726/463, loss: 2.5749106407165527 2023-01-24 00:08:15.715022: step: 728/463, loss: 1.2347019910812378 2023-01-24 00:08:16.354793: step: 730/463, loss: 2.2668538093566895 2023-01-24 00:08:16.997465: step: 732/463, loss: 0.4873841106891632 2023-01-24 00:08:17.622368: step: 734/463, loss: 0.5760297775268555 2023-01-24 00:08:18.245620: step: 736/463, loss: 0.29523178935050964 2023-01-24 00:08:18.893048: step: 738/463, loss: 1.0267291069030762 2023-01-24 00:08:19.490487: step: 740/463, loss: 1.0055054426193237 2023-01-24 00:08:20.101381: step: 742/463, loss: 1.4158940315246582 2023-01-24 00:08:20.705039: step: 744/463, loss: 2.1626105308532715 2023-01-24 00:08:21.324521: step: 746/463, loss: 0.4254618287086487 2023-01-24 00:08:21.947898: step: 748/463, loss: 4.174129009246826 2023-01-24 00:08:22.648309: step: 750/463, loss: 0.6104744076728821 2023-01-24 00:08:23.291692: step: 752/463, loss: 0.6177344918251038 2023-01-24 00:08:23.929719: step: 754/463, loss: 4.04056978225708 2023-01-24 00:08:24.586519: step: 756/463, loss: 2.3158512115478516 2023-01-24 00:08:25.220189: step: 758/463, loss: 0.59056556224823 2023-01-24 00:08:25.841428: step: 760/463, loss: 1.5480821132659912 2023-01-24 00:08:26.461264: step: 762/463, loss: 1.7627665996551514 2023-01-24 00:08:27.115153: step: 764/463, loss: 0.8368968963623047 2023-01-24 00:08:27.685853: step: 766/463, loss: 0.8852662444114685 2023-01-24 00:08:28.310475: step: 768/463, loss: 1.296881914138794 2023-01-24 00:08:28.873580: step: 770/463, loss: 0.405099481344223 2023-01-24 00:08:29.483008: step: 772/463, loss: 0.8391633629798889 2023-01-24 00:08:30.081207: step: 774/463, loss: 1.044474482536316 2023-01-24 00:08:30.769472: step: 776/463, loss: 1.3586586713790894 2023-01-24 00:08:31.395362: step: 778/463, loss: 0.4751380681991577 2023-01-24 00:08:32.009865: step: 780/463, loss: 1.0881835222244263 2023-01-24 00:08:32.642929: step: 782/463, loss: 1.1149868965148926 2023-01-24 00:08:33.281286: step: 784/463, loss: 0.8063578605651855 2023-01-24 00:08:33.908634: step: 786/463, loss: 1.1965080499649048 2023-01-24 00:08:34.579874: step: 788/463, loss: 0.7118450403213501 2023-01-24 00:08:35.174622: step: 790/463, loss: 2.6800503730773926 2023-01-24 00:08:35.777176: step: 792/463, loss: 1.428928017616272 2023-01-24 00:08:36.398867: step: 794/463, loss: 1.1040412187576294 2023-01-24 00:08:37.044903: step: 796/463, loss: 0.8095263242721558 2023-01-24 00:08:37.672405: step: 798/463, loss: 1.8907171487808228 2023-01-24 00:08:38.314557: step: 800/463, loss: 1.46341872215271 2023-01-24 00:08:38.969340: step: 802/463, loss: 3.4522597789764404 2023-01-24 00:08:39.565943: step: 804/463, loss: 1.3675616979599 2023-01-24 00:08:40.218300: step: 806/463, loss: 1.7054327726364136 2023-01-24 00:08:40.834792: step: 808/463, loss: 1.456458568572998 2023-01-24 00:08:41.427117: step: 810/463, loss: 0.9994184374809265 2023-01-24 00:08:42.126151: step: 812/463, loss: 1.1029129028320312 2023-01-24 00:08:42.782119: step: 814/463, loss: 0.8378959894180298 2023-01-24 00:08:43.402963: step: 816/463, loss: 1.1774468421936035 2023-01-24 00:08:44.094648: step: 818/463, loss: 2.8366777896881104 2023-01-24 00:08:44.662126: step: 820/463, loss: 2.7778737545013428 2023-01-24 00:08:45.222207: step: 822/463, loss: 1.7021194696426392 2023-01-24 00:08:45.815606: step: 824/463, loss: 1.1035935878753662 2023-01-24 00:08:46.380785: step: 826/463, loss: 0.9226508140563965 2023-01-24 00:08:46.988380: step: 828/463, loss: 1.2424044609069824 2023-01-24 00:08:47.632926: step: 830/463, loss: 5.341761589050293 2023-01-24 00:08:48.418986: step: 832/463, loss: 1.7395113706588745 2023-01-24 00:08:49.026051: step: 834/463, loss: 1.1090619564056396 2023-01-24 00:08:49.622444: step: 836/463, loss: 1.822282075881958 2023-01-24 00:08:50.298997: step: 838/463, loss: 8.355430603027344 2023-01-24 00:08:50.925930: step: 840/463, loss: 1.2190101146697998 2023-01-24 00:08:51.557483: step: 842/463, loss: 1.0992544889450073 2023-01-24 00:08:52.193242: step: 844/463, loss: 8.197500228881836 2023-01-24 00:08:52.856918: step: 846/463, loss: 0.6674755215644836 2023-01-24 00:08:53.477871: step: 848/463, loss: 0.3324805200099945 2023-01-24 00:08:54.132021: step: 850/463, loss: 1.9683656692504883 2023-01-24 00:08:54.769647: step: 852/463, loss: 0.7022861242294312 2023-01-24 00:08:55.401036: step: 854/463, loss: 0.5963472127914429 2023-01-24 00:08:56.094391: step: 856/463, loss: 8.475939750671387 2023-01-24 00:08:56.799677: step: 858/463, loss: 1.18717360496521 2023-01-24 00:08:57.365877: step: 860/463, loss: 1.9248133897781372 2023-01-24 00:08:58.018646: step: 862/463, loss: 0.484659880399704 2023-01-24 00:08:58.566455: step: 864/463, loss: 0.9339818358421326 2023-01-24 00:08:59.878815: step: 866/463, loss: 0.6975277662277222 2023-01-24 00:09:00.533436: step: 868/463, loss: 1.8819152116775513 2023-01-24 00:09:01.155605: step: 870/463, loss: 0.7943848967552185 2023-01-24 00:09:01.774072: step: 872/463, loss: 1.0655615329742432 2023-01-24 00:09:02.392813: step: 874/463, loss: 10.067506790161133 2023-01-24 00:09:03.029649: step: 876/463, loss: 2.9179444313049316 2023-01-24 00:09:03.624026: step: 878/463, loss: 2.4652976989746094 2023-01-24 00:09:04.256034: step: 880/463, loss: 1.2682594060897827 2023-01-24 00:09:04.947307: step: 882/463, loss: 2.0408806800842285 2023-01-24 00:09:05.631540: step: 884/463, loss: 1.9349489212036133 2023-01-24 00:09:06.218382: step: 886/463, loss: 1.3845210075378418 2023-01-24 00:09:06.892547: step: 888/463, loss: 1.749855637550354 2023-01-24 00:09:07.487685: step: 890/463, loss: 0.8009510040283203 2023-01-24 00:09:08.106371: step: 892/463, loss: 1.494858980178833 2023-01-24 00:09:08.746784: step: 894/463, loss: 1.754842758178711 2023-01-24 00:09:09.352039: step: 896/463, loss: 2.425070285797119 2023-01-24 00:09:09.982466: step: 898/463, loss: 1.732689380645752 2023-01-24 00:09:10.618074: step: 900/463, loss: 1.6223952770233154 2023-01-24 00:09:11.199463: step: 902/463, loss: 3.6462016105651855 2023-01-24 00:09:11.798366: step: 904/463, loss: 2.098703384399414 2023-01-24 00:09:12.472355: step: 906/463, loss: 1.2983542680740356 2023-01-24 00:09:13.080040: step: 908/463, loss: 2.644914150238037 2023-01-24 00:09:13.682694: step: 910/463, loss: 1.0027775764465332 2023-01-24 00:09:14.268284: step: 912/463, loss: 0.8366899490356445 2023-01-24 00:09:14.932861: step: 914/463, loss: 1.4652920961380005 2023-01-24 00:09:15.655052: step: 916/463, loss: 4.050869941711426 2023-01-24 00:09:16.303856: step: 918/463, loss: 0.1418902575969696 2023-01-24 00:09:16.990460: step: 920/463, loss: 1.356013536453247 2023-01-24 00:09:17.611006: step: 922/463, loss: 1.6795458793640137 2023-01-24 00:09:18.213380: step: 924/463, loss: 1.043339729309082 2023-01-24 00:09:18.921944: step: 926/463, loss: 4.3604583740234375 ================================================== Loss: 2.155 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34571150097465886, 'r': 0.17678429027113238, 'f1': 0.23394011344149845}, 'combined': 0.1723769256937357, 'epoch': 1} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.38209719059336583, 'r': 0.2518129056247755, 'f1': 0.3035667182725194}, 'combined': 0.23530051846960837, 'epoch': 1} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3158844986290638, 'r': 0.17708676438296003, 'f1': 0.2269461446461235}, 'combined': 0.16722347500240678, 'epoch': 1} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3528413671222335, 'r': 0.245468925431508, 'f1': 0.2895206461035408}, 'combined': 0.2244131323864766, 'epoch': 1} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31625652080010636, 'r': 0.172503556800058, 'f1': 0.22323989703536917}, 'combined': 0.16449255571027202, 'epoch': 1} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3627961653331973, 'r': 0.2476175108312582, 'f1': 0.294340336420408}, 'combined': 0.2281489689000292, 'epoch': 1} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3, 'r': 0.17142857142857143, 'f1': 0.21818181818181817}, 'combined': 0.14545454545454545, 'epoch': 1} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.375, 'r': 0.22826086956521738, 'f1': 0.28378378378378377}, 'combined': 0.14189189189189189, 'epoch': 1} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.40460526315789475, 'r': 0.11161524500907441, 'f1': 0.17496443812233284}, 'combined': 0.11664295874822189, 'epoch': 1} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34571150097465886, 'r': 0.17678429027113238, 'f1': 0.23394011344149845}, 'combined': 0.1723769256937357, 'epoch': 1} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.38209719059336583, 'r': 0.2518129056247755, 'f1': 0.3035667182725194}, 'combined': 0.23530051846960837, 'epoch': 1} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3, 'r': 0.17142857142857143, 'f1': 0.21818181818181817}, 'combined': 0.14545454545454545, 'epoch': 1} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3158844986290638, 'r': 0.17708676438296003, 'f1': 0.2269461446461235}, 'combined': 0.16722347500240678, 'epoch': 1} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3528413671222335, 'r': 0.245468925431508, 'f1': 0.2895206461035408}, 'combined': 0.2244131323864766, 'epoch': 1} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.375, 'r': 0.22826086956521738, 'f1': 0.28378378378378377}, 'combined': 0.14189189189189189, 'epoch': 1} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31625652080010636, 'r': 0.172503556800058, 'f1': 0.22323989703536917}, 'combined': 0.16449255571027202, 'epoch': 1} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3627961653331973, 'r': 0.2476175108312582, 'f1': 0.294340336420408}, 'combined': 0.2281489689000292, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.40460526315789475, 'r': 0.11161524500907441, 'f1': 0.17496443812233284}, 'combined': 0.11664295874822189, 'epoch': 1} ****************************** Epoch: 2 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:12:15.908163: step: 2/463, loss: 1.4660286903381348 2023-01-24 00:12:16.589391: step: 4/463, loss: 1.409912347793579 2023-01-24 00:12:17.234653: step: 6/463, loss: 1.0477442741394043 2023-01-24 00:12:17.820962: step: 8/463, loss: 0.9781979918479919 2023-01-24 00:12:18.431556: step: 10/463, loss: 0.44752755761146545 2023-01-24 00:12:19.040843: step: 12/463, loss: 3.8065993785858154 2023-01-24 00:12:19.651590: step: 14/463, loss: 1.494258999824524 2023-01-24 00:12:20.236826: step: 16/463, loss: 0.5401797294616699 2023-01-24 00:12:20.845245: step: 18/463, loss: 11.624839782714844 2023-01-24 00:12:21.458425: step: 20/463, loss: 0.5669568777084351 2023-01-24 00:12:22.046441: step: 22/463, loss: 0.73268723487854 2023-01-24 00:12:22.658828: step: 24/463, loss: 2.1691019535064697 2023-01-24 00:12:23.294755: step: 26/463, loss: 0.29225462675094604 2023-01-24 00:12:23.902282: step: 28/463, loss: 1.8725613355636597 2023-01-24 00:12:24.528476: step: 30/463, loss: 0.5147923827171326 2023-01-24 00:12:25.133955: step: 32/463, loss: 0.8150551319122314 2023-01-24 00:12:25.795672: step: 34/463, loss: 1.74850332736969 2023-01-24 00:12:26.346609: step: 36/463, loss: 2.0846107006073 2023-01-24 00:12:26.917952: step: 38/463, loss: 1.8782908916473389 2023-01-24 00:12:27.533781: step: 40/463, loss: 0.5618594884872437 2023-01-24 00:12:28.252990: step: 42/463, loss: 2.6510252952575684 2023-01-24 00:12:28.828043: step: 44/463, loss: 0.811319887638092 2023-01-24 00:12:29.451374: step: 46/463, loss: 1.146957516670227 2023-01-24 00:12:30.081796: step: 48/463, loss: 0.8560822606086731 2023-01-24 00:12:30.616611: step: 50/463, loss: 0.3750093877315521 2023-01-24 00:12:31.200397: step: 52/463, loss: 0.6742995977401733 2023-01-24 00:12:31.897769: step: 54/463, loss: 1.573695421218872 2023-01-24 00:12:32.473565: step: 56/463, loss: 0.9209487438201904 2023-01-24 00:12:33.049202: step: 58/463, loss: 1.2521803379058838 2023-01-24 00:12:33.652602: step: 60/463, loss: 0.7721288800239563 2023-01-24 00:12:34.194909: step: 62/463, loss: 0.7939708828926086 2023-01-24 00:12:34.813592: step: 64/463, loss: 0.5871481895446777 2023-01-24 00:12:35.493028: step: 66/463, loss: 1.1367230415344238 2023-01-24 00:12:36.187220: step: 68/463, loss: 0.720838189125061 2023-01-24 00:12:36.812039: step: 70/463, loss: 6.537387847900391 2023-01-24 00:12:37.442431: step: 72/463, loss: 1.9464659690856934 2023-01-24 00:12:37.999900: step: 74/463, loss: 2.104203224182129 2023-01-24 00:12:38.547243: step: 76/463, loss: 0.4451018273830414 2023-01-24 00:12:39.319261: step: 78/463, loss: 0.6001902222633362 2023-01-24 00:12:39.822571: step: 80/463, loss: 0.8536806702613831 2023-01-24 00:12:40.453249: step: 82/463, loss: 0.7285168766975403 2023-01-24 00:12:41.065621: step: 84/463, loss: 0.276102215051651 2023-01-24 00:12:41.743053: step: 86/463, loss: 3.1876182556152344 2023-01-24 00:12:42.343870: step: 88/463, loss: 0.5421339273452759 2023-01-24 00:12:42.999266: step: 90/463, loss: 1.9555740356445312 2023-01-24 00:12:43.557710: step: 92/463, loss: 1.3989418745040894 2023-01-24 00:12:44.199367: step: 94/463, loss: 0.33679524064064026 2023-01-24 00:12:44.788898: step: 96/463, loss: 0.48682576417922974 2023-01-24 00:12:45.463379: step: 98/463, loss: 0.6764743328094482 2023-01-24 00:12:46.128281: step: 100/463, loss: 2.4615845680236816 2023-01-24 00:12:46.659363: step: 102/463, loss: 4.785783290863037 2023-01-24 00:12:47.236295: step: 104/463, loss: 4.911044120788574 2023-01-24 00:12:47.928303: step: 106/463, loss: 0.643082857131958 2023-01-24 00:12:48.584061: step: 108/463, loss: 1.482361078262329 2023-01-24 00:12:49.206429: step: 110/463, loss: 1.034616470336914 2023-01-24 00:12:49.796405: step: 112/463, loss: 2.216588020324707 2023-01-24 00:12:50.435967: step: 114/463, loss: 3.748507261276245 2023-01-24 00:12:51.069695: step: 116/463, loss: 3.693830966949463 2023-01-24 00:12:51.677389: step: 118/463, loss: 1.0829129219055176 2023-01-24 00:12:52.368625: step: 120/463, loss: 1.3640830516815186 2023-01-24 00:12:52.945394: step: 122/463, loss: 2.6467080116271973 2023-01-24 00:12:53.593748: step: 124/463, loss: 1.7558529376983643 2023-01-24 00:12:54.248245: step: 126/463, loss: 0.5338179469108582 2023-01-24 00:12:54.868326: step: 128/463, loss: 4.626224994659424 2023-01-24 00:12:55.535402: step: 130/463, loss: 0.9506338834762573 2023-01-24 00:12:56.084762: step: 132/463, loss: 1.1604193449020386 2023-01-24 00:12:56.655879: step: 134/463, loss: 0.9605328440666199 2023-01-24 00:12:57.277227: step: 136/463, loss: 2.9243288040161133 2023-01-24 00:12:57.911463: step: 138/463, loss: 0.8890388011932373 2023-01-24 00:12:58.511544: step: 140/463, loss: 1.8085663318634033 2023-01-24 00:12:59.086595: step: 142/463, loss: 0.8109515905380249 2023-01-24 00:12:59.640883: step: 144/463, loss: 0.6374633312225342 2023-01-24 00:13:00.245331: step: 146/463, loss: 0.6498245596885681 2023-01-24 00:13:00.864513: step: 148/463, loss: 1.0928947925567627 2023-01-24 00:13:01.460832: step: 150/463, loss: 1.227744460105896 2023-01-24 00:13:02.043010: step: 152/463, loss: 0.3492332994937897 2023-01-24 00:13:02.725336: step: 154/463, loss: 0.16385142505168915 2023-01-24 00:13:03.402603: step: 156/463, loss: 2.3085997104644775 2023-01-24 00:13:03.966998: step: 158/463, loss: 1.3386341333389282 2023-01-24 00:13:04.724794: step: 160/463, loss: 1.241170048713684 2023-01-24 00:13:05.321740: step: 162/463, loss: 0.8155643939971924 2023-01-24 00:13:05.972168: step: 164/463, loss: 0.8639945387840271 2023-01-24 00:13:06.625483: step: 166/463, loss: 0.8003929853439331 2023-01-24 00:13:07.196702: step: 168/463, loss: 1.34852135181427 2023-01-24 00:13:07.813291: step: 170/463, loss: 2.3515124320983887 2023-01-24 00:13:08.457018: step: 172/463, loss: 0.6648683547973633 2023-01-24 00:13:09.058441: step: 174/463, loss: 0.8789557218551636 2023-01-24 00:13:09.720404: step: 176/463, loss: 2.127133846282959 2023-01-24 00:13:10.360075: step: 178/463, loss: 0.28835994005203247 2023-01-24 00:13:10.985088: step: 180/463, loss: 0.6357977390289307 2023-01-24 00:13:11.557722: step: 182/463, loss: 0.6560345888137817 2023-01-24 00:13:12.086232: step: 184/463, loss: 0.23894557356834412 2023-01-24 00:13:12.659685: step: 186/463, loss: 1.8762205839157104 2023-01-24 00:13:13.234103: step: 188/463, loss: 3.8380684852600098 2023-01-24 00:13:13.859377: step: 190/463, loss: 1.7604936361312866 2023-01-24 00:13:14.463075: step: 192/463, loss: 0.8130249977111816 2023-01-24 00:13:15.090210: step: 194/463, loss: 0.999138593673706 2023-01-24 00:13:15.754520: step: 196/463, loss: 0.5387791991233826 2023-01-24 00:13:16.316225: step: 198/463, loss: 1.9275074005126953 2023-01-24 00:13:16.863515: step: 200/463, loss: 0.762654185295105 2023-01-24 00:13:17.451004: step: 202/463, loss: 1.0605900287628174 2023-01-24 00:13:18.029661: step: 204/463, loss: 5.775745391845703 2023-01-24 00:13:18.648562: step: 206/463, loss: 0.5210607051849365 2023-01-24 00:13:19.225316: step: 208/463, loss: 5.282645225524902 2023-01-24 00:13:19.836346: step: 210/463, loss: 0.870699405670166 2023-01-24 00:13:20.402968: step: 212/463, loss: 1.8843967914581299 2023-01-24 00:13:20.970515: step: 214/463, loss: 1.6005609035491943 2023-01-24 00:13:21.599882: step: 216/463, loss: 5.171538829803467 2023-01-24 00:13:22.160602: step: 218/463, loss: 3.3907129764556885 2023-01-24 00:13:22.833239: step: 220/463, loss: 7.025726318359375 2023-01-24 00:13:23.458962: step: 222/463, loss: 1.6763451099395752 2023-01-24 00:13:24.066040: step: 224/463, loss: 3.6829617023468018 2023-01-24 00:13:24.754469: step: 226/463, loss: 3.822625160217285 2023-01-24 00:13:25.349490: step: 228/463, loss: 0.3872496485710144 2023-01-24 00:13:25.970819: step: 230/463, loss: 1.786887288093567 2023-01-24 00:13:26.553335: step: 232/463, loss: 2.5063986778259277 2023-01-24 00:13:27.151209: step: 234/463, loss: 0.3635252118110657 2023-01-24 00:13:27.792575: step: 236/463, loss: 1.9342350959777832 2023-01-24 00:13:28.397786: step: 238/463, loss: 4.329127311706543 2023-01-24 00:13:29.049069: step: 240/463, loss: 0.24979916214942932 2023-01-24 00:13:29.655617: step: 242/463, loss: 0.7364491820335388 2023-01-24 00:13:30.259257: step: 244/463, loss: 2.0005571842193604 2023-01-24 00:13:30.771751: step: 246/463, loss: 1.3922744989395142 2023-01-24 00:13:31.409511: step: 248/463, loss: 1.6599267721176147 2023-01-24 00:13:32.119869: step: 250/463, loss: 0.9804898500442505 2023-01-24 00:13:32.744286: step: 252/463, loss: 1.5135247707366943 2023-01-24 00:13:33.343995: step: 254/463, loss: 1.1995038986206055 2023-01-24 00:13:33.939514: step: 256/463, loss: 1.7913821935653687 2023-01-24 00:13:34.586293: step: 258/463, loss: 1.5432732105255127 2023-01-24 00:13:35.192365: step: 260/463, loss: 1.5108667612075806 2023-01-24 00:13:35.874353: step: 262/463, loss: 0.36469635367393494 2023-01-24 00:13:36.496093: step: 264/463, loss: 0.7474936842918396 2023-01-24 00:13:37.089537: step: 266/463, loss: 0.7007871866226196 2023-01-24 00:13:37.703645: step: 268/463, loss: 0.2520897090435028 2023-01-24 00:13:38.251846: step: 270/463, loss: 6.105922222137451 2023-01-24 00:13:38.886555: step: 272/463, loss: 2.4899673461914062 2023-01-24 00:13:39.496262: step: 274/463, loss: 3.262800455093384 2023-01-24 00:13:40.114425: step: 276/463, loss: 1.1527456045150757 2023-01-24 00:13:40.872577: step: 278/463, loss: 2.031769037246704 2023-01-24 00:13:41.493354: step: 280/463, loss: 2.4171719551086426 2023-01-24 00:13:42.099332: step: 282/463, loss: 2.7042527198791504 2023-01-24 00:13:42.700803: step: 284/463, loss: 3.3712878227233887 2023-01-24 00:13:43.294066: step: 286/463, loss: 5.174746036529541 2023-01-24 00:13:43.934679: step: 288/463, loss: 1.609847068786621 2023-01-24 00:13:44.538073: step: 290/463, loss: 0.902625560760498 2023-01-24 00:13:45.168575: step: 292/463, loss: 0.6399639844894409 2023-01-24 00:13:45.813098: step: 294/463, loss: 1.2580806016921997 2023-01-24 00:13:46.390683: step: 296/463, loss: 0.6816375255584717 2023-01-24 00:13:47.049105: step: 298/463, loss: 1.2385717630386353 2023-01-24 00:13:47.687452: step: 300/463, loss: 0.536469578742981 2023-01-24 00:13:48.270586: step: 302/463, loss: 0.9843665361404419 2023-01-24 00:13:48.888950: step: 304/463, loss: 0.8425735235214233 2023-01-24 00:13:49.423482: step: 306/463, loss: 0.27193188667297363 2023-01-24 00:13:50.025367: step: 308/463, loss: 0.4825771450996399 2023-01-24 00:13:50.633151: step: 310/463, loss: 0.8274874091148376 2023-01-24 00:13:51.285372: step: 312/463, loss: 4.543065071105957 2023-01-24 00:13:51.905384: step: 314/463, loss: 0.3094620406627655 2023-01-24 00:13:52.616983: step: 316/463, loss: 0.7046681046485901 2023-01-24 00:13:53.228367: step: 318/463, loss: 0.38656431436538696 2023-01-24 00:13:53.901761: step: 320/463, loss: 0.5257557034492493 2023-01-24 00:13:54.616903: step: 322/463, loss: 0.4737904965877533 2023-01-24 00:13:55.211224: step: 324/463, loss: 0.5960273742675781 2023-01-24 00:13:55.814591: step: 326/463, loss: 1.6312655210494995 2023-01-24 00:13:56.400630: step: 328/463, loss: 1.3117055892944336 2023-01-24 00:13:57.020989: step: 330/463, loss: 1.133667230606079 2023-01-24 00:13:57.715378: step: 332/463, loss: 5.118167877197266 2023-01-24 00:13:58.317361: step: 334/463, loss: 0.7834743857383728 2023-01-24 00:13:58.938780: step: 336/463, loss: 0.40042173862457275 2023-01-24 00:13:59.594292: step: 338/463, loss: 1.3825232982635498 2023-01-24 00:14:00.137396: step: 340/463, loss: 1.819142460823059 2023-01-24 00:14:00.778338: step: 342/463, loss: 0.5662192106246948 2023-01-24 00:14:01.356922: step: 344/463, loss: 1.0023061037063599 2023-01-24 00:14:01.977204: step: 346/463, loss: 2.2159485816955566 2023-01-24 00:14:02.615493: step: 348/463, loss: 1.493571162223816 2023-01-24 00:14:03.304506: step: 350/463, loss: 1.2307442426681519 2023-01-24 00:14:03.934165: step: 352/463, loss: 1.850580096244812 2023-01-24 00:14:04.577820: step: 354/463, loss: 1.633779764175415 2023-01-24 00:14:05.301235: step: 356/463, loss: 2.8367621898651123 2023-01-24 00:14:05.967112: step: 358/463, loss: 0.959022045135498 2023-01-24 00:14:06.567024: step: 360/463, loss: 0.41384461522102356 2023-01-24 00:14:07.172673: step: 362/463, loss: 6.035156726837158 2023-01-24 00:14:07.767899: step: 364/463, loss: 0.5185157656669617 2023-01-24 00:14:08.385622: step: 366/463, loss: 1.361049771308899 2023-01-24 00:14:09.018528: step: 368/463, loss: 1.148449182510376 2023-01-24 00:14:09.661875: step: 370/463, loss: 0.7699196338653564 2023-01-24 00:14:10.328513: step: 372/463, loss: 0.23366540670394897 2023-01-24 00:14:10.944338: step: 374/463, loss: 0.40603286027908325 2023-01-24 00:14:11.517003: step: 376/463, loss: 1.3715323209762573 2023-01-24 00:14:12.151191: step: 378/463, loss: 0.9165188074111938 2023-01-24 00:14:12.790014: step: 380/463, loss: 0.6428179740905762 2023-01-24 00:14:13.420944: step: 382/463, loss: 0.9613966345787048 2023-01-24 00:14:13.965194: step: 384/463, loss: 1.0092254877090454 2023-01-24 00:14:14.538049: step: 386/463, loss: 1.6940070390701294 2023-01-24 00:14:15.161294: step: 388/463, loss: 1.739461064338684 2023-01-24 00:14:15.741742: step: 390/463, loss: 0.2564719021320343 2023-01-24 00:14:16.364430: step: 392/463, loss: 1.3298134803771973 2023-01-24 00:14:16.940337: step: 394/463, loss: 0.3073103427886963 2023-01-24 00:14:17.553745: step: 396/463, loss: 2.5522541999816895 2023-01-24 00:14:18.139323: step: 398/463, loss: 0.31658124923706055 2023-01-24 00:14:18.808069: step: 400/463, loss: 2.982454299926758 2023-01-24 00:14:19.416730: step: 402/463, loss: 1.2065281867980957 2023-01-24 00:14:20.052078: step: 404/463, loss: 8.816350936889648 2023-01-24 00:14:20.686393: step: 406/463, loss: 1.474839448928833 2023-01-24 00:14:21.305645: step: 408/463, loss: 1.553722858428955 2023-01-24 00:14:21.906644: step: 410/463, loss: 0.47708964347839355 2023-01-24 00:14:22.613594: step: 412/463, loss: 0.6190148591995239 2023-01-24 00:14:23.267852: step: 414/463, loss: 0.6632809042930603 2023-01-24 00:14:23.842748: step: 416/463, loss: 1.2494126558303833 2023-01-24 00:14:24.479028: step: 418/463, loss: 0.9812670946121216 2023-01-24 00:14:25.121327: step: 420/463, loss: 1.4529144763946533 2023-01-24 00:14:25.714063: step: 422/463, loss: 1.0692442655563354 2023-01-24 00:14:26.340738: step: 424/463, loss: 3.1296417713165283 2023-01-24 00:14:26.914317: step: 426/463, loss: 7.347497940063477 2023-01-24 00:14:27.551290: step: 428/463, loss: 0.987598180770874 2023-01-24 00:14:28.167132: step: 430/463, loss: 1.848487377166748 2023-01-24 00:14:28.802515: step: 432/463, loss: 0.6004656553268433 2023-01-24 00:14:29.427614: step: 434/463, loss: 0.9679194092750549 2023-01-24 00:14:30.109671: step: 436/463, loss: 1.8503260612487793 2023-01-24 00:14:30.737578: step: 438/463, loss: 9.326584815979004 2023-01-24 00:14:31.325107: step: 440/463, loss: 1.3815076351165771 2023-01-24 00:14:31.927087: step: 442/463, loss: 2.8499250411987305 2023-01-24 00:14:32.533033: step: 444/463, loss: 1.5699008703231812 2023-01-24 00:14:33.173689: step: 446/463, loss: 1.273353099822998 2023-01-24 00:14:33.798156: step: 448/463, loss: 0.603901207447052 2023-01-24 00:14:34.419362: step: 450/463, loss: 0.6193287372589111 2023-01-24 00:14:35.013567: step: 452/463, loss: 12.034431457519531 2023-01-24 00:14:35.572361: step: 454/463, loss: 0.9807054400444031 2023-01-24 00:14:36.194864: step: 456/463, loss: 3.2840614318847656 2023-01-24 00:14:36.834983: step: 458/463, loss: 0.6853996515274048 2023-01-24 00:14:37.428927: step: 460/463, loss: 0.7610770463943481 2023-01-24 00:14:38.007734: step: 462/463, loss: 2.459209442138672 2023-01-24 00:14:38.672872: step: 464/463, loss: 0.7250696420669556 2023-01-24 00:14:39.242652: step: 466/463, loss: 1.5966969728469849 2023-01-24 00:14:39.896194: step: 468/463, loss: 2.038206100463867 2023-01-24 00:14:40.540146: step: 470/463, loss: 0.3883358836174011 2023-01-24 00:14:41.175105: step: 472/463, loss: 3.1393630504608154 2023-01-24 00:14:41.834729: step: 474/463, loss: 0.7819725871086121 2023-01-24 00:14:42.458173: step: 476/463, loss: 3.0739307403564453 2023-01-24 00:14:43.092979: step: 478/463, loss: 0.979496419429779 2023-01-24 00:14:43.734163: step: 480/463, loss: 3.8374500274658203 2023-01-24 00:14:44.368748: step: 482/463, loss: 1.2882366180419922 2023-01-24 00:14:45.006542: step: 484/463, loss: 0.7963497638702393 2023-01-24 00:14:45.618684: step: 486/463, loss: 0.8104899525642395 2023-01-24 00:14:46.245032: step: 488/463, loss: 1.0237727165222168 2023-01-24 00:14:46.882600: step: 490/463, loss: 3.098798990249634 2023-01-24 00:14:47.500320: step: 492/463, loss: 1.7690072059631348 2023-01-24 00:14:48.154581: step: 494/463, loss: 0.6741423010826111 2023-01-24 00:14:48.734513: step: 496/463, loss: 0.22404100000858307 2023-01-24 00:14:49.343467: step: 498/463, loss: 0.48252809047698975 2023-01-24 00:14:49.883961: step: 500/463, loss: 0.22238671779632568 2023-01-24 00:14:50.478833: step: 502/463, loss: 1.140441656112671 2023-01-24 00:14:51.068919: step: 504/463, loss: 0.32342782616615295 2023-01-24 00:14:51.705875: step: 506/463, loss: 0.9599140882492065 2023-01-24 00:14:52.248938: step: 508/463, loss: 0.38328179717063904 2023-01-24 00:14:52.894219: step: 510/463, loss: 1.8107041120529175 2023-01-24 00:14:53.491863: step: 512/463, loss: 0.5601431727409363 2023-01-24 00:14:54.141359: step: 514/463, loss: 0.8199431896209717 2023-01-24 00:14:54.733060: step: 516/463, loss: 0.28174111247062683 2023-01-24 00:14:55.353180: step: 518/463, loss: 0.3254542648792267 2023-01-24 00:14:55.943387: step: 520/463, loss: 1.1558681726455688 2023-01-24 00:14:56.561469: step: 522/463, loss: 0.5232790112495422 2023-01-24 00:14:57.172283: step: 524/463, loss: 2.3477325439453125 2023-01-24 00:14:57.769319: step: 526/463, loss: 0.6032165288925171 2023-01-24 00:14:58.391129: step: 528/463, loss: 0.8377740979194641 2023-01-24 00:14:58.957473: step: 530/463, loss: 0.5559775233268738 2023-01-24 00:14:59.681219: step: 532/463, loss: 0.685110330581665 2023-01-24 00:15:00.301375: step: 534/463, loss: 1.523877739906311 2023-01-24 00:15:00.979339: step: 536/463, loss: 2.7532782554626465 2023-01-24 00:15:01.642747: step: 538/463, loss: 0.42484498023986816 2023-01-24 00:15:02.236782: step: 540/463, loss: 1.3740956783294678 2023-01-24 00:15:02.768057: step: 542/463, loss: 0.6577314138412476 2023-01-24 00:15:03.398941: step: 544/463, loss: 0.2149064689874649 2023-01-24 00:15:04.037986: step: 546/463, loss: 0.8327336311340332 2023-01-24 00:15:04.721137: step: 548/463, loss: 0.5447402000427246 2023-01-24 00:15:05.321537: step: 550/463, loss: 0.37505486607551575 2023-01-24 00:15:06.000741: step: 552/463, loss: 0.6039700508117676 2023-01-24 00:15:06.711492: step: 554/463, loss: 0.36998796463012695 2023-01-24 00:15:07.315285: step: 556/463, loss: 5.404050827026367 2023-01-24 00:15:07.960909: step: 558/463, loss: 1.346888542175293 2023-01-24 00:15:08.559664: step: 560/463, loss: 0.7103744745254517 2023-01-24 00:15:09.219543: step: 562/463, loss: 0.5426487922668457 2023-01-24 00:15:09.829211: step: 564/463, loss: 1.5296276807785034 2023-01-24 00:15:10.426122: step: 566/463, loss: 3.758345603942871 2023-01-24 00:15:11.004596: step: 568/463, loss: 3.3112542629241943 2023-01-24 00:15:11.620445: step: 570/463, loss: 1.065105676651001 2023-01-24 00:15:12.334284: step: 572/463, loss: 1.2148536443710327 2023-01-24 00:15:13.008267: step: 574/463, loss: 2.2530767917633057 2023-01-24 00:15:13.609324: step: 576/463, loss: 0.49736371636390686 2023-01-24 00:15:14.163869: step: 578/463, loss: 1.3433315753936768 2023-01-24 00:15:14.761191: step: 580/463, loss: 1.414001703262329 2023-01-24 00:15:15.369320: step: 582/463, loss: 1.4997971057891846 2023-01-24 00:15:16.004569: step: 584/463, loss: 0.8019573092460632 2023-01-24 00:15:16.619586: step: 586/463, loss: 0.794381856918335 2023-01-24 00:15:17.301847: step: 588/463, loss: 0.8728071451187134 2023-01-24 00:15:17.884000: step: 590/463, loss: 1.1764867305755615 2023-01-24 00:15:18.500249: step: 592/463, loss: 0.33734798431396484 2023-01-24 00:15:19.112718: step: 594/463, loss: 0.4521723985671997 2023-01-24 00:15:19.794885: step: 596/463, loss: 0.7711382508277893 2023-01-24 00:15:20.380461: step: 598/463, loss: 2.0961408615112305 2023-01-24 00:15:21.002767: step: 600/463, loss: 0.8732158541679382 2023-01-24 00:15:21.584896: step: 602/463, loss: 1.8879420757293701 2023-01-24 00:15:22.220218: step: 604/463, loss: 2.4897968769073486 2023-01-24 00:15:22.858593: step: 606/463, loss: 1.7163336277008057 2023-01-24 00:15:23.486093: step: 608/463, loss: 1.9362083673477173 2023-01-24 00:15:24.037701: step: 610/463, loss: 0.7525981068611145 2023-01-24 00:15:24.618219: step: 612/463, loss: 0.758465051651001 2023-01-24 00:15:25.199766: step: 614/463, loss: 10.410308837890625 2023-01-24 00:15:25.848786: step: 616/463, loss: 0.6769464015960693 2023-01-24 00:15:26.476977: step: 618/463, loss: 1.1008172035217285 2023-01-24 00:15:27.187955: step: 620/463, loss: 0.9038336277008057 2023-01-24 00:15:27.797112: step: 622/463, loss: 1.5241053104400635 2023-01-24 00:15:28.516971: step: 624/463, loss: 2.4104151725769043 2023-01-24 00:15:29.153265: step: 626/463, loss: 0.24718955159187317 2023-01-24 00:15:29.799169: step: 628/463, loss: 0.9794787764549255 2023-01-24 00:15:30.389493: step: 630/463, loss: 0.6829535961151123 2023-01-24 00:15:30.992248: step: 632/463, loss: 0.29277709126472473 2023-01-24 00:15:31.581597: step: 634/463, loss: 1.5057624578475952 2023-01-24 00:15:32.166172: step: 636/463, loss: 0.3484926223754883 2023-01-24 00:15:32.756217: step: 638/463, loss: 1.3105700016021729 2023-01-24 00:15:33.350891: step: 640/463, loss: 0.8319970369338989 2023-01-24 00:15:33.984835: step: 642/463, loss: 0.731103777885437 2023-01-24 00:15:34.660073: step: 644/463, loss: 0.32466021180152893 2023-01-24 00:15:35.335127: step: 646/463, loss: 1.8179347515106201 2023-01-24 00:15:35.943492: step: 648/463, loss: 0.982518196105957 2023-01-24 00:15:36.514286: step: 650/463, loss: 1.2489746809005737 2023-01-24 00:15:37.119714: step: 652/463, loss: 1.3136425018310547 2023-01-24 00:15:37.713383: step: 654/463, loss: 0.7801439762115479 2023-01-24 00:15:38.253525: step: 656/463, loss: 1.4304418563842773 2023-01-24 00:15:38.827402: step: 658/463, loss: 3.3175930976867676 2023-01-24 00:15:39.396155: step: 660/463, loss: 0.4897221624851227 2023-01-24 00:15:40.022549: step: 662/463, loss: 0.6910770535469055 2023-01-24 00:15:40.606061: step: 664/463, loss: 0.6170697212219238 2023-01-24 00:15:41.271322: step: 666/463, loss: 0.35087454319000244 2023-01-24 00:15:41.961911: step: 668/463, loss: 0.8153607845306396 2023-01-24 00:15:42.565397: step: 670/463, loss: 0.5577743649482727 2023-01-24 00:15:43.130279: step: 672/463, loss: 1.2256635427474976 2023-01-24 00:15:43.711693: step: 674/463, loss: 0.37994086742401123 2023-01-24 00:15:44.353584: step: 676/463, loss: 2.9787983894348145 2023-01-24 00:15:44.985736: step: 678/463, loss: 1.5564532279968262 2023-01-24 00:15:45.641239: step: 680/463, loss: 0.9659140706062317 2023-01-24 00:15:46.240159: step: 682/463, loss: 0.8655729293823242 2023-01-24 00:15:46.820372: step: 684/463, loss: 1.3255674839019775 2023-01-24 00:15:47.433734: step: 686/463, loss: 0.5485014915466309 2023-01-24 00:15:48.039896: step: 688/463, loss: 1.1680799722671509 2023-01-24 00:15:48.623387: step: 690/463, loss: 6.5204033851623535 2023-01-24 00:15:49.205566: step: 692/463, loss: 2.358337879180908 2023-01-24 00:15:49.825948: step: 694/463, loss: 0.3776841163635254 2023-01-24 00:15:50.386913: step: 696/463, loss: 0.5681212544441223 2023-01-24 00:15:51.054282: step: 698/463, loss: 2.4553847312927246 2023-01-24 00:15:51.620297: step: 700/463, loss: 0.9091672301292419 2023-01-24 00:15:52.191104: step: 702/463, loss: 0.3592880666255951 2023-01-24 00:15:52.824249: step: 704/463, loss: 2.7278480529785156 2023-01-24 00:15:53.420391: step: 706/463, loss: 0.6220060586929321 2023-01-24 00:15:54.023383: step: 708/463, loss: 1.7619963884353638 2023-01-24 00:15:54.796471: step: 710/463, loss: 0.7121729254722595 2023-01-24 00:15:55.383380: step: 712/463, loss: 0.4605644643306732 2023-01-24 00:15:56.035591: step: 714/463, loss: 0.533030092716217 2023-01-24 00:15:56.684911: step: 716/463, loss: 0.9994133710861206 2023-01-24 00:15:57.357561: step: 718/463, loss: 0.5203347206115723 2023-01-24 00:15:58.020911: step: 720/463, loss: 0.34223470091819763 2023-01-24 00:15:58.579942: step: 722/463, loss: 1.003799319267273 2023-01-24 00:15:59.222313: step: 724/463, loss: 2.554177761077881 2023-01-24 00:15:59.780760: step: 726/463, loss: 1.3827364444732666 2023-01-24 00:16:00.494362: step: 728/463, loss: 2.2792391777038574 2023-01-24 00:16:01.082431: step: 730/463, loss: 0.8657217025756836 2023-01-24 00:16:01.652057: step: 732/463, loss: 1.5779836177825928 2023-01-24 00:16:02.269464: step: 734/463, loss: 1.3845607042312622 2023-01-24 00:16:02.925850: step: 736/463, loss: 0.5036277770996094 2023-01-24 00:16:03.479744: step: 738/463, loss: 0.8902627825737 2023-01-24 00:16:04.117581: step: 740/463, loss: 0.9201827049255371 2023-01-24 00:16:04.667548: step: 742/463, loss: 0.6846176385879517 2023-01-24 00:16:05.256358: step: 744/463, loss: 0.20370186865329742 2023-01-24 00:16:05.892704: step: 746/463, loss: 2.649069309234619 2023-01-24 00:16:06.508435: step: 748/463, loss: 2.1348648071289062 2023-01-24 00:16:07.169223: step: 750/463, loss: 1.833654522895813 2023-01-24 00:16:07.803100: step: 752/463, loss: 0.4487384855747223 2023-01-24 00:16:08.403308: step: 754/463, loss: 1.139836311340332 2023-01-24 00:16:09.033443: step: 756/463, loss: 3.737612724304199 2023-01-24 00:16:09.634423: step: 758/463, loss: 4.06385612487793 2023-01-24 00:16:10.248904: step: 760/463, loss: 0.8881456255912781 2023-01-24 00:16:10.842143: step: 762/463, loss: 1.1446888446807861 2023-01-24 00:16:11.399711: step: 764/463, loss: 3.837965488433838 2023-01-24 00:16:11.996411: step: 766/463, loss: 0.6248462796211243 2023-01-24 00:16:12.639411: step: 768/463, loss: 0.45582717657089233 2023-01-24 00:16:13.198348: step: 770/463, loss: 1.6969670057296753 2023-01-24 00:16:13.843572: step: 772/463, loss: 0.45889759063720703 2023-01-24 00:16:14.376047: step: 774/463, loss: 1.189276099205017 2023-01-24 00:16:14.984576: step: 776/463, loss: 3.6378519535064697 2023-01-24 00:16:15.588925: step: 778/463, loss: 1.0155627727508545 2023-01-24 00:16:16.148114: step: 780/463, loss: 0.20280370116233826 2023-01-24 00:16:16.743657: step: 782/463, loss: 2.4504642486572266 2023-01-24 00:16:17.368576: step: 784/463, loss: 0.46651384234428406 2023-01-24 00:16:18.002887: step: 786/463, loss: 1.5811887979507446 2023-01-24 00:16:18.656740: step: 788/463, loss: 0.5913249254226685 2023-01-24 00:16:19.304276: step: 790/463, loss: 0.39848899841308594 2023-01-24 00:16:20.022354: step: 792/463, loss: 0.4430505633354187 2023-01-24 00:16:20.661980: step: 794/463, loss: 1.360766887664795 2023-01-24 00:16:21.247944: step: 796/463, loss: 1.5937418937683105 2023-01-24 00:16:21.955485: step: 798/463, loss: 3.325157880783081 2023-01-24 00:16:22.639063: step: 800/463, loss: 2.8510522842407227 2023-01-24 00:16:23.309135: step: 802/463, loss: 7.090426921844482 2023-01-24 00:16:23.915215: step: 804/463, loss: 2.0580217838287354 2023-01-24 00:16:24.468958: step: 806/463, loss: 0.3951593339443207 2023-01-24 00:16:25.094605: step: 808/463, loss: 1.911102533340454 2023-01-24 00:16:25.751237: step: 810/463, loss: 0.8056557178497314 2023-01-24 00:16:26.462903: step: 812/463, loss: 0.8891547322273254 2023-01-24 00:16:27.099685: step: 814/463, loss: 1.185707449913025 2023-01-24 00:16:27.700349: step: 816/463, loss: 0.8997979164123535 2023-01-24 00:16:28.364804: step: 818/463, loss: 1.4040427207946777 2023-01-24 00:16:28.951533: step: 820/463, loss: 0.7298281192779541 2023-01-24 00:16:29.605639: step: 822/463, loss: 2.939669132232666 2023-01-24 00:16:30.243073: step: 824/463, loss: 0.9704604744911194 2023-01-24 00:16:30.910849: step: 826/463, loss: 2.1452479362487793 2023-01-24 00:16:31.561969: step: 828/463, loss: 0.4087268114089966 2023-01-24 00:16:32.170190: step: 830/463, loss: 0.9527699947357178 2023-01-24 00:16:32.757120: step: 832/463, loss: 0.4639630913734436 2023-01-24 00:16:33.402359: step: 834/463, loss: 1.3756364583969116 2023-01-24 00:16:33.983362: step: 836/463, loss: 1.413311243057251 2023-01-24 00:16:34.575096: step: 838/463, loss: 1.6089898347854614 2023-01-24 00:16:35.224855: step: 840/463, loss: 1.9243468046188354 2023-01-24 00:16:35.826972: step: 842/463, loss: 1.7314045429229736 2023-01-24 00:16:36.473657: step: 844/463, loss: 0.7033754587173462 2023-01-24 00:16:37.067816: step: 846/463, loss: 1.3000514507293701 2023-01-24 00:16:37.685759: step: 848/463, loss: 3.187561511993408 2023-01-24 00:16:38.341586: step: 850/463, loss: 1.1734458208084106 2023-01-24 00:16:38.929197: step: 852/463, loss: 0.9049826860427856 2023-01-24 00:16:39.497630: step: 854/463, loss: 0.322780042886734 2023-01-24 00:16:40.118471: step: 856/463, loss: 0.5923429727554321 2023-01-24 00:16:40.781604: step: 858/463, loss: 3.676504611968994 2023-01-24 00:16:41.377295: step: 860/463, loss: 1.0498003959655762 2023-01-24 00:16:42.029619: step: 862/463, loss: 0.45997345447540283 2023-01-24 00:16:42.676716: step: 864/463, loss: 1.7753878831863403 2023-01-24 00:16:43.281102: step: 866/463, loss: 0.7851157188415527 2023-01-24 00:16:43.930513: step: 868/463, loss: 3.227285861968994 2023-01-24 00:16:44.554832: step: 870/463, loss: 0.6186690926551819 2023-01-24 00:16:45.239590: step: 872/463, loss: 0.7951236963272095 2023-01-24 00:16:45.790301: step: 874/463, loss: 0.7681107521057129 2023-01-24 00:16:46.388169: step: 876/463, loss: 0.6349336504936218 2023-01-24 00:16:46.910213: step: 878/463, loss: 1.5183823108673096 2023-01-24 00:16:47.561508: step: 880/463, loss: 0.8476854562759399 2023-01-24 00:16:48.197726: step: 882/463, loss: 1.6248974800109863 2023-01-24 00:16:48.812185: step: 884/463, loss: 1.7656960487365723 2023-01-24 00:16:49.350036: step: 886/463, loss: 1.8569083213806152 2023-01-24 00:16:50.001907: step: 888/463, loss: 3.104783535003662 2023-01-24 00:16:50.635195: step: 890/463, loss: 0.704376220703125 2023-01-24 00:16:51.222444: step: 892/463, loss: 1.4094443321228027 2023-01-24 00:16:51.856101: step: 894/463, loss: 0.9609196186065674 2023-01-24 00:16:52.474223: step: 896/463, loss: 0.8082519769668579 2023-01-24 00:16:53.109575: step: 898/463, loss: 2.183816432952881 2023-01-24 00:16:53.738653: step: 900/463, loss: 4.201904296875 2023-01-24 00:16:54.317517: step: 902/463, loss: 1.4621878862380981 2023-01-24 00:16:55.004486: step: 904/463, loss: 0.7892708778381348 2023-01-24 00:16:55.612954: step: 906/463, loss: 1.0239051580429077 2023-01-24 00:16:56.246735: step: 908/463, loss: 1.0663679838180542 2023-01-24 00:16:56.846813: step: 910/463, loss: 0.3086651563644409 2023-01-24 00:16:57.468064: step: 912/463, loss: 0.7683236002922058 2023-01-24 00:16:58.163445: step: 914/463, loss: 1.2961827516555786 2023-01-24 00:16:58.711497: step: 916/463, loss: 0.8860579133033752 2023-01-24 00:16:59.321668: step: 918/463, loss: 2.0733752250671387 2023-01-24 00:16:59.968631: step: 920/463, loss: 1.5364549160003662 2023-01-24 00:17:00.557865: step: 922/463, loss: 0.8691327571868896 2023-01-24 00:17:01.237190: step: 924/463, loss: 0.23627275228500366 2023-01-24 00:17:01.878150: step: 926/463, loss: 0.609950065612793 ================================================== Loss: 1.538 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3256431448548412, 'r': 0.23868162321746883, 'f1': 0.2754620700739312}, 'combined': 0.20297205163342297, 'epoch': 2} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.36731924414118594, 'r': 0.29851012953622547, 'f1': 0.3293592006737077}, 'combined': 0.2552927775556969, 'epoch': 2} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3000784009255876, 'r': 0.23585707648507356, 'f1': 0.26411990749548003}, 'combined': 0.19461466868088, 'epoch': 2} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34916330373071497, 'r': 0.2978629101614639, 'f1': 0.3214793946143054}, 'combined': 0.2491849852991267, 'epoch': 2} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3033269710323153, 'r': 0.2355379888697903, 'f1': 0.26516856742697076}, 'combined': 0.19538736547250476, 'epoch': 2} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3496730766041415, 'r': 0.28738053586841744, 'f1': 0.315481253589422}, 'combined': 0.2445357085238582, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36, 'r': 0.2571428571428571, 'f1': 0.3}, 'combined': 0.19999999999999998, 'epoch': 2} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3, 'r': 0.1956521739130435, 'f1': 0.23684210526315788}, 'combined': 0.11842105263157894, 'epoch': 2} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.7916666666666666, 'r': 0.16379310344827586, 'f1': 0.2714285714285714}, 'combined': 0.18095238095238092, 'epoch': 2} New best chinese model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3256431448548412, 'r': 0.23868162321746883, 'f1': 0.2754620700739312}, 'combined': 0.20297205163342297, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.36731924414118594, 'r': 0.29851012953622547, 'f1': 0.3293592006737077}, 'combined': 0.2552927775556969, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36, 'r': 0.2571428571428571, 'f1': 0.3}, 'combined': 0.19999999999999998, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3158844986290638, 'r': 0.17708676438296003, 'f1': 0.2269461446461235}, 'combined': 0.16722347500240678, 'epoch': 1} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3528413671222335, 'r': 0.245468925431508, 'f1': 0.2895206461035408}, 'combined': 0.2244131323864766, 'epoch': 1} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.375, 'r': 0.22826086956521738, 'f1': 0.28378378378378377}, 'combined': 0.14189189189189189, 'epoch': 1} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3033269710323153, 'r': 0.2355379888697903, 'f1': 0.26516856742697076}, 'combined': 0.19538736547250476, 'epoch': 2} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3496730766041415, 'r': 0.28738053586841744, 'f1': 0.315481253589422}, 'combined': 0.2445357085238582, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.7916666666666666, 'r': 0.16379310344827586, 'f1': 0.2714285714285714}, 'combined': 0.18095238095238092, 'epoch': 2} ****************************** Epoch: 3 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:19:53.255991: step: 2/463, loss: 0.5861802101135254 2023-01-24 00:19:53.913640: step: 4/463, loss: 1.3735284805297852 2023-01-24 00:19:54.537402: step: 6/463, loss: 0.8099331259727478 2023-01-24 00:19:55.145842: step: 8/463, loss: 1.2316306829452515 2023-01-24 00:19:55.794007: step: 10/463, loss: 1.097306251525879 2023-01-24 00:19:56.413173: step: 12/463, loss: 1.6339104175567627 2023-01-24 00:19:57.039429: step: 14/463, loss: 1.1404949426651 2023-01-24 00:19:57.672066: step: 16/463, loss: 1.3325152397155762 2023-01-24 00:19:58.293031: step: 18/463, loss: 7.127131462097168 2023-01-24 00:19:58.919587: step: 20/463, loss: 1.808300495147705 2023-01-24 00:19:59.525788: step: 22/463, loss: 0.4308868944644928 2023-01-24 00:20:00.186902: step: 24/463, loss: 6.343321323394775 2023-01-24 00:20:00.823286: step: 26/463, loss: 4.054040431976318 2023-01-24 00:20:01.537090: step: 28/463, loss: 2.443748950958252 2023-01-24 00:20:02.198263: step: 30/463, loss: 0.453969806432724 2023-01-24 00:20:02.877061: step: 32/463, loss: 1.8452019691467285 2023-01-24 00:20:03.495724: step: 34/463, loss: 0.8011531829833984 2023-01-24 00:20:04.086812: step: 36/463, loss: 0.26531073451042175 2023-01-24 00:20:04.759128: step: 38/463, loss: 6.34611177444458 2023-01-24 00:20:05.415232: step: 40/463, loss: 2.094949722290039 2023-01-24 00:20:05.987403: step: 42/463, loss: 1.7757618427276611 2023-01-24 00:20:06.550647: step: 44/463, loss: 0.9916521906852722 2023-01-24 00:20:07.154403: step: 46/463, loss: 0.39908546209335327 2023-01-24 00:20:07.773752: step: 48/463, loss: 0.6339250206947327 2023-01-24 00:20:08.350657: step: 50/463, loss: 0.25349336862564087 2023-01-24 00:20:08.959669: step: 52/463, loss: 0.5262441635131836 2023-01-24 00:20:09.596888: step: 54/463, loss: 0.5490635633468628 2023-01-24 00:20:10.182268: step: 56/463, loss: 3.83716082572937 2023-01-24 00:20:10.773421: step: 58/463, loss: 1.4231542348861694 2023-01-24 00:20:11.348522: step: 60/463, loss: 0.2889988422393799 2023-01-24 00:20:12.003779: step: 62/463, loss: 0.4947792589664459 2023-01-24 00:20:12.565114: step: 64/463, loss: 2.2839412689208984 2023-01-24 00:20:13.160001: step: 66/463, loss: 0.31450018286705017 2023-01-24 00:20:13.787482: step: 68/463, loss: 2.052475929260254 2023-01-24 00:20:14.359308: step: 70/463, loss: 0.9109322428703308 2023-01-24 00:20:14.979038: step: 72/463, loss: 3.709829092025757 2023-01-24 00:20:15.557917: step: 74/463, loss: 5.1272501945495605 2023-01-24 00:20:16.154511: step: 76/463, loss: 0.5184556841850281 2023-01-24 00:20:16.756201: step: 78/463, loss: 0.8050916790962219 2023-01-24 00:20:17.367975: step: 80/463, loss: 0.8729863166809082 2023-01-24 00:20:17.984610: step: 82/463, loss: 1.5307533740997314 2023-01-24 00:20:18.553310: step: 84/463, loss: 1.6778154373168945 2023-01-24 00:20:19.180428: step: 86/463, loss: 0.26905369758605957 2023-01-24 00:20:19.780832: step: 88/463, loss: 1.1601436138153076 2023-01-24 00:20:20.426320: step: 90/463, loss: 1.824967861175537 2023-01-24 00:20:21.082007: step: 92/463, loss: 1.315454363822937 2023-01-24 00:20:21.783732: step: 94/463, loss: 0.28194063901901245 2023-01-24 00:20:22.385297: step: 96/463, loss: 3.870901346206665 2023-01-24 00:20:22.986655: step: 98/463, loss: 0.9389933347702026 2023-01-24 00:20:23.663402: step: 100/463, loss: 5.372572898864746 2023-01-24 00:20:24.354476: step: 102/463, loss: 0.6139791011810303 2023-01-24 00:20:24.959619: step: 104/463, loss: 1.6130611896514893 2023-01-24 00:20:25.553351: step: 106/463, loss: 0.558000922203064 2023-01-24 00:20:26.165836: step: 108/463, loss: 0.8126239776611328 2023-01-24 00:20:26.805099: step: 110/463, loss: 1.028892159461975 2023-01-24 00:20:27.438916: step: 112/463, loss: 1.3918007612228394 2023-01-24 00:20:28.032215: step: 114/463, loss: 0.6031695604324341 2023-01-24 00:20:28.589504: step: 116/463, loss: 1.7772356271743774 2023-01-24 00:20:29.198132: step: 118/463, loss: 0.6793345212936401 2023-01-24 00:20:29.800313: step: 120/463, loss: 0.5006611943244934 2023-01-24 00:20:30.408069: step: 122/463, loss: 0.4140205979347229 2023-01-24 00:20:31.024658: step: 124/463, loss: 1.1413962841033936 2023-01-24 00:20:31.617751: step: 126/463, loss: 0.2686176896095276 2023-01-24 00:20:32.236944: step: 128/463, loss: 0.8819482326507568 2023-01-24 00:20:32.877971: step: 130/463, loss: 1.0352551937103271 2023-01-24 00:20:33.497930: step: 132/463, loss: 2.252594232559204 2023-01-24 00:20:34.095880: step: 134/463, loss: 0.8180625438690186 2023-01-24 00:20:34.681837: step: 136/463, loss: 0.4154853820800781 2023-01-24 00:20:35.381710: step: 138/463, loss: 4.321404457092285 2023-01-24 00:20:36.034957: step: 140/463, loss: 0.5059050917625427 2023-01-24 00:20:36.648451: step: 142/463, loss: 5.630484580993652 2023-01-24 00:20:37.285594: step: 144/463, loss: 0.8276830315589905 2023-01-24 00:20:37.869915: step: 146/463, loss: 0.6797397136688232 2023-01-24 00:20:38.470603: step: 148/463, loss: 0.3368200361728668 2023-01-24 00:20:39.023699: step: 150/463, loss: 1.7757160663604736 2023-01-24 00:20:39.642035: step: 152/463, loss: 0.5957731604576111 2023-01-24 00:20:40.348705: step: 154/463, loss: 0.9258847236633301 2023-01-24 00:20:40.947427: step: 156/463, loss: 0.26371699571609497 2023-01-24 00:20:41.514697: step: 158/463, loss: 1.1446921825408936 2023-01-24 00:20:42.133550: step: 160/463, loss: 0.6360496878623962 2023-01-24 00:20:42.782613: step: 162/463, loss: 1.1370587348937988 2023-01-24 00:20:43.327208: step: 164/463, loss: 0.8999028205871582 2023-01-24 00:20:43.878402: step: 166/463, loss: 2.5184946060180664 2023-01-24 00:20:44.522279: step: 168/463, loss: 0.1822376251220703 2023-01-24 00:20:45.126650: step: 170/463, loss: 1.5731807947158813 2023-01-24 00:20:45.721294: step: 172/463, loss: 0.47844138741493225 2023-01-24 00:20:46.320889: step: 174/463, loss: 0.6122033596038818 2023-01-24 00:20:46.900353: step: 176/463, loss: 1.179314136505127 2023-01-24 00:20:47.497593: step: 178/463, loss: 1.4083936214447021 2023-01-24 00:20:48.112843: step: 180/463, loss: 1.178756833076477 2023-01-24 00:20:48.688605: step: 182/463, loss: 1.239646077156067 2023-01-24 00:20:49.287515: step: 184/463, loss: 0.27436578273773193 2023-01-24 00:20:49.949912: step: 186/463, loss: 0.5282583832740784 2023-01-24 00:20:50.487593: step: 188/463, loss: 1.8934845924377441 2023-01-24 00:20:51.143495: step: 190/463, loss: 0.856751561164856 2023-01-24 00:20:51.795749: step: 192/463, loss: 0.8625920414924622 2023-01-24 00:20:52.351548: step: 194/463, loss: 0.35751649737358093 2023-01-24 00:20:52.977714: step: 196/463, loss: 0.6424140334129333 2023-01-24 00:20:53.612901: step: 198/463, loss: 0.2851732671260834 2023-01-24 00:20:54.278215: step: 200/463, loss: 1.6641547679901123 2023-01-24 00:20:54.930481: step: 202/463, loss: 1.0294564962387085 2023-01-24 00:20:55.553756: step: 204/463, loss: 0.5370976328849792 2023-01-24 00:20:56.228331: step: 206/463, loss: 0.8250738382339478 2023-01-24 00:20:56.966848: step: 208/463, loss: 0.9348017573356628 2023-01-24 00:20:57.603431: step: 210/463, loss: 0.42686107754707336 2023-01-24 00:20:58.330229: step: 212/463, loss: 0.43081042170524597 2023-01-24 00:20:58.959796: step: 214/463, loss: 1.2076212167739868 2023-01-24 00:20:59.528067: step: 216/463, loss: 0.5701276063919067 2023-01-24 00:21:00.126308: step: 218/463, loss: 0.30391037464141846 2023-01-24 00:21:00.777892: step: 220/463, loss: 0.748412013053894 2023-01-24 00:21:01.337681: step: 222/463, loss: 0.6345020532608032 2023-01-24 00:21:01.946989: step: 224/463, loss: 1.4756361246109009 2023-01-24 00:21:02.532204: step: 226/463, loss: 1.0517140626907349 2023-01-24 00:21:03.142210: step: 228/463, loss: 0.2785537540912628 2023-01-24 00:21:03.792395: step: 230/463, loss: 1.8610318899154663 2023-01-24 00:21:04.449640: step: 232/463, loss: 2.6059792041778564 2023-01-24 00:21:05.098675: step: 234/463, loss: 1.5763198137283325 2023-01-24 00:21:05.718943: step: 236/463, loss: 0.27505967020988464 2023-01-24 00:21:06.288590: step: 238/463, loss: 6.978393077850342 2023-01-24 00:21:06.925008: step: 240/463, loss: 0.27931398153305054 2023-01-24 00:21:07.519266: step: 242/463, loss: 3.0710248947143555 2023-01-24 00:21:08.119263: step: 244/463, loss: 3.735290050506592 2023-01-24 00:21:08.775307: step: 246/463, loss: 0.9977658987045288 2023-01-24 00:21:09.355640: step: 248/463, loss: 7.581729888916016 2023-01-24 00:21:09.957459: step: 250/463, loss: 1.1610678434371948 2023-01-24 00:21:10.559692: step: 252/463, loss: 0.419791042804718 2023-01-24 00:21:11.180185: step: 254/463, loss: 0.5280798673629761 2023-01-24 00:21:11.754141: step: 256/463, loss: 1.2745078802108765 2023-01-24 00:21:12.358147: step: 258/463, loss: 0.6631940603256226 2023-01-24 00:21:12.932048: step: 260/463, loss: 0.5666185617446899 2023-01-24 00:21:13.505610: step: 262/463, loss: 0.5521462559700012 2023-01-24 00:21:14.154381: step: 264/463, loss: 0.7782831192016602 2023-01-24 00:21:14.715002: step: 266/463, loss: 2.3119421005249023 2023-01-24 00:21:15.316488: step: 268/463, loss: 0.17674469947814941 2023-01-24 00:21:15.989274: step: 270/463, loss: 0.24772882461547852 2023-01-24 00:21:16.568946: step: 272/463, loss: 0.35136878490448 2023-01-24 00:21:17.206461: step: 274/463, loss: 1.448638916015625 2023-01-24 00:21:17.873801: step: 276/463, loss: 0.6813992261886597 2023-01-24 00:21:18.427646: step: 278/463, loss: 0.5054275393486023 2023-01-24 00:21:19.141829: step: 280/463, loss: 0.6831744909286499 2023-01-24 00:21:19.696724: step: 282/463, loss: 2.321988344192505 2023-01-24 00:21:20.314279: step: 284/463, loss: 0.4068462550640106 2023-01-24 00:21:20.937093: step: 286/463, loss: 1.177586317062378 2023-01-24 00:21:21.554289: step: 288/463, loss: 0.40971463918685913 2023-01-24 00:21:22.161374: step: 290/463, loss: 0.6653015613555908 2023-01-24 00:21:22.730315: step: 292/463, loss: 2.758209228515625 2023-01-24 00:21:23.372900: step: 294/463, loss: 1.2204540967941284 2023-01-24 00:21:23.987839: step: 296/463, loss: 0.8606878519058228 2023-01-24 00:21:24.618585: step: 298/463, loss: 0.4252905249595642 2023-01-24 00:21:25.327900: step: 300/463, loss: 0.7016019821166992 2023-01-24 00:21:25.961594: step: 302/463, loss: 1.239556074142456 2023-01-24 00:21:26.576187: step: 304/463, loss: 0.8451669216156006 2023-01-24 00:21:27.184940: step: 306/463, loss: 0.2898506224155426 2023-01-24 00:21:27.801823: step: 308/463, loss: 1.11007821559906 2023-01-24 00:21:28.561261: step: 310/463, loss: 1.357858419418335 2023-01-24 00:21:29.197296: step: 312/463, loss: 0.8738505840301514 2023-01-24 00:21:29.773155: step: 314/463, loss: 0.8062949776649475 2023-01-24 00:21:30.401826: step: 316/463, loss: 0.28662511706352234 2023-01-24 00:21:31.051247: step: 318/463, loss: 1.0566612482070923 2023-01-24 00:21:31.713768: step: 320/463, loss: 4.029036045074463 2023-01-24 00:21:32.406811: step: 322/463, loss: 0.33994489908218384 2023-01-24 00:21:33.041086: step: 324/463, loss: 0.6363861560821533 2023-01-24 00:21:33.636459: step: 326/463, loss: 0.17141923308372498 2023-01-24 00:21:34.280326: step: 328/463, loss: 1.4284721612930298 2023-01-24 00:21:34.886469: step: 330/463, loss: 0.9983595609664917 2023-01-24 00:21:35.546209: step: 332/463, loss: 0.5424853563308716 2023-01-24 00:21:36.155254: step: 334/463, loss: 0.4410066306591034 2023-01-24 00:21:36.793307: step: 336/463, loss: 0.5908340811729431 2023-01-24 00:21:37.530208: step: 338/463, loss: 1.1698366403579712 2023-01-24 00:21:38.109124: step: 340/463, loss: 1.2951130867004395 2023-01-24 00:21:38.723604: step: 342/463, loss: 5.039706230163574 2023-01-24 00:21:39.322555: step: 344/463, loss: 0.8472703099250793 2023-01-24 00:21:39.895242: step: 346/463, loss: 0.4648662507534027 2023-01-24 00:21:40.538911: step: 348/463, loss: 1.3336620330810547 2023-01-24 00:21:41.158325: step: 350/463, loss: 0.7034398913383484 2023-01-24 00:21:41.820758: step: 352/463, loss: 2.906385660171509 2023-01-24 00:21:42.399690: step: 354/463, loss: 0.6561774015426636 2023-01-24 00:21:42.960141: step: 356/463, loss: 1.2728078365325928 2023-01-24 00:21:43.498818: step: 358/463, loss: 0.570324718952179 2023-01-24 00:21:44.123345: step: 360/463, loss: 2.1639058589935303 2023-01-24 00:21:44.757609: step: 362/463, loss: 0.8726208806037903 2023-01-24 00:21:45.402629: step: 364/463, loss: 0.8003362417221069 2023-01-24 00:21:46.033151: step: 366/463, loss: 0.3665609359741211 2023-01-24 00:21:46.662064: step: 368/463, loss: 2.069092035293579 2023-01-24 00:21:47.283107: step: 370/463, loss: 0.6988101005554199 2023-01-24 00:21:47.971430: step: 372/463, loss: 0.3569216728210449 2023-01-24 00:21:48.628048: step: 374/463, loss: 0.4643917977809906 2023-01-24 00:21:49.266844: step: 376/463, loss: 0.5362979769706726 2023-01-24 00:21:49.835457: step: 378/463, loss: 0.45398181676864624 2023-01-24 00:21:50.515394: step: 380/463, loss: 1.6672056913375854 2023-01-24 00:21:51.104172: step: 382/463, loss: 12.548444747924805 2023-01-24 00:21:51.672719: step: 384/463, loss: 0.3443489670753479 2023-01-24 00:21:52.253572: step: 386/463, loss: 0.6675260663032532 2023-01-24 00:21:52.811578: step: 388/463, loss: 0.5176720023155212 2023-01-24 00:21:53.416789: step: 390/463, loss: 2.1954400539398193 2023-01-24 00:21:53.974609: step: 392/463, loss: 2.113750457763672 2023-01-24 00:21:54.569617: step: 394/463, loss: 0.849291205406189 2023-01-24 00:21:55.207114: step: 396/463, loss: 0.6472211480140686 2023-01-24 00:21:55.838931: step: 398/463, loss: 0.7684171795845032 2023-01-24 00:21:56.454380: step: 400/463, loss: 0.21302194893360138 2023-01-24 00:21:57.064554: step: 402/463, loss: 0.29459697008132935 2023-01-24 00:21:57.675797: step: 404/463, loss: 0.871033251285553 2023-01-24 00:21:58.280698: step: 406/463, loss: 3.115330934524536 2023-01-24 00:21:58.929240: step: 408/463, loss: 10.548649787902832 2023-01-24 00:21:59.502670: step: 410/463, loss: 2.4393489360809326 2023-01-24 00:22:00.128183: step: 412/463, loss: 0.8470199108123779 2023-01-24 00:22:00.814495: step: 414/463, loss: 0.917614758014679 2023-01-24 00:22:01.398568: step: 416/463, loss: 1.7795963287353516 2023-01-24 00:22:02.017760: step: 418/463, loss: 0.9657366275787354 2023-01-24 00:22:02.648753: step: 420/463, loss: 0.5376374125480652 2023-01-24 00:22:03.273621: step: 422/463, loss: 1.7917696237564087 2023-01-24 00:22:03.968534: step: 424/463, loss: 0.6141297817230225 2023-01-24 00:22:04.601842: step: 426/463, loss: 1.829071044921875 2023-01-24 00:22:05.184732: step: 428/463, loss: 1.31123685836792 2023-01-24 00:22:05.774115: step: 430/463, loss: 1.9714741706848145 2023-01-24 00:22:06.397999: step: 432/463, loss: 1.7060784101486206 2023-01-24 00:22:07.083951: step: 434/463, loss: 1.519418478012085 2023-01-24 00:22:07.655918: step: 436/463, loss: 1.744737982749939 2023-01-24 00:22:08.235974: step: 438/463, loss: 0.24459630250930786 2023-01-24 00:22:08.801904: step: 440/463, loss: 0.5234218239784241 2023-01-24 00:22:09.444351: step: 442/463, loss: 0.6314299702644348 2023-01-24 00:22:10.048112: step: 444/463, loss: 0.5994163155555725 2023-01-24 00:22:10.693814: step: 446/463, loss: 0.8587840795516968 2023-01-24 00:22:11.299441: step: 448/463, loss: 1.207343339920044 2023-01-24 00:22:11.929825: step: 450/463, loss: 0.9677929878234863 2023-01-24 00:22:12.504318: step: 452/463, loss: 0.2548527717590332 2023-01-24 00:22:13.125191: step: 454/463, loss: 0.7236099243164062 2023-01-24 00:22:13.654872: step: 456/463, loss: 0.15329702198505402 2023-01-24 00:22:14.291224: step: 458/463, loss: 0.9661197662353516 2023-01-24 00:22:14.909986: step: 460/463, loss: 0.6926252841949463 2023-01-24 00:22:15.564388: step: 462/463, loss: 1.2916291952133179 2023-01-24 00:22:16.174261: step: 464/463, loss: 1.450941562652588 2023-01-24 00:22:16.799416: step: 466/463, loss: 0.4431131184101105 2023-01-24 00:22:17.444656: step: 468/463, loss: 1.0406421422958374 2023-01-24 00:22:18.082665: step: 470/463, loss: 0.3492239713668823 2023-01-24 00:22:18.706532: step: 472/463, loss: 0.7932349443435669 2023-01-24 00:22:19.324795: step: 474/463, loss: 0.2141757756471634 2023-01-24 00:22:19.923988: step: 476/463, loss: 1.2393091917037964 2023-01-24 00:22:20.521231: step: 478/463, loss: 0.7231630086898804 2023-01-24 00:22:21.067084: step: 480/463, loss: 1.9460698366165161 2023-01-24 00:22:21.658090: step: 482/463, loss: 0.7675976753234863 2023-01-24 00:22:22.210529: step: 484/463, loss: 0.8713288307189941 2023-01-24 00:22:22.780734: step: 486/463, loss: 4.226635932922363 2023-01-24 00:22:23.350184: step: 488/463, loss: 0.9001918435096741 2023-01-24 00:22:23.928259: step: 490/463, loss: 1.3170396089553833 2023-01-24 00:22:24.538161: step: 492/463, loss: 0.19038604199886322 2023-01-24 00:22:25.116398: step: 494/463, loss: 0.71153724193573 2023-01-24 00:22:25.763325: step: 496/463, loss: 1.6241918802261353 2023-01-24 00:22:26.347593: step: 498/463, loss: 2.139375925064087 2023-01-24 00:22:26.915667: step: 500/463, loss: 0.3230654299259186 2023-01-24 00:22:27.552498: step: 502/463, loss: 1.6597295999526978 2023-01-24 00:22:28.133324: step: 504/463, loss: 1.501914143562317 2023-01-24 00:22:28.776627: step: 506/463, loss: 0.7005184292793274 2023-01-24 00:22:29.452172: step: 508/463, loss: 1.976676344871521 2023-01-24 00:22:30.098372: step: 510/463, loss: 2.682526111602783 2023-01-24 00:22:30.721188: step: 512/463, loss: 0.5501776933670044 2023-01-24 00:22:31.293808: step: 514/463, loss: 0.16431453824043274 2023-01-24 00:22:31.962565: step: 516/463, loss: 0.6123006939888 2023-01-24 00:22:32.563836: step: 518/463, loss: 1.1639758348464966 2023-01-24 00:22:33.273353: step: 520/463, loss: 1.2645277976989746 2023-01-24 00:22:33.916910: step: 522/463, loss: 0.31428611278533936 2023-01-24 00:22:34.546457: step: 524/463, loss: 1.8434460163116455 2023-01-24 00:22:35.145748: step: 526/463, loss: 0.513737678527832 2023-01-24 00:22:35.797456: step: 528/463, loss: 1.9712008237838745 2023-01-24 00:22:36.419440: step: 530/463, loss: 1.06448495388031 2023-01-24 00:22:37.022578: step: 532/463, loss: 2.177704095840454 2023-01-24 00:22:37.660050: step: 534/463, loss: 0.7558973431587219 2023-01-24 00:22:38.288890: step: 536/463, loss: 1.3033359050750732 2023-01-24 00:22:38.896728: step: 538/463, loss: 0.44104665517807007 2023-01-24 00:22:39.545463: step: 540/463, loss: 1.5810799598693848 2023-01-24 00:22:40.160006: step: 542/463, loss: 1.0467878580093384 2023-01-24 00:22:40.748543: step: 544/463, loss: 2.053138256072998 2023-01-24 00:22:41.312149: step: 546/463, loss: 0.5331852436065674 2023-01-24 00:22:41.948228: step: 548/463, loss: 0.8410186767578125 2023-01-24 00:22:42.551705: step: 550/463, loss: 1.1138092279434204 2023-01-24 00:22:43.208935: step: 552/463, loss: 1.8896617889404297 2023-01-24 00:22:43.831539: step: 554/463, loss: 0.5497190952301025 2023-01-24 00:22:44.372336: step: 556/463, loss: 0.8403732180595398 2023-01-24 00:22:44.965166: step: 558/463, loss: 0.7325672507286072 2023-01-24 00:22:45.579938: step: 560/463, loss: 0.8002176880836487 2023-01-24 00:22:46.190636: step: 562/463, loss: 0.8524148464202881 2023-01-24 00:22:46.826976: step: 564/463, loss: 0.6542765498161316 2023-01-24 00:22:47.421815: step: 566/463, loss: 0.15191848576068878 2023-01-24 00:22:48.025651: step: 568/463, loss: 1.5579509735107422 2023-01-24 00:22:48.661851: step: 570/463, loss: 0.545935332775116 2023-01-24 00:22:49.286776: step: 572/463, loss: 0.6627711057662964 2023-01-24 00:22:49.909733: step: 574/463, loss: 0.7539028525352478 2023-01-24 00:22:50.514195: step: 576/463, loss: 0.9779089689254761 2023-01-24 00:22:51.054686: step: 578/463, loss: 0.3440927565097809 2023-01-24 00:22:51.644650: step: 580/463, loss: 0.7873618006706238 2023-01-24 00:22:52.248980: step: 582/463, loss: 0.5235024690628052 2023-01-24 00:22:52.848847: step: 584/463, loss: 0.4406951665878296 2023-01-24 00:22:53.446615: step: 586/463, loss: 1.1145025491714478 2023-01-24 00:22:54.097741: step: 588/463, loss: 0.8192187547683716 2023-01-24 00:22:54.777379: step: 590/463, loss: 0.5468909740447998 2023-01-24 00:22:55.350048: step: 592/463, loss: 0.8540785312652588 2023-01-24 00:22:55.922733: step: 594/463, loss: 0.48876598477363586 2023-01-24 00:22:56.545604: step: 596/463, loss: 7.115334987640381 2023-01-24 00:22:57.234303: step: 598/463, loss: 1.5285708904266357 2023-01-24 00:22:57.864847: step: 600/463, loss: 0.5567865371704102 2023-01-24 00:22:58.532146: step: 602/463, loss: 0.6331756114959717 2023-01-24 00:22:59.162266: step: 604/463, loss: 0.25854185223579407 2023-01-24 00:22:59.741780: step: 606/463, loss: 0.7981890439987183 2023-01-24 00:23:00.367259: step: 608/463, loss: 1.4998795986175537 2023-01-24 00:23:01.023294: step: 610/463, loss: 0.7032451033592224 2023-01-24 00:23:01.629813: step: 612/463, loss: 1.8100223541259766 2023-01-24 00:23:02.294030: step: 614/463, loss: 0.6211801767349243 2023-01-24 00:23:02.952587: step: 616/463, loss: 0.9448941349983215 2023-01-24 00:23:03.571148: step: 618/463, loss: 1.4542386531829834 2023-01-24 00:23:04.246849: step: 620/463, loss: 1.0262504816055298 2023-01-24 00:23:04.829125: step: 622/463, loss: 0.3168994188308716 2023-01-24 00:23:05.455887: step: 624/463, loss: 0.4160541892051697 2023-01-24 00:23:06.025333: step: 626/463, loss: 0.16113285720348358 2023-01-24 00:23:06.657977: step: 628/463, loss: 0.5850247144699097 2023-01-24 00:23:07.270173: step: 630/463, loss: 1.9325248003005981 2023-01-24 00:23:07.858160: step: 632/463, loss: 0.4104180634021759 2023-01-24 00:23:08.489615: step: 634/463, loss: 3.36464262008667 2023-01-24 00:23:09.063776: step: 636/463, loss: 2.89518404006958 2023-01-24 00:23:09.691772: step: 638/463, loss: 1.1435872316360474 2023-01-24 00:23:10.345582: step: 640/463, loss: 0.5668440461158752 2023-01-24 00:23:10.985705: step: 642/463, loss: 2.265939474105835 2023-01-24 00:23:11.629461: step: 644/463, loss: 0.8448171615600586 2023-01-24 00:23:12.250352: step: 646/463, loss: 0.904466986656189 2023-01-24 00:23:12.956818: step: 648/463, loss: 0.6500080823898315 2023-01-24 00:23:13.612629: step: 650/463, loss: 1.0509213209152222 2023-01-24 00:23:14.218521: step: 652/463, loss: 2.3355085849761963 2023-01-24 00:23:14.873293: step: 654/463, loss: 2.0012452602386475 2023-01-24 00:23:15.534038: step: 656/463, loss: 0.36765116453170776 2023-01-24 00:23:16.145337: step: 658/463, loss: 0.7038108110427856 2023-01-24 00:23:16.789701: step: 660/463, loss: 4.1042160987854 2023-01-24 00:23:17.372512: step: 662/463, loss: 0.7068663835525513 2023-01-24 00:23:17.994906: step: 664/463, loss: 0.8961589336395264 2023-01-24 00:23:18.601624: step: 666/463, loss: 0.7031440734863281 2023-01-24 00:23:19.204336: step: 668/463, loss: 0.6003390550613403 2023-01-24 00:23:19.858589: step: 670/463, loss: 2.03519868850708 2023-01-24 00:23:20.483335: step: 672/463, loss: 0.9938336610794067 2023-01-24 00:23:21.090615: step: 674/463, loss: 0.3683180809020996 2023-01-24 00:23:21.654982: step: 676/463, loss: 0.5071661472320557 2023-01-24 00:23:22.365515: step: 678/463, loss: 1.2671972513198853 2023-01-24 00:23:23.041830: step: 680/463, loss: 7.186624526977539 2023-01-24 00:23:23.676880: step: 682/463, loss: 0.9342090487480164 2023-01-24 00:23:24.314488: step: 684/463, loss: 0.7077946662902832 2023-01-24 00:23:24.859022: step: 686/463, loss: 0.5273284912109375 2023-01-24 00:23:25.430923: step: 688/463, loss: 0.4433707594871521 2023-01-24 00:23:26.047487: step: 690/463, loss: 2.368081569671631 2023-01-24 00:23:26.734578: step: 692/463, loss: 1.9197748899459839 2023-01-24 00:23:27.355864: step: 694/463, loss: 9.760427474975586 2023-01-24 00:23:28.029207: step: 696/463, loss: 0.564527690410614 2023-01-24 00:23:28.597154: step: 698/463, loss: 0.6014097929000854 2023-01-24 00:23:29.183410: step: 700/463, loss: 0.82591313123703 2023-01-24 00:23:29.952874: step: 702/463, loss: 1.2693265676498413 2023-01-24 00:23:30.580297: step: 704/463, loss: 0.3230922520160675 2023-01-24 00:23:31.163988: step: 706/463, loss: 1.8927083015441895 2023-01-24 00:23:31.916558: step: 708/463, loss: 0.9642900228500366 2023-01-24 00:23:32.580630: step: 710/463, loss: 0.3218848705291748 2023-01-24 00:23:33.189540: step: 712/463, loss: 0.43335258960723877 2023-01-24 00:23:33.815963: step: 714/463, loss: 1.2807881832122803 2023-01-24 00:23:34.428638: step: 716/463, loss: 0.17460721731185913 2023-01-24 00:23:35.108226: step: 718/463, loss: 1.3837840557098389 2023-01-24 00:23:35.737030: step: 720/463, loss: 0.69569993019104 2023-01-24 00:23:36.268886: step: 722/463, loss: 0.9557390213012695 2023-01-24 00:23:36.877682: step: 724/463, loss: 1.474457859992981 2023-01-24 00:23:37.481155: step: 726/463, loss: 6.387290000915527 2023-01-24 00:23:38.198170: step: 728/463, loss: 0.5838481783866882 2023-01-24 00:23:38.817647: step: 730/463, loss: 1.8129897117614746 2023-01-24 00:23:39.372141: step: 732/463, loss: 1.4038245677947998 2023-01-24 00:23:40.019046: step: 734/463, loss: 2.108640432357788 2023-01-24 00:23:40.626337: step: 736/463, loss: 0.3806455433368683 2023-01-24 00:23:41.225844: step: 738/463, loss: 0.6286329030990601 2023-01-24 00:23:41.899128: step: 740/463, loss: 1.0798470973968506 2023-01-24 00:23:42.520057: step: 742/463, loss: 1.1314997673034668 2023-01-24 00:23:43.150346: step: 744/463, loss: 0.7847237586975098 2023-01-24 00:23:43.814619: step: 746/463, loss: 1.1423081159591675 2023-01-24 00:23:44.421145: step: 748/463, loss: 1.2383679151535034 2023-01-24 00:23:45.013073: step: 750/463, loss: 3.0755460262298584 2023-01-24 00:23:45.576341: step: 752/463, loss: 0.8417287468910217 2023-01-24 00:23:46.236141: step: 754/463, loss: 1.211743712425232 2023-01-24 00:23:46.829959: step: 756/463, loss: 0.2925609052181244 2023-01-24 00:23:47.435930: step: 758/463, loss: 0.8931692838668823 2023-01-24 00:23:48.050066: step: 760/463, loss: 1.3160549402236938 2023-01-24 00:23:48.719340: step: 762/463, loss: 5.234009742736816 2023-01-24 00:23:49.290872: step: 764/463, loss: 0.8863605260848999 2023-01-24 00:23:49.947349: step: 766/463, loss: 0.5623803734779358 2023-01-24 00:23:50.512343: step: 768/463, loss: 0.270097017288208 2023-01-24 00:23:51.132854: step: 770/463, loss: 1.0369184017181396 2023-01-24 00:23:51.729565: step: 772/463, loss: 0.25732794404029846 2023-01-24 00:23:52.299454: step: 774/463, loss: 0.9469619989395142 2023-01-24 00:23:52.878907: step: 776/463, loss: 1.1866776943206787 2023-01-24 00:23:53.463738: step: 778/463, loss: 1.0973572731018066 2023-01-24 00:23:54.128632: step: 780/463, loss: 0.8199627995491028 2023-01-24 00:23:54.725982: step: 782/463, loss: 2.367387533187866 2023-01-24 00:23:55.389062: step: 784/463, loss: 0.8228763341903687 2023-01-24 00:23:55.995615: step: 786/463, loss: 1.0637669563293457 2023-01-24 00:23:56.620878: step: 788/463, loss: 0.5030679106712341 2023-01-24 00:23:57.290633: step: 790/463, loss: 1.4799129962921143 2023-01-24 00:23:57.914105: step: 792/463, loss: 0.46158695220947266 2023-01-24 00:23:58.533645: step: 794/463, loss: 1.2611229419708252 2023-01-24 00:23:59.166741: step: 796/463, loss: 0.6380144357681274 2023-01-24 00:23:59.783678: step: 798/463, loss: 1.0006062984466553 2023-01-24 00:24:00.432515: step: 800/463, loss: 0.5657363533973694 2023-01-24 00:24:00.999763: step: 802/463, loss: 0.577856719493866 2023-01-24 00:24:01.611042: step: 804/463, loss: 0.7096571326255798 2023-01-24 00:24:02.223010: step: 806/463, loss: 0.44972920417785645 2023-01-24 00:24:02.818021: step: 808/463, loss: 1.2680013179779053 2023-01-24 00:24:03.426460: step: 810/463, loss: 0.42306584119796753 2023-01-24 00:24:04.016463: step: 812/463, loss: 1.772107481956482 2023-01-24 00:24:04.618573: step: 814/463, loss: 0.429757297039032 2023-01-24 00:24:05.299466: step: 816/463, loss: 0.3739444315433502 2023-01-24 00:24:05.883319: step: 818/463, loss: 2.2081494331359863 2023-01-24 00:24:06.497486: step: 820/463, loss: 1.4641931056976318 2023-01-24 00:24:07.096019: step: 822/463, loss: 5.53977108001709 2023-01-24 00:24:07.677328: step: 824/463, loss: 0.5255829095840454 2023-01-24 00:24:08.352292: step: 826/463, loss: 0.7704782485961914 2023-01-24 00:24:08.966299: step: 828/463, loss: 0.2725366950035095 2023-01-24 00:24:09.559663: step: 830/463, loss: 1.444212555885315 2023-01-24 00:24:10.189686: step: 832/463, loss: 0.8818296194076538 2023-01-24 00:24:10.881274: step: 834/463, loss: 0.9003888368606567 2023-01-24 00:24:11.450516: step: 836/463, loss: 0.7898910045623779 2023-01-24 00:24:12.030251: step: 838/463, loss: 0.697838544845581 2023-01-24 00:24:12.671316: step: 840/463, loss: 0.5520128011703491 2023-01-24 00:24:13.359252: step: 842/463, loss: 0.26900333166122437 2023-01-24 00:24:13.939646: step: 844/463, loss: 0.30813395977020264 2023-01-24 00:24:14.498198: step: 846/463, loss: 0.35832253098487854 2023-01-24 00:24:15.082173: step: 848/463, loss: 0.689294159412384 2023-01-24 00:24:15.711506: step: 850/463, loss: 0.30226805806159973 2023-01-24 00:24:16.353684: step: 852/463, loss: 2.3166003227233887 2023-01-24 00:24:16.981406: step: 854/463, loss: 0.8949844837188721 2023-01-24 00:24:17.579977: step: 856/463, loss: 0.3378556966781616 2023-01-24 00:24:18.223798: step: 858/463, loss: 0.9642161726951599 2023-01-24 00:24:18.849664: step: 860/463, loss: 0.2953707277774811 2023-01-24 00:24:19.450967: step: 862/463, loss: 1.545053243637085 2023-01-24 00:24:20.033416: step: 864/463, loss: 0.3494056165218353 2023-01-24 00:24:20.637612: step: 866/463, loss: 0.14667749404907227 2023-01-24 00:24:21.242921: step: 868/463, loss: 0.5536177754402161 2023-01-24 00:24:21.849919: step: 870/463, loss: 0.861301600933075 2023-01-24 00:24:22.589819: step: 872/463, loss: 0.8773106932640076 2023-01-24 00:24:23.152024: step: 874/463, loss: 1.549638032913208 2023-01-24 00:24:23.732130: step: 876/463, loss: 0.4861299991607666 2023-01-24 00:24:24.321519: step: 878/463, loss: 0.571794331073761 2023-01-24 00:24:24.899187: step: 880/463, loss: 0.2687010169029236 2023-01-24 00:24:25.523300: step: 882/463, loss: 0.27729666233062744 2023-01-24 00:24:26.116621: step: 884/463, loss: 0.5071350932121277 2023-01-24 00:24:26.708427: step: 886/463, loss: 0.9567668437957764 2023-01-24 00:24:27.281530: step: 888/463, loss: 0.5104363560676575 2023-01-24 00:24:27.904856: step: 890/463, loss: 0.3868088722229004 2023-01-24 00:24:28.648693: step: 892/463, loss: 0.889945924282074 2023-01-24 00:24:29.139798: step: 894/463, loss: 0.7016943693161011 2023-01-24 00:24:29.748088: step: 896/463, loss: 0.6431083083152771 2023-01-24 00:24:30.350679: step: 898/463, loss: 1.282279372215271 2023-01-24 00:24:31.037473: step: 900/463, loss: 1.4700337648391724 2023-01-24 00:24:31.629998: step: 902/463, loss: 0.7349509000778198 2023-01-24 00:24:32.260222: step: 904/463, loss: 1.867394208908081 2023-01-24 00:24:32.939407: step: 906/463, loss: 1.5084949731826782 2023-01-24 00:24:33.590422: step: 908/463, loss: 0.7614808082580566 2023-01-24 00:24:34.215384: step: 910/463, loss: 0.258074015378952 2023-01-24 00:24:34.825284: step: 912/463, loss: 1.1549773216247559 2023-01-24 00:24:35.474726: step: 914/463, loss: 1.5394691228866577 2023-01-24 00:24:36.027715: step: 916/463, loss: 0.7869983315467834 2023-01-24 00:24:36.657095: step: 918/463, loss: 4.469120979309082 2023-01-24 00:24:37.283375: step: 920/463, loss: 0.9039833545684814 2023-01-24 00:24:37.915270: step: 922/463, loss: 1.4155858755111694 2023-01-24 00:24:38.610057: step: 924/463, loss: 1.495905876159668 2023-01-24 00:24:39.273235: step: 926/463, loss: 0.18362940847873688 ================================================== Loss: 1.260 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3545336737828291, 'r': 0.2571712065508021, 'f1': 0.2981040550138826}, 'combined': 0.2196556194839135, 'epoch': 3} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34385369627128226, 'r': 0.31703941720425566, 'f1': 0.32990259164929703}, 'combined': 0.25571397056069917, 'epoch': 3} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31420591787439617, 'r': 0.24636600378787882, 'f1': 0.27618099787685774}, 'combined': 0.20350178790926357, 'epoch': 3} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.326948061144331, 'r': 0.31854939535346743, 'f1': 0.32269409008854977}, 'combined': 0.2501265195901678, 'epoch': 3} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32896606559290387, 'r': 0.2542010506854257, 'f1': 0.286790928978429}, 'combined': 0.21131963187884242, 'epoch': 3} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3253325084748121, 'r': 0.3038426547039988, 'f1': 0.31422058218914484}, 'combined': 0.24355853739062902, 'epoch': 3} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35, 'r': 0.25, 'f1': 0.2916666666666667}, 'combined': 0.19444444444444445, 'epoch': 3} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.359375, 'r': 0.25, 'f1': 0.2948717948717949}, 'combined': 0.14743589743589744, 'epoch': 3} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.06896551724137931, 'f1': 0.1176470588235294}, 'combined': 0.07843137254901959, 'epoch': 3} New best chinese model... New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3545336737828291, 'r': 0.2571712065508021, 'f1': 0.2981040550138826}, 'combined': 0.2196556194839135, 'epoch': 3} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34385369627128226, 'r': 0.31703941720425566, 'f1': 0.32990259164929703}, 'combined': 0.25571397056069917, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35, 'r': 0.25, 'f1': 0.2916666666666667}, 'combined': 0.19444444444444445, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31420591787439617, 'r': 0.24636600378787882, 'f1': 0.27618099787685774}, 'combined': 0.20350178790926357, 'epoch': 3} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.326948061144331, 'r': 0.31854939535346743, 'f1': 0.32269409008854977}, 'combined': 0.2501265195901678, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.359375, 'r': 0.25, 'f1': 0.2948717948717949}, 'combined': 0.14743589743589744, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3033269710323153, 'r': 0.2355379888697903, 'f1': 0.26516856742697076}, 'combined': 0.19538736547250476, 'epoch': 2} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3496730766041415, 'r': 0.28738053586841744, 'f1': 0.315481253589422}, 'combined': 0.2445357085238582, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.7916666666666666, 'r': 0.16379310344827586, 'f1': 0.2714285714285714}, 'combined': 0.18095238095238092, 'epoch': 2} ****************************** Epoch: 4 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:27:31.221034: step: 2/463, loss: 1.5499814748764038 2023-01-24 00:27:31.772222: step: 4/463, loss: 1.3976256847381592 2023-01-24 00:27:32.397136: step: 6/463, loss: 0.7366541624069214 2023-01-24 00:27:33.036534: step: 8/463, loss: 1.052539587020874 2023-01-24 00:27:33.697736: step: 10/463, loss: 0.12517094612121582 2023-01-24 00:27:34.318486: step: 12/463, loss: 0.7128562927246094 2023-01-24 00:27:34.991557: step: 14/463, loss: 0.6162844896316528 2023-01-24 00:27:35.566687: step: 16/463, loss: 0.2992062270641327 2023-01-24 00:27:36.125133: step: 18/463, loss: 0.6701135635375977 2023-01-24 00:27:36.804013: step: 20/463, loss: 3.2410378456115723 2023-01-24 00:27:37.445774: step: 22/463, loss: 2.759457588195801 2023-01-24 00:27:38.022055: step: 24/463, loss: 0.7164468765258789 2023-01-24 00:27:38.634106: step: 26/463, loss: 0.4062047302722931 2023-01-24 00:27:39.257358: step: 28/463, loss: 0.6168721318244934 2023-01-24 00:27:39.874116: step: 30/463, loss: 0.9625312685966492 2023-01-24 00:27:40.569197: step: 32/463, loss: 0.5392823219299316 2023-01-24 00:27:41.150075: step: 34/463, loss: 0.3740987479686737 2023-01-24 00:27:41.803535: step: 36/463, loss: 2.635406255722046 2023-01-24 00:27:42.504212: step: 38/463, loss: 0.32080456614494324 2023-01-24 00:27:43.185828: step: 40/463, loss: 0.4820089340209961 2023-01-24 00:27:43.754917: step: 42/463, loss: 0.5408957004547119 2023-01-24 00:27:44.357593: step: 44/463, loss: 0.3159761428833008 2023-01-24 00:27:44.946807: step: 46/463, loss: 1.0234254598617554 2023-01-24 00:27:45.600219: step: 48/463, loss: 1.0249654054641724 2023-01-24 00:27:46.247714: step: 50/463, loss: 0.5312938690185547 2023-01-24 00:27:46.886926: step: 52/463, loss: 0.697744607925415 2023-01-24 00:27:47.484157: step: 54/463, loss: 0.4011181592941284 2023-01-24 00:27:48.052538: step: 56/463, loss: 1.4504947662353516 2023-01-24 00:27:48.674827: step: 58/463, loss: 0.7250833511352539 2023-01-24 00:27:49.271811: step: 60/463, loss: 0.9447992444038391 2023-01-24 00:27:49.890895: step: 62/463, loss: 1.2293415069580078 2023-01-24 00:27:50.451030: step: 64/463, loss: 1.1988518238067627 2023-01-24 00:27:51.034187: step: 66/463, loss: 0.6876519918441772 2023-01-24 00:27:51.757329: step: 68/463, loss: 1.082821249961853 2023-01-24 00:27:52.319429: step: 70/463, loss: 1.3291658163070679 2023-01-24 00:27:52.923077: step: 72/463, loss: 5.81356954574585 2023-01-24 00:27:53.464449: step: 74/463, loss: 0.2818446159362793 2023-01-24 00:27:54.096055: step: 76/463, loss: 0.5646530985832214 2023-01-24 00:27:54.721652: step: 78/463, loss: 0.5189517736434937 2023-01-24 00:27:55.319040: step: 80/463, loss: 1.4787445068359375 2023-01-24 00:27:55.917192: step: 82/463, loss: 0.20730076730251312 2023-01-24 00:27:56.584449: step: 84/463, loss: 0.991459310054779 2023-01-24 00:27:57.159100: step: 86/463, loss: 1.3527065515518188 2023-01-24 00:27:57.754077: step: 88/463, loss: 0.48701098561286926 2023-01-24 00:27:58.346648: step: 90/463, loss: 0.5792151093482971 2023-01-24 00:27:58.907075: step: 92/463, loss: 0.5322930812835693 2023-01-24 00:27:59.493413: step: 94/463, loss: 0.6045755743980408 2023-01-24 00:28:00.090503: step: 96/463, loss: 0.5694054961204529 2023-01-24 00:28:00.680894: step: 98/463, loss: 0.36010074615478516 2023-01-24 00:28:01.301627: step: 100/463, loss: 0.4744027256965637 2023-01-24 00:28:01.877108: step: 102/463, loss: 0.2686455547809601 2023-01-24 00:28:02.479679: step: 104/463, loss: 0.22884207963943481 2023-01-24 00:28:03.038318: step: 106/463, loss: 8.636054039001465 2023-01-24 00:28:03.623146: step: 108/463, loss: 0.4087449908256531 2023-01-24 00:28:04.215430: step: 110/463, loss: 0.696462869644165 2023-01-24 00:28:04.802393: step: 112/463, loss: 0.7888972759246826 2023-01-24 00:28:05.446210: step: 114/463, loss: 1.7524969577789307 2023-01-24 00:28:06.079680: step: 116/463, loss: 1.1989630460739136 2023-01-24 00:28:06.672185: step: 118/463, loss: 0.8862960338592529 2023-01-24 00:28:07.314049: step: 120/463, loss: 1.5011028051376343 2023-01-24 00:28:07.894438: step: 122/463, loss: 0.2632182538509369 2023-01-24 00:28:08.451617: step: 124/463, loss: 1.1665992736816406 2023-01-24 00:28:09.057473: step: 126/463, loss: 0.5007005929946899 2023-01-24 00:28:09.708690: step: 128/463, loss: 1.1219534873962402 2023-01-24 00:28:10.319956: step: 130/463, loss: 0.24945278465747833 2023-01-24 00:28:10.939180: step: 132/463, loss: 0.572050929069519 2023-01-24 00:28:11.515460: step: 134/463, loss: 0.5119114518165588 2023-01-24 00:28:12.275532: step: 136/463, loss: 1.209529161453247 2023-01-24 00:28:12.896188: step: 138/463, loss: 0.8966051340103149 2023-01-24 00:28:13.528091: step: 140/463, loss: 0.4535786509513855 2023-01-24 00:28:14.137645: step: 142/463, loss: 0.4864313006401062 2023-01-24 00:28:14.742321: step: 144/463, loss: 0.3802744150161743 2023-01-24 00:28:15.380959: step: 146/463, loss: 1.4790608882904053 2023-01-24 00:28:15.999577: step: 148/463, loss: 1.546055793762207 2023-01-24 00:28:16.607135: step: 150/463, loss: 1.0345149040222168 2023-01-24 00:28:17.212410: step: 152/463, loss: 1.5517895221710205 2023-01-24 00:28:17.843608: step: 154/463, loss: 1.5662128925323486 2023-01-24 00:28:18.423845: step: 156/463, loss: 0.9450207948684692 2023-01-24 00:28:19.033231: step: 158/463, loss: 1.6443309783935547 2023-01-24 00:28:19.688611: step: 160/463, loss: 1.9773542881011963 2023-01-24 00:28:20.313241: step: 162/463, loss: 1.3677631616592407 2023-01-24 00:28:20.974658: step: 164/463, loss: 0.1987934708595276 2023-01-24 00:28:21.583253: step: 166/463, loss: 1.2400168180465698 2023-01-24 00:28:22.158373: step: 168/463, loss: 0.9201279282569885 2023-01-24 00:28:22.781232: step: 170/463, loss: 5.982337951660156 2023-01-24 00:28:23.394607: step: 172/463, loss: 4.339910507202148 2023-01-24 00:28:23.974806: step: 174/463, loss: 0.9256488680839539 2023-01-24 00:28:24.609237: step: 176/463, loss: 1.2025197744369507 2023-01-24 00:28:25.324328: step: 178/463, loss: 1.4537386894226074 2023-01-24 00:28:25.870813: step: 180/463, loss: 1.1383411884307861 2023-01-24 00:28:26.452279: step: 182/463, loss: 0.48516348004341125 2023-01-24 00:28:27.077248: step: 184/463, loss: 0.6389771699905396 2023-01-24 00:28:27.800489: step: 186/463, loss: 1.2300013303756714 2023-01-24 00:28:28.465471: step: 188/463, loss: 1.5572624206542969 2023-01-24 00:28:29.087267: step: 190/463, loss: 0.558221161365509 2023-01-24 00:28:29.661077: step: 192/463, loss: 0.3494292199611664 2023-01-24 00:28:30.271559: step: 194/463, loss: 0.3570707440376282 2023-01-24 00:28:30.825117: step: 196/463, loss: 0.6325271129608154 2023-01-24 00:28:31.367004: step: 198/463, loss: 1.267531394958496 2023-01-24 00:28:32.004665: step: 200/463, loss: 0.9931960701942444 2023-01-24 00:28:32.597620: step: 202/463, loss: 1.6724361181259155 2023-01-24 00:28:33.149586: step: 204/463, loss: 0.859893798828125 2023-01-24 00:28:33.731170: step: 206/463, loss: 1.2097604274749756 2023-01-24 00:28:34.349101: step: 208/463, loss: 0.9325987100601196 2023-01-24 00:28:34.971233: step: 210/463, loss: 0.31368377804756165 2023-01-24 00:28:35.558621: step: 212/463, loss: 0.36260324716567993 2023-01-24 00:28:36.119607: step: 214/463, loss: 1.1381499767303467 2023-01-24 00:28:36.743907: step: 216/463, loss: 0.21593250334262848 2023-01-24 00:28:37.431407: step: 218/463, loss: 1.3254320621490479 2023-01-24 00:28:38.077632: step: 220/463, loss: 1.705629587173462 2023-01-24 00:28:38.695568: step: 222/463, loss: 0.4967641234397888 2023-01-24 00:28:39.333256: step: 224/463, loss: 0.9133996367454529 2023-01-24 00:28:40.055990: step: 226/463, loss: 3.405883550643921 2023-01-24 00:28:40.695134: step: 228/463, loss: 1.4448118209838867 2023-01-24 00:28:41.327346: step: 230/463, loss: 0.7092037200927734 2023-01-24 00:28:42.019827: step: 232/463, loss: 0.5238355398178101 2023-01-24 00:28:42.626219: step: 234/463, loss: 1.1063101291656494 2023-01-24 00:28:43.231012: step: 236/463, loss: 0.6502557992935181 2023-01-24 00:28:44.050669: step: 238/463, loss: 1.274713397026062 2023-01-24 00:28:44.622405: step: 240/463, loss: 0.704504668712616 2023-01-24 00:28:45.351260: step: 242/463, loss: 0.24314984679222107 2023-01-24 00:28:45.902548: step: 244/463, loss: 0.5813273191452026 2023-01-24 00:28:46.568399: step: 246/463, loss: 0.464899480342865 2023-01-24 00:28:47.149162: step: 248/463, loss: 0.5019469261169434 2023-01-24 00:28:47.710192: step: 250/463, loss: 0.8156411051750183 2023-01-24 00:28:48.318083: step: 252/463, loss: 1.0405614376068115 2023-01-24 00:28:48.965001: step: 254/463, loss: 0.28098031878471375 2023-01-24 00:28:49.578118: step: 256/463, loss: 0.8996996879577637 2023-01-24 00:28:50.229103: step: 258/463, loss: 0.4171857237815857 2023-01-24 00:28:50.825021: step: 260/463, loss: 0.21163006126880646 2023-01-24 00:28:51.442052: step: 262/463, loss: 1.0563526153564453 2023-01-24 00:28:52.052829: step: 264/463, loss: 0.5746558308601379 2023-01-24 00:28:52.643323: step: 266/463, loss: 0.7748067378997803 2023-01-24 00:28:53.353696: step: 268/463, loss: 0.24001193046569824 2023-01-24 00:28:53.968771: step: 270/463, loss: 1.6013071537017822 2023-01-24 00:28:54.559231: step: 272/463, loss: 1.338907241821289 2023-01-24 00:28:55.204633: step: 274/463, loss: 1.3389027118682861 2023-01-24 00:28:55.809463: step: 276/463, loss: 0.4273982048034668 2023-01-24 00:28:56.445492: step: 278/463, loss: 0.8513306379318237 2023-01-24 00:28:57.038759: step: 280/463, loss: 3.8566060066223145 2023-01-24 00:28:57.715177: step: 282/463, loss: 0.27493754029273987 2023-01-24 00:28:58.352769: step: 284/463, loss: 0.2856474220752716 2023-01-24 00:28:58.961990: step: 286/463, loss: 6.383513450622559 2023-01-24 00:28:59.627653: step: 288/463, loss: 0.9333340525627136 2023-01-24 00:29:00.273061: step: 290/463, loss: 0.31084156036376953 2023-01-24 00:29:00.980892: step: 292/463, loss: 0.7775826454162598 2023-01-24 00:29:01.599023: step: 294/463, loss: 0.5315329432487488 2023-01-24 00:29:02.241010: step: 296/463, loss: 0.6977090239524841 2023-01-24 00:29:02.893812: step: 298/463, loss: 1.2035317420959473 2023-01-24 00:29:03.573156: step: 300/463, loss: 2.377845525741577 2023-01-24 00:29:04.105268: step: 302/463, loss: 1.1665148735046387 2023-01-24 00:29:04.803180: step: 304/463, loss: 0.6681552529335022 2023-01-24 00:29:05.437103: step: 306/463, loss: 0.7620559334754944 2023-01-24 00:29:06.040414: step: 308/463, loss: 0.34468916058540344 2023-01-24 00:29:06.717260: step: 310/463, loss: 0.47265100479125977 2023-01-24 00:29:07.320912: step: 312/463, loss: 0.4769750237464905 2023-01-24 00:29:07.944840: step: 314/463, loss: 0.17289303243160248 2023-01-24 00:29:08.524035: step: 316/463, loss: 0.4151703715324402 2023-01-24 00:29:09.146464: step: 318/463, loss: 0.4990484118461609 2023-01-24 00:29:09.765575: step: 320/463, loss: 2.021172285079956 2023-01-24 00:29:10.344587: step: 322/463, loss: 0.4618651866912842 2023-01-24 00:29:10.985243: step: 324/463, loss: 0.7754377126693726 2023-01-24 00:29:11.654176: step: 326/463, loss: 0.9057049751281738 2023-01-24 00:29:12.294821: step: 328/463, loss: 0.5969914793968201 2023-01-24 00:29:12.995106: step: 330/463, loss: 0.22016790509223938 2023-01-24 00:29:13.646016: step: 332/463, loss: 0.4143878221511841 2023-01-24 00:29:14.295519: step: 334/463, loss: 1.5573976039886475 2023-01-24 00:29:14.892369: step: 336/463, loss: 2.2091221809387207 2023-01-24 00:29:15.508698: step: 338/463, loss: 0.7839574217796326 2023-01-24 00:29:16.180399: step: 340/463, loss: 1.1517176628112793 2023-01-24 00:29:16.840174: step: 342/463, loss: 2.2496585845947266 2023-01-24 00:29:17.439250: step: 344/463, loss: 2.2116799354553223 2023-01-24 00:29:18.028452: step: 346/463, loss: 0.6737585067749023 2023-01-24 00:29:18.649117: step: 348/463, loss: 2.59515380859375 2023-01-24 00:29:19.206083: step: 350/463, loss: 0.5876641869544983 2023-01-24 00:29:19.786435: step: 352/463, loss: 0.6915403008460999 2023-01-24 00:29:20.336517: step: 354/463, loss: 1.2536826133728027 2023-01-24 00:29:21.003857: step: 356/463, loss: 0.845802366733551 2023-01-24 00:29:21.534689: step: 358/463, loss: 1.0244643688201904 2023-01-24 00:29:22.162193: step: 360/463, loss: 0.9937472343444824 2023-01-24 00:29:22.796514: step: 362/463, loss: 0.4416143596172333 2023-01-24 00:29:23.397513: step: 364/463, loss: 0.5093398094177246 2023-01-24 00:29:24.009797: step: 366/463, loss: 1.3189377784729004 2023-01-24 00:29:24.601303: step: 368/463, loss: 1.2794768810272217 2023-01-24 00:29:25.182361: step: 370/463, loss: 0.49750301241874695 2023-01-24 00:29:25.849037: step: 372/463, loss: 2.072526693344116 2023-01-24 00:29:26.495302: step: 374/463, loss: 1.5029501914978027 2023-01-24 00:29:27.088273: step: 376/463, loss: 1.5449005365371704 2023-01-24 00:29:27.728386: step: 378/463, loss: 0.7353377342224121 2023-01-24 00:29:28.323327: step: 380/463, loss: 0.22864212095737457 2023-01-24 00:29:28.945855: step: 382/463, loss: 0.9468680024147034 2023-01-24 00:29:29.513970: step: 384/463, loss: 1.0538992881774902 2023-01-24 00:29:30.132707: step: 386/463, loss: 0.7449288964271545 2023-01-24 00:29:30.771883: step: 388/463, loss: 0.6340309977531433 2023-01-24 00:29:31.370736: step: 390/463, loss: 0.2261842042207718 2023-01-24 00:29:31.976712: step: 392/463, loss: 1.7919375896453857 2023-01-24 00:29:32.624576: step: 394/463, loss: 0.657434344291687 2023-01-24 00:29:33.266988: step: 396/463, loss: 0.5850203633308411 2023-01-24 00:29:33.896892: step: 398/463, loss: 1.2136361598968506 2023-01-24 00:29:34.469558: step: 400/463, loss: 1.141325831413269 2023-01-24 00:29:35.126707: step: 402/463, loss: 0.3117745816707611 2023-01-24 00:29:35.764335: step: 404/463, loss: 0.4130016267299652 2023-01-24 00:29:36.374349: step: 406/463, loss: 0.5025079846382141 2023-01-24 00:29:36.980693: step: 408/463, loss: 0.5135420560836792 2023-01-24 00:29:37.602465: step: 410/463, loss: 0.2760311961174011 2023-01-24 00:29:38.172584: step: 412/463, loss: 1.9271175861358643 2023-01-24 00:29:38.803216: step: 414/463, loss: 0.6319013237953186 2023-01-24 00:29:39.447695: step: 416/463, loss: 0.6430721282958984 2023-01-24 00:29:39.992779: step: 418/463, loss: 0.6520329117774963 2023-01-24 00:29:40.720645: step: 420/463, loss: 0.5931282639503479 2023-01-24 00:29:41.273240: step: 422/463, loss: 3.5090017318725586 2023-01-24 00:29:41.803058: step: 424/463, loss: 0.18159732222557068 2023-01-24 00:29:42.421672: step: 426/463, loss: 0.7756088376045227 2023-01-24 00:29:42.973666: step: 428/463, loss: 0.40602627396583557 2023-01-24 00:29:43.616450: step: 430/463, loss: 1.0019469261169434 2023-01-24 00:29:44.303071: step: 432/463, loss: 1.3378796577453613 2023-01-24 00:29:44.867145: step: 434/463, loss: 1.8086732625961304 2023-01-24 00:29:45.503420: step: 436/463, loss: 1.603554368019104 2023-01-24 00:29:46.100976: step: 438/463, loss: 0.6995077729225159 2023-01-24 00:29:46.709928: step: 440/463, loss: 0.6076539754867554 2023-01-24 00:29:47.336796: step: 442/463, loss: 0.44419634342193604 2023-01-24 00:29:47.973165: step: 444/463, loss: 0.7216389775276184 2023-01-24 00:29:48.553113: step: 446/463, loss: 0.8208056688308716 2023-01-24 00:29:49.172654: step: 448/463, loss: 0.6675727367401123 2023-01-24 00:29:49.880805: step: 450/463, loss: 1.1969261169433594 2023-01-24 00:29:50.514179: step: 452/463, loss: 1.633812427520752 2023-01-24 00:29:51.108796: step: 454/463, loss: 0.6107681393623352 2023-01-24 00:29:51.833716: step: 456/463, loss: 2.18015193939209 2023-01-24 00:29:52.452648: step: 458/463, loss: 0.8157476782798767 2023-01-24 00:29:53.153400: step: 460/463, loss: 0.4467031955718994 2023-01-24 00:29:53.750043: step: 462/463, loss: 3.7532663345336914 2023-01-24 00:29:54.292086: step: 464/463, loss: 0.682722806930542 2023-01-24 00:29:54.918387: step: 466/463, loss: 0.5589893460273743 2023-01-24 00:29:55.532071: step: 468/463, loss: 1.3220820426940918 2023-01-24 00:29:56.136574: step: 470/463, loss: 0.19605286419391632 2023-01-24 00:29:56.815557: step: 472/463, loss: 0.2717975676059723 2023-01-24 00:29:57.504181: step: 474/463, loss: 0.7258267402648926 2023-01-24 00:29:58.090114: step: 476/463, loss: 0.8305843472480774 2023-01-24 00:29:58.728199: step: 478/463, loss: 1.0759207010269165 2023-01-24 00:29:59.315812: step: 480/463, loss: 0.9920896291732788 2023-01-24 00:30:00.028468: step: 482/463, loss: 1.0002574920654297 2023-01-24 00:30:00.649170: step: 484/463, loss: 0.7887349724769592 2023-01-24 00:30:01.255150: step: 486/463, loss: 0.38126900792121887 2023-01-24 00:30:01.867371: step: 488/463, loss: 1.2281166315078735 2023-01-24 00:30:02.463015: step: 490/463, loss: 0.612450897693634 2023-01-24 00:30:03.069845: step: 492/463, loss: 0.6850741505622864 2023-01-24 00:30:03.746833: step: 494/463, loss: 0.6846902966499329 2023-01-24 00:30:04.400432: step: 496/463, loss: 0.8670388460159302 2023-01-24 00:30:04.996987: step: 498/463, loss: 0.5522689819335938 2023-01-24 00:30:05.608125: step: 500/463, loss: 1.7460061311721802 2023-01-24 00:30:06.223350: step: 502/463, loss: 0.6664052605628967 2023-01-24 00:30:06.880303: step: 504/463, loss: 1.4807360172271729 2023-01-24 00:30:07.494137: step: 506/463, loss: 0.8345937132835388 2023-01-24 00:30:08.119169: step: 508/463, loss: 1.1722451448440552 2023-01-24 00:30:08.718881: step: 510/463, loss: 0.8332258462905884 2023-01-24 00:30:09.316468: step: 512/463, loss: 0.6260147094726562 2023-01-24 00:30:09.978852: step: 514/463, loss: 1.7738807201385498 2023-01-24 00:30:10.525421: step: 516/463, loss: 0.671615481376648 2023-01-24 00:30:11.169102: step: 518/463, loss: 0.5809204578399658 2023-01-24 00:30:11.709829: step: 520/463, loss: 0.9149419665336609 2023-01-24 00:30:12.406499: step: 522/463, loss: 0.5901783108711243 2023-01-24 00:30:13.048752: step: 524/463, loss: 3.1206703186035156 2023-01-24 00:30:13.688045: step: 526/463, loss: 0.30269375443458557 2023-01-24 00:30:14.284178: step: 528/463, loss: 1.3251066207885742 2023-01-24 00:30:14.816736: step: 530/463, loss: 0.6037574410438538 2023-01-24 00:30:15.517225: step: 532/463, loss: 0.6829760670661926 2023-01-24 00:30:16.094760: step: 534/463, loss: 1.2309551239013672 2023-01-24 00:30:16.765931: step: 536/463, loss: 0.8177639842033386 2023-01-24 00:30:17.334697: step: 538/463, loss: 2.6674256324768066 2023-01-24 00:30:17.968949: step: 540/463, loss: 2.1480700969696045 2023-01-24 00:30:18.554541: step: 542/463, loss: 0.5788074731826782 2023-01-24 00:30:19.151033: step: 544/463, loss: 2.4021010398864746 2023-01-24 00:30:19.831613: step: 546/463, loss: 0.89857017993927 2023-01-24 00:30:20.401641: step: 548/463, loss: 0.1400298923254013 2023-01-24 00:30:21.050529: step: 550/463, loss: 0.9120315313339233 2023-01-24 00:30:21.675114: step: 552/463, loss: 2.2394559383392334 2023-01-24 00:30:22.268218: step: 554/463, loss: 0.33983784914016724 2023-01-24 00:30:22.884938: step: 556/463, loss: 1.1214693784713745 2023-01-24 00:30:23.519901: step: 558/463, loss: 3.144329309463501 2023-01-24 00:30:24.167220: step: 560/463, loss: 0.5036450028419495 2023-01-24 00:30:24.775115: step: 562/463, loss: 1.2057098150253296 2023-01-24 00:30:25.349450: step: 564/463, loss: 0.4738738536834717 2023-01-24 00:30:26.029848: step: 566/463, loss: 0.49166056513786316 2023-01-24 00:30:26.721678: step: 568/463, loss: 0.8041418790817261 2023-01-24 00:30:27.359509: step: 570/463, loss: 0.7282711267471313 2023-01-24 00:30:27.992528: step: 572/463, loss: 0.44081413745880127 2023-01-24 00:30:28.674558: step: 574/463, loss: 0.08008335530757904 2023-01-24 00:30:29.291597: step: 576/463, loss: 0.5423160195350647 2023-01-24 00:30:29.895480: step: 578/463, loss: 6.515285015106201 2023-01-24 00:30:30.613500: step: 580/463, loss: 0.3133462071418762 2023-01-24 00:30:31.191456: step: 582/463, loss: 0.6519750356674194 2023-01-24 00:30:31.816298: step: 584/463, loss: 3.7580223083496094 2023-01-24 00:30:32.428848: step: 586/463, loss: 6.031226634979248 2023-01-24 00:30:32.986808: step: 588/463, loss: 0.6599688529968262 2023-01-24 00:30:33.583162: step: 590/463, loss: 1.0934603214263916 2023-01-24 00:30:34.182024: step: 592/463, loss: 1.2284626960754395 2023-01-24 00:30:34.812831: step: 594/463, loss: 0.18504633009433746 2023-01-24 00:30:35.404833: step: 596/463, loss: 0.6401344537734985 2023-01-24 00:30:36.036347: step: 598/463, loss: 0.230966717004776 2023-01-24 00:30:36.635049: step: 600/463, loss: 1.2084563970565796 2023-01-24 00:30:37.319326: step: 602/463, loss: 5.660030364990234 2023-01-24 00:30:37.932139: step: 604/463, loss: 0.5374194979667664 2023-01-24 00:30:38.585105: step: 606/463, loss: 1.4481379985809326 2023-01-24 00:30:39.237112: step: 608/463, loss: 1.5611393451690674 2023-01-24 00:30:39.852523: step: 610/463, loss: 2.258495807647705 2023-01-24 00:30:40.535137: step: 612/463, loss: 0.323032945394516 2023-01-24 00:30:41.057847: step: 614/463, loss: 0.61004638671875 2023-01-24 00:30:41.682563: step: 616/463, loss: 0.593949556350708 2023-01-24 00:30:42.216602: step: 618/463, loss: 1.1773996353149414 2023-01-24 00:30:42.800538: step: 620/463, loss: 1.2285062074661255 2023-01-24 00:30:43.351812: step: 622/463, loss: 1.7627588510513306 2023-01-24 00:30:44.016426: step: 624/463, loss: 1.952219843864441 2023-01-24 00:30:44.688705: step: 626/463, loss: 0.28251194953918457 2023-01-24 00:30:45.239638: step: 628/463, loss: 0.5243813395500183 2023-01-24 00:30:45.872028: step: 630/463, loss: 0.8729084730148315 2023-01-24 00:30:46.439149: step: 632/463, loss: 0.9973430633544922 2023-01-24 00:30:47.059259: step: 634/463, loss: 0.21113714575767517 2023-01-24 00:30:47.628370: step: 636/463, loss: 0.7165486812591553 2023-01-24 00:30:48.308197: step: 638/463, loss: 1.3764750957489014 2023-01-24 00:30:48.926998: step: 640/463, loss: 0.9395245313644409 2023-01-24 00:30:49.604498: step: 642/463, loss: 1.3470277786254883 2023-01-24 00:30:50.328033: step: 644/463, loss: 0.5813266038894653 2023-01-24 00:30:50.933455: step: 646/463, loss: 2.697793483734131 2023-01-24 00:30:51.539500: step: 648/463, loss: 1.7574219703674316 2023-01-24 00:30:52.116963: step: 650/463, loss: 2.361961841583252 2023-01-24 00:30:52.740754: step: 652/463, loss: 0.8672612905502319 2023-01-24 00:30:53.347658: step: 654/463, loss: 1.0100539922714233 2023-01-24 00:30:53.989118: step: 656/463, loss: 1.2172175645828247 2023-01-24 00:30:54.638853: step: 658/463, loss: 0.4758746027946472 2023-01-24 00:30:55.274845: step: 660/463, loss: 1.8471059799194336 2023-01-24 00:30:55.916232: step: 662/463, loss: 0.6793623566627502 2023-01-24 00:30:56.490994: step: 664/463, loss: 0.8186171650886536 2023-01-24 00:30:57.085800: step: 666/463, loss: 0.8648422360420227 2023-01-24 00:30:57.748206: step: 668/463, loss: 2.600332498550415 2023-01-24 00:30:58.352429: step: 670/463, loss: 0.4720839858055115 2023-01-24 00:30:58.931449: step: 672/463, loss: 0.2750377058982849 2023-01-24 00:30:59.460900: step: 674/463, loss: 0.5651376247406006 2023-01-24 00:31:00.099702: step: 676/463, loss: 1.260711908340454 2023-01-24 00:31:00.714168: step: 678/463, loss: 1.6037933826446533 2023-01-24 00:31:01.404525: step: 680/463, loss: 0.13473549485206604 2023-01-24 00:31:02.100134: step: 682/463, loss: 0.758983314037323 2023-01-24 00:31:02.818992: step: 684/463, loss: 0.4858092963695526 2023-01-24 00:31:03.458501: step: 686/463, loss: 2.1029160022735596 2023-01-24 00:31:04.093191: step: 688/463, loss: 0.9216176867485046 2023-01-24 00:31:04.702825: step: 690/463, loss: 0.5142749547958374 2023-01-24 00:31:05.325347: step: 692/463, loss: 0.31698527932167053 2023-01-24 00:31:05.899818: step: 694/463, loss: 0.31004756689071655 2023-01-24 00:31:06.571863: step: 696/463, loss: 0.9755090475082397 2023-01-24 00:31:07.156717: step: 698/463, loss: 0.3814958930015564 2023-01-24 00:31:07.683579: step: 700/463, loss: 0.4584546685218811 2023-01-24 00:31:08.325208: step: 702/463, loss: 1.25494384765625 2023-01-24 00:31:09.000817: step: 704/463, loss: 0.5057708024978638 2023-01-24 00:31:09.589829: step: 706/463, loss: 2.5170087814331055 2023-01-24 00:31:10.201382: step: 708/463, loss: 2.6502881050109863 2023-01-24 00:31:10.794714: step: 710/463, loss: 0.9171215295791626 2023-01-24 00:31:11.393870: step: 712/463, loss: 0.4922195374965668 2023-01-24 00:31:11.982174: step: 714/463, loss: 1.2491676807403564 2023-01-24 00:31:12.584536: step: 716/463, loss: 1.4253944158554077 2023-01-24 00:31:13.199924: step: 718/463, loss: 0.787155270576477 2023-01-24 00:31:13.816253: step: 720/463, loss: 1.5657223463058472 2023-01-24 00:31:14.379134: step: 722/463, loss: 1.7306480407714844 2023-01-24 00:31:15.057478: step: 724/463, loss: 0.4160867929458618 2023-01-24 00:31:15.714252: step: 726/463, loss: 0.6272135972976685 2023-01-24 00:31:16.417818: step: 728/463, loss: 0.5074421763420105 2023-01-24 00:31:17.015266: step: 730/463, loss: 1.9525392055511475 2023-01-24 00:31:17.667768: step: 732/463, loss: 1.1704788208007812 2023-01-24 00:31:18.326165: step: 734/463, loss: 1.7162446975708008 2023-01-24 00:31:18.970153: step: 736/463, loss: 0.9051508903503418 2023-01-24 00:31:19.629927: step: 738/463, loss: 0.7166047096252441 2023-01-24 00:31:20.355698: step: 740/463, loss: 0.504905641078949 2023-01-24 00:31:21.043709: step: 742/463, loss: 1.1095871925354004 2023-01-24 00:31:21.721314: step: 744/463, loss: 0.45236122608184814 2023-01-24 00:31:22.297711: step: 746/463, loss: 0.78248530626297 2023-01-24 00:31:22.992882: step: 748/463, loss: 1.0007086992263794 2023-01-24 00:31:23.577230: step: 750/463, loss: 0.3215717673301697 2023-01-24 00:31:24.188286: step: 752/463, loss: 1.448829174041748 2023-01-24 00:31:24.791043: step: 754/463, loss: 0.6961119771003723 2023-01-24 00:31:25.389122: step: 756/463, loss: 0.5924288034439087 2023-01-24 00:31:26.067434: step: 758/463, loss: 0.5901463031768799 2023-01-24 00:31:26.691355: step: 760/463, loss: 1.212836503982544 2023-01-24 00:31:27.300267: step: 762/463, loss: 1.0207247734069824 2023-01-24 00:31:27.948793: step: 764/463, loss: 0.5022910833358765 2023-01-24 00:31:28.547467: step: 766/463, loss: 0.4612976312637329 2023-01-24 00:31:29.147623: step: 768/463, loss: 0.495321661233902 2023-01-24 00:31:29.802259: step: 770/463, loss: 0.32203686237335205 2023-01-24 00:31:30.423433: step: 772/463, loss: 0.21196919679641724 2023-01-24 00:31:31.095677: step: 774/463, loss: 0.3725093901157379 2023-01-24 00:31:31.719312: step: 776/463, loss: 0.6345022916793823 2023-01-24 00:31:32.311451: step: 778/463, loss: 1.3286399841308594 2023-01-24 00:31:32.935992: step: 780/463, loss: 0.415843665599823 2023-01-24 00:31:33.515432: step: 782/463, loss: 0.5772085785865784 2023-01-24 00:31:34.159247: step: 784/463, loss: 0.5375340580940247 2023-01-24 00:31:34.774663: step: 786/463, loss: 0.2410525381565094 2023-01-24 00:31:35.373637: step: 788/463, loss: 3.295276641845703 2023-01-24 00:31:35.970944: step: 790/463, loss: 0.18998917937278748 2023-01-24 00:31:36.641898: step: 792/463, loss: 0.8517022132873535 2023-01-24 00:31:37.190177: step: 794/463, loss: 1.0473159551620483 2023-01-24 00:31:37.802052: step: 796/463, loss: 2.286813259124756 2023-01-24 00:31:38.434347: step: 798/463, loss: 0.2646630108356476 2023-01-24 00:31:39.058010: step: 800/463, loss: 0.5460038185119629 2023-01-24 00:31:39.663213: step: 802/463, loss: 1.433488368988037 2023-01-24 00:31:40.281434: step: 804/463, loss: 0.8649519085884094 2023-01-24 00:31:40.853832: step: 806/463, loss: 0.08898138999938965 2023-01-24 00:31:41.420893: step: 808/463, loss: 1.1761524677276611 2023-01-24 00:31:42.054361: step: 810/463, loss: 1.080528736114502 2023-01-24 00:31:42.665920: step: 812/463, loss: 1.019982933998108 2023-01-24 00:31:43.233638: step: 814/463, loss: 1.567225694656372 2023-01-24 00:31:43.828626: step: 816/463, loss: 0.7515830993652344 2023-01-24 00:31:44.502952: step: 818/463, loss: 0.5104256272315979 2023-01-24 00:31:45.071236: step: 820/463, loss: 1.502622365951538 2023-01-24 00:31:45.674862: step: 822/463, loss: 0.8216833472251892 2023-01-24 00:31:46.301596: step: 824/463, loss: 0.6181341409683228 2023-01-24 00:31:46.973744: step: 826/463, loss: 1.1912914514541626 2023-01-24 00:31:47.590154: step: 828/463, loss: 0.3649296462535858 2023-01-24 00:31:48.191736: step: 830/463, loss: 0.42700421810150146 2023-01-24 00:31:48.775265: step: 832/463, loss: 0.4368527829647064 2023-01-24 00:31:49.348846: step: 834/463, loss: 2.055616617202759 2023-01-24 00:31:49.976249: step: 836/463, loss: 0.5650148987770081 2023-01-24 00:31:50.556109: step: 838/463, loss: 0.3242678940296173 2023-01-24 00:31:51.144329: step: 840/463, loss: 0.1505160629749298 2023-01-24 00:31:51.702984: step: 842/463, loss: 2.7335596084594727 2023-01-24 00:31:52.343988: step: 844/463, loss: 0.9642425775527954 2023-01-24 00:31:53.020153: step: 846/463, loss: 0.5027278661727905 2023-01-24 00:31:53.671849: step: 848/463, loss: 4.763665199279785 2023-01-24 00:31:54.235683: step: 850/463, loss: 0.8516854047775269 2023-01-24 00:31:54.829890: step: 852/463, loss: 1.2815616130828857 2023-01-24 00:31:55.444436: step: 854/463, loss: 0.6073788404464722 2023-01-24 00:31:56.047350: step: 856/463, loss: 0.8202017545700073 2023-01-24 00:31:56.603459: step: 858/463, loss: 0.46286290884017944 2023-01-24 00:31:57.240486: step: 860/463, loss: 0.9589809775352478 2023-01-24 00:31:57.893024: step: 862/463, loss: 0.636374294757843 2023-01-24 00:31:58.433936: step: 864/463, loss: 0.35780298709869385 2023-01-24 00:31:59.044010: step: 866/463, loss: 0.8220357298851013 2023-01-24 00:31:59.757283: step: 868/463, loss: 0.5103381872177124 2023-01-24 00:32:00.353767: step: 870/463, loss: 0.11688251793384552 2023-01-24 00:32:01.004206: step: 872/463, loss: 3.6261203289031982 2023-01-24 00:32:01.632741: step: 874/463, loss: 1.783101201057434 2023-01-24 00:32:02.228841: step: 876/463, loss: 0.3184046149253845 2023-01-24 00:32:02.814047: step: 878/463, loss: 0.8564475178718567 2023-01-24 00:32:03.414212: step: 880/463, loss: 1.0247830152511597 2023-01-24 00:32:04.046389: step: 882/463, loss: 0.13300535082817078 2023-01-24 00:32:04.623947: step: 884/463, loss: 1.0001450777053833 2023-01-24 00:32:05.228488: step: 886/463, loss: 1.7600239515304565 2023-01-24 00:32:05.800226: step: 888/463, loss: 2.075435161590576 2023-01-24 00:32:06.413376: step: 890/463, loss: 1.5437798500061035 2023-01-24 00:32:06.963462: step: 892/463, loss: 0.7232778072357178 2023-01-24 00:32:07.634958: step: 894/463, loss: 0.30222436785697937 2023-01-24 00:32:08.390778: step: 896/463, loss: 1.6298611164093018 2023-01-24 00:32:08.998320: step: 898/463, loss: 0.6090134382247925 2023-01-24 00:32:09.639390: step: 900/463, loss: 0.9103258848190308 2023-01-24 00:32:10.242784: step: 902/463, loss: 0.9974690675735474 2023-01-24 00:32:10.847323: step: 904/463, loss: 2.5768048763275146 2023-01-24 00:32:11.572769: step: 906/463, loss: 0.4563380479812622 2023-01-24 00:32:12.238373: step: 908/463, loss: 1.9918770790100098 2023-01-24 00:32:12.928210: step: 910/463, loss: 0.5253063440322876 2023-01-24 00:32:13.495262: step: 912/463, loss: 0.7533784508705139 2023-01-24 00:32:14.120600: step: 914/463, loss: 0.44251173734664917 2023-01-24 00:32:14.710845: step: 916/463, loss: 0.7267775535583496 2023-01-24 00:32:15.335777: step: 918/463, loss: 7.284496307373047 2023-01-24 00:32:15.935142: step: 920/463, loss: 0.13394027948379517 2023-01-24 00:32:16.548086: step: 922/463, loss: 0.5903412103652954 2023-01-24 00:32:17.104814: step: 924/463, loss: 0.48630595207214355 2023-01-24 00:32:17.731294: step: 926/463, loss: 0.1694043129682541 ================================================== Loss: 1.065 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3760126974800888, 'r': 0.23759436102631795, 'f1': 0.29119122851365015}, 'combined': 0.21456195785216325, 'epoch': 4} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3645177282762899, 'r': 0.29295186235782567, 'f1': 0.32483980668365203}, 'combined': 0.2517897066160365, 'epoch': 4} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34545757850241543, 'r': 0.23598620163352857, 'f1': 0.2804165236998186}, 'combined': 0.20662270167355054, 'epoch': 4} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34660094684831527, 'r': 0.2941338310410015, 'f1': 0.31821923159770876}, 'combined': 0.2466579689896116, 'epoch': 4} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34485636645962736, 'r': 0.23034049524438108, 'f1': 0.276198955617267}, 'combined': 0.2035150199285125, 'epoch': 4} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3443968899677904, 'r': 0.28310056276251394, 'f1': 0.3107548976950052}, 'combined': 0.2408722173521093, 'epoch': 4} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45535714285714285, 'r': 0.18214285714285713, 'f1': 0.2602040816326531}, 'combined': 0.17346938775510204, 'epoch': 4} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.26666666666666666, 'r': 0.17391304347826086, 'f1': 0.2105263157894737}, 'combined': 0.10526315789473685, 'epoch': 4} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.06896551724137931, 'f1': 0.1176470588235294}, 'combined': 0.07843137254901959, 'epoch': 4} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3545336737828291, 'r': 0.2571712065508021, 'f1': 0.2981040550138826}, 'combined': 0.2196556194839135, 'epoch': 3} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34385369627128226, 'r': 0.31703941720425566, 'f1': 0.32990259164929703}, 'combined': 0.25571397056069917, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35, 'r': 0.25, 'f1': 0.2916666666666667}, 'combined': 0.19444444444444445, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31420591787439617, 'r': 0.24636600378787882, 'f1': 0.27618099787685774}, 'combined': 0.20350178790926357, 'epoch': 3} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.326948061144331, 'r': 0.31854939535346743, 'f1': 0.32269409008854977}, 'combined': 0.2501265195901678, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.359375, 'r': 0.25, 'f1': 0.2948717948717949}, 'combined': 0.14743589743589744, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3033269710323153, 'r': 0.2355379888697903, 'f1': 0.26516856742697076}, 'combined': 0.19538736547250476, 'epoch': 2} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3496730766041415, 'r': 0.28738053586841744, 'f1': 0.315481253589422}, 'combined': 0.2445357085238582, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.7916666666666666, 'r': 0.16379310344827586, 'f1': 0.2714285714285714}, 'combined': 0.18095238095238092, 'epoch': 2} ****************************** Epoch: 5 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:34:49.115198: step: 2/463, loss: 0.6280529499053955 2023-01-24 00:34:49.735326: step: 4/463, loss: 1.1968387365341187 2023-01-24 00:34:50.367886: step: 6/463, loss: 0.1501970887184143 2023-01-24 00:34:50.967268: step: 8/463, loss: 1.8222169876098633 2023-01-24 00:34:51.561967: step: 10/463, loss: 0.389104962348938 2023-01-24 00:34:52.233392: step: 12/463, loss: 1.9284547567367554 2023-01-24 00:34:52.878196: step: 14/463, loss: 0.21964147686958313 2023-01-24 00:34:53.469519: step: 16/463, loss: 1.3979219198226929 2023-01-24 00:34:54.128910: step: 18/463, loss: 0.25654852390289307 2023-01-24 00:34:54.692316: step: 20/463, loss: 0.2708037793636322 2023-01-24 00:34:55.294043: step: 22/463, loss: 0.8239782452583313 2023-01-24 00:34:55.868880: step: 24/463, loss: 0.5828837156295776 2023-01-24 00:34:56.536977: step: 26/463, loss: 1.0815584659576416 2023-01-24 00:34:57.152773: step: 28/463, loss: 0.33342719078063965 2023-01-24 00:34:57.785086: step: 30/463, loss: 0.7502411603927612 2023-01-24 00:34:58.345727: step: 32/463, loss: 0.3727218210697174 2023-01-24 00:34:59.011168: step: 34/463, loss: 0.5319357514381409 2023-01-24 00:34:59.606564: step: 36/463, loss: 0.24648316204547882 2023-01-24 00:35:00.214805: step: 38/463, loss: 0.7171517610549927 2023-01-24 00:35:00.827709: step: 40/463, loss: 0.7860630750656128 2023-01-24 00:35:01.416078: step: 42/463, loss: 0.19975592195987701 2023-01-24 00:35:02.095958: step: 44/463, loss: 0.23239928483963013 2023-01-24 00:35:02.704886: step: 46/463, loss: 0.30570685863494873 2023-01-24 00:35:03.386736: step: 48/463, loss: 1.0062568187713623 2023-01-24 00:35:03.974292: step: 50/463, loss: 0.4155263304710388 2023-01-24 00:35:04.691848: step: 52/463, loss: 0.9353864789009094 2023-01-24 00:35:05.264392: step: 54/463, loss: 0.41787198185920715 2023-01-24 00:35:05.860843: step: 56/463, loss: 0.31391215324401855 2023-01-24 00:35:06.547766: step: 58/463, loss: 0.7312435507774353 2023-01-24 00:35:07.136363: step: 60/463, loss: 0.7266860008239746 2023-01-24 00:35:07.765893: step: 62/463, loss: 0.48195719718933105 2023-01-24 00:35:08.401501: step: 64/463, loss: 1.275835394859314 2023-01-24 00:35:08.960323: step: 66/463, loss: 1.4393664598464966 2023-01-24 00:35:09.592068: step: 68/463, loss: 0.920201301574707 2023-01-24 00:35:10.218179: step: 70/463, loss: 0.6970359683036804 2023-01-24 00:35:10.810495: step: 72/463, loss: 0.7751462459564209 2023-01-24 00:35:11.517659: step: 74/463, loss: 0.593722403049469 2023-01-24 00:35:12.161142: step: 76/463, loss: 0.45327481627464294 2023-01-24 00:35:12.837576: step: 78/463, loss: 0.6711190342903137 2023-01-24 00:35:13.455124: step: 80/463, loss: 0.9234674572944641 2023-01-24 00:35:14.044488: step: 82/463, loss: 0.8867120146751404 2023-01-24 00:35:14.679903: step: 84/463, loss: 0.5921074151992798 2023-01-24 00:35:15.285809: step: 86/463, loss: 0.26304155588150024 2023-01-24 00:35:15.896683: step: 88/463, loss: 0.3195022940635681 2023-01-24 00:35:16.473940: step: 90/463, loss: 0.22182859480381012 2023-01-24 00:35:17.147030: step: 92/463, loss: 1.2784497737884521 2023-01-24 00:35:17.800100: step: 94/463, loss: 0.7152591347694397 2023-01-24 00:35:18.454128: step: 96/463, loss: 0.9055781364440918 2023-01-24 00:35:19.021263: step: 98/463, loss: 2.5438709259033203 2023-01-24 00:35:19.588646: step: 100/463, loss: 0.17646795511245728 2023-01-24 00:35:20.211524: step: 102/463, loss: 1.8708586692810059 2023-01-24 00:35:20.854791: step: 104/463, loss: 0.5604712963104248 2023-01-24 00:35:21.452321: step: 106/463, loss: 0.6748901009559631 2023-01-24 00:35:22.059454: step: 108/463, loss: 0.22819286584854126 2023-01-24 00:35:22.625004: step: 110/463, loss: 0.9221715331077576 2023-01-24 00:35:23.207705: step: 112/463, loss: 0.2510334551334381 2023-01-24 00:35:23.827072: step: 114/463, loss: 0.5729089379310608 2023-01-24 00:35:24.471704: step: 116/463, loss: 0.4011878967285156 2023-01-24 00:35:25.043355: step: 118/463, loss: 0.664446234703064 2023-01-24 00:35:25.666727: step: 120/463, loss: 0.42498496174812317 2023-01-24 00:35:26.291935: step: 122/463, loss: 0.3810936510562897 2023-01-24 00:35:26.841741: step: 124/463, loss: 0.16681428253650665 2023-01-24 00:35:27.430415: step: 126/463, loss: 0.29393795132637024 2023-01-24 00:35:28.033266: step: 128/463, loss: 3.136566638946533 2023-01-24 00:35:28.604983: step: 130/463, loss: 1.044725775718689 2023-01-24 00:35:29.215691: step: 132/463, loss: 0.5069814324378967 2023-01-24 00:35:29.884855: step: 134/463, loss: 1.2128422260284424 2023-01-24 00:35:30.458151: step: 136/463, loss: 0.7545170783996582 2023-01-24 00:35:31.021118: step: 138/463, loss: 0.6737390756607056 2023-01-24 00:35:31.592535: step: 140/463, loss: 0.6108490824699402 2023-01-24 00:35:32.220374: step: 142/463, loss: 1.2781355381011963 2023-01-24 00:35:32.819957: step: 144/463, loss: 0.5160717368125916 2023-01-24 00:35:33.486942: step: 146/463, loss: 0.4122994542121887 2023-01-24 00:35:34.121689: step: 148/463, loss: 1.1514430046081543 2023-01-24 00:35:34.724644: step: 150/463, loss: 0.3080209195613861 2023-01-24 00:35:35.328946: step: 152/463, loss: 0.19173398613929749 2023-01-24 00:35:35.936811: step: 154/463, loss: 1.4000709056854248 2023-01-24 00:35:36.571185: step: 156/463, loss: 0.42382609844207764 2023-01-24 00:35:37.182983: step: 158/463, loss: 0.659846305847168 2023-01-24 00:35:37.767604: step: 160/463, loss: 0.9543033838272095 2023-01-24 00:35:38.384889: step: 162/463, loss: 0.22856488823890686 2023-01-24 00:35:39.038213: step: 164/463, loss: 0.3018302619457245 2023-01-24 00:35:39.723971: step: 166/463, loss: 1.1354354619979858 2023-01-24 00:35:40.316603: step: 168/463, loss: 0.6017375588417053 2023-01-24 00:35:40.921263: step: 170/463, loss: 0.6742135286331177 2023-01-24 00:35:41.506049: step: 172/463, loss: 0.4315274953842163 2023-01-24 00:35:42.124516: step: 174/463, loss: 0.9956346750259399 2023-01-24 00:35:42.763297: step: 176/463, loss: 0.8927717208862305 2023-01-24 00:35:43.351163: step: 178/463, loss: 0.3627506494522095 2023-01-24 00:35:43.968427: step: 180/463, loss: 2.179762840270996 2023-01-24 00:35:44.597926: step: 182/463, loss: 0.3978305160999298 2023-01-24 00:35:45.272411: step: 184/463, loss: 0.6732479929924011 2023-01-24 00:35:45.876184: step: 186/463, loss: 1.0474364757537842 2023-01-24 00:35:46.482545: step: 188/463, loss: 0.08027077466249466 2023-01-24 00:35:47.124878: step: 190/463, loss: 1.8433949947357178 2023-01-24 00:35:47.745046: step: 192/463, loss: 0.5679114460945129 2023-01-24 00:35:48.353121: step: 194/463, loss: 0.1965644657611847 2023-01-24 00:35:48.950724: step: 196/463, loss: 0.3184294104576111 2023-01-24 00:35:49.551442: step: 198/463, loss: 11.678325653076172 2023-01-24 00:35:50.150284: step: 200/463, loss: 0.7518364191055298 2023-01-24 00:35:50.732376: step: 202/463, loss: 1.3319413661956787 2023-01-24 00:35:51.319640: step: 204/463, loss: 0.9582511186599731 2023-01-24 00:35:51.913076: step: 206/463, loss: 2.8840670585632324 2023-01-24 00:35:52.508931: step: 208/463, loss: 0.5567591190338135 2023-01-24 00:35:53.091805: step: 210/463, loss: 1.0928796529769897 2023-01-24 00:35:53.615674: step: 212/463, loss: 0.2947782874107361 2023-01-24 00:35:54.293809: step: 214/463, loss: 0.5035770535469055 2023-01-24 00:35:54.904631: step: 216/463, loss: 0.8390800952911377 2023-01-24 00:35:55.550788: step: 218/463, loss: 1.0294955968856812 2023-01-24 00:35:56.127862: step: 220/463, loss: 1.071850299835205 2023-01-24 00:35:56.778068: step: 222/463, loss: 0.6560050845146179 2023-01-24 00:35:57.335399: step: 224/463, loss: 0.23306003212928772 2023-01-24 00:35:57.912789: step: 226/463, loss: 1.802243709564209 2023-01-24 00:35:58.603548: step: 228/463, loss: 0.8357193470001221 2023-01-24 00:35:59.168678: step: 230/463, loss: 0.8067852258682251 2023-01-24 00:35:59.749474: step: 232/463, loss: 0.2559684216976166 2023-01-24 00:36:00.358631: step: 234/463, loss: 0.8133226633071899 2023-01-24 00:36:00.991476: step: 236/463, loss: 1.7267839908599854 2023-01-24 00:36:01.644783: step: 238/463, loss: 0.9141260385513306 2023-01-24 00:36:02.241515: step: 240/463, loss: 0.602196216583252 2023-01-24 00:36:02.823973: step: 242/463, loss: 2.07576584815979 2023-01-24 00:36:03.436653: step: 244/463, loss: 0.26883256435394287 2023-01-24 00:36:04.056951: step: 246/463, loss: 0.2601020932197571 2023-01-24 00:36:04.626668: step: 248/463, loss: 0.5728490352630615 2023-01-24 00:36:05.209993: step: 250/463, loss: 1.3841387033462524 2023-01-24 00:36:05.884683: step: 252/463, loss: 1.2997407913208008 2023-01-24 00:36:06.510512: step: 254/463, loss: 0.1857355237007141 2023-01-24 00:36:07.169106: step: 256/463, loss: 0.2503657341003418 2023-01-24 00:36:07.779759: step: 258/463, loss: 0.266375869512558 2023-01-24 00:36:08.432322: step: 260/463, loss: 0.636852502822876 2023-01-24 00:36:09.001725: step: 262/463, loss: 1.0548986196517944 2023-01-24 00:36:09.591299: step: 264/463, loss: 0.3508453369140625 2023-01-24 00:36:10.191828: step: 266/463, loss: 1.0740405321121216 2023-01-24 00:36:10.829348: step: 268/463, loss: 0.7337073087692261 2023-01-24 00:36:11.496435: step: 270/463, loss: 1.2571613788604736 2023-01-24 00:36:12.029039: step: 272/463, loss: 0.3398612141609192 2023-01-24 00:36:12.735338: step: 274/463, loss: 1.3762160539627075 2023-01-24 00:36:13.300162: step: 276/463, loss: 0.37680551409721375 2023-01-24 00:36:13.858526: step: 278/463, loss: 1.4244345426559448 2023-01-24 00:36:14.501909: step: 280/463, loss: 0.2548726201057434 2023-01-24 00:36:15.111799: step: 282/463, loss: 0.25318050384521484 2023-01-24 00:36:15.769689: step: 284/463, loss: 2.1174185276031494 2023-01-24 00:36:16.391268: step: 286/463, loss: 0.25583475828170776 2023-01-24 00:36:17.001659: step: 288/463, loss: 0.8417084813117981 2023-01-24 00:36:17.618961: step: 290/463, loss: 0.22802284359931946 2023-01-24 00:36:18.265522: step: 292/463, loss: 1.4744998216629028 2023-01-24 00:36:18.956244: step: 294/463, loss: 3.5566883087158203 2023-01-24 00:36:19.588145: step: 296/463, loss: 0.6578130722045898 2023-01-24 00:36:20.393393: step: 298/463, loss: 0.8024518489837646 2023-01-24 00:36:20.957174: step: 300/463, loss: 2.375673294067383 2023-01-24 00:36:21.603351: step: 302/463, loss: 3.3944952487945557 2023-01-24 00:36:22.231450: step: 304/463, loss: 0.6203200817108154 2023-01-24 00:36:22.889388: step: 306/463, loss: 1.3409645557403564 2023-01-24 00:36:23.447326: step: 308/463, loss: 0.6700774431228638 2023-01-24 00:36:24.076956: step: 310/463, loss: 0.9447431564331055 2023-01-24 00:36:24.738234: step: 312/463, loss: 0.7548693418502808 2023-01-24 00:36:25.292849: step: 314/463, loss: 1.5705002546310425 2023-01-24 00:36:25.889444: step: 316/463, loss: 0.3633936941623688 2023-01-24 00:36:26.490109: step: 318/463, loss: 0.24533593654632568 2023-01-24 00:36:27.147737: step: 320/463, loss: 0.4184809923171997 2023-01-24 00:36:27.800529: step: 322/463, loss: 0.7727669477462769 2023-01-24 00:36:28.433486: step: 324/463, loss: 0.9991454482078552 2023-01-24 00:36:29.098762: step: 326/463, loss: 0.1757662296295166 2023-01-24 00:36:29.649126: step: 328/463, loss: 0.6562673449516296 2023-01-24 00:36:30.230219: step: 330/463, loss: 0.6632398366928101 2023-01-24 00:36:30.831384: step: 332/463, loss: 1.1940343379974365 2023-01-24 00:36:31.394615: step: 334/463, loss: 1.6092807054519653 2023-01-24 00:36:31.988290: step: 336/463, loss: 0.438454270362854 2023-01-24 00:36:32.589716: step: 338/463, loss: 0.5626126527786255 2023-01-24 00:36:33.220186: step: 340/463, loss: 2.9122085571289062 2023-01-24 00:36:33.881532: step: 342/463, loss: 1.530444622039795 2023-01-24 00:36:34.534919: step: 344/463, loss: 0.8764356970787048 2023-01-24 00:36:35.159882: step: 346/463, loss: 0.8195348978042603 2023-01-24 00:36:35.777820: step: 348/463, loss: 0.29962530732154846 2023-01-24 00:36:36.429198: step: 350/463, loss: 1.9524885416030884 2023-01-24 00:36:37.049317: step: 352/463, loss: 0.6456241607666016 2023-01-24 00:36:37.688034: step: 354/463, loss: 0.656253457069397 2023-01-24 00:36:38.345206: step: 356/463, loss: 0.5095264911651611 2023-01-24 00:36:38.936249: step: 358/463, loss: 3.0658915042877197 2023-01-24 00:36:39.523677: step: 360/463, loss: 1.7187938690185547 2023-01-24 00:36:40.107307: step: 362/463, loss: 0.7578294277191162 2023-01-24 00:36:40.852171: step: 364/463, loss: 0.29678162932395935 2023-01-24 00:36:41.405372: step: 366/463, loss: 0.9792714715003967 2023-01-24 00:36:41.951111: step: 368/463, loss: 0.3181105852127075 2023-01-24 00:36:42.492229: step: 370/463, loss: 1.940513253211975 2023-01-24 00:36:43.106374: step: 372/463, loss: 0.2905747592449188 2023-01-24 00:36:43.689249: step: 374/463, loss: 0.9227666854858398 2023-01-24 00:36:44.271876: step: 376/463, loss: 0.8191882967948914 2023-01-24 00:36:44.930073: step: 378/463, loss: 0.6728701591491699 2023-01-24 00:36:45.491536: step: 380/463, loss: 0.8327441811561584 2023-01-24 00:36:46.174089: step: 382/463, loss: 0.6571238040924072 2023-01-24 00:36:46.781261: step: 384/463, loss: 1.015677571296692 2023-01-24 00:36:47.405631: step: 386/463, loss: 0.5491654872894287 2023-01-24 00:36:48.047344: step: 388/463, loss: 0.6769430637359619 2023-01-24 00:36:48.645029: step: 390/463, loss: 0.9880294799804688 2023-01-24 00:36:49.253026: step: 392/463, loss: 2.2204930782318115 2023-01-24 00:36:49.950273: step: 394/463, loss: 0.524406909942627 2023-01-24 00:36:50.531329: step: 396/463, loss: 0.38486480712890625 2023-01-24 00:36:51.074333: step: 398/463, loss: 0.9057947397232056 2023-01-24 00:36:51.701438: step: 400/463, loss: 0.5521529912948608 2023-01-24 00:36:52.354834: step: 402/463, loss: 0.40139076113700867 2023-01-24 00:36:52.958362: step: 404/463, loss: 2.3127365112304688 2023-01-24 00:36:53.567706: step: 406/463, loss: 0.1813669502735138 2023-01-24 00:36:54.200154: step: 408/463, loss: 0.41743168234825134 2023-01-24 00:36:54.844982: step: 410/463, loss: 1.1886844635009766 2023-01-24 00:36:55.417056: step: 412/463, loss: 0.5159401893615723 2023-01-24 00:36:56.015013: step: 414/463, loss: 0.2798379361629486 2023-01-24 00:36:56.595660: step: 416/463, loss: 1.701764702796936 2023-01-24 00:36:57.156682: step: 418/463, loss: 0.22832989692687988 2023-01-24 00:36:57.858841: step: 420/463, loss: 1.0826969146728516 2023-01-24 00:36:58.434508: step: 422/463, loss: 1.515113115310669 2023-01-24 00:36:59.098394: step: 424/463, loss: 0.5910017490386963 2023-01-24 00:36:59.710479: step: 426/463, loss: 0.570904016494751 2023-01-24 00:37:00.378523: step: 428/463, loss: 3.435241937637329 2023-01-24 00:37:00.968859: step: 430/463, loss: 0.38170838356018066 2023-01-24 00:37:01.693496: step: 432/463, loss: 0.8956758975982666 2023-01-24 00:37:02.370067: step: 434/463, loss: 1.1432406902313232 2023-01-24 00:37:02.968485: step: 436/463, loss: 0.6462689638137817 2023-01-24 00:37:03.647558: step: 438/463, loss: 0.33242911100387573 2023-01-24 00:37:04.271990: step: 440/463, loss: 0.4339667558670044 2023-01-24 00:37:04.927856: step: 442/463, loss: 0.5501629710197449 2023-01-24 00:37:05.540838: step: 444/463, loss: 0.9211000800132751 2023-01-24 00:37:06.283804: step: 446/463, loss: 0.8362309336662292 2023-01-24 00:37:06.902949: step: 448/463, loss: 0.2754156291484833 2023-01-24 00:37:07.578635: step: 450/463, loss: 1.3154528141021729 2023-01-24 00:37:08.238634: step: 452/463, loss: 1.008116364479065 2023-01-24 00:37:08.868815: step: 454/463, loss: 1.3285428285598755 2023-01-24 00:37:09.477300: step: 456/463, loss: 1.4497106075286865 2023-01-24 00:37:10.127636: step: 458/463, loss: 0.48949024081230164 2023-01-24 00:37:10.712064: step: 460/463, loss: 0.4763083755970001 2023-01-24 00:37:11.287580: step: 462/463, loss: 1.9459871053695679 2023-01-24 00:37:11.958992: step: 464/463, loss: 0.3341725170612335 2023-01-24 00:37:12.583289: step: 466/463, loss: 0.22736842930316925 2023-01-24 00:37:13.200063: step: 468/463, loss: 0.57209312915802 2023-01-24 00:37:13.831045: step: 470/463, loss: 0.2902805209159851 2023-01-24 00:37:14.486866: step: 472/463, loss: 0.5204944610595703 2023-01-24 00:37:15.117178: step: 474/463, loss: 1.1368167400360107 2023-01-24 00:37:15.764900: step: 476/463, loss: 0.8238862752914429 2023-01-24 00:37:16.399005: step: 478/463, loss: 2.135991096496582 2023-01-24 00:37:16.943124: step: 480/463, loss: 1.2142736911773682 2023-01-24 00:37:17.559547: step: 482/463, loss: 0.9453557729721069 2023-01-24 00:37:18.196781: step: 484/463, loss: 1.0474512577056885 2023-01-24 00:37:18.841388: step: 486/463, loss: 0.2816633880138397 2023-01-24 00:37:19.520697: step: 488/463, loss: 0.6661911606788635 2023-01-24 00:37:20.178805: step: 490/463, loss: 0.3506323993206024 2023-01-24 00:37:20.792377: step: 492/463, loss: 0.6111321449279785 2023-01-24 00:37:21.397617: step: 494/463, loss: 0.2844662368297577 2023-01-24 00:37:22.034594: step: 496/463, loss: 0.8982583284378052 2023-01-24 00:37:22.635242: step: 498/463, loss: 0.12321044504642487 2023-01-24 00:37:23.304391: step: 500/463, loss: 0.2831031382083893 2023-01-24 00:37:23.952705: step: 502/463, loss: 0.44888120889663696 2023-01-24 00:37:24.544130: step: 504/463, loss: 0.4643210768699646 2023-01-24 00:37:25.134685: step: 506/463, loss: 0.7459716200828552 2023-01-24 00:37:25.750822: step: 508/463, loss: 1.4252722263336182 2023-01-24 00:37:26.339966: step: 510/463, loss: 0.723647952079773 2023-01-24 00:37:26.892591: step: 512/463, loss: 0.33296412229537964 2023-01-24 00:37:27.574527: step: 514/463, loss: 5.2945170402526855 2023-01-24 00:37:28.322781: step: 516/463, loss: 0.49359017610549927 2023-01-24 00:37:28.948495: step: 518/463, loss: 0.6195040941238403 2023-01-24 00:37:29.521365: step: 520/463, loss: 0.34351181983947754 2023-01-24 00:37:30.060169: step: 522/463, loss: 0.32653310894966125 2023-01-24 00:37:30.660206: step: 524/463, loss: 0.5711261034011841 2023-01-24 00:37:31.223828: step: 526/463, loss: 3.975227117538452 2023-01-24 00:37:31.752344: step: 528/463, loss: 0.2498871386051178 2023-01-24 00:37:32.401532: step: 530/463, loss: 0.8269243240356445 2023-01-24 00:37:32.988490: step: 532/463, loss: 0.9738156199455261 2023-01-24 00:37:33.586728: step: 534/463, loss: 0.4229276478290558 2023-01-24 00:37:34.179652: step: 536/463, loss: 1.4646267890930176 2023-01-24 00:37:34.808737: step: 538/463, loss: 1.0056730508804321 2023-01-24 00:37:35.487226: step: 540/463, loss: 0.16793085634708405 2023-01-24 00:37:36.082094: step: 542/463, loss: 0.19599169492721558 2023-01-24 00:37:36.724934: step: 544/463, loss: 0.5333278179168701 2023-01-24 00:37:37.359128: step: 546/463, loss: 0.5272350907325745 2023-01-24 00:37:37.985301: step: 548/463, loss: 0.17491260170936584 2023-01-24 00:37:38.583195: step: 550/463, loss: 0.2488846480846405 2023-01-24 00:37:39.166310: step: 552/463, loss: 3.257854461669922 2023-01-24 00:37:39.861539: step: 554/463, loss: 0.38660240173339844 2023-01-24 00:37:40.438674: step: 556/463, loss: 0.17643235623836517 2023-01-24 00:37:41.048608: step: 558/463, loss: 0.3294633626937866 2023-01-24 00:37:41.681098: step: 560/463, loss: 0.8908080458641052 2023-01-24 00:37:42.326217: step: 562/463, loss: 0.5673060417175293 2023-01-24 00:37:42.911236: step: 564/463, loss: 2.551823854446411 2023-01-24 00:37:43.547045: step: 566/463, loss: 0.20586590468883514 2023-01-24 00:37:44.147801: step: 568/463, loss: 0.8672521710395813 2023-01-24 00:37:44.789750: step: 570/463, loss: 0.5010704398155212 2023-01-24 00:37:45.409575: step: 572/463, loss: 0.48847177624702454 2023-01-24 00:37:46.169127: step: 574/463, loss: 0.9841750264167786 2023-01-24 00:37:46.775466: step: 576/463, loss: 0.10429425537586212 2023-01-24 00:37:47.321221: step: 578/463, loss: 0.6549193859100342 2023-01-24 00:37:47.981973: step: 580/463, loss: 0.3207032084465027 2023-01-24 00:37:48.581737: step: 582/463, loss: 1.3616400957107544 2023-01-24 00:37:49.226924: step: 584/463, loss: 0.2568977177143097 2023-01-24 00:37:49.905616: step: 586/463, loss: 1.2974032163619995 2023-01-24 00:37:50.512384: step: 588/463, loss: 1.5133144855499268 2023-01-24 00:37:51.174299: step: 590/463, loss: 0.876836895942688 2023-01-24 00:37:51.776433: step: 592/463, loss: 5.248604774475098 2023-01-24 00:37:52.358713: step: 594/463, loss: 0.7509450912475586 2023-01-24 00:37:52.993405: step: 596/463, loss: 1.0288889408111572 2023-01-24 00:37:53.690903: step: 598/463, loss: 0.23910602927207947 2023-01-24 00:37:54.322001: step: 600/463, loss: 1.0580503940582275 2023-01-24 00:37:55.039537: step: 602/463, loss: 0.4943075478076935 2023-01-24 00:37:55.634888: step: 604/463, loss: 1.9560822248458862 2023-01-24 00:37:56.253426: step: 606/463, loss: 0.35893234610557556 2023-01-24 00:37:56.822340: step: 608/463, loss: 0.8699542880058289 2023-01-24 00:37:57.415871: step: 610/463, loss: 0.3269708454608917 2023-01-24 00:37:58.054586: step: 612/463, loss: 0.17714658379554749 2023-01-24 00:37:58.693672: step: 614/463, loss: 1.839385747909546 2023-01-24 00:37:59.255990: step: 616/463, loss: 0.6757277846336365 2023-01-24 00:37:59.875638: step: 618/463, loss: 0.20764169096946716 2023-01-24 00:38:00.512481: step: 620/463, loss: 0.8209818601608276 2023-01-24 00:38:01.124801: step: 622/463, loss: 2.5471646785736084 2023-01-24 00:38:01.741748: step: 624/463, loss: 5.142049312591553 2023-01-24 00:38:02.350836: step: 626/463, loss: 1.1702842712402344 2023-01-24 00:38:02.893921: step: 628/463, loss: 0.18336109817028046 2023-01-24 00:38:03.543148: step: 630/463, loss: 1.1800868511199951 2023-01-24 00:38:04.118393: step: 632/463, loss: 0.2515396177768707 2023-01-24 00:38:04.795518: step: 634/463, loss: 0.32376086711883545 2023-01-24 00:38:05.417847: step: 636/463, loss: 0.5929350852966309 2023-01-24 00:38:06.092270: step: 638/463, loss: 0.7307126522064209 2023-01-24 00:38:06.876997: step: 640/463, loss: 1.8886864185333252 2023-01-24 00:38:07.410905: step: 642/463, loss: 1.7407853603363037 2023-01-24 00:38:08.030105: step: 644/463, loss: 1.293758511543274 2023-01-24 00:38:08.628241: step: 646/463, loss: 0.3747621774673462 2023-01-24 00:38:09.259129: step: 648/463, loss: 0.81679767370224 2023-01-24 00:38:09.891406: step: 650/463, loss: 0.8737527132034302 2023-01-24 00:38:10.488019: step: 652/463, loss: 1.2232550382614136 2023-01-24 00:38:11.135862: step: 654/463, loss: 4.435805320739746 2023-01-24 00:38:11.717580: step: 656/463, loss: 0.246099054813385 2023-01-24 00:38:12.336882: step: 658/463, loss: 0.35442981123924255 2023-01-24 00:38:12.903802: step: 660/463, loss: 0.6668121814727783 2023-01-24 00:38:13.549859: step: 662/463, loss: 2.919111728668213 2023-01-24 00:38:14.216873: step: 664/463, loss: 1.1155575513839722 2023-01-24 00:38:14.790389: step: 666/463, loss: 0.32291388511657715 2023-01-24 00:38:15.436435: step: 668/463, loss: 0.8881826996803284 2023-01-24 00:38:16.062019: step: 670/463, loss: 2.108267307281494 2023-01-24 00:38:16.731441: step: 672/463, loss: 2.5875234603881836 2023-01-24 00:38:17.297309: step: 674/463, loss: 0.4154404401779175 2023-01-24 00:38:17.883667: step: 676/463, loss: 0.5452799797058105 2023-01-24 00:38:18.488865: step: 678/463, loss: 0.33030974864959717 2023-01-24 00:38:19.095183: step: 680/463, loss: 0.7643932104110718 2023-01-24 00:38:19.645673: step: 682/463, loss: 1.0512075424194336 2023-01-24 00:38:20.281299: step: 684/463, loss: 0.29171621799468994 2023-01-24 00:38:20.869664: step: 686/463, loss: 0.17222242057323456 2023-01-24 00:38:21.441979: step: 688/463, loss: 0.7244187593460083 2023-01-24 00:38:22.071751: step: 690/463, loss: 0.9839054346084595 2023-01-24 00:38:22.713032: step: 692/463, loss: 0.3827417194843292 2023-01-24 00:38:23.361128: step: 694/463, loss: 0.8360433578491211 2023-01-24 00:38:24.015439: step: 696/463, loss: 1.2717273235321045 2023-01-24 00:38:24.654438: step: 698/463, loss: 0.11977909505367279 2023-01-24 00:38:25.310530: step: 700/463, loss: 1.086526870727539 2023-01-24 00:38:25.909646: step: 702/463, loss: 0.8046153783798218 2023-01-24 00:38:26.504922: step: 704/463, loss: 0.2607859969139099 2023-01-24 00:38:27.134249: step: 706/463, loss: 0.2078489065170288 2023-01-24 00:38:27.739802: step: 708/463, loss: 0.5821657776832581 2023-01-24 00:38:28.323770: step: 710/463, loss: 0.7629448771476746 2023-01-24 00:38:28.931539: step: 712/463, loss: 0.5218830108642578 2023-01-24 00:38:29.488170: step: 714/463, loss: 0.13013869524002075 2023-01-24 00:38:30.128791: step: 716/463, loss: 0.41951417922973633 2023-01-24 00:38:30.854174: step: 718/463, loss: 0.945431113243103 2023-01-24 00:38:31.406097: step: 720/463, loss: 0.8925947546958923 2023-01-24 00:38:32.045781: step: 722/463, loss: 1.3860774040222168 2023-01-24 00:38:32.619181: step: 724/463, loss: 0.6957374811172485 2023-01-24 00:38:33.336154: step: 726/463, loss: 0.3541806638240814 2023-01-24 00:38:33.931849: step: 728/463, loss: 0.7336874008178711 2023-01-24 00:38:34.565485: step: 730/463, loss: 3.7046642303466797 2023-01-24 00:38:35.176537: step: 732/463, loss: 0.5892671346664429 2023-01-24 00:38:35.856173: step: 734/463, loss: 3.310222625732422 2023-01-24 00:38:36.520136: step: 736/463, loss: 0.40570223331451416 2023-01-24 00:38:37.177448: step: 738/463, loss: 1.0854921340942383 2023-01-24 00:38:37.782719: step: 740/463, loss: 1.2302539348602295 2023-01-24 00:38:38.328449: step: 742/463, loss: 0.29824066162109375 2023-01-24 00:38:38.925645: step: 744/463, loss: 0.21834328770637512 2023-01-24 00:38:39.453443: step: 746/463, loss: 0.9164169430732727 2023-01-24 00:38:40.049034: step: 748/463, loss: 1.6107051372528076 2023-01-24 00:38:40.664820: step: 750/463, loss: 0.39505648612976074 2023-01-24 00:38:41.332226: step: 752/463, loss: 0.7051186561584473 2023-01-24 00:38:41.993140: step: 754/463, loss: 0.5606783628463745 2023-01-24 00:38:42.589805: step: 756/463, loss: 0.6574631929397583 2023-01-24 00:38:43.169952: step: 758/463, loss: 0.6522522568702698 2023-01-24 00:38:43.786198: step: 760/463, loss: 1.08747398853302 2023-01-24 00:38:44.353139: step: 762/463, loss: 0.8959791660308838 2023-01-24 00:38:45.077375: step: 764/463, loss: 12.886826515197754 2023-01-24 00:38:45.611844: step: 766/463, loss: 0.49474939703941345 2023-01-24 00:38:46.256033: step: 768/463, loss: 0.22615544497966766 2023-01-24 00:38:46.862334: step: 770/463, loss: 0.8572375178337097 2023-01-24 00:38:47.467029: step: 772/463, loss: 1.1405339241027832 2023-01-24 00:38:48.076056: step: 774/463, loss: 0.5088615417480469 2023-01-24 00:38:48.786140: step: 776/463, loss: 0.8074105381965637 2023-01-24 00:38:49.429968: step: 778/463, loss: 1.1362767219543457 2023-01-24 00:38:50.075451: step: 780/463, loss: 0.24066327512264252 2023-01-24 00:38:50.783646: step: 782/463, loss: 0.6467727422714233 2023-01-24 00:38:51.352151: step: 784/463, loss: 0.3192857503890991 2023-01-24 00:38:51.948385: step: 786/463, loss: 2.1987528800964355 2023-01-24 00:38:52.586050: step: 788/463, loss: 1.2378207445144653 2023-01-24 00:38:53.245465: step: 790/463, loss: 0.57787024974823 2023-01-24 00:38:53.853326: step: 792/463, loss: 0.7829220294952393 2023-01-24 00:38:54.448169: step: 794/463, loss: 1.5045771598815918 2023-01-24 00:38:55.054161: step: 796/463, loss: 0.39104029536247253 2023-01-24 00:38:55.735050: step: 798/463, loss: 0.804844856262207 2023-01-24 00:38:56.321941: step: 800/463, loss: 4.929046630859375 2023-01-24 00:38:56.899045: step: 802/463, loss: 0.38950616121292114 2023-01-24 00:38:57.553005: step: 804/463, loss: 0.6650965809822083 2023-01-24 00:38:58.153378: step: 806/463, loss: 0.6405189037322998 2023-01-24 00:38:58.789917: step: 808/463, loss: 0.864395022392273 2023-01-24 00:38:59.362217: step: 810/463, loss: 0.7275404930114746 2023-01-24 00:38:59.970226: step: 812/463, loss: 1.927489161491394 2023-01-24 00:39:00.600100: step: 814/463, loss: 0.6312630772590637 2023-01-24 00:39:01.224445: step: 816/463, loss: 1.1469464302062988 2023-01-24 00:39:01.834999: step: 818/463, loss: 1.0295991897583008 2023-01-24 00:39:02.395450: step: 820/463, loss: 0.5181655287742615 2023-01-24 00:39:03.016537: step: 822/463, loss: 0.2782285511493683 2023-01-24 00:39:03.627000: step: 824/463, loss: 0.7657139897346497 2023-01-24 00:39:04.230986: step: 826/463, loss: 0.3979091942310333 2023-01-24 00:39:04.815610: step: 828/463, loss: 1.0972727537155151 2023-01-24 00:39:05.431302: step: 830/463, loss: 0.4853355586528778 2023-01-24 00:39:06.095183: step: 832/463, loss: 0.58555668592453 2023-01-24 00:39:06.666539: step: 834/463, loss: 0.7757879495620728 2023-01-24 00:39:07.377334: step: 836/463, loss: 0.9302806854248047 2023-01-24 00:39:08.006238: step: 838/463, loss: 0.287883460521698 2023-01-24 00:39:08.588730: step: 840/463, loss: 0.9032585620880127 2023-01-24 00:39:09.211948: step: 842/463, loss: 0.5623165369033813 2023-01-24 00:39:09.831116: step: 844/463, loss: 0.3701925277709961 2023-01-24 00:39:10.495731: step: 846/463, loss: 3.4421286582946777 2023-01-24 00:39:11.137898: step: 848/463, loss: 0.2465246617794037 2023-01-24 00:39:11.727026: step: 850/463, loss: 1.4481582641601562 2023-01-24 00:39:12.326523: step: 852/463, loss: 2.952502727508545 2023-01-24 00:39:12.980458: step: 854/463, loss: 0.8330993056297302 2023-01-24 00:39:13.609796: step: 856/463, loss: 0.980499804019928 2023-01-24 00:39:14.260905: step: 858/463, loss: 0.5031222701072693 2023-01-24 00:39:14.923009: step: 860/463, loss: 0.5115187168121338 2023-01-24 00:39:15.560281: step: 862/463, loss: 0.30255091190338135 2023-01-24 00:39:16.111949: step: 864/463, loss: 0.6826913356781006 2023-01-24 00:39:16.799734: step: 866/463, loss: 2.0543980598449707 2023-01-24 00:39:17.336276: step: 868/463, loss: 1.2051109075546265 2023-01-24 00:39:17.957323: step: 870/463, loss: 0.3185446858406067 2023-01-24 00:39:18.549353: step: 872/463, loss: 0.26583802700042725 2023-01-24 00:39:19.196916: step: 874/463, loss: 0.5186485648155212 2023-01-24 00:39:19.818603: step: 876/463, loss: 0.17223262786865234 2023-01-24 00:39:20.453618: step: 878/463, loss: 0.23885127902030945 2023-01-24 00:39:21.122478: step: 880/463, loss: 0.5206378102302551 2023-01-24 00:39:21.720917: step: 882/463, loss: 0.25288882851600647 2023-01-24 00:39:22.381330: step: 884/463, loss: 1.3440959453582764 2023-01-24 00:39:23.049547: step: 886/463, loss: 1.2510030269622803 2023-01-24 00:39:23.649451: step: 888/463, loss: 0.9494075775146484 2023-01-24 00:39:24.377989: step: 890/463, loss: 2.1396734714508057 2023-01-24 00:39:24.949016: step: 892/463, loss: 0.894913375377655 2023-01-24 00:39:25.532144: step: 894/463, loss: 0.48696890473365784 2023-01-24 00:39:26.104313: step: 896/463, loss: 0.31168943643569946 2023-01-24 00:39:26.677826: step: 898/463, loss: 0.4771164059638977 2023-01-24 00:39:27.388594: step: 900/463, loss: 0.41636332869529724 2023-01-24 00:39:28.051803: step: 902/463, loss: 0.5606483221054077 2023-01-24 00:39:28.669155: step: 904/463, loss: 0.4186766743659973 2023-01-24 00:39:29.283363: step: 906/463, loss: 0.8164801001548767 2023-01-24 00:39:29.893831: step: 908/463, loss: 0.7248661518096924 2023-01-24 00:39:30.544471: step: 910/463, loss: 1.102360725402832 2023-01-24 00:39:31.149596: step: 912/463, loss: 0.5251678228378296 2023-01-24 00:39:31.813278: step: 914/463, loss: 0.6367273330688477 2023-01-24 00:39:32.464265: step: 916/463, loss: 0.1624290943145752 2023-01-24 00:39:33.050339: step: 918/463, loss: 1.067218542098999 2023-01-24 00:39:33.651461: step: 920/463, loss: 0.3017536699771881 2023-01-24 00:39:34.267273: step: 922/463, loss: 0.7523595690727234 2023-01-24 00:39:34.857360: step: 924/463, loss: 0.6766957640647888 2023-01-24 00:39:35.450278: step: 926/463, loss: 0.9524654150009155 ================================================== Loss: 0.932 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34665908539989426, 'r': 0.29019567376279026, 'f1': 0.3159243623644397}, 'combined': 0.23278637226853452, 'epoch': 5} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34381048886598153, 'r': 0.3491825277545125, 'f1': 0.3464756864540899}, 'combined': 0.26856010146202186, 'epoch': 5} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32352017463984456, 'r': 0.29288379446561685, 'f1': 0.3074406430971087}, 'combined': 0.22653521070313273, 'epoch': 5} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32498915501807535, 'r': 0.3485867131489834, 'f1': 0.3363745843956487}, 'combined': 0.26073053910093347, 'epoch': 5} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32326799298325637, 'r': 0.29632899356798503, 'f1': 0.30921286285354954}, 'combined': 0.22784105683945755, 'epoch': 5} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32287388038511977, 'r': 0.3347442436345727, 'f1': 0.32870192876752263}, 'combined': 0.2547833132073621, 'epoch': 5} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2730263157894737, 'r': 0.29642857142857143, 'f1': 0.2842465753424657}, 'combined': 0.18949771689497713, 'epoch': 5} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.2717391304347826, 'f1': 0.29069767441860467}, 'combined': 0.14534883720930233, 'epoch': 5} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.59375, 'r': 0.16379310344827586, 'f1': 0.25675675675675674}, 'combined': 0.17117117117117114, 'epoch': 5} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34665908539989426, 'r': 0.29019567376279026, 'f1': 0.3159243623644397}, 'combined': 0.23278637226853452, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34381048886598153, 'r': 0.3491825277545125, 'f1': 0.3464756864540899}, 'combined': 0.26856010146202186, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2730263157894737, 'r': 0.29642857142857143, 'f1': 0.2842465753424657}, 'combined': 0.18949771689497713, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32352017463984456, 'r': 0.29288379446561685, 'f1': 0.3074406430971087}, 'combined': 0.22653521070313273, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32498915501807535, 'r': 0.3485867131489834, 'f1': 0.3363745843956487}, 'combined': 0.26073053910093347, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.2717391304347826, 'f1': 0.29069767441860467}, 'combined': 0.14534883720930233, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32326799298325637, 'r': 0.29632899356798503, 'f1': 0.30921286285354954}, 'combined': 0.22784105683945755, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32287388038511977, 'r': 0.3347442436345727, 'f1': 0.32870192876752263}, 'combined': 0.2547833132073621, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.59375, 'r': 0.16379310344827586, 'f1': 0.25675675675675674}, 'combined': 0.17117117117117114, 'epoch': 5} ****************************** Epoch: 6 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:42:39.227175: step: 2/463, loss: 0.7707034349441528 2023-01-24 00:42:39.893570: step: 4/463, loss: 2.139986038208008 2023-01-24 00:42:40.552899: step: 6/463, loss: 1.0813167095184326 2023-01-24 00:42:41.163937: step: 8/463, loss: 0.9199732542037964 2023-01-24 00:42:41.775670: step: 10/463, loss: 0.2954943776130676 2023-01-24 00:42:42.445480: step: 12/463, loss: 0.7491180300712585 2023-01-24 00:42:43.060225: step: 14/463, loss: 2.387415885925293 2023-01-24 00:42:43.683162: step: 16/463, loss: 0.7784934043884277 2023-01-24 00:42:44.310443: step: 18/463, loss: 1.0621168613433838 2023-01-24 00:42:44.965200: step: 20/463, loss: 0.43312177062034607 2023-01-24 00:42:45.608997: step: 22/463, loss: 0.3385125994682312 2023-01-24 00:42:46.219410: step: 24/463, loss: 0.26764538884162903 2023-01-24 00:42:46.840070: step: 26/463, loss: 0.5014766454696655 2023-01-24 00:42:47.404621: step: 28/463, loss: 0.7479044795036316 2023-01-24 00:42:48.065190: step: 30/463, loss: 1.2933988571166992 2023-01-24 00:42:48.702715: step: 32/463, loss: 0.826672375202179 2023-01-24 00:42:49.459311: step: 34/463, loss: 0.3321710526943207 2023-01-24 00:42:50.104017: step: 36/463, loss: 0.8432543873786926 2023-01-24 00:42:50.831410: step: 38/463, loss: 4.003955841064453 2023-01-24 00:42:51.483254: step: 40/463, loss: 0.5457615852355957 2023-01-24 00:42:52.117325: step: 42/463, loss: 0.48903197050094604 2023-01-24 00:42:52.753433: step: 44/463, loss: 0.411626935005188 2023-01-24 00:42:53.392104: step: 46/463, loss: 0.5120250582695007 2023-01-24 00:42:54.004003: step: 48/463, loss: 0.8634063601493835 2023-01-24 00:42:54.609868: step: 50/463, loss: 0.5466212034225464 2023-01-24 00:42:55.233021: step: 52/463, loss: 0.3239036202430725 2023-01-24 00:42:55.840216: step: 54/463, loss: 0.13446322083473206 2023-01-24 00:42:56.479971: step: 56/463, loss: 1.3016963005065918 2023-01-24 00:42:57.108335: step: 58/463, loss: 0.45938900113105774 2023-01-24 00:42:57.702119: step: 60/463, loss: 0.2309417873620987 2023-01-24 00:42:58.294762: step: 62/463, loss: 0.5879989862442017 2023-01-24 00:42:58.937790: step: 64/463, loss: 0.3607172667980194 2023-01-24 00:42:59.537581: step: 66/463, loss: 0.15693148970603943 2023-01-24 00:43:00.155389: step: 68/463, loss: 0.43909192085266113 2023-01-24 00:43:00.809460: step: 70/463, loss: 0.7162376642227173 2023-01-24 00:43:01.406592: step: 72/463, loss: 0.7164316773414612 2023-01-24 00:43:01.929689: step: 74/463, loss: 0.6055495738983154 2023-01-24 00:43:02.534131: step: 76/463, loss: 0.40779149532318115 2023-01-24 00:43:03.220639: step: 78/463, loss: 0.3160075843334198 2023-01-24 00:43:03.910479: step: 80/463, loss: 0.3557608127593994 2023-01-24 00:43:04.564295: step: 82/463, loss: 0.417134165763855 2023-01-24 00:43:05.144103: step: 84/463, loss: 0.7544646859169006 2023-01-24 00:43:05.764027: step: 86/463, loss: 0.14090657234191895 2023-01-24 00:43:06.445624: step: 88/463, loss: 0.38752931356430054 2023-01-24 00:43:07.082664: step: 90/463, loss: 0.31950053572654724 2023-01-24 00:43:07.738759: step: 92/463, loss: 0.3570743501186371 2023-01-24 00:43:08.295356: step: 94/463, loss: 0.43369346857070923 2023-01-24 00:43:08.953103: step: 96/463, loss: 0.31948956847190857 2023-01-24 00:43:09.621241: step: 98/463, loss: 0.9211097359657288 2023-01-24 00:43:10.240905: step: 100/463, loss: 0.4100872874259949 2023-01-24 00:43:10.843938: step: 102/463, loss: 0.507490336894989 2023-01-24 00:43:11.533641: step: 104/463, loss: 0.6800287961959839 2023-01-24 00:43:12.177715: step: 106/463, loss: 0.3257635533809662 2023-01-24 00:43:12.753078: step: 108/463, loss: 0.24174264073371887 2023-01-24 00:43:13.447525: step: 110/463, loss: 0.5335452556610107 2023-01-24 00:43:14.085185: step: 112/463, loss: 0.7120741605758667 2023-01-24 00:43:14.702253: step: 114/463, loss: 0.2345055639743805 2023-01-24 00:43:15.302755: step: 116/463, loss: 0.7252466678619385 2023-01-24 00:43:15.957246: step: 118/463, loss: 2.573593854904175 2023-01-24 00:43:16.615278: step: 120/463, loss: 0.5916131734848022 2023-01-24 00:43:17.217486: step: 122/463, loss: 0.7222192883491516 2023-01-24 00:43:17.822979: step: 124/463, loss: 0.743853747844696 2023-01-24 00:43:18.411165: step: 126/463, loss: 0.4970948100090027 2023-01-24 00:43:19.067958: step: 128/463, loss: 0.9570875763893127 2023-01-24 00:43:19.688663: step: 130/463, loss: 1.0289407968521118 2023-01-24 00:43:20.253539: step: 132/463, loss: 0.39893364906311035 2023-01-24 00:43:20.889571: step: 134/463, loss: 0.10169912129640579 2023-01-24 00:43:21.497303: step: 136/463, loss: 0.3106191158294678 2023-01-24 00:43:22.120831: step: 138/463, loss: 1.3883326053619385 2023-01-24 00:43:22.771675: step: 140/463, loss: 0.2029099315404892 2023-01-24 00:43:23.356741: step: 142/463, loss: 0.23208893835544586 2023-01-24 00:43:23.971403: step: 144/463, loss: 0.48671191930770874 2023-01-24 00:43:24.533625: step: 146/463, loss: 1.0086190700531006 2023-01-24 00:43:25.123575: step: 148/463, loss: 0.186837300658226 2023-01-24 00:43:25.696324: step: 150/463, loss: 0.40221652388572693 2023-01-24 00:43:26.391494: step: 152/463, loss: 0.41822606325149536 2023-01-24 00:43:27.062147: step: 154/463, loss: 0.2611430883407593 2023-01-24 00:43:27.652784: step: 156/463, loss: 0.208994060754776 2023-01-24 00:43:28.282390: step: 158/463, loss: 0.600904107093811 2023-01-24 00:43:28.896681: step: 160/463, loss: 0.3257748782634735 2023-01-24 00:43:29.507179: step: 162/463, loss: 0.6409143805503845 2023-01-24 00:43:30.075513: step: 164/463, loss: 0.2803063690662384 2023-01-24 00:43:30.697970: step: 166/463, loss: 2.9948930740356445 2023-01-24 00:43:31.316528: step: 168/463, loss: 0.20377136766910553 2023-01-24 00:43:31.963540: step: 170/463, loss: 0.47860196232795715 2023-01-24 00:43:32.573719: step: 172/463, loss: 0.3640652596950531 2023-01-24 00:43:33.198166: step: 174/463, loss: 0.7649862766265869 2023-01-24 00:43:33.811994: step: 176/463, loss: 0.5883657932281494 2023-01-24 00:43:34.456252: step: 178/463, loss: 0.42379188537597656 2023-01-24 00:43:35.119435: step: 180/463, loss: 0.45746615529060364 2023-01-24 00:43:35.746618: step: 182/463, loss: 0.2927231192588806 2023-01-24 00:43:36.425634: step: 184/463, loss: 1.417479157447815 2023-01-24 00:43:37.047629: step: 186/463, loss: 1.0829448699951172 2023-01-24 00:43:37.629137: step: 188/463, loss: 0.19566978514194489 2023-01-24 00:43:38.353612: step: 190/463, loss: 0.2037958949804306 2023-01-24 00:43:38.954461: step: 192/463, loss: 0.6271510124206543 2023-01-24 00:43:39.587243: step: 194/463, loss: 0.6505361795425415 2023-01-24 00:43:40.201844: step: 196/463, loss: 1.0390840768814087 2023-01-24 00:43:40.834891: step: 198/463, loss: 0.6060810685157776 2023-01-24 00:43:41.432343: step: 200/463, loss: 0.3452938199043274 2023-01-24 00:43:42.033136: step: 202/463, loss: 0.9558874368667603 2023-01-24 00:43:42.779641: step: 204/463, loss: 1.3529548645019531 2023-01-24 00:43:43.394992: step: 206/463, loss: 0.8873156905174255 2023-01-24 00:43:44.055845: step: 208/463, loss: 0.41194331645965576 2023-01-24 00:43:44.736583: step: 210/463, loss: 1.1888885498046875 2023-01-24 00:43:45.397776: step: 212/463, loss: 1.880638837814331 2023-01-24 00:43:46.036132: step: 214/463, loss: 1.268115758895874 2023-01-24 00:43:46.678632: step: 216/463, loss: 0.25972822308540344 2023-01-24 00:43:47.267849: step: 218/463, loss: 0.7622128129005432 2023-01-24 00:43:47.844782: step: 220/463, loss: 0.31395792961120605 2023-01-24 00:43:48.488658: step: 222/463, loss: 0.41082993149757385 2023-01-24 00:43:49.157897: step: 224/463, loss: 0.9554826021194458 2023-01-24 00:43:49.920959: step: 226/463, loss: 0.6175981163978577 2023-01-24 00:43:50.508170: step: 228/463, loss: 0.8517409563064575 2023-01-24 00:43:51.173916: step: 230/463, loss: 0.2539173662662506 2023-01-24 00:43:51.829010: step: 232/463, loss: 0.6553291082382202 2023-01-24 00:43:52.401738: step: 234/463, loss: 0.2384515255689621 2023-01-24 00:43:53.033134: step: 236/463, loss: 0.43330925703048706 2023-01-24 00:43:53.718935: step: 238/463, loss: 0.2994055151939392 2023-01-24 00:43:54.331641: step: 240/463, loss: 0.2976920008659363 2023-01-24 00:43:55.022475: step: 242/463, loss: 0.8106310367584229 2023-01-24 00:43:55.709145: step: 244/463, loss: 0.3887229263782501 2023-01-24 00:43:56.374345: step: 246/463, loss: 0.5251857042312622 2023-01-24 00:43:57.027187: step: 248/463, loss: 1.9007600545883179 2023-01-24 00:43:57.671089: step: 250/463, loss: 0.23919978737831116 2023-01-24 00:43:58.270786: step: 252/463, loss: 0.6328753232955933 2023-01-24 00:43:58.942143: step: 254/463, loss: 0.7381503582000732 2023-01-24 00:43:59.544116: step: 256/463, loss: 0.20988327264785767 2023-01-24 00:44:00.162635: step: 258/463, loss: 0.9936560392379761 2023-01-24 00:44:00.804156: step: 260/463, loss: 1.5107687711715698 2023-01-24 00:44:01.464412: step: 262/463, loss: 0.4509623646736145 2023-01-24 00:44:02.003966: step: 264/463, loss: 0.272694855928421 2023-01-24 00:44:02.618515: step: 266/463, loss: 0.2572956085205078 2023-01-24 00:44:03.240374: step: 268/463, loss: 0.7112085819244385 2023-01-24 00:44:03.821453: step: 270/463, loss: 0.3555031418800354 2023-01-24 00:44:04.462198: step: 272/463, loss: 0.17245355248451233 2023-01-24 00:44:05.146047: step: 274/463, loss: 0.35611552000045776 2023-01-24 00:44:05.804039: step: 276/463, loss: 0.20791593194007874 2023-01-24 00:44:06.391630: step: 278/463, loss: 0.21697591245174408 2023-01-24 00:44:07.113142: step: 280/463, loss: 0.5917024612426758 2023-01-24 00:44:07.672217: step: 282/463, loss: 0.24465574324131012 2023-01-24 00:44:08.398136: step: 284/463, loss: 0.31289833784103394 2023-01-24 00:44:09.011945: step: 286/463, loss: 3.5686933994293213 2023-01-24 00:44:09.635953: step: 288/463, loss: 0.5982808470726013 2023-01-24 00:44:10.301623: step: 290/463, loss: 0.10122385621070862 2023-01-24 00:44:10.891452: step: 292/463, loss: 0.30538395047187805 2023-01-24 00:44:11.527402: step: 294/463, loss: 1.1313884258270264 2023-01-24 00:44:12.155762: step: 296/463, loss: 0.9204930067062378 2023-01-24 00:44:12.852900: step: 298/463, loss: 0.2620668113231659 2023-01-24 00:44:13.427687: step: 300/463, loss: 0.19864974915981293 2023-01-24 00:44:14.076545: step: 302/463, loss: 0.4536626935005188 2023-01-24 00:44:14.644417: step: 304/463, loss: 0.29439884424209595 2023-01-24 00:44:15.220262: step: 306/463, loss: 0.45344313979148865 2023-01-24 00:44:15.850740: step: 308/463, loss: 0.9328432679176331 2023-01-24 00:44:16.465324: step: 310/463, loss: 0.3301640450954437 2023-01-24 00:44:17.132135: step: 312/463, loss: 0.6652065515518188 2023-01-24 00:44:17.805838: step: 314/463, loss: 1.7454242706298828 2023-01-24 00:44:18.472312: step: 316/463, loss: 1.013277530670166 2023-01-24 00:44:19.119108: step: 318/463, loss: 0.9130158424377441 2023-01-24 00:44:19.802276: step: 320/463, loss: 0.2867955267429352 2023-01-24 00:44:20.393298: step: 322/463, loss: 0.800923764705658 2023-01-24 00:44:21.020611: step: 324/463, loss: 0.8890997171401978 2023-01-24 00:44:21.700391: step: 326/463, loss: 0.09248176217079163 2023-01-24 00:44:22.289174: step: 328/463, loss: 0.7782219052314758 2023-01-24 00:44:22.917443: step: 330/463, loss: 1.5487060546875 2023-01-24 00:44:23.559001: step: 332/463, loss: 0.23208898305892944 2023-01-24 00:44:24.184087: step: 334/463, loss: 1.7774015665054321 2023-01-24 00:44:24.848968: step: 336/463, loss: 0.23104318976402283 2023-01-24 00:44:25.407307: step: 338/463, loss: 1.6652344465255737 2023-01-24 00:44:26.025356: step: 340/463, loss: 0.942787766456604 2023-01-24 00:44:26.673934: step: 342/463, loss: 0.30264759063720703 2023-01-24 00:44:27.318612: step: 344/463, loss: 0.3393298089504242 2023-01-24 00:44:27.978042: step: 346/463, loss: 0.26077795028686523 2023-01-24 00:44:28.594904: step: 348/463, loss: 0.3874621093273163 2023-01-24 00:44:29.191496: step: 350/463, loss: 0.41772162914276123 2023-01-24 00:44:29.869458: step: 352/463, loss: 0.5013745427131653 2023-01-24 00:44:30.501259: step: 354/463, loss: 0.23540829122066498 2023-01-24 00:44:31.054282: step: 356/463, loss: 0.4870157837867737 2023-01-24 00:44:31.741537: step: 358/463, loss: 0.5283874869346619 2023-01-24 00:44:32.405039: step: 360/463, loss: 2.800855875015259 2023-01-24 00:44:33.030046: step: 362/463, loss: 0.3269934356212616 2023-01-24 00:44:33.594870: step: 364/463, loss: 0.31342813372612 2023-01-24 00:44:34.293462: step: 366/463, loss: 0.6667982339859009 2023-01-24 00:44:34.910003: step: 368/463, loss: 0.44992002844810486 2023-01-24 00:44:35.564834: step: 370/463, loss: 0.5229851007461548 2023-01-24 00:44:36.274827: step: 372/463, loss: 0.959665060043335 2023-01-24 00:44:36.886766: step: 374/463, loss: 0.7876143455505371 2023-01-24 00:44:37.468913: step: 376/463, loss: 1.4744727611541748 2023-01-24 00:44:38.139025: step: 378/463, loss: 0.34497082233428955 2023-01-24 00:44:38.679436: step: 380/463, loss: 0.6525307297706604 2023-01-24 00:44:39.321266: step: 382/463, loss: 0.6078543663024902 2023-01-24 00:44:39.951165: step: 384/463, loss: 0.5248273611068726 2023-01-24 00:44:40.541204: step: 386/463, loss: 1.6794519424438477 2023-01-24 00:44:41.080748: step: 388/463, loss: 0.40272361040115356 2023-01-24 00:44:41.681539: step: 390/463, loss: 1.127328872680664 2023-01-24 00:44:42.279071: step: 392/463, loss: 0.4430447518825531 2023-01-24 00:44:42.918605: step: 394/463, loss: 0.15610727667808533 2023-01-24 00:44:43.525629: step: 396/463, loss: 0.16499409079551697 2023-01-24 00:44:44.141403: step: 398/463, loss: 1.1390331983566284 2023-01-24 00:44:44.841091: step: 400/463, loss: 0.15032213926315308 2023-01-24 00:44:45.468887: step: 402/463, loss: 0.5404168963432312 2023-01-24 00:44:46.065620: step: 404/463, loss: 0.3954692780971527 2023-01-24 00:44:46.685188: step: 406/463, loss: 0.9207911491394043 2023-01-24 00:44:47.381385: step: 408/463, loss: 0.5242671370506287 2023-01-24 00:44:47.918347: step: 410/463, loss: 0.09933799505233765 2023-01-24 00:44:48.560320: step: 412/463, loss: 1.0824713706970215 2023-01-24 00:44:49.245686: step: 414/463, loss: 0.5512629747390747 2023-01-24 00:44:49.886892: step: 416/463, loss: 0.2910410761833191 2023-01-24 00:44:50.585155: step: 418/463, loss: 0.2227073609828949 2023-01-24 00:44:51.153431: step: 420/463, loss: 1.188856840133667 2023-01-24 00:44:51.877739: step: 422/463, loss: 0.5407494902610779 2023-01-24 00:44:52.472482: step: 424/463, loss: 0.5058335065841675 2023-01-24 00:44:53.111390: step: 426/463, loss: 0.35298728942871094 2023-01-24 00:44:53.753807: step: 428/463, loss: 1.0835871696472168 2023-01-24 00:44:54.379971: step: 430/463, loss: 0.40585756301879883 2023-01-24 00:44:54.966375: step: 432/463, loss: 0.6000495553016663 2023-01-24 00:44:55.546316: step: 434/463, loss: 0.10432958602905273 2023-01-24 00:44:56.178417: step: 436/463, loss: 0.2688646614551544 2023-01-24 00:44:56.772563: step: 438/463, loss: 0.16700828075408936 2023-01-24 00:44:57.487772: step: 440/463, loss: 1.4053618907928467 2023-01-24 00:44:58.084012: step: 442/463, loss: 0.2594049870967865 2023-01-24 00:44:58.700589: step: 444/463, loss: 0.19910700619220734 2023-01-24 00:44:59.333690: step: 446/463, loss: 0.1585317701101303 2023-01-24 00:44:59.953822: step: 448/463, loss: 1.686780333518982 2023-01-24 00:45:00.568953: step: 450/463, loss: 0.17139622569084167 2023-01-24 00:45:01.194039: step: 452/463, loss: 0.1759694665670395 2023-01-24 00:45:01.773273: step: 454/463, loss: 0.3747706115245819 2023-01-24 00:45:02.367104: step: 456/463, loss: 1.5020641088485718 2023-01-24 00:45:02.962653: step: 458/463, loss: 0.9281125068664551 2023-01-24 00:45:03.593280: step: 460/463, loss: 0.17045475542545319 2023-01-24 00:45:04.129166: step: 462/463, loss: 0.8570803999900818 2023-01-24 00:45:04.705663: step: 464/463, loss: 0.40932726860046387 2023-01-24 00:45:05.358145: step: 466/463, loss: 0.44993898272514343 2023-01-24 00:45:05.927441: step: 468/463, loss: 0.2706313729286194 2023-01-24 00:45:06.529216: step: 470/463, loss: 0.27971887588500977 2023-01-24 00:45:07.098793: step: 472/463, loss: 0.6699079275131226 2023-01-24 00:45:07.720369: step: 474/463, loss: 0.3720257580280304 2023-01-24 00:45:08.284810: step: 476/463, loss: 0.4284832775592804 2023-01-24 00:45:08.891000: step: 478/463, loss: 2.215088129043579 2023-01-24 00:45:09.498062: step: 480/463, loss: 0.49277177453041077 2023-01-24 00:45:10.176578: step: 482/463, loss: 1.403556227684021 2023-01-24 00:45:10.821081: step: 484/463, loss: 1.7338634729385376 2023-01-24 00:45:11.387775: step: 486/463, loss: 0.23187251389026642 2023-01-24 00:45:12.104412: step: 488/463, loss: 0.13469001650810242 2023-01-24 00:45:12.784694: step: 490/463, loss: 0.2629240155220032 2023-01-24 00:45:13.360943: step: 492/463, loss: 0.1605781465768814 2023-01-24 00:45:14.005407: step: 494/463, loss: 0.8620311617851257 2023-01-24 00:45:14.708413: step: 496/463, loss: 1.7924976348876953 2023-01-24 00:45:15.420085: step: 498/463, loss: 0.500994086265564 2023-01-24 00:45:15.996017: step: 500/463, loss: 0.6751488447189331 2023-01-24 00:45:16.560788: step: 502/463, loss: 0.39528971910476685 2023-01-24 00:45:17.145022: step: 504/463, loss: 1.156477689743042 2023-01-24 00:45:17.796619: step: 506/463, loss: 0.6608617901802063 2023-01-24 00:45:18.421260: step: 508/463, loss: 2.1466660499572754 2023-01-24 00:45:19.008984: step: 510/463, loss: 0.16079235076904297 2023-01-24 00:45:19.776338: step: 512/463, loss: 0.6797022819519043 2023-01-24 00:45:20.351775: step: 514/463, loss: 0.42306578159332275 2023-01-24 00:45:20.985114: step: 516/463, loss: 0.7788892984390259 2023-01-24 00:45:21.633587: step: 518/463, loss: 0.31527185440063477 2023-01-24 00:45:22.259375: step: 520/463, loss: 0.7262465953826904 2023-01-24 00:45:22.809925: step: 522/463, loss: 0.24979224801063538 2023-01-24 00:45:23.470842: step: 524/463, loss: 0.9982425570487976 2023-01-24 00:45:24.163544: step: 526/463, loss: 2.9610493183135986 2023-01-24 00:45:24.798256: step: 528/463, loss: 0.5105767250061035 2023-01-24 00:45:25.393174: step: 530/463, loss: 0.47443604469299316 2023-01-24 00:45:26.029053: step: 532/463, loss: 0.4157335162162781 2023-01-24 00:45:26.580596: step: 534/463, loss: 1.113906979560852 2023-01-24 00:45:27.151946: step: 536/463, loss: 0.43656229972839355 2023-01-24 00:45:27.728903: step: 538/463, loss: 0.14742086827754974 2023-01-24 00:45:28.339012: step: 540/463, loss: 0.2634921669960022 2023-01-24 00:45:28.972357: step: 542/463, loss: 2.609593391418457 2023-01-24 00:45:29.570654: step: 544/463, loss: 0.6216706037521362 2023-01-24 00:45:30.166721: step: 546/463, loss: 0.5992388129234314 2023-01-24 00:45:30.824619: step: 548/463, loss: 0.5043485760688782 2023-01-24 00:45:31.395988: step: 550/463, loss: 0.8861814737319946 2023-01-24 00:45:32.001278: step: 552/463, loss: 1.3062583208084106 2023-01-24 00:45:32.634831: step: 554/463, loss: 0.1536601334810257 2023-01-24 00:45:33.253275: step: 556/463, loss: 0.5184997916221619 2023-01-24 00:45:33.849082: step: 558/463, loss: 0.37322762608528137 2023-01-24 00:45:34.433655: step: 560/463, loss: 0.3827729821205139 2023-01-24 00:45:35.108817: step: 562/463, loss: 1.0360947847366333 2023-01-24 00:45:35.789243: step: 564/463, loss: 0.05056465044617653 2023-01-24 00:45:36.414797: step: 566/463, loss: 0.753536581993103 2023-01-24 00:45:37.037993: step: 568/463, loss: 0.4754701256752014 2023-01-24 00:45:37.653001: step: 570/463, loss: 1.1415660381317139 2023-01-24 00:45:38.285597: step: 572/463, loss: 0.8563820123672485 2023-01-24 00:45:38.878276: step: 574/463, loss: 1.3379309177398682 2023-01-24 00:45:39.475029: step: 576/463, loss: 0.9537292718887329 2023-01-24 00:45:40.128630: step: 578/463, loss: 0.6534048914909363 2023-01-24 00:45:40.749233: step: 580/463, loss: 0.18520990014076233 2023-01-24 00:45:41.349556: step: 582/463, loss: 0.35937270522117615 2023-01-24 00:45:41.969458: step: 584/463, loss: 0.7841150760650635 2023-01-24 00:45:42.569662: step: 586/463, loss: 0.32687780261039734 2023-01-24 00:45:43.167416: step: 588/463, loss: 0.2522583305835724 2023-01-24 00:45:43.755813: step: 590/463, loss: 0.6390393972396851 2023-01-24 00:45:44.344136: step: 592/463, loss: 0.8120431303977966 2023-01-24 00:45:44.937722: step: 594/463, loss: 0.6621069312095642 2023-01-24 00:45:45.584138: step: 596/463, loss: 0.7320810556411743 2023-01-24 00:45:46.127290: step: 598/463, loss: 1.664620041847229 2023-01-24 00:45:46.697363: step: 600/463, loss: 0.7753128409385681 2023-01-24 00:45:47.333640: step: 602/463, loss: 0.5026118159294128 2023-01-24 00:45:47.966116: step: 604/463, loss: 0.7103804349899292 2023-01-24 00:45:48.708564: step: 606/463, loss: 0.5003908276557922 2023-01-24 00:45:49.379688: step: 608/463, loss: 0.30910584330558777 2023-01-24 00:45:50.011624: step: 610/463, loss: 0.3031342029571533 2023-01-24 00:45:50.668577: step: 612/463, loss: 3.683682918548584 2023-01-24 00:45:51.299943: step: 614/463, loss: 0.441860169172287 2023-01-24 00:45:51.880705: step: 616/463, loss: 1.2985293865203857 2023-01-24 00:45:52.469629: step: 618/463, loss: 1.1251379251480103 2023-01-24 00:45:53.137383: step: 620/463, loss: 0.7022017240524292 2023-01-24 00:45:53.798340: step: 622/463, loss: 0.3726992607116699 2023-01-24 00:45:54.385916: step: 624/463, loss: 0.5467562675476074 2023-01-24 00:45:54.935222: step: 626/463, loss: 0.7616590857505798 2023-01-24 00:45:55.526809: step: 628/463, loss: 0.7752111554145813 2023-01-24 00:45:56.205477: step: 630/463, loss: 2.210228204727173 2023-01-24 00:45:56.820310: step: 632/463, loss: 0.6542195677757263 2023-01-24 00:45:57.397101: step: 634/463, loss: 0.10479707270860672 2023-01-24 00:45:58.028838: step: 636/463, loss: 1.4585634469985962 2023-01-24 00:45:58.648885: step: 638/463, loss: 0.7846353054046631 2023-01-24 00:45:59.249822: step: 640/463, loss: 0.5195389986038208 2023-01-24 00:45:59.880634: step: 642/463, loss: 0.261513888835907 2023-01-24 00:46:00.487831: step: 644/463, loss: 1.8318625688552856 2023-01-24 00:46:01.070299: step: 646/463, loss: 1.7291748523712158 2023-01-24 00:46:01.784557: step: 648/463, loss: 0.4745703637599945 2023-01-24 00:46:02.437943: step: 650/463, loss: 1.722800612449646 2023-01-24 00:46:03.033324: step: 652/463, loss: 0.8241041898727417 2023-01-24 00:46:03.657683: step: 654/463, loss: 0.4597836136817932 2023-01-24 00:46:04.297888: step: 656/463, loss: 0.42067283391952515 2023-01-24 00:46:04.993003: step: 658/463, loss: 0.6434979438781738 2023-01-24 00:46:05.568331: step: 660/463, loss: 0.20520149171352386 2023-01-24 00:46:06.211335: step: 662/463, loss: 0.9808153510093689 2023-01-24 00:46:06.898702: step: 664/463, loss: 0.7001787424087524 2023-01-24 00:46:07.503944: step: 666/463, loss: 0.20100714266300201 2023-01-24 00:46:08.087359: step: 668/463, loss: 1.1117522716522217 2023-01-24 00:46:08.721834: step: 670/463, loss: 0.30436694622039795 2023-01-24 00:46:09.485917: step: 672/463, loss: 1.8440709114074707 2023-01-24 00:46:10.130204: step: 674/463, loss: 0.4838281273841858 2023-01-24 00:46:10.820424: step: 676/463, loss: 0.31730198860168457 2023-01-24 00:46:11.417180: step: 678/463, loss: 0.6645902395248413 2023-01-24 00:46:12.013002: step: 680/463, loss: 0.24026280641555786 2023-01-24 00:46:12.660548: step: 682/463, loss: 0.31971657276153564 2023-01-24 00:46:13.301057: step: 684/463, loss: 0.852814257144928 2023-01-24 00:46:14.011296: step: 686/463, loss: 0.40470990538597107 2023-01-24 00:46:14.667084: step: 688/463, loss: 0.5372722148895264 2023-01-24 00:46:15.254203: step: 690/463, loss: 0.9322777390480042 2023-01-24 00:46:15.886169: step: 692/463, loss: 0.767435610294342 2023-01-24 00:46:16.565461: step: 694/463, loss: 0.36985763907432556 2023-01-24 00:46:17.228788: step: 696/463, loss: 0.2971535623073578 2023-01-24 00:46:17.919303: step: 698/463, loss: 0.7105740308761597 2023-01-24 00:46:18.510988: step: 700/463, loss: 0.30317947268486023 2023-01-24 00:46:19.127053: step: 702/463, loss: 2.6077194213867188 2023-01-24 00:46:19.820829: step: 704/463, loss: 0.43240219354629517 2023-01-24 00:46:20.484510: step: 706/463, loss: 0.8379062414169312 2023-01-24 00:46:21.094767: step: 708/463, loss: 0.13727572560310364 2023-01-24 00:46:21.712341: step: 710/463, loss: 0.4508086144924164 2023-01-24 00:46:22.311893: step: 712/463, loss: 0.3328574597835541 2023-01-24 00:46:22.933918: step: 714/463, loss: 0.16498662531375885 2023-01-24 00:46:23.576366: step: 716/463, loss: 0.7082381844520569 2023-01-24 00:46:24.147631: step: 718/463, loss: 0.22216297686100006 2023-01-24 00:46:24.727506: step: 720/463, loss: 1.5357190370559692 2023-01-24 00:46:25.406960: step: 722/463, loss: 2.8503880500793457 2023-01-24 00:46:26.033817: step: 724/463, loss: 0.887367844581604 2023-01-24 00:46:26.729363: step: 726/463, loss: 2.088667631149292 2023-01-24 00:46:27.351735: step: 728/463, loss: 1.1347683668136597 2023-01-24 00:46:28.050653: step: 730/463, loss: 0.3880079388618469 2023-01-24 00:46:28.668898: step: 732/463, loss: 0.4522395730018616 2023-01-24 00:46:29.319316: step: 734/463, loss: 0.801544189453125 2023-01-24 00:46:29.992985: step: 736/463, loss: 0.4031171500682831 2023-01-24 00:46:30.690697: step: 738/463, loss: 0.7093856334686279 2023-01-24 00:46:31.356596: step: 740/463, loss: 0.45906001329421997 2023-01-24 00:46:31.987944: step: 742/463, loss: 0.2921605706214905 2023-01-24 00:46:32.579225: step: 744/463, loss: 0.9815138578414917 2023-01-24 00:46:33.231973: step: 746/463, loss: 0.27694931626319885 2023-01-24 00:46:33.891242: step: 748/463, loss: 0.4865105152130127 2023-01-24 00:46:34.509275: step: 750/463, loss: 1.2407090663909912 2023-01-24 00:46:35.100082: step: 752/463, loss: 0.7826077342033386 2023-01-24 00:46:35.745023: step: 754/463, loss: 0.383323073387146 2023-01-24 00:46:36.419842: step: 756/463, loss: 0.4975360929965973 2023-01-24 00:46:37.098259: step: 758/463, loss: 0.8760184049606323 2023-01-24 00:46:37.781364: step: 760/463, loss: 0.5592105984687805 2023-01-24 00:46:38.389994: step: 762/463, loss: 0.43544185161590576 2023-01-24 00:46:39.014372: step: 764/463, loss: 0.25800713896751404 2023-01-24 00:46:39.678099: step: 766/463, loss: 1.2327625751495361 2023-01-24 00:46:40.259673: step: 768/463, loss: 0.1724807471036911 2023-01-24 00:46:40.914022: step: 770/463, loss: 0.5716548562049866 2023-01-24 00:46:41.489341: step: 772/463, loss: 1.4487444162368774 2023-01-24 00:46:42.074194: step: 774/463, loss: 0.4769440293312073 2023-01-24 00:46:42.702680: step: 776/463, loss: 0.18004576861858368 2023-01-24 00:46:43.349216: step: 778/463, loss: 0.7428921461105347 2023-01-24 00:46:44.039107: step: 780/463, loss: 0.95490562915802 2023-01-24 00:46:44.674717: step: 782/463, loss: 1.2781492471694946 2023-01-24 00:46:45.316758: step: 784/463, loss: 1.148795485496521 2023-01-24 00:46:45.984048: step: 786/463, loss: 0.733167290687561 2023-01-24 00:46:46.535924: step: 788/463, loss: 0.3271699845790863 2023-01-24 00:46:47.170725: step: 790/463, loss: 0.7401608228683472 2023-01-24 00:46:47.850403: step: 792/463, loss: 0.9319025278091431 2023-01-24 00:46:48.476360: step: 794/463, loss: 0.8767288327217102 2023-01-24 00:46:49.103475: step: 796/463, loss: 0.38745731115341187 2023-01-24 00:46:49.703393: step: 798/463, loss: 0.36867478489875793 2023-01-24 00:46:50.351458: step: 800/463, loss: 1.06089186668396 2023-01-24 00:46:50.986641: step: 802/463, loss: 0.4051103889942169 2023-01-24 00:46:51.647918: step: 804/463, loss: 0.6608892679214478 2023-01-24 00:46:52.292760: step: 806/463, loss: 0.18371205031871796 2023-01-24 00:46:52.926167: step: 808/463, loss: 1.0662380456924438 2023-01-24 00:46:53.590156: step: 810/463, loss: 0.8443009257316589 2023-01-24 00:46:54.142695: step: 812/463, loss: 0.7897899150848389 2023-01-24 00:46:54.998688: step: 814/463, loss: 3.3653674125671387 2023-01-24 00:46:55.639270: step: 816/463, loss: 0.3008726239204407 2023-01-24 00:46:56.264777: step: 818/463, loss: 0.780657172203064 2023-01-24 00:46:56.957422: step: 820/463, loss: 0.430696576833725 2023-01-24 00:46:57.558065: step: 822/463, loss: 0.32731932401657104 2023-01-24 00:46:58.203665: step: 824/463, loss: 0.7866721749305725 2023-01-24 00:46:58.826032: step: 826/463, loss: 10.629744529724121 2023-01-24 00:46:59.454194: step: 828/463, loss: 0.17941005527973175 2023-01-24 00:47:00.072740: step: 830/463, loss: 2.94114351272583 2023-01-24 00:47:00.663430: step: 832/463, loss: 0.8321788311004639 2023-01-24 00:47:01.302182: step: 834/463, loss: 0.303272008895874 2023-01-24 00:47:01.936586: step: 836/463, loss: 0.4845165014266968 2023-01-24 00:47:02.543470: step: 838/463, loss: 0.9765169024467468 2023-01-24 00:47:03.162833: step: 840/463, loss: 0.36979925632476807 2023-01-24 00:47:03.877442: step: 842/463, loss: 0.9372493624687195 2023-01-24 00:47:04.467445: step: 844/463, loss: 0.383465975522995 2023-01-24 00:47:05.113647: step: 846/463, loss: 0.6714259386062622 2023-01-24 00:47:05.731091: step: 848/463, loss: 0.5280047059059143 2023-01-24 00:47:06.500370: step: 850/463, loss: 0.24716663360595703 2023-01-24 00:47:07.084597: step: 852/463, loss: 1.1619768142700195 2023-01-24 00:47:07.702658: step: 854/463, loss: 0.3374081552028656 2023-01-24 00:47:08.301940: step: 856/463, loss: 0.49963921308517456 2023-01-24 00:47:08.914425: step: 858/463, loss: 0.6429591774940491 2023-01-24 00:47:09.549166: step: 860/463, loss: 0.27377620339393616 2023-01-24 00:47:10.182778: step: 862/463, loss: 0.29810675978660583 2023-01-24 00:47:10.805380: step: 864/463, loss: 0.48808982968330383 2023-01-24 00:47:11.488666: step: 866/463, loss: 0.2809275984764099 2023-01-24 00:47:12.124701: step: 868/463, loss: 0.7357933521270752 2023-01-24 00:47:12.742756: step: 870/463, loss: 0.8168999552726746 2023-01-24 00:47:13.384562: step: 872/463, loss: 0.3281324505805969 2023-01-24 00:47:14.015699: step: 874/463, loss: 0.6887940764427185 2023-01-24 00:47:14.605425: step: 876/463, loss: 0.5973749756813049 2023-01-24 00:47:15.221283: step: 878/463, loss: 0.1966850757598877 2023-01-24 00:47:15.899349: step: 880/463, loss: 0.4791107475757599 2023-01-24 00:47:16.569822: step: 882/463, loss: 0.2369583696126938 2023-01-24 00:47:17.146739: step: 884/463, loss: 0.24381394684314728 2023-01-24 00:47:17.759927: step: 886/463, loss: 0.4390019178390503 2023-01-24 00:47:18.343114: step: 888/463, loss: 4.36630916595459 2023-01-24 00:47:18.916783: step: 890/463, loss: 0.31937089562416077 2023-01-24 00:47:19.590541: step: 892/463, loss: 0.23234668374061584 2023-01-24 00:47:20.183150: step: 894/463, loss: 0.2113475203514099 2023-01-24 00:47:20.811256: step: 896/463, loss: 1.6517930030822754 2023-01-24 00:47:21.376904: step: 898/463, loss: 0.4913887083530426 2023-01-24 00:47:22.017817: step: 900/463, loss: 0.35672491788864136 2023-01-24 00:47:22.646333: step: 902/463, loss: 1.694883108139038 2023-01-24 00:47:23.274358: step: 904/463, loss: 0.3495710492134094 2023-01-24 00:47:23.902570: step: 906/463, loss: 2.152754545211792 2023-01-24 00:47:24.463835: step: 908/463, loss: 0.3849526643753052 2023-01-24 00:47:25.058929: step: 910/463, loss: 0.15306980907917023 2023-01-24 00:47:25.740512: step: 912/463, loss: 0.3186946213245392 2023-01-24 00:47:26.352719: step: 914/463, loss: 0.1676206886768341 2023-01-24 00:47:26.961899: step: 916/463, loss: 1.0460083484649658 2023-01-24 00:47:27.601781: step: 918/463, loss: 0.6239566802978516 2023-01-24 00:47:28.379001: step: 920/463, loss: 1.5490068197250366 2023-01-24 00:47:28.959911: step: 922/463, loss: 0.6644265055656433 2023-01-24 00:47:29.515203: step: 924/463, loss: 0.6493674516677856 2023-01-24 00:47:30.155441: step: 926/463, loss: 1.972741961479187 ================================================== Loss: 0.729 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3664599181442166, 'r': 0.3122210687794179, 'f1': 0.33717316239088774}, 'combined': 0.2484433828143383, 'epoch': 6} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3655563493147106, 'r': 0.32203773630105453, 'f1': 0.34241987150998204}, 'combined': 0.26541635973501004, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3283040393999639, 'r': 0.31335281179920654, 'f1': 0.3206542365401589}, 'combined': 0.23627154271380127, 'epoch': 6} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34970123997901775, 'r': 0.3199189915192662, 'f1': 0.3341478132367659}, 'combined': 0.2590045250926128, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3313349054163703, 'r': 0.3080723029488073, 'f1': 0.31928043983091725}, 'combined': 0.23525927145436007, 'epoch': 6} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35467922086975695, 'r': 0.3202190949190748, 'f1': 0.3365693955266962}, 'combined': 0.2608815410302621, 'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.42857142857142855, 'r': 0.2571428571428571, 'f1': 0.3214285714285714}, 'combined': 0.21428571428571425, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.38, 'r': 0.41304347826086957, 'f1': 0.39583333333333337}, 'combined': 0.19791666666666669, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.10344827586206896, 'f1': 0.16216216216216217}, 'combined': 0.10810810810810811, 'epoch': 6} New best chinese model... New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3664599181442166, 'r': 0.3122210687794179, 'f1': 0.33717316239088774}, 'combined': 0.2484433828143383, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3655563493147106, 'r': 0.32203773630105453, 'f1': 0.34241987150998204}, 'combined': 0.26541635973501004, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.42857142857142855, 'r': 0.2571428571428571, 'f1': 0.3214285714285714}, 'combined': 0.21428571428571425, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3283040393999639, 'r': 0.31335281179920654, 'f1': 0.3206542365401589}, 'combined': 0.23627154271380127, 'epoch': 6} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34970123997901775, 'r': 0.3199189915192662, 'f1': 0.3341478132367659}, 'combined': 0.2590045250926128, 'epoch': 6} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.38, 'r': 0.41304347826086957, 'f1': 0.39583333333333337}, 'combined': 0.19791666666666669, 'epoch': 6} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32326799298325637, 'r': 0.29632899356798503, 'f1': 0.30921286285354954}, 'combined': 0.22784105683945755, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32287388038511977, 'r': 0.3347442436345727, 'f1': 0.32870192876752263}, 'combined': 0.2547833132073621, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.59375, 'r': 0.16379310344827586, 'f1': 0.25675675675675674}, 'combined': 0.17117117117117114, 'epoch': 5} ****************************** Epoch: 7 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:50:22.246323: step: 2/463, loss: 0.807654857635498 2023-01-24 00:50:22.904113: step: 4/463, loss: 0.7418429851531982 2023-01-24 00:50:23.563536: step: 6/463, loss: 0.30306100845336914 2023-01-24 00:50:24.096074: step: 8/463, loss: 0.3872629404067993 2023-01-24 00:50:24.680822: step: 10/463, loss: 0.17483903467655182 2023-01-24 00:50:25.245662: step: 12/463, loss: 0.6835933923721313 2023-01-24 00:50:25.858017: step: 14/463, loss: 0.4460596442222595 2023-01-24 00:50:26.484847: step: 16/463, loss: 0.29260244965553284 2023-01-24 00:50:27.198905: step: 18/463, loss: 0.36839205026626587 2023-01-24 00:50:27.789947: step: 20/463, loss: 0.2833377420902252 2023-01-24 00:50:28.430407: step: 22/463, loss: 0.1488151103258133 2023-01-24 00:50:28.988785: step: 24/463, loss: 0.19251784682273865 2023-01-24 00:50:29.662585: step: 26/463, loss: 0.4787790775299072 2023-01-24 00:50:30.257354: step: 28/463, loss: 0.20639607310295105 2023-01-24 00:50:30.828659: step: 30/463, loss: 0.3026690185070038 2023-01-24 00:50:31.443386: step: 32/463, loss: 0.38833969831466675 2023-01-24 00:50:32.089801: step: 34/463, loss: 0.47689932584762573 2023-01-24 00:50:32.792578: step: 36/463, loss: 0.38047540187835693 2023-01-24 00:50:33.463202: step: 38/463, loss: 0.5756352543830872 2023-01-24 00:50:34.012536: step: 40/463, loss: 0.459454208612442 2023-01-24 00:50:34.574472: step: 42/463, loss: 0.5612775683403015 2023-01-24 00:50:35.190599: step: 44/463, loss: 0.22516365349292755 2023-01-24 00:50:35.812578: step: 46/463, loss: 0.3391384184360504 2023-01-24 00:50:36.412731: step: 48/463, loss: 0.19588753581047058 2023-01-24 00:50:37.075323: step: 50/463, loss: 2.4960684776306152 2023-01-24 00:50:37.727560: step: 52/463, loss: 0.9179222583770752 2023-01-24 00:50:38.395410: step: 54/463, loss: 0.2930730879306793 2023-01-24 00:50:39.034756: step: 56/463, loss: 0.40667015314102173 2023-01-24 00:50:39.659114: step: 58/463, loss: 0.38417690992355347 2023-01-24 00:50:40.312236: step: 60/463, loss: 0.1884060800075531 2023-01-24 00:50:40.916032: step: 62/463, loss: 0.4711410403251648 2023-01-24 00:50:41.517422: step: 64/463, loss: 0.4842836856842041 2023-01-24 00:50:42.152568: step: 66/463, loss: 0.6604782342910767 2023-01-24 00:50:42.725970: step: 68/463, loss: 0.1569122076034546 2023-01-24 00:50:43.360887: step: 70/463, loss: 0.6698318123817444 2023-01-24 00:50:44.023280: step: 72/463, loss: 0.06883817166090012 2023-01-24 00:50:44.630511: step: 74/463, loss: 0.802859365940094 2023-01-24 00:50:45.269587: step: 76/463, loss: 0.21703235805034637 2023-01-24 00:50:45.869156: step: 78/463, loss: 0.4542236626148224 2023-01-24 00:50:46.497710: step: 80/463, loss: 0.45655548572540283 2023-01-24 00:50:47.134547: step: 82/463, loss: 0.17808252573013306 2023-01-24 00:50:47.839541: step: 84/463, loss: 0.4637203812599182 2023-01-24 00:50:48.415254: step: 86/463, loss: 0.5246639251708984 2023-01-24 00:50:49.046648: step: 88/463, loss: 0.8671875 2023-01-24 00:50:49.722335: step: 90/463, loss: 0.28850170969963074 2023-01-24 00:50:50.361877: step: 92/463, loss: 0.8279662728309631 2023-01-24 00:50:50.980346: step: 94/463, loss: 0.1822223663330078 2023-01-24 00:50:51.599984: step: 96/463, loss: 0.780676543712616 2023-01-24 00:50:52.213462: step: 98/463, loss: 0.29151782393455505 2023-01-24 00:50:52.880035: step: 100/463, loss: 0.15302254259586334 2023-01-24 00:50:53.458337: step: 102/463, loss: 0.16352394223213196 2023-01-24 00:50:54.089402: step: 104/463, loss: 0.23694144189357758 2023-01-24 00:50:54.680571: step: 106/463, loss: 0.19907023012638092 2023-01-24 00:50:55.335982: step: 108/463, loss: 0.27573174238204956 2023-01-24 00:50:56.008035: step: 110/463, loss: 0.6128889918327332 2023-01-24 00:50:56.649999: step: 112/463, loss: 1.5029224157333374 2023-01-24 00:50:57.292758: step: 114/463, loss: 0.6199305057525635 2023-01-24 00:50:57.892884: step: 116/463, loss: 0.31033530831336975 2023-01-24 00:50:58.499313: step: 118/463, loss: 0.6868170499801636 2023-01-24 00:50:59.090771: step: 120/463, loss: 0.8857533931732178 2023-01-24 00:50:59.714741: step: 122/463, loss: 0.20317086577415466 2023-01-24 00:51:00.334750: step: 124/463, loss: 1.5232303142547607 2023-01-24 00:51:00.913312: step: 126/463, loss: 1.0587047338485718 2023-01-24 00:51:01.499057: step: 128/463, loss: 0.16158871352672577 2023-01-24 00:51:02.203894: step: 130/463, loss: 0.8642796874046326 2023-01-24 00:51:02.772927: step: 132/463, loss: 0.34781551361083984 2023-01-24 00:51:03.414493: step: 134/463, loss: 0.5579984188079834 2023-01-24 00:51:04.090467: step: 136/463, loss: 0.6466804146766663 2023-01-24 00:51:04.719783: step: 138/463, loss: 0.666228175163269 2023-01-24 00:51:05.315276: step: 140/463, loss: 0.48522070050239563 2023-01-24 00:51:05.916019: step: 142/463, loss: 0.23623982071876526 2023-01-24 00:51:06.505473: step: 144/463, loss: 1.0772197246551514 2023-01-24 00:51:07.120011: step: 146/463, loss: 0.27934178709983826 2023-01-24 00:51:07.693346: step: 148/463, loss: 0.48079627752304077 2023-01-24 00:51:08.380495: step: 150/463, loss: 0.3658713102340698 2023-01-24 00:51:08.974399: step: 152/463, loss: 0.5583712458610535 2023-01-24 00:51:09.630312: step: 154/463, loss: 0.4811897873878479 2023-01-24 00:51:10.243380: step: 156/463, loss: 0.48738229274749756 2023-01-24 00:51:10.941279: step: 158/463, loss: 0.2898690700531006 2023-01-24 00:51:11.524105: step: 160/463, loss: 1.4352582693099976 2023-01-24 00:51:12.173995: step: 162/463, loss: 0.2560354471206665 2023-01-24 00:51:12.769220: step: 164/463, loss: 0.36349260807037354 2023-01-24 00:51:13.408647: step: 166/463, loss: 1.0397658348083496 2023-01-24 00:51:14.049605: step: 168/463, loss: 0.9558521509170532 2023-01-24 00:51:14.663140: step: 170/463, loss: 0.29159045219421387 2023-01-24 00:51:15.261433: step: 172/463, loss: 0.2603519856929779 2023-01-24 00:51:15.871527: step: 174/463, loss: 0.42037704586982727 2023-01-24 00:51:16.499264: step: 176/463, loss: 0.15870143473148346 2023-01-24 00:51:17.125054: step: 178/463, loss: 0.779308021068573 2023-01-24 00:51:17.760146: step: 180/463, loss: 0.26173537969589233 2023-01-24 00:51:18.426557: step: 182/463, loss: 1.1715259552001953 2023-01-24 00:51:19.038854: step: 184/463, loss: 0.5751940011978149 2023-01-24 00:51:19.821618: step: 186/463, loss: 0.331051230430603 2023-01-24 00:51:20.465413: step: 188/463, loss: 0.18856927752494812 2023-01-24 00:51:21.076494: step: 190/463, loss: 0.6854605674743652 2023-01-24 00:51:21.687632: step: 192/463, loss: 0.6216540932655334 2023-01-24 00:51:22.288055: step: 194/463, loss: 0.18062391877174377 2023-01-24 00:51:22.896538: step: 196/463, loss: 0.3826708197593689 2023-01-24 00:51:23.564196: step: 198/463, loss: 0.3214462995529175 2023-01-24 00:51:24.226485: step: 200/463, loss: 0.33739641308784485 2023-01-24 00:51:24.851542: step: 202/463, loss: 0.8185354471206665 2023-01-24 00:51:25.435530: step: 204/463, loss: 0.2642740309238434 2023-01-24 00:51:26.057300: step: 206/463, loss: 0.7646939158439636 2023-01-24 00:51:26.683310: step: 208/463, loss: 16.320388793945312 2023-01-24 00:51:27.353042: step: 210/463, loss: 0.2212313711643219 2023-01-24 00:51:28.070785: step: 212/463, loss: 0.6817798018455505 2023-01-24 00:51:28.761260: step: 214/463, loss: 0.4071405827999115 2023-01-24 00:51:29.363798: step: 216/463, loss: 0.8440535068511963 2023-01-24 00:51:30.011993: step: 218/463, loss: 0.134111687541008 2023-01-24 00:51:30.610941: step: 220/463, loss: 0.2947194278240204 2023-01-24 00:51:31.282650: step: 222/463, loss: 0.22341568768024445 2023-01-24 00:51:31.903167: step: 224/463, loss: 0.2976885139942169 2023-01-24 00:51:32.549004: step: 226/463, loss: 0.8032858967781067 2023-01-24 00:51:33.139943: step: 228/463, loss: 0.8179838061332703 2023-01-24 00:51:33.751397: step: 230/463, loss: 0.33685067296028137 2023-01-24 00:51:34.374013: step: 232/463, loss: 0.5753642320632935 2023-01-24 00:51:34.972264: step: 234/463, loss: 0.06891431659460068 2023-01-24 00:51:35.598133: step: 236/463, loss: 0.25993892550468445 2023-01-24 00:51:36.297529: step: 238/463, loss: 1.5857888460159302 2023-01-24 00:51:36.892858: step: 240/463, loss: 0.23574510216712952 2023-01-24 00:51:37.555315: step: 242/463, loss: 0.9469902515411377 2023-01-24 00:51:38.251776: step: 244/463, loss: 0.8277049660682678 2023-01-24 00:51:38.922096: step: 246/463, loss: 0.7651932835578918 2023-01-24 00:51:39.549238: step: 248/463, loss: 0.40362977981567383 2023-01-24 00:51:40.182938: step: 250/463, loss: 0.37863391637802124 2023-01-24 00:51:40.798935: step: 252/463, loss: 0.49910610914230347 2023-01-24 00:51:41.444853: step: 254/463, loss: 0.6247705817222595 2023-01-24 00:51:42.061693: step: 256/463, loss: 0.39149364829063416 2023-01-24 00:51:42.821485: step: 258/463, loss: 0.6698437333106995 2023-01-24 00:51:43.386139: step: 260/463, loss: 3.34090256690979 2023-01-24 00:51:44.113820: step: 262/463, loss: 0.33949434757232666 2023-01-24 00:51:44.765478: step: 264/463, loss: 0.39752963185310364 2023-01-24 00:51:45.408867: step: 266/463, loss: 0.42182302474975586 2023-01-24 00:51:46.005204: step: 268/463, loss: 0.29192841053009033 2023-01-24 00:51:46.693774: step: 270/463, loss: 0.3941940665245056 2023-01-24 00:51:47.249083: step: 272/463, loss: 0.4327924847602844 2023-01-24 00:51:47.869598: step: 274/463, loss: 1.0333911180496216 2023-01-24 00:51:48.462284: step: 276/463, loss: 0.7380674481391907 2023-01-24 00:51:49.120407: step: 278/463, loss: 0.4221072196960449 2023-01-24 00:51:49.741248: step: 280/463, loss: 0.3454017639160156 2023-01-24 00:51:50.365453: step: 282/463, loss: 0.16518624126911163 2023-01-24 00:51:51.042559: step: 284/463, loss: 0.1964893639087677 2023-01-24 00:51:51.678981: step: 286/463, loss: 1.697434663772583 2023-01-24 00:51:52.317591: step: 288/463, loss: 0.4196591377258301 2023-01-24 00:51:52.981109: step: 290/463, loss: 0.542227566242218 2023-01-24 00:51:53.521949: step: 292/463, loss: 0.1716984063386917 2023-01-24 00:51:54.158845: step: 294/463, loss: 0.19872500002384186 2023-01-24 00:51:54.765710: step: 296/463, loss: 0.3258502185344696 2023-01-24 00:51:55.395762: step: 298/463, loss: 0.1960049271583557 2023-01-24 00:51:56.062018: step: 300/463, loss: 0.4125145673751831 2023-01-24 00:51:56.681024: step: 302/463, loss: 1.8116427659988403 2023-01-24 00:51:57.296200: step: 304/463, loss: 0.7189337611198425 2023-01-24 00:51:57.959239: step: 306/463, loss: 0.5128545165061951 2023-01-24 00:51:58.533898: step: 308/463, loss: 0.4789336323738098 2023-01-24 00:51:59.179321: step: 310/463, loss: 0.1118573248386383 2023-01-24 00:51:59.781388: step: 312/463, loss: 0.13190127909183502 2023-01-24 00:52:00.470288: step: 314/463, loss: 0.6765699982643127 2023-01-24 00:52:01.040150: step: 316/463, loss: 0.4358658492565155 2023-01-24 00:52:01.604275: step: 318/463, loss: 1.0757784843444824 2023-01-24 00:52:02.220047: step: 320/463, loss: 0.6946585178375244 2023-01-24 00:52:02.972461: step: 322/463, loss: 0.3356768786907196 2023-01-24 00:52:03.723107: step: 324/463, loss: 0.3354523181915283 2023-01-24 00:52:04.341128: step: 326/463, loss: 0.6069251894950867 2023-01-24 00:52:05.002519: step: 328/463, loss: 1.1654002666473389 2023-01-24 00:52:05.657693: step: 330/463, loss: 0.38132423162460327 2023-01-24 00:52:06.385590: step: 332/463, loss: 1.2565807104110718 2023-01-24 00:52:07.063425: step: 334/463, loss: 0.4544839859008789 2023-01-24 00:52:07.692846: step: 336/463, loss: 0.21627789735794067 2023-01-24 00:52:08.274147: step: 338/463, loss: 0.44023042917251587 2023-01-24 00:52:08.879125: step: 340/463, loss: 0.8491129875183105 2023-01-24 00:52:09.532365: step: 342/463, loss: 0.6784866452217102 2023-01-24 00:52:10.196397: step: 344/463, loss: 0.8466278910636902 2023-01-24 00:52:10.879752: step: 346/463, loss: 0.4635503888130188 2023-01-24 00:52:11.452198: step: 348/463, loss: 0.3777149021625519 2023-01-24 00:52:12.118451: step: 350/463, loss: 1.0819035768508911 2023-01-24 00:52:12.725884: step: 352/463, loss: 0.8905228972434998 2023-01-24 00:52:13.314406: step: 354/463, loss: 0.17360574007034302 2023-01-24 00:52:13.911213: step: 356/463, loss: 0.7715097069740295 2023-01-24 00:52:14.508442: step: 358/463, loss: 0.24512378871440887 2023-01-24 00:52:15.056819: step: 360/463, loss: 0.25460466742515564 2023-01-24 00:52:15.755338: step: 362/463, loss: 0.30078959465026855 2023-01-24 00:52:16.399576: step: 364/463, loss: 0.34295371174812317 2023-01-24 00:52:16.989850: step: 366/463, loss: 0.14471924304962158 2023-01-24 00:52:17.658633: step: 368/463, loss: 0.39906489849090576 2023-01-24 00:52:18.339367: step: 370/463, loss: 1.0646107196807861 2023-01-24 00:52:18.992630: step: 372/463, loss: 0.25546663999557495 2023-01-24 00:52:19.684829: step: 374/463, loss: 0.45163822174072266 2023-01-24 00:52:20.323836: step: 376/463, loss: 0.2709403336048126 2023-01-24 00:52:20.933525: step: 378/463, loss: 0.18220201134681702 2023-01-24 00:52:21.600979: step: 380/463, loss: 0.2909991443157196 2023-01-24 00:52:22.195715: step: 382/463, loss: 0.353007048368454 2023-01-24 00:52:22.849256: step: 384/463, loss: 0.5811549425125122 2023-01-24 00:52:23.469228: step: 386/463, loss: 0.8028070330619812 2023-01-24 00:52:24.034246: step: 388/463, loss: 0.5155059099197388 2023-01-24 00:52:24.658210: step: 390/463, loss: 0.42461997270584106 2023-01-24 00:52:25.275165: step: 392/463, loss: 3.6070966720581055 2023-01-24 00:52:25.876019: step: 394/463, loss: 0.21848256886005402 2023-01-24 00:52:26.492592: step: 396/463, loss: 0.23974929749965668 2023-01-24 00:52:27.098063: step: 398/463, loss: 0.42405638098716736 2023-01-24 00:52:27.693787: step: 400/463, loss: 0.6365113854408264 2023-01-24 00:52:28.341867: step: 402/463, loss: 0.3297807276248932 2023-01-24 00:52:28.958309: step: 404/463, loss: 0.4948780834674835 2023-01-24 00:52:29.634280: step: 406/463, loss: 0.315847247838974 2023-01-24 00:52:30.250765: step: 408/463, loss: 0.14956313371658325 2023-01-24 00:52:30.930019: step: 410/463, loss: 0.6917096376419067 2023-01-24 00:52:31.556340: step: 412/463, loss: 0.20221228897571564 2023-01-24 00:52:32.231921: step: 414/463, loss: 0.36974620819091797 2023-01-24 00:52:32.877158: step: 416/463, loss: 1.0079822540283203 2023-01-24 00:52:33.513977: step: 418/463, loss: 0.3096442222595215 2023-01-24 00:52:34.219609: step: 420/463, loss: 0.38070785999298096 2023-01-24 00:52:34.839592: step: 422/463, loss: 0.5125380158424377 2023-01-24 00:52:35.513485: step: 424/463, loss: 0.16851523518562317 2023-01-24 00:52:36.103128: step: 426/463, loss: 0.17775863409042358 2023-01-24 00:52:36.740921: step: 428/463, loss: 1.3436264991760254 2023-01-24 00:52:37.392200: step: 430/463, loss: 0.46194779872894287 2023-01-24 00:52:38.018191: step: 432/463, loss: 0.7208782434463501 2023-01-24 00:52:38.625668: step: 434/463, loss: 0.7194353938102722 2023-01-24 00:52:39.224720: step: 436/463, loss: 0.20689809322357178 2023-01-24 00:52:39.941448: step: 438/463, loss: 0.40774598717689514 2023-01-24 00:52:40.567649: step: 440/463, loss: 0.34186121821403503 2023-01-24 00:52:41.207230: step: 442/463, loss: 0.454932302236557 2023-01-24 00:52:41.863154: step: 444/463, loss: 1.22999906539917 2023-01-24 00:52:42.488305: step: 446/463, loss: 0.21150189638137817 2023-01-24 00:52:43.182487: step: 448/463, loss: 0.43343502283096313 2023-01-24 00:52:43.770106: step: 450/463, loss: 3.3858485221862793 2023-01-24 00:52:44.376110: step: 452/463, loss: 0.4066624641418457 2023-01-24 00:52:44.996142: step: 454/463, loss: 0.8977017998695374 2023-01-24 00:52:45.630731: step: 456/463, loss: 0.2965715825557709 2023-01-24 00:52:46.287657: step: 458/463, loss: 0.247700035572052 2023-01-24 00:52:46.966995: step: 460/463, loss: 0.35015323758125305 2023-01-24 00:52:47.616718: step: 462/463, loss: 0.2099350392818451 2023-01-24 00:52:48.162017: step: 464/463, loss: 2.146547794342041 2023-01-24 00:52:48.759104: step: 466/463, loss: 0.8461926579475403 2023-01-24 00:52:49.419450: step: 468/463, loss: 0.28629907965660095 2023-01-24 00:52:50.013151: step: 470/463, loss: 0.2353653907775879 2023-01-24 00:52:50.642165: step: 472/463, loss: 0.28022244572639465 2023-01-24 00:52:51.230027: step: 474/463, loss: 0.8489611148834229 2023-01-24 00:52:51.824789: step: 476/463, loss: 0.8608915209770203 2023-01-24 00:52:52.423051: step: 478/463, loss: 0.3049798607826233 2023-01-24 00:52:53.081428: step: 480/463, loss: 0.23026049137115479 2023-01-24 00:52:53.808874: step: 482/463, loss: 0.7937565445899963 2023-01-24 00:52:54.442817: step: 484/463, loss: 0.6780728101730347 2023-01-24 00:52:55.036084: step: 486/463, loss: 0.15573105216026306 2023-01-24 00:52:55.697635: step: 488/463, loss: 0.6416550278663635 2023-01-24 00:52:56.310309: step: 490/463, loss: 0.34756866097450256 2023-01-24 00:52:57.002670: step: 492/463, loss: 0.46077483892440796 2023-01-24 00:52:57.583774: step: 494/463, loss: 0.5212734937667847 2023-01-24 00:52:58.304061: step: 496/463, loss: 0.7460431456565857 2023-01-24 00:52:58.996734: step: 498/463, loss: 0.1731736809015274 2023-01-24 00:52:59.650987: step: 500/463, loss: 1.0468392372131348 2023-01-24 00:53:00.273100: step: 502/463, loss: 0.2363441437482834 2023-01-24 00:53:00.886588: step: 504/463, loss: 0.27943772077560425 2023-01-24 00:53:01.621399: step: 506/463, loss: 0.25516173243522644 2023-01-24 00:53:02.257818: step: 508/463, loss: 0.21006213128566742 2023-01-24 00:53:02.878579: step: 510/463, loss: 0.17224839329719543 2023-01-24 00:53:03.504798: step: 512/463, loss: 0.6667054891586304 2023-01-24 00:53:04.175974: step: 514/463, loss: 0.5145998597145081 2023-01-24 00:53:04.810673: step: 516/463, loss: 0.2936624586582184 2023-01-24 00:53:05.409020: step: 518/463, loss: 1.0246702432632446 2023-01-24 00:53:06.134603: step: 520/463, loss: 0.3395323157310486 2023-01-24 00:53:06.725899: step: 522/463, loss: 0.1633838415145874 2023-01-24 00:53:07.338448: step: 524/463, loss: 0.7587984204292297 2023-01-24 00:53:07.938172: step: 526/463, loss: 0.15019561350345612 2023-01-24 00:53:08.508934: step: 528/463, loss: 0.8764554262161255 2023-01-24 00:53:09.175718: step: 530/463, loss: 0.48441052436828613 2023-01-24 00:53:09.762956: step: 532/463, loss: 0.3228605389595032 2023-01-24 00:53:10.402359: step: 534/463, loss: 0.2345363199710846 2023-01-24 00:53:11.045734: step: 536/463, loss: 0.47030070424079895 2023-01-24 00:53:11.678842: step: 538/463, loss: 1.6260746717453003 2023-01-24 00:53:12.321618: step: 540/463, loss: 0.3487606644630432 2023-01-24 00:53:12.955688: step: 542/463, loss: 0.5683432817459106 2023-01-24 00:53:13.610764: step: 544/463, loss: 1.0445327758789062 2023-01-24 00:53:14.218640: step: 546/463, loss: 0.52126544713974 2023-01-24 00:53:14.855374: step: 548/463, loss: 0.14662712812423706 2023-01-24 00:53:15.472601: step: 550/463, loss: 0.3969848155975342 2023-01-24 00:53:16.026297: step: 552/463, loss: 0.28085920214653015 2023-01-24 00:53:16.688683: step: 554/463, loss: 0.5541949272155762 2023-01-24 00:53:17.307461: step: 556/463, loss: 0.8852306008338928 2023-01-24 00:53:18.003142: step: 558/463, loss: 0.2770520746707916 2023-01-24 00:53:18.714903: step: 560/463, loss: 0.5369435548782349 2023-01-24 00:53:19.325780: step: 562/463, loss: 0.20169247686862946 2023-01-24 00:53:19.923857: step: 564/463, loss: 0.3182758688926697 2023-01-24 00:53:20.520962: step: 566/463, loss: 0.6117270588874817 2023-01-24 00:53:21.160454: step: 568/463, loss: 0.3761119842529297 2023-01-24 00:53:21.808387: step: 570/463, loss: 0.5531476140022278 2023-01-24 00:53:22.411220: step: 572/463, loss: 0.26792165637016296 2023-01-24 00:53:23.056630: step: 574/463, loss: 0.3079359233379364 2023-01-24 00:53:23.756243: step: 576/463, loss: 0.09021048992872238 2023-01-24 00:53:24.387182: step: 578/463, loss: 0.4560026526451111 2023-01-24 00:53:24.991520: step: 580/463, loss: 0.9412817358970642 2023-01-24 00:53:25.664463: step: 582/463, loss: 0.32238975167274475 2023-01-24 00:53:26.227659: step: 584/463, loss: 0.22451704740524292 2023-01-24 00:53:26.808977: step: 586/463, loss: 0.7485355734825134 2023-01-24 00:53:27.432679: step: 588/463, loss: 0.3757765591144562 2023-01-24 00:53:28.046996: step: 590/463, loss: 0.30013325810432434 2023-01-24 00:53:28.658675: step: 592/463, loss: 0.28425711393356323 2023-01-24 00:53:29.274999: step: 594/463, loss: 0.5399459600448608 2023-01-24 00:53:29.930556: step: 596/463, loss: 0.22150474786758423 2023-01-24 00:53:30.552934: step: 598/463, loss: 0.2701462507247925 2023-01-24 00:53:31.154134: step: 600/463, loss: 0.19972483813762665 2023-01-24 00:53:31.767101: step: 602/463, loss: 0.4586547911167145 2023-01-24 00:53:32.393254: step: 604/463, loss: 0.3901784420013428 2023-01-24 00:53:32.992432: step: 606/463, loss: 0.2278127670288086 2023-01-24 00:53:33.685879: step: 608/463, loss: 0.3906201124191284 2023-01-24 00:53:34.294034: step: 610/463, loss: 0.16159886121749878 2023-01-24 00:53:34.949175: step: 612/463, loss: 0.39694541692733765 2023-01-24 00:53:35.601914: step: 614/463, loss: 0.1280856430530548 2023-01-24 00:53:36.278714: step: 616/463, loss: 0.2246810346841812 2023-01-24 00:53:36.892713: step: 618/463, loss: 2.6703665256500244 2023-01-24 00:53:37.461493: step: 620/463, loss: 0.27411097288131714 2023-01-24 00:53:38.128142: step: 622/463, loss: 0.42829567193984985 2023-01-24 00:53:38.791570: step: 624/463, loss: 0.7449740171432495 2023-01-24 00:53:39.365631: step: 626/463, loss: 0.6394545435905457 2023-01-24 00:53:39.986197: step: 628/463, loss: 1.4328681230545044 2023-01-24 00:53:40.610881: step: 630/463, loss: 0.21688972413539886 2023-01-24 00:53:41.245090: step: 632/463, loss: 0.23573768138885498 2023-01-24 00:53:41.916167: step: 634/463, loss: 0.3699069023132324 2023-01-24 00:53:42.504700: step: 636/463, loss: 0.20608720183372498 2023-01-24 00:53:43.097104: step: 638/463, loss: 0.751802384853363 2023-01-24 00:53:43.700834: step: 640/463, loss: 0.17930588126182556 2023-01-24 00:53:44.329689: step: 642/463, loss: 1.1260582208633423 2023-01-24 00:53:44.944934: step: 644/463, loss: 0.47740891575813293 2023-01-24 00:53:45.537058: step: 646/463, loss: 0.32726845145225525 2023-01-24 00:53:46.118536: step: 648/463, loss: 0.4457055330276489 2023-01-24 00:53:46.687659: step: 650/463, loss: 0.4692555367946625 2023-01-24 00:53:47.371729: step: 652/463, loss: 0.38204291462898254 2023-01-24 00:53:47.938706: step: 654/463, loss: 0.10649622976779938 2023-01-24 00:53:48.497918: step: 656/463, loss: 0.6467968821525574 2023-01-24 00:53:49.173968: step: 658/463, loss: 3.5594067573547363 2023-01-24 00:53:49.819095: step: 660/463, loss: 0.2943882644176483 2023-01-24 00:53:50.461383: step: 662/463, loss: 0.45514634251594543 2023-01-24 00:53:51.079021: step: 664/463, loss: 0.6457360982894897 2023-01-24 00:53:51.809089: step: 666/463, loss: 0.38968369364738464 2023-01-24 00:53:52.432941: step: 668/463, loss: 0.7273297905921936 2023-01-24 00:53:53.114592: step: 670/463, loss: 0.9432954788208008 2023-01-24 00:53:53.692804: step: 672/463, loss: 0.511285126209259 2023-01-24 00:53:54.357505: step: 674/463, loss: 0.5174610018730164 2023-01-24 00:53:54.972707: step: 676/463, loss: 0.8758470416069031 2023-01-24 00:53:55.545204: step: 678/463, loss: 0.6177841424942017 2023-01-24 00:53:56.195050: step: 680/463, loss: 0.16760799288749695 2023-01-24 00:53:56.827142: step: 682/463, loss: 0.5053936839103699 2023-01-24 00:53:57.414379: step: 684/463, loss: 0.6580698490142822 2023-01-24 00:53:58.041669: step: 686/463, loss: 1.0233275890350342 2023-01-24 00:53:58.614976: step: 688/463, loss: 0.37378281354904175 2023-01-24 00:53:59.230432: step: 690/463, loss: 2.5256831645965576 2023-01-24 00:53:59.905145: step: 692/463, loss: 0.2835516631603241 2023-01-24 00:54:00.534313: step: 694/463, loss: 0.39075908064842224 2023-01-24 00:54:01.172106: step: 696/463, loss: 1.6244397163391113 2023-01-24 00:54:01.763455: step: 698/463, loss: 0.4946838319301605 2023-01-24 00:54:02.401048: step: 700/463, loss: 0.6236141324043274 2023-01-24 00:54:03.051214: step: 702/463, loss: 1.4703083038330078 2023-01-24 00:54:03.671400: step: 704/463, loss: 0.27198830246925354 2023-01-24 00:54:04.317718: step: 706/463, loss: 0.23540060222148895 2023-01-24 00:54:04.969931: step: 708/463, loss: 0.37853050231933594 2023-01-24 00:54:05.670528: step: 710/463, loss: 0.39998170733451843 2023-01-24 00:54:06.309840: step: 712/463, loss: 0.3821353018283844 2023-01-24 00:54:06.980596: step: 714/463, loss: 2.231764793395996 2023-01-24 00:54:07.629049: step: 716/463, loss: 1.0240801572799683 2023-01-24 00:54:08.288757: step: 718/463, loss: 0.32155483961105347 2023-01-24 00:54:08.956543: step: 720/463, loss: 0.45779508352279663 2023-01-24 00:54:09.567316: step: 722/463, loss: 0.5051276087760925 2023-01-24 00:54:10.272717: step: 724/463, loss: 0.20838335156440735 2023-01-24 00:54:10.876410: step: 726/463, loss: 0.2737368047237396 2023-01-24 00:54:11.537636: step: 728/463, loss: 1.008242130279541 2023-01-24 00:54:12.154285: step: 730/463, loss: 0.17207854986190796 2023-01-24 00:54:12.879014: step: 732/463, loss: 0.35649871826171875 2023-01-24 00:54:13.527837: step: 734/463, loss: 0.8441442847251892 2023-01-24 00:54:14.146725: step: 736/463, loss: 0.1971072256565094 2023-01-24 00:54:14.867671: step: 738/463, loss: 0.3960895538330078 2023-01-24 00:54:15.459572: step: 740/463, loss: 0.3293589949607849 2023-01-24 00:54:16.117396: step: 742/463, loss: 0.5418511033058167 2023-01-24 00:54:16.778325: step: 744/463, loss: 0.1348060518503189 2023-01-24 00:54:17.374892: step: 746/463, loss: 0.26601338386535645 2023-01-24 00:54:17.980965: step: 748/463, loss: 0.7992173433303833 2023-01-24 00:54:18.613593: step: 750/463, loss: 0.31622886657714844 2023-01-24 00:54:19.247466: step: 752/463, loss: 0.25814250111579895 2023-01-24 00:54:19.877494: step: 754/463, loss: 0.566559910774231 2023-01-24 00:54:20.524816: step: 756/463, loss: 0.13443303108215332 2023-01-24 00:54:21.150438: step: 758/463, loss: 0.2778433859348297 2023-01-24 00:54:21.756066: step: 760/463, loss: 0.17934758961200714 2023-01-24 00:54:22.396349: step: 762/463, loss: 0.5554709434509277 2023-01-24 00:54:23.081302: step: 764/463, loss: 0.3979393541812897 2023-01-24 00:54:23.630778: step: 766/463, loss: 0.5631006956100464 2023-01-24 00:54:24.296258: step: 768/463, loss: 1.1216202974319458 2023-01-24 00:54:24.907675: step: 770/463, loss: 0.9841573238372803 2023-01-24 00:54:25.550165: step: 772/463, loss: 0.44668784737586975 2023-01-24 00:54:26.183458: step: 774/463, loss: 0.7791368365287781 2023-01-24 00:54:26.799872: step: 776/463, loss: 0.1461644023656845 2023-01-24 00:54:27.436285: step: 778/463, loss: 0.9589164853096008 2023-01-24 00:54:28.154349: step: 780/463, loss: 0.8096201419830322 2023-01-24 00:54:28.765942: step: 782/463, loss: 0.4256063997745514 2023-01-24 00:54:29.354167: step: 784/463, loss: 0.7588926553726196 2023-01-24 00:54:29.977848: step: 786/463, loss: 1.454399824142456 2023-01-24 00:54:30.644121: step: 788/463, loss: 0.6575345993041992 2023-01-24 00:54:31.307930: step: 790/463, loss: 0.2303142249584198 2023-01-24 00:54:32.023993: step: 792/463, loss: 1.300675630569458 2023-01-24 00:54:32.692365: step: 794/463, loss: 0.3828582167625427 2023-01-24 00:54:33.358504: step: 796/463, loss: 0.4375804662704468 2023-01-24 00:54:34.013384: step: 798/463, loss: 0.1595740020275116 2023-01-24 00:54:34.640683: step: 800/463, loss: 0.2536550760269165 2023-01-24 00:54:35.237679: step: 802/463, loss: 0.2015850841999054 2023-01-24 00:54:35.926492: step: 804/463, loss: 0.2345610409975052 2023-01-24 00:54:36.601702: step: 806/463, loss: 0.16826623678207397 2023-01-24 00:54:37.236041: step: 808/463, loss: 0.2407093197107315 2023-01-24 00:54:37.849432: step: 810/463, loss: 8.192036628723145 2023-01-24 00:54:38.470564: step: 812/463, loss: 0.34899067878723145 2023-01-24 00:54:39.064915: step: 814/463, loss: 1.387427806854248 2023-01-24 00:54:39.740541: step: 816/463, loss: 0.9117158651351929 2023-01-24 00:54:40.406496: step: 818/463, loss: 1.0657923221588135 2023-01-24 00:54:40.980976: step: 820/463, loss: 0.421714186668396 2023-01-24 00:54:41.602952: step: 822/463, loss: 0.6935148239135742 2023-01-24 00:54:42.209656: step: 824/463, loss: 0.2468547224998474 2023-01-24 00:54:42.881226: step: 826/463, loss: 0.3636510968208313 2023-01-24 00:54:43.516905: step: 828/463, loss: 3.340468406677246 2023-01-24 00:54:44.215472: step: 830/463, loss: 0.29835495352745056 2023-01-24 00:54:44.861008: step: 832/463, loss: 0.9196963310241699 2023-01-24 00:54:45.472649: step: 834/463, loss: 0.27401408553123474 2023-01-24 00:54:46.058945: step: 836/463, loss: 0.21896766126155853 2023-01-24 00:54:46.634134: step: 838/463, loss: 0.44227316975593567 2023-01-24 00:54:47.233362: step: 840/463, loss: 0.15010488033294678 2023-01-24 00:54:47.904301: step: 842/463, loss: 0.6847964525222778 2023-01-24 00:54:48.475109: step: 844/463, loss: 0.6540997624397278 2023-01-24 00:54:49.099359: step: 846/463, loss: 0.28418341279029846 2023-01-24 00:54:49.776430: step: 848/463, loss: 0.677936851978302 2023-01-24 00:54:50.476150: step: 850/463, loss: 0.15395225584506989 2023-01-24 00:54:51.178288: step: 852/463, loss: 0.5777564644813538 2023-01-24 00:54:51.849141: step: 854/463, loss: 0.5975735783576965 2023-01-24 00:54:52.460983: step: 856/463, loss: 1.5409783124923706 2023-01-24 00:54:53.069887: step: 858/463, loss: 0.7472043633460999 2023-01-24 00:54:53.684580: step: 860/463, loss: 0.19810639321804047 2023-01-24 00:54:54.350424: step: 862/463, loss: 0.3365834355354309 2023-01-24 00:54:55.004368: step: 864/463, loss: 0.4925359785556793 2023-01-24 00:54:55.594898: step: 866/463, loss: 0.6772522926330566 2023-01-24 00:54:56.140083: step: 868/463, loss: 0.5169462561607361 2023-01-24 00:54:56.752716: step: 870/463, loss: 0.7759062051773071 2023-01-24 00:54:57.363235: step: 872/463, loss: 0.2759900391101837 2023-01-24 00:54:57.955071: step: 874/463, loss: 0.8977956771850586 2023-01-24 00:54:58.576392: step: 876/463, loss: 0.5913708806037903 2023-01-24 00:54:59.196608: step: 878/463, loss: 0.8133429884910583 2023-01-24 00:54:59.818089: step: 880/463, loss: 0.09968496114015579 2023-01-24 00:55:00.457595: step: 882/463, loss: 0.18211953341960907 2023-01-24 00:55:01.059396: step: 884/463, loss: 0.2955639958381653 2023-01-24 00:55:01.660872: step: 886/463, loss: 0.423570454120636 2023-01-24 00:55:02.284387: step: 888/463, loss: 0.605933427810669 2023-01-24 00:55:02.946099: step: 890/463, loss: 0.6295936107635498 2023-01-24 00:55:03.582375: step: 892/463, loss: 0.33904340863227844 2023-01-24 00:55:04.231684: step: 894/463, loss: 0.09033072739839554 2023-01-24 00:55:04.819120: step: 896/463, loss: 0.3019601106643677 2023-01-24 00:55:05.462346: step: 898/463, loss: 0.28671038150787354 2023-01-24 00:55:06.014423: step: 900/463, loss: 0.21940843760967255 2023-01-24 00:55:06.705650: step: 902/463, loss: 0.136872336268425 2023-01-24 00:55:07.302605: step: 904/463, loss: 0.3400750160217285 2023-01-24 00:55:07.902133: step: 906/463, loss: 1.4074058532714844 2023-01-24 00:55:08.513441: step: 908/463, loss: 0.5688105821609497 2023-01-24 00:55:09.134501: step: 910/463, loss: 0.9753573536872864 2023-01-24 00:55:09.738516: step: 912/463, loss: 0.19331412017345428 2023-01-24 00:55:10.374569: step: 914/463, loss: 0.40865567326545715 2023-01-24 00:55:10.989539: step: 916/463, loss: 0.12868818640708923 2023-01-24 00:55:11.603446: step: 918/463, loss: 0.519225001335144 2023-01-24 00:55:12.210604: step: 920/463, loss: 0.20459315180778503 2023-01-24 00:55:12.835259: step: 922/463, loss: 0.29174530506134033 2023-01-24 00:55:13.456128: step: 924/463, loss: 1.4248042106628418 2023-01-24 00:55:14.084269: step: 926/463, loss: 0.5768977403640747 ================================================== Loss: 0.601 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32307611201461417, 'r': 0.3200108737602061, 'f1': 0.3215361877438105}, 'combined': 0.2369214014954393, 'epoch': 7} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33566514278692416, 'r': 0.3695396067378982, 'f1': 0.35178879593389434}, 'combined': 0.27267839684828177, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2949925095993484, 'r': 0.3190621071567905, 'f1': 0.30655557059549426}, 'combined': 0.2258830520177326, 'epoch': 7} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3201835155145725, 'r': 0.37100163311459183, 'f1': 0.34372441997017006}, 'combined': 0.26642754083812226, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2990214562095038, 'r': 0.31036951906375443, 'f1': 0.30458982597131956}, 'combined': 0.22443460861044598, 'epoch': 7} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3236893788502422, 'r': 0.3652641614548604, 'f1': 0.3432223586084464}, 'combined': 0.26603838322759965, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.30067567567567566, 'r': 0.31785714285714284, 'f1': 0.3090277777777778}, 'combined': 0.20601851851851852, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2980769230769231, 'r': 0.33695652173913043, 'f1': 0.31632653061224486}, 'combined': 0.15816326530612243, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36363636363636365, 'r': 0.13793103448275862, 'f1': 0.2}, 'combined': 0.13333333333333333, 'epoch': 7} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3664599181442166, 'r': 0.3122210687794179, 'f1': 0.33717316239088774}, 'combined': 0.2484433828143383, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3655563493147106, 'r': 0.32203773630105453, 'f1': 0.34241987150998204}, 'combined': 0.26541635973501004, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.42857142857142855, 'r': 0.2571428571428571, 'f1': 0.3214285714285714}, 'combined': 0.21428571428571425, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3283040393999639, 'r': 0.31335281179920654, 'f1': 0.3206542365401589}, 'combined': 0.23627154271380127, 'epoch': 6} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34970123997901775, 'r': 0.3199189915192662, 'f1': 0.3341478132367659}, 'combined': 0.2590045250926128, 'epoch': 6} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.38, 'r': 0.41304347826086957, 'f1': 0.39583333333333337}, 'combined': 0.19791666666666669, 'epoch': 6} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32326799298325637, 'r': 0.29632899356798503, 'f1': 0.30921286285354954}, 'combined': 0.22784105683945755, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32287388038511977, 'r': 0.3347442436345727, 'f1': 0.32870192876752263}, 'combined': 0.2547833132073621, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.59375, 'r': 0.16379310344827586, 'f1': 0.25675675675675674}, 'combined': 0.17117117117117114, 'epoch': 5} ****************************** Epoch: 8 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:57:53.404974: step: 2/463, loss: 0.27454549074172974 2023-01-24 00:57:54.020245: step: 4/463, loss: 0.1212196797132492 2023-01-24 00:57:54.595133: step: 6/463, loss: 0.1886449158191681 2023-01-24 00:57:55.223134: step: 8/463, loss: 0.9557496309280396 2023-01-24 00:57:55.838463: step: 10/463, loss: 0.3028426468372345 2023-01-24 00:57:56.460986: step: 12/463, loss: 0.17726431787014008 2023-01-24 00:57:57.088523: step: 14/463, loss: 0.7418721318244934 2023-01-24 00:57:57.632854: step: 16/463, loss: 0.23382455110549927 2023-01-24 00:57:58.267009: step: 18/463, loss: 0.34230974316596985 2023-01-24 00:57:58.939045: step: 20/463, loss: 0.31475967168807983 2023-01-24 00:57:59.683323: step: 22/463, loss: 0.5941904187202454 2023-01-24 00:58:00.442431: step: 24/463, loss: 0.079100102186203 2023-01-24 00:58:01.045535: step: 26/463, loss: 0.5590833425521851 2023-01-24 00:58:01.725719: step: 28/463, loss: 0.613706111907959 2023-01-24 00:58:02.317848: step: 30/463, loss: 0.08005934208631516 2023-01-24 00:58:02.859171: step: 32/463, loss: 0.31207549571990967 2023-01-24 00:58:03.507129: step: 34/463, loss: 0.18248826265335083 2023-01-24 00:58:04.081170: step: 36/463, loss: 0.43410515785217285 2023-01-24 00:58:04.729870: step: 38/463, loss: 0.16809117794036865 2023-01-24 00:58:05.332064: step: 40/463, loss: 0.14377035200595856 2023-01-24 00:58:05.928936: step: 42/463, loss: 0.21317505836486816 2023-01-24 00:58:06.676308: step: 44/463, loss: 0.29171112179756165 2023-01-24 00:58:07.324715: step: 46/463, loss: 0.2774196267127991 2023-01-24 00:58:07.958820: step: 48/463, loss: 0.32932984828948975 2023-01-24 00:58:08.607352: step: 50/463, loss: 0.14370252192020416 2023-01-24 00:58:09.233210: step: 52/463, loss: 0.10232265293598175 2023-01-24 00:58:09.858157: step: 54/463, loss: 0.19507911801338196 2023-01-24 00:58:10.661018: step: 56/463, loss: 0.35276392102241516 2023-01-24 00:58:11.326703: step: 58/463, loss: 0.6074838042259216 2023-01-24 00:58:11.926315: step: 60/463, loss: 0.28890860080718994 2023-01-24 00:58:12.521583: step: 62/463, loss: 0.16480675339698792 2023-01-24 00:58:13.147521: step: 64/463, loss: 0.30417776107788086 2023-01-24 00:58:13.868330: step: 66/463, loss: 0.4949904978275299 2023-01-24 00:58:14.518172: step: 68/463, loss: 0.23410335183143616 2023-01-24 00:58:15.249065: step: 70/463, loss: 0.3565497100353241 2023-01-24 00:58:15.883224: step: 72/463, loss: 0.7579011917114258 2023-01-24 00:58:16.550956: step: 74/463, loss: 0.19558286666870117 2023-01-24 00:58:17.246997: step: 76/463, loss: 0.38547712564468384 2023-01-24 00:58:17.893862: step: 78/463, loss: 0.6572482585906982 2023-01-24 00:58:18.475746: step: 80/463, loss: 0.24175013601779938 2023-01-24 00:58:19.091557: step: 82/463, loss: 0.11045575141906738 2023-01-24 00:58:19.697967: step: 84/463, loss: 0.5427460074424744 2023-01-24 00:58:20.335108: step: 86/463, loss: 0.488781213760376 2023-01-24 00:58:20.961725: step: 88/463, loss: 0.14809972047805786 2023-01-24 00:58:21.597162: step: 90/463, loss: 0.48586544394493103 2023-01-24 00:58:22.268523: step: 92/463, loss: 1.466514229774475 2023-01-24 00:58:22.883850: step: 94/463, loss: 0.10293036699295044 2023-01-24 00:58:23.512237: step: 96/463, loss: 0.5894548892974854 2023-01-24 00:58:24.081418: step: 98/463, loss: 0.3123920261859894 2023-01-24 00:58:24.718566: step: 100/463, loss: 0.5899331569671631 2023-01-24 00:58:25.360680: step: 102/463, loss: 0.7128351330757141 2023-01-24 00:58:25.938384: step: 104/463, loss: 0.28514599800109863 2023-01-24 00:58:26.548851: step: 106/463, loss: 0.122963547706604 2023-01-24 00:58:27.143949: step: 108/463, loss: 0.165252685546875 2023-01-24 00:58:27.769253: step: 110/463, loss: 0.997168779373169 2023-01-24 00:58:28.426878: step: 112/463, loss: 0.15040598809719086 2023-01-24 00:58:29.043370: step: 114/463, loss: 0.1996837854385376 2023-01-24 00:58:29.694274: step: 116/463, loss: 0.38388633728027344 2023-01-24 00:58:30.285231: step: 118/463, loss: 0.2027028352022171 2023-01-24 00:58:30.919356: step: 120/463, loss: 1.100383996963501 2023-01-24 00:58:31.576565: step: 122/463, loss: 0.656183123588562 2023-01-24 00:58:32.211402: step: 124/463, loss: 0.331216037273407 2023-01-24 00:58:32.888519: step: 126/463, loss: 0.1319654881954193 2023-01-24 00:58:33.477759: step: 128/463, loss: 0.5940644145011902 2023-01-24 00:58:34.083841: step: 130/463, loss: 0.27870550751686096 2023-01-24 00:58:34.690678: step: 132/463, loss: 0.38014325499534607 2023-01-24 00:58:35.294496: step: 134/463, loss: 0.3247123956680298 2023-01-24 00:58:35.893873: step: 136/463, loss: 0.5922228693962097 2023-01-24 00:58:36.454765: step: 138/463, loss: 0.36510688066482544 2023-01-24 00:58:37.129768: step: 140/463, loss: 0.632095456123352 2023-01-24 00:58:37.697331: step: 142/463, loss: 0.49989303946495056 2023-01-24 00:58:38.235993: step: 144/463, loss: 0.16780757904052734 2023-01-24 00:58:38.926931: step: 146/463, loss: 0.5040149092674255 2023-01-24 00:58:39.602492: step: 148/463, loss: 1.149385929107666 2023-01-24 00:58:40.131641: step: 150/463, loss: 0.3881452977657318 2023-01-24 00:58:40.735522: step: 152/463, loss: 0.6306591033935547 2023-01-24 00:58:41.475988: step: 154/463, loss: 0.914454460144043 2023-01-24 00:58:42.104349: step: 156/463, loss: 0.6722761988639832 2023-01-24 00:58:42.763273: step: 158/463, loss: 0.4319745600223541 2023-01-24 00:58:43.393709: step: 160/463, loss: 0.3853481411933899 2023-01-24 00:58:44.027029: step: 162/463, loss: 0.2538663446903229 2023-01-24 00:58:44.603057: step: 164/463, loss: 0.30353406071662903 2023-01-24 00:58:45.181170: step: 166/463, loss: 0.6068345308303833 2023-01-24 00:58:45.783428: step: 168/463, loss: 0.2134595662355423 2023-01-24 00:58:46.404608: step: 170/463, loss: 0.38325822353363037 2023-01-24 00:58:47.058590: step: 172/463, loss: 0.281634122133255 2023-01-24 00:58:47.711925: step: 174/463, loss: 0.5745058655738831 2023-01-24 00:58:48.316169: step: 176/463, loss: 0.1763850301504135 2023-01-24 00:58:48.963632: step: 178/463, loss: 0.21464592218399048 2023-01-24 00:58:49.593131: step: 180/463, loss: 0.37111854553222656 2023-01-24 00:58:50.161638: step: 182/463, loss: 0.275806725025177 2023-01-24 00:58:50.777188: step: 184/463, loss: 0.764906108379364 2023-01-24 00:58:51.464990: step: 186/463, loss: 1.3192195892333984 2023-01-24 00:58:52.088202: step: 188/463, loss: 0.1547672301530838 2023-01-24 00:58:52.690111: step: 190/463, loss: 0.2144608497619629 2023-01-24 00:58:53.287198: step: 192/463, loss: 0.21921630203723907 2023-01-24 00:58:53.871042: step: 194/463, loss: 0.23773063719272614 2023-01-24 00:58:54.424544: step: 196/463, loss: 0.24143753945827484 2023-01-24 00:58:55.064014: step: 198/463, loss: 0.215574249625206 2023-01-24 00:58:55.690279: step: 200/463, loss: 0.19480891525745392 2023-01-24 00:58:56.312934: step: 202/463, loss: 0.20009520649909973 2023-01-24 00:58:56.914312: step: 204/463, loss: 0.21734221279621124 2023-01-24 00:58:57.544968: step: 206/463, loss: 0.1972508430480957 2023-01-24 00:58:58.134659: step: 208/463, loss: 0.4554538428783417 2023-01-24 00:58:58.735282: step: 210/463, loss: 0.19008655846118927 2023-01-24 00:58:59.300804: step: 212/463, loss: 0.23899120092391968 2023-01-24 00:58:59.886682: step: 214/463, loss: 0.1570320874452591 2023-01-24 00:59:00.574705: step: 216/463, loss: 1.5704302787780762 2023-01-24 00:59:01.281045: step: 218/463, loss: 0.22331584990024567 2023-01-24 00:59:01.928975: step: 220/463, loss: 0.23334158957004547 2023-01-24 00:59:02.501906: step: 222/463, loss: 0.2397872358560562 2023-01-24 00:59:03.144592: step: 224/463, loss: 0.3666159510612488 2023-01-24 00:59:03.760770: step: 226/463, loss: 0.39328733086586 2023-01-24 00:59:04.425156: step: 228/463, loss: 0.27437376976013184 2023-01-24 00:59:05.101176: step: 230/463, loss: 0.3906311094760895 2023-01-24 00:59:05.852456: step: 232/463, loss: 0.51536625623703 2023-01-24 00:59:06.451435: step: 234/463, loss: 0.1613476425409317 2023-01-24 00:59:07.137833: step: 236/463, loss: 0.4213184416294098 2023-01-24 00:59:07.701746: step: 238/463, loss: 1.1637166738510132 2023-01-24 00:59:08.330387: step: 240/463, loss: 0.8391563296318054 2023-01-24 00:59:08.982162: step: 242/463, loss: 0.6976143717765808 2023-01-24 00:59:09.614228: step: 244/463, loss: 0.24530336260795593 2023-01-24 00:59:10.280994: step: 246/463, loss: 0.7133917808532715 2023-01-24 00:59:10.871651: step: 248/463, loss: 0.42089614272117615 2023-01-24 00:59:11.533816: step: 250/463, loss: 0.19285614788532257 2023-01-24 00:59:12.165920: step: 252/463, loss: 0.1550258994102478 2023-01-24 00:59:12.755483: step: 254/463, loss: 0.5059755444526672 2023-01-24 00:59:13.436580: step: 256/463, loss: 0.16709062457084656 2023-01-24 00:59:14.026094: step: 258/463, loss: 0.6122788786888123 2023-01-24 00:59:14.593005: step: 260/463, loss: 0.23059485852718353 2023-01-24 00:59:15.205083: step: 262/463, loss: 0.13720940053462982 2023-01-24 00:59:15.865110: step: 264/463, loss: 0.4587840437889099 2023-01-24 00:59:16.488895: step: 266/463, loss: 0.13675425946712494 2023-01-24 00:59:17.117792: step: 268/463, loss: 0.24087047576904297 2023-01-24 00:59:17.729242: step: 270/463, loss: 0.243925541639328 2023-01-24 00:59:18.359402: step: 272/463, loss: 0.8294358253479004 2023-01-24 00:59:18.921524: step: 274/463, loss: 0.20199105143547058 2023-01-24 00:59:19.538087: step: 276/463, loss: 1.4911930561065674 2023-01-24 00:59:20.224675: step: 278/463, loss: 0.3227616548538208 2023-01-24 00:59:20.856605: step: 280/463, loss: 0.47269970178604126 2023-01-24 00:59:21.513583: step: 282/463, loss: 0.19193769991397858 2023-01-24 00:59:22.186049: step: 284/463, loss: 1.3312082290649414 2023-01-24 00:59:22.752993: step: 286/463, loss: 1.020970344543457 2023-01-24 00:59:23.323798: step: 288/463, loss: 0.3713074326515198 2023-01-24 00:59:23.907084: step: 290/463, loss: 0.46999895572662354 2023-01-24 00:59:24.527731: step: 292/463, loss: 0.09279989451169968 2023-01-24 00:59:25.099588: step: 294/463, loss: 0.28854042291641235 2023-01-24 00:59:25.695196: step: 296/463, loss: 0.32105016708374023 2023-01-24 00:59:26.302288: step: 298/463, loss: 0.5700692534446716 2023-01-24 00:59:26.863827: step: 300/463, loss: 2.2373642921447754 2023-01-24 00:59:27.509080: step: 302/463, loss: 0.48219946026802063 2023-01-24 00:59:28.141724: step: 304/463, loss: 0.19304844737052917 2023-01-24 00:59:28.752913: step: 306/463, loss: 0.32953280210494995 2023-01-24 00:59:29.327584: step: 308/463, loss: 0.3513036370277405 2023-01-24 00:59:29.866321: step: 310/463, loss: 0.1558876484632492 2023-01-24 00:59:30.448961: step: 312/463, loss: 0.3405417501926422 2023-01-24 00:59:31.020898: step: 314/463, loss: 0.5260372757911682 2023-01-24 00:59:31.626296: step: 316/463, loss: 0.18853680789470673 2023-01-24 00:59:32.203928: step: 318/463, loss: 0.18867424130439758 2023-01-24 00:59:32.869251: step: 320/463, loss: 0.13986973464488983 2023-01-24 00:59:33.532733: step: 322/463, loss: 0.16702282428741455 2023-01-24 00:59:34.175498: step: 324/463, loss: 0.24092090129852295 2023-01-24 00:59:34.777403: step: 326/463, loss: 0.16718487441539764 2023-01-24 00:59:35.353848: step: 328/463, loss: 1.4338083267211914 2023-01-24 00:59:35.971493: step: 330/463, loss: 0.5060054063796997 2023-01-24 00:59:36.598824: step: 332/463, loss: 0.2249811738729477 2023-01-24 00:59:37.170727: step: 334/463, loss: 0.12821589410305023 2023-01-24 00:59:37.746768: step: 336/463, loss: 0.25745388865470886 2023-01-24 00:59:38.333041: step: 338/463, loss: 0.20372295379638672 2023-01-24 00:59:38.935314: step: 340/463, loss: 0.35935333371162415 2023-01-24 00:59:39.548485: step: 342/463, loss: 0.1503312736749649 2023-01-24 00:59:40.164878: step: 344/463, loss: 0.39330172538757324 2023-01-24 00:59:40.759751: step: 346/463, loss: 0.31189626455307007 2023-01-24 00:59:41.317642: step: 348/463, loss: 0.25170210003852844 2023-01-24 00:59:41.938166: step: 350/463, loss: 0.1842184215784073 2023-01-24 00:59:42.616738: step: 352/463, loss: 0.30135318636894226 2023-01-24 00:59:43.200516: step: 354/463, loss: 0.15483951568603516 2023-01-24 00:59:43.845479: step: 356/463, loss: 0.3748832643032074 2023-01-24 00:59:44.469453: step: 358/463, loss: 0.15046001970767975 2023-01-24 00:59:45.122297: step: 360/463, loss: 0.6785732507705688 2023-01-24 00:59:45.761558: step: 362/463, loss: 0.46878835558891296 2023-01-24 00:59:46.359531: step: 364/463, loss: 0.14611150324344635 2023-01-24 00:59:46.951416: step: 366/463, loss: 0.20062783360481262 2023-01-24 00:59:47.714231: step: 368/463, loss: 1.4893033504486084 2023-01-24 00:59:48.319978: step: 370/463, loss: 0.3624162971973419 2023-01-24 00:59:48.902173: step: 372/463, loss: 0.22823302447795868 2023-01-24 00:59:49.507126: step: 374/463, loss: 0.47888925671577454 2023-01-24 00:59:50.176353: step: 376/463, loss: 0.2142326831817627 2023-01-24 00:59:50.779859: step: 378/463, loss: 0.08842132240533829 2023-01-24 00:59:51.329371: step: 380/463, loss: 0.20132502913475037 2023-01-24 00:59:51.944545: step: 382/463, loss: 0.5702792406082153 2023-01-24 00:59:52.539310: step: 384/463, loss: 0.2645527124404907 2023-01-24 00:59:53.132348: step: 386/463, loss: 0.2643909156322479 2023-01-24 00:59:53.712670: step: 388/463, loss: 0.11464519798755646 2023-01-24 00:59:54.268364: step: 390/463, loss: 1.2405376434326172 2023-01-24 00:59:54.933734: step: 392/463, loss: 1.8353369235992432 2023-01-24 00:59:55.565658: step: 394/463, loss: 0.19126446545124054 2023-01-24 00:59:56.153740: step: 396/463, loss: 0.18591001629829407 2023-01-24 00:59:56.741810: step: 398/463, loss: 0.6155804991722107 2023-01-24 00:59:57.366916: step: 400/463, loss: 0.6110813021659851 2023-01-24 00:59:57.935350: step: 402/463, loss: 0.3434958755970001 2023-01-24 00:59:58.475718: step: 404/463, loss: 0.2551930546760559 2023-01-24 00:59:59.123653: step: 406/463, loss: 0.4357593357563019 2023-01-24 00:59:59.677746: step: 408/463, loss: 0.34898892045021057 2023-01-24 01:00:00.267396: step: 410/463, loss: 0.17367421090602875 2023-01-24 01:00:00.894654: step: 412/463, loss: 0.26134827733039856 2023-01-24 01:00:01.509907: step: 414/463, loss: 0.3092823326587677 2023-01-24 01:00:02.102439: step: 416/463, loss: 7.929866790771484 2023-01-24 01:00:02.703759: step: 418/463, loss: 0.24986934661865234 2023-01-24 01:00:03.289716: step: 420/463, loss: 0.1763705015182495 2023-01-24 01:00:03.899750: step: 422/463, loss: 0.29315638542175293 2023-01-24 01:00:04.556869: step: 424/463, loss: 1.7149840593338013 2023-01-24 01:00:05.178894: step: 426/463, loss: 0.6248399019241333 2023-01-24 01:00:05.804189: step: 428/463, loss: 0.32113632559776306 2023-01-24 01:00:06.559857: step: 430/463, loss: 0.435170441865921 2023-01-24 01:00:07.197140: step: 432/463, loss: 0.17764750123023987 2023-01-24 01:00:07.804673: step: 434/463, loss: 0.7798184156417847 2023-01-24 01:00:08.393359: step: 436/463, loss: 0.49483349919319153 2023-01-24 01:00:09.009692: step: 438/463, loss: 0.1810191422700882 2023-01-24 01:00:09.548944: step: 440/463, loss: 0.20566925406455994 2023-01-24 01:00:10.146492: step: 442/463, loss: 0.19017474353313446 2023-01-24 01:00:10.706885: step: 444/463, loss: 3.432640552520752 2023-01-24 01:00:11.325166: step: 446/463, loss: 0.23029492795467377 2023-01-24 01:00:11.898449: step: 448/463, loss: 0.2634607255458832 2023-01-24 01:00:12.536540: step: 450/463, loss: 1.443914771080017 2023-01-24 01:00:13.200999: step: 452/463, loss: 0.16821452975273132 2023-01-24 01:00:13.812325: step: 454/463, loss: 0.4631841480731964 2023-01-24 01:00:14.435556: step: 456/463, loss: 0.423465758562088 2023-01-24 01:00:15.071680: step: 458/463, loss: 0.6958268880844116 2023-01-24 01:00:15.698259: step: 460/463, loss: 0.15607315301895142 2023-01-24 01:00:16.262179: step: 462/463, loss: 0.39441296458244324 2023-01-24 01:00:16.877529: step: 464/463, loss: 0.47144651412963867 2023-01-24 01:00:17.475143: step: 466/463, loss: 0.2517615258693695 2023-01-24 01:00:18.082914: step: 468/463, loss: 0.3042495846748352 2023-01-24 01:00:18.697403: step: 470/463, loss: 0.7725745439529419 2023-01-24 01:00:19.293391: step: 472/463, loss: 0.1470322161912918 2023-01-24 01:00:19.987329: step: 474/463, loss: 0.044164594262838364 2023-01-24 01:00:20.596115: step: 476/463, loss: 0.06672929972410202 2023-01-24 01:00:21.212483: step: 478/463, loss: 0.5192210674285889 2023-01-24 01:00:21.817080: step: 480/463, loss: 0.2355055958032608 2023-01-24 01:00:22.483307: step: 482/463, loss: 0.33371806144714355 2023-01-24 01:00:23.090662: step: 484/463, loss: 1.575055480003357 2023-01-24 01:00:23.722823: step: 486/463, loss: 0.30354687571525574 2023-01-24 01:00:24.335844: step: 488/463, loss: 0.3030671179294586 2023-01-24 01:00:25.014284: step: 490/463, loss: 0.4810742735862732 2023-01-24 01:00:25.674406: step: 492/463, loss: 0.34197038412094116 2023-01-24 01:00:26.293032: step: 494/463, loss: 0.3498307764530182 2023-01-24 01:00:27.017470: step: 496/463, loss: 0.468900203704834 2023-01-24 01:00:27.618052: step: 498/463, loss: 0.4007844626903534 2023-01-24 01:00:28.232192: step: 500/463, loss: 0.20504020154476166 2023-01-24 01:00:28.833892: step: 502/463, loss: 0.3164767026901245 2023-01-24 01:00:29.420832: step: 504/463, loss: 0.7533845901489258 2023-01-24 01:00:30.005804: step: 506/463, loss: 0.23010562360286713 2023-01-24 01:00:30.659853: step: 508/463, loss: 0.4565170407295227 2023-01-24 01:00:31.240838: step: 510/463, loss: 0.28601327538490295 2023-01-24 01:00:31.911146: step: 512/463, loss: 0.12773257493972778 2023-01-24 01:00:32.579637: step: 514/463, loss: 0.4118571877479553 2023-01-24 01:00:33.139649: step: 516/463, loss: 0.20072387158870697 2023-01-24 01:00:33.708975: step: 518/463, loss: 0.16222453117370605 2023-01-24 01:00:34.318299: step: 520/463, loss: 0.33557742834091187 2023-01-24 01:00:34.957431: step: 522/463, loss: 0.8582131266593933 2023-01-24 01:00:35.582273: step: 524/463, loss: 0.19500266015529633 2023-01-24 01:00:36.220356: step: 526/463, loss: 0.48093193769454956 2023-01-24 01:00:36.839818: step: 528/463, loss: 0.22190040349960327 2023-01-24 01:00:37.456436: step: 530/463, loss: 1.0980900526046753 2023-01-24 01:00:38.074070: step: 532/463, loss: 0.8851344585418701 2023-01-24 01:00:38.679977: step: 534/463, loss: 0.30034351348876953 2023-01-24 01:00:39.241169: step: 536/463, loss: 4.212319374084473 2023-01-24 01:00:39.877240: step: 538/463, loss: 0.3674173355102539 2023-01-24 01:00:40.536640: step: 540/463, loss: 0.3275724947452545 2023-01-24 01:00:41.134008: step: 542/463, loss: 0.10442414879798889 2023-01-24 01:00:41.734562: step: 544/463, loss: 0.13089825212955475 2023-01-24 01:00:42.356952: step: 546/463, loss: 0.13757087290287018 2023-01-24 01:00:42.987130: step: 548/463, loss: 0.15106546878814697 2023-01-24 01:00:43.595420: step: 550/463, loss: 0.25607678294181824 2023-01-24 01:00:44.164366: step: 552/463, loss: 0.616750955581665 2023-01-24 01:00:44.852696: step: 554/463, loss: 1.3699512481689453 2023-01-24 01:00:45.418701: step: 556/463, loss: 0.05203315615653992 2023-01-24 01:00:46.067457: step: 558/463, loss: 0.26425009965896606 2023-01-24 01:00:46.696481: step: 560/463, loss: 0.4489101469516754 2023-01-24 01:00:47.295062: step: 562/463, loss: 3.074275493621826 2023-01-24 01:00:47.841451: step: 564/463, loss: 0.17674091458320618 2023-01-24 01:00:48.416631: step: 566/463, loss: 0.2708001136779785 2023-01-24 01:00:49.073444: step: 568/463, loss: 0.48294466733932495 2023-01-24 01:00:49.688825: step: 570/463, loss: 0.14365455508232117 2023-01-24 01:00:50.325628: step: 572/463, loss: 0.1714448779821396 2023-01-24 01:00:50.924803: step: 574/463, loss: 0.471382200717926 2023-01-24 01:00:51.675168: step: 576/463, loss: 0.5601073503494263 2023-01-24 01:00:52.318761: step: 578/463, loss: 1.8641855716705322 2023-01-24 01:00:52.934998: step: 580/463, loss: 0.35316845774650574 2023-01-24 01:00:53.575030: step: 582/463, loss: 0.25862613320350647 2023-01-24 01:00:54.185654: step: 584/463, loss: 0.22035980224609375 2023-01-24 01:00:54.801168: step: 586/463, loss: 0.299435555934906 2023-01-24 01:00:55.402532: step: 588/463, loss: 0.31494027376174927 2023-01-24 01:00:56.048644: step: 590/463, loss: 0.2907198965549469 2023-01-24 01:00:56.674332: step: 592/463, loss: 0.41593366861343384 2023-01-24 01:00:57.292049: step: 594/463, loss: 0.17134374380111694 2023-01-24 01:00:57.952638: step: 596/463, loss: 0.9272217750549316 2023-01-24 01:00:58.515254: step: 598/463, loss: 1.0068892240524292 2023-01-24 01:00:59.252794: step: 600/463, loss: 0.6083629131317139 2023-01-24 01:00:59.883013: step: 602/463, loss: 0.06725073605775833 2023-01-24 01:01:00.516137: step: 604/463, loss: 0.6282367706298828 2023-01-24 01:01:01.093757: step: 606/463, loss: 0.483088880777359 2023-01-24 01:01:01.686755: step: 608/463, loss: 0.2649013102054596 2023-01-24 01:01:02.263987: step: 610/463, loss: 0.851344883441925 2023-01-24 01:01:02.884714: step: 612/463, loss: 0.14235958456993103 2023-01-24 01:01:03.506112: step: 614/463, loss: 0.27428579330444336 2023-01-24 01:01:04.196028: step: 616/463, loss: 0.6611388325691223 2023-01-24 01:01:04.891125: step: 618/463, loss: 0.4034227132797241 2023-01-24 01:01:05.494472: step: 620/463, loss: 0.17095880210399628 2023-01-24 01:01:06.082674: step: 622/463, loss: 0.21051353216171265 2023-01-24 01:01:06.801306: step: 624/463, loss: 0.2529064118862152 2023-01-24 01:01:07.444923: step: 626/463, loss: 0.31142446398735046 2023-01-24 01:01:08.065830: step: 628/463, loss: 0.20614519715309143 2023-01-24 01:01:08.677288: step: 630/463, loss: 0.229752779006958 2023-01-24 01:01:09.252780: step: 632/463, loss: 0.1111738383769989 2023-01-24 01:01:09.832942: step: 634/463, loss: 0.8112533688545227 2023-01-24 01:01:10.458408: step: 636/463, loss: 0.32893380522727966 2023-01-24 01:01:11.056223: step: 638/463, loss: 0.4370189905166626 2023-01-24 01:01:11.617951: step: 640/463, loss: 0.32176172733306885 2023-01-24 01:01:12.275377: step: 642/463, loss: 0.4488545358181 2023-01-24 01:01:12.874032: step: 644/463, loss: 0.17297649383544922 2023-01-24 01:01:13.450754: step: 646/463, loss: 0.17448876798152924 2023-01-24 01:01:14.061231: step: 648/463, loss: 0.49763840436935425 2023-01-24 01:01:14.673681: step: 650/463, loss: 0.2580465078353882 2023-01-24 01:01:15.264858: step: 652/463, loss: 0.7945095300674438 2023-01-24 01:01:15.862550: step: 654/463, loss: 0.19404321908950806 2023-01-24 01:01:16.473497: step: 656/463, loss: 0.5144699811935425 2023-01-24 01:01:17.053404: step: 658/463, loss: 0.4411325454711914 2023-01-24 01:01:17.643718: step: 660/463, loss: 0.1756366640329361 2023-01-24 01:01:18.267524: step: 662/463, loss: 0.28219160437583923 2023-01-24 01:01:18.864997: step: 664/463, loss: 0.12736627459526062 2023-01-24 01:01:19.492654: step: 666/463, loss: 0.6618295311927795 2023-01-24 01:01:20.093162: step: 668/463, loss: 0.812481701374054 2023-01-24 01:01:20.711031: step: 670/463, loss: 0.13250994682312012 2023-01-24 01:01:21.351132: step: 672/463, loss: 0.2282523810863495 2023-01-24 01:01:21.895167: step: 674/463, loss: 0.34716796875 2023-01-24 01:01:22.489863: step: 676/463, loss: 0.2386336326599121 2023-01-24 01:01:23.092151: step: 678/463, loss: 0.15245743095874786 2023-01-24 01:01:23.712006: step: 680/463, loss: 0.28126800060272217 2023-01-24 01:01:24.320027: step: 682/463, loss: 0.5431774854660034 2023-01-24 01:01:24.882728: step: 684/463, loss: 0.5609435439109802 2023-01-24 01:01:25.449734: step: 686/463, loss: 0.13595929741859436 2023-01-24 01:01:26.061400: step: 688/463, loss: 0.4991567134857178 2023-01-24 01:01:26.638600: step: 690/463, loss: 0.38147616386413574 2023-01-24 01:01:27.271382: step: 692/463, loss: 0.28176456689834595 2023-01-24 01:01:27.897011: step: 694/463, loss: 0.1652994304895401 2023-01-24 01:01:28.573704: step: 696/463, loss: 0.8659372329711914 2023-01-24 01:01:29.214124: step: 698/463, loss: 0.24530211091041565 2023-01-24 01:01:29.833444: step: 700/463, loss: 0.14377903938293457 2023-01-24 01:01:30.548197: step: 702/463, loss: 0.4163385331630707 2023-01-24 01:01:31.203118: step: 704/463, loss: 0.22683656215667725 2023-01-24 01:01:31.840200: step: 706/463, loss: 0.122826486825943 2023-01-24 01:01:32.477245: step: 708/463, loss: 0.7241946458816528 2023-01-24 01:01:33.143204: step: 710/463, loss: 0.4256995618343353 2023-01-24 01:01:33.743377: step: 712/463, loss: 0.2642606794834137 2023-01-24 01:01:34.355632: step: 714/463, loss: 0.21085220575332642 2023-01-24 01:01:34.998264: step: 716/463, loss: 0.38459616899490356 2023-01-24 01:01:35.623574: step: 718/463, loss: 0.24798916280269623 2023-01-24 01:01:36.197040: step: 720/463, loss: 0.05103160813450813 2023-01-24 01:01:36.824163: step: 722/463, loss: 0.15094715356826782 2023-01-24 01:01:37.387623: step: 724/463, loss: 1.1056698560714722 2023-01-24 01:01:38.044326: step: 726/463, loss: 0.5388104319572449 2023-01-24 01:01:38.685217: step: 728/463, loss: 0.1357012391090393 2023-01-24 01:01:39.338372: step: 730/463, loss: 0.5290142893791199 2023-01-24 01:01:39.997462: step: 732/463, loss: 0.23319855332374573 2023-01-24 01:01:40.610348: step: 734/463, loss: 0.8086882829666138 2023-01-24 01:01:41.204223: step: 736/463, loss: 1.1085156202316284 2023-01-24 01:01:41.801506: step: 738/463, loss: 0.16713033616542816 2023-01-24 01:01:42.448453: step: 740/463, loss: 0.3009839653968811 2023-01-24 01:01:43.036158: step: 742/463, loss: 0.2579971253871918 2023-01-24 01:01:43.760154: step: 744/463, loss: 0.3805778920650482 2023-01-24 01:01:44.430035: step: 746/463, loss: 0.08140489459037781 2023-01-24 01:01:45.105916: step: 748/463, loss: 0.4339982867240906 2023-01-24 01:01:45.708181: step: 750/463, loss: 0.10837500542402267 2023-01-24 01:01:46.319181: step: 752/463, loss: 4.3671064376831055 2023-01-24 01:01:46.956936: step: 754/463, loss: 1.4679443836212158 2023-01-24 01:01:47.521804: step: 756/463, loss: 0.1418849527835846 2023-01-24 01:01:48.205232: step: 758/463, loss: 0.3023754060268402 2023-01-24 01:01:48.740857: step: 760/463, loss: 0.24818727374076843 2023-01-24 01:01:49.387247: step: 762/463, loss: 0.26257655024528503 2023-01-24 01:01:50.045277: step: 764/463, loss: 0.46585485339164734 2023-01-24 01:01:50.736213: step: 766/463, loss: 0.4479946792125702 2023-01-24 01:01:51.419208: step: 768/463, loss: 1.1319674253463745 2023-01-24 01:01:52.078577: step: 770/463, loss: 0.2650575637817383 2023-01-24 01:01:52.678109: step: 772/463, loss: 0.1902674436569214 2023-01-24 01:01:53.277104: step: 774/463, loss: 0.32330140471458435 2023-01-24 01:01:53.902631: step: 776/463, loss: 0.5427892208099365 2023-01-24 01:01:54.600017: step: 778/463, loss: 0.4061028063297272 2023-01-24 01:01:55.180260: step: 780/463, loss: 0.5603631734848022 2023-01-24 01:01:55.777637: step: 782/463, loss: 0.17620758712291718 2023-01-24 01:01:56.383706: step: 784/463, loss: 0.5316905379295349 2023-01-24 01:01:56.952100: step: 786/463, loss: 0.7653321623802185 2023-01-24 01:01:57.521318: step: 788/463, loss: 0.3327375054359436 2023-01-24 01:01:58.161018: step: 790/463, loss: 0.2217465490102768 2023-01-24 01:01:58.819374: step: 792/463, loss: 0.2062823325395584 2023-01-24 01:01:59.411639: step: 794/463, loss: 2.1143057346343994 2023-01-24 01:02:00.053319: step: 796/463, loss: 1.028924584388733 2023-01-24 01:02:00.692148: step: 798/463, loss: 0.24175474047660828 2023-01-24 01:02:01.337724: step: 800/463, loss: 0.3310534954071045 2023-01-24 01:02:02.004831: step: 802/463, loss: 0.17518888413906097 2023-01-24 01:02:02.618144: step: 804/463, loss: 0.5500239729881287 2023-01-24 01:02:03.241151: step: 806/463, loss: 0.43291524052619934 2023-01-24 01:02:03.857169: step: 808/463, loss: 0.32473257184028625 2023-01-24 01:02:04.457913: step: 810/463, loss: 0.18057170510292053 2023-01-24 01:02:05.111006: step: 812/463, loss: 0.5960824489593506 2023-01-24 01:02:05.752006: step: 814/463, loss: 0.6424495577812195 2023-01-24 01:02:06.366666: step: 816/463, loss: 0.10353157669305801 2023-01-24 01:02:07.022285: step: 818/463, loss: 1.0949581861495972 2023-01-24 01:02:07.584917: step: 820/463, loss: 0.21636156737804413 2023-01-24 01:02:08.207398: step: 822/463, loss: 0.5443524718284607 2023-01-24 01:02:08.890945: step: 824/463, loss: 0.14077365398406982 2023-01-24 01:02:09.440804: step: 826/463, loss: 0.32151108980178833 2023-01-24 01:02:10.076952: step: 828/463, loss: 0.23523424565792084 2023-01-24 01:02:10.718995: step: 830/463, loss: 0.5430747866630554 2023-01-24 01:02:11.359135: step: 832/463, loss: 0.25517770648002625 2023-01-24 01:02:11.934762: step: 834/463, loss: 1.2619940042495728 2023-01-24 01:02:12.473649: step: 836/463, loss: 0.27874496579170227 2023-01-24 01:02:13.755715: step: 838/463, loss: 0.16189457476139069 2023-01-24 01:02:14.317560: step: 840/463, loss: 0.20547501742839813 2023-01-24 01:02:14.898989: step: 842/463, loss: 0.23212389647960663 2023-01-24 01:02:15.468003: step: 844/463, loss: 0.22806406021118164 2023-01-24 01:02:16.051092: step: 846/463, loss: 1.722379207611084 2023-01-24 01:02:16.702683: step: 848/463, loss: 0.5067812204360962 2023-01-24 01:02:17.310007: step: 850/463, loss: 0.9827410578727722 2023-01-24 01:02:17.987258: step: 852/463, loss: 0.2934538722038269 2023-01-24 01:02:18.600989: step: 854/463, loss: 0.4790877401828766 2023-01-24 01:02:19.240582: step: 856/463, loss: 0.19760249555110931 2023-01-24 01:02:19.900722: step: 858/463, loss: 0.5390596985816956 2023-01-24 01:02:20.539823: step: 860/463, loss: 0.25786659121513367 2023-01-24 01:02:21.140332: step: 862/463, loss: 0.31573036313056946 2023-01-24 01:02:21.742266: step: 864/463, loss: 0.19822876155376434 2023-01-24 01:02:22.372844: step: 866/463, loss: 0.7719884514808655 2023-01-24 01:02:23.001372: step: 868/463, loss: 0.21586428582668304 2023-01-24 01:02:23.657007: step: 870/463, loss: 0.19002723693847656 2023-01-24 01:02:24.286349: step: 872/463, loss: 0.2642134130001068 2023-01-24 01:02:24.891134: step: 874/463, loss: 0.3830374479293823 2023-01-24 01:02:25.576867: step: 876/463, loss: 1.1521027088165283 2023-01-24 01:02:26.218122: step: 878/463, loss: 0.0997028797864914 2023-01-24 01:02:26.857693: step: 880/463, loss: 0.15445277094841003 2023-01-24 01:02:27.489468: step: 882/463, loss: 1.9089767932891846 2023-01-24 01:02:28.142986: step: 884/463, loss: 0.11428172141313553 2023-01-24 01:02:28.747406: step: 886/463, loss: 0.5019733309745789 2023-01-24 01:02:29.371358: step: 888/463, loss: 0.3502446413040161 2023-01-24 01:02:29.992766: step: 890/463, loss: 0.28718674182891846 2023-01-24 01:02:30.610446: step: 892/463, loss: 0.10633283108472824 2023-01-24 01:02:31.245622: step: 894/463, loss: 0.26933473348617554 2023-01-24 01:02:31.914181: step: 896/463, loss: 0.1510273665189743 2023-01-24 01:02:32.527983: step: 898/463, loss: 0.2200581133365631 2023-01-24 01:02:33.216352: step: 900/463, loss: 0.6008175611495972 2023-01-24 01:02:33.800397: step: 902/463, loss: 0.23193147778511047 2023-01-24 01:02:34.437430: step: 904/463, loss: 0.21933698654174805 2023-01-24 01:02:35.065231: step: 906/463, loss: 0.795813262462616 2023-01-24 01:02:35.665484: step: 908/463, loss: 0.5112689137458801 2023-01-24 01:02:36.360303: step: 910/463, loss: 0.3328273594379425 2023-01-24 01:02:36.926062: step: 912/463, loss: 0.4828084111213684 2023-01-24 01:02:37.502945: step: 914/463, loss: 0.140917107462883 2023-01-24 01:02:38.103780: step: 916/463, loss: 0.8214280605316162 2023-01-24 01:02:38.817665: step: 918/463, loss: 0.4063258767127991 2023-01-24 01:02:39.455955: step: 920/463, loss: 0.1763933002948761 2023-01-24 01:02:40.099422: step: 922/463, loss: 0.6062476634979248 2023-01-24 01:02:40.784487: step: 924/463, loss: 0.3227226436138153 2023-01-24 01:02:41.417729: step: 926/463, loss: 0.3610214293003082 ================================================== Loss: 0.462 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3393608202044899, 'r': 0.31553472846337766, 'f1': 0.32701435968574244}, 'combined': 0.240957949242126, 'epoch': 8} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3399382398419782, 'r': 0.37555676864895016, 'f1': 0.3568609295109675}, 'combined': 0.2766099070850562, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3152820200322959, 'r': 0.32126460105757665, 'f1': 0.3182451969122987}, 'combined': 0.2344964608827464, 'epoch': 8} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3249698945392725, 'r': 0.37455169830169827, 'f1': 0.34800362745708596}, 'combined': 0.26974443850740637, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3186731014426087, 'r': 0.3198824870268311, 'f1': 0.31927664898321967}, 'combined': 0.23525647819816184, 'epoch': 8} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32947231426395585, 'r': 0.3658111724548333, 'f1': 0.34669212162966784}, 'combined': 0.26872786461246984, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.30303030303030304, 'r': 0.2857142857142857, 'f1': 0.2941176470588235}, 'combined': 0.196078431372549, 'epoch': 8} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3269230769230769, 'r': 0.3695652173913043, 'f1': 0.346938775510204}, 'combined': 0.173469387755102, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3409090909090909, 'r': 0.12931034482758622, 'f1': 0.1875}, 'combined': 0.125, 'epoch': 8} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3664599181442166, 'r': 0.3122210687794179, 'f1': 0.33717316239088774}, 'combined': 0.2484433828143383, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3655563493147106, 'r': 0.32203773630105453, 'f1': 0.34241987150998204}, 'combined': 0.26541635973501004, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.42857142857142855, 'r': 0.2571428571428571, 'f1': 0.3214285714285714}, 'combined': 0.21428571428571425, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3283040393999639, 'r': 0.31335281179920654, 'f1': 0.3206542365401589}, 'combined': 0.23627154271380127, 'epoch': 6} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34970123997901775, 'r': 0.3199189915192662, 'f1': 0.3341478132367659}, 'combined': 0.2590045250926128, 'epoch': 6} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.38, 'r': 0.41304347826086957, 'f1': 0.39583333333333337}, 'combined': 0.19791666666666669, 'epoch': 6} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32326799298325637, 'r': 0.29632899356798503, 'f1': 0.30921286285354954}, 'combined': 0.22784105683945755, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32287388038511977, 'r': 0.3347442436345727, 'f1': 0.32870192876752263}, 'combined': 0.2547833132073621, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.59375, 'r': 0.16379310344827586, 'f1': 0.25675675675675674}, 'combined': 0.17117117117117114, 'epoch': 5} ****************************** Epoch: 9 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:05:14.287634: step: 2/463, loss: 0.17077802121639252 2023-01-24 01:05:14.996102: step: 4/463, loss: 0.2563360929489136 2023-01-24 01:05:15.632904: step: 6/463, loss: 0.19956116378307343 2023-01-24 01:05:16.302365: step: 8/463, loss: 0.17916664481163025 2023-01-24 01:05:16.911965: step: 10/463, loss: 0.4735904932022095 2023-01-24 01:05:17.564749: step: 12/463, loss: 0.234725221991539 2023-01-24 01:05:18.158913: step: 14/463, loss: 1.0785027742385864 2023-01-24 01:05:18.752475: step: 16/463, loss: 0.05499253422021866 2023-01-24 01:05:19.384732: step: 18/463, loss: 0.10897567868232727 2023-01-24 01:05:20.034060: step: 20/463, loss: 1.1164591312408447 2023-01-24 01:05:20.624186: step: 22/463, loss: 0.21747346222400665 2023-01-24 01:05:21.192977: step: 24/463, loss: 0.18439310789108276 2023-01-24 01:05:21.774388: step: 26/463, loss: 0.2679608166217804 2023-01-24 01:05:22.472038: step: 28/463, loss: 0.28326791524887085 2023-01-24 01:05:23.073021: step: 30/463, loss: 0.11695236712694168 2023-01-24 01:05:23.701868: step: 32/463, loss: 0.1910436898469925 2023-01-24 01:05:24.317622: step: 34/463, loss: 0.13600853085517883 2023-01-24 01:05:24.962699: step: 36/463, loss: 0.43715304136276245 2023-01-24 01:05:25.461013: step: 38/463, loss: 0.05087998881936073 2023-01-24 01:05:26.070334: step: 40/463, loss: 0.6077514886856079 2023-01-24 01:05:26.681615: step: 42/463, loss: 0.20853185653686523 2023-01-24 01:05:27.295332: step: 44/463, loss: 0.09062162786722183 2023-01-24 01:05:27.879064: step: 46/463, loss: 0.08322703838348389 2023-01-24 01:05:28.525222: step: 48/463, loss: 0.38514578342437744 2023-01-24 01:05:29.176421: step: 50/463, loss: 0.23162110149860382 2023-01-24 01:05:29.735618: step: 52/463, loss: 0.23460164666175842 2023-01-24 01:05:30.380645: step: 54/463, loss: 0.33674460649490356 2023-01-24 01:05:30.997993: step: 56/463, loss: 0.5197405815124512 2023-01-24 01:05:31.593793: step: 58/463, loss: 0.2500210106372833 2023-01-24 01:05:32.200860: step: 60/463, loss: 1.7277462482452393 2023-01-24 01:05:32.798628: step: 62/463, loss: 0.3194347023963928 2023-01-24 01:05:33.462907: step: 64/463, loss: 0.11162091046571732 2023-01-24 01:05:34.122450: step: 66/463, loss: 0.060975331813097 2023-01-24 01:05:34.688617: step: 68/463, loss: 0.2567768394947052 2023-01-24 01:05:35.317566: step: 70/463, loss: 0.4143083095550537 2023-01-24 01:05:35.949249: step: 72/463, loss: 0.32424283027648926 2023-01-24 01:05:36.564400: step: 74/463, loss: 0.26578614115715027 2023-01-24 01:05:37.159492: step: 76/463, loss: 0.14340151846408844 2023-01-24 01:05:37.822058: step: 78/463, loss: 0.4129473567008972 2023-01-24 01:05:38.408082: step: 80/463, loss: 0.20089292526245117 2023-01-24 01:05:38.981999: step: 82/463, loss: 0.2243395745754242 2023-01-24 01:05:39.583825: step: 84/463, loss: 0.10392922163009644 2023-01-24 01:05:40.264115: step: 86/463, loss: 0.5979198217391968 2023-01-24 01:05:40.940763: step: 88/463, loss: 0.3399348258972168 2023-01-24 01:05:41.554243: step: 90/463, loss: 0.536894679069519 2023-01-24 01:05:42.228600: step: 92/463, loss: 0.08235007524490356 2023-01-24 01:05:42.873935: step: 94/463, loss: 0.21865396201610565 2023-01-24 01:05:43.467795: step: 96/463, loss: 0.3912118077278137 2023-01-24 01:05:44.051474: step: 98/463, loss: 0.13150645792484283 2023-01-24 01:05:44.675273: step: 100/463, loss: 0.25504016876220703 2023-01-24 01:05:45.330106: step: 102/463, loss: 0.365493506193161 2023-01-24 01:05:45.967667: step: 104/463, loss: 0.5118498802185059 2023-01-24 01:05:46.544631: step: 106/463, loss: 0.18823763728141785 2023-01-24 01:05:47.164222: step: 108/463, loss: 0.19413326680660248 2023-01-24 01:05:47.788459: step: 110/463, loss: 0.24084702134132385 2023-01-24 01:05:48.388156: step: 112/463, loss: 0.6102190017700195 2023-01-24 01:05:49.007202: step: 114/463, loss: 1.2868348360061646 2023-01-24 01:05:49.614327: step: 116/463, loss: 0.17565158009529114 2023-01-24 01:05:50.280926: step: 118/463, loss: 0.9952040314674377 2023-01-24 01:05:50.882388: step: 120/463, loss: 0.07152537256479263 2023-01-24 01:05:51.523859: step: 122/463, loss: 0.3385022282600403 2023-01-24 01:05:52.112343: step: 124/463, loss: 0.23959952592849731 2023-01-24 01:05:52.754537: step: 126/463, loss: 0.16376827657222748 2023-01-24 01:05:53.368107: step: 128/463, loss: 0.17524555325508118 2023-01-24 01:05:54.030234: step: 130/463, loss: 0.21081186830997467 2023-01-24 01:05:54.647117: step: 132/463, loss: 0.15743355453014374 2023-01-24 01:05:55.276956: step: 134/463, loss: 0.2795874774456024 2023-01-24 01:05:55.924687: step: 136/463, loss: 0.7767999768257141 2023-01-24 01:05:56.556960: step: 138/463, loss: 0.7737017273902893 2023-01-24 01:05:57.271857: step: 140/463, loss: 0.34982675313949585 2023-01-24 01:05:57.896083: step: 142/463, loss: 0.2001238316297531 2023-01-24 01:05:58.480380: step: 144/463, loss: 0.2352839708328247 2023-01-24 01:05:59.145906: step: 146/463, loss: 0.22720541059970856 2023-01-24 01:05:59.854961: step: 148/463, loss: 0.35505396127700806 2023-01-24 01:06:00.441299: step: 150/463, loss: 0.35628750920295715 2023-01-24 01:06:00.982534: step: 152/463, loss: 0.9582790732383728 2023-01-24 01:06:01.599843: step: 154/463, loss: 0.10442464053630829 2023-01-24 01:06:02.228063: step: 156/463, loss: 0.23777472972869873 2023-01-24 01:06:02.850524: step: 158/463, loss: 0.497800350189209 2023-01-24 01:06:03.421022: step: 160/463, loss: 0.32195955514907837 2023-01-24 01:06:04.043956: step: 162/463, loss: 1.4129806756973267 2023-01-24 01:06:04.614561: step: 164/463, loss: 0.36605462431907654 2023-01-24 01:06:05.338079: step: 166/463, loss: 0.3789929449558258 2023-01-24 01:06:05.897779: step: 168/463, loss: 0.40258800983428955 2023-01-24 01:06:06.481685: step: 170/463, loss: 0.9423494935035706 2023-01-24 01:06:07.140180: step: 172/463, loss: 0.34803786873817444 2023-01-24 01:06:07.789378: step: 174/463, loss: 0.3194010555744171 2023-01-24 01:06:08.451348: step: 176/463, loss: 1.1413010358810425 2023-01-24 01:06:09.081209: step: 178/463, loss: 0.16474317014217377 2023-01-24 01:06:09.692306: step: 180/463, loss: 0.2473813146352768 2023-01-24 01:06:10.374289: step: 182/463, loss: 0.2337036281824112 2023-01-24 01:06:11.065473: step: 184/463, loss: 0.2663898169994354 2023-01-24 01:06:11.623794: step: 186/463, loss: 0.3031774163246155 2023-01-24 01:06:12.367617: step: 188/463, loss: 0.6257765293121338 2023-01-24 01:06:12.965750: step: 190/463, loss: 0.10745250433683395 2023-01-24 01:06:13.544254: step: 192/463, loss: 0.8570696711540222 2023-01-24 01:06:14.167612: step: 194/463, loss: 0.7822129130363464 2023-01-24 01:06:14.799537: step: 196/463, loss: 0.6533249616622925 2023-01-24 01:06:15.501644: step: 198/463, loss: 0.24035483598709106 2023-01-24 01:06:16.226025: step: 200/463, loss: 1.2772352695465088 2023-01-24 01:06:16.875962: step: 202/463, loss: 0.1282467544078827 2023-01-24 01:06:17.470534: step: 204/463, loss: 0.32942572236061096 2023-01-24 01:06:18.173945: step: 206/463, loss: 0.2963917553424835 2023-01-24 01:06:18.821663: step: 208/463, loss: 0.24848397076129913 2023-01-24 01:06:19.432674: step: 210/463, loss: 0.08796899765729904 2023-01-24 01:06:20.035021: step: 212/463, loss: 0.18135881423950195 2023-01-24 01:06:20.654394: step: 214/463, loss: 0.16503803431987762 2023-01-24 01:06:21.309359: step: 216/463, loss: 0.564280092716217 2023-01-24 01:06:21.906705: step: 218/463, loss: 0.6624785661697388 2023-01-24 01:06:22.510110: step: 220/463, loss: 0.12074249237775803 2023-01-24 01:06:23.169362: step: 222/463, loss: 0.23013147711753845 2023-01-24 01:06:23.817762: step: 224/463, loss: 0.7275635004043579 2023-01-24 01:06:24.415006: step: 226/463, loss: 0.3013765811920166 2023-01-24 01:06:25.068612: step: 228/463, loss: 0.1642383337020874 2023-01-24 01:06:25.750945: step: 230/463, loss: 0.2057809680700302 2023-01-24 01:06:26.367743: step: 232/463, loss: 0.5123519897460938 2023-01-24 01:06:27.012538: step: 234/463, loss: 0.14983588457107544 2023-01-24 01:06:27.649643: step: 236/463, loss: 0.7289428114891052 2023-01-24 01:06:28.240928: step: 238/463, loss: 0.30941131711006165 2023-01-24 01:06:28.883620: step: 240/463, loss: 0.6629010438919067 2023-01-24 01:06:29.516077: step: 242/463, loss: 0.6024523973464966 2023-01-24 01:06:30.165298: step: 244/463, loss: 0.2660287320613861 2023-01-24 01:06:30.742892: step: 246/463, loss: 0.4353945255279541 2023-01-24 01:06:31.400828: step: 248/463, loss: 0.3672528564929962 2023-01-24 01:06:32.047384: step: 250/463, loss: 0.15033632516860962 2023-01-24 01:06:32.611037: step: 252/463, loss: 0.33095505833625793 2023-01-24 01:06:33.225954: step: 254/463, loss: 0.2627013623714447 2023-01-24 01:06:33.843743: step: 256/463, loss: 0.22009465098381042 2023-01-24 01:06:34.470773: step: 258/463, loss: 0.17957089841365814 2023-01-24 01:06:35.087182: step: 260/463, loss: 0.19365763664245605 2023-01-24 01:06:35.723909: step: 262/463, loss: 0.4233319163322449 2023-01-24 01:06:36.344129: step: 264/463, loss: 0.3395460247993469 2023-01-24 01:06:36.962515: step: 266/463, loss: 0.18502365052700043 2023-01-24 01:06:37.595644: step: 268/463, loss: 0.16486413776874542 2023-01-24 01:06:38.208101: step: 270/463, loss: 0.12008094042539597 2023-01-24 01:06:38.898167: step: 272/463, loss: 0.33829760551452637 2023-01-24 01:06:39.579130: step: 274/463, loss: 0.25912705063819885 2023-01-24 01:06:40.214828: step: 276/463, loss: 0.16830359399318695 2023-01-24 01:06:40.818045: step: 278/463, loss: 0.5848151445388794 2023-01-24 01:06:41.480786: step: 280/463, loss: 0.6665197014808655 2023-01-24 01:06:42.103732: step: 282/463, loss: 0.31737908720970154 2023-01-24 01:06:42.746630: step: 284/463, loss: 0.3111627399921417 2023-01-24 01:06:43.302625: step: 286/463, loss: 0.11487477272748947 2023-01-24 01:06:43.884390: step: 288/463, loss: 0.26367437839508057 2023-01-24 01:06:44.458287: step: 290/463, loss: 0.21586939692497253 2023-01-24 01:06:45.050122: step: 292/463, loss: 0.3515869081020355 2023-01-24 01:06:45.655827: step: 294/463, loss: 0.1616506278514862 2023-01-24 01:06:46.209025: step: 296/463, loss: 0.24998968839645386 2023-01-24 01:06:46.773055: step: 298/463, loss: 0.23901832103729248 2023-01-24 01:06:47.368424: step: 300/463, loss: 0.25040459632873535 2023-01-24 01:06:48.008361: step: 302/463, loss: 0.6697432398796082 2023-01-24 01:06:48.694895: step: 304/463, loss: 0.14914096891880035 2023-01-24 01:06:49.293672: step: 306/463, loss: 0.9890682697296143 2023-01-24 01:06:49.882519: step: 308/463, loss: 0.543837308883667 2023-01-24 01:06:50.460711: step: 310/463, loss: 0.20534953474998474 2023-01-24 01:06:51.152244: step: 312/463, loss: 0.25874242186546326 2023-01-24 01:06:51.731981: step: 314/463, loss: 0.7873653769493103 2023-01-24 01:06:52.334312: step: 316/463, loss: 0.627815842628479 2023-01-24 01:06:52.888011: step: 318/463, loss: 0.15859894454479218 2023-01-24 01:06:53.515359: step: 320/463, loss: 1.783982515335083 2023-01-24 01:06:54.131054: step: 322/463, loss: 0.3193933069705963 2023-01-24 01:06:54.675730: step: 324/463, loss: 0.12599636614322662 2023-01-24 01:06:55.358661: step: 326/463, loss: 0.40168797969818115 2023-01-24 01:06:55.966519: step: 328/463, loss: 0.27117809653282166 2023-01-24 01:06:56.589192: step: 330/463, loss: 0.676965594291687 2023-01-24 01:06:57.242728: step: 332/463, loss: 1.0799967050552368 2023-01-24 01:06:57.876864: step: 334/463, loss: 0.32789263129234314 2023-01-24 01:06:58.492170: step: 336/463, loss: 0.38837581872940063 2023-01-24 01:06:59.087422: step: 338/463, loss: 0.12659011781215668 2023-01-24 01:06:59.826125: step: 340/463, loss: 0.3912752568721771 2023-01-24 01:07:00.435911: step: 342/463, loss: 0.14781580865383148 2023-01-24 01:07:01.037610: step: 344/463, loss: 0.2638659179210663 2023-01-24 01:07:01.642341: step: 346/463, loss: 0.09076520055532455 2023-01-24 01:07:02.233919: step: 348/463, loss: 0.26640796661376953 2023-01-24 01:07:02.844786: step: 350/463, loss: 0.22564326226711273 2023-01-24 01:07:03.461017: step: 352/463, loss: 0.13490281999111176 2023-01-24 01:07:04.086489: step: 354/463, loss: 0.4779565632343292 2023-01-24 01:07:04.685263: step: 356/463, loss: 0.13475587964057922 2023-01-24 01:07:05.260162: step: 358/463, loss: 0.26774728298187256 2023-01-24 01:07:05.858483: step: 360/463, loss: 0.7304152250289917 2023-01-24 01:07:06.464654: step: 362/463, loss: 0.28829339146614075 2023-01-24 01:07:07.065333: step: 364/463, loss: 0.21928000450134277 2023-01-24 01:07:07.655375: step: 366/463, loss: 0.4275215268135071 2023-01-24 01:07:08.268188: step: 368/463, loss: 0.6500132083892822 2023-01-24 01:07:08.804850: step: 370/463, loss: 0.1395910084247589 2023-01-24 01:07:09.406100: step: 372/463, loss: 0.32921189069747925 2023-01-24 01:07:10.021539: step: 374/463, loss: 0.1203937977552414 2023-01-24 01:07:10.614957: step: 376/463, loss: 0.5926775336265564 2023-01-24 01:07:11.325465: step: 378/463, loss: 0.4914388060569763 2023-01-24 01:07:11.949624: step: 380/463, loss: 0.33103182911872864 2023-01-24 01:07:12.562646: step: 382/463, loss: 0.1833011507987976 2023-01-24 01:07:13.131798: step: 384/463, loss: 0.17184047400951385 2023-01-24 01:07:13.684337: step: 386/463, loss: 0.08801168203353882 2023-01-24 01:07:14.298039: step: 388/463, loss: 0.12636591494083405 2023-01-24 01:07:14.875456: step: 390/463, loss: 0.15098901093006134 2023-01-24 01:07:15.508646: step: 392/463, loss: 0.15603704750537872 2023-01-24 01:07:16.071751: step: 394/463, loss: 0.1481577754020691 2023-01-24 01:07:16.678328: step: 396/463, loss: 0.37629637122154236 2023-01-24 01:07:17.296617: step: 398/463, loss: 0.29679200053215027 2023-01-24 01:07:17.886245: step: 400/463, loss: 0.05918826162815094 2023-01-24 01:07:18.610088: step: 402/463, loss: 0.7189347147941589 2023-01-24 01:07:19.271325: step: 404/463, loss: 0.149513840675354 2023-01-24 01:07:19.955092: step: 406/463, loss: 0.1751466691493988 2023-01-24 01:07:20.621203: step: 408/463, loss: 0.12888486683368683 2023-01-24 01:07:21.208873: step: 410/463, loss: 0.06725501269102097 2023-01-24 01:07:21.803398: step: 412/463, loss: 0.46183106303215027 2023-01-24 01:07:22.388808: step: 414/463, loss: 0.5635194778442383 2023-01-24 01:07:22.950863: step: 416/463, loss: 0.12534135580062866 2023-01-24 01:07:23.620027: step: 418/463, loss: 0.10064512491226196 2023-01-24 01:07:24.232687: step: 420/463, loss: 0.2672666609287262 2023-01-24 01:07:24.861740: step: 422/463, loss: 0.1969425082206726 2023-01-24 01:07:25.429556: step: 424/463, loss: 0.493498831987381 2023-01-24 01:07:25.993136: step: 426/463, loss: 0.31862872838974 2023-01-24 01:07:26.559734: step: 428/463, loss: 0.14827391505241394 2023-01-24 01:07:27.167951: step: 430/463, loss: 0.11653280258178711 2023-01-24 01:07:27.769517: step: 432/463, loss: 0.6190656423568726 2023-01-24 01:07:28.376958: step: 434/463, loss: 0.5870165824890137 2023-01-24 01:07:28.943345: step: 436/463, loss: 0.12024692445993423 2023-01-24 01:07:29.525557: step: 438/463, loss: 0.12173856794834137 2023-01-24 01:07:30.138472: step: 440/463, loss: 0.42035773396492004 2023-01-24 01:07:30.760312: step: 442/463, loss: 0.23534147441387177 2023-01-24 01:07:31.396973: step: 444/463, loss: 0.18608425557613373 2023-01-24 01:07:32.041594: step: 446/463, loss: 0.09951744973659515 2023-01-24 01:07:32.659639: step: 448/463, loss: 0.3251801133155823 2023-01-24 01:07:33.271372: step: 450/463, loss: 0.1532084196805954 2023-01-24 01:07:33.876635: step: 452/463, loss: 0.18904155492782593 2023-01-24 01:07:34.476275: step: 454/463, loss: 0.3416127860546112 2023-01-24 01:07:35.065525: step: 456/463, loss: 0.13285750150680542 2023-01-24 01:07:35.662913: step: 458/463, loss: 0.38581329584121704 2023-01-24 01:07:36.306079: step: 460/463, loss: 0.07741407305002213 2023-01-24 01:07:36.900913: step: 462/463, loss: 0.1496533453464508 2023-01-24 01:07:37.517575: step: 464/463, loss: 0.4598150849342346 2023-01-24 01:07:38.172574: step: 466/463, loss: 0.2529575228691101 2023-01-24 01:07:38.867815: step: 468/463, loss: 0.15284910798072815 2023-01-24 01:07:39.493004: step: 470/463, loss: 0.7500278949737549 2023-01-24 01:07:40.161168: step: 472/463, loss: 0.4187954366207123 2023-01-24 01:07:40.731327: step: 474/463, loss: 0.1004641056060791 2023-01-24 01:07:41.369492: step: 476/463, loss: 0.2837781012058258 2023-01-24 01:07:41.953421: step: 478/463, loss: 0.234932079911232 2023-01-24 01:07:42.576634: step: 480/463, loss: 0.14485178887844086 2023-01-24 01:07:43.193957: step: 482/463, loss: 0.8474454283714294 2023-01-24 01:07:43.807077: step: 484/463, loss: 0.04368195682764053 2023-01-24 01:07:44.412285: step: 486/463, loss: 0.45092302560806274 2023-01-24 01:07:44.981664: step: 488/463, loss: 0.403274267911911 2023-01-24 01:07:45.596369: step: 490/463, loss: 0.07356066256761551 2023-01-24 01:07:46.280630: step: 492/463, loss: 1.185387134552002 2023-01-24 01:07:46.872229: step: 494/463, loss: 0.8510439395904541 2023-01-24 01:07:47.539874: step: 496/463, loss: 0.34438633918762207 2023-01-24 01:07:48.137870: step: 498/463, loss: 0.5409513115882874 2023-01-24 01:07:48.771763: step: 500/463, loss: 0.5065198540687561 2023-01-24 01:07:49.306975: step: 502/463, loss: 0.1975422203540802 2023-01-24 01:07:49.931830: step: 504/463, loss: 0.5251680612564087 2023-01-24 01:07:50.555364: step: 506/463, loss: 0.2176342010498047 2023-01-24 01:07:51.144279: step: 508/463, loss: 0.28560739755630493 2023-01-24 01:07:51.733411: step: 510/463, loss: 0.9183048009872437 2023-01-24 01:07:52.345891: step: 512/463, loss: 0.24042105674743652 2023-01-24 01:07:52.972221: step: 514/463, loss: 0.488264262676239 2023-01-24 01:07:53.545942: step: 516/463, loss: 0.4155459403991699 2023-01-24 01:07:54.098567: step: 518/463, loss: 0.5750282406806946 2023-01-24 01:07:54.696614: step: 520/463, loss: 0.09353403747081757 2023-01-24 01:07:55.347426: step: 522/463, loss: 0.5741830468177795 2023-01-24 01:07:56.088413: step: 524/463, loss: 0.5304901599884033 2023-01-24 01:07:56.675445: step: 526/463, loss: 0.11579668521881104 2023-01-24 01:07:57.309301: step: 528/463, loss: 0.806922435760498 2023-01-24 01:07:57.921071: step: 530/463, loss: 0.13459539413452148 2023-01-24 01:07:58.539059: step: 532/463, loss: 0.9403744339942932 2023-01-24 01:07:59.142628: step: 534/463, loss: 0.32428431510925293 2023-01-24 01:07:59.872512: step: 536/463, loss: 1.3836514949798584 2023-01-24 01:08:00.585947: step: 538/463, loss: 0.1800803244113922 2023-01-24 01:08:01.234122: step: 540/463, loss: 0.11934054642915726 2023-01-24 01:08:01.868548: step: 542/463, loss: 0.458288311958313 2023-01-24 01:08:02.415729: step: 544/463, loss: 0.3857291340827942 2023-01-24 01:08:02.973284: step: 546/463, loss: 0.06559847295284271 2023-01-24 01:08:03.628827: step: 548/463, loss: 0.4696250855922699 2023-01-24 01:08:04.326581: step: 550/463, loss: 0.24462640285491943 2023-01-24 01:08:04.912806: step: 552/463, loss: 0.39239171147346497 2023-01-24 01:08:05.617684: step: 554/463, loss: 0.8460155129432678 2023-01-24 01:08:06.251203: step: 556/463, loss: 0.1752811223268509 2023-01-24 01:08:06.878719: step: 558/463, loss: 0.3194790184497833 2023-01-24 01:08:07.615540: step: 560/463, loss: 0.21365021169185638 2023-01-24 01:08:08.241946: step: 562/463, loss: 0.22283637523651123 2023-01-24 01:08:08.847153: step: 564/463, loss: 0.8638373613357544 2023-01-24 01:08:09.442102: step: 566/463, loss: 0.17956224083900452 2023-01-24 01:08:10.083065: step: 568/463, loss: 0.2995205819606781 2023-01-24 01:08:10.743007: step: 570/463, loss: 0.4693411588668823 2023-01-24 01:08:11.355358: step: 572/463, loss: 0.0845862329006195 2023-01-24 01:08:12.010068: step: 574/463, loss: 0.06597862392663956 2023-01-24 01:08:12.578986: step: 576/463, loss: 0.19842566549777985 2023-01-24 01:08:13.183972: step: 578/463, loss: 0.17455452680587769 2023-01-24 01:08:13.779808: step: 580/463, loss: 0.15945930778980255 2023-01-24 01:08:14.414961: step: 582/463, loss: 0.3631148040294647 2023-01-24 01:08:14.927660: step: 584/463, loss: 0.3113175928592682 2023-01-24 01:08:15.598532: step: 586/463, loss: 0.13008259236812592 2023-01-24 01:08:16.260236: step: 588/463, loss: 0.18816684186458588 2023-01-24 01:08:16.860017: step: 590/463, loss: 0.6339176297187805 2023-01-24 01:08:17.487008: step: 592/463, loss: 0.12655837833881378 2023-01-24 01:08:18.164935: step: 594/463, loss: 0.12447237968444824 2023-01-24 01:08:18.806843: step: 596/463, loss: 0.23655134439468384 2023-01-24 01:08:19.424422: step: 598/463, loss: 0.270586758852005 2023-01-24 01:08:19.992694: step: 600/463, loss: 0.7481467127799988 2023-01-24 01:08:20.606821: step: 602/463, loss: 1.033585548400879 2023-01-24 01:08:21.201294: step: 604/463, loss: 0.17560365796089172 2023-01-24 01:08:21.799528: step: 606/463, loss: 0.2297007441520691 2023-01-24 01:08:22.490770: step: 608/463, loss: 0.34454545378685 2023-01-24 01:08:23.097584: step: 610/463, loss: 0.1451507806777954 2023-01-24 01:08:23.782565: step: 612/463, loss: 0.18311670422554016 2023-01-24 01:08:24.403039: step: 614/463, loss: 0.22084030508995056 2023-01-24 01:08:25.038712: step: 616/463, loss: 0.23423704504966736 2023-01-24 01:08:25.706558: step: 618/463, loss: 0.5065959095954895 2023-01-24 01:08:26.356621: step: 620/463, loss: 0.2406112104654312 2023-01-24 01:08:26.954423: step: 622/463, loss: 0.21883827447891235 2023-01-24 01:08:27.571753: step: 624/463, loss: 0.5106756687164307 2023-01-24 01:08:28.118185: step: 626/463, loss: 0.13174429535865784 2023-01-24 01:08:28.772324: step: 628/463, loss: 0.08035876601934433 2023-01-24 01:08:29.382090: step: 630/463, loss: 0.19846908748149872 2023-01-24 01:08:30.001772: step: 632/463, loss: 0.1697191447019577 2023-01-24 01:08:30.668803: step: 634/463, loss: 0.7762958407402039 2023-01-24 01:08:31.346991: step: 636/463, loss: 0.2021734118461609 2023-01-24 01:08:31.966043: step: 638/463, loss: 0.6043620109558105 2023-01-24 01:08:32.600776: step: 640/463, loss: 0.2545052468776703 2023-01-24 01:08:33.266195: step: 642/463, loss: 0.852972686290741 2023-01-24 01:08:33.889972: step: 644/463, loss: 0.11844108998775482 2023-01-24 01:08:34.488007: step: 646/463, loss: 3.2565789222717285 2023-01-24 01:08:35.083681: step: 648/463, loss: 0.3924574851989746 2023-01-24 01:08:35.777132: step: 650/463, loss: 0.7701157331466675 2023-01-24 01:08:36.347367: step: 652/463, loss: 0.15826819837093353 2023-01-24 01:08:37.074223: step: 654/463, loss: 0.6228159666061401 2023-01-24 01:08:37.710761: step: 656/463, loss: 0.10164700448513031 2023-01-24 01:08:38.322277: step: 658/463, loss: 0.941778838634491 2023-01-24 01:08:38.975146: step: 660/463, loss: 0.40705472230911255 2023-01-24 01:08:39.609677: step: 662/463, loss: 0.19260768592357635 2023-01-24 01:08:40.213972: step: 664/463, loss: 0.5180705189704895 2023-01-24 01:08:40.850221: step: 666/463, loss: 0.3370892405509949 2023-01-24 01:08:41.446507: step: 668/463, loss: 0.4702625274658203 2023-01-24 01:08:42.038436: step: 670/463, loss: 0.2752023935317993 2023-01-24 01:08:42.708392: step: 672/463, loss: 1.0023704767227173 2023-01-24 01:08:43.292642: step: 674/463, loss: 0.26584145426750183 2023-01-24 01:08:43.905997: step: 676/463, loss: 0.2884277105331421 2023-01-24 01:08:44.508281: step: 678/463, loss: 0.17747734487056732 2023-01-24 01:08:45.128360: step: 680/463, loss: 0.5460485816001892 2023-01-24 01:08:45.715547: step: 682/463, loss: 0.20242799818515778 2023-01-24 01:08:46.311836: step: 684/463, loss: 0.11478574573993683 2023-01-24 01:08:47.068261: step: 686/463, loss: 0.5837790369987488 2023-01-24 01:08:47.693807: step: 688/463, loss: 0.24769118428230286 2023-01-24 01:08:48.313791: step: 690/463, loss: 0.08140033483505249 2023-01-24 01:08:48.947236: step: 692/463, loss: 0.20934681594371796 2023-01-24 01:08:49.543228: step: 694/463, loss: 0.12450077384710312 2023-01-24 01:08:50.178880: step: 696/463, loss: 0.9106389284133911 2023-01-24 01:08:50.880475: step: 698/463, loss: 1.0714139938354492 2023-01-24 01:08:51.485996: step: 700/463, loss: 0.18466581404209137 2023-01-24 01:08:52.112683: step: 702/463, loss: 0.45947933197021484 2023-01-24 01:08:52.741835: step: 704/463, loss: 0.3965025544166565 2023-01-24 01:08:53.336931: step: 706/463, loss: 0.22625593841075897 2023-01-24 01:08:53.961503: step: 708/463, loss: 0.11809322237968445 2023-01-24 01:08:54.632127: step: 710/463, loss: 0.30682238936424255 2023-01-24 01:08:55.260116: step: 712/463, loss: 0.11762760579586029 2023-01-24 01:08:55.859926: step: 714/463, loss: 0.16235385835170746 2023-01-24 01:08:56.482183: step: 716/463, loss: 0.3187079131603241 2023-01-24 01:08:57.045323: step: 718/463, loss: 0.4351699650287628 2023-01-24 01:08:57.679742: step: 720/463, loss: 0.3702765703201294 2023-01-24 01:08:58.287442: step: 722/463, loss: 0.337517648935318 2023-01-24 01:08:58.893283: step: 724/463, loss: 0.17862853407859802 2023-01-24 01:08:59.483137: step: 726/463, loss: 0.1480695605278015 2023-01-24 01:09:00.065855: step: 728/463, loss: 0.09627832472324371 2023-01-24 01:09:00.687578: step: 730/463, loss: 0.10118921846151352 2023-01-24 01:09:01.297860: step: 732/463, loss: 0.2089187353849411 2023-01-24 01:09:01.887527: step: 734/463, loss: 0.2085215449333191 2023-01-24 01:09:02.539357: step: 736/463, loss: 0.3316103518009186 2023-01-24 01:09:03.171517: step: 738/463, loss: 0.17901891469955444 2023-01-24 01:09:03.757586: step: 740/463, loss: 0.32212668657302856 2023-01-24 01:09:04.360147: step: 742/463, loss: 0.119500070810318 2023-01-24 01:09:04.967523: step: 744/463, loss: 0.10016391426324844 2023-01-24 01:09:05.583129: step: 746/463, loss: 0.4700267016887665 2023-01-24 01:09:06.254673: step: 748/463, loss: 0.10390250384807587 2023-01-24 01:09:06.998558: step: 750/463, loss: 0.2568435072898865 2023-01-24 01:09:07.644698: step: 752/463, loss: 0.2085363119840622 2023-01-24 01:09:08.369773: step: 754/463, loss: 0.03869916498661041 2023-01-24 01:09:08.973097: step: 756/463, loss: 0.4686168134212494 2023-01-24 01:09:09.494406: step: 758/463, loss: 0.4501277506351471 2023-01-24 01:09:10.101101: step: 760/463, loss: 0.08514165878295898 2023-01-24 01:09:10.742350: step: 762/463, loss: 0.16509270668029785 2023-01-24 01:09:11.324778: step: 764/463, loss: 0.5358824729919434 2023-01-24 01:09:11.984483: step: 766/463, loss: 0.34480440616607666 2023-01-24 01:09:12.630712: step: 768/463, loss: 0.211360901594162 2023-01-24 01:09:13.219463: step: 770/463, loss: 0.2957981824874878 2023-01-24 01:09:13.854894: step: 772/463, loss: 0.09436078369617462 2023-01-24 01:09:14.468405: step: 774/463, loss: 0.06603935360908508 2023-01-24 01:09:15.061865: step: 776/463, loss: 0.14114435017108917 2023-01-24 01:09:15.681746: step: 778/463, loss: 0.1795409768819809 2023-01-24 01:09:16.302401: step: 780/463, loss: 0.3366824984550476 2023-01-24 01:09:16.976501: step: 782/463, loss: 0.3289976418018341 2023-01-24 01:09:17.536719: step: 784/463, loss: 0.1650543510913849 2023-01-24 01:09:18.128805: step: 786/463, loss: 0.13097169995307922 2023-01-24 01:09:18.680949: step: 788/463, loss: 0.24702036380767822 2023-01-24 01:09:19.259416: step: 790/463, loss: 0.18632261455059052 2023-01-24 01:09:19.915713: step: 792/463, loss: 0.18176907300949097 2023-01-24 01:09:20.529564: step: 794/463, loss: 0.5092656016349792 2023-01-24 01:09:21.132843: step: 796/463, loss: 0.07761086523532867 2023-01-24 01:09:21.750884: step: 798/463, loss: 0.4100775122642517 2023-01-24 01:09:22.376902: step: 800/463, loss: 0.20849396288394928 2023-01-24 01:09:23.049284: step: 802/463, loss: 1.4022369384765625 2023-01-24 01:09:23.701820: step: 804/463, loss: 0.1023639440536499 2023-01-24 01:09:24.370097: step: 806/463, loss: 0.16859178245067596 2023-01-24 01:09:24.998249: step: 808/463, loss: 0.2601999342441559 2023-01-24 01:09:25.590183: step: 810/463, loss: 0.38377615809440613 2023-01-24 01:09:26.152417: step: 812/463, loss: 0.3133040964603424 2023-01-24 01:09:26.724873: step: 814/463, loss: 0.47720515727996826 2023-01-24 01:09:27.317267: step: 816/463, loss: 2.147118330001831 2023-01-24 01:09:27.924473: step: 818/463, loss: 0.04397233948111534 2023-01-24 01:09:28.564189: step: 820/463, loss: 0.1503096967935562 2023-01-24 01:09:29.225259: step: 822/463, loss: 0.4594634473323822 2023-01-24 01:09:29.826300: step: 824/463, loss: 0.12943784892559052 2023-01-24 01:09:30.426328: step: 826/463, loss: 0.46011871099472046 2023-01-24 01:09:31.054563: step: 828/463, loss: 0.8428401350975037 2023-01-24 01:09:31.653848: step: 830/463, loss: 0.23051264882087708 2023-01-24 01:09:32.281580: step: 832/463, loss: 0.10706888884305954 2023-01-24 01:09:32.893759: step: 834/463, loss: 0.5708986520767212 2023-01-24 01:09:33.477295: step: 836/463, loss: 0.1993168443441391 2023-01-24 01:09:34.138821: step: 838/463, loss: 0.7545018792152405 2023-01-24 01:09:34.773044: step: 840/463, loss: 0.5041952133178711 2023-01-24 01:09:35.385384: step: 842/463, loss: 0.1180022805929184 2023-01-24 01:09:35.962716: step: 844/463, loss: 0.34608232975006104 2023-01-24 01:09:36.603381: step: 846/463, loss: 0.060121431946754456 2023-01-24 01:09:37.197773: step: 848/463, loss: 0.10963843762874603 2023-01-24 01:09:37.787320: step: 850/463, loss: 0.2471901923418045 2023-01-24 01:09:38.403485: step: 852/463, loss: 0.5380828976631165 2023-01-24 01:09:38.992196: step: 854/463, loss: 0.467693030834198 2023-01-24 01:09:39.598410: step: 856/463, loss: 3.8073570728302 2023-01-24 01:09:40.223569: step: 858/463, loss: 2.411452293395996 2023-01-24 01:09:40.865777: step: 860/463, loss: 0.27982455492019653 2023-01-24 01:09:41.452987: step: 862/463, loss: 2.2248191833496094 2023-01-24 01:09:42.072876: step: 864/463, loss: 0.22622354328632355 2023-01-24 01:09:42.640248: step: 866/463, loss: 0.14150923490524292 2023-01-24 01:09:43.354609: step: 868/463, loss: 0.22644901275634766 2023-01-24 01:09:43.958563: step: 870/463, loss: 0.16060954332351685 2023-01-24 01:09:44.573901: step: 872/463, loss: 0.22159768640995026 2023-01-24 01:09:45.217042: step: 874/463, loss: 0.2330881804227829 2023-01-24 01:09:45.885356: step: 876/463, loss: 0.16592679917812347 2023-01-24 01:09:46.514669: step: 878/463, loss: 0.7527386546134949 2023-01-24 01:09:47.166753: step: 880/463, loss: 0.17801988124847412 2023-01-24 01:09:47.785743: step: 882/463, loss: 0.25850164890289307 2023-01-24 01:09:48.439166: step: 884/463, loss: 0.8933775424957275 2023-01-24 01:09:49.084198: step: 886/463, loss: 0.28092578053474426 2023-01-24 01:09:49.741086: step: 888/463, loss: 0.8770626187324524 2023-01-24 01:09:50.377774: step: 890/463, loss: 0.2462228685617447 2023-01-24 01:09:51.049144: step: 892/463, loss: 0.4454179108142853 2023-01-24 01:09:51.660805: step: 894/463, loss: 0.22192177176475525 2023-01-24 01:09:52.266831: step: 896/463, loss: 0.19274753332138062 2023-01-24 01:09:52.879444: step: 898/463, loss: 0.3427952527999878 2023-01-24 01:09:53.519134: step: 900/463, loss: 0.3240050673484802 2023-01-24 01:09:54.073137: step: 902/463, loss: 0.08743232488632202 2023-01-24 01:09:54.678829: step: 904/463, loss: 0.3861238956451416 2023-01-24 01:09:55.252974: step: 906/463, loss: 0.3245295584201813 2023-01-24 01:09:55.861262: step: 908/463, loss: 0.11214976757764816 2023-01-24 01:09:56.542109: step: 910/463, loss: 0.8596938252449036 2023-01-24 01:09:57.126124: step: 912/463, loss: 0.5284637212753296 2023-01-24 01:09:57.721772: step: 914/463, loss: 0.30676794052124023 2023-01-24 01:09:58.378668: step: 916/463, loss: 0.15184001624584198 2023-01-24 01:09:58.996616: step: 918/463, loss: 0.591627299785614 2023-01-24 01:09:59.753339: step: 920/463, loss: 0.1877332627773285 2023-01-24 01:10:00.403055: step: 922/463, loss: 0.13615302741527557 2023-01-24 01:10:00.973642: step: 924/463, loss: 0.3489464521408081 2023-01-24 01:10:01.542257: step: 926/463, loss: 0.24088625609874725 ================================================== Loss: 0.372 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3293722635358964, 'r': 0.32999725834336485, 'f1': 0.3296844647335607}, 'combined': 0.2429253950668342, 'epoch': 9} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34823746738096467, 'r': 0.3475984995509078, 'f1': 0.34791769009227685}, 'combined': 0.2696778267700902, 'epoch': 9} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3114145839242707, 'r': 0.3332786059455193, 'f1': 0.3219758484569911}, 'combined': 0.23724536202094082, 'epoch': 9} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3346211674046554, 'r': 0.3468895878411231, 'f1': 0.34064495079627843}, 'combined': 0.2640405838707996, 'epoch': 9} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3116559167373173, 'r': 0.328214485368522, 'f1': 0.3197209497027932}, 'combined': 0.23558385767574233, 'epoch': 9} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34040347687543165, 'r': 0.3450965395161775, 'f1': 0.34273394339899693}, 'combined': 0.26565980301740433, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.30514705882352944, 'r': 0.29642857142857143, 'f1': 0.3007246376811594}, 'combined': 0.20048309178743962, 'epoch': 9} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.34, 'r': 0.3695652173913043, 'f1': 0.3541666666666667}, 'combined': 0.17708333333333334, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36538461538461536, 'r': 0.16379310344827586, 'f1': 0.2261904761904762}, 'combined': 0.15079365079365079, 'epoch': 9} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3664599181442166, 'r': 0.3122210687794179, 'f1': 0.33717316239088774}, 'combined': 0.2484433828143383, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3655563493147106, 'r': 0.32203773630105453, 'f1': 0.34241987150998204}, 'combined': 0.26541635973501004, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.42857142857142855, 'r': 0.2571428571428571, 'f1': 0.3214285714285714}, 'combined': 0.21428571428571425, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3283040393999639, 'r': 0.31335281179920654, 'f1': 0.3206542365401589}, 'combined': 0.23627154271380127, 'epoch': 6} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34970123997901775, 'r': 0.3199189915192662, 'f1': 0.3341478132367659}, 'combined': 0.2590045250926128, 'epoch': 6} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.38, 'r': 0.41304347826086957, 'f1': 0.39583333333333337}, 'combined': 0.19791666666666669, 'epoch': 6} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32326799298325637, 'r': 0.29632899356798503, 'f1': 0.30921286285354954}, 'combined': 0.22784105683945755, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32287388038511977, 'r': 0.3347442436345727, 'f1': 0.32870192876752263}, 'combined': 0.2547833132073621, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.59375, 'r': 0.16379310344827586, 'f1': 0.25675675675675674}, 'combined': 0.17117117117117114, 'epoch': 5} ****************************** Epoch: 10 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:12:35.822897: step: 2/463, loss: 0.16869837045669556 2023-01-24 01:12:36.428454: step: 4/463, loss: 0.3342270851135254 2023-01-24 01:12:37.051066: step: 6/463, loss: 0.5534235239028931 2023-01-24 01:12:37.603116: step: 8/463, loss: 0.8908038139343262 2023-01-24 01:12:38.219193: step: 10/463, loss: 0.20792478322982788 2023-01-24 01:12:38.810833: step: 12/463, loss: 0.11852412670850754 2023-01-24 01:12:39.581844: step: 14/463, loss: 0.26360851526260376 2023-01-24 01:12:40.191724: step: 16/463, loss: 0.5815234184265137 2023-01-24 01:12:40.774757: step: 18/463, loss: 0.4189456105232239 2023-01-24 01:12:41.421121: step: 20/463, loss: 0.11026723682880402 2023-01-24 01:12:42.032122: step: 22/463, loss: 0.19458189606666565 2023-01-24 01:12:42.645861: step: 24/463, loss: 0.15351428091526031 2023-01-24 01:12:43.251821: step: 26/463, loss: 0.1480802446603775 2023-01-24 01:12:43.909842: step: 28/463, loss: 0.12472604960203171 2023-01-24 01:12:44.598589: step: 30/463, loss: 0.03652859851717949 2023-01-24 01:12:45.163085: step: 32/463, loss: 0.6001203656196594 2023-01-24 01:12:45.799010: step: 34/463, loss: 0.6026164293289185 2023-01-24 01:12:46.356397: step: 36/463, loss: 0.07915201783180237 2023-01-24 01:12:46.978657: step: 38/463, loss: 0.18657277524471283 2023-01-24 01:12:47.546006: step: 40/463, loss: 0.1918850839138031 2023-01-24 01:12:48.057114: step: 42/463, loss: 0.11573538929224014 2023-01-24 01:12:48.681372: step: 44/463, loss: 1.5855425596237183 2023-01-24 01:12:49.284141: step: 46/463, loss: 0.026589874178171158 2023-01-24 01:12:49.909682: step: 48/463, loss: 0.4069761037826538 2023-01-24 01:12:50.506395: step: 50/463, loss: 0.319105863571167 2023-01-24 01:12:51.173804: step: 52/463, loss: 0.20788046717643738 2023-01-24 01:12:51.713865: step: 54/463, loss: 0.8110256195068359 2023-01-24 01:12:52.244005: step: 56/463, loss: 0.14753176271915436 2023-01-24 01:12:52.815022: step: 58/463, loss: 0.6654013395309448 2023-01-24 01:12:53.430413: step: 60/463, loss: 0.14762844145298004 2023-01-24 01:12:54.051046: step: 62/463, loss: 0.6875556111335754 2023-01-24 01:12:54.655760: step: 64/463, loss: 0.05807267874479294 2023-01-24 01:12:55.210172: step: 66/463, loss: 0.5644345283508301 2023-01-24 01:12:55.877532: step: 68/463, loss: 0.117078997194767 2023-01-24 01:12:56.532852: step: 70/463, loss: 0.31340864300727844 2023-01-24 01:12:57.176132: step: 72/463, loss: 0.3335522413253784 2023-01-24 01:12:57.777711: step: 74/463, loss: 0.5517826080322266 2023-01-24 01:12:58.301184: step: 76/463, loss: 1.1152862310409546 2023-01-24 01:12:58.853928: step: 78/463, loss: 0.11360624432563782 2023-01-24 01:12:59.485107: step: 80/463, loss: 0.38960960507392883 2023-01-24 01:13:00.103946: step: 82/463, loss: 0.3981383442878723 2023-01-24 01:13:00.731712: step: 84/463, loss: 0.21463234722614288 2023-01-24 01:13:01.403607: step: 86/463, loss: 0.18379442393779755 2023-01-24 01:13:02.055897: step: 88/463, loss: 0.14944233000278473 2023-01-24 01:13:02.665617: step: 90/463, loss: 0.5530070662498474 2023-01-24 01:13:03.239502: step: 92/463, loss: 2.2206058502197266 2023-01-24 01:13:03.843536: step: 94/463, loss: 0.21944405138492584 2023-01-24 01:13:04.447058: step: 96/463, loss: 0.3404669761657715 2023-01-24 01:13:05.060000: step: 98/463, loss: 0.88285893201828 2023-01-24 01:13:05.721222: step: 100/463, loss: 0.26726895570755005 2023-01-24 01:13:06.511028: step: 102/463, loss: 0.14909866452217102 2023-01-24 01:13:07.140818: step: 104/463, loss: 0.1025569811463356 2023-01-24 01:13:07.843566: step: 106/463, loss: 0.17497482895851135 2023-01-24 01:13:08.459148: step: 108/463, loss: 0.12367457151412964 2023-01-24 01:13:09.056558: step: 110/463, loss: 0.12136532366275787 2023-01-24 01:13:09.808468: step: 112/463, loss: 0.46946007013320923 2023-01-24 01:13:10.336746: step: 114/463, loss: 0.10456369817256927 2023-01-24 01:13:10.897637: step: 116/463, loss: 0.24857363104820251 2023-01-24 01:13:11.539230: step: 118/463, loss: 0.19830164313316345 2023-01-24 01:13:12.167486: step: 120/463, loss: 0.8026825785636902 2023-01-24 01:13:12.739763: step: 122/463, loss: 0.19431965053081512 2023-01-24 01:13:13.388183: step: 124/463, loss: 0.473052054643631 2023-01-24 01:13:14.013581: step: 126/463, loss: 0.23367270827293396 2023-01-24 01:13:14.600256: step: 128/463, loss: 0.1543530970811844 2023-01-24 01:13:15.163298: step: 130/463, loss: 0.04371014982461929 2023-01-24 01:13:15.809234: step: 132/463, loss: 0.39058345556259155 2023-01-24 01:13:16.413253: step: 134/463, loss: 0.11146162450313568 2023-01-24 01:13:16.992651: step: 136/463, loss: 0.15062418580055237 2023-01-24 01:13:17.647593: step: 138/463, loss: 0.14504064619541168 2023-01-24 01:13:18.205282: step: 140/463, loss: 0.11511898785829544 2023-01-24 01:13:18.841786: step: 142/463, loss: 0.5905314087867737 2023-01-24 01:13:19.485266: step: 144/463, loss: 1.299288272857666 2023-01-24 01:13:20.099944: step: 146/463, loss: 1.1239265203475952 2023-01-24 01:13:20.721496: step: 148/463, loss: 0.4312412440776825 2023-01-24 01:13:21.349661: step: 150/463, loss: 2.4450268745422363 2023-01-24 01:13:21.946462: step: 152/463, loss: 0.14248216152191162 2023-01-24 01:13:22.514616: step: 154/463, loss: 0.18731839954853058 2023-01-24 01:13:23.321402: step: 156/463, loss: 0.23078471422195435 2023-01-24 01:13:23.906413: step: 158/463, loss: 0.14505517482757568 2023-01-24 01:13:24.526435: step: 160/463, loss: 0.1900167167186737 2023-01-24 01:13:25.099794: step: 162/463, loss: 0.3618879020214081 2023-01-24 01:13:25.702622: step: 164/463, loss: 0.3488426208496094 2023-01-24 01:13:26.343819: step: 166/463, loss: 0.081473208963871 2023-01-24 01:13:26.976892: step: 168/463, loss: 0.17818912863731384 2023-01-24 01:13:27.576969: step: 170/463, loss: 0.8628257513046265 2023-01-24 01:13:28.189003: step: 172/463, loss: 0.2598787248134613 2023-01-24 01:13:28.808751: step: 174/463, loss: 0.17548741400241852 2023-01-24 01:13:29.419502: step: 176/463, loss: 0.2954922914505005 2023-01-24 01:13:30.058386: step: 178/463, loss: 0.15275795757770538 2023-01-24 01:13:30.646534: step: 180/463, loss: 0.3680973947048187 2023-01-24 01:13:31.337061: step: 182/463, loss: 0.34629032015800476 2023-01-24 01:13:31.953869: step: 184/463, loss: 0.15489205718040466 2023-01-24 01:13:32.581873: step: 186/463, loss: 0.25586605072021484 2023-01-24 01:13:33.249859: step: 188/463, loss: 0.25235360860824585 2023-01-24 01:13:33.843070: step: 190/463, loss: 0.3492814600467682 2023-01-24 01:13:34.477561: step: 192/463, loss: 0.7814868688583374 2023-01-24 01:13:35.098009: step: 194/463, loss: 0.1842203289270401 2023-01-24 01:13:35.741842: step: 196/463, loss: 0.23700620234012604 2023-01-24 01:13:36.411080: step: 198/463, loss: 0.15423765778541565 2023-01-24 01:13:37.054339: step: 200/463, loss: 0.5841436386108398 2023-01-24 01:13:37.730687: step: 202/463, loss: 0.43622252345085144 2023-01-24 01:13:38.299763: step: 204/463, loss: 0.18335981667041779 2023-01-24 01:13:38.865715: step: 206/463, loss: 0.13302578032016754 2023-01-24 01:13:39.472967: step: 208/463, loss: 0.08598335087299347 2023-01-24 01:13:40.121945: step: 210/463, loss: 0.13705244660377502 2023-01-24 01:13:40.741024: step: 212/463, loss: 0.24282808601856232 2023-01-24 01:13:41.380759: step: 214/463, loss: 0.07543016225099564 2023-01-24 01:13:41.963285: step: 216/463, loss: 0.26602137088775635 2023-01-24 01:13:42.527216: step: 218/463, loss: 0.18413710594177246 2023-01-24 01:13:43.172747: step: 220/463, loss: 0.5791482925415039 2023-01-24 01:13:43.784629: step: 222/463, loss: 0.16703982651233673 2023-01-24 01:13:44.435103: step: 224/463, loss: 0.14707669615745544 2023-01-24 01:13:45.079641: step: 226/463, loss: 0.20876748859882355 2023-01-24 01:13:45.751788: step: 228/463, loss: 0.23287977278232574 2023-01-24 01:13:46.368276: step: 230/463, loss: 0.20351676642894745 2023-01-24 01:13:47.001128: step: 232/463, loss: 0.897995114326477 2023-01-24 01:13:47.624963: step: 234/463, loss: 0.30385085940361023 2023-01-24 01:13:48.256045: step: 236/463, loss: 0.09378073364496231 2023-01-24 01:13:48.894288: step: 238/463, loss: 0.22015248239040375 2023-01-24 01:13:49.491890: step: 240/463, loss: 0.21884196996688843 2023-01-24 01:13:50.211912: step: 242/463, loss: 0.6067631244659424 2023-01-24 01:13:50.911190: step: 244/463, loss: 0.5320797562599182 2023-01-24 01:13:51.546771: step: 246/463, loss: 0.17964306473731995 2023-01-24 01:13:52.218276: step: 248/463, loss: 1.1467785835266113 2023-01-24 01:13:52.914200: step: 250/463, loss: 0.25201496481895447 2023-01-24 01:13:53.542202: step: 252/463, loss: 0.1855405867099762 2023-01-24 01:13:54.179598: step: 254/463, loss: 0.22568370401859283 2023-01-24 01:13:54.754866: step: 256/463, loss: 0.2871897518634796 2023-01-24 01:13:55.497993: step: 258/463, loss: 0.0970970019698143 2023-01-24 01:13:56.060508: step: 260/463, loss: 0.28054147958755493 2023-01-24 01:13:56.647690: step: 262/463, loss: 0.21849049627780914 2023-01-24 01:13:57.260994: step: 264/463, loss: 0.25020843744277954 2023-01-24 01:13:57.876991: step: 266/463, loss: 0.16708190739154816 2023-01-24 01:13:58.477051: step: 268/463, loss: 0.15879333019256592 2023-01-24 01:13:59.076931: step: 270/463, loss: 0.900672197341919 2023-01-24 01:13:59.758302: step: 272/463, loss: 0.14571386575698853 2023-01-24 01:14:00.486507: step: 274/463, loss: 0.4086513817310333 2023-01-24 01:14:01.164311: step: 276/463, loss: 0.45200464129447937 2023-01-24 01:14:01.710401: step: 278/463, loss: 0.27273863554000854 2023-01-24 01:14:02.340347: step: 280/463, loss: 0.3633576035499573 2023-01-24 01:14:03.021298: step: 282/463, loss: 0.15236657857894897 2023-01-24 01:14:03.632053: step: 284/463, loss: 0.23210710287094116 2023-01-24 01:14:04.248453: step: 286/463, loss: 0.2957319915294647 2023-01-24 01:14:04.894750: step: 288/463, loss: 0.19323796033859253 2023-01-24 01:14:05.506957: step: 290/463, loss: 0.21700216829776764 2023-01-24 01:14:06.076985: step: 292/463, loss: 0.14365844428539276 2023-01-24 01:14:06.748755: step: 294/463, loss: 0.17817026376724243 2023-01-24 01:14:07.328601: step: 296/463, loss: 0.25463876128196716 2023-01-24 01:14:07.943116: step: 298/463, loss: 0.10557904839515686 2023-01-24 01:14:08.589520: step: 300/463, loss: 0.2822621166706085 2023-01-24 01:14:09.214398: step: 302/463, loss: 0.1397036612033844 2023-01-24 01:14:09.825142: step: 304/463, loss: 0.2683257758617401 2023-01-24 01:14:10.483451: step: 306/463, loss: 0.2542690336704254 2023-01-24 01:14:11.155800: step: 308/463, loss: 0.263831228017807 2023-01-24 01:14:11.863536: step: 310/463, loss: 1.9460499286651611 2023-01-24 01:14:12.451434: step: 312/463, loss: 0.2618602514266968 2023-01-24 01:14:13.049924: step: 314/463, loss: 0.13222338259220123 2023-01-24 01:14:13.619750: step: 316/463, loss: 0.11337538808584213 2023-01-24 01:14:14.163221: step: 318/463, loss: 0.2254640758037567 2023-01-24 01:14:14.783388: step: 320/463, loss: 0.11492456495761871 2023-01-24 01:14:15.403366: step: 322/463, loss: 1.0367741584777832 2023-01-24 01:14:16.018752: step: 324/463, loss: 0.25470981001853943 2023-01-24 01:14:16.603621: step: 326/463, loss: 0.05796697363257408 2023-01-24 01:14:17.228760: step: 328/463, loss: 0.2255239188671112 2023-01-24 01:14:17.865385: step: 330/463, loss: 0.30796173214912415 2023-01-24 01:14:18.461620: step: 332/463, loss: 0.24017490446567535 2023-01-24 01:14:19.075239: step: 334/463, loss: 0.08183560520410538 2023-01-24 01:14:19.673916: step: 336/463, loss: 0.3996495008468628 2023-01-24 01:14:20.265812: step: 338/463, loss: 0.4513796269893646 2023-01-24 01:14:20.851055: step: 340/463, loss: 0.3590623438358307 2023-01-24 01:14:21.485351: step: 342/463, loss: 0.21036890149116516 2023-01-24 01:14:22.117848: step: 344/463, loss: 0.5369996428489685 2023-01-24 01:14:22.702609: step: 346/463, loss: 0.18285968899726868 2023-01-24 01:14:23.350861: step: 348/463, loss: 0.21790096163749695 2023-01-24 01:14:23.963037: step: 350/463, loss: 0.22410736978054047 2023-01-24 01:14:24.613816: step: 352/463, loss: 0.1446080207824707 2023-01-24 01:14:25.260860: step: 354/463, loss: 0.7221202254295349 2023-01-24 01:14:25.934486: step: 356/463, loss: 0.17447739839553833 2023-01-24 01:14:26.634342: step: 358/463, loss: 0.26540112495422363 2023-01-24 01:14:27.261155: step: 360/463, loss: 0.14820483326911926 2023-01-24 01:14:27.873797: step: 362/463, loss: 0.11222131550312042 2023-01-24 01:14:28.551985: step: 364/463, loss: 0.1932050883769989 2023-01-24 01:14:29.134475: step: 366/463, loss: 0.09613216668367386 2023-01-24 01:14:29.820386: step: 368/463, loss: 0.32115912437438965 2023-01-24 01:14:30.451047: step: 370/463, loss: 0.22972719371318817 2023-01-24 01:14:31.032105: step: 372/463, loss: 0.07318393886089325 2023-01-24 01:14:31.645644: step: 374/463, loss: 0.30374351143836975 2023-01-24 01:14:32.220250: step: 376/463, loss: 0.4182368814945221 2023-01-24 01:14:32.813435: step: 378/463, loss: 0.15355642139911652 2023-01-24 01:14:33.445291: step: 380/463, loss: 0.053889188915491104 2023-01-24 01:14:34.054380: step: 382/463, loss: 0.23191863298416138 2023-01-24 01:14:34.701593: step: 384/463, loss: 0.2678071856498718 2023-01-24 01:14:35.326082: step: 386/463, loss: 0.15507501363754272 2023-01-24 01:14:35.876469: step: 388/463, loss: 0.17070400714874268 2023-01-24 01:14:36.521608: step: 390/463, loss: 0.2675144672393799 2023-01-24 01:14:37.220166: step: 392/463, loss: 0.18658870458602905 2023-01-24 01:14:37.843964: step: 394/463, loss: 0.09802158176898956 2023-01-24 01:14:38.474781: step: 396/463, loss: 0.5530169010162354 2023-01-24 01:14:39.069389: step: 398/463, loss: 0.13036175072193146 2023-01-24 01:14:39.674962: step: 400/463, loss: 0.5727664232254028 2023-01-24 01:14:40.283668: step: 402/463, loss: 0.09940128028392792 2023-01-24 01:14:40.897845: step: 404/463, loss: 0.31986093521118164 2023-01-24 01:14:41.506655: step: 406/463, loss: 0.7030962109565735 2023-01-24 01:14:42.162065: step: 408/463, loss: 0.11078163981437683 2023-01-24 01:14:42.835672: step: 410/463, loss: 0.3519532084465027 2023-01-24 01:14:43.440031: step: 412/463, loss: 0.23051252961158752 2023-01-24 01:14:44.115583: step: 414/463, loss: 0.1121712401509285 2023-01-24 01:14:44.696417: step: 416/463, loss: 0.3549158573150635 2023-01-24 01:14:45.288454: step: 418/463, loss: 0.31530246138572693 2023-01-24 01:14:45.829266: step: 420/463, loss: 0.3870757520198822 2023-01-24 01:14:46.393020: step: 422/463, loss: 0.16960963606834412 2023-01-24 01:14:47.061658: step: 424/463, loss: 0.5314084887504578 2023-01-24 01:14:47.695403: step: 426/463, loss: 0.14330002665519714 2023-01-24 01:14:48.290673: step: 428/463, loss: 0.5325902700424194 2023-01-24 01:14:49.012395: step: 430/463, loss: 0.2526671886444092 2023-01-24 01:14:49.608875: step: 432/463, loss: 0.3225879669189453 2023-01-24 01:14:50.216741: step: 434/463, loss: 0.2309187948703766 2023-01-24 01:14:50.829456: step: 436/463, loss: 0.16792495548725128 2023-01-24 01:14:51.450598: step: 438/463, loss: 0.6884794235229492 2023-01-24 01:14:52.082423: step: 440/463, loss: 0.059158552438020706 2023-01-24 01:14:52.666481: step: 442/463, loss: 0.31807082891464233 2023-01-24 01:14:53.275915: step: 444/463, loss: 0.8425359725952148 2023-01-24 01:14:53.854703: step: 446/463, loss: 0.22956913709640503 2023-01-24 01:14:54.445675: step: 448/463, loss: 0.11970395594835281 2023-01-24 01:14:55.048806: step: 450/463, loss: 0.4807383120059967 2023-01-24 01:14:55.685655: step: 452/463, loss: 0.21160563826560974 2023-01-24 01:14:56.330150: step: 454/463, loss: 0.633194625377655 2023-01-24 01:14:56.940617: step: 456/463, loss: 0.1652614027261734 2023-01-24 01:14:57.592173: step: 458/463, loss: 0.13365940749645233 2023-01-24 01:14:58.252662: step: 460/463, loss: 0.22545641660690308 2023-01-24 01:14:58.879980: step: 462/463, loss: 0.6370010375976562 2023-01-24 01:14:59.481073: step: 464/463, loss: 0.08026273548603058 2023-01-24 01:15:00.144731: step: 466/463, loss: 0.5588064789772034 2023-01-24 01:15:00.773050: step: 468/463, loss: 0.9731622338294983 2023-01-24 01:15:01.433398: step: 470/463, loss: 0.12446781992912292 2023-01-24 01:15:02.067404: step: 472/463, loss: 0.45057204365730286 2023-01-24 01:15:02.738866: step: 474/463, loss: 0.0737568810582161 2023-01-24 01:15:03.345096: step: 476/463, loss: 0.43063193559646606 2023-01-24 01:15:03.959059: step: 478/463, loss: 0.46717628836631775 2023-01-24 01:15:04.601389: step: 480/463, loss: 0.22403323650360107 2023-01-24 01:15:05.160022: step: 482/463, loss: 0.7971420288085938 2023-01-24 01:15:05.733067: step: 484/463, loss: 0.10911144316196442 2023-01-24 01:15:06.358532: step: 486/463, loss: 0.3648187518119812 2023-01-24 01:15:06.963643: step: 488/463, loss: 0.1820109486579895 2023-01-24 01:15:07.625509: step: 490/463, loss: 0.05152883008122444 2023-01-24 01:15:08.273086: step: 492/463, loss: 0.3507932126522064 2023-01-24 01:15:08.970648: step: 494/463, loss: 0.980003833770752 2023-01-24 01:15:09.518799: step: 496/463, loss: 0.06188702583312988 2023-01-24 01:15:10.214533: step: 498/463, loss: 0.12341655045747757 2023-01-24 01:15:10.884165: step: 500/463, loss: 0.3014623522758484 2023-01-24 01:15:11.452818: step: 502/463, loss: 0.22810029983520508 2023-01-24 01:15:12.032138: step: 504/463, loss: 0.24539735913276672 2023-01-24 01:15:12.714290: step: 506/463, loss: 0.25342482328414917 2023-01-24 01:15:13.345956: step: 508/463, loss: 0.6143503785133362 2023-01-24 01:15:14.036406: step: 510/463, loss: 0.08440279960632324 2023-01-24 01:15:14.675197: step: 512/463, loss: 0.5813785791397095 2023-01-24 01:15:15.331814: step: 514/463, loss: 0.18750399351119995 2023-01-24 01:15:15.947564: step: 516/463, loss: 0.12383110076189041 2023-01-24 01:15:16.532227: step: 518/463, loss: 0.1619398444890976 2023-01-24 01:15:17.195808: step: 520/463, loss: 0.36592888832092285 2023-01-24 01:15:17.895090: step: 522/463, loss: 0.18676599860191345 2023-01-24 01:15:18.437440: step: 524/463, loss: 0.20253662765026093 2023-01-24 01:15:19.106187: step: 526/463, loss: 0.28952017426490784 2023-01-24 01:15:19.719122: step: 528/463, loss: 0.0837351381778717 2023-01-24 01:15:20.360213: step: 530/463, loss: 0.1996394693851471 2023-01-24 01:15:21.032594: step: 532/463, loss: 0.3002845346927643 2023-01-24 01:15:21.643150: step: 534/463, loss: 0.14645184576511383 2023-01-24 01:15:22.235136: step: 536/463, loss: 0.067990243434906 2023-01-24 01:15:22.841291: step: 538/463, loss: 0.4760712683200836 2023-01-24 01:15:23.424029: step: 540/463, loss: 0.5932899117469788 2023-01-24 01:15:23.972624: step: 542/463, loss: 0.16231553256511688 2023-01-24 01:15:24.588819: step: 544/463, loss: 0.8529051542282104 2023-01-24 01:15:25.178474: step: 546/463, loss: 0.13931874930858612 2023-01-24 01:15:25.803953: step: 548/463, loss: 0.13295228779315948 2023-01-24 01:15:26.401407: step: 550/463, loss: 0.43543460965156555 2023-01-24 01:15:27.029417: step: 552/463, loss: 0.6473884582519531 2023-01-24 01:15:27.640507: step: 554/463, loss: 0.6208295822143555 2023-01-24 01:15:28.279093: step: 556/463, loss: 0.32245805859565735 2023-01-24 01:15:28.879720: step: 558/463, loss: 1.770228385925293 2023-01-24 01:15:29.560016: step: 560/463, loss: 0.20916807651519775 2023-01-24 01:15:30.143146: step: 562/463, loss: 0.14589859545230865 2023-01-24 01:15:30.741989: step: 564/463, loss: 0.24990589916706085 2023-01-24 01:15:31.490497: step: 566/463, loss: 0.3019903898239136 2023-01-24 01:15:32.037330: step: 568/463, loss: 0.11575135588645935 2023-01-24 01:15:32.599438: step: 570/463, loss: 0.083357073366642 2023-01-24 01:15:33.160487: step: 572/463, loss: 0.05302351340651512 2023-01-24 01:15:33.764056: step: 574/463, loss: 0.2778109312057495 2023-01-24 01:15:34.364101: step: 576/463, loss: 0.05298285931348801 2023-01-24 01:15:34.958139: step: 578/463, loss: 0.8646448850631714 2023-01-24 01:15:35.515986: step: 580/463, loss: 0.5422811508178711 2023-01-24 01:15:36.077490: step: 582/463, loss: 0.14728735387325287 2023-01-24 01:15:36.674112: step: 584/463, loss: 0.187782421708107 2023-01-24 01:15:37.338548: step: 586/463, loss: 0.16480490565299988 2023-01-24 01:15:37.943914: step: 588/463, loss: 0.2632267475128174 2023-01-24 01:15:38.540723: step: 590/463, loss: 0.0823042243719101 2023-01-24 01:15:39.091949: step: 592/463, loss: 0.3487679064273834 2023-01-24 01:15:39.711939: step: 594/463, loss: 0.1836131364107132 2023-01-24 01:15:40.263622: step: 596/463, loss: 0.219617560505867 2023-01-24 01:15:40.874257: step: 598/463, loss: 0.760192334651947 2023-01-24 01:15:41.492226: step: 600/463, loss: 0.25891590118408203 2023-01-24 01:15:42.128388: step: 602/463, loss: 0.7730326652526855 2023-01-24 01:15:42.735417: step: 604/463, loss: 0.26593461632728577 2023-01-24 01:15:43.363380: step: 606/463, loss: 0.37019580602645874 2023-01-24 01:15:44.006943: step: 608/463, loss: 0.2653299868106842 2023-01-24 01:15:44.597983: step: 610/463, loss: 0.2394438236951828 2023-01-24 01:15:45.205278: step: 612/463, loss: 0.44029101729393005 2023-01-24 01:15:45.858774: step: 614/463, loss: 0.05722891539335251 2023-01-24 01:15:46.442771: step: 616/463, loss: 0.3484257161617279 2023-01-24 01:15:47.040925: step: 618/463, loss: 0.08210018277168274 2023-01-24 01:15:47.673261: step: 620/463, loss: 0.19306351244449615 2023-01-24 01:15:48.286797: step: 622/463, loss: 0.1366589218378067 2023-01-24 01:15:48.927683: step: 624/463, loss: 0.25840088725090027 2023-01-24 01:15:49.465877: step: 626/463, loss: 0.06171680614352226 2023-01-24 01:15:50.154395: step: 628/463, loss: 0.14395509660243988 2023-01-24 01:15:50.829088: step: 630/463, loss: 0.20327600836753845 2023-01-24 01:15:51.422205: step: 632/463, loss: 0.13372549414634705 2023-01-24 01:15:52.060613: step: 634/463, loss: 0.09817241132259369 2023-01-24 01:15:52.688691: step: 636/463, loss: 0.537545919418335 2023-01-24 01:15:53.345995: step: 638/463, loss: 1.0766065120697021 2023-01-24 01:15:54.009697: step: 640/463, loss: 0.10995373129844666 2023-01-24 01:15:54.630857: step: 642/463, loss: 0.154397651553154 2023-01-24 01:15:55.229885: step: 644/463, loss: 0.06673179566860199 2023-01-24 01:15:55.856450: step: 646/463, loss: 0.13693280518054962 2023-01-24 01:15:56.453960: step: 648/463, loss: 0.23969599604606628 2023-01-24 01:15:57.060358: step: 650/463, loss: 0.290406733751297 2023-01-24 01:15:57.680294: step: 652/463, loss: 0.7215564846992493 2023-01-24 01:15:58.321752: step: 654/463, loss: 0.9090859889984131 2023-01-24 01:15:58.892468: step: 656/463, loss: 0.24216076731681824 2023-01-24 01:15:59.496817: step: 658/463, loss: 0.14913325011730194 2023-01-24 01:16:00.232812: step: 660/463, loss: 0.1350444257259369 2023-01-24 01:16:00.881495: step: 662/463, loss: 0.09829533845186234 2023-01-24 01:16:01.526668: step: 664/463, loss: 0.05164027586579323 2023-01-24 01:16:02.109622: step: 666/463, loss: 0.9949164986610413 2023-01-24 01:16:02.730175: step: 668/463, loss: 0.28633543848991394 2023-01-24 01:16:03.326009: step: 670/463, loss: 0.10687119513750076 2023-01-24 01:16:03.920099: step: 672/463, loss: 0.14936110377311707 2023-01-24 01:16:04.533566: step: 674/463, loss: 0.2001819610595703 2023-01-24 01:16:05.122670: step: 676/463, loss: 0.275068461894989 2023-01-24 01:16:05.731992: step: 678/463, loss: 0.25014397501945496 2023-01-24 01:16:06.390814: step: 680/463, loss: 0.4746881127357483 2023-01-24 01:16:07.034295: step: 682/463, loss: 0.6380590200424194 2023-01-24 01:16:07.635501: step: 684/463, loss: 0.20501750707626343 2023-01-24 01:16:08.335201: step: 686/463, loss: 0.45265549421310425 2023-01-24 01:16:08.988496: step: 688/463, loss: 0.0979803279042244 2023-01-24 01:16:09.597799: step: 690/463, loss: 0.2585076689720154 2023-01-24 01:16:10.176024: step: 692/463, loss: 0.8459354639053345 2023-01-24 01:16:10.824477: step: 694/463, loss: 0.30131617188453674 2023-01-24 01:16:11.393048: step: 696/463, loss: 0.3636021316051483 2023-01-24 01:16:11.965345: step: 698/463, loss: 0.17327608168125153 2023-01-24 01:16:12.589599: step: 700/463, loss: 0.11873305588960648 2023-01-24 01:16:13.274937: step: 702/463, loss: 0.2941829562187195 2023-01-24 01:16:13.956931: step: 704/463, loss: 0.48770639300346375 2023-01-24 01:16:14.559103: step: 706/463, loss: 0.21048739552497864 2023-01-24 01:16:15.120085: step: 708/463, loss: 0.26141446828842163 2023-01-24 01:16:15.804693: step: 710/463, loss: 1.0029133558273315 2023-01-24 01:16:16.416945: step: 712/463, loss: 0.825661301612854 2023-01-24 01:16:17.028465: step: 714/463, loss: 0.27143594622612 2023-01-24 01:16:17.620078: step: 716/463, loss: 0.12954536080360413 2023-01-24 01:16:18.216417: step: 718/463, loss: 0.12344024330377579 2023-01-24 01:16:18.815207: step: 720/463, loss: 0.11319894343614578 2023-01-24 01:16:19.368028: step: 722/463, loss: 0.037569086998701096 2023-01-24 01:16:19.942854: step: 724/463, loss: 0.12392205744981766 2023-01-24 01:16:20.595018: step: 726/463, loss: 0.14336884021759033 2023-01-24 01:16:21.135258: step: 728/463, loss: 0.25060126185417175 2023-01-24 01:16:21.721597: step: 730/463, loss: 0.2917499244213104 2023-01-24 01:16:22.325139: step: 732/463, loss: 0.11173426359891891 2023-01-24 01:16:22.940094: step: 734/463, loss: 0.12138108164072037 2023-01-24 01:16:23.573255: step: 736/463, loss: 0.450734943151474 2023-01-24 01:16:24.125842: step: 738/463, loss: 0.9772469997406006 2023-01-24 01:16:24.761688: step: 740/463, loss: 2.3889918327331543 2023-01-24 01:16:25.365982: step: 742/463, loss: 0.15132781863212585 2023-01-24 01:16:25.966003: step: 744/463, loss: 0.16488295793533325 2023-01-24 01:16:26.524290: step: 746/463, loss: 0.11444897204637527 2023-01-24 01:16:27.159771: step: 748/463, loss: 0.18337833881378174 2023-01-24 01:16:27.758605: step: 750/463, loss: 0.2182713747024536 2023-01-24 01:16:28.378297: step: 752/463, loss: 0.29011327028274536 2023-01-24 01:16:29.034825: step: 754/463, loss: 0.08822859823703766 2023-01-24 01:16:29.637905: step: 756/463, loss: 0.14828428626060486 2023-01-24 01:16:30.206252: step: 758/463, loss: 0.22737431526184082 2023-01-24 01:16:30.756436: step: 760/463, loss: 0.107344850897789 2023-01-24 01:16:31.326112: step: 762/463, loss: 0.37095901370048523 2023-01-24 01:16:31.960894: step: 764/463, loss: 0.22544699907302856 2023-01-24 01:16:32.566748: step: 766/463, loss: 0.3135944902896881 2023-01-24 01:16:33.206053: step: 768/463, loss: 0.318268358707428 2023-01-24 01:16:33.784317: step: 770/463, loss: 0.8681970238685608 2023-01-24 01:16:34.406992: step: 772/463, loss: 0.17022764682769775 2023-01-24 01:16:34.994313: step: 774/463, loss: 0.12838152050971985 2023-01-24 01:16:35.695823: step: 776/463, loss: 0.10882436484098434 2023-01-24 01:16:36.350903: step: 778/463, loss: 0.10818567126989365 2023-01-24 01:16:37.049518: step: 780/463, loss: 0.14951752126216888 2023-01-24 01:16:37.701540: step: 782/463, loss: 0.6003298163414001 2023-01-24 01:16:38.454279: step: 784/463, loss: 2.19765567779541 2023-01-24 01:16:39.080478: step: 786/463, loss: 0.15947005152702332 2023-01-24 01:16:39.656758: step: 788/463, loss: 0.07240942120552063 2023-01-24 01:16:40.286901: step: 790/463, loss: 0.13658851385116577 2023-01-24 01:16:40.909444: step: 792/463, loss: 0.15842501819133759 2023-01-24 01:16:41.590012: step: 794/463, loss: 0.12694789469242096 2023-01-24 01:16:42.246327: step: 796/463, loss: 0.19413883984088898 2023-01-24 01:16:42.911154: step: 798/463, loss: 0.42260080575942993 2023-01-24 01:16:43.531154: step: 800/463, loss: 0.1677320897579193 2023-01-24 01:16:44.154425: step: 802/463, loss: 0.5036795139312744 2023-01-24 01:16:44.801249: step: 804/463, loss: 0.19367152452468872 2023-01-24 01:16:45.379888: step: 806/463, loss: 0.1507994681596756 2023-01-24 01:16:45.977875: step: 808/463, loss: 0.05353556573390961 2023-01-24 01:16:46.572763: step: 810/463, loss: 0.4089425504207611 2023-01-24 01:16:47.162445: step: 812/463, loss: 0.5463809370994568 2023-01-24 01:16:47.742807: step: 814/463, loss: 0.06974062323570251 2023-01-24 01:16:48.368308: step: 816/463, loss: 0.35333529114723206 2023-01-24 01:16:48.952655: step: 818/463, loss: 0.07739656418561935 2023-01-24 01:16:49.615427: step: 820/463, loss: 0.34687095880508423 2023-01-24 01:16:50.238755: step: 822/463, loss: 0.3159623146057129 2023-01-24 01:16:50.827998: step: 824/463, loss: 0.4063344895839691 2023-01-24 01:16:51.447611: step: 826/463, loss: 0.1810424029827118 2023-01-24 01:16:52.077799: step: 828/463, loss: 0.2012706995010376 2023-01-24 01:16:52.763694: step: 830/463, loss: 0.09158824384212494 2023-01-24 01:16:53.372870: step: 832/463, loss: 0.6169873476028442 2023-01-24 01:16:54.056024: step: 834/463, loss: 0.16272945702075958 2023-01-24 01:16:54.708131: step: 836/463, loss: 0.15982531011104584 2023-01-24 01:16:55.318128: step: 838/463, loss: 0.06779561191797256 2023-01-24 01:16:55.964701: step: 840/463, loss: 0.3595781624317169 2023-01-24 01:16:56.546406: step: 842/463, loss: 0.0791391134262085 2023-01-24 01:16:57.190155: step: 844/463, loss: 0.27549371123313904 2023-01-24 01:16:57.806850: step: 846/463, loss: 0.4131931960582733 2023-01-24 01:16:58.373015: step: 848/463, loss: 0.6292086243629456 2023-01-24 01:16:59.035244: step: 850/463, loss: 0.30800503492355347 2023-01-24 01:16:59.591882: step: 852/463, loss: 0.1196153312921524 2023-01-24 01:17:00.200781: step: 854/463, loss: 0.13580776751041412 2023-01-24 01:17:00.887861: step: 856/463, loss: 0.5255787968635559 2023-01-24 01:17:01.533643: step: 858/463, loss: 0.1318511813879013 2023-01-24 01:17:02.142162: step: 860/463, loss: 0.18214105069637299 2023-01-24 01:17:02.772199: step: 862/463, loss: 0.36088091135025024 2023-01-24 01:17:03.428727: step: 864/463, loss: 0.7797210216522217 2023-01-24 01:17:04.026372: step: 866/463, loss: 0.4265013337135315 2023-01-24 01:17:04.658651: step: 868/463, loss: 0.7816517949104309 2023-01-24 01:17:05.265821: step: 870/463, loss: 0.25371304154396057 2023-01-24 01:17:05.903636: step: 872/463, loss: 0.18142247200012207 2023-01-24 01:17:06.610112: step: 874/463, loss: 0.2577288746833801 2023-01-24 01:17:07.210476: step: 876/463, loss: 0.11698098480701447 2023-01-24 01:17:07.804519: step: 878/463, loss: 0.20003195106983185 2023-01-24 01:17:08.409257: step: 880/463, loss: 0.1782340556383133 2023-01-24 01:17:09.013564: step: 882/463, loss: 0.1248849630355835 2023-01-24 01:17:09.617948: step: 884/463, loss: 0.4253688454627991 2023-01-24 01:17:10.274128: step: 886/463, loss: 0.07649129629135132 2023-01-24 01:17:10.869275: step: 888/463, loss: 0.5954459309577942 2023-01-24 01:17:11.527360: step: 890/463, loss: 0.19676120579242706 2023-01-24 01:17:12.096779: step: 892/463, loss: 0.4825001358985901 2023-01-24 01:17:12.717944: step: 894/463, loss: 0.29770323634147644 2023-01-24 01:17:13.313547: step: 896/463, loss: 0.2951374650001526 2023-01-24 01:17:13.955512: step: 898/463, loss: 0.24668170511722565 2023-01-24 01:17:14.608383: step: 900/463, loss: 0.32290175557136536 2023-01-24 01:17:15.200218: step: 902/463, loss: 0.20828649401664734 2023-01-24 01:17:15.816057: step: 904/463, loss: 0.09835977107286453 2023-01-24 01:17:16.417148: step: 906/463, loss: 0.4512248933315277 2023-01-24 01:17:17.002992: step: 908/463, loss: 1.2370660305023193 2023-01-24 01:17:17.600943: step: 910/463, loss: 0.4330728054046631 2023-01-24 01:17:18.210289: step: 912/463, loss: 0.1754506230354309 2023-01-24 01:17:18.804004: step: 914/463, loss: 0.2189464271068573 2023-01-24 01:17:19.361975: step: 916/463, loss: 0.05806979164481163 2023-01-24 01:17:19.983815: step: 918/463, loss: 0.45422038435935974 2023-01-24 01:17:20.623512: step: 920/463, loss: 0.22595399618148804 2023-01-24 01:17:21.235572: step: 922/463, loss: 1.0573160648345947 2023-01-24 01:17:21.847488: step: 924/463, loss: 0.2549581527709961 2023-01-24 01:17:22.495542: step: 926/463, loss: 0.2506772577762604 ================================================== Loss: 0.327 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34530585106382977, 'r': 0.307957779886148, 'f1': 0.32556419257773317}, 'combined': 0.23988940505727707, 'epoch': 10} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.358390221933455, 'r': 0.3758485691416104, 'f1': 0.36691183779818043}, 'combined': 0.28440056326940305, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3208656068127354, 'r': 0.3092973970794489, 'f1': 0.31497532031085906}, 'combined': 0.23208707812379087, 'epoch': 10} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34835211498595176, 'r': 0.37876888973196776, 'f1': 0.3629243082592522}, 'combined': 0.2813097508995161, 'epoch': 10} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3200210872115418, 'r': 0.30423257057491926, 'f1': 0.31192716866339}, 'combined': 0.2298410716467084, 'epoch': 10} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34436828342299075, 'r': 0.360194031742062, 'f1': 0.352103420067712}, 'combined': 0.2729222681864562, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.26171875, 'r': 0.2392857142857143, 'f1': 0.25}, 'combined': 0.16666666666666666, 'epoch': 10} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.29838709677419356, 'r': 0.40217391304347827, 'f1': 0.34259259259259267}, 'combined': 0.17129629629629634, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32142857142857145, 'r': 0.15517241379310345, 'f1': 0.20930232558139536}, 'combined': 0.13953488372093023, 'epoch': 10} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3664599181442166, 'r': 0.3122210687794179, 'f1': 0.33717316239088774}, 'combined': 0.2484433828143383, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3655563493147106, 'r': 0.32203773630105453, 'f1': 0.34241987150998204}, 'combined': 0.26541635973501004, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.42857142857142855, 'r': 0.2571428571428571, 'f1': 0.3214285714285714}, 'combined': 0.21428571428571425, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3283040393999639, 'r': 0.31335281179920654, 'f1': 0.3206542365401589}, 'combined': 0.23627154271380127, 'epoch': 6} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34970123997901775, 'r': 0.3199189915192662, 'f1': 0.3341478132367659}, 'combined': 0.2590045250926128, 'epoch': 6} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.38, 'r': 0.41304347826086957, 'f1': 0.39583333333333337}, 'combined': 0.19791666666666669, 'epoch': 6} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32326799298325637, 'r': 0.29632899356798503, 'f1': 0.30921286285354954}, 'combined': 0.22784105683945755, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32287388038511977, 'r': 0.3347442436345727, 'f1': 0.32870192876752263}, 'combined': 0.2547833132073621, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.59375, 'r': 0.16379310344827586, 'f1': 0.25675675675675674}, 'combined': 0.17117117117117114, 'epoch': 5} ****************************** Epoch: 11 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:19:59.043224: step: 2/463, loss: 0.38464900851249695 2023-01-24 01:19:59.720221: step: 4/463, loss: 0.24363644421100616 2023-01-24 01:20:00.406278: step: 6/463, loss: 0.12040785700082779 2023-01-24 01:20:01.053260: step: 8/463, loss: 0.09246577322483063 2023-01-24 01:20:01.653161: step: 10/463, loss: 0.3931816518306732 2023-01-24 01:20:02.234642: step: 12/463, loss: 0.2293281853199005 2023-01-24 01:20:02.819915: step: 14/463, loss: 0.13477280735969543 2023-01-24 01:20:03.474771: step: 16/463, loss: 0.22993196547031403 2023-01-24 01:20:04.205367: step: 18/463, loss: 0.16220282018184662 2023-01-24 01:20:04.820537: step: 20/463, loss: 0.07360216230154037 2023-01-24 01:20:05.472126: step: 22/463, loss: 0.12905588746070862 2023-01-24 01:20:06.072863: step: 24/463, loss: 0.2561604976654053 2023-01-24 01:20:06.641987: step: 26/463, loss: 0.6398051977157593 2023-01-24 01:20:07.300304: step: 28/463, loss: 0.3928891122341156 2023-01-24 01:20:07.903358: step: 30/463, loss: 0.21045248210430145 2023-01-24 01:20:08.555187: step: 32/463, loss: 0.5834149718284607 2023-01-24 01:20:09.143242: step: 34/463, loss: 0.29112717509269714 2023-01-24 01:20:09.699449: step: 36/463, loss: 0.3674471378326416 2023-01-24 01:20:10.319077: step: 38/463, loss: 0.16936175525188446 2023-01-24 01:20:10.893754: step: 40/463, loss: 0.18998980522155762 2023-01-24 01:20:11.489632: step: 42/463, loss: 0.12141333520412445 2023-01-24 01:20:12.076885: step: 44/463, loss: 0.4282970130443573 2023-01-24 01:20:12.704886: step: 46/463, loss: 0.5694759488105774 2023-01-24 01:20:13.322690: step: 48/463, loss: 0.24875542521476746 2023-01-24 01:20:13.899120: step: 50/463, loss: 0.17153924703598022 2023-01-24 01:20:14.513912: step: 52/463, loss: 0.07150880247354507 2023-01-24 01:20:15.144652: step: 54/463, loss: 0.862336277961731 2023-01-24 01:20:15.770177: step: 56/463, loss: 0.41236478090286255 2023-01-24 01:20:16.348423: step: 58/463, loss: 0.14219239354133606 2023-01-24 01:20:17.095954: step: 60/463, loss: 0.11710482835769653 2023-01-24 01:20:17.782610: step: 62/463, loss: 0.10768640786409378 2023-01-24 01:20:18.384851: step: 64/463, loss: 0.17711031436920166 2023-01-24 01:20:18.915912: step: 66/463, loss: 0.14531758427619934 2023-01-24 01:20:19.559692: step: 68/463, loss: 1.847318410873413 2023-01-24 01:20:20.167590: step: 70/463, loss: 0.1776900440454483 2023-01-24 01:20:20.790384: step: 72/463, loss: 0.32158163189888 2023-01-24 01:20:21.421384: step: 74/463, loss: 0.5123260617256165 2023-01-24 01:20:21.990561: step: 76/463, loss: 0.7484617829322815 2023-01-24 01:20:22.587341: step: 78/463, loss: 0.35480397939682007 2023-01-24 01:20:23.200680: step: 80/463, loss: 0.2403063327074051 2023-01-24 01:20:23.811264: step: 82/463, loss: 0.18592043220996857 2023-01-24 01:20:24.406386: step: 84/463, loss: 0.25290006399154663 2023-01-24 01:20:25.026847: step: 86/463, loss: 0.21364113688468933 2023-01-24 01:20:25.634618: step: 88/463, loss: 0.3772701025009155 2023-01-24 01:20:26.295776: step: 90/463, loss: 0.2814018428325653 2023-01-24 01:20:26.879327: step: 92/463, loss: 0.14555954933166504 2023-01-24 01:20:27.476566: step: 94/463, loss: 0.16270212829113007 2023-01-24 01:20:28.067386: step: 96/463, loss: 0.2507583498954773 2023-01-24 01:20:28.644975: step: 98/463, loss: 0.05216884985566139 2023-01-24 01:20:29.181765: step: 100/463, loss: 0.202926903963089 2023-01-24 01:20:29.810491: step: 102/463, loss: 0.09507398307323456 2023-01-24 01:20:30.439061: step: 104/463, loss: 0.13441112637519836 2023-01-24 01:20:31.098401: step: 106/463, loss: 1.1544842720031738 2023-01-24 01:20:31.719478: step: 108/463, loss: 0.09096304327249527 2023-01-24 01:20:32.320376: step: 110/463, loss: 0.165726438164711 2023-01-24 01:20:32.940871: step: 112/463, loss: 0.2630023956298828 2023-01-24 01:20:33.567918: step: 114/463, loss: 0.3290812373161316 2023-01-24 01:20:34.180419: step: 116/463, loss: 0.47309359908103943 2023-01-24 01:20:34.791200: step: 118/463, loss: 0.8138670921325684 2023-01-24 01:20:35.398102: step: 120/463, loss: 0.595111608505249 2023-01-24 01:20:36.083295: step: 122/463, loss: 0.1392076462507248 2023-01-24 01:20:36.710489: step: 124/463, loss: 0.2151304930448532 2023-01-24 01:20:37.366567: step: 126/463, loss: 0.1979188323020935 2023-01-24 01:20:37.946140: step: 128/463, loss: 0.2573375105857849 2023-01-24 01:20:38.584138: step: 130/463, loss: 0.15503670275211334 2023-01-24 01:20:39.143233: step: 132/463, loss: 0.06725090742111206 2023-01-24 01:20:39.713961: step: 134/463, loss: 0.36167651414871216 2023-01-24 01:20:40.337188: step: 136/463, loss: 0.14665812253952026 2023-01-24 01:20:40.970369: step: 138/463, loss: 0.062385838478803635 2023-01-24 01:20:41.549288: step: 140/463, loss: 0.07878714054822922 2023-01-24 01:20:42.142240: step: 142/463, loss: 0.1546541303396225 2023-01-24 01:20:42.837180: step: 144/463, loss: 0.175470232963562 2023-01-24 01:20:43.482767: step: 146/463, loss: 0.07201854139566422 2023-01-24 01:20:44.069303: step: 148/463, loss: 0.26908040046691895 2023-01-24 01:20:44.625052: step: 150/463, loss: 0.20902375876903534 2023-01-24 01:20:45.231691: step: 152/463, loss: 0.32269707322120667 2023-01-24 01:20:45.835125: step: 154/463, loss: 0.07607974112033844 2023-01-24 01:20:46.374163: step: 156/463, loss: 0.23623090982437134 2023-01-24 01:20:46.972307: step: 158/463, loss: 0.1768122762441635 2023-01-24 01:20:47.567196: step: 160/463, loss: 0.11020787060260773 2023-01-24 01:20:48.178512: step: 162/463, loss: 0.0947912409901619 2023-01-24 01:20:48.783714: step: 164/463, loss: 0.31853047013282776 2023-01-24 01:20:49.372087: step: 166/463, loss: 0.12231225520372391 2023-01-24 01:20:50.066782: step: 168/463, loss: 0.11263152956962585 2023-01-24 01:20:50.681243: step: 170/463, loss: 0.026019282639026642 2023-01-24 01:20:51.237329: step: 172/463, loss: 0.18614421784877777 2023-01-24 01:20:51.895660: step: 174/463, loss: 0.18698790669441223 2023-01-24 01:20:52.486005: step: 176/463, loss: 0.28849923610687256 2023-01-24 01:20:53.079101: step: 178/463, loss: 0.08335006237030029 2023-01-24 01:20:53.747089: step: 180/463, loss: 0.15389671921730042 2023-01-24 01:20:54.345709: step: 182/463, loss: 0.2285962849855423 2023-01-24 01:20:54.904846: step: 184/463, loss: 0.08953993767499924 2023-01-24 01:20:55.548423: step: 186/463, loss: 0.09249279648065567 2023-01-24 01:20:56.248160: step: 188/463, loss: 0.07478576898574829 2023-01-24 01:20:56.922451: step: 190/463, loss: 0.17967252433300018 2023-01-24 01:20:57.591984: step: 192/463, loss: 0.12968912720680237 2023-01-24 01:20:58.229824: step: 194/463, loss: 0.11943177878856659 2023-01-24 01:20:58.840603: step: 196/463, loss: 0.18584540486335754 2023-01-24 01:20:59.405191: step: 198/463, loss: 0.17804957926273346 2023-01-24 01:21:00.029975: step: 200/463, loss: 0.469115287065506 2023-01-24 01:21:00.647131: step: 202/463, loss: 0.19804894924163818 2023-01-24 01:21:01.276862: step: 204/463, loss: 0.05510970577597618 2023-01-24 01:21:01.857958: step: 206/463, loss: 0.3034987449645996 2023-01-24 01:21:02.463478: step: 208/463, loss: 0.0651090145111084 2023-01-24 01:21:03.038605: step: 210/463, loss: 0.0950593501329422 2023-01-24 01:21:03.669673: step: 212/463, loss: 0.3335636854171753 2023-01-24 01:21:04.252734: step: 214/463, loss: 1.0063867568969727 2023-01-24 01:21:04.907325: step: 216/463, loss: 0.1597270667552948 2023-01-24 01:21:05.480332: step: 218/463, loss: 0.18164093792438507 2023-01-24 01:21:06.112724: step: 220/463, loss: 0.13767415285110474 2023-01-24 01:21:06.731791: step: 222/463, loss: 0.031589508056640625 2023-01-24 01:21:07.418286: step: 224/463, loss: 0.34365224838256836 2023-01-24 01:21:08.068608: step: 226/463, loss: 0.0505225732922554 2023-01-24 01:21:08.649746: step: 228/463, loss: 0.5094326734542847 2023-01-24 01:21:09.210056: step: 230/463, loss: 0.1921921670436859 2023-01-24 01:21:09.812535: step: 232/463, loss: 0.17939475178718567 2023-01-24 01:21:10.404743: step: 234/463, loss: 0.07661590725183487 2023-01-24 01:21:11.025681: step: 236/463, loss: 0.5066153407096863 2023-01-24 01:21:11.628650: step: 238/463, loss: 0.28491726517677307 2023-01-24 01:21:12.285698: step: 240/463, loss: 0.5721750855445862 2023-01-24 01:21:12.914475: step: 242/463, loss: 0.20861269533634186 2023-01-24 01:21:13.537746: step: 244/463, loss: 0.37435194849967957 2023-01-24 01:21:14.192180: step: 246/463, loss: 0.09008286148309708 2023-01-24 01:21:14.792807: step: 248/463, loss: 0.10733573883771896 2023-01-24 01:21:15.340464: step: 250/463, loss: 0.05971158295869827 2023-01-24 01:21:16.002425: step: 252/463, loss: 0.06712017953395844 2023-01-24 01:21:16.673023: step: 254/463, loss: 0.149140402674675 2023-01-24 01:21:17.290625: step: 256/463, loss: 0.15263497829437256 2023-01-24 01:21:17.910811: step: 258/463, loss: 0.9096626043319702 2023-01-24 01:21:18.513893: step: 260/463, loss: 0.09592042118310928 2023-01-24 01:21:19.116111: step: 262/463, loss: 0.3063996732234955 2023-01-24 01:21:19.709186: step: 264/463, loss: 0.3671858310699463 2023-01-24 01:21:20.400017: step: 266/463, loss: 0.16143399477005005 2023-01-24 01:21:21.025686: step: 268/463, loss: 0.05929265916347504 2023-01-24 01:21:21.659019: step: 270/463, loss: 0.1495087444782257 2023-01-24 01:21:22.284393: step: 272/463, loss: 0.09115415066480637 2023-01-24 01:21:22.908104: step: 274/463, loss: 0.30453792214393616 2023-01-24 01:21:23.491687: step: 276/463, loss: 0.38079485297203064 2023-01-24 01:21:24.242593: step: 278/463, loss: 0.342390775680542 2023-01-24 01:21:24.860005: step: 280/463, loss: 0.2871167063713074 2023-01-24 01:21:25.580709: step: 282/463, loss: 0.6299222111701965 2023-01-24 01:21:26.286044: step: 284/463, loss: 0.18496885895729065 2023-01-24 01:21:26.923906: step: 286/463, loss: 0.5490970015525818 2023-01-24 01:21:27.582875: step: 288/463, loss: 0.10340451449155807 2023-01-24 01:21:28.255977: step: 290/463, loss: 0.16580542922019958 2023-01-24 01:21:28.915945: step: 292/463, loss: 0.13065865635871887 2023-01-24 01:21:29.502862: step: 294/463, loss: 0.10927838832139969 2023-01-24 01:21:30.107629: step: 296/463, loss: 0.1008647158741951 2023-01-24 01:21:30.681996: step: 298/463, loss: 0.11493542045354843 2023-01-24 01:21:31.372757: step: 300/463, loss: 0.18604464828968048 2023-01-24 01:21:31.922136: step: 302/463, loss: 0.1425403505563736 2023-01-24 01:21:32.560404: step: 304/463, loss: 0.03212430328130722 2023-01-24 01:21:33.197227: step: 306/463, loss: 0.34877368807792664 2023-01-24 01:21:33.793363: step: 308/463, loss: 0.15047882497310638 2023-01-24 01:21:34.444865: step: 310/463, loss: 0.07522471249103546 2023-01-24 01:21:35.042549: step: 312/463, loss: 0.49591168761253357 2023-01-24 01:21:35.681236: step: 314/463, loss: 0.5783676505088806 2023-01-24 01:21:36.313261: step: 316/463, loss: 0.1600651890039444 2023-01-24 01:21:36.918227: step: 318/463, loss: 0.4939728379249573 2023-01-24 01:21:37.488594: step: 320/463, loss: 0.07901965081691742 2023-01-24 01:21:38.078421: step: 322/463, loss: 0.13412442803382874 2023-01-24 01:21:38.663303: step: 324/463, loss: 0.8046720027923584 2023-01-24 01:21:39.256443: step: 326/463, loss: 0.24747377634048462 2023-01-24 01:21:39.880987: step: 328/463, loss: 0.12344200909137726 2023-01-24 01:21:40.522006: step: 330/463, loss: 0.05452829971909523 2023-01-24 01:21:41.096922: step: 332/463, loss: 0.23342633247375488 2023-01-24 01:21:41.730686: step: 334/463, loss: 0.15630567073822021 2023-01-24 01:21:42.290143: step: 336/463, loss: 0.10500635951757431 2023-01-24 01:21:42.904846: step: 338/463, loss: 0.3273847997188568 2023-01-24 01:21:43.536362: step: 340/463, loss: 0.06708574295043945 2023-01-24 01:21:44.247398: step: 342/463, loss: 0.3618435859680176 2023-01-24 01:21:44.901146: step: 344/463, loss: 0.1889757513999939 2023-01-24 01:21:45.476480: step: 346/463, loss: 0.0756753608584404 2023-01-24 01:21:46.095726: step: 348/463, loss: 0.08635926991701126 2023-01-24 01:21:46.738587: step: 350/463, loss: 0.7102369070053101 2023-01-24 01:21:47.382469: step: 352/463, loss: 0.08359764516353607 2023-01-24 01:21:48.021871: step: 354/463, loss: 0.07731806486845016 2023-01-24 01:21:48.593506: step: 356/463, loss: 0.24834422767162323 2023-01-24 01:21:49.208188: step: 358/463, loss: 0.12789073586463928 2023-01-24 01:21:49.916495: step: 360/463, loss: 0.3180234730243683 2023-01-24 01:21:50.469399: step: 362/463, loss: 0.10265457630157471 2023-01-24 01:21:51.102028: step: 364/463, loss: 0.07504995912313461 2023-01-24 01:21:51.682494: step: 366/463, loss: 0.06866217404603958 2023-01-24 01:21:52.357563: step: 368/463, loss: 0.07730881869792938 2023-01-24 01:21:52.947669: step: 370/463, loss: 0.6829435229301453 2023-01-24 01:21:53.534682: step: 372/463, loss: 0.114606574177742 2023-01-24 01:21:54.138937: step: 374/463, loss: 0.17876236140727997 2023-01-24 01:21:54.704379: step: 376/463, loss: 0.07609118521213531 2023-01-24 01:21:55.308143: step: 378/463, loss: 0.13067157566547394 2023-01-24 01:21:55.932930: step: 380/463, loss: 0.12436798214912415 2023-01-24 01:21:56.513405: step: 382/463, loss: 0.07902736961841583 2023-01-24 01:21:57.221410: step: 384/463, loss: 0.8428719639778137 2023-01-24 01:21:57.776021: step: 386/463, loss: 0.12940949201583862 2023-01-24 01:21:58.329370: step: 388/463, loss: 0.5174376964569092 2023-01-24 01:21:58.927641: step: 390/463, loss: 0.08864948898553848 2023-01-24 01:21:59.557730: step: 392/463, loss: 0.1381201446056366 2023-01-24 01:22:00.159147: step: 394/463, loss: 0.2679230868816376 2023-01-24 01:22:00.859331: step: 396/463, loss: 0.12718528509140015 2023-01-24 01:22:01.541416: step: 398/463, loss: 0.3463614881038666 2023-01-24 01:22:02.120944: step: 400/463, loss: 0.33551958203315735 2023-01-24 01:22:02.758060: step: 402/463, loss: 0.1317533254623413 2023-01-24 01:22:03.403254: step: 404/463, loss: 0.2918298840522766 2023-01-24 01:22:04.084678: step: 406/463, loss: 0.1276000589132309 2023-01-24 01:22:04.644334: step: 408/463, loss: 0.16821874678134918 2023-01-24 01:22:05.232784: step: 410/463, loss: 0.07860761135816574 2023-01-24 01:22:05.793171: step: 412/463, loss: 0.11053800582885742 2023-01-24 01:22:06.457616: step: 414/463, loss: 0.49289923906326294 2023-01-24 01:22:07.097099: step: 416/463, loss: 0.7267265915870667 2023-01-24 01:22:07.794849: step: 418/463, loss: 0.24886372685432434 2023-01-24 01:22:08.396334: step: 420/463, loss: 0.1400095373392105 2023-01-24 01:22:08.974532: step: 422/463, loss: 0.4976447820663452 2023-01-24 01:22:09.538149: step: 424/463, loss: 0.1688401997089386 2023-01-24 01:22:10.131255: step: 426/463, loss: 0.22347904741764069 2023-01-24 01:22:10.744125: step: 428/463, loss: 0.640465259552002 2023-01-24 01:22:11.368639: step: 430/463, loss: 0.3505455255508423 2023-01-24 01:22:11.967405: step: 432/463, loss: 0.27340126037597656 2023-01-24 01:22:12.589938: step: 434/463, loss: 0.11721798777580261 2023-01-24 01:22:13.180682: step: 436/463, loss: 0.07473170757293701 2023-01-24 01:22:13.711214: step: 438/463, loss: 0.04971243068575859 2023-01-24 01:22:14.278088: step: 440/463, loss: 0.19187520444393158 2023-01-24 01:22:14.907449: step: 442/463, loss: 0.4347236454486847 2023-01-24 01:22:15.504079: step: 444/463, loss: 0.10797803103923798 2023-01-24 01:22:16.119471: step: 446/463, loss: 0.09673857688903809 2023-01-24 01:22:16.721516: step: 448/463, loss: 0.1593562811613083 2023-01-24 01:22:17.409465: step: 450/463, loss: 0.15798698365688324 2023-01-24 01:22:18.045943: step: 452/463, loss: 0.6502160429954529 2023-01-24 01:22:18.650568: step: 454/463, loss: 0.2936711609363556 2023-01-24 01:22:19.270263: step: 456/463, loss: 0.24623417854309082 2023-01-24 01:22:19.947291: step: 458/463, loss: 0.031187953427433968 2023-01-24 01:22:20.529185: step: 460/463, loss: 0.1089807003736496 2023-01-24 01:22:21.256432: step: 462/463, loss: 0.1410052627325058 2023-01-24 01:22:21.908041: step: 464/463, loss: 0.5307961106300354 2023-01-24 01:22:22.555448: step: 466/463, loss: 1.121724247932434 2023-01-24 01:22:23.166529: step: 468/463, loss: 0.19724996387958527 2023-01-24 01:22:23.833608: step: 470/463, loss: 0.11353181302547455 2023-01-24 01:22:24.476957: step: 472/463, loss: 0.19163569808006287 2023-01-24 01:22:25.075434: step: 474/463, loss: 0.20167407393455505 2023-01-24 01:22:25.697999: step: 476/463, loss: 0.07260171324014664 2023-01-24 01:22:26.310758: step: 478/463, loss: 0.29350942373275757 2023-01-24 01:22:26.934691: step: 480/463, loss: 0.513546884059906 2023-01-24 01:22:27.499005: step: 482/463, loss: 0.0812089666724205 2023-01-24 01:22:28.063430: step: 484/463, loss: 0.2336966097354889 2023-01-24 01:22:28.636783: step: 486/463, loss: 0.13227874040603638 2023-01-24 01:22:29.218687: step: 488/463, loss: 0.12969566881656647 2023-01-24 01:22:29.807931: step: 490/463, loss: 0.2063014656305313 2023-01-24 01:22:30.421683: step: 492/463, loss: 0.19133536517620087 2023-01-24 01:22:31.055708: step: 494/463, loss: 0.4032283425331116 2023-01-24 01:22:31.700996: step: 496/463, loss: 0.12233567237854004 2023-01-24 01:22:32.211507: step: 498/463, loss: 0.2952040135860443 2023-01-24 01:22:32.802337: step: 500/463, loss: 0.2888779938220978 2023-01-24 01:22:33.386661: step: 502/463, loss: 0.6097636222839355 2023-01-24 01:22:34.083238: step: 504/463, loss: 0.14834658801555634 2023-01-24 01:22:34.691825: step: 506/463, loss: 0.15530027449131012 2023-01-24 01:22:35.301084: step: 508/463, loss: 0.16488200426101685 2023-01-24 01:22:35.900066: step: 510/463, loss: 0.03965010866522789 2023-01-24 01:22:36.418857: step: 512/463, loss: 0.7460254430770874 2023-01-24 01:22:37.038421: step: 514/463, loss: 0.09015417844057083 2023-01-24 01:22:37.624445: step: 516/463, loss: 0.09670256078243256 2023-01-24 01:22:38.232063: step: 518/463, loss: 0.5697557330131531 2023-01-24 01:22:38.854478: step: 520/463, loss: 0.10155784338712692 2023-01-24 01:22:39.421769: step: 522/463, loss: 0.17859096825122833 2023-01-24 01:22:40.018108: step: 524/463, loss: 0.25560787320137024 2023-01-24 01:22:40.612507: step: 526/463, loss: 0.153215229511261 2023-01-24 01:22:41.272080: step: 528/463, loss: 0.23221251368522644 2023-01-24 01:22:41.882117: step: 530/463, loss: 0.1168912872672081 2023-01-24 01:22:42.481685: step: 532/463, loss: 0.19208881258964539 2023-01-24 01:22:43.099154: step: 534/463, loss: 0.2637074887752533 2023-01-24 01:22:43.760379: step: 536/463, loss: 0.15540830790996552 2023-01-24 01:22:44.347361: step: 538/463, loss: 0.3781346082687378 2023-01-24 01:22:44.982629: step: 540/463, loss: 0.14690083265304565 2023-01-24 01:22:45.571765: step: 542/463, loss: 0.256965696811676 2023-01-24 01:22:46.180511: step: 544/463, loss: 0.15526212751865387 2023-01-24 01:22:46.829887: step: 546/463, loss: 0.1643633097410202 2023-01-24 01:22:47.429748: step: 548/463, loss: 0.12368601560592651 2023-01-24 01:22:48.108898: step: 550/463, loss: 0.6030208468437195 2023-01-24 01:22:48.700057: step: 552/463, loss: 0.2335471659898758 2023-01-24 01:22:49.254349: step: 554/463, loss: 0.13969428837299347 2023-01-24 01:22:49.892415: step: 556/463, loss: 0.2825043797492981 2023-01-24 01:22:50.509390: step: 558/463, loss: 0.29415857791900635 2023-01-24 01:22:51.157715: step: 560/463, loss: 0.3851672410964966 2023-01-24 01:22:51.769616: step: 562/463, loss: 0.1347467005252838 2023-01-24 01:22:52.436159: step: 564/463, loss: 0.2585058808326721 2023-01-24 01:22:53.149830: step: 566/463, loss: 0.1628204882144928 2023-01-24 01:22:53.742952: step: 568/463, loss: 0.2897280752658844 2023-01-24 01:22:54.390369: step: 570/463, loss: 0.18531332910060883 2023-01-24 01:22:55.022515: step: 572/463, loss: 0.09126812219619751 2023-01-24 01:22:55.648952: step: 574/463, loss: 0.2615398168563843 2023-01-24 01:22:56.297835: step: 576/463, loss: 0.1360349804162979 2023-01-24 01:22:56.928022: step: 578/463, loss: 0.2566956877708435 2023-01-24 01:22:57.527263: step: 580/463, loss: 0.24119022488594055 2023-01-24 01:22:58.150932: step: 582/463, loss: 0.1254299432039261 2023-01-24 01:22:58.739151: step: 584/463, loss: 0.10666186362504959 2023-01-24 01:22:59.355867: step: 586/463, loss: 0.10470999777317047 2023-01-24 01:22:59.944453: step: 588/463, loss: 1.404681921005249 2023-01-24 01:23:00.579960: step: 590/463, loss: 0.06586433202028275 2023-01-24 01:23:01.292764: step: 592/463, loss: 0.8000977635383606 2023-01-24 01:23:01.879372: step: 594/463, loss: 0.6390179395675659 2023-01-24 01:23:02.479383: step: 596/463, loss: 0.9234566688537598 2023-01-24 01:23:03.220603: step: 598/463, loss: 0.15058062970638275 2023-01-24 01:23:03.883596: step: 600/463, loss: 0.146126389503479 2023-01-24 01:23:04.532237: step: 602/463, loss: 0.11970563232898712 2023-01-24 01:23:05.141357: step: 604/463, loss: 0.1082342341542244 2023-01-24 01:23:05.731666: step: 606/463, loss: 0.1443565934896469 2023-01-24 01:23:06.376935: step: 608/463, loss: 0.5981305837631226 2023-01-24 01:23:07.006088: step: 610/463, loss: 0.05745577812194824 2023-01-24 01:23:07.581218: step: 612/463, loss: 0.24875128269195557 2023-01-24 01:23:08.173127: step: 614/463, loss: 0.09177330881357193 2023-01-24 01:23:08.823324: step: 616/463, loss: 0.09690413624048233 2023-01-24 01:23:09.533295: step: 618/463, loss: 0.18338355422019958 2023-01-24 01:23:10.171820: step: 620/463, loss: 0.13161371648311615 2023-01-24 01:23:10.778263: step: 622/463, loss: 0.13530658185482025 2023-01-24 01:23:11.395632: step: 624/463, loss: 0.10445586591959 2023-01-24 01:23:12.033778: step: 626/463, loss: 0.6663369536399841 2023-01-24 01:23:12.674235: step: 628/463, loss: 6.0904669761657715 2023-01-24 01:23:13.357124: step: 630/463, loss: 0.0396406427025795 2023-01-24 01:23:14.033455: step: 632/463, loss: 0.15473344922065735 2023-01-24 01:23:14.635136: step: 634/463, loss: 0.12488384544849396 2023-01-24 01:23:15.240126: step: 636/463, loss: 0.43946170806884766 2023-01-24 01:23:15.857199: step: 638/463, loss: 0.07207348942756653 2023-01-24 01:23:16.515258: step: 640/463, loss: 0.11279551684856415 2023-01-24 01:23:17.106456: step: 642/463, loss: 0.4018101096153259 2023-01-24 01:23:17.717577: step: 644/463, loss: 0.14402031898498535 2023-01-24 01:23:18.312419: step: 646/463, loss: 0.2797985076904297 2023-01-24 01:23:18.920151: step: 648/463, loss: 0.18237920105457306 2023-01-24 01:23:19.522662: step: 650/463, loss: 0.21783073246479034 2023-01-24 01:23:20.186569: step: 652/463, loss: 1.066781759262085 2023-01-24 01:23:20.932653: step: 654/463, loss: 0.24042648077011108 2023-01-24 01:23:21.646369: step: 656/463, loss: 0.15281209349632263 2023-01-24 01:23:22.290348: step: 658/463, loss: 0.156751811504364 2023-01-24 01:23:22.891919: step: 660/463, loss: 0.24920238554477692 2023-01-24 01:23:23.496206: step: 662/463, loss: 0.2024933099746704 2023-01-24 01:23:24.080953: step: 664/463, loss: 0.14370478689670563 2023-01-24 01:23:24.740358: step: 666/463, loss: 0.38280215859413147 2023-01-24 01:23:25.345318: step: 668/463, loss: 0.06891478598117828 2023-01-24 01:23:25.971800: step: 670/463, loss: 0.18897640705108643 2023-01-24 01:23:26.608442: step: 672/463, loss: 0.6059722900390625 2023-01-24 01:23:27.233213: step: 674/463, loss: 0.22382034361362457 2023-01-24 01:23:27.902892: step: 676/463, loss: 0.3186624348163605 2023-01-24 01:23:28.509561: step: 678/463, loss: 0.06853877007961273 2023-01-24 01:23:29.100772: step: 680/463, loss: 0.23257888853549957 2023-01-24 01:23:29.639361: step: 682/463, loss: 0.04693547263741493 2023-01-24 01:23:30.268711: step: 684/463, loss: 0.25548672676086426 2023-01-24 01:23:30.937408: step: 686/463, loss: 0.06591357290744781 2023-01-24 01:23:31.526119: step: 688/463, loss: 0.1754855513572693 2023-01-24 01:23:32.106542: step: 690/463, loss: 0.15456275641918182 2023-01-24 01:23:32.698177: step: 692/463, loss: 0.34810635447502136 2023-01-24 01:23:33.288822: step: 694/463, loss: 0.3315097689628601 2023-01-24 01:23:33.921717: step: 696/463, loss: 0.4954250752925873 2023-01-24 01:23:34.518655: step: 698/463, loss: 0.3658079504966736 2023-01-24 01:23:35.187595: step: 700/463, loss: 0.5540649890899658 2023-01-24 01:23:35.777394: step: 702/463, loss: 0.020063428208231926 2023-01-24 01:23:36.390378: step: 704/463, loss: 0.7302101850509644 2023-01-24 01:23:36.999385: step: 706/463, loss: 0.09799230098724365 2023-01-24 01:23:37.612130: step: 708/463, loss: 0.43738266825675964 2023-01-24 01:23:38.324071: step: 710/463, loss: 1.3673121929168701 2023-01-24 01:23:38.919777: step: 712/463, loss: 0.13889220356941223 2023-01-24 01:23:39.511152: step: 714/463, loss: 0.16936029493808746 2023-01-24 01:23:40.100834: step: 716/463, loss: 0.2813599109649658 2023-01-24 01:23:40.790355: step: 718/463, loss: 0.2729129493236542 2023-01-24 01:23:41.394175: step: 720/463, loss: 0.12637236714363098 2023-01-24 01:23:41.974180: step: 722/463, loss: 0.17539240419864655 2023-01-24 01:23:42.620434: step: 724/463, loss: 0.21712090075016022 2023-01-24 01:23:43.264456: step: 726/463, loss: 0.07533413916826248 2023-01-24 01:23:43.814716: step: 728/463, loss: 0.3124937415122986 2023-01-24 01:23:44.438442: step: 730/463, loss: 0.054267656058073044 2023-01-24 01:23:45.052229: step: 732/463, loss: 0.19126291573047638 2023-01-24 01:23:45.563473: step: 734/463, loss: 0.10401225090026855 2023-01-24 01:23:46.195773: step: 736/463, loss: 0.2681736946105957 2023-01-24 01:23:46.806503: step: 738/463, loss: 0.04870062693953514 2023-01-24 01:23:47.461212: step: 740/463, loss: 0.16951194405555725 2023-01-24 01:23:48.099417: step: 742/463, loss: 0.05111130699515343 2023-01-24 01:23:48.691344: step: 744/463, loss: 0.22381947934627533 2023-01-24 01:23:49.308747: step: 746/463, loss: 0.21593955159187317 2023-01-24 01:23:49.946040: step: 748/463, loss: 0.2164674699306488 2023-01-24 01:23:50.524885: step: 750/463, loss: 0.17841942608356476 2023-01-24 01:23:51.142285: step: 752/463, loss: 0.37297749519348145 2023-01-24 01:23:51.712539: step: 754/463, loss: 0.026182163506746292 2023-01-24 01:23:52.329977: step: 756/463, loss: 0.18546372652053833 2023-01-24 01:23:52.919693: step: 758/463, loss: 0.31787431240081787 2023-01-24 01:23:53.550480: step: 760/463, loss: 0.16469746828079224 2023-01-24 01:23:54.125397: step: 762/463, loss: 0.08531952649354935 2023-01-24 01:23:54.752268: step: 764/463, loss: 0.44032901525497437 2023-01-24 01:23:55.290091: step: 766/463, loss: 0.05457795411348343 2023-01-24 01:23:55.985894: step: 768/463, loss: 0.4040237069129944 2023-01-24 01:23:56.560308: step: 770/463, loss: 0.17169338464736938 2023-01-24 01:23:57.138776: step: 772/463, loss: 0.07890637964010239 2023-01-24 01:23:57.802119: step: 774/463, loss: 0.21412260830402374 2023-01-24 01:23:58.372382: step: 776/463, loss: 0.09458138793706894 2023-01-24 01:23:59.026383: step: 778/463, loss: 0.4084387421607971 2023-01-24 01:23:59.682068: step: 780/463, loss: 2.2510499954223633 2023-01-24 01:24:00.367998: step: 782/463, loss: 0.13423019647598267 2023-01-24 01:24:00.950015: step: 784/463, loss: 0.38092344999313354 2023-01-24 01:24:01.546052: step: 786/463, loss: 0.15195366740226746 2023-01-24 01:24:02.249923: step: 788/463, loss: 0.16202634572982788 2023-01-24 01:24:02.858443: step: 790/463, loss: 0.04834723472595215 2023-01-24 01:24:03.522522: step: 792/463, loss: 0.5601105690002441 2023-01-24 01:24:04.169830: step: 794/463, loss: 0.12197083234786987 2023-01-24 01:24:04.769066: step: 796/463, loss: 0.17657619714736938 2023-01-24 01:24:05.336741: step: 798/463, loss: 0.3934799134731293 2023-01-24 01:24:05.987240: step: 800/463, loss: 0.21426966786384583 2023-01-24 01:24:06.600880: step: 802/463, loss: 0.25814029574394226 2023-01-24 01:24:07.260515: step: 804/463, loss: 0.307193785905838 2023-01-24 01:24:07.885102: step: 806/463, loss: 0.22508278489112854 2023-01-24 01:24:08.502523: step: 808/463, loss: 0.16606535017490387 2023-01-24 01:24:09.144149: step: 810/463, loss: 0.8540472388267517 2023-01-24 01:24:09.760641: step: 812/463, loss: 0.1355004459619522 2023-01-24 01:24:10.359985: step: 814/463, loss: 0.214835524559021 2023-01-24 01:24:10.897770: step: 816/463, loss: 0.21123486757278442 2023-01-24 01:24:11.577229: step: 818/463, loss: 0.684298038482666 2023-01-24 01:24:12.201555: step: 820/463, loss: 0.20782868564128876 2023-01-24 01:24:12.801308: step: 822/463, loss: 0.08836875110864639 2023-01-24 01:24:13.402970: step: 824/463, loss: 0.22916240990161896 2023-01-24 01:24:14.021910: step: 826/463, loss: 0.09399758279323578 2023-01-24 01:24:14.591797: step: 828/463, loss: 0.42939338088035583 2023-01-24 01:24:15.196696: step: 830/463, loss: 0.18317995965480804 2023-01-24 01:24:15.908185: step: 832/463, loss: 0.4541419446468353 2023-01-24 01:24:16.532557: step: 834/463, loss: 0.198395237326622 2023-01-24 01:24:17.277982: step: 836/463, loss: 0.2802852988243103 2023-01-24 01:24:17.881146: step: 838/463, loss: 0.0982271358370781 2023-01-24 01:24:18.499968: step: 840/463, loss: 0.13374069333076477 2023-01-24 01:24:19.113955: step: 842/463, loss: 0.26857098937034607 2023-01-24 01:24:19.709563: step: 844/463, loss: 0.14647513628005981 2023-01-24 01:24:20.324350: step: 846/463, loss: 0.1926325559616089 2023-01-24 01:24:20.918988: step: 848/463, loss: 0.5408521890640259 2023-01-24 01:24:21.558539: step: 850/463, loss: 0.14338821172714233 2023-01-24 01:24:22.065624: step: 852/463, loss: 0.3919200003147125 2023-01-24 01:24:22.646314: step: 854/463, loss: 0.09635207802057266 2023-01-24 01:24:23.264676: step: 856/463, loss: 0.1431223601102829 2023-01-24 01:24:23.874122: step: 858/463, loss: 0.12814195454120636 2023-01-24 01:24:24.463342: step: 860/463, loss: 2.128169059753418 2023-01-24 01:24:25.010338: step: 862/463, loss: 0.37324991822242737 2023-01-24 01:24:25.576260: step: 864/463, loss: 0.20899564027786255 2023-01-24 01:24:26.208425: step: 866/463, loss: 0.18874073028564453 2023-01-24 01:24:26.815630: step: 868/463, loss: 0.24432745575904846 2023-01-24 01:24:27.418707: step: 870/463, loss: 0.3748307526111603 2023-01-24 01:24:28.031478: step: 872/463, loss: 0.10357166081666946 2023-01-24 01:24:28.667752: step: 874/463, loss: 0.5530768036842346 2023-01-24 01:24:29.218244: step: 876/463, loss: 0.292721688747406 2023-01-24 01:24:29.796618: step: 878/463, loss: 0.2837667763233185 2023-01-24 01:24:30.407156: step: 880/463, loss: 0.18089216947555542 2023-01-24 01:24:31.041488: step: 882/463, loss: 0.7346042990684509 2023-01-24 01:24:31.666435: step: 884/463, loss: 0.21073144674301147 2023-01-24 01:24:32.440782: step: 886/463, loss: 0.46338459849357605 2023-01-24 01:24:33.151164: step: 888/463, loss: 0.7236040234565735 2023-01-24 01:24:33.765382: step: 890/463, loss: 0.17800061404705048 2023-01-24 01:24:34.363920: step: 892/463, loss: 0.2058294415473938 2023-01-24 01:24:35.031411: step: 894/463, loss: 0.28168317675590515 2023-01-24 01:24:35.608784: step: 896/463, loss: 0.19440966844558716 2023-01-24 01:24:36.219705: step: 898/463, loss: 0.38313883543014526 2023-01-24 01:24:36.788531: step: 900/463, loss: 0.3498222827911377 2023-01-24 01:24:37.394635: step: 902/463, loss: 0.1202121451497078 2023-01-24 01:24:38.007988: step: 904/463, loss: 0.36205485463142395 2023-01-24 01:24:38.590355: step: 906/463, loss: 0.7415978312492371 2023-01-24 01:24:39.150309: step: 908/463, loss: 0.09956806898117065 2023-01-24 01:24:39.843278: step: 910/463, loss: 0.3380570113658905 2023-01-24 01:24:40.552674: step: 912/463, loss: 0.3525811433792114 2023-01-24 01:24:41.129508: step: 914/463, loss: 0.11312661319971085 2023-01-24 01:24:41.802063: step: 916/463, loss: 0.13564646244049072 2023-01-24 01:24:42.412618: step: 918/463, loss: 0.39960673451423645 2023-01-24 01:24:43.042878: step: 920/463, loss: 0.11281851679086685 2023-01-24 01:24:43.644361: step: 922/463, loss: 0.15116821229457855 2023-01-24 01:24:44.281260: step: 924/463, loss: 0.14059405028820038 2023-01-24 01:24:44.921558: step: 926/463, loss: 0.25888195633888245 ================================================== Loss: 0.276 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3425167931283668, 'r': 0.3353674862509246, 'f1': 0.3389044396054406}, 'combined': 0.2497190607619036, 'epoch': 11} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3541092024055338, 'r': 0.3840522599618841, 'f1': 0.36847342049253073}, 'combined': 0.28561097664971286, 'epoch': 11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32000786237549533, 'r': 0.3394390798252408, 'f1': 0.3294371916535947}, 'combined': 0.24274319385001714, 'epoch': 11} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33758751944168075, 'r': 0.3835093511304388, 'f1': 0.3590862082873644}, 'combined': 0.27833476431843557, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3276866580823204, 'r': 0.3401225843852548, 'f1': 0.333788830486088}, 'combined': 0.2459496645686964, 'epoch': 11} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33542093900234404, 'r': 0.36563348681689345, 'f1': 0.3498761949487951}, 'combined': 0.2711959023048077, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2647058823529412, 'r': 0.2571428571428571, 'f1': 0.2608695652173913}, 'combined': 0.17391304347826086, 'epoch': 11} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.26851851851851855, 'r': 0.31521739130434784, 'f1': 0.29000000000000004}, 'combined': 0.14500000000000002, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.21428571428571427, 'r': 0.10344827586206896, 'f1': 0.1395348837209302}, 'combined': 0.09302325581395346, 'epoch': 11} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3664599181442166, 'r': 0.3122210687794179, 'f1': 0.33717316239088774}, 'combined': 0.2484433828143383, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3655563493147106, 'r': 0.32203773630105453, 'f1': 0.34241987150998204}, 'combined': 0.26541635973501004, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.42857142857142855, 'r': 0.2571428571428571, 'f1': 0.3214285714285714}, 'combined': 0.21428571428571425, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3283040393999639, 'r': 0.31335281179920654, 'f1': 0.3206542365401589}, 'combined': 0.23627154271380127, 'epoch': 6} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34970123997901775, 'r': 0.3199189915192662, 'f1': 0.3341478132367659}, 'combined': 0.2590045250926128, 'epoch': 6} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.38, 'r': 0.41304347826086957, 'f1': 0.39583333333333337}, 'combined': 0.19791666666666669, 'epoch': 6} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32326799298325637, 'r': 0.29632899356798503, 'f1': 0.30921286285354954}, 'combined': 0.22784105683945755, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32287388038511977, 'r': 0.3347442436345727, 'f1': 0.32870192876752263}, 'combined': 0.2547833132073621, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.59375, 'r': 0.16379310344827586, 'f1': 0.25675675675675674}, 'combined': 0.17117117117117114, 'epoch': 5} ****************************** Epoch: 12 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:27:24.855325: step: 2/463, loss: 0.04709136486053467 2023-01-24 01:27:25.481039: step: 4/463, loss: 0.2920854091644287 2023-01-24 01:27:26.150447: step: 6/463, loss: 0.09253279119729996 2023-01-24 01:27:26.768812: step: 8/463, loss: 0.1371663361787796 2023-01-24 01:27:27.460294: step: 10/463, loss: 0.4698277711868286 2023-01-24 01:27:28.102419: step: 12/463, loss: 0.09835254400968552 2023-01-24 01:27:28.784999: step: 14/463, loss: 0.17487305402755737 2023-01-24 01:27:29.374017: step: 16/463, loss: 0.09919741749763489 2023-01-24 01:27:29.972627: step: 18/463, loss: 0.2199268639087677 2023-01-24 01:27:30.589147: step: 20/463, loss: 0.37634027004241943 2023-01-24 01:27:31.171322: step: 22/463, loss: 0.06089381128549576 2023-01-24 01:27:31.802381: step: 24/463, loss: 0.09642766416072845 2023-01-24 01:27:32.386964: step: 26/463, loss: 0.09353947639465332 2023-01-24 01:27:32.991340: step: 28/463, loss: 0.03183140978217125 2023-01-24 01:27:33.609108: step: 30/463, loss: 0.10085129737854004 2023-01-24 01:27:34.276726: step: 32/463, loss: 0.3874399960041046 2023-01-24 01:27:34.897425: step: 34/463, loss: 0.16750377416610718 2023-01-24 01:27:35.609457: step: 36/463, loss: 0.1750829517841339 2023-01-24 01:27:36.258273: step: 38/463, loss: 0.22163164615631104 2023-01-24 01:27:36.923642: step: 40/463, loss: 0.08728481829166412 2023-01-24 01:27:37.480305: step: 42/463, loss: 0.1055922657251358 2023-01-24 01:27:38.144196: step: 44/463, loss: 0.06161033362150192 2023-01-24 01:27:38.744850: step: 46/463, loss: 0.1445070207118988 2023-01-24 01:27:39.347828: step: 48/463, loss: 0.11023698002099991 2023-01-24 01:27:39.950872: step: 50/463, loss: 0.03614789992570877 2023-01-24 01:27:40.606699: step: 52/463, loss: 0.1414175033569336 2023-01-24 01:27:41.183917: step: 54/463, loss: 0.12846750020980835 2023-01-24 01:27:41.811675: step: 56/463, loss: 0.1323210746049881 2023-01-24 01:27:42.420572: step: 58/463, loss: 0.14289246499538422 2023-01-24 01:27:43.029293: step: 60/463, loss: 0.2360147386789322 2023-01-24 01:27:43.572969: step: 62/463, loss: 0.12843173742294312 2023-01-24 01:27:44.164848: step: 64/463, loss: 0.3064770996570587 2023-01-24 01:27:44.812208: step: 66/463, loss: 0.07110176235437393 2023-01-24 01:27:45.481932: step: 68/463, loss: 0.5093967318534851 2023-01-24 01:27:46.047711: step: 70/463, loss: 0.02749595232307911 2023-01-24 01:27:46.606741: step: 72/463, loss: 0.15308287739753723 2023-01-24 01:27:47.252596: step: 74/463, loss: 0.12487535923719406 2023-01-24 01:27:47.870204: step: 76/463, loss: 0.1353198140859604 2023-01-24 01:27:48.499013: step: 78/463, loss: 0.24977631866931915 2023-01-24 01:27:49.074548: step: 80/463, loss: 0.23683463037014008 2023-01-24 01:27:49.640123: step: 82/463, loss: 0.15805213153362274 2023-01-24 01:27:50.269648: step: 84/463, loss: 0.1394563913345337 2023-01-24 01:27:50.930008: step: 86/463, loss: 0.06980950385332108 2023-01-24 01:27:51.555679: step: 88/463, loss: 0.15665149688720703 2023-01-24 01:27:52.208607: step: 90/463, loss: 0.2898462414741516 2023-01-24 01:27:52.873026: step: 92/463, loss: 0.06743495166301727 2023-01-24 01:27:53.508019: step: 94/463, loss: 0.2256937026977539 2023-01-24 01:27:54.190309: step: 96/463, loss: 0.14297865331172943 2023-01-24 01:27:54.920886: step: 98/463, loss: 0.1014813780784607 2023-01-24 01:27:55.512882: step: 100/463, loss: 0.40818488597869873 2023-01-24 01:27:56.091343: step: 102/463, loss: 0.06096276640892029 2023-01-24 01:27:56.671071: step: 104/463, loss: 0.5698774456977844 2023-01-24 01:27:57.245143: step: 106/463, loss: 0.12469528615474701 2023-01-24 01:27:57.837989: step: 108/463, loss: 0.16977208852767944 2023-01-24 01:27:58.438575: step: 110/463, loss: 0.11705274879932404 2023-01-24 01:27:58.997856: step: 112/463, loss: 0.18914425373077393 2023-01-24 01:27:59.605014: step: 114/463, loss: 0.3135256767272949 2023-01-24 01:28:00.229233: step: 116/463, loss: 0.08335922658443451 2023-01-24 01:28:00.896384: step: 118/463, loss: 0.10146239399909973 2023-01-24 01:28:01.506417: step: 120/463, loss: 0.5673781633377075 2023-01-24 01:28:02.160617: step: 122/463, loss: 0.1318645030260086 2023-01-24 01:28:02.742155: step: 124/463, loss: 0.06457088142633438 2023-01-24 01:28:03.318751: step: 126/463, loss: 0.1355675607919693 2023-01-24 01:28:03.890317: step: 128/463, loss: 0.2858072519302368 2023-01-24 01:28:04.584848: step: 130/463, loss: 0.2889961004257202 2023-01-24 01:28:05.179177: step: 132/463, loss: 0.0627320408821106 2023-01-24 01:28:05.745926: step: 134/463, loss: 0.029495416209101677 2023-01-24 01:28:06.414972: step: 136/463, loss: 0.06627761572599411 2023-01-24 01:28:07.057052: step: 138/463, loss: 0.4275059103965759 2023-01-24 01:28:07.587409: step: 140/463, loss: 0.45004311203956604 2023-01-24 01:28:08.240635: step: 142/463, loss: 0.24258460104465485 2023-01-24 01:28:08.925995: step: 144/463, loss: 0.15017956495285034 2023-01-24 01:28:09.506494: step: 146/463, loss: 0.2845919132232666 2023-01-24 01:28:10.087740: step: 148/463, loss: 0.1482020616531372 2023-01-24 01:28:10.707856: step: 150/463, loss: 0.07862148433923721 2023-01-24 01:28:11.300010: step: 152/463, loss: 0.24219466745853424 2023-01-24 01:28:11.920838: step: 154/463, loss: 0.11889304220676422 2023-01-24 01:28:12.552759: step: 156/463, loss: 0.3058384656906128 2023-01-24 01:28:13.144801: step: 158/463, loss: 0.11666297912597656 2023-01-24 01:28:13.764663: step: 160/463, loss: 0.07814386487007141 2023-01-24 01:28:14.449086: step: 162/463, loss: 0.1686367243528366 2023-01-24 01:28:15.069502: step: 164/463, loss: 0.11788913607597351 2023-01-24 01:28:15.638836: step: 166/463, loss: 0.03485008329153061 2023-01-24 01:28:16.157898: step: 168/463, loss: 0.1416996717453003 2023-01-24 01:28:16.773652: step: 170/463, loss: 0.9084913730621338 2023-01-24 01:28:17.438164: step: 172/463, loss: 0.8498354554176331 2023-01-24 01:28:18.043054: step: 174/463, loss: 0.26384708285331726 2023-01-24 01:28:18.608034: step: 176/463, loss: 1.0271313190460205 2023-01-24 01:28:19.230291: step: 178/463, loss: 0.12873773276805878 2023-01-24 01:28:19.808428: step: 180/463, loss: 0.0576903410255909 2023-01-24 01:28:20.394769: step: 182/463, loss: 0.11037561297416687 2023-01-24 01:28:21.003897: step: 184/463, loss: 0.11606007069349289 2023-01-24 01:28:21.588376: step: 186/463, loss: 0.13565154373645782 2023-01-24 01:28:22.196158: step: 188/463, loss: 0.24710839986801147 2023-01-24 01:28:22.840914: step: 190/463, loss: 0.2567966878414154 2023-01-24 01:28:23.459755: step: 192/463, loss: 0.30008643865585327 2023-01-24 01:28:24.089596: step: 194/463, loss: 0.4114149808883667 2023-01-24 01:28:24.704969: step: 196/463, loss: 0.11192763596773148 2023-01-24 01:28:25.377165: step: 198/463, loss: 0.1867704540491104 2023-01-24 01:28:25.993739: step: 200/463, loss: 0.04718553274869919 2023-01-24 01:28:26.599465: step: 202/463, loss: 0.07107377797365189 2023-01-24 01:28:27.152251: step: 204/463, loss: 0.04993832856416702 2023-01-24 01:28:27.707569: step: 206/463, loss: 0.16794365644454956 2023-01-24 01:28:28.276781: step: 208/463, loss: 0.10026872903108597 2023-01-24 01:28:28.893731: step: 210/463, loss: 0.29637181758880615 2023-01-24 01:28:29.484286: step: 212/463, loss: 0.08233652263879776 2023-01-24 01:28:30.128667: step: 214/463, loss: 0.14846354722976685 2023-01-24 01:28:30.728544: step: 216/463, loss: 0.07847565412521362 2023-01-24 01:28:31.370278: step: 218/463, loss: 0.45206472277641296 2023-01-24 01:28:31.986520: step: 220/463, loss: 0.6849433779716492 2023-01-24 01:28:32.605165: step: 222/463, loss: 0.0510321743786335 2023-01-24 01:28:33.328572: step: 224/463, loss: 0.10107812285423279 2023-01-24 01:28:33.917969: step: 226/463, loss: 0.3550601899623871 2023-01-24 01:28:34.546291: step: 228/463, loss: 0.14961254596710205 2023-01-24 01:28:35.150467: step: 230/463, loss: 0.1446916162967682 2023-01-24 01:28:35.808645: step: 232/463, loss: 0.12215256690979004 2023-01-24 01:28:36.396960: step: 234/463, loss: 0.5524691939353943 2023-01-24 01:28:37.093063: step: 236/463, loss: 0.37820738554000854 2023-01-24 01:28:37.731689: step: 238/463, loss: 0.3266206383705139 2023-01-24 01:28:38.345660: step: 240/463, loss: 0.10210172086954117 2023-01-24 01:28:38.910218: step: 242/463, loss: 0.09358435124158859 2023-01-24 01:28:39.505572: step: 244/463, loss: 0.15797604620456696 2023-01-24 01:28:40.118331: step: 246/463, loss: 0.11809127032756805 2023-01-24 01:28:40.755729: step: 248/463, loss: 0.12133277207612991 2023-01-24 01:28:41.350450: step: 250/463, loss: 0.11855854839086533 2023-01-24 01:28:41.986760: step: 252/463, loss: 0.09213931858539581 2023-01-24 01:28:42.705060: step: 254/463, loss: 0.16537536680698395 2023-01-24 01:28:43.298837: step: 256/463, loss: 0.053565863519907 2023-01-24 01:28:43.923244: step: 258/463, loss: 0.05515823885798454 2023-01-24 01:28:44.576359: step: 260/463, loss: 0.93522709608078 2023-01-24 01:28:45.210708: step: 262/463, loss: 0.1171746551990509 2023-01-24 01:28:45.854571: step: 264/463, loss: 0.0768083930015564 2023-01-24 01:28:46.463305: step: 266/463, loss: 0.5953385233879089 2023-01-24 01:28:47.154494: step: 268/463, loss: 1.0117777585983276 2023-01-24 01:28:47.701725: step: 270/463, loss: 0.08235375583171844 2023-01-24 01:28:48.307706: step: 272/463, loss: 0.3589954078197479 2023-01-24 01:28:48.966004: step: 274/463, loss: 0.12249688059091568 2023-01-24 01:28:49.645806: step: 276/463, loss: 0.21151240170001984 2023-01-24 01:28:50.266389: step: 278/463, loss: 0.12480197101831436 2023-01-24 01:28:50.955296: step: 280/463, loss: 0.1926862895488739 2023-01-24 01:28:51.527417: step: 282/463, loss: 0.05210735276341438 2023-01-24 01:28:52.203050: step: 284/463, loss: 0.13047321140766144 2023-01-24 01:28:52.786216: step: 286/463, loss: 0.4637804329395294 2023-01-24 01:28:53.446416: step: 288/463, loss: 0.21883392333984375 2023-01-24 01:28:53.983926: step: 290/463, loss: 0.574614405632019 2023-01-24 01:28:54.747261: step: 292/463, loss: 0.09791631996631622 2023-01-24 01:28:55.374731: step: 294/463, loss: 0.36240866780281067 2023-01-24 01:28:56.036872: step: 296/463, loss: 0.3311035633087158 2023-01-24 01:28:56.703673: step: 298/463, loss: 0.07071972638368607 2023-01-24 01:28:57.294112: step: 300/463, loss: 0.09666219353675842 2023-01-24 01:28:57.949307: step: 302/463, loss: 0.0765746682882309 2023-01-24 01:28:58.537841: step: 304/463, loss: 0.22984886169433594 2023-01-24 01:28:59.255163: step: 306/463, loss: 0.07878180593252182 2023-01-24 01:28:59.873918: step: 308/463, loss: 0.042481642216444016 2023-01-24 01:29:00.466300: step: 310/463, loss: 0.19018863141536713 2023-01-24 01:29:01.150372: step: 312/463, loss: 0.37769585847854614 2023-01-24 01:29:01.769878: step: 314/463, loss: 0.10424766689538956 2023-01-24 01:29:02.325159: step: 316/463, loss: 0.16901977360248566 2023-01-24 01:29:02.979439: step: 318/463, loss: 0.11063750833272934 2023-01-24 01:29:03.553946: step: 320/463, loss: 0.0661756843328476 2023-01-24 01:29:04.153317: step: 322/463, loss: 0.08627904951572418 2023-01-24 01:29:04.694717: step: 324/463, loss: 0.1924169510602951 2023-01-24 01:29:05.271510: step: 326/463, loss: 0.3969847559928894 2023-01-24 01:29:05.905209: step: 328/463, loss: 0.05523085966706276 2023-01-24 01:29:06.505372: step: 330/463, loss: 0.17462460696697235 2023-01-24 01:29:07.094858: step: 332/463, loss: 0.06294368207454681 2023-01-24 01:29:07.698439: step: 334/463, loss: 0.03935914486646652 2023-01-24 01:29:08.253424: step: 336/463, loss: 0.11624235659837723 2023-01-24 01:29:08.949002: step: 338/463, loss: 0.28423333168029785 2023-01-24 01:29:09.581904: step: 340/463, loss: 0.2618388533592224 2023-01-24 01:29:10.191462: step: 342/463, loss: 0.5247267484664917 2023-01-24 01:29:10.834614: step: 344/463, loss: 0.35072705149650574 2023-01-24 01:29:11.370651: step: 346/463, loss: 0.11733942478895187 2023-01-24 01:29:11.955914: step: 348/463, loss: 0.05037592723965645 2023-01-24 01:29:12.528116: step: 350/463, loss: 0.21319517493247986 2023-01-24 01:29:13.177502: step: 352/463, loss: 0.2764868438243866 2023-01-24 01:29:13.795308: step: 354/463, loss: 0.0621664896607399 2023-01-24 01:29:14.388253: step: 356/463, loss: 0.20700062811374664 2023-01-24 01:29:14.987414: step: 358/463, loss: 0.14665798842906952 2023-01-24 01:29:15.607202: step: 360/463, loss: 0.19634947180747986 2023-01-24 01:29:16.265768: step: 362/463, loss: 0.31635794043540955 2023-01-24 01:29:16.944192: step: 364/463, loss: 0.14732970297336578 2023-01-24 01:29:17.481514: step: 366/463, loss: 0.055862780660390854 2023-01-24 01:29:18.095152: step: 368/463, loss: 0.1924988329410553 2023-01-24 01:29:18.721442: step: 370/463, loss: 0.18968096375465393 2023-01-24 01:29:19.336892: step: 372/463, loss: 0.12090527266263962 2023-01-24 01:29:19.919086: step: 374/463, loss: 0.17040404677391052 2023-01-24 01:29:20.503818: step: 376/463, loss: 0.10571619868278503 2023-01-24 01:29:21.068190: step: 378/463, loss: 0.4054567813873291 2023-01-24 01:29:21.651435: step: 380/463, loss: 0.13476382195949554 2023-01-24 01:29:22.297585: step: 382/463, loss: 0.32289326190948486 2023-01-24 01:29:22.888151: step: 384/463, loss: 0.23520106077194214 2023-01-24 01:29:23.543939: step: 386/463, loss: 0.21489021182060242 2023-01-24 01:29:24.101318: step: 388/463, loss: 0.33941173553466797 2023-01-24 01:29:24.747593: step: 390/463, loss: 0.2983942925930023 2023-01-24 01:29:25.353908: step: 392/463, loss: 0.08099149912595749 2023-01-24 01:29:25.966241: step: 394/463, loss: 0.5526613593101501 2023-01-24 01:29:26.554126: step: 396/463, loss: 0.07397932559251785 2023-01-24 01:29:27.141142: step: 398/463, loss: 0.3973013460636139 2023-01-24 01:29:27.861114: step: 400/463, loss: 0.19855327904224396 2023-01-24 01:29:28.493461: step: 402/463, loss: 0.5161593556404114 2023-01-24 01:29:29.060384: step: 404/463, loss: 0.21995356678962708 2023-01-24 01:29:29.640979: step: 406/463, loss: 0.18139362335205078 2023-01-24 01:29:30.275345: step: 408/463, loss: 0.12144064903259277 2023-01-24 01:29:30.866658: step: 410/463, loss: 0.6702880263328552 2023-01-24 01:29:31.496582: step: 412/463, loss: 0.18607521057128906 2023-01-24 01:29:32.032249: step: 414/463, loss: 0.0619998537003994 2023-01-24 01:29:32.629722: step: 416/463, loss: 0.1705656796693802 2023-01-24 01:29:33.252271: step: 418/463, loss: 0.15435650944709778 2023-01-24 01:29:33.830628: step: 420/463, loss: 0.17575670778751373 2023-01-24 01:29:34.468100: step: 422/463, loss: 0.3229943811893463 2023-01-24 01:29:35.068732: step: 424/463, loss: 0.08034990727901459 2023-01-24 01:29:35.721401: step: 426/463, loss: 0.2815340757369995 2023-01-24 01:29:36.303120: step: 428/463, loss: 0.4721074402332306 2023-01-24 01:29:36.897204: step: 430/463, loss: 0.1263284832239151 2023-01-24 01:29:37.538042: step: 432/463, loss: 0.13392148911952972 2023-01-24 01:29:38.165961: step: 434/463, loss: 0.25827857851982117 2023-01-24 01:29:38.783871: step: 436/463, loss: 0.09906576573848724 2023-01-24 01:29:39.386447: step: 438/463, loss: 0.13283367455005646 2023-01-24 01:29:40.034689: step: 440/463, loss: 0.2914736568927765 2023-01-24 01:29:40.673864: step: 442/463, loss: 0.09527754783630371 2023-01-24 01:29:41.312742: step: 444/463, loss: 0.13543950021266937 2023-01-24 01:29:41.955729: step: 446/463, loss: 0.1158260628581047 2023-01-24 01:29:42.593799: step: 448/463, loss: 0.13117797672748566 2023-01-24 01:29:43.256052: step: 450/463, loss: 0.2001490443944931 2023-01-24 01:29:43.902943: step: 452/463, loss: 0.0987393781542778 2023-01-24 01:29:44.488663: step: 454/463, loss: 0.3383936882019043 2023-01-24 01:29:45.095484: step: 456/463, loss: 0.5780913829803467 2023-01-24 01:29:45.751852: step: 458/463, loss: 0.040081772953271866 2023-01-24 01:29:46.385524: step: 460/463, loss: 0.13392862677574158 2023-01-24 01:29:46.992152: step: 462/463, loss: 0.1242854967713356 2023-01-24 01:29:47.633556: step: 464/463, loss: 0.16392742097377777 2023-01-24 01:29:48.246531: step: 466/463, loss: 0.1988963633775711 2023-01-24 01:29:48.876094: step: 468/463, loss: 0.43538421392440796 2023-01-24 01:29:49.535350: step: 470/463, loss: 0.19662325084209442 2023-01-24 01:29:50.092206: step: 472/463, loss: 0.1487337201833725 2023-01-24 01:29:50.718957: step: 474/463, loss: 0.13890081644058228 2023-01-24 01:29:51.326398: step: 476/463, loss: 0.13792787492275238 2023-01-24 01:29:51.970947: step: 478/463, loss: 0.10439697653055191 2023-01-24 01:29:52.573617: step: 480/463, loss: 0.40365296602249146 2023-01-24 01:29:53.171554: step: 482/463, loss: 0.10915505886077881 2023-01-24 01:29:53.744667: step: 484/463, loss: 0.1635630875825882 2023-01-24 01:29:54.371844: step: 486/463, loss: 0.15946418046951294 2023-01-24 01:29:54.931765: step: 488/463, loss: 0.04774601384997368 2023-01-24 01:29:55.512807: step: 490/463, loss: 0.08587408065795898 2023-01-24 01:29:56.115153: step: 492/463, loss: 0.4820919930934906 2023-01-24 01:29:56.760211: step: 494/463, loss: 0.08232135325670242 2023-01-24 01:29:57.390568: step: 496/463, loss: 0.3370683491230011 2023-01-24 01:29:57.974733: step: 498/463, loss: 0.28991711139678955 2023-01-24 01:29:58.611135: step: 500/463, loss: 0.05478794872760773 2023-01-24 01:29:59.231180: step: 502/463, loss: 0.12191804498434067 2023-01-24 01:29:59.841934: step: 504/463, loss: 0.1375276893377304 2023-01-24 01:30:00.484147: step: 506/463, loss: 0.10110702365636826 2023-01-24 01:30:01.083803: step: 508/463, loss: 0.09454682469367981 2023-01-24 01:30:01.683134: step: 510/463, loss: 0.08920510113239288 2023-01-24 01:30:02.263350: step: 512/463, loss: 0.12301144003868103 2023-01-24 01:30:02.880318: step: 514/463, loss: 0.0665707141160965 2023-01-24 01:30:03.509988: step: 516/463, loss: 3.8497567176818848 2023-01-24 01:30:04.151653: step: 518/463, loss: 0.13835231959819794 2023-01-24 01:30:04.795258: step: 520/463, loss: 0.18506622314453125 2023-01-24 01:30:05.447341: step: 522/463, loss: 0.1813444197177887 2023-01-24 01:30:06.120055: step: 524/463, loss: 0.4440092444419861 2023-01-24 01:30:06.670107: step: 526/463, loss: 0.08467331528663635 2023-01-24 01:30:07.282713: step: 528/463, loss: 0.1103239357471466 2023-01-24 01:30:08.010771: step: 530/463, loss: 0.17476728558540344 2023-01-24 01:30:08.729354: step: 532/463, loss: 0.20910315215587616 2023-01-24 01:30:09.313712: step: 534/463, loss: 0.18270151317119598 2023-01-24 01:30:09.908756: step: 536/463, loss: 0.02789652906358242 2023-01-24 01:30:10.507135: step: 538/463, loss: 0.1144997775554657 2023-01-24 01:30:11.131301: step: 540/463, loss: 0.2162732183933258 2023-01-24 01:30:11.766739: step: 542/463, loss: 0.06295991688966751 2023-01-24 01:30:12.376195: step: 544/463, loss: 1.3117586374282837 2023-01-24 01:30:13.017478: step: 546/463, loss: 0.17998982965946198 2023-01-24 01:30:13.685655: step: 548/463, loss: 0.335711270570755 2023-01-24 01:30:14.322833: step: 550/463, loss: 3.912230968475342 2023-01-24 01:30:14.908777: step: 552/463, loss: 1.2657710313796997 2023-01-24 01:30:15.503190: step: 554/463, loss: 0.06327852606773376 2023-01-24 01:30:16.117587: step: 556/463, loss: 0.2076507955789566 2023-01-24 01:30:16.728715: step: 558/463, loss: 0.1588555872440338 2023-01-24 01:30:17.326179: step: 560/463, loss: 0.1802477389574051 2023-01-24 01:30:17.899046: step: 562/463, loss: 0.1872415542602539 2023-01-24 01:30:18.454208: step: 564/463, loss: 0.5986431241035461 2023-01-24 01:30:19.090523: step: 566/463, loss: 0.21616558730602264 2023-01-24 01:30:19.700004: step: 568/463, loss: 0.43030881881713867 2023-01-24 01:30:20.286266: step: 570/463, loss: 0.061145681887865067 2023-01-24 01:30:20.998393: step: 572/463, loss: 0.15694135427474976 2023-01-24 01:30:21.590111: step: 574/463, loss: 0.1301918774843216 2023-01-24 01:30:22.173500: step: 576/463, loss: 0.0886097326874733 2023-01-24 01:30:22.786775: step: 578/463, loss: 2.15177583694458 2023-01-24 01:30:23.454166: step: 580/463, loss: 0.17664887011051178 2023-01-24 01:30:24.053475: step: 582/463, loss: 0.22328969836235046 2023-01-24 01:30:24.635610: step: 584/463, loss: 0.18291985988616943 2023-01-24 01:30:25.201995: step: 586/463, loss: 0.03667150437831879 2023-01-24 01:30:25.859410: step: 588/463, loss: 0.31059208512306213 2023-01-24 01:30:26.440218: step: 590/463, loss: 0.9711023569107056 2023-01-24 01:30:27.052899: step: 592/463, loss: 0.04945782944560051 2023-01-24 01:30:27.627163: step: 594/463, loss: 1.0641658306121826 2023-01-24 01:30:28.294816: step: 596/463, loss: 0.21667850017547607 2023-01-24 01:30:28.868751: step: 598/463, loss: 0.036601223051548004 2023-01-24 01:30:29.464287: step: 600/463, loss: 0.2349911332130432 2023-01-24 01:30:30.061872: step: 602/463, loss: 0.3330759108066559 2023-01-24 01:30:30.651534: step: 604/463, loss: 0.03820441663265228 2023-01-24 01:30:31.252579: step: 606/463, loss: 0.2384144812822342 2023-01-24 01:30:31.886027: step: 608/463, loss: 0.13914506137371063 2023-01-24 01:30:32.553775: step: 610/463, loss: 0.333781361579895 2023-01-24 01:30:33.212391: step: 612/463, loss: 0.12820909917354584 2023-01-24 01:30:33.825155: step: 614/463, loss: 0.09873773157596588 2023-01-24 01:30:34.502994: step: 616/463, loss: 0.1655219942331314 2023-01-24 01:30:35.089668: step: 618/463, loss: 0.08512184023857117 2023-01-24 01:30:35.754699: step: 620/463, loss: 0.5956094264984131 2023-01-24 01:30:36.387236: step: 622/463, loss: 0.059289418160915375 2023-01-24 01:30:36.945466: step: 624/463, loss: 0.12848839163780212 2023-01-24 01:30:37.531119: step: 626/463, loss: 0.24718551337718964 2023-01-24 01:30:38.171795: step: 628/463, loss: 0.1663002222776413 2023-01-24 01:30:38.748448: step: 630/463, loss: 0.23154708743095398 2023-01-24 01:30:39.424386: step: 632/463, loss: 0.26078882813453674 2023-01-24 01:30:40.089592: step: 634/463, loss: 0.10760688781738281 2023-01-24 01:30:40.736829: step: 636/463, loss: 0.08595269173383713 2023-01-24 01:30:41.396869: step: 638/463, loss: 0.16869382560253143 2023-01-24 01:30:42.028967: step: 640/463, loss: 0.2620159089565277 2023-01-24 01:30:42.621739: step: 642/463, loss: 0.43244874477386475 2023-01-24 01:30:43.224219: step: 644/463, loss: 0.6806952357292175 2023-01-24 01:30:43.905592: step: 646/463, loss: 0.1981651782989502 2023-01-24 01:30:44.472593: step: 648/463, loss: 0.16820865869522095 2023-01-24 01:30:45.163949: step: 650/463, loss: 0.20617768168449402 2023-01-24 01:30:45.755107: step: 652/463, loss: 0.14653196930885315 2023-01-24 01:30:46.258688: step: 654/463, loss: 0.16634276509284973 2023-01-24 01:30:46.813825: step: 656/463, loss: 0.19455400109291077 2023-01-24 01:30:47.473426: step: 658/463, loss: 0.6135392785072327 2023-01-24 01:30:48.080237: step: 660/463, loss: 0.2134610414505005 2023-01-24 01:30:48.697953: step: 662/463, loss: 0.047566983848810196 2023-01-24 01:30:49.318302: step: 664/463, loss: 0.16085729002952576 2023-01-24 01:30:49.950512: step: 666/463, loss: 0.20550133287906647 2023-01-24 01:30:50.533315: step: 668/463, loss: 0.3620936870574951 2023-01-24 01:30:51.093666: step: 670/463, loss: 0.5609982013702393 2023-01-24 01:30:51.699372: step: 672/463, loss: 0.6223273277282715 2023-01-24 01:30:52.350447: step: 674/463, loss: 0.6914418935775757 2023-01-24 01:30:52.999297: step: 676/463, loss: 0.36965328454971313 2023-01-24 01:30:53.555625: step: 678/463, loss: 0.13208265602588654 2023-01-24 01:30:54.225361: step: 680/463, loss: 5.155648231506348 2023-01-24 01:30:54.855735: step: 682/463, loss: 0.503275454044342 2023-01-24 01:30:55.469236: step: 684/463, loss: 0.15227098762989044 2023-01-24 01:30:56.119396: step: 686/463, loss: 0.12176734954118729 2023-01-24 01:30:56.749769: step: 688/463, loss: 0.09478053450584412 2023-01-24 01:30:57.341303: step: 690/463, loss: 0.2884616553783417 2023-01-24 01:30:57.918719: step: 692/463, loss: 1.4465655088424683 2023-01-24 01:30:58.470762: step: 694/463, loss: 1.026667833328247 2023-01-24 01:30:59.116210: step: 696/463, loss: 0.12842486798763275 2023-01-24 01:30:59.715570: step: 698/463, loss: 0.09896834194660187 2023-01-24 01:31:00.320417: step: 700/463, loss: 0.07807404547929764 2023-01-24 01:31:00.930540: step: 702/463, loss: 0.04168181121349335 2023-01-24 01:31:01.587252: step: 704/463, loss: 0.1086229458451271 2023-01-24 01:31:02.215776: step: 706/463, loss: 0.10747560113668442 2023-01-24 01:31:02.808249: step: 708/463, loss: 0.4578455090522766 2023-01-24 01:31:03.509799: step: 710/463, loss: 0.23231656849384308 2023-01-24 01:31:04.122379: step: 712/463, loss: 0.3161855936050415 2023-01-24 01:31:04.770070: step: 714/463, loss: 0.10036582499742508 2023-01-24 01:31:05.369816: step: 716/463, loss: 0.06386017054319382 2023-01-24 01:31:05.971858: step: 718/463, loss: 0.27133670449256897 2023-01-24 01:31:06.558804: step: 720/463, loss: 0.16375049948692322 2023-01-24 01:31:07.145977: step: 722/463, loss: 0.12791162729263306 2023-01-24 01:31:07.726573: step: 724/463, loss: 0.11244847625494003 2023-01-24 01:31:08.322777: step: 726/463, loss: 0.06352280080318451 2023-01-24 01:31:08.938577: step: 728/463, loss: 0.161606565117836 2023-01-24 01:31:09.485472: step: 730/463, loss: 0.11433148384094238 2023-01-24 01:31:10.075791: step: 732/463, loss: 0.19788090884685516 2023-01-24 01:31:10.725934: step: 734/463, loss: 0.08007632195949554 2023-01-24 01:31:11.341767: step: 736/463, loss: 0.1446334272623062 2023-01-24 01:31:11.977048: step: 738/463, loss: 0.29650601744651794 2023-01-24 01:31:12.644728: step: 740/463, loss: 0.7098445892333984 2023-01-24 01:31:13.223579: step: 742/463, loss: 0.21318648755550385 2023-01-24 01:31:13.830484: step: 744/463, loss: 0.1889760047197342 2023-01-24 01:31:14.481732: step: 746/463, loss: 0.12877604365348816 2023-01-24 01:31:15.073202: step: 748/463, loss: 0.12182393670082092 2023-01-24 01:31:15.686887: step: 750/463, loss: 0.2841183543205261 2023-01-24 01:31:16.265326: step: 752/463, loss: 0.15512509644031525 2023-01-24 01:31:16.843025: step: 754/463, loss: 0.12062467634677887 2023-01-24 01:31:17.432764: step: 756/463, loss: 0.15865936875343323 2023-01-24 01:31:18.037501: step: 758/463, loss: 0.12482234090566635 2023-01-24 01:31:18.653725: step: 760/463, loss: 0.06911307573318481 2023-01-24 01:31:19.284771: step: 762/463, loss: 0.7442855834960938 2023-01-24 01:31:19.928648: step: 764/463, loss: 0.24386733770370483 2023-01-24 01:31:20.503547: step: 766/463, loss: 0.09315664321184158 2023-01-24 01:31:21.058075: step: 768/463, loss: 0.15519924461841583 2023-01-24 01:31:21.630407: step: 770/463, loss: 0.12154491245746613 2023-01-24 01:31:22.219564: step: 772/463, loss: 0.08926746994256973 2023-01-24 01:31:22.855894: step: 774/463, loss: 0.04717402160167694 2023-01-24 01:31:23.610085: step: 776/463, loss: 0.16514192521572113 2023-01-24 01:31:24.223070: step: 778/463, loss: 0.2481343299150467 2023-01-24 01:31:24.775464: step: 780/463, loss: 0.1458498239517212 2023-01-24 01:31:25.398400: step: 782/463, loss: 0.12890134751796722 2023-01-24 01:31:25.990773: step: 784/463, loss: 0.2519560158252716 2023-01-24 01:31:26.659890: step: 786/463, loss: 0.10948509722948074 2023-01-24 01:31:27.325063: step: 788/463, loss: 7.444417476654053 2023-01-24 01:31:27.896883: step: 790/463, loss: 0.13372346758842468 2023-01-24 01:31:28.490571: step: 792/463, loss: 0.09975311905145645 2023-01-24 01:31:29.083960: step: 794/463, loss: 0.1279948651790619 2023-01-24 01:31:29.700659: step: 796/463, loss: 0.2808547616004944 2023-01-24 01:31:30.339851: step: 798/463, loss: 0.13118933141231537 2023-01-24 01:31:30.925779: step: 800/463, loss: 0.5269955396652222 2023-01-24 01:31:31.537867: step: 802/463, loss: 0.1489705890417099 2023-01-24 01:31:32.259266: step: 804/463, loss: 0.09192366898059845 2023-01-24 01:31:32.848548: step: 806/463, loss: 0.6658681631088257 2023-01-24 01:31:33.478606: step: 808/463, loss: 0.3161330819129944 2023-01-24 01:31:34.153344: step: 810/463, loss: 0.8518253564834595 2023-01-24 01:31:34.791809: step: 812/463, loss: 0.29379013180732727 2023-01-24 01:31:35.384159: step: 814/463, loss: 0.9104217886924744 2023-01-24 01:31:35.974228: step: 816/463, loss: 0.3292897641658783 2023-01-24 01:31:36.538526: step: 818/463, loss: 0.10330037027597427 2023-01-24 01:31:37.151513: step: 820/463, loss: 0.17516545951366425 2023-01-24 01:31:37.751623: step: 822/463, loss: 0.13572344183921814 2023-01-24 01:31:38.321406: step: 824/463, loss: 0.3371018171310425 2023-01-24 01:31:39.051273: step: 826/463, loss: 0.09697898477315903 2023-01-24 01:31:39.768843: step: 828/463, loss: 0.1890784353017807 2023-01-24 01:31:40.419711: step: 830/463, loss: 0.11863324791193008 2023-01-24 01:31:41.071983: step: 832/463, loss: 0.06436974555253983 2023-01-24 01:31:41.642115: step: 834/463, loss: 0.1450974941253662 2023-01-24 01:31:42.225267: step: 836/463, loss: 1.2689837217330933 2023-01-24 01:31:42.857605: step: 838/463, loss: 0.2716820538043976 2023-01-24 01:31:43.504334: step: 840/463, loss: 0.14493253827095032 2023-01-24 01:31:44.118550: step: 842/463, loss: 0.1309964507818222 2023-01-24 01:31:44.743469: step: 844/463, loss: 0.20912817120552063 2023-01-24 01:31:45.371083: step: 846/463, loss: 0.5990986824035645 2023-01-24 01:31:46.060646: step: 848/463, loss: 0.2713647484779358 2023-01-24 01:31:46.712667: step: 850/463, loss: 0.13493691384792328 2023-01-24 01:31:47.332894: step: 852/463, loss: 0.07767636328935623 2023-01-24 01:31:47.950357: step: 854/463, loss: 0.4309062957763672 2023-01-24 01:31:48.520530: step: 856/463, loss: 0.26253634691238403 2023-01-24 01:31:49.130306: step: 858/463, loss: 0.057649221271276474 2023-01-24 01:31:49.778511: step: 860/463, loss: 0.41135984659194946 2023-01-24 01:31:50.397293: step: 862/463, loss: 0.4964521527290344 2023-01-24 01:31:51.068377: step: 864/463, loss: 0.1342688947916031 2023-01-24 01:31:51.700029: step: 866/463, loss: 0.03948821872472763 2023-01-24 01:31:52.296103: step: 868/463, loss: 0.07460054755210876 2023-01-24 01:31:52.891104: step: 870/463, loss: 0.25301721692085266 2023-01-24 01:31:53.506577: step: 872/463, loss: 0.1827431321144104 2023-01-24 01:31:54.084511: step: 874/463, loss: 0.1411261409521103 2023-01-24 01:31:54.722457: step: 876/463, loss: 0.13656504452228546 2023-01-24 01:31:55.350880: step: 878/463, loss: 0.5958837270736694 2023-01-24 01:31:55.917086: step: 880/463, loss: 0.36162328720092773 2023-01-24 01:31:56.564592: step: 882/463, loss: 0.09074178338050842 2023-01-24 01:31:57.157634: step: 884/463, loss: 0.38318493962287903 2023-01-24 01:31:57.767165: step: 886/463, loss: 0.0527978278696537 2023-01-24 01:31:58.424239: step: 888/463, loss: 0.27555370330810547 2023-01-24 01:31:59.048227: step: 890/463, loss: 0.28918150067329407 2023-01-24 01:31:59.644545: step: 892/463, loss: 0.07164045423269272 2023-01-24 01:32:00.240477: step: 894/463, loss: 0.5946390628814697 2023-01-24 01:32:00.887234: step: 896/463, loss: 0.09591452777385712 2023-01-24 01:32:01.512984: step: 898/463, loss: 0.2889423370361328 2023-01-24 01:32:02.133759: step: 900/463, loss: 0.10507835447788239 2023-01-24 01:32:02.733592: step: 902/463, loss: 0.8069683313369751 2023-01-24 01:32:03.429913: step: 904/463, loss: 0.09550615400075912 2023-01-24 01:32:04.018317: step: 906/463, loss: 0.3159022629261017 2023-01-24 01:32:04.686452: step: 908/463, loss: 0.39066919684410095 2023-01-24 01:32:05.262421: step: 910/463, loss: 0.1605747938156128 2023-01-24 01:32:05.928660: step: 912/463, loss: 0.12333541363477707 2023-01-24 01:32:06.540924: step: 914/463, loss: 0.14795920252799988 2023-01-24 01:32:07.084190: step: 916/463, loss: 0.0860903412103653 2023-01-24 01:32:07.633689: step: 918/463, loss: 0.7397198677062988 2023-01-24 01:32:08.205504: step: 920/463, loss: 0.6861542463302612 2023-01-24 01:32:08.870749: step: 922/463, loss: 0.2602579891681671 2023-01-24 01:32:09.514016: step: 924/463, loss: 0.2313385158777237 2023-01-24 01:32:10.139870: step: 926/463, loss: 0.20819246768951416 ================================================== Loss: 0.276 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3367666257176327, 'r': 0.332293444730871, 'f1': 0.33451508189717105}, 'combined': 0.2464847971873892, 'epoch': 12} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3483744517075169, 'r': 0.392881849490003, 'f1': 0.3692919673823959}, 'combined': 0.2862454484016657, 'epoch': 12} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30393742648022515, 'r': 0.330467069019296, 'f1': 0.31664753704212545}, 'combined': 0.23331923782051348, 'epoch': 12} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3267875618135704, 'r': 0.38595773614929957, 'f1': 0.3539165755840185}, 'combined': 0.27432768059622487, 'epoch': 12} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30298459800597805, 'r': 0.3156329113952219, 'f1': 0.30917945038156497}, 'combined': 0.2278164371232584, 'epoch': 12} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33161881723086756, 'r': 0.37550954304083534, 'f1': 0.35220205416243866}, 'combined': 0.2729987214082061, 'epoch': 12} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2672413793103448, 'r': 0.22142857142857142, 'f1': 0.2421875}, 'combined': 0.16145833333333331, 'epoch': 12} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3148148148148148, 'r': 0.3695652173913043, 'f1': 0.34}, 'combined': 0.17, 'epoch': 12} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.46153846153846156, 'r': 0.20689655172413793, 'f1': 0.28571428571428575}, 'combined': 0.1904761904761905, 'epoch': 12} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3664599181442166, 'r': 0.3122210687794179, 'f1': 0.33717316239088774}, 'combined': 0.2484433828143383, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3655563493147106, 'r': 0.32203773630105453, 'f1': 0.34241987150998204}, 'combined': 0.26541635973501004, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.42857142857142855, 'r': 0.2571428571428571, 'f1': 0.3214285714285714}, 'combined': 0.21428571428571425, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3283040393999639, 'r': 0.31335281179920654, 'f1': 0.3206542365401589}, 'combined': 0.23627154271380127, 'epoch': 6} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34970123997901775, 'r': 0.3199189915192662, 'f1': 0.3341478132367659}, 'combined': 0.2590045250926128, 'epoch': 6} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.38, 'r': 0.41304347826086957, 'f1': 0.39583333333333337}, 'combined': 0.19791666666666669, 'epoch': 6} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30298459800597805, 'r': 0.3156329113952219, 'f1': 0.30917945038156497}, 'combined': 0.2278164371232584, 'epoch': 12} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33161881723086756, 'r': 0.37550954304083534, 'f1': 0.35220205416243866}, 'combined': 0.2729987214082061, 'epoch': 12} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.46153846153846156, 'r': 0.20689655172413793, 'f1': 0.28571428571428575}, 'combined': 0.1904761904761905, 'epoch': 12} ****************************** Epoch: 13 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:34:50.392999: step: 2/463, loss: 2.565711498260498 2023-01-24 01:34:51.088980: step: 4/463, loss: 0.16664744913578033 2023-01-24 01:34:51.727377: step: 6/463, loss: 0.1379290223121643 2023-01-24 01:34:52.334345: step: 8/463, loss: 0.1690729409456253 2023-01-24 01:34:52.922605: step: 10/463, loss: 0.17966759204864502 2023-01-24 01:34:53.501586: step: 12/463, loss: 0.22286882996559143 2023-01-24 01:34:54.106854: step: 14/463, loss: 0.15766093134880066 2023-01-24 01:34:54.779749: step: 16/463, loss: 0.1296265721321106 2023-01-24 01:34:55.431250: step: 18/463, loss: 0.07797739654779434 2023-01-24 01:34:56.036525: step: 20/463, loss: 0.14893348515033722 2023-01-24 01:34:56.594755: step: 22/463, loss: 0.08049022406339645 2023-01-24 01:34:57.182198: step: 24/463, loss: 0.8503106236457825 2023-01-24 01:34:57.762171: step: 26/463, loss: 0.1962115317583084 2023-01-24 01:34:58.388288: step: 28/463, loss: 0.13229620456695557 2023-01-24 01:34:59.044196: step: 30/463, loss: 0.47463545203208923 2023-01-24 01:34:59.646328: step: 32/463, loss: 0.2727740705013275 2023-01-24 01:35:00.221580: step: 34/463, loss: 0.0912417396903038 2023-01-24 01:35:00.874873: step: 36/463, loss: 0.04754031449556351 2023-01-24 01:35:01.503588: step: 38/463, loss: 0.3348187804222107 2023-01-24 01:35:02.120486: step: 40/463, loss: 0.09286195039749146 2023-01-24 01:35:02.829776: step: 42/463, loss: 0.23952889442443848 2023-01-24 01:35:03.424661: step: 44/463, loss: 0.07650396227836609 2023-01-24 01:35:04.052354: step: 46/463, loss: 0.15996961295604706 2023-01-24 01:35:04.680403: step: 48/463, loss: 0.05410662665963173 2023-01-24 01:35:05.359351: step: 50/463, loss: 0.33852213621139526 2023-01-24 01:35:05.980898: step: 52/463, loss: 0.04223308339715004 2023-01-24 01:35:06.571541: step: 54/463, loss: 0.5379517078399658 2023-01-24 01:35:07.217139: step: 56/463, loss: 0.1742713451385498 2023-01-24 01:35:07.907976: step: 58/463, loss: 0.21306444704532623 2023-01-24 01:35:08.516833: step: 60/463, loss: 0.11296823620796204 2023-01-24 01:35:09.196493: step: 62/463, loss: 0.28409895300865173 2023-01-24 01:35:09.940970: step: 64/463, loss: 0.060683343559503555 2023-01-24 01:35:10.581717: step: 66/463, loss: 0.08932936936616898 2023-01-24 01:35:11.219019: step: 68/463, loss: 0.21831552684307098 2023-01-24 01:35:11.820074: step: 70/463, loss: 0.08629943430423737 2023-01-24 01:35:12.444559: step: 72/463, loss: 0.3023335933685303 2023-01-24 01:35:13.152050: step: 74/463, loss: 0.11173388361930847 2023-01-24 01:35:13.770031: step: 76/463, loss: 0.09676092118024826 2023-01-24 01:35:14.459342: step: 78/463, loss: 0.10400616377592087 2023-01-24 01:35:15.052165: step: 80/463, loss: 0.6862213015556335 2023-01-24 01:35:15.693612: step: 82/463, loss: 0.14357812702655792 2023-01-24 01:35:16.306687: step: 84/463, loss: 0.0835581049323082 2023-01-24 01:35:16.902263: step: 86/463, loss: 0.08401966094970703 2023-01-24 01:35:17.490573: step: 88/463, loss: 0.3063279092311859 2023-01-24 01:35:18.104658: step: 90/463, loss: 0.12771596014499664 2023-01-24 01:35:18.754020: step: 92/463, loss: 0.15308119356632233 2023-01-24 01:35:19.382647: step: 94/463, loss: 0.05297008529305458 2023-01-24 01:35:20.059277: step: 96/463, loss: 0.1168137788772583 2023-01-24 01:35:20.643648: step: 98/463, loss: 0.06922732293605804 2023-01-24 01:35:21.209892: step: 100/463, loss: 0.16214075684547424 2023-01-24 01:35:21.824720: step: 102/463, loss: 0.07149788737297058 2023-01-24 01:35:22.530821: step: 104/463, loss: 0.27030134201049805 2023-01-24 01:35:23.157767: step: 106/463, loss: 0.07457955926656723 2023-01-24 01:35:23.742618: step: 108/463, loss: 0.022927306592464447 2023-01-24 01:35:24.399071: step: 110/463, loss: 0.14719931781291962 2023-01-24 01:35:24.955671: step: 112/463, loss: 0.799362301826477 2023-01-24 01:35:25.514401: step: 114/463, loss: 0.05564933270215988 2023-01-24 01:35:26.135843: step: 116/463, loss: 0.07638891041278839 2023-01-24 01:35:26.752270: step: 118/463, loss: 0.24894677102565765 2023-01-24 01:35:27.364686: step: 120/463, loss: 0.06474931538105011 2023-01-24 01:35:27.990582: step: 122/463, loss: 0.12605683505535126 2023-01-24 01:35:28.548208: step: 124/463, loss: 0.12097879499197006 2023-01-24 01:35:29.189140: step: 126/463, loss: 0.2464277297258377 2023-01-24 01:35:29.795986: step: 128/463, loss: 0.264183908700943 2023-01-24 01:35:30.424521: step: 130/463, loss: 0.18785572052001953 2023-01-24 01:35:31.014993: step: 132/463, loss: 0.05885416641831398 2023-01-24 01:35:31.668200: step: 134/463, loss: 0.11870820075273514 2023-01-24 01:35:32.323777: step: 136/463, loss: 0.12184017151594162 2023-01-24 01:35:32.974859: step: 138/463, loss: 0.11965828388929367 2023-01-24 01:35:33.586620: step: 140/463, loss: 0.3557186424732208 2023-01-24 01:35:34.126905: step: 142/463, loss: 0.04448102414608002 2023-01-24 01:35:34.715157: step: 144/463, loss: 0.19215553998947144 2023-01-24 01:35:35.356401: step: 146/463, loss: 0.10128043591976166 2023-01-24 01:35:35.994772: step: 148/463, loss: 6.752254009246826 2023-01-24 01:35:36.649610: step: 150/463, loss: 0.06240927055478096 2023-01-24 01:35:37.267252: step: 152/463, loss: 0.5488643646240234 2023-01-24 01:35:37.824854: step: 154/463, loss: 0.2403147667646408 2023-01-24 01:35:38.415401: step: 156/463, loss: 0.1681583970785141 2023-01-24 01:35:38.976815: step: 158/463, loss: 0.10901160538196564 2023-01-24 01:35:39.593264: step: 160/463, loss: 0.22435398399829865 2023-01-24 01:35:40.266353: step: 162/463, loss: 0.09708784520626068 2023-01-24 01:35:40.889449: step: 164/463, loss: 0.11457079648971558 2023-01-24 01:35:41.469260: step: 166/463, loss: 0.3900364637374878 2023-01-24 01:35:42.049255: step: 168/463, loss: 0.28347858786582947 2023-01-24 01:35:42.630151: step: 170/463, loss: 0.12434875220060349 2023-01-24 01:35:43.281372: step: 172/463, loss: 0.022395463660359383 2023-01-24 01:35:43.942263: step: 174/463, loss: 0.08362209796905518 2023-01-24 01:35:44.533857: step: 176/463, loss: 0.05168960988521576 2023-01-24 01:35:45.157686: step: 178/463, loss: 0.12967200577259064 2023-01-24 01:35:45.774201: step: 180/463, loss: 0.11016476154327393 2023-01-24 01:35:46.369252: step: 182/463, loss: 0.14662586152553558 2023-01-24 01:35:46.982040: step: 184/463, loss: 0.3271436095237732 2023-01-24 01:35:47.667095: step: 186/463, loss: 0.04441945254802704 2023-01-24 01:35:48.288259: step: 188/463, loss: 0.11458349972963333 2023-01-24 01:35:48.874382: step: 190/463, loss: 0.14119161665439606 2023-01-24 01:35:49.514503: step: 192/463, loss: 0.08486742526292801 2023-01-24 01:35:50.138257: step: 194/463, loss: 0.08645319938659668 2023-01-24 01:35:50.792213: step: 196/463, loss: 0.12720341980457306 2023-01-24 01:35:51.364577: step: 198/463, loss: 0.13414324820041656 2023-01-24 01:35:51.992225: step: 200/463, loss: 0.12985605001449585 2023-01-24 01:35:52.603829: step: 202/463, loss: 0.2638033926486969 2023-01-24 01:35:53.284843: step: 204/463, loss: 0.3215266168117523 2023-01-24 01:35:53.980942: step: 206/463, loss: 0.11334839463233948 2023-01-24 01:35:54.626350: step: 208/463, loss: 0.30918675661087036 2023-01-24 01:35:55.173165: step: 210/463, loss: 0.06669747084379196 2023-01-24 01:35:55.841591: step: 212/463, loss: 0.17311087250709534 2023-01-24 01:35:56.478422: step: 214/463, loss: 0.26042595505714417 2023-01-24 01:35:57.105903: step: 216/463, loss: 0.3607120215892792 2023-01-24 01:35:57.676183: step: 218/463, loss: 0.07826436311006546 2023-01-24 01:35:58.283188: step: 220/463, loss: 0.1280171126127243 2023-01-24 01:35:58.901019: step: 222/463, loss: 0.07049118727445602 2023-01-24 01:35:59.550006: step: 224/463, loss: 0.1527566909790039 2023-01-24 01:36:00.202419: step: 226/463, loss: 0.11164062470197678 2023-01-24 01:36:00.775510: step: 228/463, loss: 0.07164878398180008 2023-01-24 01:36:01.421674: step: 230/463, loss: 0.10502752661705017 2023-01-24 01:36:02.051309: step: 232/463, loss: 0.17939122021198273 2023-01-24 01:36:02.698125: step: 234/463, loss: 0.33820387721061707 2023-01-24 01:36:03.263220: step: 236/463, loss: 0.352577805519104 2023-01-24 01:36:03.871815: step: 238/463, loss: 0.07940854877233505 2023-01-24 01:36:04.509464: step: 240/463, loss: 0.2308470606803894 2023-01-24 01:36:05.099633: step: 242/463, loss: 0.17906461656093597 2023-01-24 01:36:05.732289: step: 244/463, loss: 0.1280842423439026 2023-01-24 01:36:06.282910: step: 246/463, loss: 0.2776082158088684 2023-01-24 01:36:06.896817: step: 248/463, loss: 0.30667030811309814 2023-01-24 01:36:07.496114: step: 250/463, loss: 0.04092853143811226 2023-01-24 01:36:08.166330: step: 252/463, loss: 0.3598691523075104 2023-01-24 01:36:08.791639: step: 254/463, loss: 0.061636462807655334 2023-01-24 01:36:09.477178: step: 256/463, loss: 0.12093415856361389 2023-01-24 01:36:10.142708: step: 258/463, loss: 0.10726702958345413 2023-01-24 01:36:10.741620: step: 260/463, loss: 0.20799991488456726 2023-01-24 01:36:11.361199: step: 262/463, loss: 0.11698877066373825 2023-01-24 01:36:12.063156: step: 264/463, loss: 0.9746891856193542 2023-01-24 01:36:12.772875: step: 266/463, loss: 0.11387989670038223 2023-01-24 01:36:13.371572: step: 268/463, loss: 0.25322672724723816 2023-01-24 01:36:14.014121: step: 270/463, loss: 0.35835787653923035 2023-01-24 01:36:14.586581: step: 272/463, loss: 0.10283799469470978 2023-01-24 01:36:15.245752: step: 274/463, loss: 0.10783140361309052 2023-01-24 01:36:15.897741: step: 276/463, loss: 0.2683259844779968 2023-01-24 01:36:16.630012: step: 278/463, loss: 0.3214755058288574 2023-01-24 01:36:17.257017: step: 280/463, loss: 0.04341986030340195 2023-01-24 01:36:17.950176: step: 282/463, loss: 0.1504833847284317 2023-01-24 01:36:18.573449: step: 284/463, loss: 0.09483832120895386 2023-01-24 01:36:19.197667: step: 286/463, loss: 0.013112193904817104 2023-01-24 01:36:19.838414: step: 288/463, loss: 0.20012256503105164 2023-01-24 01:36:20.477440: step: 290/463, loss: 0.3879971206188202 2023-01-24 01:36:21.112854: step: 292/463, loss: 0.07313261926174164 2023-01-24 01:36:21.732434: step: 294/463, loss: 0.03994950279593468 2023-01-24 01:36:22.452906: step: 296/463, loss: 0.17103610932826996 2023-01-24 01:36:23.128504: step: 298/463, loss: 0.13750629127025604 2023-01-24 01:36:23.761566: step: 300/463, loss: 0.067560113966465 2023-01-24 01:36:24.389382: step: 302/463, loss: 0.3391919434070587 2023-01-24 01:36:25.018840: step: 304/463, loss: 0.12707072496414185 2023-01-24 01:36:25.631958: step: 306/463, loss: 0.4030746817588806 2023-01-24 01:36:26.249845: step: 308/463, loss: 0.24013054370880127 2023-01-24 01:36:26.877470: step: 310/463, loss: 0.11444075405597687 2023-01-24 01:36:27.424644: step: 312/463, loss: 0.05910976603627205 2023-01-24 01:36:28.090368: step: 314/463, loss: 0.18328982591629028 2023-01-24 01:36:28.672927: step: 316/463, loss: 0.21811260282993317 2023-01-24 01:36:29.279873: step: 318/463, loss: 0.08503499627113342 2023-01-24 01:36:29.882763: step: 320/463, loss: 0.05776338651776314 2023-01-24 01:36:30.448304: step: 322/463, loss: 0.1270158886909485 2023-01-24 01:36:31.160709: step: 324/463, loss: 0.11699722707271576 2023-01-24 01:36:31.908381: step: 326/463, loss: 0.07528286427259445 2023-01-24 01:36:32.527957: step: 328/463, loss: 0.0936053916811943 2023-01-24 01:36:33.196112: step: 330/463, loss: 0.14466506242752075 2023-01-24 01:36:33.768248: step: 332/463, loss: 0.13197006285190582 2023-01-24 01:36:34.382339: step: 334/463, loss: 0.10153637081384659 2023-01-24 01:36:34.947966: step: 336/463, loss: 0.06331086158752441 2023-01-24 01:36:35.556839: step: 338/463, loss: 0.1506926715373993 2023-01-24 01:36:36.168448: step: 340/463, loss: 0.38452062010765076 2023-01-24 01:36:36.743149: step: 342/463, loss: 0.08930423855781555 2023-01-24 01:36:37.334536: step: 344/463, loss: 0.14809022843837738 2023-01-24 01:36:37.964717: step: 346/463, loss: 0.12939181923866272 2023-01-24 01:36:38.553273: step: 348/463, loss: 0.24422359466552734 2023-01-24 01:36:39.221470: step: 350/463, loss: 0.555176854133606 2023-01-24 01:36:39.823435: step: 352/463, loss: 0.03883303701877594 2023-01-24 01:36:40.389025: step: 354/463, loss: 0.07291294634342194 2023-01-24 01:36:41.056160: step: 356/463, loss: 0.19484737515449524 2023-01-24 01:36:41.693199: step: 358/463, loss: 0.20809970796108246 2023-01-24 01:36:42.323429: step: 360/463, loss: 0.18012313544750214 2023-01-24 01:36:42.930033: step: 362/463, loss: 0.11634792387485504 2023-01-24 01:36:43.528012: step: 364/463, loss: 0.7800766825675964 2023-01-24 01:36:44.071807: step: 366/463, loss: 8.991044044494629 2023-01-24 01:36:44.674784: step: 368/463, loss: 0.3084280788898468 2023-01-24 01:36:45.264099: step: 370/463, loss: 0.07614462822675705 2023-01-24 01:36:45.901893: step: 372/463, loss: 0.3468588590621948 2023-01-24 01:36:46.545926: step: 374/463, loss: 0.1128867119550705 2023-01-24 01:36:47.148705: step: 376/463, loss: 0.12678956985473633 2023-01-24 01:36:47.737233: step: 378/463, loss: 0.19237515330314636 2023-01-24 01:36:48.392791: step: 380/463, loss: 0.10420754551887512 2023-01-24 01:36:48.999667: step: 382/463, loss: 0.6848708391189575 2023-01-24 01:36:49.566412: step: 384/463, loss: 0.26128289103507996 2023-01-24 01:36:50.205763: step: 386/463, loss: 0.037828296422958374 2023-01-24 01:36:50.955094: step: 388/463, loss: 0.49120399355888367 2023-01-24 01:36:51.515590: step: 390/463, loss: 0.03331886976957321 2023-01-24 01:36:52.145496: step: 392/463, loss: 0.1993192881345749 2023-01-24 01:36:52.796464: step: 394/463, loss: 0.06030919402837753 2023-01-24 01:36:53.390172: step: 396/463, loss: 0.10487332195043564 2023-01-24 01:36:53.983579: step: 398/463, loss: 0.03509514406323433 2023-01-24 01:36:54.677348: step: 400/463, loss: 0.11517808586359024 2023-01-24 01:36:55.250702: step: 402/463, loss: 0.05602710321545601 2023-01-24 01:36:55.863808: step: 404/463, loss: 0.43672317266464233 2023-01-24 01:36:56.431689: step: 406/463, loss: 0.1379372626543045 2023-01-24 01:36:57.020659: step: 408/463, loss: 0.07176533341407776 2023-01-24 01:36:57.580171: step: 410/463, loss: 0.07386907935142517 2023-01-24 01:36:58.260420: step: 412/463, loss: 0.1997169405221939 2023-01-24 01:36:58.853902: step: 414/463, loss: 0.6408401727676392 2023-01-24 01:36:59.475934: step: 416/463, loss: 0.11517812311649323 2023-01-24 01:37:00.104284: step: 418/463, loss: 0.2481084167957306 2023-01-24 01:37:00.704487: step: 420/463, loss: 0.07898946851491928 2023-01-24 01:37:01.315035: step: 422/463, loss: 0.09962379187345505 2023-01-24 01:37:01.947025: step: 424/463, loss: 0.20499686896800995 2023-01-24 01:37:02.595217: step: 426/463, loss: 0.0725511908531189 2023-01-24 01:37:03.271449: step: 428/463, loss: 0.1533195674419403 2023-01-24 01:37:03.856923: step: 430/463, loss: 0.22300267219543457 2023-01-24 01:37:04.479666: step: 432/463, loss: 0.3045209050178528 2023-01-24 01:37:05.118223: step: 434/463, loss: 0.1204148530960083 2023-01-24 01:37:05.738738: step: 436/463, loss: 0.19843950867652893 2023-01-24 01:37:06.278541: step: 438/463, loss: 0.12317971885204315 2023-01-24 01:37:06.867243: step: 440/463, loss: 0.058026187121868134 2023-01-24 01:37:07.440169: step: 442/463, loss: 0.1778186559677124 2023-01-24 01:37:08.138020: step: 444/463, loss: 0.10294865071773529 2023-01-24 01:37:08.809205: step: 446/463, loss: 0.1299281120300293 2023-01-24 01:37:09.374511: step: 448/463, loss: 0.07985076308250427 2023-01-24 01:37:09.937845: step: 450/463, loss: 0.17704281210899353 2023-01-24 01:37:10.508095: step: 452/463, loss: 0.3871375322341919 2023-01-24 01:37:11.068997: step: 454/463, loss: 0.21685855090618134 2023-01-24 01:37:11.676783: step: 456/463, loss: 0.2827184498310089 2023-01-24 01:37:12.307015: step: 458/463, loss: 2.6488826274871826 2023-01-24 01:37:12.894207: step: 460/463, loss: 0.09979060292243958 2023-01-24 01:37:13.475124: step: 462/463, loss: 0.1777031421661377 2023-01-24 01:37:14.091010: step: 464/463, loss: 0.22616173326969147 2023-01-24 01:37:14.698237: step: 466/463, loss: 0.35037845373153687 2023-01-24 01:37:15.359066: step: 468/463, loss: 0.26178237795829773 2023-01-24 01:37:16.059987: step: 470/463, loss: 0.11901715397834778 2023-01-24 01:37:16.639132: step: 472/463, loss: 0.17046403884887695 2023-01-24 01:37:17.264929: step: 474/463, loss: 0.35896649956703186 2023-01-24 01:37:17.927347: step: 476/463, loss: 0.1533816158771515 2023-01-24 01:37:18.567277: step: 478/463, loss: 0.817365825176239 2023-01-24 01:37:19.258716: step: 480/463, loss: 0.5074924826622009 2023-01-24 01:37:19.836807: step: 482/463, loss: 0.06507206708192825 2023-01-24 01:37:20.546111: step: 484/463, loss: 0.09565138816833496 2023-01-24 01:37:21.128896: step: 486/463, loss: 0.10228903591632843 2023-01-24 01:37:21.708903: step: 488/463, loss: 0.11880123615264893 2023-01-24 01:37:22.405497: step: 490/463, loss: 0.40800732374191284 2023-01-24 01:37:23.033061: step: 492/463, loss: 0.14800399541854858 2023-01-24 01:37:23.682345: step: 494/463, loss: 0.09144240617752075 2023-01-24 01:37:24.280589: step: 496/463, loss: 0.30299872159957886 2023-01-24 01:37:24.951185: step: 498/463, loss: 0.3642329275608063 2023-01-24 01:37:25.533965: step: 500/463, loss: 0.12590834498405457 2023-01-24 01:37:26.173703: step: 502/463, loss: 0.05030178278684616 2023-01-24 01:37:26.862996: step: 504/463, loss: 0.09050225466489792 2023-01-24 01:37:27.485839: step: 506/463, loss: 0.17131109535694122 2023-01-24 01:37:28.130533: step: 508/463, loss: 0.0957876667380333 2023-01-24 01:37:28.692775: step: 510/463, loss: 0.5671736598014832 2023-01-24 01:37:29.289380: step: 512/463, loss: 0.19126763939857483 2023-01-24 01:37:29.949864: step: 514/463, loss: 0.08095432072877884 2023-01-24 01:37:30.613336: step: 516/463, loss: 0.1317719668149948 2023-01-24 01:37:31.258327: step: 518/463, loss: 0.40390151739120483 2023-01-24 01:37:31.937495: step: 520/463, loss: 0.08847786486148834 2023-01-24 01:37:32.554520: step: 522/463, loss: 0.15618719160556793 2023-01-24 01:37:33.265930: step: 524/463, loss: 0.20740501582622528 2023-01-24 01:37:33.854261: step: 526/463, loss: 0.19934247434139252 2023-01-24 01:37:34.523017: step: 528/463, loss: 0.18491405248641968 2023-01-24 01:37:35.132625: step: 530/463, loss: 0.07651141285896301 2023-01-24 01:37:35.711313: step: 532/463, loss: 1.7004739046096802 2023-01-24 01:37:36.366064: step: 534/463, loss: 0.059145186096429825 2023-01-24 01:37:36.975635: step: 536/463, loss: 0.11134350299835205 2023-01-24 01:37:37.588275: step: 538/463, loss: 0.08535700291395187 2023-01-24 01:37:38.211212: step: 540/463, loss: 0.5215968489646912 2023-01-24 01:37:38.854450: step: 542/463, loss: 0.12605799734592438 2023-01-24 01:37:39.462754: step: 544/463, loss: 0.11664626747369766 2023-01-24 01:37:40.033980: step: 546/463, loss: 0.2695358395576477 2023-01-24 01:37:40.700073: step: 548/463, loss: 0.23393937945365906 2023-01-24 01:37:41.295522: step: 550/463, loss: 0.2841077148914337 2023-01-24 01:37:41.859370: step: 552/463, loss: 4.392961025238037 2023-01-24 01:37:42.486945: step: 554/463, loss: 2.2758395671844482 2023-01-24 01:37:43.076250: step: 556/463, loss: 0.08077837526798248 2023-01-24 01:37:43.701621: step: 558/463, loss: 0.1420605629682541 2023-01-24 01:37:44.383464: step: 560/463, loss: 0.1339176446199417 2023-01-24 01:37:44.965213: step: 562/463, loss: 0.11798575520515442 2023-01-24 01:37:45.620765: step: 564/463, loss: 0.9329640865325928 2023-01-24 01:37:46.177250: step: 566/463, loss: 0.07212428003549576 2023-01-24 01:37:46.777742: step: 568/463, loss: 0.3512352705001831 2023-01-24 01:37:47.430929: step: 570/463, loss: 0.15040907263755798 2023-01-24 01:37:47.989726: step: 572/463, loss: 0.4386076331138611 2023-01-24 01:37:48.581411: step: 574/463, loss: 0.08159884810447693 2023-01-24 01:37:49.228598: step: 576/463, loss: 0.16141971945762634 2023-01-24 01:37:49.789162: step: 578/463, loss: 0.1143106147646904 2023-01-24 01:37:50.417836: step: 580/463, loss: 0.37543439865112305 2023-01-24 01:37:51.056436: step: 582/463, loss: 0.16019099950790405 2023-01-24 01:37:51.674592: step: 584/463, loss: 0.18394871056079865 2023-01-24 01:37:52.345636: step: 586/463, loss: 0.08112835884094238 2023-01-24 01:37:52.931630: step: 588/463, loss: 0.17310790717601776 2023-01-24 01:37:53.516413: step: 590/463, loss: 0.12453050166368484 2023-01-24 01:37:54.045428: step: 592/463, loss: 0.16222529113292694 2023-01-24 01:37:54.704956: step: 594/463, loss: 0.2559312880039215 2023-01-24 01:37:55.325013: step: 596/463, loss: 0.3091009259223938 2023-01-24 01:37:55.963004: step: 598/463, loss: 0.06659483164548874 2023-01-24 01:37:56.561083: step: 600/463, loss: 0.15208786725997925 2023-01-24 01:37:57.222398: step: 602/463, loss: 0.09993623197078705 2023-01-24 01:37:57.824386: step: 604/463, loss: 0.22920849919319153 2023-01-24 01:37:58.432208: step: 606/463, loss: 0.11052244901657104 2023-01-24 01:37:59.021289: step: 608/463, loss: 0.3802890479564667 2023-01-24 01:37:59.676796: step: 610/463, loss: 0.1383506804704666 2023-01-24 01:38:00.356903: step: 612/463, loss: 0.028482681140303612 2023-01-24 01:38:00.945512: step: 614/463, loss: 1.3009333610534668 2023-01-24 01:38:01.557832: step: 616/463, loss: 0.18642878532409668 2023-01-24 01:38:02.140931: step: 618/463, loss: 0.12663745880126953 2023-01-24 01:38:02.705533: step: 620/463, loss: 0.05997009947896004 2023-01-24 01:38:03.276699: step: 622/463, loss: 0.12205195426940918 2023-01-24 01:38:03.942227: step: 624/463, loss: 0.3809179365634918 2023-01-24 01:38:04.478620: step: 626/463, loss: 0.18063528835773468 2023-01-24 01:38:05.122600: step: 628/463, loss: 0.14560022950172424 2023-01-24 01:38:05.867217: step: 630/463, loss: 0.36161816120147705 2023-01-24 01:38:06.487700: step: 632/463, loss: 0.7624815702438354 2023-01-24 01:38:07.083065: step: 634/463, loss: 0.042645346373319626 2023-01-24 01:38:07.706762: step: 636/463, loss: 0.1863245815038681 2023-01-24 01:38:08.348894: step: 638/463, loss: 0.06144614890217781 2023-01-24 01:38:08.889915: step: 640/463, loss: 0.12099575996398926 2023-01-24 01:38:09.477168: step: 642/463, loss: 0.06944125890731812 2023-01-24 01:38:10.060993: step: 644/463, loss: 0.09125297516584396 2023-01-24 01:38:10.724763: step: 646/463, loss: 0.31644928455352783 2023-01-24 01:38:11.316965: step: 648/463, loss: 0.244612917304039 2023-01-24 01:38:11.926767: step: 650/463, loss: 0.1229962483048439 2023-01-24 01:38:12.561817: step: 652/463, loss: 0.16088464856147766 2023-01-24 01:38:13.307637: step: 654/463, loss: 0.592374861240387 2023-01-24 01:38:13.977436: step: 656/463, loss: 0.12203109264373779 2023-01-24 01:38:14.620039: step: 658/463, loss: 0.15243643522262573 2023-01-24 01:38:15.237902: step: 660/463, loss: 0.06060110405087471 2023-01-24 01:38:15.855337: step: 662/463, loss: 0.1305978149175644 2023-01-24 01:38:16.489424: step: 664/463, loss: 0.0422486811876297 2023-01-24 01:38:17.093223: step: 666/463, loss: 0.04774981364607811 2023-01-24 01:38:17.732235: step: 668/463, loss: 0.361824631690979 2023-01-24 01:38:18.364734: step: 670/463, loss: 0.1629701405763626 2023-01-24 01:38:18.976932: step: 672/463, loss: 0.10527696460485458 2023-01-24 01:38:19.659158: step: 674/463, loss: 0.10681945085525513 2023-01-24 01:38:20.267906: step: 676/463, loss: 0.4135546088218689 2023-01-24 01:38:20.898433: step: 678/463, loss: 0.08351342380046844 2023-01-24 01:38:21.496183: step: 680/463, loss: 0.07854975014925003 2023-01-24 01:38:22.232185: step: 682/463, loss: 0.5218799710273743 2023-01-24 01:38:22.844420: step: 684/463, loss: 0.06561725586652756 2023-01-24 01:38:23.474571: step: 686/463, loss: 0.40641576051712036 2023-01-24 01:38:24.073151: step: 688/463, loss: 0.07957141101360321 2023-01-24 01:38:24.706926: step: 690/463, loss: 0.14579223096370697 2023-01-24 01:38:25.252463: step: 692/463, loss: 0.6082996726036072 2023-01-24 01:38:25.854921: step: 694/463, loss: 0.1418827623128891 2023-01-24 01:38:26.491922: step: 696/463, loss: 0.2510085105895996 2023-01-24 01:38:27.113415: step: 698/463, loss: 0.0828835517168045 2023-01-24 01:38:27.630018: step: 700/463, loss: 0.1153096854686737 2023-01-24 01:38:28.242030: step: 702/463, loss: 0.29370972514152527 2023-01-24 01:38:28.901265: step: 704/463, loss: 0.15233348309993744 2023-01-24 01:38:29.486535: step: 706/463, loss: 0.199041947722435 2023-01-24 01:38:30.095745: step: 708/463, loss: 0.08511388301849365 2023-01-24 01:38:30.703575: step: 710/463, loss: 0.5866748094558716 2023-01-24 01:38:31.331041: step: 712/463, loss: 0.11261086165904999 2023-01-24 01:38:31.891761: step: 714/463, loss: 0.1824646145105362 2023-01-24 01:38:32.487745: step: 716/463, loss: 0.566473126411438 2023-01-24 01:38:33.050796: step: 718/463, loss: 0.21250124275684357 2023-01-24 01:38:33.601425: step: 720/463, loss: 0.14525753259658813 2023-01-24 01:38:34.288629: step: 722/463, loss: 0.1874159425497055 2023-01-24 01:38:34.936136: step: 724/463, loss: 0.5277420878410339 2023-01-24 01:38:35.588964: step: 726/463, loss: 0.08111932873725891 2023-01-24 01:38:36.192397: step: 728/463, loss: 0.5192026495933533 2023-01-24 01:38:36.758139: step: 730/463, loss: 0.05693024396896362 2023-01-24 01:38:37.367298: step: 732/463, loss: 0.10415806621313095 2023-01-24 01:38:37.956066: step: 734/463, loss: 0.14154361188411713 2023-01-24 01:38:38.593616: step: 736/463, loss: 0.24166518449783325 2023-01-24 01:38:39.263255: step: 738/463, loss: 0.057798732072114944 2023-01-24 01:38:39.895091: step: 740/463, loss: 0.47920742630958557 2023-01-24 01:38:40.444065: step: 742/463, loss: 0.13112948834896088 2023-01-24 01:38:41.092727: step: 744/463, loss: 0.13557542860507965 2023-01-24 01:38:41.704803: step: 746/463, loss: 0.07506858557462692 2023-01-24 01:38:42.359472: step: 748/463, loss: 0.09090665727853775 2023-01-24 01:38:43.003144: step: 750/463, loss: 0.1906386762857437 2023-01-24 01:38:43.666772: step: 752/463, loss: 0.05091777443885803 2023-01-24 01:38:44.215041: step: 754/463, loss: 0.8419276475906372 2023-01-24 01:38:44.783333: step: 756/463, loss: 0.06863037496805191 2023-01-24 01:38:45.463445: step: 758/463, loss: 0.08943196386098862 2023-01-24 01:38:46.093856: step: 760/463, loss: 0.2722793221473694 2023-01-24 01:38:46.717286: step: 762/463, loss: 0.23512597382068634 2023-01-24 01:38:47.310119: step: 764/463, loss: 0.10137669742107391 2023-01-24 01:38:47.883346: step: 766/463, loss: 0.21107909083366394 2023-01-24 01:38:48.581358: step: 768/463, loss: 0.042703546583652496 2023-01-24 01:38:49.259297: step: 770/463, loss: 0.18837223947048187 2023-01-24 01:38:49.842400: step: 772/463, loss: 0.4225706160068512 2023-01-24 01:38:50.475482: step: 774/463, loss: 0.07106520980596542 2023-01-24 01:38:51.186486: step: 776/463, loss: 0.19308072328567505 2023-01-24 01:38:51.781261: step: 778/463, loss: 0.21760398149490356 2023-01-24 01:38:52.453545: step: 780/463, loss: 0.29596397280693054 2023-01-24 01:38:53.126383: step: 782/463, loss: 0.1805022656917572 2023-01-24 01:38:53.746315: step: 784/463, loss: 0.4072858691215515 2023-01-24 01:38:54.299305: step: 786/463, loss: 0.04376452416181564 2023-01-24 01:38:55.058556: step: 788/463, loss: 0.2397332340478897 2023-01-24 01:38:55.688858: step: 790/463, loss: 0.11247055232524872 2023-01-24 01:38:56.279655: step: 792/463, loss: 0.11076261848211288 2023-01-24 01:38:56.928840: step: 794/463, loss: 0.06936417520046234 2023-01-24 01:38:57.558290: step: 796/463, loss: 0.17803461849689484 2023-01-24 01:38:58.128358: step: 798/463, loss: 0.5655487775802612 2023-01-24 01:38:58.752612: step: 800/463, loss: 0.20942959189414978 2023-01-24 01:38:59.375007: step: 802/463, loss: 0.04037226736545563 2023-01-24 01:38:59.919173: step: 804/463, loss: 0.08091913163661957 2023-01-24 01:39:00.600160: step: 806/463, loss: 0.08412367105484009 2023-01-24 01:39:01.198887: step: 808/463, loss: 0.12440893054008484 2023-01-24 01:39:01.842630: step: 810/463, loss: 0.17068389058113098 2023-01-24 01:39:02.519588: step: 812/463, loss: 0.09930028021335602 2023-01-24 01:39:03.104862: step: 814/463, loss: 0.17937523126602173 2023-01-24 01:39:03.733243: step: 816/463, loss: 0.2971479296684265 2023-01-24 01:39:04.325795: step: 818/463, loss: 0.08814814686775208 2023-01-24 01:39:04.911571: step: 820/463, loss: 0.046063054352998734 2023-01-24 01:39:05.510345: step: 822/463, loss: 0.24929530918598175 2023-01-24 01:39:06.108540: step: 824/463, loss: 0.24770347774028778 2023-01-24 01:39:06.666756: step: 826/463, loss: 0.09377054125070572 2023-01-24 01:39:07.247607: step: 828/463, loss: 0.06481131166219711 2023-01-24 01:39:07.821661: step: 830/463, loss: 0.6607604026794434 2023-01-24 01:39:08.487215: step: 832/463, loss: 0.1551063507795334 2023-01-24 01:39:09.158557: step: 834/463, loss: 0.09614669531583786 2023-01-24 01:39:09.775688: step: 836/463, loss: 0.19514885544776917 2023-01-24 01:39:10.391743: step: 838/463, loss: 0.09270910173654556 2023-01-24 01:39:11.015938: step: 840/463, loss: 0.10282081365585327 2023-01-24 01:39:11.646824: step: 842/463, loss: 0.11490132659673691 2023-01-24 01:39:12.253242: step: 844/463, loss: 0.07945281267166138 2023-01-24 01:39:12.825935: step: 846/463, loss: 0.08512766659259796 2023-01-24 01:39:13.604879: step: 848/463, loss: 0.07688874006271362 2023-01-24 01:39:14.200858: step: 850/463, loss: 0.14501172304153442 2023-01-24 01:39:14.808623: step: 852/463, loss: 0.20960292220115662 2023-01-24 01:39:15.476115: step: 854/463, loss: 0.31516411900520325 2023-01-24 01:39:16.115219: step: 856/463, loss: 0.09838423132896423 2023-01-24 01:39:16.670131: step: 858/463, loss: 0.09700152277946472 2023-01-24 01:39:17.358417: step: 860/463, loss: 0.31320977210998535 2023-01-24 01:39:17.991050: step: 862/463, loss: 0.14346487820148468 2023-01-24 01:39:18.671372: step: 864/463, loss: 0.06369202584028244 2023-01-24 01:39:19.202036: step: 866/463, loss: 0.4695272445678711 2023-01-24 01:39:19.763156: step: 868/463, loss: 0.06738274544477463 2023-01-24 01:39:20.399408: step: 870/463, loss: 0.07605982571840286 2023-01-24 01:39:21.042713: step: 872/463, loss: 0.053483352065086365 2023-01-24 01:39:21.722809: step: 874/463, loss: 0.06770220398902893 2023-01-24 01:39:22.430807: step: 876/463, loss: 0.09027217328548431 2023-01-24 01:39:23.018555: step: 878/463, loss: 0.7285488843917847 2023-01-24 01:39:23.734234: step: 880/463, loss: 0.18153871595859528 2023-01-24 01:39:24.303933: step: 882/463, loss: 0.05441578850150108 2023-01-24 01:39:24.913157: step: 884/463, loss: 0.17478787899017334 2023-01-24 01:39:25.538808: step: 886/463, loss: 0.0539497546851635 2023-01-24 01:39:26.150432: step: 888/463, loss: 0.2606639266014099 2023-01-24 01:39:26.738625: step: 890/463, loss: 0.05971585959196091 2023-01-24 01:39:27.362735: step: 892/463, loss: 0.11647982895374298 2023-01-24 01:39:27.946581: step: 894/463, loss: 0.19568830728530884 2023-01-24 01:39:28.615753: step: 896/463, loss: 0.10186778008937836 2023-01-24 01:39:29.210811: step: 898/463, loss: 0.15877921879291534 2023-01-24 01:39:29.817675: step: 900/463, loss: 0.059198394417762756 2023-01-24 01:39:30.458621: step: 902/463, loss: 0.17944201827049255 2023-01-24 01:39:31.035371: step: 904/463, loss: 0.913094162940979 2023-01-24 01:39:31.617056: step: 906/463, loss: 0.16600652039051056 2023-01-24 01:39:32.247329: step: 908/463, loss: 0.034814320504665375 2023-01-24 01:39:32.855462: step: 910/463, loss: 0.1060621440410614 2023-01-24 01:39:33.516878: step: 912/463, loss: 0.05400104448199272 2023-01-24 01:39:34.109636: step: 914/463, loss: 0.13888785243034363 2023-01-24 01:39:34.759515: step: 916/463, loss: 0.2110578715801239 2023-01-24 01:39:35.386880: step: 918/463, loss: 0.11863532662391663 2023-01-24 01:39:35.963248: step: 920/463, loss: 0.19453021883964539 2023-01-24 01:39:36.533473: step: 922/463, loss: 0.0719323381781578 2023-01-24 01:39:37.159020: step: 924/463, loss: 0.22827577590942383 2023-01-24 01:39:37.767369: step: 926/463, loss: 0.26101136207580566 ================================================== Loss: 0.254 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3668275457482662, 'r': 0.31044608236001275, 'f1': 0.33629000082985966}, 'combined': 0.24779263219042288, 'epoch': 13} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3566039167197559, 'r': 0.3726642034102596, 'f1': 0.36445721645875273}, 'combined': 0.2824979381163538, 'epoch': 13} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3395809221609249, 'r': 0.3112288148078306, 'f1': 0.32478729782916177}, 'combined': 0.23931695629517183, 'epoch': 13} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34499432273954556, 'r': 0.37860590197703803, 'f1': 0.36101947532955075}, 'combined': 0.27983327752816856, 'epoch': 13} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3322927279464418, 'r': 0.30013536717743133, 'f1': 0.3153964875423855}, 'combined': 0.23239741187333665, 'epoch': 13} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3419664652928791, 'r': 0.3639679841995901, 'f1': 0.35262436937591635}, 'combined': 0.27332606621482514, 'epoch': 13} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.33564814814814814, 'r': 0.25892857142857145, 'f1': 0.2923387096774194}, 'combined': 0.19489247311827956, 'epoch': 13} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3541666666666667, 'r': 0.3695652173913043, 'f1': 0.3617021276595745}, 'combined': 0.18085106382978725, 'epoch': 13} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 13} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3664599181442166, 'r': 0.3122210687794179, 'f1': 0.33717316239088774}, 'combined': 0.2484433828143383, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3655563493147106, 'r': 0.32203773630105453, 'f1': 0.34241987150998204}, 'combined': 0.26541635973501004, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.42857142857142855, 'r': 0.2571428571428571, 'f1': 0.3214285714285714}, 'combined': 0.21428571428571425, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3283040393999639, 'r': 0.31335281179920654, 'f1': 0.3206542365401589}, 'combined': 0.23627154271380127, 'epoch': 6} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34970123997901775, 'r': 0.3199189915192662, 'f1': 0.3341478132367659}, 'combined': 0.2590045250926128, 'epoch': 6} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.38, 'r': 0.41304347826086957, 'f1': 0.39583333333333337}, 'combined': 0.19791666666666669, 'epoch': 6} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3322927279464418, 'r': 0.30013536717743133, 'f1': 0.3153964875423855}, 'combined': 0.23239741187333665, 'epoch': 13} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3419664652928791, 'r': 0.3639679841995901, 'f1': 0.35262436937591635}, 'combined': 0.27332606621482514, 'epoch': 13} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 13} ****************************** Epoch: 14 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:42:21.010689: step: 2/463, loss: 0.10308526456356049 2023-01-24 01:42:21.686822: step: 4/463, loss: 0.16714175045490265 2023-01-24 01:42:22.317775: step: 6/463, loss: 0.010249671526253223 2023-01-24 01:42:22.968948: step: 8/463, loss: 0.34679824113845825 2023-01-24 01:42:23.620806: step: 10/463, loss: 0.05703078210353851 2023-01-24 01:42:24.203100: step: 12/463, loss: 0.14334838092327118 2023-01-24 01:42:24.799389: step: 14/463, loss: 0.10722551494836807 2023-01-24 01:42:25.352590: step: 16/463, loss: 0.23808908462524414 2023-01-24 01:42:26.020763: step: 18/463, loss: 0.18627379834651947 2023-01-24 01:42:26.553377: step: 20/463, loss: 0.06190408393740654 2023-01-24 01:42:27.170435: step: 22/463, loss: 0.2690027356147766 2023-01-24 01:42:27.763205: step: 24/463, loss: 0.029313528910279274 2023-01-24 01:42:28.350223: step: 26/463, loss: 0.08370479941368103 2023-01-24 01:42:28.958980: step: 28/463, loss: 0.17957046627998352 2023-01-24 01:42:29.566128: step: 30/463, loss: 0.32404181361198425 2023-01-24 01:42:30.159111: step: 32/463, loss: 0.11481583118438721 2023-01-24 01:42:30.738172: step: 34/463, loss: 0.17802946269512177 2023-01-24 01:42:31.346646: step: 36/463, loss: 0.18063296377658844 2023-01-24 01:42:31.882857: step: 38/463, loss: 0.043350815773010254 2023-01-24 01:42:32.403124: step: 40/463, loss: 0.34427180886268616 2023-01-24 01:42:33.049514: step: 42/463, loss: 0.08118477463722229 2023-01-24 01:42:33.721177: step: 44/463, loss: 0.18487665057182312 2023-01-24 01:42:34.297711: step: 46/463, loss: 0.12369714677333832 2023-01-24 01:42:34.941729: step: 48/463, loss: 1.3812236785888672 2023-01-24 01:42:35.590840: step: 50/463, loss: 0.14374962449073792 2023-01-24 01:42:36.148340: step: 52/463, loss: 0.05366966500878334 2023-01-24 01:42:36.778089: step: 54/463, loss: 3.730415105819702 2023-01-24 01:42:37.458413: step: 56/463, loss: 0.430982768535614 2023-01-24 01:42:38.042067: step: 58/463, loss: 0.12790866196155548 2023-01-24 01:42:38.642682: step: 60/463, loss: 0.3020043671131134 2023-01-24 01:42:39.258923: step: 62/463, loss: 0.9591559767723083 2023-01-24 01:42:39.932402: step: 64/463, loss: 0.014086284674704075 2023-01-24 01:42:40.526638: step: 66/463, loss: 0.6733552813529968 2023-01-24 01:42:41.138569: step: 68/463, loss: 0.14249533414840698 2023-01-24 01:42:41.710090: step: 70/463, loss: 1.796962857246399 2023-01-24 01:42:42.346991: step: 72/463, loss: 0.061169784516096115 2023-01-24 01:42:42.937510: step: 74/463, loss: 0.06592588871717453 2023-01-24 01:42:43.573289: step: 76/463, loss: 0.0839243084192276 2023-01-24 01:42:44.166677: step: 78/463, loss: 0.3629399836063385 2023-01-24 01:42:44.762837: step: 80/463, loss: 0.8248944878578186 2023-01-24 01:42:45.373263: step: 82/463, loss: 0.2846137583255768 2023-01-24 01:42:46.011120: step: 84/463, loss: 0.10227452963590622 2023-01-24 01:42:46.619867: step: 86/463, loss: 1.1886745691299438 2023-01-24 01:42:47.183880: step: 88/463, loss: 0.05253385007381439 2023-01-24 01:42:47.785024: step: 90/463, loss: 0.055734481662511826 2023-01-24 01:42:48.411015: step: 92/463, loss: 0.030808456242084503 2023-01-24 01:42:49.114282: step: 94/463, loss: 0.8350070714950562 2023-01-24 01:42:49.723925: step: 96/463, loss: 0.24728679656982422 2023-01-24 01:42:50.385615: step: 98/463, loss: 0.9745913147926331 2023-01-24 01:42:50.947571: step: 100/463, loss: 0.17144189774990082 2023-01-24 01:42:51.599642: step: 102/463, loss: 0.07628318667411804 2023-01-24 01:42:52.269926: step: 104/463, loss: 0.056118909269571304 2023-01-24 01:42:52.838189: step: 106/463, loss: 0.16399188339710236 2023-01-24 01:42:53.422684: step: 108/463, loss: 0.6762322783470154 2023-01-24 01:42:54.096962: step: 110/463, loss: 0.0123615562915802 2023-01-24 01:42:54.723446: step: 112/463, loss: 0.0478915199637413 2023-01-24 01:42:55.315595: step: 114/463, loss: 0.08068694174289703 2023-01-24 01:42:55.897380: step: 116/463, loss: 0.0741761326789856 2023-01-24 01:42:56.495298: step: 118/463, loss: 1.2439968585968018 2023-01-24 01:42:57.106504: step: 120/463, loss: 0.047961268573999405 2023-01-24 01:42:57.726450: step: 122/463, loss: 0.38738203048706055 2023-01-24 01:42:58.335705: step: 124/463, loss: 0.12012168765068054 2023-01-24 01:42:58.961872: step: 126/463, loss: 0.04441584646701813 2023-01-24 01:42:59.547862: step: 128/463, loss: 0.08495910465717316 2023-01-24 01:43:00.154656: step: 130/463, loss: 0.04328349232673645 2023-01-24 01:43:00.797626: step: 132/463, loss: 0.3903776705265045 2023-01-24 01:43:01.377969: step: 134/463, loss: 0.03481090068817139 2023-01-24 01:43:02.040669: step: 136/463, loss: 0.10361975431442261 2023-01-24 01:43:02.714888: step: 138/463, loss: 0.1403813362121582 2023-01-24 01:43:03.370002: step: 140/463, loss: 0.025142883881926537 2023-01-24 01:43:03.888675: step: 142/463, loss: 0.12225999683141708 2023-01-24 01:43:04.498175: step: 144/463, loss: 0.06028455123305321 2023-01-24 01:43:05.108389: step: 146/463, loss: 0.13699457049369812 2023-01-24 01:43:05.782755: step: 148/463, loss: 0.28219887614250183 2023-01-24 01:43:06.384686: step: 150/463, loss: 0.28693246841430664 2023-01-24 01:43:06.968823: step: 152/463, loss: 0.3915354311466217 2023-01-24 01:43:07.555856: step: 154/463, loss: 0.06182054057717323 2023-01-24 01:43:08.143038: step: 156/463, loss: 0.16470405459403992 2023-01-24 01:43:08.689752: step: 158/463, loss: 0.12182014435529709 2023-01-24 01:43:09.201113: step: 160/463, loss: 0.11554552614688873 2023-01-24 01:43:09.811898: step: 162/463, loss: 0.5084474086761475 2023-01-24 01:43:10.432483: step: 164/463, loss: 0.16285544633865356 2023-01-24 01:43:11.079952: step: 166/463, loss: 2.6137630939483643 2023-01-24 01:43:11.690538: step: 168/463, loss: 0.03740686550736427 2023-01-24 01:43:12.395931: step: 170/463, loss: 0.14077307283878326 2023-01-24 01:43:12.979296: step: 172/463, loss: 0.09140763431787491 2023-01-24 01:43:13.601328: step: 174/463, loss: 0.09143885225057602 2023-01-24 01:43:14.227322: step: 176/463, loss: 0.07552574574947357 2023-01-24 01:43:14.838821: step: 178/463, loss: 0.06250397861003876 2023-01-24 01:43:15.445182: step: 180/463, loss: 0.1933819204568863 2023-01-24 01:43:16.000940: step: 182/463, loss: 0.21095974743366241 2023-01-24 01:43:16.686569: step: 184/463, loss: 0.1560210883617401 2023-01-24 01:43:17.265342: step: 186/463, loss: 0.0751553401350975 2023-01-24 01:43:17.970052: step: 188/463, loss: 0.09765796363353729 2023-01-24 01:43:18.649753: step: 190/463, loss: 0.39050137996673584 2023-01-24 01:43:19.262860: step: 192/463, loss: 0.08853725343942642 2023-01-24 01:43:19.901405: step: 194/463, loss: 0.1773661971092224 2023-01-24 01:43:20.502605: step: 196/463, loss: 0.0525774210691452 2023-01-24 01:43:21.144788: step: 198/463, loss: 0.05022650212049484 2023-01-24 01:43:21.765802: step: 200/463, loss: 0.1793585568666458 2023-01-24 01:43:22.371300: step: 202/463, loss: 0.12643963098526 2023-01-24 01:43:23.027470: step: 204/463, loss: 0.05982673168182373 2023-01-24 01:43:23.611149: step: 206/463, loss: 0.16795258224010468 2023-01-24 01:43:24.203298: step: 208/463, loss: 0.3783799707889557 2023-01-24 01:43:24.740980: step: 210/463, loss: 0.22475412487983704 2023-01-24 01:43:25.354732: step: 212/463, loss: 0.20961827039718628 2023-01-24 01:43:25.923482: step: 214/463, loss: 0.03246244043111801 2023-01-24 01:43:26.504709: step: 216/463, loss: 0.10360752046108246 2023-01-24 01:43:27.087718: step: 218/463, loss: 0.08381180465221405 2023-01-24 01:43:27.794767: step: 220/463, loss: 0.22995568811893463 2023-01-24 01:43:28.401452: step: 222/463, loss: 0.07965733855962753 2023-01-24 01:43:29.006237: step: 224/463, loss: 0.12934575974941254 2023-01-24 01:43:29.578242: step: 226/463, loss: 0.08320559561252594 2023-01-24 01:43:30.227129: step: 228/463, loss: 0.13966448605060577 2023-01-24 01:43:30.826888: step: 230/463, loss: 0.6244457364082336 2023-01-24 01:43:31.387194: step: 232/463, loss: 0.2568169832229614 2023-01-24 01:43:31.967920: step: 234/463, loss: 0.0757133886218071 2023-01-24 01:43:32.637550: step: 236/463, loss: 4.109152317047119 2023-01-24 01:43:33.336320: step: 238/463, loss: 0.5262159109115601 2023-01-24 01:43:33.967876: step: 240/463, loss: 0.09857461601495743 2023-01-24 01:43:34.542568: step: 242/463, loss: 0.4256633222103119 2023-01-24 01:43:35.234717: step: 244/463, loss: 0.04133503884077072 2023-01-24 01:43:35.915747: step: 246/463, loss: 0.35673826932907104 2023-01-24 01:43:36.610020: step: 248/463, loss: 0.12713421881198883 2023-01-24 01:43:37.296272: step: 250/463, loss: 0.10045666247606277 2023-01-24 01:43:37.877583: step: 252/463, loss: 0.05888919532299042 2023-01-24 01:43:38.490567: step: 254/463, loss: 0.0960940271615982 2023-01-24 01:43:39.107300: step: 256/463, loss: 0.12819786369800568 2023-01-24 01:43:39.808455: step: 258/463, loss: 0.10528997331857681 2023-01-24 01:43:40.434798: step: 260/463, loss: 0.16441646218299866 2023-01-24 01:43:41.043586: step: 262/463, loss: 0.1633167564868927 2023-01-24 01:43:41.630078: step: 264/463, loss: 0.8436607718467712 2023-01-24 01:43:42.248157: step: 266/463, loss: 0.1013575866818428 2023-01-24 01:43:42.883560: step: 268/463, loss: 0.10785786807537079 2023-01-24 01:43:43.493315: step: 270/463, loss: 0.34189173579216003 2023-01-24 01:43:44.061109: step: 272/463, loss: 0.04892894998192787 2023-01-24 01:43:44.734149: step: 274/463, loss: 0.049560192972421646 2023-01-24 01:43:45.362840: step: 276/463, loss: 0.018713340163230896 2023-01-24 01:43:45.968126: step: 278/463, loss: 0.0486927255988121 2023-01-24 01:43:46.574539: step: 280/463, loss: 0.15397493541240692 2023-01-24 01:43:47.283385: step: 282/463, loss: 0.1246669590473175 2023-01-24 01:43:47.868590: step: 284/463, loss: 0.023076198995113373 2023-01-24 01:43:48.456400: step: 286/463, loss: 0.038538575172424316 2023-01-24 01:43:49.060014: step: 288/463, loss: 0.09551247954368591 2023-01-24 01:43:49.719121: step: 290/463, loss: 0.11477039009332657 2023-01-24 01:43:50.353857: step: 292/463, loss: 0.11501011252403259 2023-01-24 01:43:50.982206: step: 294/463, loss: 0.1834041327238083 2023-01-24 01:43:51.596819: step: 296/463, loss: 0.1461332142353058 2023-01-24 01:43:52.218764: step: 298/463, loss: 0.19254513084888458 2023-01-24 01:43:52.828595: step: 300/463, loss: 0.08515982329845428 2023-01-24 01:43:53.460467: step: 302/463, loss: 0.23184725642204285 2023-01-24 01:43:54.081299: step: 304/463, loss: 0.10106851905584335 2023-01-24 01:43:54.666598: step: 306/463, loss: 0.09503541886806488 2023-01-24 01:43:55.322155: step: 308/463, loss: 0.4804510176181793 2023-01-24 01:43:56.013804: step: 310/463, loss: 0.6137498617172241 2023-01-24 01:43:56.679473: step: 312/463, loss: 0.14076784253120422 2023-01-24 01:43:57.262811: step: 314/463, loss: 0.08881313353776932 2023-01-24 01:43:57.906277: step: 316/463, loss: 0.26988980174064636 2023-01-24 01:43:58.545786: step: 318/463, loss: 0.14868198335170746 2023-01-24 01:43:59.129320: step: 320/463, loss: 0.13986501097679138 2023-01-24 01:43:59.764259: step: 322/463, loss: 0.082999087870121 2023-01-24 01:44:00.488140: step: 324/463, loss: 0.10994285345077515 2023-01-24 01:44:01.116226: step: 326/463, loss: 0.08804375678300858 2023-01-24 01:44:01.737744: step: 328/463, loss: 0.08210384845733643 2023-01-24 01:44:02.320929: step: 330/463, loss: 2.456934928894043 2023-01-24 01:44:02.972205: step: 332/463, loss: 0.13021592795848846 2023-01-24 01:44:03.609253: step: 334/463, loss: 0.5375983119010925 2023-01-24 01:44:04.220060: step: 336/463, loss: 0.06238596886396408 2023-01-24 01:44:04.855295: step: 338/463, loss: 0.11796872317790985 2023-01-24 01:44:05.472995: step: 340/463, loss: 0.08578043431043625 2023-01-24 01:44:06.068129: step: 342/463, loss: 0.12710529565811157 2023-01-24 01:44:06.663836: step: 344/463, loss: 0.17944364249706268 2023-01-24 01:44:07.244535: step: 346/463, loss: 0.07273881137371063 2023-01-24 01:44:07.870675: step: 348/463, loss: 0.11611678451299667 2023-01-24 01:44:08.472171: step: 350/463, loss: 0.07212795317173004 2023-01-24 01:44:08.996367: step: 352/463, loss: 0.21441487967967987 2023-01-24 01:44:09.667492: step: 354/463, loss: 0.07607550173997879 2023-01-24 01:44:10.296920: step: 356/463, loss: 0.06200196221470833 2023-01-24 01:44:10.916001: step: 358/463, loss: 0.043460387736558914 2023-01-24 01:44:11.669434: step: 360/463, loss: 0.03744329512119293 2023-01-24 01:44:12.332809: step: 362/463, loss: 0.11077764630317688 2023-01-24 01:44:12.961091: step: 364/463, loss: 0.10055799782276154 2023-01-24 01:44:13.646583: step: 366/463, loss: 0.1503055989742279 2023-01-24 01:44:14.294145: step: 368/463, loss: 0.03452283516526222 2023-01-24 01:44:14.867865: step: 370/463, loss: 0.17187127470970154 2023-01-24 01:44:15.523336: step: 372/463, loss: 0.36115145683288574 2023-01-24 01:44:16.130071: step: 374/463, loss: 0.1219983696937561 2023-01-24 01:44:16.771388: step: 376/463, loss: 0.12503086030483246 2023-01-24 01:44:17.367061: step: 378/463, loss: 0.18449562788009644 2023-01-24 01:44:18.024178: step: 380/463, loss: 0.1929616630077362 2023-01-24 01:44:18.687548: step: 382/463, loss: 0.06423379480838776 2023-01-24 01:44:19.436806: step: 384/463, loss: 0.2761771082878113 2023-01-24 01:44:20.068817: step: 386/463, loss: 0.08925159275531769 2023-01-24 01:44:20.769047: step: 388/463, loss: 0.2523607313632965 2023-01-24 01:44:21.467596: step: 390/463, loss: 0.06463966518640518 2023-01-24 01:44:22.075798: step: 392/463, loss: 0.11851001530885696 2023-01-24 01:44:22.657561: step: 394/463, loss: 0.08548794686794281 2023-01-24 01:44:23.275017: step: 396/463, loss: 1.2107921838760376 2023-01-24 01:44:23.877443: step: 398/463, loss: 0.016283638775348663 2023-01-24 01:44:24.461243: step: 400/463, loss: 0.08287622034549713 2023-01-24 01:44:24.998896: step: 402/463, loss: 0.10828357189893723 2023-01-24 01:44:25.650514: step: 404/463, loss: 0.17843776941299438 2023-01-24 01:44:26.276109: step: 406/463, loss: 0.1125880628824234 2023-01-24 01:44:26.900179: step: 408/463, loss: 0.09485150873661041 2023-01-24 01:44:27.546973: step: 410/463, loss: 0.07006898522377014 2023-01-24 01:44:28.157265: step: 412/463, loss: 0.034832850098609924 2023-01-24 01:44:28.818547: step: 414/463, loss: 0.8836945295333862 2023-01-24 01:44:29.502351: step: 416/463, loss: 0.057975392788648605 2023-01-24 01:44:30.151232: step: 418/463, loss: 0.29926618933677673 2023-01-24 01:44:30.777103: step: 420/463, loss: 0.1539737582206726 2023-01-24 01:44:31.399785: step: 422/463, loss: 0.12392600625753403 2023-01-24 01:44:32.024891: step: 424/463, loss: 0.13452214002609253 2023-01-24 01:44:32.633662: step: 426/463, loss: 0.1232166513800621 2023-01-24 01:44:33.258095: step: 428/463, loss: 0.07230601459741592 2023-01-24 01:44:33.896671: step: 430/463, loss: 0.07903006672859192 2023-01-24 01:44:34.550161: step: 432/463, loss: 0.1106240525841713 2023-01-24 01:44:35.170175: step: 434/463, loss: 0.12355376780033112 2023-01-24 01:44:35.696597: step: 436/463, loss: 0.09508159011602402 2023-01-24 01:44:36.320232: step: 438/463, loss: 0.07258486747741699 2023-01-24 01:44:36.922432: step: 440/463, loss: 0.05816726014018059 2023-01-24 01:44:37.542017: step: 442/463, loss: 0.032864321023225784 2023-01-24 01:44:38.155726: step: 444/463, loss: 0.12572872638702393 2023-01-24 01:44:38.793701: step: 446/463, loss: 0.16226233541965485 2023-01-24 01:44:39.452823: step: 448/463, loss: 0.04432658106088638 2023-01-24 01:44:40.063409: step: 450/463, loss: 0.0903104692697525 2023-01-24 01:44:40.812156: step: 452/463, loss: 0.06945456564426422 2023-01-24 01:44:41.454443: step: 454/463, loss: 0.5241070985794067 2023-01-24 01:44:42.089071: step: 456/463, loss: 0.3017179071903229 2023-01-24 01:44:42.745630: step: 458/463, loss: 0.254629909992218 2023-01-24 01:44:43.417951: step: 460/463, loss: 0.33663269877433777 2023-01-24 01:44:44.117171: step: 462/463, loss: 0.09105712920427322 2023-01-24 01:44:44.754997: step: 464/463, loss: 0.07593025267124176 2023-01-24 01:44:45.407730: step: 466/463, loss: 0.0862785205245018 2023-01-24 01:44:46.045140: step: 468/463, loss: 0.1536739468574524 2023-01-24 01:44:46.702809: step: 470/463, loss: 0.12506939470767975 2023-01-24 01:44:47.323599: step: 472/463, loss: 0.11008240282535553 2023-01-24 01:44:47.947932: step: 474/463, loss: 0.3848530650138855 2023-01-24 01:44:48.555091: step: 476/463, loss: 0.3592287302017212 2023-01-24 01:44:49.165259: step: 478/463, loss: 0.0907362625002861 2023-01-24 01:44:49.776111: step: 480/463, loss: 1.080715537071228 2023-01-24 01:44:50.432839: step: 482/463, loss: 0.5033161640167236 2023-01-24 01:44:51.129902: step: 484/463, loss: 0.07604740560054779 2023-01-24 01:44:51.710402: step: 486/463, loss: 0.15551824867725372 2023-01-24 01:44:52.398736: step: 488/463, loss: 0.1979314684867859 2023-01-24 01:44:53.002017: step: 490/463, loss: 0.1543964296579361 2023-01-24 01:44:53.616061: step: 492/463, loss: 0.12534230947494507 2023-01-24 01:44:54.230425: step: 494/463, loss: 0.12120353430509567 2023-01-24 01:44:54.980150: step: 496/463, loss: 0.14934006333351135 2023-01-24 01:44:55.578292: step: 498/463, loss: 0.05352311581373215 2023-01-24 01:44:56.247212: step: 500/463, loss: 0.2460317313671112 2023-01-24 01:44:56.806428: step: 502/463, loss: 0.052115026861429214 2023-01-24 01:44:57.416521: step: 504/463, loss: 0.1804737150669098 2023-01-24 01:44:57.985203: step: 506/463, loss: 0.038035281002521515 2023-01-24 01:44:58.561258: step: 508/463, loss: 0.07762838900089264 2023-01-24 01:44:59.131588: step: 510/463, loss: 0.1987018883228302 2023-01-24 01:44:59.747503: step: 512/463, loss: 0.0974213108420372 2023-01-24 01:45:00.430051: step: 514/463, loss: 0.14953991770744324 2023-01-24 01:45:01.047769: step: 516/463, loss: 0.02866896614432335 2023-01-24 01:45:01.680460: step: 518/463, loss: 0.08622793853282928 2023-01-24 01:45:02.326118: step: 520/463, loss: 0.13547734916210175 2023-01-24 01:45:02.896584: step: 522/463, loss: 0.09220833331346512 2023-01-24 01:45:03.546085: step: 524/463, loss: 0.05490013211965561 2023-01-24 01:45:04.182192: step: 526/463, loss: 0.15554659068584442 2023-01-24 01:45:04.831368: step: 528/463, loss: 0.22622404992580414 2023-01-24 01:45:05.489198: step: 530/463, loss: 0.058216605335474014 2023-01-24 01:45:06.216210: step: 532/463, loss: 0.15217728912830353 2023-01-24 01:45:06.827315: step: 534/463, loss: 0.1438591182231903 2023-01-24 01:45:07.510163: step: 536/463, loss: 0.028094308450818062 2023-01-24 01:45:08.124001: step: 538/463, loss: 0.2892555296421051 2023-01-24 01:45:08.718627: step: 540/463, loss: 0.0639430359005928 2023-01-24 01:45:09.332813: step: 542/463, loss: 0.1315533071756363 2023-01-24 01:45:09.984589: step: 544/463, loss: 0.12075936049222946 2023-01-24 01:45:10.626116: step: 546/463, loss: 0.35180947184562683 2023-01-24 01:45:11.228975: step: 548/463, loss: 0.14668837189674377 2023-01-24 01:45:11.814816: step: 550/463, loss: 4.931124687194824 2023-01-24 01:45:12.407703: step: 552/463, loss: 0.14781951904296875 2023-01-24 01:45:13.030855: step: 554/463, loss: 0.10335811972618103 2023-01-24 01:45:13.630843: step: 556/463, loss: 0.1324312388896942 2023-01-24 01:45:14.254436: step: 558/463, loss: 0.10734470933675766 2023-01-24 01:45:14.869838: step: 560/463, loss: 0.06613200157880783 2023-01-24 01:45:15.499196: step: 562/463, loss: 0.4820294976234436 2023-01-24 01:45:16.131476: step: 564/463, loss: 0.18396775424480438 2023-01-24 01:45:16.704943: step: 566/463, loss: 0.01487127784639597 2023-01-24 01:45:17.317765: step: 568/463, loss: 0.08771449327468872 2023-01-24 01:45:17.874130: step: 570/463, loss: 0.13871484994888306 2023-01-24 01:45:18.472624: step: 572/463, loss: 0.11097869277000427 2023-01-24 01:45:19.097440: step: 574/463, loss: 0.12313664704561234 2023-01-24 01:45:19.754656: step: 576/463, loss: 0.05219290032982826 2023-01-24 01:45:20.418319: step: 578/463, loss: 0.04261372610926628 2023-01-24 01:45:21.088026: step: 580/463, loss: 0.16862012445926666 2023-01-24 01:45:21.709857: step: 582/463, loss: 0.15329620242118835 2023-01-24 01:45:22.281636: step: 584/463, loss: 0.0879918709397316 2023-01-24 01:45:22.878655: step: 586/463, loss: 0.10960620641708374 2023-01-24 01:45:23.591768: step: 588/463, loss: 0.06510740518569946 2023-01-24 01:45:24.246641: step: 590/463, loss: 0.1167898029088974 2023-01-24 01:45:24.912888: step: 592/463, loss: 0.37295305728912354 2023-01-24 01:45:25.645521: step: 594/463, loss: 0.0940602496266365 2023-01-24 01:45:26.237750: step: 596/463, loss: 0.08408602327108383 2023-01-24 01:45:26.835806: step: 598/463, loss: 0.14934015274047852 2023-01-24 01:45:27.377699: step: 600/463, loss: 0.1785050332546234 2023-01-24 01:45:27.987868: step: 602/463, loss: 0.07750960439443588 2023-01-24 01:45:28.621237: step: 604/463, loss: 0.2220855951309204 2023-01-24 01:45:29.244902: step: 606/463, loss: 0.09620335698127747 2023-01-24 01:45:29.860339: step: 608/463, loss: 0.15664805471897125 2023-01-24 01:45:30.530427: step: 610/463, loss: 0.06983480602502823 2023-01-24 01:45:31.205079: step: 612/463, loss: 0.5398403406143188 2023-01-24 01:45:31.846485: step: 614/463, loss: 0.1134791448712349 2023-01-24 01:45:32.427170: step: 616/463, loss: 0.11525683104991913 2023-01-24 01:45:33.265502: step: 618/463, loss: 0.2604376971721649 2023-01-24 01:45:33.906314: step: 620/463, loss: 0.4766441583633423 2023-01-24 01:45:34.524633: step: 622/463, loss: 0.1398719698190689 2023-01-24 01:45:35.142922: step: 624/463, loss: 0.44540050625801086 2023-01-24 01:45:35.702480: step: 626/463, loss: 0.05978726968169212 2023-01-24 01:45:36.334133: step: 628/463, loss: 0.2011292725801468 2023-01-24 01:45:36.979429: step: 630/463, loss: 0.1322762370109558 2023-01-24 01:45:37.667455: step: 632/463, loss: 0.24957512319087982 2023-01-24 01:45:38.217298: step: 634/463, loss: 0.08489439636468887 2023-01-24 01:45:38.830178: step: 636/463, loss: 0.07555403560400009 2023-01-24 01:45:39.465343: step: 638/463, loss: 0.06060856208205223 2023-01-24 01:45:40.075684: step: 640/463, loss: 0.07927530258893967 2023-01-24 01:45:40.676502: step: 642/463, loss: 0.07236307859420776 2023-01-24 01:45:41.293329: step: 644/463, loss: 0.04239349067211151 2023-01-24 01:45:41.982444: step: 646/463, loss: 0.03760574385523796 2023-01-24 01:45:42.576554: step: 648/463, loss: 0.9239044189453125 2023-01-24 01:45:43.224184: step: 650/463, loss: 0.1705137938261032 2023-01-24 01:45:43.848748: step: 652/463, loss: 0.11683175712823868 2023-01-24 01:45:44.461800: step: 654/463, loss: 0.5038808584213257 2023-01-24 01:45:45.079203: step: 656/463, loss: 0.2034892737865448 2023-01-24 01:45:45.701047: step: 658/463, loss: 0.09336759150028229 2023-01-24 01:45:46.321066: step: 660/463, loss: 0.05636840686202049 2023-01-24 01:45:46.895630: step: 662/463, loss: 0.11377755552530289 2023-01-24 01:45:47.583018: step: 664/463, loss: 0.2625134587287903 2023-01-24 01:45:48.222340: step: 666/463, loss: 0.21744701266288757 2023-01-24 01:45:48.837651: step: 668/463, loss: 0.0924687534570694 2023-01-24 01:45:49.412983: step: 670/463, loss: 0.5260862112045288 2023-01-24 01:45:50.008438: step: 672/463, loss: 0.1251498907804489 2023-01-24 01:45:50.667642: step: 674/463, loss: 0.10914484411478043 2023-01-24 01:45:51.260726: step: 676/463, loss: 0.10781820118427277 2023-01-24 01:45:51.876180: step: 678/463, loss: 0.08858411759138107 2023-01-24 01:45:52.551283: step: 680/463, loss: 0.11982058733701706 2023-01-24 01:45:53.145743: step: 682/463, loss: 0.12719088792800903 2023-01-24 01:45:53.739854: step: 684/463, loss: 0.12275876104831696 2023-01-24 01:45:54.290006: step: 686/463, loss: 0.0874873623251915 2023-01-24 01:45:54.867242: step: 688/463, loss: 0.5289443135261536 2023-01-24 01:45:55.555156: step: 690/463, loss: 0.09343475848436356 2023-01-24 01:45:56.159501: step: 692/463, loss: 0.1273340880870819 2023-01-24 01:45:56.832733: step: 694/463, loss: 0.12136916071176529 2023-01-24 01:45:57.394237: step: 696/463, loss: 0.7252759337425232 2023-01-24 01:45:57.962469: step: 698/463, loss: 0.07977968454360962 2023-01-24 01:45:58.596824: step: 700/463, loss: 0.15106070041656494 2023-01-24 01:45:59.141957: step: 702/463, loss: 0.044744301587343216 2023-01-24 01:45:59.704970: step: 704/463, loss: 0.5736659169197083 2023-01-24 01:46:00.319858: step: 706/463, loss: 0.08373929560184479 2023-01-24 01:46:00.907638: step: 708/463, loss: 0.04708670452237129 2023-01-24 01:46:01.556772: step: 710/463, loss: 0.09988915175199509 2023-01-24 01:46:02.230020: step: 712/463, loss: 0.1506214290857315 2023-01-24 01:46:02.820902: step: 714/463, loss: 0.2638477087020874 2023-01-24 01:46:03.455422: step: 716/463, loss: 0.20712675154209137 2023-01-24 01:46:04.100361: step: 718/463, loss: 0.1186700165271759 2023-01-24 01:46:04.733641: step: 720/463, loss: 0.18628276884555817 2023-01-24 01:46:05.295274: step: 722/463, loss: 0.07569295912981033 2023-01-24 01:46:05.914990: step: 724/463, loss: 0.14078210294246674 2023-01-24 01:46:06.503363: step: 726/463, loss: 0.07991614192724228 2023-01-24 01:46:07.180488: step: 728/463, loss: 0.2108035385608673 2023-01-24 01:46:07.840928: step: 730/463, loss: 1.492363452911377 2023-01-24 01:46:08.500796: step: 732/463, loss: 0.09867862612009048 2023-01-24 01:46:09.135851: step: 734/463, loss: 0.04936336353421211 2023-01-24 01:46:09.762755: step: 736/463, loss: 0.5099826455116272 2023-01-24 01:46:10.503570: step: 738/463, loss: 0.07888239622116089 2023-01-24 01:46:11.096177: step: 740/463, loss: 0.16393595933914185 2023-01-24 01:46:11.819618: step: 742/463, loss: 0.14436742663383484 2023-01-24 01:46:12.492616: step: 744/463, loss: 0.07070403546094894 2023-01-24 01:46:13.095255: step: 746/463, loss: 0.6342538595199585 2023-01-24 01:46:13.807945: step: 748/463, loss: 0.1487412005662918 2023-01-24 01:46:14.460171: step: 750/463, loss: 0.14240650832653046 2023-01-24 01:46:15.065504: step: 752/463, loss: 0.2808184027671814 2023-01-24 01:46:15.669722: step: 754/463, loss: 0.07024453580379486 2023-01-24 01:46:16.303215: step: 756/463, loss: 1.5754814147949219 2023-01-24 01:46:16.909043: step: 758/463, loss: 0.42156901955604553 2023-01-24 01:46:17.528292: step: 760/463, loss: 0.05434051528573036 2023-01-24 01:46:18.160265: step: 762/463, loss: 0.06405092030763626 2023-01-24 01:46:18.809630: step: 764/463, loss: 0.13429778814315796 2023-01-24 01:46:19.356314: step: 766/463, loss: 0.15609587728977203 2023-01-24 01:46:20.002269: step: 768/463, loss: 0.26023316383361816 2023-01-24 01:46:20.575071: step: 770/463, loss: 0.17534756660461426 2023-01-24 01:46:21.153099: step: 772/463, loss: 0.03903448209166527 2023-01-24 01:46:21.783644: step: 774/463, loss: 0.21714311838150024 2023-01-24 01:46:22.384362: step: 776/463, loss: 0.13616353273391724 2023-01-24 01:46:22.970648: step: 778/463, loss: 0.1222415417432785 2023-01-24 01:46:23.594673: step: 780/463, loss: 0.09427404403686523 2023-01-24 01:46:24.254025: step: 782/463, loss: 0.14996539056301117 2023-01-24 01:46:24.897996: step: 784/463, loss: 0.24420617520809174 2023-01-24 01:46:25.516756: step: 786/463, loss: 0.5018675923347473 2023-01-24 01:46:26.197439: step: 788/463, loss: 0.2124158889055252 2023-01-24 01:46:26.812635: step: 790/463, loss: 0.3533600866794586 2023-01-24 01:46:27.473968: step: 792/463, loss: 0.22449904680252075 2023-01-24 01:46:28.082654: step: 794/463, loss: 0.13094858825206757 2023-01-24 01:46:28.662843: step: 796/463, loss: 0.6667919754981995 2023-01-24 01:46:29.262713: step: 798/463, loss: 0.03039870783686638 2023-01-24 01:46:29.880045: step: 800/463, loss: 0.08532169461250305 2023-01-24 01:46:30.545481: step: 802/463, loss: 0.0879097655415535 2023-01-24 01:46:31.177228: step: 804/463, loss: 0.039150360971689224 2023-01-24 01:46:31.776278: step: 806/463, loss: 0.12181497365236282 2023-01-24 01:46:32.347668: step: 808/463, loss: 0.409475177526474 2023-01-24 01:46:32.894793: step: 810/463, loss: 0.160431906580925 2023-01-24 01:46:33.561913: step: 812/463, loss: 0.12796978652477264 2023-01-24 01:46:34.162947: step: 814/463, loss: 0.06792762875556946 2023-01-24 01:46:34.704949: step: 816/463, loss: 0.09617536514997482 2023-01-24 01:46:35.341151: step: 818/463, loss: 0.08104352653026581 2023-01-24 01:46:35.961051: step: 820/463, loss: 0.13039171695709229 2023-01-24 01:46:36.620636: step: 822/463, loss: 0.8699960112571716 2023-01-24 01:46:37.199826: step: 824/463, loss: 0.07382344454526901 2023-01-24 01:46:37.764969: step: 826/463, loss: 0.34303969144821167 2023-01-24 01:46:38.395367: step: 828/463, loss: 0.11564187705516815 2023-01-24 01:46:38.996262: step: 830/463, loss: 0.0836898684501648 2023-01-24 01:46:39.613581: step: 832/463, loss: 0.1890992969274521 2023-01-24 01:46:40.228833: step: 834/463, loss: 0.06025129184126854 2023-01-24 01:46:40.864606: step: 836/463, loss: 0.20985692739486694 2023-01-24 01:46:41.479658: step: 838/463, loss: 0.08359216153621674 2023-01-24 01:46:42.183175: step: 840/463, loss: 0.051489606499671936 2023-01-24 01:46:42.745741: step: 842/463, loss: 0.11594334989786148 2023-01-24 01:46:43.312085: step: 844/463, loss: 0.1484472006559372 2023-01-24 01:46:43.932248: step: 846/463, loss: 0.06749340146780014 2023-01-24 01:46:44.549450: step: 848/463, loss: 0.1993170529603958 2023-01-24 01:46:45.145166: step: 850/463, loss: 0.2687409222126007 2023-01-24 01:46:45.837407: step: 852/463, loss: 0.09031051397323608 2023-01-24 01:46:46.525523: step: 854/463, loss: 0.11227161437273026 2023-01-24 01:46:47.157127: step: 856/463, loss: 0.048474475741386414 2023-01-24 01:46:47.902416: step: 858/463, loss: 0.08880440145730972 2023-01-24 01:46:48.581795: step: 860/463, loss: 3.4016387462615967 2023-01-24 01:46:49.175427: step: 862/463, loss: 0.205790713429451 2023-01-24 01:46:49.798072: step: 864/463, loss: 0.14389650523662567 2023-01-24 01:46:50.453183: step: 866/463, loss: 0.01701684482395649 2023-01-24 01:46:51.003282: step: 868/463, loss: 0.07375527173280716 2023-01-24 01:46:51.626590: step: 870/463, loss: 0.05160212516784668 2023-01-24 01:46:52.210528: step: 872/463, loss: 0.20682241022586823 2023-01-24 01:46:52.822047: step: 874/463, loss: 0.0913391262292862 2023-01-24 01:46:53.435876: step: 876/463, loss: 0.13339069485664368 2023-01-24 01:46:54.016270: step: 878/463, loss: 0.04677043855190277 2023-01-24 01:46:54.558457: step: 880/463, loss: 0.16638848185539246 2023-01-24 01:46:55.167747: step: 882/463, loss: 0.08223778009414673 2023-01-24 01:46:55.754866: step: 884/463, loss: 0.07895812392234802 2023-01-24 01:46:56.450242: step: 886/463, loss: 0.0934305414557457 2023-01-24 01:46:57.030837: step: 888/463, loss: 0.10494174808263779 2023-01-24 01:46:57.662673: step: 890/463, loss: 0.046117182821035385 2023-01-24 01:46:58.233774: step: 892/463, loss: 0.1175730749964714 2023-01-24 01:46:58.851355: step: 894/463, loss: 0.2696318030357361 2023-01-24 01:46:59.445044: step: 896/463, loss: 0.04674027860164642 2023-01-24 01:47:00.064432: step: 898/463, loss: 0.12179773300886154 2023-01-24 01:47:00.660450: step: 900/463, loss: 0.1435602456331253 2023-01-24 01:47:01.257171: step: 902/463, loss: 0.11121556162834167 2023-01-24 01:47:01.902265: step: 904/463, loss: 0.10599341988563538 2023-01-24 01:47:02.443304: step: 906/463, loss: 0.19182957708835602 2023-01-24 01:47:03.054362: step: 908/463, loss: 0.09481100738048553 2023-01-24 01:47:03.737444: step: 910/463, loss: 0.28158435225486755 2023-01-24 01:47:04.323200: step: 912/463, loss: 0.08506053686141968 2023-01-24 01:47:04.926892: step: 914/463, loss: 0.2001275271177292 2023-01-24 01:47:05.544077: step: 916/463, loss: 0.06941293925046921 2023-01-24 01:47:06.179398: step: 918/463, loss: 0.8327589631080627 2023-01-24 01:47:06.705271: step: 920/463, loss: 0.17840911448001862 2023-01-24 01:47:07.291517: step: 922/463, loss: 0.12436456233263016 2023-01-24 01:47:07.901541: step: 924/463, loss: 0.13938581943511963 2023-01-24 01:47:08.554344: step: 926/463, loss: 0.19173870980739594 ================================================== Loss: 0.234 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34837914937759334, 'r': 0.31863140417457303, 'f1': 0.3328419226957383}, 'combined': 0.24525194303896505, 'epoch': 14} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3461635956880312, 'r': 0.3677988204185331, 'f1': 0.3566534016179715}, 'combined': 0.2764490481440736, 'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3344460462993694, 'r': 0.33254217886313014, 'f1': 0.33349139535845784}, 'combined': 0.24573050184307418, 'epoch': 14} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3330692257349551, 'r': 0.3688864126935854, 'f1': 0.35006403577027556}, 'combined': 0.2713415014104528, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33842201576576575, 'r': 0.3326425126502214, 'f1': 0.3355073763955343}, 'combined': 0.2472159615546042, 'epoch': 14} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33644544699224593, 'r': 0.3627302475385151, 'f1': 0.3490937720671424}, 'combined': 0.2705894309802731, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3922413793103448, 'r': 0.325, 'f1': 0.35546875000000006}, 'combined': 0.23697916666666669, 'epoch': 14} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3229166666666667, 'r': 0.33695652173913043, 'f1': 0.3297872340425532}, 'combined': 0.1648936170212766, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.20689655172413793, 'f1': 0.2926829268292683}, 'combined': 0.19512195121951217, 'epoch': 14} New best chinese model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34837914937759334, 'r': 0.31863140417457303, 'f1': 0.3328419226957383}, 'combined': 0.24525194303896505, 'epoch': 14} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3461635956880312, 'r': 0.3677988204185331, 'f1': 0.3566534016179715}, 'combined': 0.2764490481440736, 'epoch': 14} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3922413793103448, 'r': 0.325, 'f1': 0.35546875000000006}, 'combined': 0.23697916666666669, 'epoch': 14} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3283040393999639, 'r': 0.31335281179920654, 'f1': 0.3206542365401589}, 'combined': 0.23627154271380127, 'epoch': 6} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34970123997901775, 'r': 0.3199189915192662, 'f1': 0.3341478132367659}, 'combined': 0.2590045250926128, 'epoch': 6} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.38, 'r': 0.41304347826086957, 'f1': 0.39583333333333337}, 'combined': 0.19791666666666669, 'epoch': 6} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33842201576576575, 'r': 0.3326425126502214, 'f1': 0.3355073763955343}, 'combined': 0.2472159615546042, 'epoch': 14} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33644544699224593, 'r': 0.3627302475385151, 'f1': 0.3490937720671424}, 'combined': 0.2705894309802731, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.20689655172413793, 'f1': 0.2926829268292683}, 'combined': 0.19512195121951217, 'epoch': 14} ****************************** Epoch: 15 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:50:00.638266: step: 2/463, loss: 0.15447042882442474 2023-01-24 01:50:01.217604: step: 4/463, loss: 0.02770250476896763 2023-01-24 01:50:01.837703: step: 6/463, loss: 0.08984176069498062 2023-01-24 01:50:02.416665: step: 8/463, loss: 0.03866231441497803 2023-01-24 01:50:03.036358: step: 10/463, loss: 0.10324466228485107 2023-01-24 01:50:03.669335: step: 12/463, loss: 0.046043239533901215 2023-01-24 01:50:04.248451: step: 14/463, loss: 0.2281324863433838 2023-01-24 01:50:04.855283: step: 16/463, loss: 0.08534029871225357 2023-01-24 01:50:05.470851: step: 18/463, loss: 0.0729091614484787 2023-01-24 01:50:06.122133: step: 20/463, loss: 0.0972452163696289 2023-01-24 01:50:06.733687: step: 22/463, loss: 0.6486608386039734 2023-01-24 01:50:07.318207: step: 24/463, loss: 0.09822855144739151 2023-01-24 01:50:07.898404: step: 26/463, loss: 0.06654565781354904 2023-01-24 01:50:08.572220: step: 28/463, loss: 0.07373248040676117 2023-01-24 01:50:09.228671: step: 30/463, loss: 0.10150497406721115 2023-01-24 01:50:09.926713: step: 32/463, loss: 0.11966792494058609 2023-01-24 01:50:10.482718: step: 34/463, loss: 0.2044639140367508 2023-01-24 01:50:11.071985: step: 36/463, loss: 0.027446717023849487 2023-01-24 01:50:11.688953: step: 38/463, loss: 0.031536053866147995 2023-01-24 01:50:12.325271: step: 40/463, loss: 0.1815873235464096 2023-01-24 01:50:12.945318: step: 42/463, loss: 0.11311647295951843 2023-01-24 01:50:13.523702: step: 44/463, loss: 0.1141190305352211 2023-01-24 01:50:14.111585: step: 46/463, loss: 0.15072199702262878 2023-01-24 01:50:14.831021: step: 48/463, loss: 0.17301441729068756 2023-01-24 01:50:15.536278: step: 50/463, loss: 0.24043482542037964 2023-01-24 01:50:16.116347: step: 52/463, loss: 0.17363114655017853 2023-01-24 01:50:16.734082: step: 54/463, loss: 0.04408092424273491 2023-01-24 01:50:17.352730: step: 56/463, loss: 0.5342665910720825 2023-01-24 01:50:17.992402: step: 58/463, loss: 0.06617753207683563 2023-01-24 01:50:18.681376: step: 60/463, loss: 0.18174047768115997 2023-01-24 01:50:19.274981: step: 62/463, loss: 0.07808267325162888 2023-01-24 01:50:19.920660: step: 64/463, loss: 0.6816559433937073 2023-01-24 01:50:20.528341: step: 66/463, loss: 0.09485840797424316 2023-01-24 01:50:21.134954: step: 68/463, loss: 0.20093004405498505 2023-01-24 01:50:21.786269: step: 70/463, loss: 0.08908053487539291 2023-01-24 01:50:22.367448: step: 72/463, loss: 0.6995729804039001 2023-01-24 01:50:23.018158: step: 74/463, loss: 0.3616619408130646 2023-01-24 01:50:23.621728: step: 76/463, loss: 0.08824338763952255 2023-01-24 01:50:24.266377: step: 78/463, loss: 0.11003921926021576 2023-01-24 01:50:24.907325: step: 80/463, loss: 0.06630346924066544 2023-01-24 01:50:25.567632: step: 82/463, loss: 0.06528856605291367 2023-01-24 01:50:26.157299: step: 84/463, loss: 0.09057553112506866 2023-01-24 01:50:26.748499: step: 86/463, loss: 0.5225180983543396 2023-01-24 01:50:27.448050: step: 88/463, loss: 0.19920146465301514 2023-01-24 01:50:28.034299: step: 90/463, loss: 0.06880192458629608 2023-01-24 01:50:28.675020: step: 92/463, loss: 0.028462907299399376 2023-01-24 01:50:29.318430: step: 94/463, loss: 0.0857849195599556 2023-01-24 01:50:29.956111: step: 96/463, loss: 0.13245193660259247 2023-01-24 01:50:30.563009: step: 98/463, loss: 0.1076107993721962 2023-01-24 01:50:31.156279: step: 100/463, loss: 0.20794677734375 2023-01-24 01:50:31.797396: step: 102/463, loss: 0.0402422659099102 2023-01-24 01:50:32.344093: step: 104/463, loss: 0.055854782462120056 2023-01-24 01:50:32.980880: step: 106/463, loss: 0.05146775022149086 2023-01-24 01:50:33.558267: step: 108/463, loss: 0.09846664220094681 2023-01-24 01:50:34.189495: step: 110/463, loss: 0.06401441246271133 2023-01-24 01:50:34.804595: step: 112/463, loss: 0.0930890366435051 2023-01-24 01:50:35.397735: step: 114/463, loss: 0.4546594023704529 2023-01-24 01:50:35.965983: step: 116/463, loss: 0.12890735268592834 2023-01-24 01:50:36.544234: step: 118/463, loss: 0.16613930463790894 2023-01-24 01:50:37.099297: step: 120/463, loss: 0.040514469146728516 2023-01-24 01:50:37.771500: step: 122/463, loss: 0.048164013773202896 2023-01-24 01:50:38.388125: step: 124/463, loss: 0.040693316608667374 2023-01-24 01:50:39.022457: step: 126/463, loss: 0.06072457507252693 2023-01-24 01:50:39.662045: step: 128/463, loss: 0.023524239659309387 2023-01-24 01:50:40.247761: step: 130/463, loss: 0.07855761796236038 2023-01-24 01:50:40.878901: step: 132/463, loss: 0.12775106728076935 2023-01-24 01:50:41.436389: step: 134/463, loss: 0.041023239493370056 2023-01-24 01:50:42.042829: step: 136/463, loss: 0.5008127093315125 2023-01-24 01:50:42.758607: step: 138/463, loss: 0.07672007381916046 2023-01-24 01:50:43.388645: step: 140/463, loss: 0.10578087717294693 2023-01-24 01:50:44.030542: step: 142/463, loss: 0.028427373617887497 2023-01-24 01:50:44.627884: step: 144/463, loss: 0.06272473186254501 2023-01-24 01:50:45.213974: step: 146/463, loss: 0.1564113348722458 2023-01-24 01:50:45.828453: step: 148/463, loss: 0.07387494295835495 2023-01-24 01:50:46.442951: step: 150/463, loss: 0.056282248347997665 2023-01-24 01:50:47.045213: step: 152/463, loss: 0.06835278123617172 2023-01-24 01:50:47.656488: step: 154/463, loss: 0.1928885132074356 2023-01-24 01:50:48.243107: step: 156/463, loss: 0.07516981661319733 2023-01-24 01:50:48.838990: step: 158/463, loss: 0.7094244360923767 2023-01-24 01:50:49.450544: step: 160/463, loss: 0.038302235305309296 2023-01-24 01:50:50.029312: step: 162/463, loss: 0.05817342922091484 2023-01-24 01:50:50.645125: step: 164/463, loss: 0.01996573433279991 2023-01-24 01:50:51.284121: step: 166/463, loss: 0.47693943977355957 2023-01-24 01:50:51.912864: step: 168/463, loss: 0.12544715404510498 2023-01-24 01:50:52.499134: step: 170/463, loss: 0.12725156545639038 2023-01-24 01:50:53.104704: step: 172/463, loss: 0.06777847558259964 2023-01-24 01:50:53.687287: step: 174/463, loss: 0.09384545683860779 2023-01-24 01:50:54.277748: step: 176/463, loss: 0.08972682803869247 2023-01-24 01:50:54.861694: step: 178/463, loss: 0.5601344704627991 2023-01-24 01:50:55.449704: step: 180/463, loss: 0.11076360940933228 2023-01-24 01:50:56.043269: step: 182/463, loss: 0.14465606212615967 2023-01-24 01:50:56.616970: step: 184/463, loss: 0.03793802484869957 2023-01-24 01:50:57.219814: step: 186/463, loss: 0.15447227656841278 2023-01-24 01:50:57.845995: step: 188/463, loss: 0.05333952605724335 2023-01-24 01:50:58.458701: step: 190/463, loss: 0.16936078667640686 2023-01-24 01:50:59.073840: step: 192/463, loss: 0.12389491498470306 2023-01-24 01:50:59.688750: step: 194/463, loss: 0.12573525309562683 2023-01-24 01:51:00.251453: step: 196/463, loss: 0.32096216082572937 2023-01-24 01:51:00.869241: step: 198/463, loss: 0.09927521646022797 2023-01-24 01:51:01.484883: step: 200/463, loss: 0.10896749049425125 2023-01-24 01:51:02.068471: step: 202/463, loss: 0.1163899302482605 2023-01-24 01:51:02.709457: step: 204/463, loss: 0.10979020595550537 2023-01-24 01:51:03.297905: step: 206/463, loss: 0.05496586486697197 2023-01-24 01:51:03.892500: step: 208/463, loss: 0.10316412150859833 2023-01-24 01:51:04.449195: step: 210/463, loss: 0.35646650195121765 2023-01-24 01:51:05.010067: step: 212/463, loss: 0.2738085985183716 2023-01-24 01:51:05.638713: step: 214/463, loss: 5.22773551940918 2023-01-24 01:51:06.230405: step: 216/463, loss: 0.09675916284322739 2023-01-24 01:51:06.839608: step: 218/463, loss: 0.1906154900789261 2023-01-24 01:51:07.449558: step: 220/463, loss: 0.07194644957780838 2023-01-24 01:51:08.085949: step: 222/463, loss: 0.10693216323852539 2023-01-24 01:51:08.601236: step: 224/463, loss: 0.11566611379384995 2023-01-24 01:51:09.133760: step: 226/463, loss: 0.22901403903961182 2023-01-24 01:51:09.717885: step: 228/463, loss: 0.21474911272525787 2023-01-24 01:51:10.281244: step: 230/463, loss: 0.08919728547334671 2023-01-24 01:51:10.941496: step: 232/463, loss: 0.0692751333117485 2023-01-24 01:51:11.526861: step: 234/463, loss: 0.07684727758169174 2023-01-24 01:51:12.104030: step: 236/463, loss: 0.3953125476837158 2023-01-24 01:51:12.690561: step: 238/463, loss: 0.07253973931074142 2023-01-24 01:51:13.310280: step: 240/463, loss: 0.17964813113212585 2023-01-24 01:51:13.901516: step: 242/463, loss: 0.06846963614225388 2023-01-24 01:51:14.476583: step: 244/463, loss: 0.08263533562421799 2023-01-24 01:51:15.140435: step: 246/463, loss: 0.12028109282255173 2023-01-24 01:51:15.747710: step: 248/463, loss: 0.09234782308340073 2023-01-24 01:51:16.383899: step: 250/463, loss: 0.12176024168729782 2023-01-24 01:51:16.949821: step: 252/463, loss: 0.06890027970075607 2023-01-24 01:51:17.567407: step: 254/463, loss: 0.3822815418243408 2023-01-24 01:51:18.114516: step: 256/463, loss: 0.029156535863876343 2023-01-24 01:51:18.777916: step: 258/463, loss: 0.05815451592206955 2023-01-24 01:51:19.344400: step: 260/463, loss: 0.08179517090320587 2023-01-24 01:51:19.961560: step: 262/463, loss: 0.37647223472595215 2023-01-24 01:51:20.544808: step: 264/463, loss: 0.2492261528968811 2023-01-24 01:51:21.268150: step: 266/463, loss: 0.20476193726062775 2023-01-24 01:51:21.878673: step: 268/463, loss: 0.039153389632701874 2023-01-24 01:51:22.518294: step: 270/463, loss: 0.054536063224077225 2023-01-24 01:51:23.107896: step: 272/463, loss: 0.2026681751012802 2023-01-24 01:51:23.757806: step: 274/463, loss: 0.2152506560087204 2023-01-24 01:51:24.379341: step: 276/463, loss: 0.24596165120601654 2023-01-24 01:51:25.042818: step: 278/463, loss: 0.36492782831192017 2023-01-24 01:51:25.659225: step: 280/463, loss: 0.06065182387828827 2023-01-24 01:51:26.239625: step: 282/463, loss: 0.5141716599464417 2023-01-24 01:51:26.856775: step: 284/463, loss: 0.07557240128517151 2023-01-24 01:51:27.472186: step: 286/463, loss: 0.15395209193229675 2023-01-24 01:51:28.041096: step: 288/463, loss: 0.061565786600112915 2023-01-24 01:51:28.657682: step: 290/463, loss: 0.13776887953281403 2023-01-24 01:51:29.252686: step: 292/463, loss: 0.035926662385463715 2023-01-24 01:51:29.826011: step: 294/463, loss: 0.07954820990562439 2023-01-24 01:51:30.491776: step: 296/463, loss: 0.0456632599234581 2023-01-24 01:51:31.114822: step: 298/463, loss: 0.088347427546978 2023-01-24 01:51:31.741786: step: 300/463, loss: 0.11988343298435211 2023-01-24 01:51:32.370927: step: 302/463, loss: 0.14200922846794128 2023-01-24 01:51:32.968744: step: 304/463, loss: 0.07645564526319504 2023-01-24 01:51:33.577934: step: 306/463, loss: 0.11405491083860397 2023-01-24 01:51:34.144244: step: 308/463, loss: 0.14642997086048126 2023-01-24 01:51:34.742099: step: 310/463, loss: 0.15205056965351105 2023-01-24 01:51:35.323799: step: 312/463, loss: 0.03897145390510559 2023-01-24 01:51:35.932393: step: 314/463, loss: 0.20041872560977936 2023-01-24 01:51:36.580883: step: 316/463, loss: 0.03764211758971214 2023-01-24 01:51:37.228007: step: 318/463, loss: 0.2205481082201004 2023-01-24 01:51:37.869173: step: 320/463, loss: 0.028743749484419823 2023-01-24 01:51:38.476489: step: 322/463, loss: 0.04010086506605148 2023-01-24 01:51:39.090321: step: 324/463, loss: 0.08388634026050568 2023-01-24 01:51:39.786249: step: 326/463, loss: 0.04325639829039574 2023-01-24 01:51:40.460880: step: 328/463, loss: 0.3005983829498291 2023-01-24 01:51:41.021013: step: 330/463, loss: 0.07320655882358551 2023-01-24 01:51:41.588527: step: 332/463, loss: 0.04171394929289818 2023-01-24 01:51:42.232636: step: 334/463, loss: 0.21179060637950897 2023-01-24 01:51:42.867161: step: 336/463, loss: 0.031535230576992035 2023-01-24 01:51:43.464435: step: 338/463, loss: 0.059583790600299835 2023-01-24 01:51:44.032903: step: 340/463, loss: 0.23425628244876862 2023-01-24 01:51:44.629702: step: 342/463, loss: 0.4230680763721466 2023-01-24 01:51:45.177664: step: 344/463, loss: 0.026921847835183144 2023-01-24 01:51:45.793503: step: 346/463, loss: 0.03770776465535164 2023-01-24 01:51:46.406514: step: 348/463, loss: 0.044509172439575195 2023-01-24 01:51:46.991929: step: 350/463, loss: 0.25164082646369934 2023-01-24 01:51:47.552361: step: 352/463, loss: 3.1193528175354004 2023-01-24 01:51:48.125745: step: 354/463, loss: 0.008484726771712303 2023-01-24 01:51:48.711656: step: 356/463, loss: 0.14153073728084564 2023-01-24 01:51:49.388923: step: 358/463, loss: 0.09729599207639694 2023-01-24 01:51:50.016470: step: 360/463, loss: 0.01014737132936716 2023-01-24 01:51:50.652591: step: 362/463, loss: 0.6379090547561646 2023-01-24 01:51:51.262885: step: 364/463, loss: 0.06369558721780777 2023-01-24 01:51:51.868871: step: 366/463, loss: 0.17121385037899017 2023-01-24 01:51:52.415231: step: 368/463, loss: 0.09536228328943253 2023-01-24 01:51:53.007626: step: 370/463, loss: 0.0385630838572979 2023-01-24 01:51:53.590069: step: 372/463, loss: 0.02957984246313572 2023-01-24 01:51:54.177773: step: 374/463, loss: 0.08003193140029907 2023-01-24 01:51:54.849170: step: 376/463, loss: 0.09847112745046616 2023-01-24 01:51:55.519655: step: 378/463, loss: 0.33379536867141724 2023-01-24 01:51:56.153306: step: 380/463, loss: 0.08243055641651154 2023-01-24 01:51:56.774846: step: 382/463, loss: 0.08104237914085388 2023-01-24 01:51:57.417180: step: 384/463, loss: 0.061112821102142334 2023-01-24 01:51:57.966882: step: 386/463, loss: 0.13808482885360718 2023-01-24 01:51:58.617441: step: 388/463, loss: 0.07363472878932953 2023-01-24 01:51:59.223615: step: 390/463, loss: 0.04818311333656311 2023-01-24 01:51:59.785251: step: 392/463, loss: 0.10252054035663605 2023-01-24 01:52:00.359089: step: 394/463, loss: 0.10292989015579224 2023-01-24 01:52:01.032933: step: 396/463, loss: 0.02241295576095581 2023-01-24 01:52:01.611703: step: 398/463, loss: 0.018636174499988556 2023-01-24 01:52:02.234613: step: 400/463, loss: 0.06365367025136948 2023-01-24 01:52:02.818017: step: 402/463, loss: 0.08667969703674316 2023-01-24 01:52:03.481869: step: 404/463, loss: 0.1124776229262352 2023-01-24 01:52:04.035105: step: 406/463, loss: 1.2408829927444458 2023-01-24 01:52:04.639440: step: 408/463, loss: 0.0829954668879509 2023-01-24 01:52:05.280384: step: 410/463, loss: 0.1821594089269638 2023-01-24 01:52:05.903688: step: 412/463, loss: 0.17024026811122894 2023-01-24 01:52:06.503417: step: 414/463, loss: 0.13009324669837952 2023-01-24 01:52:07.183530: step: 416/463, loss: 0.08420077711343765 2023-01-24 01:52:07.785745: step: 418/463, loss: 0.2093186378479004 2023-01-24 01:52:08.342646: step: 420/463, loss: 0.12217625975608826 2023-01-24 01:52:09.000512: step: 422/463, loss: 0.08837151527404785 2023-01-24 01:52:09.580832: step: 424/463, loss: 0.14958344399929047 2023-01-24 01:52:10.228340: step: 426/463, loss: 0.04692188277840614 2023-01-24 01:52:10.868606: step: 428/463, loss: 0.2951260209083557 2023-01-24 01:52:11.490259: step: 430/463, loss: 0.19845955073833466 2023-01-24 01:52:12.124191: step: 432/463, loss: 0.03155672177672386 2023-01-24 01:52:12.726244: step: 434/463, loss: 0.7305673360824585 2023-01-24 01:52:13.308163: step: 436/463, loss: 0.11973927915096283 2023-01-24 01:52:14.003111: step: 438/463, loss: 0.19857928156852722 2023-01-24 01:52:14.641803: step: 440/463, loss: 0.30530673265457153 2023-01-24 01:52:15.307791: step: 442/463, loss: 0.106770820915699 2023-01-24 01:52:15.854990: step: 444/463, loss: 0.06208482384681702 2023-01-24 01:52:16.414801: step: 446/463, loss: 0.6903566122055054 2023-01-24 01:52:17.081680: step: 448/463, loss: 0.06192076578736305 2023-01-24 01:52:17.716946: step: 450/463, loss: 0.061335619539022446 2023-01-24 01:52:18.336962: step: 452/463, loss: 0.036922723054885864 2023-01-24 01:52:18.983922: step: 454/463, loss: 0.20546886324882507 2023-01-24 01:52:19.618681: step: 456/463, loss: 0.14055319130420685 2023-01-24 01:52:20.258704: step: 458/463, loss: 0.072812020778656 2023-01-24 01:52:20.913759: step: 460/463, loss: 0.15294283628463745 2023-01-24 01:52:21.529090: step: 462/463, loss: 0.029308443889021873 2023-01-24 01:52:22.195019: step: 464/463, loss: 0.2965143322944641 2023-01-24 01:52:22.786300: step: 466/463, loss: 0.039474621415138245 2023-01-24 01:52:23.466700: step: 468/463, loss: 0.16768600046634674 2023-01-24 01:52:24.088360: step: 470/463, loss: 0.13155882060527802 2023-01-24 01:52:24.761401: step: 472/463, loss: 0.0809381976723671 2023-01-24 01:52:25.356076: step: 474/463, loss: 0.5181536674499512 2023-01-24 01:52:25.963230: step: 476/463, loss: 0.14347179234027863 2023-01-24 01:52:26.588457: step: 478/463, loss: 0.10302814096212387 2023-01-24 01:52:27.197692: step: 480/463, loss: 2.829655647277832 2023-01-24 01:52:27.814374: step: 482/463, loss: 0.06911370158195496 2023-01-24 01:52:28.399976: step: 484/463, loss: 0.26369795203208923 2023-01-24 01:52:29.031745: step: 486/463, loss: 0.11180444806814194 2023-01-24 01:52:29.679220: step: 488/463, loss: 0.10409456491470337 2023-01-24 01:52:30.329163: step: 490/463, loss: 0.12288526445627213 2023-01-24 01:52:30.992430: step: 492/463, loss: 0.04631849005818367 2023-01-24 01:52:31.623549: step: 494/463, loss: 0.5771387219429016 2023-01-24 01:52:32.237952: step: 496/463, loss: 0.27514976263046265 2023-01-24 01:52:32.879496: step: 498/463, loss: 0.16434890031814575 2023-01-24 01:52:33.535498: step: 500/463, loss: 0.43808069825172424 2023-01-24 01:52:34.113354: step: 502/463, loss: 0.4952459931373596 2023-01-24 01:52:34.699765: step: 504/463, loss: 0.19927358627319336 2023-01-24 01:52:35.438766: step: 506/463, loss: 0.19992642104625702 2023-01-24 01:52:36.062595: step: 508/463, loss: 0.031801093369722366 2023-01-24 01:52:36.649634: step: 510/463, loss: 0.12465578317642212 2023-01-24 01:52:37.347883: step: 512/463, loss: 0.07475098967552185 2023-01-24 01:52:37.990737: step: 514/463, loss: 0.10618216544389725 2023-01-24 01:52:38.599500: step: 516/463, loss: 0.019582342356443405 2023-01-24 01:52:39.146709: step: 518/463, loss: 0.10032596439123154 2023-01-24 01:52:39.762117: step: 520/463, loss: 0.11950547993183136 2023-01-24 01:52:40.358928: step: 522/463, loss: 0.043447431176900864 2023-01-24 01:52:41.013171: step: 524/463, loss: 0.10636359453201294 2023-01-24 01:52:41.631263: step: 526/463, loss: 0.031716953963041306 2023-01-24 01:52:42.262295: step: 528/463, loss: 0.11483673006296158 2023-01-24 01:52:42.865134: step: 530/463, loss: 0.04162720963358879 2023-01-24 01:52:43.520910: step: 532/463, loss: 0.01935102976858616 2023-01-24 01:52:44.096421: step: 534/463, loss: 0.7199362516403198 2023-01-24 01:52:44.741646: step: 536/463, loss: 0.07453575730323792 2023-01-24 01:52:45.323998: step: 538/463, loss: 0.11602141708135605 2023-01-24 01:52:45.902655: step: 540/463, loss: 0.14062979817390442 2023-01-24 01:52:46.476668: step: 542/463, loss: 0.09119509905576706 2023-01-24 01:52:47.143822: step: 544/463, loss: 0.5877248644828796 2023-01-24 01:52:47.839292: step: 546/463, loss: 0.07616828382015228 2023-01-24 01:52:48.503151: step: 548/463, loss: 0.10570330172777176 2023-01-24 01:52:49.248576: step: 550/463, loss: 0.06280627101659775 2023-01-24 01:52:49.869743: step: 552/463, loss: 5.030261993408203 2023-01-24 01:52:50.516627: step: 554/463, loss: 0.03418249636888504 2023-01-24 01:52:51.127268: step: 556/463, loss: 0.34527361392974854 2023-01-24 01:52:51.780584: step: 558/463, loss: 0.19497722387313843 2023-01-24 01:52:52.437780: step: 560/463, loss: 0.05754159763455391 2023-01-24 01:52:53.107341: step: 562/463, loss: 0.07024494558572769 2023-01-24 01:52:53.746104: step: 564/463, loss: 0.19119054079055786 2023-01-24 01:52:54.347043: step: 566/463, loss: 0.07003539800643921 2023-01-24 01:52:55.017925: step: 568/463, loss: 0.07729244977235794 2023-01-24 01:52:55.708340: step: 570/463, loss: 0.047074176371097565 2023-01-24 01:52:56.309449: step: 572/463, loss: 0.06070127338171005 2023-01-24 01:52:56.902430: step: 574/463, loss: 0.04916515201330185 2023-01-24 01:52:57.493959: step: 576/463, loss: 0.1154446229338646 2023-01-24 01:52:58.152587: step: 578/463, loss: 0.33982861042022705 2023-01-24 01:52:58.763178: step: 580/463, loss: 0.3637949526309967 2023-01-24 01:52:59.358517: step: 582/463, loss: 0.09657775610685349 2023-01-24 01:52:59.984167: step: 584/463, loss: 0.11097947508096695 2023-01-24 01:53:00.570409: step: 586/463, loss: 0.1281764805316925 2023-01-24 01:53:01.227010: step: 588/463, loss: 0.40551435947418213 2023-01-24 01:53:01.866873: step: 590/463, loss: 0.03508146479725838 2023-01-24 01:53:02.562602: step: 592/463, loss: 0.14648066461086273 2023-01-24 01:53:03.221225: step: 594/463, loss: 0.06095085293054581 2023-01-24 01:53:03.795152: step: 596/463, loss: 0.08103873580694199 2023-01-24 01:53:04.433439: step: 598/463, loss: 0.2542259693145752 2023-01-24 01:53:05.049103: step: 600/463, loss: 0.04838281124830246 2023-01-24 01:53:05.623295: step: 602/463, loss: 0.11283738166093826 2023-01-24 01:53:06.241232: step: 604/463, loss: 0.07624384015798569 2023-01-24 01:53:06.854824: step: 606/463, loss: 0.12532417476177216 2023-01-24 01:53:07.467371: step: 608/463, loss: 0.9456416368484497 2023-01-24 01:53:08.140443: step: 610/463, loss: 0.08015812933444977 2023-01-24 01:53:08.739892: step: 612/463, loss: 0.06637389212846756 2023-01-24 01:53:09.418607: step: 614/463, loss: 0.5557191371917725 2023-01-24 01:53:10.041578: step: 616/463, loss: 0.2358798235654831 2023-01-24 01:53:10.656333: step: 618/463, loss: 0.0686626061797142 2023-01-24 01:53:11.271295: step: 620/463, loss: 0.05433014780282974 2023-01-24 01:53:11.962645: step: 622/463, loss: 0.10262155532836914 2023-01-24 01:53:12.635603: step: 624/463, loss: 0.17306746542453766 2023-01-24 01:53:13.259008: step: 626/463, loss: 0.14202268421649933 2023-01-24 01:53:13.852171: step: 628/463, loss: 0.02132358029484749 2023-01-24 01:53:14.453246: step: 630/463, loss: 0.11165513098239899 2023-01-24 01:53:15.030418: step: 632/463, loss: 0.05399324744939804 2023-01-24 01:53:15.651704: step: 634/463, loss: 0.13710065186023712 2023-01-24 01:53:16.264521: step: 636/463, loss: 0.3590250611305237 2023-01-24 01:53:16.809336: step: 638/463, loss: 0.016618745401501656 2023-01-24 01:53:17.467408: step: 640/463, loss: 0.25671032071113586 2023-01-24 01:53:18.089567: step: 642/463, loss: 0.12181492894887924 2023-01-24 01:53:18.695131: step: 644/463, loss: 0.028102317824959755 2023-01-24 01:53:19.273453: step: 646/463, loss: 0.22035276889801025 2023-01-24 01:53:19.946269: step: 648/463, loss: 0.07939334958791733 2023-01-24 01:53:20.653086: step: 650/463, loss: 0.1954226940870285 2023-01-24 01:53:21.285375: step: 652/463, loss: 0.08822217583656311 2023-01-24 01:53:21.922311: step: 654/463, loss: 0.0699228048324585 2023-01-24 01:53:22.522283: step: 656/463, loss: 0.1583009511232376 2023-01-24 01:53:23.265404: step: 658/463, loss: 0.2637269198894501 2023-01-24 01:53:23.934236: step: 660/463, loss: 0.1944698989391327 2023-01-24 01:53:24.548634: step: 662/463, loss: 0.161536306142807 2023-01-24 01:53:25.192832: step: 664/463, loss: 0.09331867098808289 2023-01-24 01:53:25.805447: step: 666/463, loss: 0.014847720973193645 2023-01-24 01:53:26.461519: step: 668/463, loss: 0.12522071599960327 2023-01-24 01:53:27.226666: step: 670/463, loss: 0.06093982979655266 2023-01-24 01:53:27.871365: step: 672/463, loss: 0.06558510661125183 2023-01-24 01:53:28.507987: step: 674/463, loss: 0.05113206058740616 2023-01-24 01:53:29.219482: step: 676/463, loss: 0.20429395139217377 2023-01-24 01:53:29.964185: step: 678/463, loss: 0.9285648465156555 2023-01-24 01:53:30.677203: step: 680/463, loss: 0.03893982246518135 2023-01-24 01:53:31.370164: step: 682/463, loss: 0.07223246246576309 2023-01-24 01:53:32.087846: step: 684/463, loss: 0.0838775485754013 2023-01-24 01:53:32.757252: step: 686/463, loss: 0.05167365074157715 2023-01-24 01:53:33.312751: step: 688/463, loss: 0.11468897759914398 2023-01-24 01:53:34.014556: step: 690/463, loss: 0.0827876478433609 2023-01-24 01:53:34.725718: step: 692/463, loss: 0.04692939668893814 2023-01-24 01:53:35.292468: step: 694/463, loss: 0.09748489409685135 2023-01-24 01:53:36.087102: step: 696/463, loss: 0.1157601922750473 2023-01-24 01:53:36.754449: step: 698/463, loss: 0.058855295181274414 2023-01-24 01:53:37.392542: step: 700/463, loss: 0.041207291185855865 2023-01-24 01:53:38.008793: step: 702/463, loss: 0.43969887495040894 2023-01-24 01:53:38.669833: step: 704/463, loss: 0.0555635541677475 2023-01-24 01:53:39.319490: step: 706/463, loss: 0.18539059162139893 2023-01-24 01:53:39.929180: step: 708/463, loss: 0.058708589524030685 2023-01-24 01:53:40.630280: step: 710/463, loss: 0.09798247367143631 2023-01-24 01:53:41.284045: step: 712/463, loss: 0.11035928875207901 2023-01-24 01:53:41.911372: step: 714/463, loss: 0.09508532285690308 2023-01-24 01:53:42.501011: step: 716/463, loss: 0.14314141869544983 2023-01-24 01:53:43.148305: step: 718/463, loss: 0.09973669797182083 2023-01-24 01:53:43.791267: step: 720/463, loss: 0.29856204986572266 2023-01-24 01:53:44.387069: step: 722/463, loss: 0.4054211676120758 2023-01-24 01:53:45.060245: step: 724/463, loss: 0.0859195664525032 2023-01-24 01:53:45.684383: step: 726/463, loss: 0.09558829665184021 2023-01-24 01:53:46.332163: step: 728/463, loss: 0.20391599833965302 2023-01-24 01:53:46.898202: step: 730/463, loss: 0.0953231006860733 2023-01-24 01:53:47.562667: step: 732/463, loss: 0.07102783024311066 2023-01-24 01:53:48.203660: step: 734/463, loss: 0.0794430747628212 2023-01-24 01:53:48.812076: step: 736/463, loss: 0.07178302854299545 2023-01-24 01:53:49.407254: step: 738/463, loss: 0.07235711812973022 2023-01-24 01:53:49.986701: step: 740/463, loss: 0.12912996113300323 2023-01-24 01:53:50.655029: step: 742/463, loss: 0.06768649816513062 2023-01-24 01:53:51.250456: step: 744/463, loss: 0.029639367014169693 2023-01-24 01:53:51.860407: step: 746/463, loss: 0.1089198887348175 2023-01-24 01:53:52.498401: step: 748/463, loss: 0.2974899709224701 2023-01-24 01:53:53.161555: step: 750/463, loss: 0.111813023686409 2023-01-24 01:53:53.816689: step: 752/463, loss: 0.2780773937702179 2023-01-24 01:53:54.409766: step: 754/463, loss: 0.0642269179224968 2023-01-24 01:53:55.011835: step: 756/463, loss: 0.08696962147951126 2023-01-24 01:53:55.642292: step: 758/463, loss: 0.05171702057123184 2023-01-24 01:53:56.254296: step: 760/463, loss: 0.15012021362781525 2023-01-24 01:53:56.879727: step: 762/463, loss: 0.022429676726460457 2023-01-24 01:53:57.522517: step: 764/463, loss: 0.051252152770757675 2023-01-24 01:53:58.062353: step: 766/463, loss: 0.14745859801769257 2023-01-24 01:53:58.690128: step: 768/463, loss: 0.1641344130039215 2023-01-24 01:53:59.290329: step: 770/463, loss: 0.040366142988204956 2023-01-24 01:53:59.909587: step: 772/463, loss: 0.11375197023153305 2023-01-24 01:54:00.529406: step: 774/463, loss: 0.19613416492938995 2023-01-24 01:54:01.100069: step: 776/463, loss: 0.1344902366399765 2023-01-24 01:54:01.700747: step: 778/463, loss: 0.32456353306770325 2023-01-24 01:54:02.364206: step: 780/463, loss: 0.04453514516353607 2023-01-24 01:54:02.983475: step: 782/463, loss: 0.234833762049675 2023-01-24 01:54:03.585290: step: 784/463, loss: 0.03816795349121094 2023-01-24 01:54:04.297225: step: 786/463, loss: 0.16284196078777313 2023-01-24 01:54:04.904033: step: 788/463, loss: 0.07719583809375763 2023-01-24 01:54:05.546771: step: 790/463, loss: 0.14916366338729858 2023-01-24 01:54:06.180374: step: 792/463, loss: 0.2743769884109497 2023-01-24 01:54:06.781714: step: 794/463, loss: 0.2819214165210724 2023-01-24 01:54:07.416091: step: 796/463, loss: 0.13958901166915894 2023-01-24 01:54:08.100377: step: 798/463, loss: 0.024825511500239372 2023-01-24 01:54:08.746024: step: 800/463, loss: 0.11520198732614517 2023-01-24 01:54:09.358986: step: 802/463, loss: 0.5912827849388123 2023-01-24 01:54:09.999635: step: 804/463, loss: 0.11832356452941895 2023-01-24 01:54:11.242351: step: 806/463, loss: 1.5584622621536255 2023-01-24 01:54:11.876593: step: 808/463, loss: 0.11311966180801392 2023-01-24 01:54:12.520289: step: 810/463, loss: 0.1519646793603897 2023-01-24 01:54:13.195515: step: 812/463, loss: 0.1936693787574768 2023-01-24 01:54:13.873555: step: 814/463, loss: 0.0492548905313015 2023-01-24 01:54:14.556247: step: 816/463, loss: 0.45868703722953796 2023-01-24 01:54:15.247515: step: 818/463, loss: 0.08923227339982986 2023-01-24 01:54:15.887386: step: 820/463, loss: 0.08991425484418869 2023-01-24 01:54:16.496529: step: 822/463, loss: 0.06471320986747742 2023-01-24 01:54:17.027503: step: 824/463, loss: 0.052482862025499344 2023-01-24 01:54:17.626813: step: 826/463, loss: 0.19096697866916656 2023-01-24 01:54:18.185411: step: 828/463, loss: 0.15873336791992188 2023-01-24 01:54:18.803044: step: 830/463, loss: 0.16968882083892822 2023-01-24 01:54:19.409972: step: 832/463, loss: 0.11979441344738007 2023-01-24 01:54:20.037570: step: 834/463, loss: 0.316781610250473 2023-01-24 01:54:20.654616: step: 836/463, loss: 0.11227911710739136 2023-01-24 01:54:21.278503: step: 838/463, loss: 0.07925921678543091 2023-01-24 01:54:21.932966: step: 840/463, loss: 0.1379840224981308 2023-01-24 01:54:22.519743: step: 842/463, loss: 0.07753778249025345 2023-01-24 01:54:23.119092: step: 844/463, loss: 0.5877388715744019 2023-01-24 01:54:23.755963: step: 846/463, loss: 0.21845382452011108 2023-01-24 01:54:24.388067: step: 848/463, loss: 0.10501208901405334 2023-01-24 01:54:25.040507: step: 850/463, loss: 0.05404500290751457 2023-01-24 01:54:25.660128: step: 852/463, loss: 0.20136189460754395 2023-01-24 01:54:26.337937: step: 854/463, loss: 0.09578698128461838 2023-01-24 01:54:26.946098: step: 856/463, loss: 0.2713068723678589 2023-01-24 01:54:27.544822: step: 858/463, loss: 0.16097477078437805 2023-01-24 01:54:28.158888: step: 860/463, loss: 0.21929654479026794 2023-01-24 01:54:28.729511: step: 862/463, loss: 0.10486377775669098 2023-01-24 01:54:29.336146: step: 864/463, loss: 0.11905966699123383 2023-01-24 01:54:29.920847: step: 866/463, loss: 0.04707135632634163 2023-01-24 01:54:30.563675: step: 868/463, loss: 0.15588407218456268 2023-01-24 01:54:31.243597: step: 870/463, loss: 0.07511145621538162 2023-01-24 01:54:31.823868: step: 872/463, loss: 0.021590089425444603 2023-01-24 01:54:32.456866: step: 874/463, loss: 0.04562314599752426 2023-01-24 01:54:33.030440: step: 876/463, loss: 0.9890525341033936 2023-01-24 01:54:33.649018: step: 878/463, loss: 0.06867709010839462 2023-01-24 01:54:34.250949: step: 880/463, loss: 0.04112846776843071 2023-01-24 01:54:34.850947: step: 882/463, loss: 0.2877635955810547 2023-01-24 01:54:35.445010: step: 884/463, loss: 0.0786876529455185 2023-01-24 01:54:36.007576: step: 886/463, loss: 0.06392823904752731 2023-01-24 01:54:36.627019: step: 888/463, loss: 0.03156006336212158 2023-01-24 01:54:37.314508: step: 890/463, loss: 0.5516294240951538 2023-01-24 01:54:37.875263: step: 892/463, loss: 0.11540041118860245 2023-01-24 01:54:38.622603: step: 894/463, loss: 0.21750399470329285 2023-01-24 01:54:39.243503: step: 896/463, loss: 0.37393686175346375 2023-01-24 01:54:39.857520: step: 898/463, loss: 0.17360295355319977 2023-01-24 01:54:40.400344: step: 900/463, loss: 0.16414818167686462 2023-01-24 01:54:40.966897: step: 902/463, loss: 0.10427167266607285 2023-01-24 01:54:41.623017: step: 904/463, loss: 0.116034135222435 2023-01-24 01:54:42.254100: step: 906/463, loss: 0.3298241198062897 2023-01-24 01:54:42.864391: step: 908/463, loss: 0.1641688346862793 2023-01-24 01:54:43.456713: step: 910/463, loss: 0.0668342337012291 2023-01-24 01:54:44.026282: step: 912/463, loss: 0.052535999566316605 2023-01-24 01:54:44.576357: step: 914/463, loss: 0.07598011195659637 2023-01-24 01:54:45.150223: step: 916/463, loss: 0.05164685100317001 2023-01-24 01:54:45.733549: step: 918/463, loss: 0.0815548375248909 2023-01-24 01:54:46.321611: step: 920/463, loss: 0.20160195231437683 2023-01-24 01:54:46.938432: step: 922/463, loss: 0.07057924568653107 2023-01-24 01:54:47.550372: step: 924/463, loss: 0.10972698777914047 2023-01-24 01:54:48.155638: step: 926/463, loss: 0.046164121478796005 ================================================== Loss: 0.190 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33719835907335904, 'r': 0.3314397533206831, 'f1': 0.3342942583732057}, 'combined': 0.2463220851170989, 'epoch': 15} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35909619035492396, 'r': 0.36239670681039204, 'f1': 0.36073889936844145}, 'combined': 0.279615797596591, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3033029359430605, 'r': 0.32344639468690706, 'f1': 0.3130509641873278}, 'combined': 0.23066913150645205, 'epoch': 15} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33741933243968636, 'r': 0.3603688090945915, 'f1': 0.34851667937325825}, 'combined': 0.2701421151122863, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3137106188383886, 'r': 0.3315689083358301, 'f1': 0.32239264703502296}, 'combined': 0.23755247676264848, 'epoch': 15} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34452698092054435, 'r': 0.3549767882462594, 'f1': 0.3496738303412677}, 'combined': 0.2710390455276812, 'epoch': 15} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3577586206896552, 'r': 0.29642857142857143, 'f1': 0.32421875}, 'combined': 0.21614583333333331, 'epoch': 15} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2980769230769231, 'r': 0.33695652173913043, 'f1': 0.31632653061224486}, 'combined': 0.15816326530612243, 'epoch': 15} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.475, 'r': 0.16379310344827586, 'f1': 0.24358974358974356}, 'combined': 0.16239316239316237, 'epoch': 15} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34837914937759334, 'r': 0.31863140417457303, 'f1': 0.3328419226957383}, 'combined': 0.24525194303896505, 'epoch': 14} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3461635956880312, 'r': 0.3677988204185331, 'f1': 0.3566534016179715}, 'combined': 0.2764490481440736, 'epoch': 14} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3922413793103448, 'r': 0.325, 'f1': 0.35546875000000006}, 'combined': 0.23697916666666669, 'epoch': 14} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3283040393999639, 'r': 0.31335281179920654, 'f1': 0.3206542365401589}, 'combined': 0.23627154271380127, 'epoch': 6} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34970123997901775, 'r': 0.3199189915192662, 'f1': 0.3341478132367659}, 'combined': 0.2590045250926128, 'epoch': 6} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.38, 'r': 0.41304347826086957, 'f1': 0.39583333333333337}, 'combined': 0.19791666666666669, 'epoch': 6} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33842201576576575, 'r': 0.3326425126502214, 'f1': 0.3355073763955343}, 'combined': 0.2472159615546042, 'epoch': 14} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33644544699224593, 'r': 0.3627302475385151, 'f1': 0.3490937720671424}, 'combined': 0.2705894309802731, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.20689655172413793, 'f1': 0.2926829268292683}, 'combined': 0.19512195121951217, 'epoch': 14} ****************************** Epoch: 16 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:57:22.107599: step: 2/463, loss: 0.09392962604761124 2023-01-24 01:57:22.782237: step: 4/463, loss: 0.09815418720245361 2023-01-24 01:57:23.485980: step: 6/463, loss: 0.035261865705251694 2023-01-24 01:57:24.181955: step: 8/463, loss: 0.1329983025789261 2023-01-24 01:57:24.774752: step: 10/463, loss: 0.03475189581513405 2023-01-24 01:57:25.418021: step: 12/463, loss: 0.1617887318134308 2023-01-24 01:57:26.039226: step: 14/463, loss: 0.07033581286668777 2023-01-24 01:57:26.725054: step: 16/463, loss: 0.05658769607543945 2023-01-24 01:57:27.345470: step: 18/463, loss: 0.1192563846707344 2023-01-24 01:57:27.900151: step: 20/463, loss: 0.12088558822870255 2023-01-24 01:57:28.509222: step: 22/463, loss: 0.11392351984977722 2023-01-24 01:57:29.163830: step: 24/463, loss: 0.47715991735458374 2023-01-24 01:57:29.742262: step: 26/463, loss: 0.057405758649110794 2023-01-24 01:57:30.297017: step: 28/463, loss: 0.0925820916891098 2023-01-24 01:57:30.980594: step: 30/463, loss: 0.01833358220756054 2023-01-24 01:57:31.658029: step: 32/463, loss: 0.1886693388223648 2023-01-24 01:57:32.266984: step: 34/463, loss: 0.1294589638710022 2023-01-24 01:57:32.805235: step: 36/463, loss: 0.034382808953523636 2023-01-24 01:57:33.466425: step: 38/463, loss: 0.2707841992378235 2023-01-24 01:57:34.105331: step: 40/463, loss: 0.042352087795734406 2023-01-24 01:57:34.762086: step: 42/463, loss: 0.1567608267068863 2023-01-24 01:57:35.517112: step: 44/463, loss: 0.041665706783533096 2023-01-24 01:57:36.119003: step: 46/463, loss: 0.5418013334274292 2023-01-24 01:57:36.728842: step: 48/463, loss: 0.03634791821241379 2023-01-24 01:57:37.340824: step: 50/463, loss: 0.041979484260082245 2023-01-24 01:57:37.928445: step: 52/463, loss: 0.08751584589481354 2023-01-24 01:57:38.586626: step: 54/463, loss: 0.07277601212263107 2023-01-24 01:57:39.217061: step: 56/463, loss: 0.018951643258333206 2023-01-24 01:57:39.838378: step: 58/463, loss: 0.029824329540133476 2023-01-24 01:57:40.461507: step: 60/463, loss: 0.103655606508255 2023-01-24 01:57:41.063937: step: 62/463, loss: 0.1494414210319519 2023-01-24 01:57:41.719188: step: 64/463, loss: 0.19174297153949738 2023-01-24 01:57:42.301003: step: 66/463, loss: 0.01808498427271843 2023-01-24 01:57:42.944902: step: 68/463, loss: 0.2951965928077698 2023-01-24 01:57:43.587906: step: 70/463, loss: 1.166751742362976 2023-01-24 01:57:44.210455: step: 72/463, loss: 0.10040746629238129 2023-01-24 01:57:44.776325: step: 74/463, loss: 0.18768052756786346 2023-01-24 01:57:45.398198: step: 76/463, loss: 0.11471167951822281 2023-01-24 01:57:45.995903: step: 78/463, loss: 0.1377413272857666 2023-01-24 01:57:46.637854: step: 80/463, loss: 0.04321456700563431 2023-01-24 01:57:47.235621: step: 82/463, loss: 0.02770810015499592 2023-01-24 01:57:47.885871: step: 84/463, loss: 0.06217678263783455 2023-01-24 01:57:48.462945: step: 86/463, loss: 0.048308681696653366 2023-01-24 01:57:49.089686: step: 88/463, loss: 0.2211398482322693 2023-01-24 01:57:49.723558: step: 90/463, loss: 0.058094583451747894 2023-01-24 01:57:50.378214: step: 92/463, loss: 4.702839374542236 2023-01-24 01:57:51.016214: step: 94/463, loss: 0.16564664244651794 2023-01-24 01:57:51.591635: step: 96/463, loss: 0.07649922370910645 2023-01-24 01:57:52.204204: step: 98/463, loss: 0.18419580161571503 2023-01-24 01:57:52.798941: step: 100/463, loss: 0.009880303405225277 2023-01-24 01:57:53.440246: step: 102/463, loss: 0.05654880404472351 2023-01-24 01:57:54.087793: step: 104/463, loss: 0.08590541034936905 2023-01-24 01:57:54.760902: step: 106/463, loss: 0.015450265258550644 2023-01-24 01:57:55.389763: step: 108/463, loss: 0.09660845249891281 2023-01-24 01:57:56.062883: step: 110/463, loss: 0.07592320442199707 2023-01-24 01:57:56.648239: step: 112/463, loss: 0.13545340299606323 2023-01-24 01:57:57.246590: step: 114/463, loss: 0.10005438327789307 2023-01-24 01:57:57.907419: step: 116/463, loss: 0.027361372485756874 2023-01-24 01:57:58.567952: step: 118/463, loss: 0.06829187273979187 2023-01-24 01:57:59.204235: step: 120/463, loss: 0.02588752657175064 2023-01-24 01:57:59.860638: step: 122/463, loss: 0.23005206882953644 2023-01-24 01:58:00.520887: step: 124/463, loss: 0.14744412899017334 2023-01-24 01:58:01.186613: step: 126/463, loss: 0.06101863831281662 2023-01-24 01:58:01.826404: step: 128/463, loss: 0.04215003550052643 2023-01-24 01:58:02.408139: step: 130/463, loss: 0.10857198387384415 2023-01-24 01:58:02.971278: step: 132/463, loss: 0.01935398392379284 2023-01-24 01:58:03.542695: step: 134/463, loss: 0.03415912762284279 2023-01-24 01:58:04.098658: step: 136/463, loss: 0.09079161286354065 2023-01-24 01:58:04.739628: step: 138/463, loss: 0.04729504883289337 2023-01-24 01:58:05.415603: step: 140/463, loss: 0.20324020087718964 2023-01-24 01:58:06.091421: step: 142/463, loss: 0.1295374631881714 2023-01-24 01:58:06.685014: step: 144/463, loss: 0.08116629719734192 2023-01-24 01:58:07.264071: step: 146/463, loss: 0.10156693309545517 2023-01-24 01:58:07.850226: step: 148/463, loss: 0.07977838814258575 2023-01-24 01:58:08.429464: step: 150/463, loss: 0.09160180389881134 2023-01-24 01:58:09.030748: step: 152/463, loss: 0.0389992855489254 2023-01-24 01:58:09.672722: step: 154/463, loss: 0.16848435997962952 2023-01-24 01:58:10.304221: step: 156/463, loss: 0.04797053709626198 2023-01-24 01:58:10.950260: step: 158/463, loss: 0.15641289949417114 2023-01-24 01:58:11.543787: step: 160/463, loss: 0.03032768703997135 2023-01-24 01:58:12.157146: step: 162/463, loss: 0.17668519914150238 2023-01-24 01:58:12.812864: step: 164/463, loss: 0.16846053302288055 2023-01-24 01:58:13.442795: step: 166/463, loss: 0.1726134717464447 2023-01-24 01:58:14.044716: step: 168/463, loss: 0.04289911314845085 2023-01-24 01:58:14.565803: step: 170/463, loss: 0.02088414877653122 2023-01-24 01:58:15.137070: step: 172/463, loss: 0.11820492148399353 2023-01-24 01:58:15.757758: step: 174/463, loss: 0.023546187207102776 2023-01-24 01:58:16.372415: step: 176/463, loss: 0.03653356060385704 2023-01-24 01:58:16.967657: step: 178/463, loss: 0.05195843055844307 2023-01-24 01:58:17.550190: step: 180/463, loss: 0.1093713566660881 2023-01-24 01:58:18.131808: step: 182/463, loss: 0.09825106710195541 2023-01-24 01:58:18.769106: step: 184/463, loss: 0.13142557442188263 2023-01-24 01:58:19.409349: step: 186/463, loss: 0.05736227706074715 2023-01-24 01:58:19.972953: step: 188/463, loss: 0.06639780849218369 2023-01-24 01:58:20.630071: step: 190/463, loss: 0.03851672634482384 2023-01-24 01:58:21.211338: step: 192/463, loss: 0.038353074342012405 2023-01-24 01:58:21.798728: step: 194/463, loss: 0.0741320252418518 2023-01-24 01:58:22.380151: step: 196/463, loss: 0.0817531943321228 2023-01-24 01:58:22.939629: step: 198/463, loss: 0.10499387979507446 2023-01-24 01:58:23.555654: step: 200/463, loss: 0.010208223946392536 2023-01-24 01:58:24.196860: step: 202/463, loss: 0.16373755037784576 2023-01-24 01:58:24.810536: step: 204/463, loss: 0.08796074241399765 2023-01-24 01:58:25.483312: step: 206/463, loss: 0.061089225113391876 2023-01-24 01:58:26.060528: step: 208/463, loss: 0.1279318928718567 2023-01-24 01:58:26.656295: step: 210/463, loss: 0.04245173931121826 2023-01-24 01:58:27.268064: step: 212/463, loss: 0.05689255893230438 2023-01-24 01:58:27.881667: step: 214/463, loss: 0.2360590696334839 2023-01-24 01:58:28.439264: step: 216/463, loss: 0.8640976548194885 2023-01-24 01:58:29.045502: step: 218/463, loss: 0.7225779891014099 2023-01-24 01:58:29.676701: step: 220/463, loss: 0.2748264968395233 2023-01-24 01:58:30.314083: step: 222/463, loss: 0.0847368836402893 2023-01-24 01:58:30.924095: step: 224/463, loss: 0.06542827188968658 2023-01-24 01:58:31.528527: step: 226/463, loss: 0.09589657187461853 2023-01-24 01:58:32.135287: step: 228/463, loss: 0.16839241981506348 2023-01-24 01:58:32.733050: step: 230/463, loss: 0.09850162267684937 2023-01-24 01:58:33.479481: step: 232/463, loss: 0.10379438102245331 2023-01-24 01:58:34.035107: step: 234/463, loss: 0.04850511625409126 2023-01-24 01:58:34.684818: step: 236/463, loss: 0.0336916521191597 2023-01-24 01:58:35.245581: step: 238/463, loss: 0.09501839429140091 2023-01-24 01:58:35.893978: step: 240/463, loss: 0.16434313356876373 2023-01-24 01:58:36.469301: step: 242/463, loss: 0.0782540887594223 2023-01-24 01:58:37.069517: step: 244/463, loss: 0.11280696839094162 2023-01-24 01:58:37.690218: step: 246/463, loss: 0.08513292670249939 2023-01-24 01:58:38.284085: step: 248/463, loss: 0.05924632027745247 2023-01-24 01:58:38.900099: step: 250/463, loss: 0.07422038167715073 2023-01-24 01:58:39.522778: step: 252/463, loss: 1.486720323562622 2023-01-24 01:58:40.190829: step: 254/463, loss: 0.07059735059738159 2023-01-24 01:58:40.836783: step: 256/463, loss: 0.10897721350193024 2023-01-24 01:58:41.460956: step: 258/463, loss: 0.13812844455242157 2023-01-24 01:58:42.148175: step: 260/463, loss: 0.18931354582309723 2023-01-24 01:58:42.740785: step: 262/463, loss: 0.04782324656844139 2023-01-24 01:58:43.330946: step: 264/463, loss: 0.06735178083181381 2023-01-24 01:58:44.007608: step: 266/463, loss: 0.17109337449073792 2023-01-24 01:58:44.587305: step: 268/463, loss: 0.06339196860790253 2023-01-24 01:58:45.233985: step: 270/463, loss: 0.06292379647493362 2023-01-24 01:58:45.860064: step: 272/463, loss: 0.18615616858005524 2023-01-24 01:58:46.475676: step: 274/463, loss: 0.02608204260468483 2023-01-24 01:58:47.068940: step: 276/463, loss: 0.07891713827848434 2023-01-24 01:58:47.602591: step: 278/463, loss: 0.5470752120018005 2023-01-24 01:58:48.181722: step: 280/463, loss: 0.0880608856678009 2023-01-24 01:58:48.750854: step: 282/463, loss: 0.11302398890256882 2023-01-24 01:58:49.434564: step: 284/463, loss: 0.7214975357055664 2023-01-24 01:58:50.097085: step: 286/463, loss: 0.19971822202205658 2023-01-24 01:58:50.680066: step: 288/463, loss: 0.016864435747265816 2023-01-24 01:58:51.294965: step: 290/463, loss: 0.05873178690671921 2023-01-24 01:58:51.903258: step: 292/463, loss: 0.0906856432557106 2023-01-24 01:58:52.554644: step: 294/463, loss: 0.07318469882011414 2023-01-24 01:58:53.153117: step: 296/463, loss: 0.007730225101113319 2023-01-24 01:58:53.736019: step: 298/463, loss: 0.02793276682496071 2023-01-24 01:58:54.363557: step: 300/463, loss: 0.08700327575206757 2023-01-24 01:58:54.986053: step: 302/463, loss: 0.2188662439584732 2023-01-24 01:58:55.613349: step: 304/463, loss: 0.14005441963672638 2023-01-24 01:58:56.254778: step: 306/463, loss: 0.049777571111917496 2023-01-24 01:58:56.875782: step: 308/463, loss: 0.0704931691288948 2023-01-24 01:58:57.441457: step: 310/463, loss: 0.03475925326347351 2023-01-24 01:58:57.971896: step: 312/463, loss: 0.4202437996864319 2023-01-24 01:58:58.555097: step: 314/463, loss: 0.11502720415592194 2023-01-24 01:58:59.154847: step: 316/463, loss: 0.06981195509433746 2023-01-24 01:58:59.769645: step: 318/463, loss: 0.38931483030319214 2023-01-24 01:59:00.349817: step: 320/463, loss: 0.04184393957257271 2023-01-24 01:59:00.975228: step: 322/463, loss: 0.022175397723913193 2023-01-24 01:59:01.544203: step: 324/463, loss: 0.08440104871988297 2023-01-24 01:59:02.165891: step: 326/463, loss: 0.07439716160297394 2023-01-24 01:59:02.719089: step: 328/463, loss: 0.6232091784477234 2023-01-24 01:59:03.354752: step: 330/463, loss: 0.12134966999292374 2023-01-24 01:59:03.979474: step: 332/463, loss: 0.20825999975204468 2023-01-24 01:59:04.594303: step: 334/463, loss: 0.020665599033236504 2023-01-24 01:59:05.178239: step: 336/463, loss: 0.038438279181718826 2023-01-24 01:59:05.801851: step: 338/463, loss: 0.03745255991816521 2023-01-24 01:59:06.435028: step: 340/463, loss: 0.7567254304885864 2023-01-24 01:59:07.070561: step: 342/463, loss: 0.0475521981716156 2023-01-24 01:59:07.713201: step: 344/463, loss: 0.12182119488716125 2023-01-24 01:59:08.336643: step: 346/463, loss: 0.27585577964782715 2023-01-24 01:59:08.945233: step: 348/463, loss: 0.08076296001672745 2023-01-24 01:59:09.597920: step: 350/463, loss: 0.05759488791227341 2023-01-24 01:59:10.166712: step: 352/463, loss: 0.07504604756832123 2023-01-24 01:59:10.791878: step: 354/463, loss: 0.3895440399646759 2023-01-24 01:59:11.383484: step: 356/463, loss: 0.04366806149482727 2023-01-24 01:59:11.960543: step: 358/463, loss: 0.13325652480125427 2023-01-24 01:59:12.562473: step: 360/463, loss: 0.7767708897590637 2023-01-24 01:59:13.115265: step: 362/463, loss: 0.045997437089681625 2023-01-24 01:59:13.696255: step: 364/463, loss: 0.09603548049926758 2023-01-24 01:59:14.285064: step: 366/463, loss: 0.08423476666212082 2023-01-24 01:59:14.859488: step: 368/463, loss: 0.12476283311843872 2023-01-24 01:59:15.421166: step: 370/463, loss: 0.1248638927936554 2023-01-24 01:59:16.111365: step: 372/463, loss: 0.9874676465988159 2023-01-24 01:59:16.695201: step: 374/463, loss: 0.9051044583320618 2023-01-24 01:59:17.344607: step: 376/463, loss: 0.02435396797955036 2023-01-24 01:59:17.986652: step: 378/463, loss: 0.054532065987586975 2023-01-24 01:59:18.579628: step: 380/463, loss: 1.1708176136016846 2023-01-24 01:59:19.147602: step: 382/463, loss: 0.06749589741230011 2023-01-24 01:59:19.884509: step: 384/463, loss: 0.07421965897083282 2023-01-24 01:59:20.546810: step: 386/463, loss: 0.06768511235713959 2023-01-24 01:59:21.106332: step: 388/463, loss: 0.05945784971117973 2023-01-24 01:59:21.712481: step: 390/463, loss: 0.08609744161367416 2023-01-24 01:59:22.313595: step: 392/463, loss: 0.06459945440292358 2023-01-24 01:59:23.028328: step: 394/463, loss: 0.11390979588031769 2023-01-24 01:59:23.603913: step: 396/463, loss: 0.09988781809806824 2023-01-24 01:59:24.218544: step: 398/463, loss: 0.03948311507701874 2023-01-24 01:59:25.015029: step: 400/463, loss: 0.13762977719306946 2023-01-24 01:59:25.605947: step: 402/463, loss: 0.30338630080223083 2023-01-24 01:59:26.187227: step: 404/463, loss: 0.11689325422048569 2023-01-24 01:59:26.794494: step: 406/463, loss: 0.07996908575296402 2023-01-24 01:59:27.394675: step: 408/463, loss: 0.0428166538476944 2023-01-24 01:59:28.000836: step: 410/463, loss: 0.06921526789665222 2023-01-24 01:59:28.645744: step: 412/463, loss: 0.06507103890180588 2023-01-24 01:59:29.288036: step: 414/463, loss: 0.11402113735675812 2023-01-24 01:59:29.926414: step: 416/463, loss: 0.266824334859848 2023-01-24 01:59:30.555243: step: 418/463, loss: 0.07551302760839462 2023-01-24 01:59:31.122939: step: 420/463, loss: 0.0763019323348999 2023-01-24 01:59:31.696046: step: 422/463, loss: 0.1426866203546524 2023-01-24 01:59:32.299117: step: 424/463, loss: 0.0955159068107605 2023-01-24 01:59:32.894533: step: 426/463, loss: 0.046397652477025986 2023-01-24 01:59:33.623954: step: 428/463, loss: 0.05934610962867737 2023-01-24 01:59:34.218820: step: 430/463, loss: 0.1134750247001648 2023-01-24 01:59:34.805999: step: 432/463, loss: 0.585870087146759 2023-01-24 01:59:35.415618: step: 434/463, loss: 0.028768330812454224 2023-01-24 01:59:36.018721: step: 436/463, loss: 0.21743199229240417 2023-01-24 01:59:36.584791: step: 438/463, loss: 0.11486555635929108 2023-01-24 01:59:37.275564: step: 440/463, loss: 0.12417805939912796 2023-01-24 01:59:37.867811: step: 442/463, loss: 0.11379312723875046 2023-01-24 01:59:38.503689: step: 444/463, loss: 0.24372287094593048 2023-01-24 01:59:39.186264: step: 446/463, loss: 0.010859759524464607 2023-01-24 01:59:39.847205: step: 448/463, loss: 0.2165071666240692 2023-01-24 01:59:40.401577: step: 450/463, loss: 0.04450880363583565 2023-01-24 01:59:41.022719: step: 452/463, loss: 0.14124315977096558 2023-01-24 01:59:41.625560: step: 454/463, loss: 0.14413774013519287 2023-01-24 01:59:42.270121: step: 456/463, loss: 0.10476325452327728 2023-01-24 01:59:42.953257: step: 458/463, loss: 0.030846182256937027 2023-01-24 01:59:43.564949: step: 460/463, loss: 0.13036273419857025 2023-01-24 01:59:44.204777: step: 462/463, loss: 0.17384564876556396 2023-01-24 01:59:44.780769: step: 464/463, loss: 0.08751040697097778 2023-01-24 01:59:45.374665: step: 466/463, loss: 0.06706613302230835 2023-01-24 01:59:46.043479: step: 468/463, loss: 0.08410689979791641 2023-01-24 01:59:46.675111: step: 470/463, loss: 0.3431842625141144 2023-01-24 01:59:47.274351: step: 472/463, loss: 0.035335078835487366 2023-01-24 01:59:47.900432: step: 474/463, loss: 0.11014202237129211 2023-01-24 01:59:48.499556: step: 476/463, loss: 0.08605427294969559 2023-01-24 01:59:49.151984: step: 478/463, loss: 0.1027742400765419 2023-01-24 01:59:49.792195: step: 480/463, loss: 0.1063171923160553 2023-01-24 01:59:50.399880: step: 482/463, loss: 0.003478048136457801 2023-01-24 01:59:51.040646: step: 484/463, loss: 0.1457296758890152 2023-01-24 01:59:51.659588: step: 486/463, loss: 0.1212291270494461 2023-01-24 01:59:52.256671: step: 488/463, loss: 0.04395725578069687 2023-01-24 01:59:52.873111: step: 490/463, loss: 0.0438603051006794 2023-01-24 01:59:53.490157: step: 492/463, loss: 0.06670898199081421 2023-01-24 01:59:54.140339: step: 494/463, loss: 0.06832463294267654 2023-01-24 01:59:54.807091: step: 496/463, loss: 0.10354993492364883 2023-01-24 01:59:55.438860: step: 498/463, loss: 0.1462126076221466 2023-01-24 01:59:56.089156: step: 500/463, loss: 0.49212706089019775 2023-01-24 01:59:56.616974: step: 502/463, loss: 0.20970354974269867 2023-01-24 01:59:57.331312: step: 504/463, loss: 0.062238797545433044 2023-01-24 01:59:57.948371: step: 506/463, loss: 0.3281267285346985 2023-01-24 01:59:58.637230: step: 508/463, loss: 0.08876488357782364 2023-01-24 01:59:59.216158: step: 510/463, loss: 0.0857429951429367 2023-01-24 01:59:59.815595: step: 512/463, loss: 0.027456985786557198 2023-01-24 02:00:00.439252: step: 514/463, loss: 0.8833217620849609 2023-01-24 02:00:01.174229: step: 516/463, loss: 0.11567305773496628 2023-01-24 02:00:01.798149: step: 518/463, loss: 0.3653855323791504 2023-01-24 02:00:02.413591: step: 520/463, loss: 0.08590216934680939 2023-01-24 02:00:03.030657: step: 522/463, loss: 0.06966375559568405 2023-01-24 02:00:03.631655: step: 524/463, loss: 0.12032757699489594 2023-01-24 02:00:04.281716: step: 526/463, loss: 0.1204896792769432 2023-01-24 02:00:04.913823: step: 528/463, loss: 0.16273993253707886 2023-01-24 02:00:05.493366: step: 530/463, loss: 0.0689341276884079 2023-01-24 02:00:06.044923: step: 532/463, loss: 0.08280076831579208 2023-01-24 02:00:06.649527: step: 534/463, loss: 0.06909555941820145 2023-01-24 02:00:07.224029: step: 536/463, loss: 0.08990427851676941 2023-01-24 02:00:07.804175: step: 538/463, loss: 0.120759017765522 2023-01-24 02:00:08.288915: step: 540/463, loss: 0.08502160757780075 2023-01-24 02:00:08.910008: step: 542/463, loss: 0.06618203967809677 2023-01-24 02:00:09.585792: step: 544/463, loss: 0.16904030740261078 2023-01-24 02:00:10.253292: step: 546/463, loss: 0.05971629172563553 2023-01-24 02:00:10.820736: step: 548/463, loss: 0.0433935821056366 2023-01-24 02:00:11.434198: step: 550/463, loss: 0.89015793800354 2023-01-24 02:00:12.093480: step: 552/463, loss: 2.8742613792419434 2023-01-24 02:00:12.717749: step: 554/463, loss: 0.5891845226287842 2023-01-24 02:00:13.394714: step: 556/463, loss: 1.4074040651321411 2023-01-24 02:00:14.043595: step: 558/463, loss: 0.15140140056610107 2023-01-24 02:00:14.647936: step: 560/463, loss: 0.01083358284085989 2023-01-24 02:00:15.349332: step: 562/463, loss: 0.16573470830917358 2023-01-24 02:00:15.973214: step: 564/463, loss: 0.09017710387706757 2023-01-24 02:00:16.594161: step: 566/463, loss: 0.11884922534227371 2023-01-24 02:00:17.168759: step: 568/463, loss: 0.18361549079418182 2023-01-24 02:00:17.717514: step: 570/463, loss: 0.0705447569489479 2023-01-24 02:00:18.355727: step: 572/463, loss: 0.10155633836984634 2023-01-24 02:00:18.950310: step: 574/463, loss: 0.0580846406519413 2023-01-24 02:00:19.556122: step: 576/463, loss: 0.07159799337387085 2023-01-24 02:00:20.204407: step: 578/463, loss: 0.17857329547405243 2023-01-24 02:00:20.849231: step: 580/463, loss: 0.12237653136253357 2023-01-24 02:00:21.534614: step: 582/463, loss: 0.04622486233711243 2023-01-24 02:00:22.115561: step: 584/463, loss: 0.04272199794650078 2023-01-24 02:00:22.698446: step: 586/463, loss: 0.4913056194782257 2023-01-24 02:00:23.349620: step: 588/463, loss: 0.13528907299041748 2023-01-24 02:00:23.886297: step: 590/463, loss: 0.0324455164372921 2023-01-24 02:00:24.474701: step: 592/463, loss: 0.21316123008728027 2023-01-24 02:00:25.105339: step: 594/463, loss: 0.06118357554078102 2023-01-24 02:00:25.801513: step: 596/463, loss: 0.023242700845003128 2023-01-24 02:00:26.432906: step: 598/463, loss: 0.11060090363025665 2023-01-24 02:00:27.044026: step: 600/463, loss: 0.05608632043004036 2023-01-24 02:00:27.606229: step: 602/463, loss: 0.10846617072820663 2023-01-24 02:00:28.262739: step: 604/463, loss: 0.2460298091173172 2023-01-24 02:00:28.868454: step: 606/463, loss: 0.13271866738796234 2023-01-24 02:00:29.520010: step: 608/463, loss: 0.1562288999557495 2023-01-24 02:00:30.053268: step: 610/463, loss: 0.050401438027620316 2023-01-24 02:00:30.743558: step: 612/463, loss: 0.2274414449930191 2023-01-24 02:00:31.328268: step: 614/463, loss: 0.11631855368614197 2023-01-24 02:00:31.862686: step: 616/463, loss: 0.04366886988282204 2023-01-24 02:00:32.452362: step: 618/463, loss: 0.17520736157894135 2023-01-24 02:00:33.146675: step: 620/463, loss: 0.4167800545692444 2023-01-24 02:00:33.715425: step: 622/463, loss: 0.040646426379680634 2023-01-24 02:00:34.332387: step: 624/463, loss: 0.29388168454170227 2023-01-24 02:00:34.978074: step: 626/463, loss: 0.08017006516456604 2023-01-24 02:00:35.609675: step: 628/463, loss: 0.14051766693592072 2023-01-24 02:00:36.280145: step: 630/463, loss: 0.12048596888780594 2023-01-24 02:00:36.856861: step: 632/463, loss: 0.0794791504740715 2023-01-24 02:00:37.416221: step: 634/463, loss: 0.10982103645801544 2023-01-24 02:00:38.047489: step: 636/463, loss: 0.08861525356769562 2023-01-24 02:00:38.610742: step: 638/463, loss: 0.11401385068893433 2023-01-24 02:00:39.185506: step: 640/463, loss: 0.19314491748809814 2023-01-24 02:00:39.703741: step: 642/463, loss: 0.028576646000146866 2023-01-24 02:00:40.280621: step: 644/463, loss: 0.14793406426906586 2023-01-24 02:00:40.894969: step: 646/463, loss: 0.15287043154239655 2023-01-24 02:00:41.500631: step: 648/463, loss: 0.08074107021093369 2023-01-24 02:00:42.095880: step: 650/463, loss: 0.24262771010398865 2023-01-24 02:00:42.722695: step: 652/463, loss: 0.13698968291282654 2023-01-24 02:00:43.325770: step: 654/463, loss: 0.03198055550456047 2023-01-24 02:00:44.039007: step: 656/463, loss: 0.06725078821182251 2023-01-24 02:00:44.653896: step: 658/463, loss: 0.056147851049900055 2023-01-24 02:00:45.180569: step: 660/463, loss: 0.11801397055387497 2023-01-24 02:00:45.834696: step: 662/463, loss: 0.07209347188472748 2023-01-24 02:00:46.406134: step: 664/463, loss: 0.13116048276424408 2023-01-24 02:00:47.024299: step: 666/463, loss: 0.18685312569141388 2023-01-24 02:00:47.648317: step: 668/463, loss: 0.09314344078302383 2023-01-24 02:00:48.288014: step: 670/463, loss: 0.032252855598926544 2023-01-24 02:00:48.917309: step: 672/463, loss: 0.08380568772554398 2023-01-24 02:00:49.517148: step: 674/463, loss: 0.08002911508083344 2023-01-24 02:00:50.244495: step: 676/463, loss: 0.20918309688568115 2023-01-24 02:00:50.809330: step: 678/463, loss: 0.12389720231294632 2023-01-24 02:00:51.450673: step: 680/463, loss: 0.05499038100242615 2023-01-24 02:00:52.045014: step: 682/463, loss: 0.12249012291431427 2023-01-24 02:00:52.665144: step: 684/463, loss: 0.1472024917602539 2023-01-24 02:00:53.300101: step: 686/463, loss: 0.12132182717323303 2023-01-24 02:00:53.934092: step: 688/463, loss: 0.1617874652147293 2023-01-24 02:00:54.585847: step: 690/463, loss: 0.04404882341623306 2023-01-24 02:00:55.223195: step: 692/463, loss: 0.10959150642156601 2023-01-24 02:00:55.906411: step: 694/463, loss: 0.5979035496711731 2023-01-24 02:00:56.531091: step: 696/463, loss: 0.051047198474407196 2023-01-24 02:00:57.216104: step: 698/463, loss: 0.06753871589899063 2023-01-24 02:00:57.930040: step: 700/463, loss: 0.08026600629091263 2023-01-24 02:00:58.511968: step: 702/463, loss: 0.10995573550462723 2023-01-24 02:00:59.142574: step: 704/463, loss: 0.24805115163326263 2023-01-24 02:00:59.803508: step: 706/463, loss: 0.08290494978427887 2023-01-24 02:01:00.454869: step: 708/463, loss: 1.1645796298980713 2023-01-24 02:01:01.075487: step: 710/463, loss: 0.02313629724085331 2023-01-24 02:01:01.679707: step: 712/463, loss: 0.12914200127124786 2023-01-24 02:01:02.316803: step: 714/463, loss: 0.036950692534446716 2023-01-24 02:01:02.947781: step: 716/463, loss: 0.2037443071603775 2023-01-24 02:01:03.529982: step: 718/463, loss: 0.05308006331324577 2023-01-24 02:01:04.127687: step: 720/463, loss: 0.016386108472943306 2023-01-24 02:01:04.659433: step: 722/463, loss: 0.04003317281603813 2023-01-24 02:01:05.207616: step: 724/463, loss: 0.0642763078212738 2023-01-24 02:01:05.790790: step: 726/463, loss: 27.997026443481445 2023-01-24 02:01:06.408241: step: 728/463, loss: 0.11276333779096603 2023-01-24 02:01:07.043043: step: 730/463, loss: 0.06293115764856339 2023-01-24 02:01:07.663028: step: 732/463, loss: 0.13065288960933685 2023-01-24 02:01:08.298600: step: 734/463, loss: 0.0786309465765953 2023-01-24 02:01:08.884304: step: 736/463, loss: 0.03341026231646538 2023-01-24 02:01:09.493766: step: 738/463, loss: 0.11513106524944305 2023-01-24 02:01:10.085982: step: 740/463, loss: 0.09458275139331818 2023-01-24 02:01:10.738221: step: 742/463, loss: 0.07368090748786926 2023-01-24 02:01:11.308805: step: 744/463, loss: 0.032300472259521484 2023-01-24 02:01:11.897471: step: 746/463, loss: 0.051260169595479965 2023-01-24 02:01:12.515451: step: 748/463, loss: 0.10917123407125473 2023-01-24 02:01:13.110827: step: 750/463, loss: 0.07715870440006256 2023-01-24 02:01:13.690295: step: 752/463, loss: 0.06744939088821411 2023-01-24 02:01:14.302150: step: 754/463, loss: 0.0620088204741478 2023-01-24 02:01:14.966178: step: 756/463, loss: 0.12787121534347534 2023-01-24 02:01:15.666605: step: 758/463, loss: 0.27201634645462036 2023-01-24 02:01:16.307142: step: 760/463, loss: 0.45338839292526245 2023-01-24 02:01:16.940991: step: 762/463, loss: 0.2032330334186554 2023-01-24 02:01:17.540005: step: 764/463, loss: 0.09003154188394547 2023-01-24 02:01:18.154389: step: 766/463, loss: 0.11048809438943863 2023-01-24 02:01:18.884477: step: 768/463, loss: 0.05917493253946304 2023-01-24 02:01:19.528137: step: 770/463, loss: 0.2775425910949707 2023-01-24 02:01:20.140742: step: 772/463, loss: 0.18640881776809692 2023-01-24 02:01:20.765390: step: 774/463, loss: 0.664874255657196 2023-01-24 02:01:21.408491: step: 776/463, loss: 0.1160912811756134 2023-01-24 02:01:21.966632: step: 778/463, loss: 0.11691008508205414 2023-01-24 02:01:22.553311: step: 780/463, loss: 0.694681704044342 2023-01-24 02:01:23.188264: step: 782/463, loss: 0.06484073400497437 2023-01-24 02:01:23.815729: step: 784/463, loss: 0.12963417172431946 2023-01-24 02:01:24.444815: step: 786/463, loss: 0.3227337598800659 2023-01-24 02:01:25.105094: step: 788/463, loss: 0.11776954680681229 2023-01-24 02:01:25.712986: step: 790/463, loss: 0.5221192836761475 2023-01-24 02:01:26.316048: step: 792/463, loss: 0.20026175677776337 2023-01-24 02:01:26.901532: step: 794/463, loss: 0.1020629033446312 2023-01-24 02:01:27.506743: step: 796/463, loss: 0.04261652007699013 2023-01-24 02:01:28.151623: step: 798/463, loss: 0.12003949284553528 2023-01-24 02:01:28.794583: step: 800/463, loss: 0.12371769547462463 2023-01-24 02:01:29.443779: step: 802/463, loss: 0.04024579003453255 2023-01-24 02:01:30.047522: step: 804/463, loss: 0.09820590913295746 2023-01-24 02:01:30.657278: step: 806/463, loss: 0.08650613576173782 2023-01-24 02:01:31.287222: step: 808/463, loss: 0.0983637124300003 2023-01-24 02:01:31.903993: step: 810/463, loss: 0.058388736099004745 2023-01-24 02:01:32.465378: step: 812/463, loss: 0.07422322779893875 2023-01-24 02:01:33.123593: step: 814/463, loss: 0.1438947468996048 2023-01-24 02:01:33.740655: step: 816/463, loss: 0.7769674062728882 2023-01-24 02:01:34.327496: step: 818/463, loss: 0.16482600569725037 2023-01-24 02:01:34.927473: step: 820/463, loss: 0.0664047971367836 2023-01-24 02:01:35.609958: step: 822/463, loss: 0.06674491614103317 2023-01-24 02:01:36.196466: step: 824/463, loss: 0.1059534102678299 2023-01-24 02:01:36.845023: step: 826/463, loss: 0.15020324289798737 2023-01-24 02:01:37.445838: step: 828/463, loss: 0.10606904327869415 2023-01-24 02:01:38.067734: step: 830/463, loss: 0.0406748466193676 2023-01-24 02:01:38.661281: step: 832/463, loss: 0.054141566157341 2023-01-24 02:01:39.233832: step: 834/463, loss: 0.1280638575553894 2023-01-24 02:01:39.865877: step: 836/463, loss: 0.12861491739749908 2023-01-24 02:01:40.468078: step: 838/463, loss: 0.10322591662406921 2023-01-24 02:01:41.102960: step: 840/463, loss: 0.2662043273448944 2023-01-24 02:01:41.707194: step: 842/463, loss: 0.06580877304077148 2023-01-24 02:01:42.334856: step: 844/463, loss: 0.18206185102462769 2023-01-24 02:01:43.008071: step: 846/463, loss: 0.15396100282669067 2023-01-24 02:01:43.627241: step: 848/463, loss: 0.108707956969738 2023-01-24 02:01:44.257605: step: 850/463, loss: 0.23792918026447296 2023-01-24 02:01:44.850706: step: 852/463, loss: 0.27361077070236206 2023-01-24 02:01:45.409967: step: 854/463, loss: 0.35999900102615356 2023-01-24 02:01:46.040833: step: 856/463, loss: 0.09687886387109756 2023-01-24 02:01:46.713056: step: 858/463, loss: 0.10489597916603088 2023-01-24 02:01:47.277538: step: 860/463, loss: 0.11961831152439117 2023-01-24 02:01:47.850192: step: 862/463, loss: 0.014973568730056286 2023-01-24 02:01:48.492717: step: 864/463, loss: 0.3130744695663452 2023-01-24 02:01:49.106692: step: 866/463, loss: 0.2770446836948395 2023-01-24 02:01:49.708115: step: 868/463, loss: 0.7259390950202942 2023-01-24 02:01:50.327529: step: 870/463, loss: 0.12542423605918884 2023-01-24 02:01:50.993487: step: 872/463, loss: 0.013127334415912628 2023-01-24 02:01:51.566230: step: 874/463, loss: 0.4161779284477234 2023-01-24 02:01:52.179855: step: 876/463, loss: 0.03206418827176094 2023-01-24 02:01:52.873232: step: 878/463, loss: 0.042767807841300964 2023-01-24 02:01:53.558997: step: 880/463, loss: 0.08185069262981415 2023-01-24 02:01:54.176488: step: 882/463, loss: 0.46371862292289734 2023-01-24 02:01:54.834114: step: 884/463, loss: 0.09322955459356308 2023-01-24 02:01:55.431724: step: 886/463, loss: 0.07903829216957092 2023-01-24 02:01:56.061661: step: 888/463, loss: 0.04770863801240921 2023-01-24 02:01:56.672090: step: 890/463, loss: 0.4304206669330597 2023-01-24 02:01:57.314924: step: 892/463, loss: 0.1135517805814743 2023-01-24 02:01:57.940875: step: 894/463, loss: 0.4329846203327179 2023-01-24 02:01:58.618017: step: 896/463, loss: 0.09617350250482559 2023-01-24 02:01:59.224630: step: 898/463, loss: 0.06635677814483643 2023-01-24 02:01:59.812462: step: 900/463, loss: 0.05550463870167732 2023-01-24 02:02:00.473676: step: 902/463, loss: 0.2550792992115021 2023-01-24 02:02:01.072791: step: 904/463, loss: 0.27705320715904236 2023-01-24 02:02:01.757647: step: 906/463, loss: 0.10798818618059158 2023-01-24 02:02:02.392038: step: 908/463, loss: 0.33742591738700867 2023-01-24 02:02:02.967631: step: 910/463, loss: 0.13067403435707092 2023-01-24 02:02:03.679365: step: 912/463, loss: 1.6436805725097656 2023-01-24 02:02:04.334987: step: 914/463, loss: 0.15067541599273682 2023-01-24 02:02:05.043460: step: 916/463, loss: 0.30853891372680664 2023-01-24 02:02:05.611489: step: 918/463, loss: 0.1106971800327301 2023-01-24 02:02:06.262918: step: 920/463, loss: 0.5722981691360474 2023-01-24 02:02:06.900319: step: 922/463, loss: 0.28582245111465454 2023-01-24 02:02:07.606756: step: 924/463, loss: 0.9868047833442688 2023-01-24 02:02:08.262099: step: 926/463, loss: 0.275545209646225 ================================================== Loss: 0.236 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3539612676056338, 'r': 0.333811669829222, 'f1': 0.34359130859375}, 'combined': 0.25317254317434207, 'epoch': 16} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34578513583790643, 'r': 0.355319652451084, 'f1': 0.3504875628891925}, 'combined': 0.2716697855887521, 'epoch': 16} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32427486187845306, 'r': 0.3341200189753321, 'f1': 0.329123831775701}, 'combined': 0.2425122970978849, 'epoch': 16} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3295268579657006, 'r': 0.3561797655952793, 'f1': 0.3423353224095971}, 'combined': 0.26535082406868293, 'epoch': 16} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33519243435914264, 'r': 0.33519243435914264, 'f1': 0.33519243435914264}, 'combined': 0.24698389900147352, 'epoch': 16} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3295700414227579, 'r': 0.34744194624255825, 'f1': 0.3382701006817927}, 'combined': 0.2621997909590929, 'epoch': 16} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3090277777777778, 'r': 0.31785714285714284, 'f1': 0.3133802816901408}, 'combined': 0.2089201877934272, 'epoch': 16} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.36538461538461536, 'r': 0.41304347826086957, 'f1': 0.3877551020408163}, 'combined': 0.19387755102040816, 'epoch': 16} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45454545454545453, 'r': 0.1724137931034483, 'f1': 0.25000000000000006}, 'combined': 0.16666666666666669, 'epoch': 16} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34837914937759334, 'r': 0.31863140417457303, 'f1': 0.3328419226957383}, 'combined': 0.24525194303896505, 'epoch': 14} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3461635956880312, 'r': 0.3677988204185331, 'f1': 0.3566534016179715}, 'combined': 0.2764490481440736, 'epoch': 14} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3922413793103448, 'r': 0.325, 'f1': 0.35546875000000006}, 'combined': 0.23697916666666669, 'epoch': 14} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32427486187845306, 'r': 0.3341200189753321, 'f1': 0.329123831775701}, 'combined': 0.2425122970978849, 'epoch': 16} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3295268579657006, 'r': 0.3561797655952793, 'f1': 0.3423353224095971}, 'combined': 0.26535082406868293, 'epoch': 16} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.36538461538461536, 'r': 0.41304347826086957, 'f1': 0.3877551020408163}, 'combined': 0.19387755102040816, 'epoch': 16} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33842201576576575, 'r': 0.3326425126502214, 'f1': 0.3355073763955343}, 'combined': 0.2472159615546042, 'epoch': 14} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33644544699224593, 'r': 0.3627302475385151, 'f1': 0.3490937720671424}, 'combined': 0.2705894309802731, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.20689655172413793, 'f1': 0.2926829268292683}, 'combined': 0.19512195121951217, 'epoch': 14} ****************************** Epoch: 17 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:04:51.185162: step: 2/463, loss: 0.061208248138427734 2023-01-24 02:04:51.877591: step: 4/463, loss: 1.429548740386963 2023-01-24 02:04:52.483324: step: 6/463, loss: 0.07827788591384888 2023-01-24 02:04:53.107870: step: 8/463, loss: 0.10401318967342377 2023-01-24 02:04:53.717788: step: 10/463, loss: 0.07103412598371506 2023-01-24 02:04:54.309407: step: 12/463, loss: 0.05524838715791702 2023-01-24 02:04:54.916867: step: 14/463, loss: 0.2792140543460846 2023-01-24 02:04:55.557951: step: 16/463, loss: 0.19342955946922302 2023-01-24 02:04:56.174738: step: 18/463, loss: 0.043455831706523895 2023-01-24 02:04:56.822383: step: 20/463, loss: 0.04472591355443001 2023-01-24 02:04:57.452218: step: 22/463, loss: 0.28313031792640686 2023-01-24 02:04:58.046068: step: 24/463, loss: 0.2675344944000244 2023-01-24 02:04:58.696480: step: 26/463, loss: 0.014542317017912865 2023-01-24 02:04:59.282127: step: 28/463, loss: 0.058553364127874374 2023-01-24 02:04:59.869218: step: 30/463, loss: 0.04602428898215294 2023-01-24 02:05:00.571527: step: 32/463, loss: 0.8514562845230103 2023-01-24 02:05:01.157145: step: 34/463, loss: 0.15211881697177887 2023-01-24 02:05:01.730186: step: 36/463, loss: 0.16342812776565552 2023-01-24 02:05:02.316354: step: 38/463, loss: 0.9615288376808167 2023-01-24 02:05:02.927109: step: 40/463, loss: 0.41481366753578186 2023-01-24 02:05:03.513656: step: 42/463, loss: 0.09782220423221588 2023-01-24 02:05:04.060883: step: 44/463, loss: 0.04602811112999916 2023-01-24 02:05:04.639816: step: 46/463, loss: 0.11853159219026566 2023-01-24 02:05:05.277710: step: 48/463, loss: 0.05424831435084343 2023-01-24 02:05:05.872176: step: 50/463, loss: 0.08900800347328186 2023-01-24 02:05:06.530890: step: 52/463, loss: 0.3510980010032654 2023-01-24 02:05:07.120260: step: 54/463, loss: 0.09976553916931152 2023-01-24 02:05:07.803041: step: 56/463, loss: 0.23481567203998566 2023-01-24 02:05:08.427838: step: 58/463, loss: 0.010560777969658375 2023-01-24 02:05:09.078362: step: 60/463, loss: 0.11461454629898071 2023-01-24 02:05:09.686376: step: 62/463, loss: 0.1466471254825592 2023-01-24 02:05:10.231038: step: 64/463, loss: 0.054992325603961945 2023-01-24 02:05:10.805565: step: 66/463, loss: 0.1268271952867508 2023-01-24 02:05:11.445124: step: 68/463, loss: 0.019168691709637642 2023-01-24 02:05:12.017889: step: 70/463, loss: 0.006661098450422287 2023-01-24 02:05:12.598048: step: 72/463, loss: 0.10852862894535065 2023-01-24 02:05:13.259251: step: 74/463, loss: 0.10976220667362213 2023-01-24 02:05:13.917228: step: 76/463, loss: 0.026197120547294617 2023-01-24 02:05:14.526646: step: 78/463, loss: 0.06628680229187012 2023-01-24 02:05:15.149877: step: 80/463, loss: 0.12858764827251434 2023-01-24 02:05:15.722298: step: 82/463, loss: 0.11108959466218948 2023-01-24 02:05:16.371163: step: 84/463, loss: 0.08075893670320511 2023-01-24 02:05:17.025708: step: 86/463, loss: 0.010406739078462124 2023-01-24 02:05:17.641508: step: 88/463, loss: 0.061715152114629745 2023-01-24 02:05:18.196691: step: 90/463, loss: 0.0024050285574048758 2023-01-24 02:05:18.818680: step: 92/463, loss: 0.01013980619609356 2023-01-24 02:05:19.413081: step: 94/463, loss: 0.07551796734333038 2023-01-24 02:05:20.098245: step: 96/463, loss: 0.24460965394973755 2023-01-24 02:05:20.700520: step: 98/463, loss: 0.20156948268413544 2023-01-24 02:05:21.332982: step: 100/463, loss: 0.033421777188777924 2023-01-24 02:05:21.896818: step: 102/463, loss: 0.07469838857650757 2023-01-24 02:05:22.472830: step: 104/463, loss: 0.016260556876659393 2023-01-24 02:05:23.115781: step: 106/463, loss: 0.29703056812286377 2023-01-24 02:05:23.716678: step: 108/463, loss: 0.022552143782377243 2023-01-24 02:05:24.286408: step: 110/463, loss: 0.03729711100459099 2023-01-24 02:05:24.822212: step: 112/463, loss: 0.03979526460170746 2023-01-24 02:05:25.409404: step: 114/463, loss: 0.08763419836759567 2023-01-24 02:05:25.993027: step: 116/463, loss: 0.03372212126851082 2023-01-24 02:05:26.629579: step: 118/463, loss: 0.044284623116254807 2023-01-24 02:05:27.228090: step: 120/463, loss: 0.08314429968595505 2023-01-24 02:05:27.883677: step: 122/463, loss: 0.10130926221609116 2023-01-24 02:05:28.486622: step: 124/463, loss: 0.1690724939107895 2023-01-24 02:05:29.052116: step: 126/463, loss: 0.07167366147041321 2023-01-24 02:05:29.644392: step: 128/463, loss: 0.020160706713795662 2023-01-24 02:05:30.193461: step: 130/463, loss: 0.09696692228317261 2023-01-24 02:05:30.843081: step: 132/463, loss: 0.027926130220294 2023-01-24 02:05:31.469859: step: 134/463, loss: 0.13102640211582184 2023-01-24 02:05:32.126762: step: 136/463, loss: 0.23273417353630066 2023-01-24 02:05:32.808444: step: 138/463, loss: 0.0639929547905922 2023-01-24 02:05:33.471364: step: 140/463, loss: 0.023816965520381927 2023-01-24 02:05:34.078711: step: 142/463, loss: 0.05975884944200516 2023-01-24 02:05:34.708636: step: 144/463, loss: 0.09767508506774902 2023-01-24 02:05:35.343534: step: 146/463, loss: 0.05500223487615585 2023-01-24 02:05:35.986451: step: 148/463, loss: 0.21780887246131897 2023-01-24 02:05:36.622987: step: 150/463, loss: 0.08378569036722183 2023-01-24 02:05:37.237447: step: 152/463, loss: 0.049797382205724716 2023-01-24 02:05:37.826621: step: 154/463, loss: 0.2314986288547516 2023-01-24 02:05:38.441915: step: 156/463, loss: 0.04122598469257355 2023-01-24 02:05:39.046032: step: 158/463, loss: 0.058828771114349365 2023-01-24 02:05:39.730633: step: 160/463, loss: 0.05163532868027687 2023-01-24 02:05:40.341177: step: 162/463, loss: 0.2564520835876465 2023-01-24 02:05:40.940241: step: 164/463, loss: 0.06570015102624893 2023-01-24 02:05:41.550494: step: 166/463, loss: 0.06088108569383621 2023-01-24 02:05:42.196311: step: 168/463, loss: 0.05846810340881348 2023-01-24 02:05:42.770999: step: 170/463, loss: 0.011461921036243439 2023-01-24 02:05:43.312326: step: 172/463, loss: 0.05215035751461983 2023-01-24 02:05:43.867280: step: 174/463, loss: 0.019592925906181335 2023-01-24 02:05:44.460170: step: 176/463, loss: 0.9721873998641968 2023-01-24 02:05:45.028391: step: 178/463, loss: 0.04376525804400444 2023-01-24 02:05:45.642143: step: 180/463, loss: 0.065320685505867 2023-01-24 02:05:46.239716: step: 182/463, loss: 0.011112179607152939 2023-01-24 02:05:46.828903: step: 184/463, loss: 0.07126247137784958 2023-01-24 02:05:47.403947: step: 186/463, loss: 0.0482366569340229 2023-01-24 02:05:48.011425: step: 188/463, loss: 0.40055400133132935 2023-01-24 02:05:48.589414: step: 190/463, loss: 0.07437822222709656 2023-01-24 02:05:49.153962: step: 192/463, loss: 0.04190584644675255 2023-01-24 02:05:49.790018: step: 194/463, loss: 0.036455072462558746 2023-01-24 02:05:50.409422: step: 196/463, loss: 0.037047579884529114 2023-01-24 02:05:51.013262: step: 198/463, loss: 0.05403919517993927 2023-01-24 02:05:51.634581: step: 200/463, loss: 0.21770811080932617 2023-01-24 02:05:52.298985: step: 202/463, loss: 0.026273254305124283 2023-01-24 02:05:52.904862: step: 204/463, loss: 0.18321312963962555 2023-01-24 02:05:53.514124: step: 206/463, loss: 0.08238894492387772 2023-01-24 02:05:54.110066: step: 208/463, loss: 0.14070747792720795 2023-01-24 02:05:54.702252: step: 210/463, loss: 0.033078089356422424 2023-01-24 02:05:55.259938: step: 212/463, loss: 0.017914224416017532 2023-01-24 02:05:55.893574: step: 214/463, loss: 0.08555115759372711 2023-01-24 02:05:56.538446: step: 216/463, loss: 0.042873188853263855 2023-01-24 02:05:57.153922: step: 218/463, loss: 0.12525737285614014 2023-01-24 02:05:57.796850: step: 220/463, loss: 0.0575256310403347 2023-01-24 02:05:58.459585: step: 222/463, loss: 0.23342075943946838 2023-01-24 02:05:59.050787: step: 224/463, loss: 0.15358057618141174 2023-01-24 02:05:59.672991: step: 226/463, loss: 0.10575661808252335 2023-01-24 02:06:00.313779: step: 228/463, loss: 0.0845039114356041 2023-01-24 02:06:00.923175: step: 230/463, loss: 0.07489588111639023 2023-01-24 02:06:01.548490: step: 232/463, loss: 0.07380463182926178 2023-01-24 02:06:02.125660: step: 234/463, loss: 0.06168938800692558 2023-01-24 02:06:02.742971: step: 236/463, loss: 0.0815376564860344 2023-01-24 02:06:03.356094: step: 238/463, loss: 0.03183263912796974 2023-01-24 02:06:03.911056: step: 240/463, loss: 0.12306354194879532 2023-01-24 02:06:04.521816: step: 242/463, loss: 0.7525889873504639 2023-01-24 02:06:05.216195: step: 244/463, loss: 0.07208044081926346 2023-01-24 02:06:05.823893: step: 246/463, loss: 0.14129307866096497 2023-01-24 02:06:06.415446: step: 248/463, loss: 0.04030977934598923 2023-01-24 02:06:07.010720: step: 250/463, loss: 0.040892407298088074 2023-01-24 02:06:07.747670: step: 252/463, loss: 0.0148267587646842 2023-01-24 02:06:08.562562: step: 254/463, loss: 0.10283830761909485 2023-01-24 02:06:09.187171: step: 256/463, loss: 0.05003456771373749 2023-01-24 02:06:09.810297: step: 258/463, loss: 0.04177189990878105 2023-01-24 02:06:10.351405: step: 260/463, loss: 0.04389839619398117 2023-01-24 02:06:10.970478: step: 262/463, loss: 0.14525310695171356 2023-01-24 02:06:11.589069: step: 264/463, loss: 0.131211519241333 2023-01-24 02:06:12.222655: step: 266/463, loss: 0.09023848176002502 2023-01-24 02:06:12.850211: step: 268/463, loss: 0.030174342915415764 2023-01-24 02:06:13.436273: step: 270/463, loss: 0.07709553092718124 2023-01-24 02:06:14.043935: step: 272/463, loss: 0.060623396188020706 2023-01-24 02:06:14.665779: step: 274/463, loss: 0.06450683623552322 2023-01-24 02:06:15.301059: step: 276/463, loss: 0.09293755888938904 2023-01-24 02:06:15.915668: step: 278/463, loss: 0.2938070595264435 2023-01-24 02:06:16.522535: step: 280/463, loss: 0.02832983247935772 2023-01-24 02:06:17.093068: step: 282/463, loss: 0.01586303487420082 2023-01-24 02:06:17.687106: step: 284/463, loss: 0.03767448663711548 2023-01-24 02:06:18.313782: step: 286/463, loss: 0.119881272315979 2023-01-24 02:06:18.942114: step: 288/463, loss: 0.16706766188144684 2023-01-24 02:06:19.538357: step: 290/463, loss: 0.0495007298886776 2023-01-24 02:06:20.167075: step: 292/463, loss: 0.018432429060339928 2023-01-24 02:06:20.805676: step: 294/463, loss: 0.05264729633927345 2023-01-24 02:06:21.419912: step: 296/463, loss: 0.028099611401557922 2023-01-24 02:06:22.020443: step: 298/463, loss: 0.18391457200050354 2023-01-24 02:06:22.586987: step: 300/463, loss: 0.16782978177070618 2023-01-24 02:06:23.272348: step: 302/463, loss: 0.28628191351890564 2023-01-24 02:06:23.799364: step: 304/463, loss: 0.1303740292787552 2023-01-24 02:06:24.385103: step: 306/463, loss: 0.10783737152814865 2023-01-24 02:06:24.985863: step: 308/463, loss: 0.06982791423797607 2023-01-24 02:06:25.672801: step: 310/463, loss: 0.18841378390789032 2023-01-24 02:06:26.259322: step: 312/463, loss: 0.14633822441101074 2023-01-24 02:06:26.907590: step: 314/463, loss: 0.12172915786504745 2023-01-24 02:06:27.575460: step: 316/463, loss: 0.024316532537341118 2023-01-24 02:06:28.224719: step: 318/463, loss: 0.23092934489250183 2023-01-24 02:06:28.819942: step: 320/463, loss: 0.09777764230966568 2023-01-24 02:06:29.416375: step: 322/463, loss: 0.9154044389724731 2023-01-24 02:06:29.962176: step: 324/463, loss: 0.09682703763246536 2023-01-24 02:06:30.621813: step: 326/463, loss: 0.06696777790784836 2023-01-24 02:06:31.169574: step: 328/463, loss: 0.1595633625984192 2023-01-24 02:06:31.785272: step: 330/463, loss: 0.026845725253224373 2023-01-24 02:06:32.333405: step: 332/463, loss: 0.06576258689165115 2023-01-24 02:06:32.927937: step: 334/463, loss: 0.14095371961593628 2023-01-24 02:06:33.546682: step: 336/463, loss: 0.05589592084288597 2023-01-24 02:06:34.111312: step: 338/463, loss: 0.08479630202054977 2023-01-24 02:06:34.713596: step: 340/463, loss: 0.10343464463949203 2023-01-24 02:06:35.347923: step: 342/463, loss: 0.07250874489545822 2023-01-24 02:06:35.967112: step: 344/463, loss: 0.03034813329577446 2023-01-24 02:06:36.517120: step: 346/463, loss: 0.037292033433914185 2023-01-24 02:06:37.119400: step: 348/463, loss: 0.4737735986709595 2023-01-24 02:06:37.700503: step: 350/463, loss: 0.03390367329120636 2023-01-24 02:06:38.243489: step: 352/463, loss: 0.08879923820495605 2023-01-24 02:06:38.875205: step: 354/463, loss: 0.37008336186408997 2023-01-24 02:06:39.495642: step: 356/463, loss: 0.15540654957294464 2023-01-24 02:06:40.117584: step: 358/463, loss: 0.10053622722625732 2023-01-24 02:06:40.712875: step: 360/463, loss: 0.16275997459888458 2023-01-24 02:06:41.257738: step: 362/463, loss: 0.05168945714831352 2023-01-24 02:06:41.852696: step: 364/463, loss: 0.0929010733962059 2023-01-24 02:06:42.542802: step: 366/463, loss: 0.03830442950129509 2023-01-24 02:06:43.233907: step: 368/463, loss: 0.05620065703988075 2023-01-24 02:06:43.829415: step: 370/463, loss: 0.028583606705069542 2023-01-24 02:06:44.389191: step: 372/463, loss: 0.038274135440588 2023-01-24 02:06:44.975876: step: 374/463, loss: 0.04940010979771614 2023-01-24 02:06:45.605359: step: 376/463, loss: 0.026470765471458435 2023-01-24 02:06:46.158908: step: 378/463, loss: 0.3079555034637451 2023-01-24 02:06:46.809942: step: 380/463, loss: 0.026415301486849785 2023-01-24 02:06:47.359031: step: 382/463, loss: 0.02228335663676262 2023-01-24 02:06:47.919286: step: 384/463, loss: 0.024316811934113503 2023-01-24 02:06:48.491620: step: 386/463, loss: 0.21048268675804138 2023-01-24 02:06:49.093442: step: 388/463, loss: 0.18815036118030548 2023-01-24 02:06:49.728964: step: 390/463, loss: 0.2901209890842438 2023-01-24 02:06:50.335865: step: 392/463, loss: 0.08952522277832031 2023-01-24 02:06:50.919959: step: 394/463, loss: 0.13785718381404877 2023-01-24 02:06:51.573774: step: 396/463, loss: 0.015936898067593575 2023-01-24 02:06:52.127586: step: 398/463, loss: 1.0970044136047363 2023-01-24 02:06:52.762353: step: 400/463, loss: 0.051290832459926605 2023-01-24 02:06:53.368955: step: 402/463, loss: 0.09496838599443436 2023-01-24 02:06:54.052134: step: 404/463, loss: 0.10430146753787994 2023-01-24 02:06:54.621642: step: 406/463, loss: 0.0883362665772438 2023-01-24 02:06:55.262023: step: 408/463, loss: 0.047782305628061295 2023-01-24 02:06:55.916644: step: 410/463, loss: 0.06281021237373352 2023-01-24 02:06:56.583355: step: 412/463, loss: 0.28949886560440063 2023-01-24 02:06:57.241629: step: 414/463, loss: 0.11550695449113846 2023-01-24 02:06:57.894008: step: 416/463, loss: 0.08808974176645279 2023-01-24 02:06:58.565834: step: 418/463, loss: 0.0716627985239029 2023-01-24 02:06:59.191181: step: 420/463, loss: 0.12860070168972015 2023-01-24 02:06:59.783355: step: 422/463, loss: 0.05852087587118149 2023-01-24 02:07:00.511271: step: 424/463, loss: 0.0697203278541565 2023-01-24 02:07:01.193279: step: 426/463, loss: 0.10973231494426727 2023-01-24 02:07:01.781621: step: 428/463, loss: 0.5625197291374207 2023-01-24 02:07:02.384753: step: 430/463, loss: 0.006370704621076584 2023-01-24 02:07:02.983268: step: 432/463, loss: 0.04488911107182503 2023-01-24 02:07:03.625445: step: 434/463, loss: 0.12623180449008942 2023-01-24 02:07:04.227971: step: 436/463, loss: 0.10643579810857773 2023-01-24 02:07:04.800626: step: 438/463, loss: 0.0876208022236824 2023-01-24 02:07:05.407088: step: 440/463, loss: 0.1903076171875 2023-01-24 02:07:06.036048: step: 442/463, loss: 0.08365371823310852 2023-01-24 02:07:06.653603: step: 444/463, loss: 0.07737105339765549 2023-01-24 02:07:07.271854: step: 446/463, loss: 0.14239755272865295 2023-01-24 02:07:07.872146: step: 448/463, loss: 0.19661210477352142 2023-01-24 02:07:08.488007: step: 450/463, loss: 0.0818001851439476 2023-01-24 02:07:09.178502: step: 452/463, loss: 0.3861251473426819 2023-01-24 02:07:09.836286: step: 454/463, loss: 0.030177662149071693 2023-01-24 02:07:10.467752: step: 456/463, loss: 0.026532568037509918 2023-01-24 02:07:11.087508: step: 458/463, loss: 0.12874577939510345 2023-01-24 02:07:11.704678: step: 460/463, loss: 0.08066501468420029 2023-01-24 02:07:12.290252: step: 462/463, loss: 0.05050784349441528 2023-01-24 02:07:12.910276: step: 464/463, loss: 0.018761588260531425 2023-01-24 02:07:13.517368: step: 466/463, loss: 0.09819670021533966 2023-01-24 02:07:14.097861: step: 468/463, loss: 0.014587967656552792 2023-01-24 02:07:14.803632: step: 470/463, loss: 0.10897120088338852 2023-01-24 02:07:15.509799: step: 472/463, loss: 0.02867637760937214 2023-01-24 02:07:16.079835: step: 474/463, loss: 0.012765491381287575 2023-01-24 02:07:16.648358: step: 476/463, loss: 0.8871848583221436 2023-01-24 02:07:17.248367: step: 478/463, loss: 0.11894292384386063 2023-01-24 02:07:18.031616: step: 480/463, loss: 0.08654782176017761 2023-01-24 02:07:18.638891: step: 482/463, loss: 0.047143712639808655 2023-01-24 02:07:19.313694: step: 484/463, loss: 0.08438290655612946 2023-01-24 02:07:19.936096: step: 486/463, loss: 0.11088824272155762 2023-01-24 02:07:20.531668: step: 488/463, loss: 0.08681084960699081 2023-01-24 02:07:21.124502: step: 490/463, loss: 0.057544413954019547 2023-01-24 02:07:21.662480: step: 492/463, loss: 0.021170584484934807 2023-01-24 02:07:22.218193: step: 494/463, loss: 0.04923864081501961 2023-01-24 02:07:22.867694: step: 496/463, loss: 0.06782519817352295 2023-01-24 02:07:23.550299: step: 498/463, loss: 0.7223324179649353 2023-01-24 02:07:24.165943: step: 500/463, loss: 0.35374197363853455 2023-01-24 02:07:24.770816: step: 502/463, loss: 0.12288373708724976 2023-01-24 02:07:25.363466: step: 504/463, loss: 0.044332731515169144 2023-01-24 02:07:25.973382: step: 506/463, loss: 0.07677768915891647 2023-01-24 02:07:26.604962: step: 508/463, loss: 0.16519592702388763 2023-01-24 02:07:27.236934: step: 510/463, loss: 0.09459847956895828 2023-01-24 02:07:27.856377: step: 512/463, loss: 0.026427011936903 2023-01-24 02:07:28.492267: step: 514/463, loss: 0.07196318358182907 2023-01-24 02:07:29.072258: step: 516/463, loss: 0.21120774745941162 2023-01-24 02:07:29.752622: step: 518/463, loss: 0.04945854842662811 2023-01-24 02:07:30.348242: step: 520/463, loss: 0.03394310548901558 2023-01-24 02:07:30.987118: step: 522/463, loss: 0.0673530250787735 2023-01-24 02:07:31.579286: step: 524/463, loss: 0.07183351367712021 2023-01-24 02:07:32.174988: step: 526/463, loss: 0.10946710407733917 2023-01-24 02:07:32.836842: step: 528/463, loss: 0.16529658436775208 2023-01-24 02:07:33.429649: step: 530/463, loss: 0.147574782371521 2023-01-24 02:07:34.043820: step: 532/463, loss: 0.22300377488136292 2023-01-24 02:07:34.636884: step: 534/463, loss: 0.01837911456823349 2023-01-24 02:07:35.195559: step: 536/463, loss: 0.044504567980766296 2023-01-24 02:07:35.832266: step: 538/463, loss: 0.039813846349716187 2023-01-24 02:07:36.440944: step: 540/463, loss: 0.05233435705304146 2023-01-24 02:07:37.056228: step: 542/463, loss: 0.11505598574876785 2023-01-24 02:07:37.661284: step: 544/463, loss: 0.061726927757263184 2023-01-24 02:07:38.301320: step: 546/463, loss: 0.09867498278617859 2023-01-24 02:07:38.933417: step: 548/463, loss: 0.054983608424663544 2023-01-24 02:07:39.529314: step: 550/463, loss: 0.057428546249866486 2023-01-24 02:07:40.277809: step: 552/463, loss: 0.0610651895403862 2023-01-24 02:07:40.862747: step: 554/463, loss: 0.06848292052745819 2023-01-24 02:07:41.530219: step: 556/463, loss: 0.1013367623090744 2023-01-24 02:07:42.111975: step: 558/463, loss: 0.011504033580422401 2023-01-24 02:07:42.730214: step: 560/463, loss: 0.05420619994401932 2023-01-24 02:07:43.364310: step: 562/463, loss: 0.09809090942144394 2023-01-24 02:07:43.984246: step: 564/463, loss: 0.004097132943570614 2023-01-24 02:07:44.700972: step: 566/463, loss: 0.051969267427921295 2023-01-24 02:07:45.383059: step: 568/463, loss: 0.09839022904634476 2023-01-24 02:07:46.071950: step: 570/463, loss: 0.1341186761856079 2023-01-24 02:07:46.681685: step: 572/463, loss: 0.16783669590950012 2023-01-24 02:07:47.310522: step: 574/463, loss: 0.09007496386766434 2023-01-24 02:07:47.883116: step: 576/463, loss: 0.0610160268843174 2023-01-24 02:07:48.549056: step: 578/463, loss: 0.03092244826257229 2023-01-24 02:07:49.120777: step: 580/463, loss: 0.10315368324518204 2023-01-24 02:07:49.759635: step: 582/463, loss: 0.15601463615894318 2023-01-24 02:07:50.381810: step: 584/463, loss: 0.11801301687955856 2023-01-24 02:07:51.013887: step: 586/463, loss: 0.10612158477306366 2023-01-24 02:07:51.693925: step: 588/463, loss: 0.07959010452032089 2023-01-24 02:07:52.384707: step: 590/463, loss: 0.12788362801074982 2023-01-24 02:07:53.019100: step: 592/463, loss: 0.09778382629156113 2023-01-24 02:07:53.654928: step: 594/463, loss: 1.2908345460891724 2023-01-24 02:07:54.292392: step: 596/463, loss: 0.07603273540735245 2023-01-24 02:07:54.979804: step: 598/463, loss: 0.16689461469650269 2023-01-24 02:07:55.563906: step: 600/463, loss: 0.036326803267002106 2023-01-24 02:07:56.047815: step: 602/463, loss: 0.00665775453671813 2023-01-24 02:07:56.687636: step: 604/463, loss: 0.11159547418355942 2023-01-24 02:07:57.301365: step: 606/463, loss: 0.03686422482132912 2023-01-24 02:07:57.875753: step: 608/463, loss: 0.1336565911769867 2023-01-24 02:07:58.455041: step: 610/463, loss: 0.18711890280246735 2023-01-24 02:07:59.005856: step: 612/463, loss: 0.019359368830919266 2023-01-24 02:07:59.620916: step: 614/463, loss: 0.06999849528074265 2023-01-24 02:08:00.200045: step: 616/463, loss: 0.15988655388355255 2023-01-24 02:08:00.843357: step: 618/463, loss: 0.026183636859059334 2023-01-24 02:08:01.544981: step: 620/463, loss: 0.03431953489780426 2023-01-24 02:08:02.161793: step: 622/463, loss: 0.03576991334557533 2023-01-24 02:08:02.744103: step: 624/463, loss: 0.04971986263990402 2023-01-24 02:08:03.341273: step: 626/463, loss: 0.08186327666044235 2023-01-24 02:08:03.934560: step: 628/463, loss: 0.4426536560058594 2023-01-24 02:08:04.490747: step: 630/463, loss: 0.060494065284729004 2023-01-24 02:08:05.118086: step: 632/463, loss: 0.10471770167350769 2023-01-24 02:08:05.754975: step: 634/463, loss: 0.08326321095228195 2023-01-24 02:08:06.387707: step: 636/463, loss: 0.07041725516319275 2023-01-24 02:08:07.054698: step: 638/463, loss: 0.06596620380878448 2023-01-24 02:08:07.678167: step: 640/463, loss: 0.06585359573364258 2023-01-24 02:08:08.304206: step: 642/463, loss: 0.0484500490128994 2023-01-24 02:08:08.938144: step: 644/463, loss: 0.11510353535413742 2023-01-24 02:08:09.618851: step: 646/463, loss: 0.13813678920269012 2023-01-24 02:08:10.214075: step: 648/463, loss: 0.16530629992485046 2023-01-24 02:08:10.804075: step: 650/463, loss: 0.1415550410747528 2023-01-24 02:08:11.441917: step: 652/463, loss: 0.11802859604358673 2023-01-24 02:08:12.051709: step: 654/463, loss: 0.07877757400274277 2023-01-24 02:08:12.635763: step: 656/463, loss: 0.04003753885626793 2023-01-24 02:08:13.244267: step: 658/463, loss: 0.017675675451755524 2023-01-24 02:08:13.944347: step: 660/463, loss: 0.10854945331811905 2023-01-24 02:08:14.600930: step: 662/463, loss: 0.2632792294025421 2023-01-24 02:08:15.190490: step: 664/463, loss: 0.08814460784196854 2023-01-24 02:08:15.762362: step: 666/463, loss: 0.187367781996727 2023-01-24 02:08:16.466812: step: 668/463, loss: 0.024920079857110977 2023-01-24 02:08:17.020921: step: 670/463, loss: 0.10781847685575485 2023-01-24 02:08:17.691270: step: 672/463, loss: 0.03659043833613396 2023-01-24 02:08:18.373520: step: 674/463, loss: 0.3417806029319763 2023-01-24 02:08:18.954861: step: 676/463, loss: 0.13100674748420715 2023-01-24 02:08:19.536502: step: 678/463, loss: 0.17757271230220795 2023-01-24 02:08:20.202198: step: 680/463, loss: 0.10012602806091309 2023-01-24 02:08:20.762509: step: 682/463, loss: 0.12737083435058594 2023-01-24 02:08:21.323378: step: 684/463, loss: 0.035889822989702225 2023-01-24 02:08:21.900727: step: 686/463, loss: 0.26752403378486633 2023-01-24 02:08:22.458213: step: 688/463, loss: 0.37662583589553833 2023-01-24 02:08:23.035446: step: 690/463, loss: 0.07922212779521942 2023-01-24 02:08:23.726641: step: 692/463, loss: 0.13277605175971985 2023-01-24 02:08:24.304221: step: 694/463, loss: 0.1505248099565506 2023-01-24 02:08:24.888120: step: 696/463, loss: 0.0532086007297039 2023-01-24 02:08:25.527282: step: 698/463, loss: 0.14752548933029175 2023-01-24 02:08:26.109917: step: 700/463, loss: 0.01776857115328312 2023-01-24 02:08:26.696374: step: 702/463, loss: 0.046205390244722366 2023-01-24 02:08:27.277592: step: 704/463, loss: 0.09470363706350327 2023-01-24 02:08:27.886696: step: 706/463, loss: 0.08440268784761429 2023-01-24 02:08:28.525190: step: 708/463, loss: 0.057107601314783096 2023-01-24 02:08:29.159333: step: 710/463, loss: 0.21635369956493378 2023-01-24 02:08:29.754445: step: 712/463, loss: 0.09460914880037308 2023-01-24 02:08:30.353605: step: 714/463, loss: 0.06444735080003738 2023-01-24 02:08:31.004145: step: 716/463, loss: 0.08464868366718292 2023-01-24 02:08:31.588415: step: 718/463, loss: 0.052589572966098785 2023-01-24 02:08:32.177816: step: 720/463, loss: 0.03821743279695511 2023-01-24 02:08:32.815365: step: 722/463, loss: 0.08025336265563965 2023-01-24 02:08:33.435227: step: 724/463, loss: 0.09823353588581085 2023-01-24 02:08:34.142791: step: 726/463, loss: 0.08961719274520874 2023-01-24 02:08:34.696863: step: 728/463, loss: 0.09130401164293289 2023-01-24 02:08:35.302887: step: 730/463, loss: 0.03249844163656235 2023-01-24 02:08:35.845243: step: 732/463, loss: 0.10508283972740173 2023-01-24 02:08:36.535153: step: 734/463, loss: 0.040572043508291245 2023-01-24 02:08:37.112411: step: 736/463, loss: 0.054280102252960205 2023-01-24 02:08:37.724241: step: 738/463, loss: 0.08831977099180222 2023-01-24 02:08:38.337905: step: 740/463, loss: 0.09214296191930771 2023-01-24 02:08:39.005673: step: 742/463, loss: 0.0737830400466919 2023-01-24 02:08:39.657069: step: 744/463, loss: 0.07194742560386658 2023-01-24 02:08:40.265689: step: 746/463, loss: 0.15930859744548798 2023-01-24 02:08:40.899280: step: 748/463, loss: 0.025986719876527786 2023-01-24 02:08:41.584426: step: 750/463, loss: 0.07572995871305466 2023-01-24 02:08:42.247801: step: 752/463, loss: 0.0746951624751091 2023-01-24 02:08:42.894593: step: 754/463, loss: 0.04142388328909874 2023-01-24 02:08:43.547491: step: 756/463, loss: 0.10964342206716537 2023-01-24 02:08:44.117538: step: 758/463, loss: 0.1708681434392929 2023-01-24 02:08:44.810765: step: 760/463, loss: 0.040345918387174606 2023-01-24 02:08:45.391056: step: 762/463, loss: 0.0728670209646225 2023-01-24 02:08:45.966588: step: 764/463, loss: 0.0607268251478672 2023-01-24 02:08:46.566016: step: 766/463, loss: 0.09249906986951828 2023-01-24 02:08:47.184969: step: 768/463, loss: 5.884969711303711 2023-01-24 02:08:47.758410: step: 770/463, loss: 0.06606926769018173 2023-01-24 02:08:48.366966: step: 772/463, loss: 0.14891093969345093 2023-01-24 02:08:49.001172: step: 774/463, loss: 0.08733320981264114 2023-01-24 02:08:49.590991: step: 776/463, loss: 0.023043058812618256 2023-01-24 02:08:50.209527: step: 778/463, loss: 0.05227816849946976 2023-01-24 02:08:50.808121: step: 780/463, loss: 0.044602736830711365 2023-01-24 02:08:51.419270: step: 782/463, loss: 0.35288313031196594 2023-01-24 02:08:52.043956: step: 784/463, loss: 0.2534416913986206 2023-01-24 02:08:52.623090: step: 786/463, loss: 0.15967564284801483 2023-01-24 02:08:53.237364: step: 788/463, loss: 0.1509036272764206 2023-01-24 02:08:53.883019: step: 790/463, loss: 0.09032673388719559 2023-01-24 02:08:54.510609: step: 792/463, loss: 0.25398415327072144 2023-01-24 02:08:55.140172: step: 794/463, loss: 0.26440057158470154 2023-01-24 02:08:55.800531: step: 796/463, loss: 0.08120163530111313 2023-01-24 02:08:56.402409: step: 798/463, loss: 0.05926734209060669 2023-01-24 02:08:57.046141: step: 800/463, loss: 0.15492422878742218 2023-01-24 02:08:57.646722: step: 802/463, loss: 0.0764470025897026 2023-01-24 02:08:58.253445: step: 804/463, loss: 0.1691388338804245 2023-01-24 02:08:58.829631: step: 806/463, loss: 0.06764068454504013 2023-01-24 02:08:59.542577: step: 808/463, loss: 0.031133485957980156 2023-01-24 02:09:00.114017: step: 810/463, loss: 0.2223028838634491 2023-01-24 02:09:00.701658: step: 812/463, loss: 0.06927622854709625 2023-01-24 02:09:01.303352: step: 814/463, loss: 0.0654175728559494 2023-01-24 02:09:01.875424: step: 816/463, loss: 0.09919803589582443 2023-01-24 02:09:02.568750: step: 818/463, loss: 0.14725886285305023 2023-01-24 02:09:03.284219: step: 820/463, loss: 1.3823851346969604 2023-01-24 02:09:03.871087: step: 822/463, loss: 0.14381439983844757 2023-01-24 02:09:04.467263: step: 824/463, loss: 0.1574321687221527 2023-01-24 02:09:05.109787: step: 826/463, loss: 0.16179075837135315 2023-01-24 02:09:05.687085: step: 828/463, loss: 0.12029152363538742 2023-01-24 02:09:06.348323: step: 830/463, loss: 0.24406377971172333 2023-01-24 02:09:06.920170: step: 832/463, loss: 0.04816749319434166 2023-01-24 02:09:07.445803: step: 834/463, loss: 0.11215320229530334 2023-01-24 02:09:07.973644: step: 836/463, loss: 0.7431363463401794 2023-01-24 02:09:08.591390: step: 838/463, loss: 0.11409196257591248 2023-01-24 02:09:09.116948: step: 840/463, loss: 0.0903141126036644 2023-01-24 02:09:09.691929: step: 842/463, loss: 0.023744821548461914 2023-01-24 02:09:10.329112: step: 844/463, loss: 0.0793248787522316 2023-01-24 02:09:11.036991: step: 846/463, loss: 0.06717075407505035 2023-01-24 02:09:11.663690: step: 848/463, loss: 0.03410543128848076 2023-01-24 02:09:12.287891: step: 850/463, loss: 0.256104052066803 2023-01-24 02:09:12.863519: step: 852/463, loss: 0.15324832499027252 2023-01-24 02:09:13.506452: step: 854/463, loss: 0.07798101752996445 2023-01-24 02:09:14.180981: step: 856/463, loss: 0.10182260721921921 2023-01-24 02:09:14.777057: step: 858/463, loss: 0.14487837255001068 2023-01-24 02:09:15.348920: step: 860/463, loss: 0.08484792709350586 2023-01-24 02:09:15.941469: step: 862/463, loss: 0.10247684270143509 2023-01-24 02:09:16.566869: step: 864/463, loss: 0.0686623752117157 2023-01-24 02:09:17.162122: step: 866/463, loss: 0.013124987483024597 2023-01-24 02:09:17.835424: step: 868/463, loss: 0.11345875263214111 2023-01-24 02:09:18.462068: step: 870/463, loss: 0.02788414992392063 2023-01-24 02:09:19.114498: step: 872/463, loss: 0.05029228329658508 2023-01-24 02:09:19.748905: step: 874/463, loss: 0.036897026002407074 2023-01-24 02:09:20.427275: step: 876/463, loss: 0.09647022932767868 2023-01-24 02:09:21.097910: step: 878/463, loss: 0.1591930389404297 2023-01-24 02:09:21.696790: step: 880/463, loss: 0.3004050850868225 2023-01-24 02:09:22.327536: step: 882/463, loss: 0.05016656965017319 2023-01-24 02:09:22.994603: step: 884/463, loss: 0.062221091240644455 2023-01-24 02:09:23.659055: step: 886/463, loss: 0.045141298323869705 2023-01-24 02:09:24.282508: step: 888/463, loss: 0.6509191393852234 2023-01-24 02:09:24.851971: step: 890/463, loss: 0.09943657368421555 2023-01-24 02:09:25.468856: step: 892/463, loss: 0.058808110654354095 2023-01-24 02:09:26.042735: step: 894/463, loss: 0.3118111193180084 2023-01-24 02:09:26.609334: step: 896/463, loss: 0.4183562695980072 2023-01-24 02:09:27.320750: step: 898/463, loss: 0.19850215315818787 2023-01-24 02:09:27.895906: step: 900/463, loss: 0.061391137540340424 2023-01-24 02:09:28.482998: step: 902/463, loss: 0.1711278259754181 2023-01-24 02:09:29.128437: step: 904/463, loss: 0.046011898666620255 2023-01-24 02:09:29.830350: step: 906/463, loss: 0.039038825780153275 2023-01-24 02:09:30.407898: step: 908/463, loss: 0.007601679768413305 2023-01-24 02:09:31.057099: step: 910/463, loss: 0.30937790870666504 2023-01-24 02:09:31.706454: step: 912/463, loss: 0.039402756839990616 2023-01-24 02:09:32.420679: step: 914/463, loss: 0.05828939005732536 2023-01-24 02:09:33.116444: step: 916/463, loss: 0.09467930346727371 2023-01-24 02:09:33.710581: step: 918/463, loss: 0.2072845995426178 2023-01-24 02:09:34.299986: step: 920/463, loss: 0.05487317591905594 2023-01-24 02:09:34.887783: step: 922/463, loss: 0.09829351305961609 2023-01-24 02:09:35.498264: step: 924/463, loss: 0.07700677961111069 2023-01-24 02:09:36.205877: step: 926/463, loss: 0.6743320226669312 ================================================== Loss: 0.140 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35531381856540084, 'r': 0.3195801707779886, 'f1': 0.336500999000999}, 'combined': 0.24794810452705188, 'epoch': 17} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35077406976066583, 'r': 0.38269193088778525, 'f1': 0.3660385237854157}, 'combined': 0.2837236404461117, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33308608949416346, 'r': 0.3248695445920304, 'f1': 0.3289265129682997}, 'combined': 0.24236690429243135, 'epoch': 17} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33709816419785643, 'r': 0.3841927606666746, 'f1': 0.3591080099702251}, 'combined': 0.2783516632305094, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33645593066331164, 'r': 0.32049502313658906, 'f1': 0.32828158832455284}, 'combined': 0.24189169666019683, 'epoch': 17} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33874384529144946, 'r': 0.3723691534637624, 'f1': 0.3547615052264217}, 'combined': 0.2749826021372264, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.3142857142857143, 'f1': 0.360655737704918}, 'combined': 0.24043715846994532, 'epoch': 17} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3888888888888889, 'r': 0.45652173913043476, 'f1': 0.42}, 'combined': 0.21, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.575, 'r': 0.19827586206896552, 'f1': 0.2948717948717949}, 'combined': 0.19658119658119658, 'epoch': 17} New best chinese model... New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35531381856540084, 'r': 0.3195801707779886, 'f1': 0.336500999000999}, 'combined': 0.24794810452705188, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35077406976066583, 'r': 0.38269193088778525, 'f1': 0.3660385237854157}, 'combined': 0.2837236404461117, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.3142857142857143, 'f1': 0.360655737704918}, 'combined': 0.24043715846994532, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33308608949416346, 'r': 0.3248695445920304, 'f1': 0.3289265129682997}, 'combined': 0.24236690429243135, 'epoch': 17} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33709816419785643, 'r': 0.3841927606666746, 'f1': 0.3591080099702251}, 'combined': 0.2783516632305094, 'epoch': 17} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3888888888888889, 'r': 0.45652173913043476, 'f1': 0.42}, 'combined': 0.21, 'epoch': 17} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33842201576576575, 'r': 0.3326425126502214, 'f1': 0.3355073763955343}, 'combined': 0.2472159615546042, 'epoch': 14} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33644544699224593, 'r': 0.3627302475385151, 'f1': 0.3490937720671424}, 'combined': 0.2705894309802731, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.20689655172413793, 'f1': 0.2926829268292683}, 'combined': 0.19512195121951217, 'epoch': 14} ****************************** Epoch: 18 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:12:23.516406: step: 2/463, loss: 0.016212042421102524 2023-01-24 02:12:24.184323: step: 4/463, loss: 0.9640198945999146 2023-01-24 02:12:24.742861: step: 6/463, loss: 0.033647093921899796 2023-01-24 02:12:25.355780: step: 8/463, loss: 0.05437152087688446 2023-01-24 02:12:25.961927: step: 10/463, loss: 0.07215385138988495 2023-01-24 02:12:26.555098: step: 12/463, loss: 0.0493975393474102 2023-01-24 02:12:27.081215: step: 14/463, loss: 0.058827850967645645 2023-01-24 02:12:27.743991: step: 16/463, loss: 0.09795331209897995 2023-01-24 02:12:28.396001: step: 18/463, loss: 0.020355330780148506 2023-01-24 02:12:28.964673: step: 20/463, loss: 0.03013799898326397 2023-01-24 02:12:29.602785: step: 22/463, loss: 0.05115118250250816 2023-01-24 02:12:30.213949: step: 24/463, loss: 0.010236588306725025 2023-01-24 02:12:30.790859: step: 26/463, loss: 0.048115234822034836 2023-01-24 02:12:31.450821: step: 28/463, loss: 0.10292096436023712 2023-01-24 02:12:32.079569: step: 30/463, loss: 0.37770336866378784 2023-01-24 02:12:32.662021: step: 32/463, loss: 0.07796694338321686 2023-01-24 02:12:33.257644: step: 34/463, loss: 0.03693462908267975 2023-01-24 02:12:33.867064: step: 36/463, loss: 0.042251937091350555 2023-01-24 02:12:34.554609: step: 38/463, loss: 0.18346579372882843 2023-01-24 02:12:35.179922: step: 40/463, loss: 0.4044155776500702 2023-01-24 02:12:35.753703: step: 42/463, loss: 0.08191284537315369 2023-01-24 02:12:36.353147: step: 44/463, loss: 0.07532992213964462 2023-01-24 02:12:37.003666: step: 46/463, loss: 0.11294304579496384 2023-01-24 02:12:37.598518: step: 48/463, loss: 0.05083438381552696 2023-01-24 02:12:38.237236: step: 50/463, loss: 0.05494581162929535 2023-01-24 02:12:38.810651: step: 52/463, loss: 0.06806744635105133 2023-01-24 02:12:39.457565: step: 54/463, loss: 0.32477229833602905 2023-01-24 02:12:40.068613: step: 56/463, loss: 0.07577423751354218 2023-01-24 02:12:40.670452: step: 58/463, loss: 0.04088554531335831 2023-01-24 02:12:41.281085: step: 60/463, loss: 0.034088753163814545 2023-01-24 02:12:41.869770: step: 62/463, loss: 0.019968640059232712 2023-01-24 02:12:42.507701: step: 64/463, loss: 0.11061062663793564 2023-01-24 02:12:43.098493: step: 66/463, loss: 0.05607874318957329 2023-01-24 02:12:43.695425: step: 68/463, loss: 0.03767048195004463 2023-01-24 02:12:44.293237: step: 70/463, loss: 0.039938606321811676 2023-01-24 02:12:44.953729: step: 72/463, loss: 0.01798577792942524 2023-01-24 02:12:45.549222: step: 74/463, loss: 0.03460743650794029 2023-01-24 02:12:46.144827: step: 76/463, loss: 0.018315061926841736 2023-01-24 02:12:46.700041: step: 78/463, loss: 0.09156130254268646 2023-01-24 02:12:47.255493: step: 80/463, loss: 0.20378705859184265 2023-01-24 02:12:47.906602: step: 82/463, loss: 0.105721615254879 2023-01-24 02:12:48.509357: step: 84/463, loss: 0.560261070728302 2023-01-24 02:12:49.108591: step: 86/463, loss: 0.046659983694553375 2023-01-24 02:12:49.711017: step: 88/463, loss: 0.028879733756184578 2023-01-24 02:12:50.299171: step: 90/463, loss: 0.0708206444978714 2023-01-24 02:12:50.871301: step: 92/463, loss: 0.03559279069304466 2023-01-24 02:12:51.491308: step: 94/463, loss: 0.09447220712900162 2023-01-24 02:12:52.137110: step: 96/463, loss: 0.13801820576190948 2023-01-24 02:12:52.735546: step: 98/463, loss: 0.08263944834470749 2023-01-24 02:12:53.428610: step: 100/463, loss: 0.015989182516932487 2023-01-24 02:12:54.074051: step: 102/463, loss: 0.09653551876544952 2023-01-24 02:12:54.681678: step: 104/463, loss: 0.07864303141832352 2023-01-24 02:12:55.278168: step: 106/463, loss: 0.18088604509830475 2023-01-24 02:12:55.919396: step: 108/463, loss: 0.06670957803726196 2023-01-24 02:12:56.566531: step: 110/463, loss: 0.10206970572471619 2023-01-24 02:12:57.199698: step: 112/463, loss: 0.07119515538215637 2023-01-24 02:12:57.745996: step: 114/463, loss: 0.02532247081398964 2023-01-24 02:12:58.353189: step: 116/463, loss: 1.8201171159744263 2023-01-24 02:12:58.955384: step: 118/463, loss: 0.048613276332616806 2023-01-24 02:12:59.545127: step: 120/463, loss: 0.01938631944358349 2023-01-24 02:13:00.176475: step: 122/463, loss: 0.0514845997095108 2023-01-24 02:13:00.769057: step: 124/463, loss: 0.09292665123939514 2023-01-24 02:13:01.411272: step: 126/463, loss: 0.1530778855085373 2023-01-24 02:13:02.048579: step: 128/463, loss: 0.2171950787305832 2023-01-24 02:13:02.654079: step: 130/463, loss: 0.03372624143958092 2023-01-24 02:13:03.312370: step: 132/463, loss: 0.04612148553133011 2023-01-24 02:13:03.898053: step: 134/463, loss: 0.09772387892007828 2023-01-24 02:13:04.557341: step: 136/463, loss: 0.043327052146196365 2023-01-24 02:13:05.220402: step: 138/463, loss: 0.05234206095337868 2023-01-24 02:13:05.866822: step: 140/463, loss: 0.09337692707777023 2023-01-24 02:13:06.522385: step: 142/463, loss: 0.175821915268898 2023-01-24 02:13:07.200113: step: 144/463, loss: 0.07792326807975769 2023-01-24 02:13:07.873146: step: 146/463, loss: 0.04891330748796463 2023-01-24 02:13:08.572627: step: 148/463, loss: 0.03033485822379589 2023-01-24 02:13:09.203470: step: 150/463, loss: 0.12295791506767273 2023-01-24 02:13:09.802546: step: 152/463, loss: 0.03542214632034302 2023-01-24 02:13:10.411889: step: 154/463, loss: 0.07410010695457458 2023-01-24 02:13:10.994127: step: 156/463, loss: 0.05069688707590103 2023-01-24 02:13:11.594541: step: 158/463, loss: 1.4251694679260254 2023-01-24 02:13:12.186447: step: 160/463, loss: 0.10487470030784607 2023-01-24 02:13:12.899563: step: 162/463, loss: 0.2275792956352234 2023-01-24 02:13:13.448205: step: 164/463, loss: 0.00966346450150013 2023-01-24 02:13:14.016283: step: 166/463, loss: 0.012949720025062561 2023-01-24 02:13:14.640164: step: 168/463, loss: 0.06521248072385788 2023-01-24 02:13:15.279603: step: 170/463, loss: 0.03360576182603836 2023-01-24 02:13:15.882654: step: 172/463, loss: 0.1602082997560501 2023-01-24 02:13:16.438677: step: 174/463, loss: 0.0936887189745903 2023-01-24 02:13:17.060172: step: 176/463, loss: 0.10705532133579254 2023-01-24 02:13:17.682455: step: 178/463, loss: 0.04392546787858009 2023-01-24 02:13:18.317997: step: 180/463, loss: 0.00891173630952835 2023-01-24 02:13:18.988704: step: 182/463, loss: 0.08424145728349686 2023-01-24 02:13:19.671583: step: 184/463, loss: 0.025389881804585457 2023-01-24 02:13:20.270983: step: 186/463, loss: 0.06496134400367737 2023-01-24 02:13:20.919926: step: 188/463, loss: 0.028190920129418373 2023-01-24 02:13:21.574985: step: 190/463, loss: 0.0437450185418129 2023-01-24 02:13:22.187760: step: 192/463, loss: 0.07979384809732437 2023-01-24 02:13:22.784188: step: 194/463, loss: 0.01194043830037117 2023-01-24 02:13:23.449719: step: 196/463, loss: 0.07126058638095856 2023-01-24 02:13:24.050943: step: 198/463, loss: 0.31532159447669983 2023-01-24 02:13:24.665538: step: 200/463, loss: 0.04363734647631645 2023-01-24 02:13:25.211661: step: 202/463, loss: 0.011161845177412033 2023-01-24 02:13:25.861855: step: 204/463, loss: 0.18528276681900024 2023-01-24 02:13:26.484590: step: 206/463, loss: 0.17572219669818878 2023-01-24 02:13:27.116138: step: 208/463, loss: 0.04270785301923752 2023-01-24 02:13:27.693732: step: 210/463, loss: 0.3265828788280487 2023-01-24 02:13:28.362941: step: 212/463, loss: 0.24936145544052124 2023-01-24 02:13:28.979751: step: 214/463, loss: 0.0223789531737566 2023-01-24 02:13:29.572970: step: 216/463, loss: 0.10817316919565201 2023-01-24 02:13:30.199978: step: 218/463, loss: 0.05690561607480049 2023-01-24 02:13:30.873518: step: 220/463, loss: 0.16372817754745483 2023-01-24 02:13:31.415052: step: 222/463, loss: 0.01704958640038967 2023-01-24 02:13:32.100229: step: 224/463, loss: 0.03656533360481262 2023-01-24 02:13:32.683323: step: 226/463, loss: 0.05308743566274643 2023-01-24 02:13:33.326095: step: 228/463, loss: 0.1447989046573639 2023-01-24 02:13:33.898012: step: 230/463, loss: 0.015387726947665215 2023-01-24 02:13:34.457625: step: 232/463, loss: 0.032462846487760544 2023-01-24 02:13:35.139593: step: 234/463, loss: 0.05053481459617615 2023-01-24 02:13:35.806131: step: 236/463, loss: 0.03542039170861244 2023-01-24 02:13:36.395515: step: 238/463, loss: 0.0714607983827591 2023-01-24 02:13:36.980637: step: 240/463, loss: 0.17304441332817078 2023-01-24 02:13:37.589126: step: 242/463, loss: 0.028681520372629166 2023-01-24 02:13:38.163276: step: 244/463, loss: 0.23255522549152374 2023-01-24 02:13:38.786396: step: 246/463, loss: 0.11861496418714523 2023-01-24 02:13:39.352230: step: 248/463, loss: 0.0869048461318016 2023-01-24 02:13:40.007943: step: 250/463, loss: 0.062077596783638 2023-01-24 02:13:40.634799: step: 252/463, loss: 0.12153871357440948 2023-01-24 02:13:41.220528: step: 254/463, loss: 0.12055100500583649 2023-01-24 02:13:41.740669: step: 256/463, loss: 0.038677528500556946 2023-01-24 02:13:42.376969: step: 258/463, loss: 0.28849631547927856 2023-01-24 02:13:42.969428: step: 260/463, loss: 0.053183041512966156 2023-01-24 02:13:43.552976: step: 262/463, loss: 0.11701661348342896 2023-01-24 02:13:44.199837: step: 264/463, loss: 0.15511856973171234 2023-01-24 02:13:44.825016: step: 266/463, loss: 0.03338862583041191 2023-01-24 02:13:45.466021: step: 268/463, loss: 0.271430104970932 2023-01-24 02:13:46.058987: step: 270/463, loss: 0.1402658075094223 2023-01-24 02:13:46.683133: step: 272/463, loss: 0.05355101078748703 2023-01-24 02:13:47.316628: step: 274/463, loss: 0.05734524503350258 2023-01-24 02:13:47.911579: step: 276/463, loss: 0.11237657070159912 2023-01-24 02:13:48.548485: step: 278/463, loss: 0.025945687666535378 2023-01-24 02:13:49.136407: step: 280/463, loss: 0.2567163109779358 2023-01-24 02:13:49.731104: step: 282/463, loss: 0.053242068737745285 2023-01-24 02:13:50.325920: step: 284/463, loss: 0.03981756046414375 2023-01-24 02:13:50.918420: step: 286/463, loss: 0.07972758263349533 2023-01-24 02:13:51.506069: step: 288/463, loss: 0.12114940583705902 2023-01-24 02:13:52.117917: step: 290/463, loss: 0.06699173897504807 2023-01-24 02:13:52.706088: step: 292/463, loss: 0.18497785925865173 2023-01-24 02:13:53.295085: step: 294/463, loss: 0.08582198619842529 2023-01-24 02:13:53.920058: step: 296/463, loss: 0.1405123770236969 2023-01-24 02:13:54.550325: step: 298/463, loss: 0.12506988644599915 2023-01-24 02:13:55.183282: step: 300/463, loss: 0.0543283186852932 2023-01-24 02:13:55.785401: step: 302/463, loss: 0.0586734302341938 2023-01-24 02:13:56.383807: step: 304/463, loss: 0.08253441005945206 2023-01-24 02:13:57.021800: step: 306/463, loss: 0.05644260719418526 2023-01-24 02:13:57.619095: step: 308/463, loss: 0.08561031520366669 2023-01-24 02:13:58.302970: step: 310/463, loss: 0.045978374779224396 2023-01-24 02:13:58.920171: step: 312/463, loss: 0.06379242986440659 2023-01-24 02:13:59.584205: step: 314/463, loss: 0.07658006250858307 2023-01-24 02:14:00.124032: step: 316/463, loss: 0.03514092415571213 2023-01-24 02:14:00.739947: step: 318/463, loss: 0.01809585653245449 2023-01-24 02:14:01.356250: step: 320/463, loss: 0.04561099037528038 2023-01-24 02:14:01.918801: step: 322/463, loss: 0.09169846028089523 2023-01-24 02:14:02.496179: step: 324/463, loss: 0.03276433050632477 2023-01-24 02:14:03.205643: step: 326/463, loss: 0.02641312964260578 2023-01-24 02:14:03.822118: step: 328/463, loss: 0.012321699410676956 2023-01-24 02:14:04.421136: step: 330/463, loss: 0.04649710655212402 2023-01-24 02:14:05.028752: step: 332/463, loss: 0.13455890119075775 2023-01-24 02:14:05.648856: step: 334/463, loss: 0.29181528091430664 2023-01-24 02:14:06.250325: step: 336/463, loss: 0.4780136048793793 2023-01-24 02:14:06.953713: step: 338/463, loss: 0.04577526077628136 2023-01-24 02:14:07.629388: step: 340/463, loss: 0.025612493976950645 2023-01-24 02:14:08.217576: step: 342/463, loss: 0.6556715369224548 2023-01-24 02:14:08.826204: step: 344/463, loss: 0.06692571938037872 2023-01-24 02:14:09.495033: step: 346/463, loss: 0.08582320809364319 2023-01-24 02:14:10.111587: step: 348/463, loss: 0.034909769892692566 2023-01-24 02:14:10.764494: step: 350/463, loss: 0.059673845767974854 2023-01-24 02:14:11.335205: step: 352/463, loss: 0.037033919245004654 2023-01-24 02:14:11.889567: step: 354/463, loss: 0.06435783207416534 2023-01-24 02:14:12.489494: step: 356/463, loss: 0.07478026300668716 2023-01-24 02:14:13.116778: step: 358/463, loss: 0.4015183746814728 2023-01-24 02:14:13.800291: step: 360/463, loss: 0.10015476495027542 2023-01-24 02:14:14.418888: step: 362/463, loss: 0.3877939283847809 2023-01-24 02:14:15.012368: step: 364/463, loss: 0.13038840889930725 2023-01-24 02:14:15.642627: step: 366/463, loss: 0.01645221933722496 2023-01-24 02:14:16.277810: step: 368/463, loss: 0.013122377917170525 2023-01-24 02:14:16.893211: step: 370/463, loss: 0.058484334498643875 2023-01-24 02:14:17.485666: step: 372/463, loss: 0.036616064608097076 2023-01-24 02:14:18.097041: step: 374/463, loss: 0.019679535180330276 2023-01-24 02:14:18.679317: step: 376/463, loss: 0.10396323353052139 2023-01-24 02:14:19.251273: step: 378/463, loss: 0.07096651941537857 2023-01-24 02:14:19.908073: step: 380/463, loss: 0.038530830293893814 2023-01-24 02:14:20.491094: step: 382/463, loss: 0.056489501148462296 2023-01-24 02:14:21.082602: step: 384/463, loss: 0.053779445588588715 2023-01-24 02:14:21.657627: step: 386/463, loss: 0.04143083095550537 2023-01-24 02:14:22.268907: step: 388/463, loss: 0.05594902113080025 2023-01-24 02:14:22.908142: step: 390/463, loss: 0.0597904734313488 2023-01-24 02:14:23.530693: step: 392/463, loss: 0.16412444412708282 2023-01-24 02:14:24.192373: step: 394/463, loss: 0.22092363238334656 2023-01-24 02:14:24.835544: step: 396/463, loss: 0.1443624645471573 2023-01-24 02:14:25.517577: step: 398/463, loss: 0.15668828785419464 2023-01-24 02:14:26.139265: step: 400/463, loss: 0.09394413232803345 2023-01-24 02:14:26.747624: step: 402/463, loss: 0.2287728488445282 2023-01-24 02:14:27.429715: step: 404/463, loss: 1.1928030252456665 2023-01-24 02:14:28.036655: step: 406/463, loss: 0.03721761330962181 2023-01-24 02:14:28.650606: step: 408/463, loss: 0.14985324442386627 2023-01-24 02:14:29.266408: step: 410/463, loss: 0.06386313587427139 2023-01-24 02:14:29.813830: step: 412/463, loss: 0.038746338337659836 2023-01-24 02:14:30.405946: step: 414/463, loss: 0.04794111102819443 2023-01-24 02:14:31.034229: step: 416/463, loss: 0.03729976713657379 2023-01-24 02:14:31.603232: step: 418/463, loss: 0.12474322319030762 2023-01-24 02:14:32.250507: step: 420/463, loss: 0.1808999478816986 2023-01-24 02:14:32.867116: step: 422/463, loss: 0.01970197632908821 2023-01-24 02:14:33.488144: step: 424/463, loss: 0.2853885889053345 2023-01-24 02:14:34.074933: step: 426/463, loss: 0.6189596056938171 2023-01-24 02:14:34.655018: step: 428/463, loss: 0.03852459043264389 2023-01-24 02:14:35.205998: step: 430/463, loss: 0.02537391521036625 2023-01-24 02:14:35.796402: step: 432/463, loss: 0.04569804668426514 2023-01-24 02:14:36.442958: step: 434/463, loss: 0.12830261886119843 2023-01-24 02:14:37.122168: step: 436/463, loss: 0.3350285291671753 2023-01-24 02:14:37.772433: step: 438/463, loss: 0.1817103922367096 2023-01-24 02:14:38.533209: step: 440/463, loss: 0.08359193801879883 2023-01-24 02:14:39.072937: step: 442/463, loss: 0.13854703307151794 2023-01-24 02:14:39.719506: step: 444/463, loss: 0.11180835217237473 2023-01-24 02:14:40.254782: step: 446/463, loss: 0.0782872810959816 2023-01-24 02:14:40.835378: step: 448/463, loss: 0.22110621631145477 2023-01-24 02:14:41.441351: step: 450/463, loss: 0.06191756948828697 2023-01-24 02:14:42.097226: step: 452/463, loss: 0.2135303020477295 2023-01-24 02:14:42.724707: step: 454/463, loss: 0.033807553350925446 2023-01-24 02:14:43.367518: step: 456/463, loss: 0.25119462609291077 2023-01-24 02:14:43.994374: step: 458/463, loss: 0.05710972100496292 2023-01-24 02:14:44.544031: step: 460/463, loss: 0.08755708485841751 2023-01-24 02:14:45.138155: step: 462/463, loss: 0.07173552364110947 2023-01-24 02:14:45.806042: step: 464/463, loss: 0.1192893236875534 2023-01-24 02:14:46.404593: step: 466/463, loss: 1.0184739828109741 2023-01-24 02:14:47.021979: step: 468/463, loss: 0.31705155968666077 2023-01-24 02:14:47.599184: step: 470/463, loss: 0.017777632921934128 2023-01-24 02:14:48.256922: step: 472/463, loss: 0.06604321300983429 2023-01-24 02:14:48.848880: step: 474/463, loss: 0.062269970774650574 2023-01-24 02:14:49.413122: step: 476/463, loss: 0.07097317278385162 2023-01-24 02:14:50.096771: step: 478/463, loss: 0.06228772923350334 2023-01-24 02:14:50.673117: step: 480/463, loss: 0.8452541828155518 2023-01-24 02:14:51.335484: step: 482/463, loss: 0.18494856357574463 2023-01-24 02:14:51.951783: step: 484/463, loss: 0.08495352417230606 2023-01-24 02:14:52.477572: step: 486/463, loss: 0.037212520837783813 2023-01-24 02:14:53.136045: step: 488/463, loss: 0.04034935683012009 2023-01-24 02:14:53.834256: step: 490/463, loss: 0.04735064506530762 2023-01-24 02:14:54.452113: step: 492/463, loss: 0.0665123462677002 2023-01-24 02:14:55.115601: step: 494/463, loss: 0.04500744864344597 2023-01-24 02:14:55.686613: step: 496/463, loss: 0.08867621421813965 2023-01-24 02:14:56.272631: step: 498/463, loss: 0.09523636102676392 2023-01-24 02:14:56.875073: step: 500/463, loss: 0.128495991230011 2023-01-24 02:14:57.422835: step: 502/463, loss: 0.26585331559181213 2023-01-24 02:14:57.996989: step: 504/463, loss: 0.09859692305326462 2023-01-24 02:14:58.600920: step: 506/463, loss: 0.05489655211567879 2023-01-24 02:14:59.236020: step: 508/463, loss: 0.07878757268190384 2023-01-24 02:14:59.981203: step: 510/463, loss: 0.09481282532215118 2023-01-24 02:15:00.629422: step: 512/463, loss: 0.14488010108470917 2023-01-24 02:15:01.193627: step: 514/463, loss: 0.22927995026111603 2023-01-24 02:15:01.847716: step: 516/463, loss: 0.03839798644185066 2023-01-24 02:15:02.526276: step: 518/463, loss: 0.08230244368314743 2023-01-24 02:15:03.179146: step: 520/463, loss: 0.03852389380335808 2023-01-24 02:15:03.831656: step: 522/463, loss: 0.07344438135623932 2023-01-24 02:15:04.474416: step: 524/463, loss: 0.13041947782039642 2023-01-24 02:15:05.124129: step: 526/463, loss: 0.08121833950281143 2023-01-24 02:15:05.756295: step: 528/463, loss: 0.0040967268869280815 2023-01-24 02:15:06.392676: step: 530/463, loss: 0.04251902177929878 2023-01-24 02:15:06.967234: step: 532/463, loss: 0.03161359205842018 2023-01-24 02:15:07.544346: step: 534/463, loss: 0.02550979144871235 2023-01-24 02:15:08.169250: step: 536/463, loss: 0.044608063995838165 2023-01-24 02:15:08.761137: step: 538/463, loss: 0.12192875891923904 2023-01-24 02:15:09.377008: step: 540/463, loss: 0.06912586838006973 2023-01-24 02:15:09.951028: step: 542/463, loss: 0.2503063678741455 2023-01-24 02:15:10.566128: step: 544/463, loss: 0.041538435965776443 2023-01-24 02:15:11.146018: step: 546/463, loss: 0.010474284179508686 2023-01-24 02:15:11.815067: step: 548/463, loss: 0.03558509051799774 2023-01-24 02:15:12.393724: step: 550/463, loss: 0.16151836514472961 2023-01-24 02:15:13.025098: step: 552/463, loss: 0.11909962445497513 2023-01-24 02:15:13.704533: step: 554/463, loss: 0.08550700545310974 2023-01-24 02:15:14.358754: step: 556/463, loss: 0.08892244845628738 2023-01-24 02:15:14.968867: step: 558/463, loss: 0.020676758140325546 2023-01-24 02:15:15.524504: step: 560/463, loss: 0.0018308733124285936 2023-01-24 02:15:16.122650: step: 562/463, loss: 0.08395480364561081 2023-01-24 02:15:16.807830: step: 564/463, loss: 0.07625464349985123 2023-01-24 02:15:17.422952: step: 566/463, loss: 0.07002590596675873 2023-01-24 02:15:18.136477: step: 568/463, loss: 0.1519494354724884 2023-01-24 02:15:18.767886: step: 570/463, loss: 0.1494431048631668 2023-01-24 02:15:19.410917: step: 572/463, loss: 0.04557216167449951 2023-01-24 02:15:20.087685: step: 574/463, loss: 0.1067340224981308 2023-01-24 02:15:20.764691: step: 576/463, loss: 0.22042644023895264 2023-01-24 02:15:21.413494: step: 578/463, loss: 0.08238231390714645 2023-01-24 02:15:22.049053: step: 580/463, loss: 0.07242780178785324 2023-01-24 02:15:22.681238: step: 582/463, loss: 0.099951833486557 2023-01-24 02:15:23.338535: step: 584/463, loss: 0.04533222317695618 2023-01-24 02:15:23.928490: step: 586/463, loss: 0.030246000736951828 2023-01-24 02:15:24.533455: step: 588/463, loss: 0.771239697933197 2023-01-24 02:15:25.163460: step: 590/463, loss: 0.07724495977163315 2023-01-24 02:15:25.766969: step: 592/463, loss: 0.07560603320598602 2023-01-24 02:15:26.413656: step: 594/463, loss: 0.09971284121274948 2023-01-24 02:15:27.019732: step: 596/463, loss: 0.05480746924877167 2023-01-24 02:15:27.657096: step: 598/463, loss: 0.0808015763759613 2023-01-24 02:15:28.303238: step: 600/463, loss: 0.09976930916309357 2023-01-24 02:15:28.939444: step: 602/463, loss: 0.24950411915779114 2023-01-24 02:15:29.596098: step: 604/463, loss: 0.2291785180568695 2023-01-24 02:15:30.296462: step: 606/463, loss: 0.03308527544140816 2023-01-24 02:15:30.889251: step: 608/463, loss: 0.13219588994979858 2023-01-24 02:15:31.481416: step: 610/463, loss: 0.2507569491863251 2023-01-24 02:15:32.069768: step: 612/463, loss: 0.03770134225487709 2023-01-24 02:15:32.740758: step: 614/463, loss: 0.19599707424640656 2023-01-24 02:15:33.455046: step: 616/463, loss: 0.257695734500885 2023-01-24 02:15:34.126920: step: 618/463, loss: 0.2742365896701813 2023-01-24 02:15:34.774087: step: 620/463, loss: 0.08532330393791199 2023-01-24 02:15:35.330535: step: 622/463, loss: 0.02215675637125969 2023-01-24 02:15:36.034476: step: 624/463, loss: 0.3949394226074219 2023-01-24 02:15:36.849971: step: 626/463, loss: 0.06408332288265228 2023-01-24 02:15:37.501692: step: 628/463, loss: 0.05704617127776146 2023-01-24 02:15:38.106462: step: 630/463, loss: 0.36017370223999023 2023-01-24 02:15:38.747374: step: 632/463, loss: 0.0625026598572731 2023-01-24 02:15:39.509226: step: 634/463, loss: 0.20242901146411896 2023-01-24 02:15:40.106963: step: 636/463, loss: 0.07182762771844864 2023-01-24 02:15:40.767189: step: 638/463, loss: 0.01648804545402527 2023-01-24 02:15:41.396353: step: 640/463, loss: 0.03705969080328941 2023-01-24 02:15:42.044063: step: 642/463, loss: 0.09607403725385666 2023-01-24 02:15:42.674578: step: 644/463, loss: 0.08543264120817184 2023-01-24 02:15:43.278934: step: 646/463, loss: 0.03443874791264534 2023-01-24 02:15:43.924782: step: 648/463, loss: 0.04173902049660683 2023-01-24 02:15:44.492421: step: 650/463, loss: 0.10508611053228378 2023-01-24 02:15:45.184068: step: 652/463, loss: 0.04642002284526825 2023-01-24 02:15:45.840949: step: 654/463, loss: 0.023942647501826286 2023-01-24 02:15:46.455130: step: 656/463, loss: 0.17312268912792206 2023-01-24 02:15:47.062245: step: 658/463, loss: 0.058510906994342804 2023-01-24 02:15:47.587866: step: 660/463, loss: 0.09554650634527206 2023-01-24 02:15:48.225876: step: 662/463, loss: 0.10357148945331573 2023-01-24 02:15:48.897720: step: 664/463, loss: 0.10483022034168243 2023-01-24 02:15:49.509542: step: 666/463, loss: 0.04725139960646629 2023-01-24 02:15:50.154124: step: 668/463, loss: 0.08748379349708557 2023-01-24 02:15:50.742025: step: 670/463, loss: 0.02116716280579567 2023-01-24 02:15:51.343092: step: 672/463, loss: 0.04394834116101265 2023-01-24 02:15:51.980790: step: 674/463, loss: 0.2515764534473419 2023-01-24 02:15:52.622420: step: 676/463, loss: 0.050028614699840546 2023-01-24 02:15:53.269067: step: 678/463, loss: 0.12193179130554199 2023-01-24 02:15:53.941026: step: 680/463, loss: 0.08774694800376892 2023-01-24 02:15:54.567277: step: 682/463, loss: 0.2395859658718109 2023-01-24 02:15:55.173693: step: 684/463, loss: 0.20482489466667175 2023-01-24 02:15:55.803338: step: 686/463, loss: 0.0740254819393158 2023-01-24 02:15:56.470827: step: 688/463, loss: 0.1632409244775772 2023-01-24 02:15:57.098111: step: 690/463, loss: 0.05424309894442558 2023-01-24 02:15:57.720836: step: 692/463, loss: 0.14362530410289764 2023-01-24 02:15:58.361902: step: 694/463, loss: 0.06453726440668106 2023-01-24 02:15:58.976120: step: 696/463, loss: 0.08554147183895111 2023-01-24 02:15:59.630048: step: 698/463, loss: 0.0859757587313652 2023-01-24 02:16:00.237617: step: 700/463, loss: 0.2012406885623932 2023-01-24 02:16:00.844906: step: 702/463, loss: 0.16985918581485748 2023-01-24 02:16:01.473297: step: 704/463, loss: 0.10331309586763382 2023-01-24 02:16:02.078022: step: 706/463, loss: 0.07001883536577225 2023-01-24 02:16:02.712110: step: 708/463, loss: 0.3086925745010376 2023-01-24 02:16:03.354204: step: 710/463, loss: 0.06393751502037048 2023-01-24 02:16:03.908858: step: 712/463, loss: 0.02271709032356739 2023-01-24 02:16:04.553840: step: 714/463, loss: 0.046378519386053085 2023-01-24 02:16:05.159842: step: 716/463, loss: 0.1023581251502037 2023-01-24 02:16:05.767995: step: 718/463, loss: 0.5596896409988403 2023-01-24 02:16:06.333530: step: 720/463, loss: 0.4274919033050537 2023-01-24 02:16:06.947239: step: 722/463, loss: 0.0569419339299202 2023-01-24 02:16:07.534311: step: 724/463, loss: 0.057337820529937744 2023-01-24 02:16:08.232290: step: 726/463, loss: 0.06475400179624557 2023-01-24 02:16:08.820484: step: 728/463, loss: 0.22655194997787476 2023-01-24 02:16:09.410483: step: 730/463, loss: 0.043507952243089676 2023-01-24 02:16:10.001369: step: 732/463, loss: 0.07175178825855255 2023-01-24 02:16:10.689180: step: 734/463, loss: 0.06373856961727142 2023-01-24 02:16:11.282419: step: 736/463, loss: 0.2925908863544464 2023-01-24 02:16:11.940185: step: 738/463, loss: 0.05292638763785362 2023-01-24 02:16:12.604766: step: 740/463, loss: 0.03942211717367172 2023-01-24 02:16:13.249510: step: 742/463, loss: 0.04859014227986336 2023-01-24 02:16:13.857274: step: 744/463, loss: 0.02931671403348446 2023-01-24 02:16:14.484866: step: 746/463, loss: 0.08924669772386551 2023-01-24 02:16:15.102156: step: 748/463, loss: 0.04821545630693436 2023-01-24 02:16:15.687402: step: 750/463, loss: 0.09331602603197098 2023-01-24 02:16:16.334530: step: 752/463, loss: 0.06827334314584732 2023-01-24 02:16:16.927720: step: 754/463, loss: 0.2204732447862625 2023-01-24 02:16:17.575338: step: 756/463, loss: 0.03531024977564812 2023-01-24 02:16:18.153703: step: 758/463, loss: 0.006756413262337446 2023-01-24 02:16:18.752199: step: 760/463, loss: 0.009446326643228531 2023-01-24 02:16:19.342619: step: 762/463, loss: 0.027397599071264267 2023-01-24 02:16:19.940744: step: 764/463, loss: 0.12816797196865082 2023-01-24 02:16:20.551486: step: 766/463, loss: 0.0505727082490921 2023-01-24 02:16:21.260853: step: 768/463, loss: 0.174888014793396 2023-01-24 02:16:21.867823: step: 770/463, loss: 0.07031068950891495 2023-01-24 02:16:22.443542: step: 772/463, loss: 0.06723949313163757 2023-01-24 02:16:23.050281: step: 774/463, loss: 0.05044790729880333 2023-01-24 02:16:23.613373: step: 776/463, loss: 0.00863361544907093 2023-01-24 02:16:24.238692: step: 778/463, loss: 0.028967279940843582 2023-01-24 02:16:24.875465: step: 780/463, loss: 0.040094662457704544 2023-01-24 02:16:25.520874: step: 782/463, loss: 0.1270979940891266 2023-01-24 02:16:26.118484: step: 784/463, loss: 0.03343029320240021 2023-01-24 02:16:26.667681: step: 786/463, loss: 0.0570879690349102 2023-01-24 02:16:27.296891: step: 788/463, loss: 0.10782936215400696 2023-01-24 02:16:27.900052: step: 790/463, loss: 0.4224686324596405 2023-01-24 02:16:28.497078: step: 792/463, loss: 0.025654686614871025 2023-01-24 02:16:29.081838: step: 794/463, loss: 0.12931935489177704 2023-01-24 02:16:29.664500: step: 796/463, loss: 0.6991729736328125 2023-01-24 02:16:30.282264: step: 798/463, loss: 0.055386822670698166 2023-01-24 02:16:30.902791: step: 800/463, loss: 0.0575665645301342 2023-01-24 02:16:31.516311: step: 802/463, loss: 0.1003890335559845 2023-01-24 02:16:32.112646: step: 804/463, loss: 0.04175858199596405 2023-01-24 02:16:32.653360: step: 806/463, loss: 0.05459296330809593 2023-01-24 02:16:33.343396: step: 808/463, loss: 0.08588755875825882 2023-01-24 02:16:33.981388: step: 810/463, loss: 0.09460239112377167 2023-01-24 02:16:34.538230: step: 812/463, loss: 0.0710568055510521 2023-01-24 02:16:35.240391: step: 814/463, loss: 0.06560572236776352 2023-01-24 02:16:35.885018: step: 816/463, loss: 0.04303162544965744 2023-01-24 02:16:36.503695: step: 818/463, loss: 0.06561961024999619 2023-01-24 02:16:37.165420: step: 820/463, loss: 0.05641253665089607 2023-01-24 02:16:37.786490: step: 822/463, loss: 0.09744884818792343 2023-01-24 02:16:38.419906: step: 824/463, loss: 0.035097185522317886 2023-01-24 02:16:39.044373: step: 826/463, loss: 0.14613856375217438 2023-01-24 02:16:39.684282: step: 828/463, loss: 0.1462436467409134 2023-01-24 02:16:40.315604: step: 830/463, loss: 0.06405829638242722 2023-01-24 02:16:40.950515: step: 832/463, loss: 0.07300765812397003 2023-01-24 02:16:41.506484: step: 834/463, loss: 0.07874969393014908 2023-01-24 02:16:42.087584: step: 836/463, loss: 0.21054033935070038 2023-01-24 02:16:42.740464: step: 838/463, loss: 0.08521483093500137 2023-01-24 02:16:43.358253: step: 840/463, loss: 0.05369347706437111 2023-01-24 02:16:44.024574: step: 842/463, loss: 0.075861357152462 2023-01-24 02:16:44.640608: step: 844/463, loss: 0.10476447641849518 2023-01-24 02:16:45.223478: step: 846/463, loss: 0.10134749859571457 2023-01-24 02:16:45.888397: step: 848/463, loss: 0.01853303797543049 2023-01-24 02:16:46.454158: step: 850/463, loss: 0.09675605595111847 2023-01-24 02:16:47.066890: step: 852/463, loss: 0.04180556535720825 2023-01-24 02:16:47.617246: step: 854/463, loss: 0.016176365315914154 2023-01-24 02:16:48.269921: step: 856/463, loss: 0.07607618719339371 2023-01-24 02:16:48.897809: step: 858/463, loss: 0.02337942086160183 2023-01-24 02:16:49.511902: step: 860/463, loss: 0.055050596594810486 2023-01-24 02:16:50.101616: step: 862/463, loss: 0.20243799686431885 2023-01-24 02:16:50.688840: step: 864/463, loss: 0.10927688330411911 2023-01-24 02:16:51.284345: step: 866/463, loss: 0.09429860860109329 2023-01-24 02:16:51.831435: step: 868/463, loss: 0.05700754374265671 2023-01-24 02:16:52.421768: step: 870/463, loss: 0.13355594873428345 2023-01-24 02:16:53.017381: step: 872/463, loss: 0.03869052976369858 2023-01-24 02:16:53.677830: step: 874/463, loss: 0.04819696396589279 2023-01-24 02:16:54.360301: step: 876/463, loss: 0.07907703518867493 2023-01-24 02:16:55.065615: step: 878/463, loss: 0.31548523902893066 2023-01-24 02:16:55.691498: step: 880/463, loss: 0.17865057289600372 2023-01-24 02:16:56.366687: step: 882/463, loss: 0.20975464582443237 2023-01-24 02:16:57.086852: step: 884/463, loss: 0.041771065443754196 2023-01-24 02:16:57.685518: step: 886/463, loss: 0.10529671609401703 2023-01-24 02:16:58.328574: step: 888/463, loss: 0.10200495272874832 2023-01-24 02:16:58.956956: step: 890/463, loss: 0.12388349324464798 2023-01-24 02:16:59.538189: step: 892/463, loss: 0.1299099624156952 2023-01-24 02:17:00.132435: step: 894/463, loss: 0.03455130010843277 2023-01-24 02:17:00.713861: step: 896/463, loss: 0.17462481558322906 2023-01-24 02:17:01.346639: step: 898/463, loss: 0.06822777539491653 2023-01-24 02:17:01.956567: step: 900/463, loss: 0.08120286464691162 2023-01-24 02:17:02.643658: step: 902/463, loss: 0.12477584928274155 2023-01-24 02:17:03.326328: step: 904/463, loss: 0.031804341822862625 2023-01-24 02:17:03.937947: step: 906/463, loss: 0.14419203996658325 2023-01-24 02:17:04.533294: step: 908/463, loss: 0.08351092785596848 2023-01-24 02:17:05.180863: step: 910/463, loss: 0.1912321001291275 2023-01-24 02:17:05.740594: step: 912/463, loss: 0.03968960419297218 2023-01-24 02:17:06.285790: step: 914/463, loss: 0.04105373099446297 2023-01-24 02:17:06.869506: step: 916/463, loss: 0.22343170642852783 2023-01-24 02:17:07.463660: step: 918/463, loss: 0.08241358399391174 2023-01-24 02:17:08.042844: step: 920/463, loss: 0.058043330907821655 2023-01-24 02:17:08.624276: step: 922/463, loss: 0.008563480339944363 2023-01-24 02:17:09.237058: step: 924/463, loss: 0.03655325248837471 2023-01-24 02:17:09.851332: step: 926/463, loss: 0.07631516456604004 ================================================== Loss: 0.117 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3480783752860412, 'r': 0.3137328809504166, 'f1': 0.3300144276664063}, 'combined': 0.24316852564893096, 'epoch': 18} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35317387212579293, 'r': 0.3671320306748822, 'f1': 0.3600177101165136}, 'combined': 0.27905678965969, 'epoch': 18} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33248493849005734, 'r': 0.32554502516294037, 'f1': 0.32897838592688317}, 'combined': 0.24240512647244022, 'epoch': 18} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.338332257970747, 'r': 0.3706728414532449, 'f1': 0.353764957457132}, 'combined': 0.2742101584117483, 'epoch': 18} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3408352295211713, 'r': 0.31949260604071844, 'f1': 0.3298190076071667}, 'combined': 0.24302453192107018, 'epoch': 18} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3382642313507632, 'r': 0.35598579494174987, 'f1': 0.34689883107624175}, 'combined': 0.26888808915957496, 'epoch': 18} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3448275862068966, 'r': 0.2857142857142857, 'f1': 0.3125}, 'combined': 0.20833333333333331, 'epoch': 18} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.4318181818181818, 'r': 0.41304347826086957, 'f1': 0.4222222222222222}, 'combined': 0.2111111111111111, 'epoch': 18} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.1724137931034483, 'f1': 0.25641025641025644}, 'combined': 0.17094017094017094, 'epoch': 18} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35531381856540084, 'r': 0.3195801707779886, 'f1': 0.336500999000999}, 'combined': 0.24794810452705188, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35077406976066583, 'r': 0.38269193088778525, 'f1': 0.3660385237854157}, 'combined': 0.2837236404461117, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.3142857142857143, 'f1': 0.360655737704918}, 'combined': 0.24043715846994532, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33248493849005734, 'r': 0.32554502516294037, 'f1': 0.32897838592688317}, 'combined': 0.24240512647244022, 'epoch': 18} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.338332257970747, 'r': 0.3706728414532449, 'f1': 0.353764957457132}, 'combined': 0.2742101584117483, 'epoch': 18} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.4318181818181818, 'r': 0.41304347826086957, 'f1': 0.4222222222222222}, 'combined': 0.2111111111111111, 'epoch': 18} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33842201576576575, 'r': 0.3326425126502214, 'f1': 0.3355073763955343}, 'combined': 0.2472159615546042, 'epoch': 14} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33644544699224593, 'r': 0.3627302475385151, 'f1': 0.3490937720671424}, 'combined': 0.2705894309802731, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.20689655172413793, 'f1': 0.2926829268292683}, 'combined': 0.19512195121951217, 'epoch': 14} ****************************** Epoch: 19 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:19:52.468080: step: 2/463, loss: 0.09843173623085022 2023-01-24 02:19:53.064701: step: 4/463, loss: 0.10880041122436523 2023-01-24 02:19:53.738501: step: 6/463, loss: 0.08595127612352371 2023-01-24 02:19:54.351938: step: 8/463, loss: 0.017172206193208694 2023-01-24 02:19:55.020893: step: 10/463, loss: 0.047578807920217514 2023-01-24 02:19:55.677506: step: 12/463, loss: 0.021150914952158928 2023-01-24 02:19:56.307939: step: 14/463, loss: 0.056047938764095306 2023-01-24 02:19:56.998403: step: 16/463, loss: 0.1916477531194687 2023-01-24 02:19:57.581218: step: 18/463, loss: 0.0209858026355505 2023-01-24 02:19:58.155274: step: 20/463, loss: 0.07440309226512909 2023-01-24 02:19:58.786381: step: 22/463, loss: 0.13617275655269623 2023-01-24 02:19:59.393532: step: 24/463, loss: 0.3860137462615967 2023-01-24 02:20:00.044546: step: 26/463, loss: 0.01971534825861454 2023-01-24 02:20:00.639118: step: 28/463, loss: 0.10272450745105743 2023-01-24 02:20:01.306481: step: 30/463, loss: 0.020555952563881874 2023-01-24 02:20:01.991099: step: 32/463, loss: 0.05352829024195671 2023-01-24 02:20:02.614724: step: 34/463, loss: 0.040844038128852844 2023-01-24 02:20:03.282984: step: 36/463, loss: 0.2471899688243866 2023-01-24 02:20:03.879946: step: 38/463, loss: 0.11653846502304077 2023-01-24 02:20:04.504972: step: 40/463, loss: 0.07643802464008331 2023-01-24 02:20:05.149910: step: 42/463, loss: 0.04667147621512413 2023-01-24 02:20:05.782690: step: 44/463, loss: 0.20017176866531372 2023-01-24 02:20:06.328274: step: 46/463, loss: 0.031188489869236946 2023-01-24 02:20:06.968419: step: 48/463, loss: 0.08504738658666611 2023-01-24 02:20:07.645621: step: 50/463, loss: 0.17975234985351562 2023-01-24 02:20:08.250103: step: 52/463, loss: 0.09627849608659744 2023-01-24 02:20:08.907010: step: 54/463, loss: 0.03888966515660286 2023-01-24 02:20:09.508919: step: 56/463, loss: 0.023116713389754295 2023-01-24 02:20:10.136382: step: 58/463, loss: 0.45193710923194885 2023-01-24 02:20:10.738802: step: 60/463, loss: 0.11137518286705017 2023-01-24 02:20:11.329965: step: 62/463, loss: 0.015942148864269257 2023-01-24 02:20:11.936529: step: 64/463, loss: 0.31105291843414307 2023-01-24 02:20:12.534387: step: 66/463, loss: 0.04012041166424751 2023-01-24 02:20:13.086517: step: 68/463, loss: 1.0168992280960083 2023-01-24 02:20:13.677844: step: 70/463, loss: 0.043375808745622635 2023-01-24 02:20:14.291733: step: 72/463, loss: 0.15089629590511322 2023-01-24 02:20:14.905518: step: 74/463, loss: 0.08969930559396744 2023-01-24 02:20:15.537326: step: 76/463, loss: 0.029811233282089233 2023-01-24 02:20:16.122126: step: 78/463, loss: 0.09008944779634476 2023-01-24 02:20:16.712581: step: 80/463, loss: 0.24686199426651 2023-01-24 02:20:17.281260: step: 82/463, loss: 0.06390325725078583 2023-01-24 02:20:17.856319: step: 84/463, loss: 0.0450165830552578 2023-01-24 02:20:18.429399: step: 86/463, loss: 0.037968046963214874 2023-01-24 02:20:19.089453: step: 88/463, loss: 0.02627360075712204 2023-01-24 02:20:19.697631: step: 90/463, loss: 0.07466509938240051 2023-01-24 02:20:20.304825: step: 92/463, loss: 0.1126580536365509 2023-01-24 02:20:20.896443: step: 94/463, loss: 0.1909865438938141 2023-01-24 02:20:21.528545: step: 96/463, loss: 0.16507361829280853 2023-01-24 02:20:22.134825: step: 98/463, loss: 0.04729074612259865 2023-01-24 02:20:22.877656: step: 100/463, loss: 0.06562110781669617 2023-01-24 02:20:23.468304: step: 102/463, loss: 0.023107284680008888 2023-01-24 02:20:24.086264: step: 104/463, loss: 0.017375657334923744 2023-01-24 02:20:24.690047: step: 106/463, loss: 0.04252321645617485 2023-01-24 02:20:25.346198: step: 108/463, loss: 0.12977533042430878 2023-01-24 02:20:25.890059: step: 110/463, loss: 0.27735015749931335 2023-01-24 02:20:26.525678: step: 112/463, loss: 0.24240757524967194 2023-01-24 02:20:27.121265: step: 114/463, loss: 0.04336678236722946 2023-01-24 02:20:27.690334: step: 116/463, loss: 0.02648986130952835 2023-01-24 02:20:28.294129: step: 118/463, loss: 0.0028780458960682154 2023-01-24 02:20:28.878582: step: 120/463, loss: 0.3205176889896393 2023-01-24 02:20:29.505004: step: 122/463, loss: 0.07898540049791336 2023-01-24 02:20:30.072621: step: 124/463, loss: 0.13012634217739105 2023-01-24 02:20:30.704417: step: 126/463, loss: 0.05570770427584648 2023-01-24 02:20:31.309025: step: 128/463, loss: 0.13161610066890717 2023-01-24 02:20:31.864384: step: 130/463, loss: 0.024745089933276176 2023-01-24 02:20:32.565349: step: 132/463, loss: 0.07149546593427658 2023-01-24 02:20:33.202078: step: 134/463, loss: 0.02650449052453041 2023-01-24 02:20:33.816071: step: 136/463, loss: 0.057667359709739685 2023-01-24 02:20:34.332013: step: 138/463, loss: 0.07903765141963959 2023-01-24 02:20:34.945445: step: 140/463, loss: 0.03494666889309883 2023-01-24 02:20:35.658606: step: 142/463, loss: 0.017760442569851875 2023-01-24 02:20:36.261253: step: 144/463, loss: 0.01631774567067623 2023-01-24 02:20:36.878947: step: 146/463, loss: 0.32027414441108704 2023-01-24 02:20:37.468707: step: 148/463, loss: 0.12576505541801453 2023-01-24 02:20:38.042743: step: 150/463, loss: 0.011223982088267803 2023-01-24 02:20:38.614421: step: 152/463, loss: 0.07385571300983429 2023-01-24 02:20:39.213766: step: 154/463, loss: 0.07310042530298233 2023-01-24 02:20:39.857982: step: 156/463, loss: 0.021011769771575928 2023-01-24 02:20:40.515375: step: 158/463, loss: 0.5341897010803223 2023-01-24 02:20:41.130412: step: 160/463, loss: 0.02520231157541275 2023-01-24 02:20:41.757429: step: 162/463, loss: 0.03399830311536789 2023-01-24 02:20:42.407473: step: 164/463, loss: 0.030097810551524162 2023-01-24 02:20:43.023092: step: 166/463, loss: 0.05835844203829765 2023-01-24 02:20:43.651479: step: 168/463, loss: 0.0858881026506424 2023-01-24 02:20:44.286448: step: 170/463, loss: 0.026198573410511017 2023-01-24 02:20:44.920312: step: 172/463, loss: 0.01906190626323223 2023-01-24 02:20:45.531928: step: 174/463, loss: 0.05735083296895027 2023-01-24 02:20:46.150250: step: 176/463, loss: 0.1060795858502388 2023-01-24 02:20:46.677408: step: 178/463, loss: 0.007871311157941818 2023-01-24 02:20:47.286812: step: 180/463, loss: 0.0605691596865654 2023-01-24 02:20:47.963274: step: 182/463, loss: 0.06482990831136703 2023-01-24 02:20:48.614847: step: 184/463, loss: 0.03421315550804138 2023-01-24 02:20:49.174541: step: 186/463, loss: 0.07118727266788483 2023-01-24 02:20:49.786132: step: 188/463, loss: 0.09793812036514282 2023-01-24 02:20:50.426988: step: 190/463, loss: 0.05160688981413841 2023-01-24 02:20:51.056104: step: 192/463, loss: 0.03019959107041359 2023-01-24 02:20:51.659060: step: 194/463, loss: 0.014281491748988628 2023-01-24 02:20:52.242946: step: 196/463, loss: 0.020693454891443253 2023-01-24 02:20:52.897830: step: 198/463, loss: 0.17934651672840118 2023-01-24 02:20:53.496858: step: 200/463, loss: 0.0876389816403389 2023-01-24 02:20:54.159929: step: 202/463, loss: 0.1064068153500557 2023-01-24 02:20:54.811612: step: 204/463, loss: 0.2992803156375885 2023-01-24 02:20:55.435701: step: 206/463, loss: 0.03400499001145363 2023-01-24 02:20:56.010613: step: 208/463, loss: 0.017595138400793076 2023-01-24 02:20:56.747563: step: 210/463, loss: 0.041268959641456604 2023-01-24 02:20:57.369840: step: 212/463, loss: 0.00404773373156786 2023-01-24 02:20:57.986191: step: 214/463, loss: 0.08111508190631866 2023-01-24 02:20:58.504891: step: 216/463, loss: 0.12711355090141296 2023-01-24 02:20:59.110193: step: 218/463, loss: 0.08298701792955399 2023-01-24 02:20:59.718171: step: 220/463, loss: 0.13042567670345306 2023-01-24 02:21:00.329084: step: 222/463, loss: 0.05210493877530098 2023-01-24 02:21:00.896409: step: 224/463, loss: 0.035714223980903625 2023-01-24 02:21:01.529013: step: 226/463, loss: 0.04237968847155571 2023-01-24 02:21:02.168574: step: 228/463, loss: 0.04566330462694168 2023-01-24 02:21:02.713605: step: 230/463, loss: 0.13896937668323517 2023-01-24 02:21:03.313110: step: 232/463, loss: 0.0792575404047966 2023-01-24 02:21:03.948292: step: 234/463, loss: 0.13525839149951935 2023-01-24 02:21:04.558268: step: 236/463, loss: 0.061421819031238556 2023-01-24 02:21:05.164439: step: 238/463, loss: 0.025934318080544472 2023-01-24 02:21:05.738648: step: 240/463, loss: 0.08406510204076767 2023-01-24 02:21:06.326394: step: 242/463, loss: 0.01069964561611414 2023-01-24 02:21:06.929341: step: 244/463, loss: 0.19616185128688812 2023-01-24 02:21:07.578408: step: 246/463, loss: 0.06312242895364761 2023-01-24 02:21:08.249901: step: 248/463, loss: 0.04364520311355591 2023-01-24 02:21:08.889929: step: 250/463, loss: 0.0789756253361702 2023-01-24 02:21:09.485662: step: 252/463, loss: 0.8129016757011414 2023-01-24 02:21:10.119442: step: 254/463, loss: 0.34075844287872314 2023-01-24 02:21:10.781940: step: 256/463, loss: 0.032831981778144836 2023-01-24 02:21:11.412919: step: 258/463, loss: 0.06537775695323944 2023-01-24 02:21:12.057400: step: 260/463, loss: 0.014605416916310787 2023-01-24 02:21:12.632253: step: 262/463, loss: 0.20081882178783417 2023-01-24 02:21:13.300546: step: 264/463, loss: 0.06995569914579391 2023-01-24 02:21:13.954725: step: 266/463, loss: 0.05390030890703201 2023-01-24 02:21:14.556823: step: 268/463, loss: 0.03299311175942421 2023-01-24 02:21:15.226924: step: 270/463, loss: 0.2626551389694214 2023-01-24 02:21:15.916632: step: 272/463, loss: 0.04602523520588875 2023-01-24 02:21:16.499864: step: 274/463, loss: 0.19189557433128357 2023-01-24 02:21:17.093593: step: 276/463, loss: 0.0708315297961235 2023-01-24 02:21:17.781487: step: 278/463, loss: 0.07911559194326401 2023-01-24 02:21:18.395042: step: 280/463, loss: 0.07578671723604202 2023-01-24 02:21:19.017108: step: 282/463, loss: 0.07183322310447693 2023-01-24 02:21:19.701650: step: 284/463, loss: 0.015800967812538147 2023-01-24 02:21:20.333764: step: 286/463, loss: 0.021861545741558075 2023-01-24 02:21:20.952870: step: 288/463, loss: 0.0278567336499691 2023-01-24 02:21:21.699717: step: 290/463, loss: 0.061614371836185455 2023-01-24 02:21:22.287776: step: 292/463, loss: 0.06986402720212936 2023-01-24 02:21:22.911412: step: 294/463, loss: 0.08326896280050278 2023-01-24 02:21:23.532705: step: 296/463, loss: 0.09929545968770981 2023-01-24 02:21:24.205764: step: 298/463, loss: 0.012018418870866299 2023-01-24 02:21:24.823708: step: 300/463, loss: 0.05839642137289047 2023-01-24 02:21:25.461060: step: 302/463, loss: 0.09725894033908844 2023-01-24 02:21:26.118073: step: 304/463, loss: 0.04040858522057533 2023-01-24 02:21:26.691593: step: 306/463, loss: 0.045502495020627975 2023-01-24 02:21:27.400667: step: 308/463, loss: 0.04373488575220108 2023-01-24 02:21:28.037909: step: 310/463, loss: 0.01789252460002899 2023-01-24 02:21:28.600858: step: 312/463, loss: 0.04127953201532364 2023-01-24 02:21:29.264978: step: 314/463, loss: 0.07758328318595886 2023-01-24 02:21:29.846458: step: 316/463, loss: 0.10204042494297028 2023-01-24 02:21:30.484533: step: 318/463, loss: 0.07615073025226593 2023-01-24 02:21:31.122636: step: 320/463, loss: 0.0411040261387825 2023-01-24 02:21:31.752533: step: 322/463, loss: 0.03426584601402283 2023-01-24 02:21:32.375691: step: 324/463, loss: 0.03321380540728569 2023-01-24 02:21:33.027733: step: 326/463, loss: 0.021382590755820274 2023-01-24 02:21:33.706511: step: 328/463, loss: 0.08947504311800003 2023-01-24 02:21:34.276438: step: 330/463, loss: 0.07809731364250183 2023-01-24 02:21:34.903064: step: 332/463, loss: 0.04807485267519951 2023-01-24 02:21:35.483207: step: 334/463, loss: 0.04300491884350777 2023-01-24 02:21:36.138018: step: 336/463, loss: 0.12432550638914108 2023-01-24 02:21:36.737700: step: 338/463, loss: 0.03360001742839813 2023-01-24 02:21:37.401902: step: 340/463, loss: 0.03814920410513878 2023-01-24 02:21:38.007729: step: 342/463, loss: 0.7751902937889099 2023-01-24 02:21:38.557431: step: 344/463, loss: 0.025956541299819946 2023-01-24 02:21:39.192046: step: 346/463, loss: 0.0020128029864281416 2023-01-24 02:21:39.784877: step: 348/463, loss: 0.09925977885723114 2023-01-24 02:21:40.390099: step: 350/463, loss: 0.07244475185871124 2023-01-24 02:21:41.035517: step: 352/463, loss: 3.778965950012207 2023-01-24 02:21:41.639778: step: 354/463, loss: 0.02549927495419979 2023-01-24 02:21:42.274157: step: 356/463, loss: 0.03316636011004448 2023-01-24 02:21:42.924452: step: 358/463, loss: 0.06820200383663177 2023-01-24 02:21:43.560169: step: 360/463, loss: 0.08836186677217484 2023-01-24 02:21:44.178267: step: 362/463, loss: 0.052920252084732056 2023-01-24 02:21:44.763185: step: 364/463, loss: 0.0044011990539729595 2023-01-24 02:21:45.369221: step: 366/463, loss: 0.09695777297019958 2023-01-24 02:21:45.968527: step: 368/463, loss: 0.02842007204890251 2023-01-24 02:21:46.599003: step: 370/463, loss: 0.029343625530600548 2023-01-24 02:21:47.248090: step: 372/463, loss: 0.4363693594932556 2023-01-24 02:21:47.865045: step: 374/463, loss: 0.02570498362183571 2023-01-24 02:21:48.467627: step: 376/463, loss: 0.11292698234319687 2023-01-24 02:21:49.105788: step: 378/463, loss: 0.13322654366493225 2023-01-24 02:21:49.691260: step: 380/463, loss: 0.06595969200134277 2023-01-24 02:21:50.323325: step: 382/463, loss: 0.018003471195697784 2023-01-24 02:21:50.976581: step: 384/463, loss: 0.08815129101276398 2023-01-24 02:21:51.620723: step: 386/463, loss: 1.4322130680084229 2023-01-24 02:21:52.247998: step: 388/463, loss: 0.11667540669441223 2023-01-24 02:21:52.849016: step: 390/463, loss: 0.10367243736982346 2023-01-24 02:21:53.511252: step: 392/463, loss: 0.3529355525970459 2023-01-24 02:21:54.138284: step: 394/463, loss: 0.053055375814437866 2023-01-24 02:21:54.805286: step: 396/463, loss: 0.04888347536325455 2023-01-24 02:21:55.423424: step: 398/463, loss: 0.06784988939762115 2023-01-24 02:21:56.013612: step: 400/463, loss: 0.031823400408029556 2023-01-24 02:21:56.620166: step: 402/463, loss: 0.12803302705287933 2023-01-24 02:21:57.227583: step: 404/463, loss: 0.31832724809646606 2023-01-24 02:21:57.852627: step: 406/463, loss: 0.019247863441705704 2023-01-24 02:21:58.427064: step: 408/463, loss: 0.6069300174713135 2023-01-24 02:21:59.137019: step: 410/463, loss: 0.05580489709973335 2023-01-24 02:21:59.796309: step: 412/463, loss: 0.017603939399123192 2023-01-24 02:22:00.340720: step: 414/463, loss: 0.03991398215293884 2023-01-24 02:22:00.997620: step: 416/463, loss: 0.5047932863235474 2023-01-24 02:22:01.613153: step: 418/463, loss: 0.01245469506829977 2023-01-24 02:22:02.256436: step: 420/463, loss: 0.514163613319397 2023-01-24 02:22:02.926092: step: 422/463, loss: 0.06172584369778633 2023-01-24 02:22:03.528644: step: 424/463, loss: 0.04152445122599602 2023-01-24 02:22:04.143633: step: 426/463, loss: 0.03773270919919014 2023-01-24 02:22:04.817404: step: 428/463, loss: 0.03397948667407036 2023-01-24 02:22:05.400659: step: 430/463, loss: 0.03543838858604431 2023-01-24 02:22:06.027049: step: 432/463, loss: 0.03854277729988098 2023-01-24 02:22:06.645253: step: 434/463, loss: 0.0606447234749794 2023-01-24 02:22:07.384736: step: 436/463, loss: 0.04678954929113388 2023-01-24 02:22:08.054418: step: 438/463, loss: 0.08492782711982727 2023-01-24 02:22:08.678939: step: 440/463, loss: 0.05202166736125946 2023-01-24 02:22:09.305593: step: 442/463, loss: 0.0697241723537445 2023-01-24 02:22:09.977136: step: 444/463, loss: 0.023754039779305458 2023-01-24 02:22:10.639921: step: 446/463, loss: 0.08370796591043472 2023-01-24 02:22:11.244231: step: 448/463, loss: 0.008632220327854156 2023-01-24 02:22:11.810037: step: 450/463, loss: 0.01273120567202568 2023-01-24 02:22:12.462319: step: 452/463, loss: 0.20612354576587677 2023-01-24 02:22:13.006244: step: 454/463, loss: 0.03448454290628433 2023-01-24 02:22:13.694325: step: 456/463, loss: 0.09639697521924973 2023-01-24 02:22:14.311597: step: 458/463, loss: 0.008333135396242142 2023-01-24 02:22:14.942116: step: 460/463, loss: 0.07454507052898407 2023-01-24 02:22:15.524586: step: 462/463, loss: 0.03628313168883324 2023-01-24 02:22:16.269953: step: 464/463, loss: 0.0862494558095932 2023-01-24 02:22:16.912702: step: 466/463, loss: 0.06211767718195915 2023-01-24 02:22:17.508707: step: 468/463, loss: 0.046333055943250656 2023-01-24 02:22:18.136127: step: 470/463, loss: 0.07183966785669327 2023-01-24 02:22:18.686987: step: 472/463, loss: 0.0159163698554039 2023-01-24 02:22:19.248217: step: 474/463, loss: 0.0667303204536438 2023-01-24 02:22:19.846527: step: 476/463, loss: 0.12091360986232758 2023-01-24 02:22:20.465291: step: 478/463, loss: 0.03381025046110153 2023-01-24 02:22:21.111907: step: 480/463, loss: 0.18052664399147034 2023-01-24 02:22:21.691796: step: 482/463, loss: 0.1478506624698639 2023-01-24 02:22:22.301654: step: 484/463, loss: 0.11141731590032578 2023-01-24 02:22:22.860421: step: 486/463, loss: 0.015993352979421616 2023-01-24 02:22:23.472828: step: 488/463, loss: 0.03572019934654236 2023-01-24 02:22:24.068131: step: 490/463, loss: 0.00907127559185028 2023-01-24 02:22:24.698568: step: 492/463, loss: 0.04498286172747612 2023-01-24 02:22:25.369300: step: 494/463, loss: 0.0726713165640831 2023-01-24 02:22:26.043265: step: 496/463, loss: 0.09351115673780441 2023-01-24 02:22:26.605510: step: 498/463, loss: 0.08858353644609451 2023-01-24 02:22:27.209293: step: 500/463, loss: 0.10741402208805084 2023-01-24 02:22:27.827786: step: 502/463, loss: 0.018738150596618652 2023-01-24 02:22:28.465747: step: 504/463, loss: 0.010571149177849293 2023-01-24 02:22:29.124181: step: 506/463, loss: 0.05955428630113602 2023-01-24 02:22:29.765983: step: 508/463, loss: 0.23404785990715027 2023-01-24 02:22:30.381958: step: 510/463, loss: 0.01985454186797142 2023-01-24 02:22:30.973781: step: 512/463, loss: 0.29550686478614807 2023-01-24 02:22:31.666881: step: 514/463, loss: 0.04698484018445015 2023-01-24 02:22:32.348886: step: 516/463, loss: 0.1044471338391304 2023-01-24 02:22:32.962612: step: 518/463, loss: 0.03806724771857262 2023-01-24 02:22:33.521523: step: 520/463, loss: 0.017785893753170967 2023-01-24 02:22:34.213510: step: 522/463, loss: 0.04650108516216278 2023-01-24 02:22:34.884903: step: 524/463, loss: 0.12200185656547546 2023-01-24 02:22:35.522370: step: 526/463, loss: 0.156696617603302 2023-01-24 02:22:36.172444: step: 528/463, loss: 0.055220555514097214 2023-01-24 02:22:36.810376: step: 530/463, loss: 0.0432281568646431 2023-01-24 02:22:37.459505: step: 532/463, loss: 0.09745674580335617 2023-01-24 02:22:38.104197: step: 534/463, loss: 0.016220947727560997 2023-01-24 02:22:38.760795: step: 536/463, loss: 0.027817638590931892 2023-01-24 02:22:39.340447: step: 538/463, loss: 0.016521908342838287 2023-01-24 02:22:39.918362: step: 540/463, loss: 0.05771870166063309 2023-01-24 02:22:40.654208: step: 542/463, loss: 0.517961859703064 2023-01-24 02:22:41.283509: step: 544/463, loss: 0.14686883985996246 2023-01-24 02:22:41.832791: step: 546/463, loss: 0.059000756591558456 2023-01-24 02:22:42.491695: step: 548/463, loss: 0.05594101548194885 2023-01-24 02:22:43.104861: step: 550/463, loss: 0.036989595741033554 2023-01-24 02:22:43.723014: step: 552/463, loss: 0.05881727114319801 2023-01-24 02:22:44.337469: step: 554/463, loss: 0.03861016780138016 2023-01-24 02:22:44.969711: step: 556/463, loss: 0.12573686242103577 2023-01-24 02:22:45.581995: step: 558/463, loss: 0.013770697638392448 2023-01-24 02:22:46.194101: step: 560/463, loss: 0.03228835016489029 2023-01-24 02:22:46.836634: step: 562/463, loss: 0.12452410906553268 2023-01-24 02:22:47.451301: step: 564/463, loss: 0.03419553115963936 2023-01-24 02:22:48.095519: step: 566/463, loss: 0.25904738903045654 2023-01-24 02:22:48.748913: step: 568/463, loss: 0.14948739111423492 2023-01-24 02:22:49.342724: step: 570/463, loss: 0.10230248421430588 2023-01-24 02:22:49.985930: step: 572/463, loss: 0.046159714460372925 2023-01-24 02:22:50.543808: step: 574/463, loss: 0.03526247292757034 2023-01-24 02:22:51.174305: step: 576/463, loss: 0.037601035088300705 2023-01-24 02:22:51.779594: step: 578/463, loss: 0.07432859390974045 2023-01-24 02:22:52.390852: step: 580/463, loss: 0.043209258466959 2023-01-24 02:22:53.018075: step: 582/463, loss: 0.2695164680480957 2023-01-24 02:22:53.638727: step: 584/463, loss: 0.09182669222354889 2023-01-24 02:22:54.216027: step: 586/463, loss: 0.05101155489683151 2023-01-24 02:22:54.858929: step: 588/463, loss: 0.06139244884252548 2023-01-24 02:22:55.460218: step: 590/463, loss: 0.121304452419281 2023-01-24 02:22:56.058440: step: 592/463, loss: 0.019609736278653145 2023-01-24 02:22:56.725846: step: 594/463, loss: 0.19669921696186066 2023-01-24 02:22:57.292145: step: 596/463, loss: 0.1184077039361 2023-01-24 02:22:57.806472: step: 598/463, loss: 0.02450251765549183 2023-01-24 02:22:58.410545: step: 600/463, loss: 0.08885645866394043 2023-01-24 02:22:59.098492: step: 602/463, loss: 0.03356620669364929 2023-01-24 02:22:59.723555: step: 604/463, loss: 0.03580283746123314 2023-01-24 02:23:00.357533: step: 606/463, loss: 0.08973096311092377 2023-01-24 02:23:00.962730: step: 608/463, loss: 0.1778457909822464 2023-01-24 02:23:01.540386: step: 610/463, loss: 0.004249132238328457 2023-01-24 02:23:02.182751: step: 612/463, loss: 0.04661310464143753 2023-01-24 02:23:02.836843: step: 614/463, loss: 0.0789538323879242 2023-01-24 02:23:03.460744: step: 616/463, loss: 0.021688908338546753 2023-01-24 02:23:04.122262: step: 618/463, loss: 0.06403544545173645 2023-01-24 02:23:04.792609: step: 620/463, loss: 0.023572752252221107 2023-01-24 02:23:05.382681: step: 622/463, loss: 0.028784917667508125 2023-01-24 02:23:05.998289: step: 624/463, loss: 0.06966814398765564 2023-01-24 02:23:06.639614: step: 626/463, loss: 0.04270821064710617 2023-01-24 02:23:07.270919: step: 628/463, loss: 0.030175073072314262 2023-01-24 02:23:07.843176: step: 630/463, loss: 0.06828122586011887 2023-01-24 02:23:08.430758: step: 632/463, loss: 0.11224208027124405 2023-01-24 02:23:09.063284: step: 634/463, loss: 0.00522087886929512 2023-01-24 02:23:09.729763: step: 636/463, loss: 0.7217215895652771 2023-01-24 02:23:10.434162: step: 638/463, loss: 0.04315922036767006 2023-01-24 02:23:11.120514: step: 640/463, loss: 0.05577242374420166 2023-01-24 02:23:11.709830: step: 642/463, loss: 0.11067011207342148 2023-01-24 02:23:12.350701: step: 644/463, loss: 0.0692674070596695 2023-01-24 02:23:13.002200: step: 646/463, loss: 0.08684011548757553 2023-01-24 02:23:13.651874: step: 648/463, loss: 0.15701603889465332 2023-01-24 02:23:14.254622: step: 650/463, loss: 0.07535672932863235 2023-01-24 02:23:14.900824: step: 652/463, loss: 0.08991323411464691 2023-01-24 02:23:15.595311: step: 654/463, loss: 0.10656873136758804 2023-01-24 02:23:16.234534: step: 656/463, loss: 0.2171446979045868 2023-01-24 02:23:16.871583: step: 658/463, loss: 0.10648135840892792 2023-01-24 02:23:17.480133: step: 660/463, loss: 0.14075885713100433 2023-01-24 02:23:18.082501: step: 662/463, loss: 0.018190212547779083 2023-01-24 02:23:18.710533: step: 664/463, loss: 0.22377075254917145 2023-01-24 02:23:19.290478: step: 666/463, loss: 0.0262388177216053 2023-01-24 02:23:19.848668: step: 668/463, loss: 7.585052013397217 2023-01-24 02:23:20.458217: step: 670/463, loss: 0.11799109727144241 2023-01-24 02:23:21.066276: step: 672/463, loss: 0.05876987427473068 2023-01-24 02:23:21.679589: step: 674/463, loss: 0.05100049823522568 2023-01-24 02:23:22.289386: step: 676/463, loss: 0.04835920035839081 2023-01-24 02:23:22.950600: step: 678/463, loss: 0.1110786646604538 2023-01-24 02:23:23.552332: step: 680/463, loss: 0.05327126011252403 2023-01-24 02:23:24.185869: step: 682/463, loss: 0.02951670251786709 2023-01-24 02:23:24.791169: step: 684/463, loss: 0.06278248131275177 2023-01-24 02:23:25.418865: step: 686/463, loss: 0.1263350546360016 2023-01-24 02:23:26.033298: step: 688/463, loss: 0.08174838870763779 2023-01-24 02:23:26.612926: step: 690/463, loss: 0.0589042492210865 2023-01-24 02:23:27.261801: step: 692/463, loss: 0.2909718155860901 2023-01-24 02:23:27.874772: step: 694/463, loss: 0.13094452023506165 2023-01-24 02:23:28.463371: step: 696/463, loss: 0.046198680996894836 2023-01-24 02:23:29.095916: step: 698/463, loss: 0.039940908551216125 2023-01-24 02:23:29.694414: step: 700/463, loss: 0.06243208795785904 2023-01-24 02:23:30.266895: step: 702/463, loss: 0.04296639561653137 2023-01-24 02:23:30.867528: step: 704/463, loss: 0.011986475437879562 2023-01-24 02:23:31.464870: step: 706/463, loss: 0.05324605852365494 2023-01-24 02:23:32.079005: step: 708/463, loss: 0.0964583232998848 2023-01-24 02:23:32.711152: step: 710/463, loss: 0.050626084208488464 2023-01-24 02:23:33.352047: step: 712/463, loss: 0.0022668896708637476 2023-01-24 02:23:34.035823: step: 714/463, loss: 0.10261612385511398 2023-01-24 02:23:34.665218: step: 716/463, loss: 0.006185358390212059 2023-01-24 02:23:35.313645: step: 718/463, loss: 0.14385487139225006 2023-01-24 02:23:35.921160: step: 720/463, loss: 0.0318455770611763 2023-01-24 02:23:36.509380: step: 722/463, loss: 0.07089218497276306 2023-01-24 02:23:37.110060: step: 724/463, loss: 0.16315022110939026 2023-01-24 02:23:37.703375: step: 726/463, loss: 0.3529326915740967 2023-01-24 02:23:38.304127: step: 728/463, loss: 0.43606990575790405 2023-01-24 02:23:38.874526: step: 730/463, loss: 0.11184833198785782 2023-01-24 02:23:39.441754: step: 732/463, loss: 0.16461403667926788 2023-01-24 02:23:40.054821: step: 734/463, loss: 0.08659656345844269 2023-01-24 02:23:40.768123: step: 736/463, loss: 0.12101877480745316 2023-01-24 02:23:41.434156: step: 738/463, loss: 0.33059343695640564 2023-01-24 02:23:42.032364: step: 740/463, loss: 0.027007615193724632 2023-01-24 02:23:42.668498: step: 742/463, loss: 0.02912762761116028 2023-01-24 02:23:43.349237: step: 744/463, loss: 0.05541957542300224 2023-01-24 02:23:43.920969: step: 746/463, loss: 0.04138094559311867 2023-01-24 02:23:44.559161: step: 748/463, loss: 0.0278069656342268 2023-01-24 02:23:45.156629: step: 750/463, loss: 0.04563678056001663 2023-01-24 02:23:45.747589: step: 752/463, loss: 0.12673847377300262 2023-01-24 02:23:46.384594: step: 754/463, loss: 0.040106259286403656 2023-01-24 02:23:46.970414: step: 756/463, loss: 0.0527421310544014 2023-01-24 02:23:47.560213: step: 758/463, loss: 0.005679921247065067 2023-01-24 02:23:48.152055: step: 760/463, loss: 0.045885588973760605 2023-01-24 02:23:48.748355: step: 762/463, loss: 0.02683146297931671 2023-01-24 02:23:49.384142: step: 764/463, loss: 0.4279610812664032 2023-01-24 02:23:50.030216: step: 766/463, loss: 0.046658970415592194 2023-01-24 02:23:50.724743: step: 768/463, loss: 0.03990132734179497 2023-01-24 02:23:51.331312: step: 770/463, loss: 0.06619829684495926 2023-01-24 02:23:51.919780: step: 772/463, loss: 0.048530030995607376 2023-01-24 02:23:52.505346: step: 774/463, loss: 0.022241076454520226 2023-01-24 02:23:53.123619: step: 776/463, loss: 0.08825267851352692 2023-01-24 02:23:53.719424: step: 778/463, loss: 0.03845822066068649 2023-01-24 02:23:54.368754: step: 780/463, loss: 0.11483372002840042 2023-01-24 02:23:54.955965: step: 782/463, loss: 0.12185422331094742 2023-01-24 02:23:55.530860: step: 784/463, loss: 0.040376223623752594 2023-01-24 02:23:56.164459: step: 786/463, loss: 0.026955559849739075 2023-01-24 02:23:56.783130: step: 788/463, loss: 0.019123699516057968 2023-01-24 02:23:57.399915: step: 790/463, loss: 0.1206919401884079 2023-01-24 02:23:58.047926: step: 792/463, loss: 0.13978567719459534 2023-01-24 02:23:58.628496: step: 794/463, loss: 0.06431294977664948 2023-01-24 02:23:59.191222: step: 796/463, loss: 0.12576158344745636 2023-01-24 02:23:59.825833: step: 798/463, loss: 0.029137829318642616 2023-01-24 02:24:00.548255: step: 800/463, loss: 0.0604996532201767 2023-01-24 02:24:01.192360: step: 802/463, loss: 0.042811159044504166 2023-01-24 02:24:01.802380: step: 804/463, loss: 0.29408660531044006 2023-01-24 02:24:02.502206: step: 806/463, loss: 0.025647656992077827 2023-01-24 02:24:03.210267: step: 808/463, loss: 0.05955564230680466 2023-01-24 02:24:03.829181: step: 810/463, loss: 0.07902026921510696 2023-01-24 02:24:04.387202: step: 812/463, loss: 0.06509557366371155 2023-01-24 02:24:04.964939: step: 814/463, loss: 0.03752699866890907 2023-01-24 02:24:05.580067: step: 816/463, loss: 0.03674310818314552 2023-01-24 02:24:06.263820: step: 818/463, loss: 0.02645660936832428 2023-01-24 02:24:06.856099: step: 820/463, loss: 0.04556189477443695 2023-01-24 02:24:07.445031: step: 822/463, loss: 0.013146854937076569 2023-01-24 02:24:08.008562: step: 824/463, loss: 0.04506433382630348 2023-01-24 02:24:08.603288: step: 826/463, loss: 0.03950195759534836 2023-01-24 02:24:09.261165: step: 828/463, loss: 0.10667979717254639 2023-01-24 02:24:09.852322: step: 830/463, loss: 0.02954714745283127 2023-01-24 02:24:10.431739: step: 832/463, loss: 0.05457380786538124 2023-01-24 02:24:11.072506: step: 834/463, loss: 0.026683717966079712 2023-01-24 02:24:11.628392: step: 836/463, loss: 0.08873116970062256 2023-01-24 02:24:12.295619: step: 838/463, loss: 0.08901484310626984 2023-01-24 02:24:12.928224: step: 840/463, loss: 0.06199616566300392 2023-01-24 02:24:13.522637: step: 842/463, loss: 0.03429458290338516 2023-01-24 02:24:14.198504: step: 844/463, loss: 0.09406204521656036 2023-01-24 02:24:14.870135: step: 846/463, loss: 0.04288970306515694 2023-01-24 02:24:15.499433: step: 848/463, loss: 0.02828223817050457 2023-01-24 02:24:16.122745: step: 850/463, loss: 0.3204517066478729 2023-01-24 02:24:16.786927: step: 852/463, loss: 0.021954761818051338 2023-01-24 02:24:17.347440: step: 854/463, loss: 0.19784387946128845 2023-01-24 02:24:17.943105: step: 856/463, loss: 0.03635372593998909 2023-01-24 02:24:18.631598: step: 858/463, loss: 0.033746834844350815 2023-01-24 02:24:19.296373: step: 860/463, loss: 0.022397411987185478 2023-01-24 02:24:19.896615: step: 862/463, loss: 0.14670518040657043 2023-01-24 02:24:20.502361: step: 864/463, loss: 0.0737113505601883 2023-01-24 02:24:21.156161: step: 866/463, loss: 0.059790484607219696 2023-01-24 02:24:21.781943: step: 868/463, loss: 0.013940228149294853 2023-01-24 02:24:22.395152: step: 870/463, loss: 0.042724523693323135 2023-01-24 02:24:23.025248: step: 872/463, loss: 0.08587834984064102 2023-01-24 02:24:23.587478: step: 874/463, loss: 0.009747837670147419 2023-01-24 02:24:24.239078: step: 876/463, loss: 0.07691461592912674 2023-01-24 02:24:24.845495: step: 878/463, loss: 0.008721565827727318 2023-01-24 02:24:25.504046: step: 880/463, loss: 0.08181518316268921 2023-01-24 02:24:26.115353: step: 882/463, loss: 0.0121048828586936 2023-01-24 02:24:26.848282: step: 884/463, loss: 0.042846113443374634 2023-01-24 02:24:27.467045: step: 886/463, loss: 0.09717530012130737 2023-01-24 02:24:28.048801: step: 888/463, loss: 0.03697590529918671 2023-01-24 02:24:28.710419: step: 890/463, loss: 0.05085385590791702 2023-01-24 02:24:29.320985: step: 892/463, loss: 0.03842558339238167 2023-01-24 02:24:29.901165: step: 894/463, loss: 0.05055253580212593 2023-01-24 02:24:30.482160: step: 896/463, loss: 0.03438316285610199 2023-01-24 02:24:31.113573: step: 898/463, loss: 0.059900399297475815 2023-01-24 02:24:31.643155: step: 900/463, loss: 0.09600280225276947 2023-01-24 02:24:32.310725: step: 902/463, loss: 0.06828293204307556 2023-01-24 02:24:33.030556: step: 904/463, loss: 0.02007468417286873 2023-01-24 02:24:33.691297: step: 906/463, loss: 0.07203416526317596 2023-01-24 02:24:34.320103: step: 908/463, loss: 0.05091244354844093 2023-01-24 02:24:34.975090: step: 910/463, loss: 0.010370898991823196 2023-01-24 02:24:35.563368: step: 912/463, loss: 0.679309606552124 2023-01-24 02:24:36.227006: step: 914/463, loss: 0.14878763258457184 2023-01-24 02:24:36.853281: step: 916/463, loss: 0.02331198751926422 2023-01-24 02:24:37.473775: step: 918/463, loss: 0.1366574913263321 2023-01-24 02:24:38.060690: step: 920/463, loss: 0.02488672360777855 2023-01-24 02:24:38.648990: step: 922/463, loss: 0.15476688742637634 2023-01-24 02:24:39.343068: step: 924/463, loss: 0.003639964619651437 2023-01-24 02:24:39.950904: step: 926/463, loss: 0.07893939316272736 ================================================== Loss: 0.119 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36852829732344405, 'r': 0.309787544049878, 'f1': 0.33661450662739323}, 'combined': 0.24803174172544762, 'epoch': 19} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.36852602025491243, 'r': 0.36310651995704607, 'f1': 0.3657961978826538}, 'combined': 0.2835358088851192, 'epoch': 19} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3556843389529725, 'r': 0.32598773380319873, 'f1': 0.340189179632249}, 'combined': 0.25066571130797294, 'epoch': 19} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35235259195938173, 'r': 0.3698406801632481, 'f1': 0.3608848968767838}, 'combined': 0.2797289631293731, 'epoch': 19} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3491219603061708, 'r': 0.3186483167120838, 'f1': 0.3331898073556908}, 'combined': 0.2455082791041932, 'epoch': 19} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3495350623454762, 'r': 0.3533902284742866, 'f1': 0.3514520736563289}, 'combined': 0.2724173968053841, 'epoch': 19} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.37037037037037035, 'r': 0.2857142857142857, 'f1': 0.3225806451612903}, 'combined': 0.2150537634408602, 'epoch': 19} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.41304347826086957, 'r': 0.41304347826086957, 'f1': 0.41304347826086957}, 'combined': 0.20652173913043478, 'epoch': 19} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.59375, 'r': 0.16379310344827586, 'f1': 0.25675675675675674}, 'combined': 0.17117117117117114, 'epoch': 19} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35531381856540084, 'r': 0.3195801707779886, 'f1': 0.336500999000999}, 'combined': 0.24794810452705188, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35077406976066583, 'r': 0.38269193088778525, 'f1': 0.3660385237854157}, 'combined': 0.2837236404461117, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.3142857142857143, 'f1': 0.360655737704918}, 'combined': 0.24043715846994532, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3556843389529725, 'r': 0.32598773380319873, 'f1': 0.340189179632249}, 'combined': 0.25066571130797294, 'epoch': 19} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35235259195938173, 'r': 0.3698406801632481, 'f1': 0.3608848968767838}, 'combined': 0.2797289631293731, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.41304347826086957, 'r': 0.41304347826086957, 'f1': 0.41304347826086957}, 'combined': 0.20652173913043478, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33842201576576575, 'r': 0.3326425126502214, 'f1': 0.3355073763955343}, 'combined': 0.2472159615546042, 'epoch': 14} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33644544699224593, 'r': 0.3627302475385151, 'f1': 0.3490937720671424}, 'combined': 0.2705894309802731, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.20689655172413793, 'f1': 0.2926829268292683}, 'combined': 0.19512195121951217, 'epoch': 14} ****************************** Epoch: 20 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:27:23.037011: step: 2/463, loss: 0.04250545799732208 2023-01-24 02:27:23.673682: step: 4/463, loss: 0.08801089972257614 2023-01-24 02:27:24.285251: step: 6/463, loss: 0.10532476752996445 2023-01-24 02:27:24.880827: step: 8/463, loss: 0.0667368546128273 2023-01-24 02:27:25.480586: step: 10/463, loss: 0.04844643548130989 2023-01-24 02:27:26.043721: step: 12/463, loss: 2.2428369522094727 2023-01-24 02:27:26.650794: step: 14/463, loss: 0.38902151584625244 2023-01-24 02:27:27.240285: step: 16/463, loss: 0.03707941621541977 2023-01-24 02:27:27.850191: step: 18/463, loss: 0.08036953955888748 2023-01-24 02:27:28.426454: step: 20/463, loss: 0.02767416276037693 2023-01-24 02:27:28.996058: step: 22/463, loss: 0.09718021005392075 2023-01-24 02:27:29.601295: step: 24/463, loss: 0.04521965980529785 2023-01-24 02:27:30.222464: step: 26/463, loss: 0.09202714264392853 2023-01-24 02:27:30.787210: step: 28/463, loss: 0.055955443531274796 2023-01-24 02:27:31.387337: step: 30/463, loss: 0.03415090963244438 2023-01-24 02:27:32.029297: step: 32/463, loss: 0.6268648505210876 2023-01-24 02:27:32.726697: step: 34/463, loss: 0.059366077184677124 2023-01-24 02:27:33.321858: step: 36/463, loss: 0.0190225038677454 2023-01-24 02:27:33.963817: step: 38/463, loss: 0.046397119760513306 2023-01-24 02:27:34.572512: step: 40/463, loss: 0.08980638533830643 2023-01-24 02:27:35.169317: step: 42/463, loss: 0.7615835666656494 2023-01-24 02:27:35.804287: step: 44/463, loss: 0.04266585409641266 2023-01-24 02:27:36.458889: step: 46/463, loss: 0.09545864164829254 2023-01-24 02:27:37.081239: step: 48/463, loss: 0.040855683386325836 2023-01-24 02:27:37.697190: step: 50/463, loss: 0.04735015705227852 2023-01-24 02:27:38.267323: step: 52/463, loss: 0.11715692281723022 2023-01-24 02:27:38.898989: step: 54/463, loss: 0.01710144430398941 2023-01-24 02:27:39.623066: step: 56/463, loss: 0.045234616845846176 2023-01-24 02:27:40.182688: step: 58/463, loss: 0.04355061054229736 2023-01-24 02:27:40.738741: step: 60/463, loss: 0.029900608584284782 2023-01-24 02:27:41.361618: step: 62/463, loss: 0.07774461805820465 2023-01-24 02:27:41.933600: step: 64/463, loss: 0.09202646464109421 2023-01-24 02:27:42.587839: step: 66/463, loss: 0.05168768763542175 2023-01-24 02:27:43.204009: step: 68/463, loss: 0.5962936878204346 2023-01-24 02:27:43.856765: step: 70/463, loss: 0.06609019637107849 2023-01-24 02:27:44.520973: step: 72/463, loss: 0.059408511966466904 2023-01-24 02:27:45.204999: step: 74/463, loss: 0.049335163086652756 2023-01-24 02:27:45.864541: step: 76/463, loss: 0.010667647235095501 2023-01-24 02:27:46.478761: step: 78/463, loss: 0.15229101479053497 2023-01-24 02:27:47.219850: step: 80/463, loss: 0.035849448293447495 2023-01-24 02:27:47.826520: step: 82/463, loss: 0.025260496884584427 2023-01-24 02:27:48.442595: step: 84/463, loss: 0.058970965445041656 2023-01-24 02:27:49.029228: step: 86/463, loss: 0.029864775016903877 2023-01-24 02:27:49.655649: step: 88/463, loss: 0.0796518623828888 2023-01-24 02:27:50.208445: step: 90/463, loss: 0.14115923643112183 2023-01-24 02:27:50.847599: step: 92/463, loss: 0.05154859274625778 2023-01-24 02:27:51.502975: step: 94/463, loss: 0.3532062768936157 2023-01-24 02:27:52.178121: step: 96/463, loss: 0.04024605453014374 2023-01-24 02:27:52.791818: step: 98/463, loss: 0.04854840412735939 2023-01-24 02:27:53.455596: step: 100/463, loss: 0.9367901682853699 2023-01-24 02:27:54.043224: step: 102/463, loss: 0.01247659046202898 2023-01-24 02:27:54.684411: step: 104/463, loss: 0.9505859017372131 2023-01-24 02:27:55.327798: step: 106/463, loss: 0.05853164568543434 2023-01-24 02:27:55.981263: step: 108/463, loss: 0.012301057577133179 2023-01-24 02:27:56.641614: step: 110/463, loss: 0.07176526635885239 2023-01-24 02:27:57.293451: step: 112/463, loss: 0.36822131276130676 2023-01-24 02:27:57.930386: step: 114/463, loss: 3.4288408756256104 2023-01-24 02:27:58.534410: step: 116/463, loss: 0.07779096066951752 2023-01-24 02:27:59.219384: step: 118/463, loss: 0.06815287470817566 2023-01-24 02:27:59.884042: step: 120/463, loss: 0.10610940307378769 2023-01-24 02:28:00.517268: step: 122/463, loss: 0.09823642671108246 2023-01-24 02:28:01.160728: step: 124/463, loss: 0.025018181651830673 2023-01-24 02:28:01.672610: step: 126/463, loss: 0.034618958830833435 2023-01-24 02:28:02.347936: step: 128/463, loss: 0.0320744588971138 2023-01-24 02:28:02.994534: step: 130/463, loss: 0.019785603508353233 2023-01-24 02:28:03.636385: step: 132/463, loss: 0.06175101920962334 2023-01-24 02:28:04.237083: step: 134/463, loss: 0.04201485216617584 2023-01-24 02:28:04.916582: step: 136/463, loss: 0.07357841730117798 2023-01-24 02:28:05.519655: step: 138/463, loss: 0.0372995026409626 2023-01-24 02:28:06.138664: step: 140/463, loss: 0.07504367083311081 2023-01-24 02:28:06.727426: step: 142/463, loss: 0.06548241525888443 2023-01-24 02:28:07.305300: step: 144/463, loss: 0.038432713598012924 2023-01-24 02:28:07.912091: step: 146/463, loss: 0.020114658400416374 2023-01-24 02:28:08.539848: step: 148/463, loss: 0.025309262797236443 2023-01-24 02:28:09.137611: step: 150/463, loss: 0.2232677936553955 2023-01-24 02:28:09.740177: step: 152/463, loss: 0.05970882624387741 2023-01-24 02:28:10.396846: step: 154/463, loss: 0.0957968458533287 2023-01-24 02:28:11.018498: step: 156/463, loss: 0.019476965069770813 2023-01-24 02:28:11.626655: step: 158/463, loss: 0.006689433474093676 2023-01-24 02:28:12.320604: step: 160/463, loss: 0.02838699333369732 2023-01-24 02:28:12.974303: step: 162/463, loss: 0.06594093143939972 2023-01-24 02:28:13.600892: step: 164/463, loss: 0.03180518001317978 2023-01-24 02:28:14.219421: step: 166/463, loss: 0.02431909553706646 2023-01-24 02:28:14.874286: step: 168/463, loss: 0.0583120621740818 2023-01-24 02:28:15.489430: step: 170/463, loss: 0.06335896253585815 2023-01-24 02:28:16.104670: step: 172/463, loss: 0.08673340082168579 2023-01-24 02:28:16.721385: step: 174/463, loss: 0.07836554944515228 2023-01-24 02:28:17.351921: step: 176/463, loss: 0.0231226347386837 2023-01-24 02:28:17.973722: step: 178/463, loss: 0.027215857058763504 2023-01-24 02:28:18.546162: step: 180/463, loss: 0.010634574107825756 2023-01-24 02:28:19.168938: step: 182/463, loss: 0.05315183475613594 2023-01-24 02:28:19.768612: step: 184/463, loss: 0.062360480427742004 2023-01-24 02:28:20.430207: step: 186/463, loss: 0.03155219554901123 2023-01-24 02:28:20.987749: step: 188/463, loss: 0.06969751417636871 2023-01-24 02:28:21.559567: step: 190/463, loss: 0.012527299113571644 2023-01-24 02:28:22.137543: step: 192/463, loss: 0.06745979934930801 2023-01-24 02:28:22.757594: step: 194/463, loss: 0.03917837515473366 2023-01-24 02:28:23.390340: step: 196/463, loss: 0.03979508578777313 2023-01-24 02:28:24.022183: step: 198/463, loss: 0.033088989555835724 2023-01-24 02:28:24.637211: step: 200/463, loss: 0.013852034695446491 2023-01-24 02:28:25.363809: step: 202/463, loss: 0.004486322868615389 2023-01-24 02:28:25.934665: step: 204/463, loss: 0.04540802910923958 2023-01-24 02:28:26.526096: step: 206/463, loss: 0.027821453288197517 2023-01-24 02:28:27.192497: step: 208/463, loss: 0.19811707735061646 2023-01-24 02:28:27.811576: step: 210/463, loss: 0.032117076218128204 2023-01-24 02:28:28.440055: step: 212/463, loss: 0.07389821112155914 2023-01-24 02:28:29.000801: step: 214/463, loss: 0.020998641848564148 2023-01-24 02:28:29.694399: step: 216/463, loss: 0.09907980263233185 2023-01-24 02:28:30.352493: step: 218/463, loss: 0.02236035466194153 2023-01-24 02:28:30.934037: step: 220/463, loss: 0.031027527526021004 2023-01-24 02:28:31.555039: step: 222/463, loss: 0.010798958130180836 2023-01-24 02:28:32.222311: step: 224/463, loss: 0.08692353218793869 2023-01-24 02:28:32.805505: step: 226/463, loss: 0.04171307384967804 2023-01-24 02:28:33.463776: step: 228/463, loss: 0.017737584188580513 2023-01-24 02:28:34.066802: step: 230/463, loss: 0.08151774853467941 2023-01-24 02:28:34.656051: step: 232/463, loss: 0.05702713504433632 2023-01-24 02:28:35.297721: step: 234/463, loss: 0.03371307998895645 2023-01-24 02:28:35.938367: step: 236/463, loss: 0.03732139244675636 2023-01-24 02:28:36.563317: step: 238/463, loss: 0.05353681743144989 2023-01-24 02:28:37.140067: step: 240/463, loss: 0.11210224777460098 2023-01-24 02:28:37.753915: step: 242/463, loss: 0.13348416984081268 2023-01-24 02:28:38.444379: step: 244/463, loss: 0.6534484028816223 2023-01-24 02:28:39.085113: step: 246/463, loss: 0.02931036613881588 2023-01-24 02:28:39.756819: step: 248/463, loss: 0.17572371661663055 2023-01-24 02:28:40.397758: step: 250/463, loss: 0.0404517836868763 2023-01-24 02:28:41.022691: step: 252/463, loss: 0.12899602949619293 2023-01-24 02:28:41.666608: step: 254/463, loss: 0.04055695980787277 2023-01-24 02:28:42.335159: step: 256/463, loss: 0.8264485597610474 2023-01-24 02:28:43.009973: step: 258/463, loss: 0.0633826032280922 2023-01-24 02:28:43.652707: step: 260/463, loss: 0.02122495137155056 2023-01-24 02:28:44.269087: step: 262/463, loss: 0.025803962722420692 2023-01-24 02:28:44.909801: step: 264/463, loss: 0.022498609498143196 2023-01-24 02:28:45.413956: step: 266/463, loss: 0.04139091446995735 2023-01-24 02:28:46.059375: step: 268/463, loss: 0.04816873371601105 2023-01-24 02:28:46.735246: step: 270/463, loss: 0.3591598570346832 2023-01-24 02:28:47.331163: step: 272/463, loss: 0.006877565290778875 2023-01-24 02:28:47.967735: step: 274/463, loss: 0.19716450572013855 2023-01-24 02:28:48.565241: step: 276/463, loss: 0.44062668085098267 2023-01-24 02:28:49.193785: step: 278/463, loss: 0.07158097624778748 2023-01-24 02:28:49.775193: step: 280/463, loss: 0.004711037967354059 2023-01-24 02:28:50.423747: step: 282/463, loss: 0.0697772428393364 2023-01-24 02:28:50.991694: step: 284/463, loss: 0.2478795349597931 2023-01-24 02:28:51.587187: step: 286/463, loss: 0.09423603117465973 2023-01-24 02:28:52.214716: step: 288/463, loss: 0.06738671660423279 2023-01-24 02:28:52.973358: step: 290/463, loss: 0.02402724325656891 2023-01-24 02:28:53.662945: step: 292/463, loss: 0.4583995044231415 2023-01-24 02:28:54.264264: step: 294/463, loss: 0.05294394493103027 2023-01-24 02:28:54.893568: step: 296/463, loss: 0.08964475244283676 2023-01-24 02:28:55.566531: step: 298/463, loss: 0.010876458138227463 2023-01-24 02:28:56.248924: step: 300/463, loss: 0.020989466458559036 2023-01-24 02:28:56.856228: step: 302/463, loss: 0.021016955375671387 2023-01-24 02:28:57.597016: step: 304/463, loss: 0.24152059853076935 2023-01-24 02:28:58.260010: step: 306/463, loss: 0.006914257071912289 2023-01-24 02:28:58.946183: step: 308/463, loss: 0.47118011116981506 2023-01-24 02:28:59.531942: step: 310/463, loss: 0.0913260355591774 2023-01-24 02:29:00.258141: step: 312/463, loss: 0.023262247443199158 2023-01-24 02:29:00.904683: step: 314/463, loss: 0.13974708318710327 2023-01-24 02:29:01.520787: step: 316/463, loss: 0.014410230331122875 2023-01-24 02:29:02.190428: step: 318/463, loss: 0.029110895469784737 2023-01-24 02:29:02.793272: step: 320/463, loss: 0.04063471406698227 2023-01-24 02:29:03.539363: step: 322/463, loss: 0.0740271732211113 2023-01-24 02:29:04.239382: step: 324/463, loss: 0.026210526004433632 2023-01-24 02:29:04.811308: step: 326/463, loss: 0.052800897508859634 2023-01-24 02:29:05.467041: step: 328/463, loss: 0.13307073712348938 2023-01-24 02:29:06.169254: step: 330/463, loss: 0.03350155055522919 2023-01-24 02:29:06.835063: step: 332/463, loss: 0.09340842068195343 2023-01-24 02:29:07.545133: step: 334/463, loss: 0.09332094341516495 2023-01-24 02:29:08.188063: step: 336/463, loss: 0.02414083294570446 2023-01-24 02:29:08.771376: step: 338/463, loss: 0.0835174098610878 2023-01-24 02:29:09.441136: step: 340/463, loss: 0.04582780972123146 2023-01-24 02:29:10.116359: step: 342/463, loss: 0.3220927119255066 2023-01-24 02:29:10.707084: step: 344/463, loss: 0.035379808396101 2023-01-24 02:29:11.388236: step: 346/463, loss: 0.09873564541339874 2023-01-24 02:29:11.989577: step: 348/463, loss: 0.02120424248278141 2023-01-24 02:29:12.650324: step: 350/463, loss: 0.0415227934718132 2023-01-24 02:29:13.274778: step: 352/463, loss: 0.11348246037960052 2023-01-24 02:29:13.871650: step: 354/463, loss: 0.035063933581113815 2023-01-24 02:29:14.462091: step: 356/463, loss: 0.06592081487178802 2023-01-24 02:29:15.076333: step: 358/463, loss: 0.034271836280822754 2023-01-24 02:29:15.680411: step: 360/463, loss: 0.03443994000554085 2023-01-24 02:29:16.342506: step: 362/463, loss: 0.0638202503323555 2023-01-24 02:29:16.959696: step: 364/463, loss: 0.027193138375878334 2023-01-24 02:29:17.552870: step: 366/463, loss: 0.04081645607948303 2023-01-24 02:29:18.213583: step: 368/463, loss: 0.032549817115068436 2023-01-24 02:29:18.917161: step: 370/463, loss: 0.025189336389303207 2023-01-24 02:29:19.675577: step: 372/463, loss: 0.16192680597305298 2023-01-24 02:29:20.358541: step: 374/463, loss: 0.05168411508202553 2023-01-24 02:29:20.999511: step: 376/463, loss: 0.04189447686076164 2023-01-24 02:29:21.740575: step: 378/463, loss: 0.01261840295046568 2023-01-24 02:29:22.391219: step: 380/463, loss: 0.017110368236899376 2023-01-24 02:29:22.987473: step: 382/463, loss: 0.046921804547309875 2023-01-24 02:29:23.549531: step: 384/463, loss: 0.43964457511901855 2023-01-24 02:29:24.164491: step: 386/463, loss: 0.03576177358627319 2023-01-24 02:29:24.759753: step: 388/463, loss: 0.579998791217804 2023-01-24 02:29:25.392084: step: 390/463, loss: 0.13867127895355225 2023-01-24 02:29:26.023409: step: 392/463, loss: 0.0749794989824295 2023-01-24 02:29:26.704650: step: 394/463, loss: 0.09178011864423752 2023-01-24 02:29:27.288620: step: 396/463, loss: 0.25698941946029663 2023-01-24 02:29:27.924364: step: 398/463, loss: 0.09495487064123154 2023-01-24 02:29:28.602392: step: 400/463, loss: 0.01723267324268818 2023-01-24 02:29:29.186817: step: 402/463, loss: 0.024175819009542465 2023-01-24 02:29:29.789974: step: 404/463, loss: 0.03752123937010765 2023-01-24 02:29:30.445420: step: 406/463, loss: 0.017812224105000496 2023-01-24 02:29:31.091826: step: 408/463, loss: 0.0754508301615715 2023-01-24 02:29:31.706298: step: 410/463, loss: 0.025097239762544632 2023-01-24 02:29:32.288662: step: 412/463, loss: 0.05229254439473152 2023-01-24 02:29:32.991683: step: 414/463, loss: 0.031025778502225876 2023-01-24 02:29:33.581500: step: 416/463, loss: 0.5713721513748169 2023-01-24 02:29:34.169732: step: 418/463, loss: 0.3474188446998596 2023-01-24 02:29:34.816449: step: 420/463, loss: 0.076308973133564 2023-01-24 02:29:35.507700: step: 422/463, loss: 0.2900967597961426 2023-01-24 02:29:36.143181: step: 424/463, loss: 0.01907758042216301 2023-01-24 02:29:36.833462: step: 426/463, loss: 0.019886748865246773 2023-01-24 02:29:37.435901: step: 428/463, loss: 0.07937715202569962 2023-01-24 02:29:38.020708: step: 430/463, loss: 0.040851034224033356 2023-01-24 02:29:38.612160: step: 432/463, loss: 0.020795831456780434 2023-01-24 02:29:39.206518: step: 434/463, loss: 0.21069321036338806 2023-01-24 02:29:39.874633: step: 436/463, loss: 0.1839502602815628 2023-01-24 02:29:40.510934: step: 438/463, loss: 0.04914311692118645 2023-01-24 02:29:41.112022: step: 440/463, loss: 0.006894220598042011 2023-01-24 02:29:41.755417: step: 442/463, loss: 0.7006751298904419 2023-01-24 02:29:42.378007: step: 444/463, loss: 0.018299002200365067 2023-01-24 02:29:43.036114: step: 446/463, loss: 0.010620677843689919 2023-01-24 02:29:43.609470: step: 448/463, loss: 0.018639035522937775 2023-01-24 02:29:44.234361: step: 450/463, loss: 0.002787784906104207 2023-01-24 02:29:44.876271: step: 452/463, loss: 0.030481547117233276 2023-01-24 02:29:45.433739: step: 454/463, loss: 0.15855535864830017 2023-01-24 02:29:46.030329: step: 456/463, loss: 0.10882546007633209 2023-01-24 02:29:46.624094: step: 458/463, loss: 0.13556592166423798 2023-01-24 02:29:47.250590: step: 460/463, loss: 0.013005250133574009 2023-01-24 02:29:47.839699: step: 462/463, loss: 0.1138322651386261 2023-01-24 02:29:48.453656: step: 464/463, loss: 0.06282660365104675 2023-01-24 02:29:49.075666: step: 466/463, loss: 0.03192172199487686 2023-01-24 02:29:49.684043: step: 468/463, loss: 0.015465976670384407 2023-01-24 02:29:50.275971: step: 470/463, loss: 0.04728606715798378 2023-01-24 02:29:50.927712: step: 472/463, loss: 0.023267779499292374 2023-01-24 02:29:51.515960: step: 474/463, loss: 0.09839379042387009 2023-01-24 02:29:52.117563: step: 476/463, loss: 0.03243930637836456 2023-01-24 02:29:52.748505: step: 478/463, loss: 0.24113982915878296 2023-01-24 02:29:53.357271: step: 480/463, loss: 0.02225543186068535 2023-01-24 02:29:53.899817: step: 482/463, loss: 0.022931937128305435 2023-01-24 02:29:54.480358: step: 484/463, loss: 0.09219290316104889 2023-01-24 02:29:55.139855: step: 486/463, loss: 0.04385121166706085 2023-01-24 02:29:55.738616: step: 488/463, loss: 0.5618979334831238 2023-01-24 02:29:56.388150: step: 490/463, loss: 0.02479241043329239 2023-01-24 02:29:56.986197: step: 492/463, loss: 0.12881645560264587 2023-01-24 02:29:57.602719: step: 494/463, loss: 0.006323047913610935 2023-01-24 02:29:58.192991: step: 496/463, loss: 0.004740823991596699 2023-01-24 02:29:58.765059: step: 498/463, loss: 0.13583901524543762 2023-01-24 02:29:59.352672: step: 500/463, loss: 0.10019021481275558 2023-01-24 02:29:59.956300: step: 502/463, loss: 0.07118593901395798 2023-01-24 02:30:00.563567: step: 504/463, loss: 0.17805695533752441 2023-01-24 02:30:01.173028: step: 506/463, loss: 0.04027596116065979 2023-01-24 02:30:01.822018: step: 508/463, loss: 0.03283923491835594 2023-01-24 02:30:02.447984: step: 510/463, loss: 0.02354482188820839 2023-01-24 02:30:03.103047: step: 512/463, loss: 0.035854145884513855 2023-01-24 02:30:03.721785: step: 514/463, loss: 0.11175724118947983 2023-01-24 02:30:04.370280: step: 516/463, loss: 0.028801606968045235 2023-01-24 02:30:04.987526: step: 518/463, loss: 0.022815560922026634 2023-01-24 02:30:05.539526: step: 520/463, loss: 0.3163912296295166 2023-01-24 02:30:06.141874: step: 522/463, loss: 0.0188890527933836 2023-01-24 02:30:06.730921: step: 524/463, loss: 0.0735621452331543 2023-01-24 02:30:07.344438: step: 526/463, loss: 0.0456823967397213 2023-01-24 02:30:07.904400: step: 528/463, loss: 0.05895118787884712 2023-01-24 02:30:08.559936: step: 530/463, loss: 0.06408503651618958 2023-01-24 02:30:09.180075: step: 532/463, loss: 0.029579926282167435 2023-01-24 02:30:09.819731: step: 534/463, loss: 0.05187416821718216 2023-01-24 02:30:10.450273: step: 536/463, loss: 0.043843403458595276 2023-01-24 02:30:11.106567: step: 538/463, loss: 0.043029166758060455 2023-01-24 02:30:11.765757: step: 540/463, loss: 0.1379927098751068 2023-01-24 02:30:12.406225: step: 542/463, loss: 0.031104056164622307 2023-01-24 02:30:13.029150: step: 544/463, loss: 0.047198131680488586 2023-01-24 02:30:13.664901: step: 546/463, loss: 0.05940878763794899 2023-01-24 02:30:14.313365: step: 548/463, loss: 0.0956902876496315 2023-01-24 02:30:14.952140: step: 550/463, loss: 0.01451434288173914 2023-01-24 02:30:15.547498: step: 552/463, loss: 0.08825964480638504 2023-01-24 02:30:16.186197: step: 554/463, loss: 0.07416350394487381 2023-01-24 02:30:16.756754: step: 556/463, loss: 0.12103522568941116 2023-01-24 02:30:17.474604: step: 558/463, loss: 0.042535293847322464 2023-01-24 02:30:18.077420: step: 560/463, loss: 0.00959598459303379 2023-01-24 02:30:18.723877: step: 562/463, loss: 0.05251099914312363 2023-01-24 02:30:19.383672: step: 564/463, loss: 0.0059568071737885475 2023-01-24 02:30:19.982351: step: 566/463, loss: 0.054183561354875565 2023-01-24 02:30:20.633180: step: 568/463, loss: 0.07713527977466583 2023-01-24 02:30:21.227959: step: 570/463, loss: 0.06466488540172577 2023-01-24 02:30:21.809243: step: 572/463, loss: 0.0050270589999854565 2023-01-24 02:30:22.459519: step: 574/463, loss: 0.0593777559697628 2023-01-24 02:30:23.177738: step: 576/463, loss: 0.0652415007352829 2023-01-24 02:30:23.880993: step: 578/463, loss: 0.10233879089355469 2023-01-24 02:30:24.451755: step: 580/463, loss: 0.054444149136543274 2023-01-24 02:30:25.087402: step: 582/463, loss: 0.41499340534210205 2023-01-24 02:30:25.710536: step: 584/463, loss: 0.03818292170763016 2023-01-24 02:30:26.338646: step: 586/463, loss: 0.042066749185323715 2023-01-24 02:30:27.085641: step: 588/463, loss: 0.040170393884181976 2023-01-24 02:30:27.681017: step: 590/463, loss: 0.0734221488237381 2023-01-24 02:30:28.229067: step: 592/463, loss: 0.05708552896976471 2023-01-24 02:30:28.920271: step: 594/463, loss: 0.14176659286022186 2023-01-24 02:30:29.573463: step: 596/463, loss: 0.12767548859119415 2023-01-24 02:30:30.155235: step: 598/463, loss: 0.05253032594919205 2023-01-24 02:30:30.818974: step: 600/463, loss: 0.028156928718090057 2023-01-24 02:30:31.421568: step: 602/463, loss: 0.216679185628891 2023-01-24 02:30:32.031208: step: 604/463, loss: 0.08172070235013962 2023-01-24 02:30:32.695120: step: 606/463, loss: 0.0089958431199193 2023-01-24 02:30:33.247839: step: 608/463, loss: 0.0861603319644928 2023-01-24 02:30:33.913268: step: 610/463, loss: 0.015388678759336472 2023-01-24 02:30:34.570101: step: 612/463, loss: 1.600328803062439 2023-01-24 02:30:35.216202: step: 614/463, loss: 0.06564446538686752 2023-01-24 02:30:35.863788: step: 616/463, loss: 0.09172430634498596 2023-01-24 02:30:36.446810: step: 618/463, loss: 0.017240114510059357 2023-01-24 02:30:37.065402: step: 620/463, loss: 0.048070311546325684 2023-01-24 02:30:37.786584: step: 622/463, loss: 0.0269217137247324 2023-01-24 02:30:38.384185: step: 624/463, loss: 0.04842456802725792 2023-01-24 02:30:38.965219: step: 626/463, loss: 0.015573249198496342 2023-01-24 02:30:39.586948: step: 628/463, loss: 0.07935141772031784 2023-01-24 02:30:40.222662: step: 630/463, loss: 0.06779279559850693 2023-01-24 02:30:40.846172: step: 632/463, loss: 0.09676484763622284 2023-01-24 02:30:41.453122: step: 634/463, loss: 0.07864780724048615 2023-01-24 02:30:42.126763: step: 636/463, loss: 0.018157804384827614 2023-01-24 02:30:42.701721: step: 638/463, loss: 0.05776529759168625 2023-01-24 02:30:43.271576: step: 640/463, loss: 0.10632098466157913 2023-01-24 02:30:43.881635: step: 642/463, loss: 0.06286674737930298 2023-01-24 02:30:44.492468: step: 644/463, loss: 0.23010165989398956 2023-01-24 02:30:45.127421: step: 646/463, loss: 0.06769032031297684 2023-01-24 02:30:45.740523: step: 648/463, loss: 0.1818058341741562 2023-01-24 02:30:46.457186: step: 650/463, loss: 0.08866716176271439 2023-01-24 02:30:47.037557: step: 652/463, loss: 0.017704658210277557 2023-01-24 02:30:47.602256: step: 654/463, loss: 0.001529622240923345 2023-01-24 02:30:48.123134: step: 656/463, loss: 0.061790235340595245 2023-01-24 02:30:48.784956: step: 658/463, loss: 0.17239807546138763 2023-01-24 02:30:49.429318: step: 660/463, loss: 0.09937804192304611 2023-01-24 02:30:50.027117: step: 662/463, loss: 0.15803074836730957 2023-01-24 02:30:50.635030: step: 664/463, loss: 0.04156778007745743 2023-01-24 02:30:51.258584: step: 666/463, loss: 0.031508464366197586 2023-01-24 02:30:51.926000: step: 668/463, loss: 0.02762809582054615 2023-01-24 02:30:52.534005: step: 670/463, loss: 0.00946555845439434 2023-01-24 02:30:53.116097: step: 672/463, loss: 0.1350875049829483 2023-01-24 02:30:53.701681: step: 674/463, loss: 0.0069588604383170605 2023-01-24 02:30:54.312042: step: 676/463, loss: 0.07308116555213928 2023-01-24 02:30:54.850699: step: 678/463, loss: 0.006525191944092512 2023-01-24 02:30:55.407771: step: 680/463, loss: 0.8197893500328064 2023-01-24 02:30:56.018000: step: 682/463, loss: 0.023802677169442177 2023-01-24 02:30:56.636187: step: 684/463, loss: 1.0044938325881958 2023-01-24 02:30:57.304814: step: 686/463, loss: 0.051745470613241196 2023-01-24 02:30:57.944329: step: 688/463, loss: 0.041098348796367645 2023-01-24 02:30:58.459406: step: 690/463, loss: 0.024875445291399956 2023-01-24 02:30:59.042643: step: 692/463, loss: 0.042112741619348526 2023-01-24 02:30:59.656549: step: 694/463, loss: 0.035496532917022705 2023-01-24 02:31:00.263479: step: 696/463, loss: 0.03009173460304737 2023-01-24 02:31:00.836541: step: 698/463, loss: 0.06345498561859131 2023-01-24 02:31:01.461133: step: 700/463, loss: 0.06617318093776703 2023-01-24 02:31:02.094736: step: 702/463, loss: 0.05781959742307663 2023-01-24 02:31:02.687432: step: 704/463, loss: 0.29335883259773254 2023-01-24 02:31:03.282672: step: 706/463, loss: 0.026401255279779434 2023-01-24 02:31:03.897317: step: 708/463, loss: 0.06644775718450546 2023-01-24 02:31:04.505126: step: 710/463, loss: 0.015934163704514503 2023-01-24 02:31:05.140045: step: 712/463, loss: 0.03380304574966431 2023-01-24 02:31:05.736934: step: 714/463, loss: 0.15150348842144012 2023-01-24 02:31:06.378779: step: 716/463, loss: 0.026380375027656555 2023-01-24 02:31:06.931004: step: 718/463, loss: 0.09296206384897232 2023-01-24 02:31:07.548814: step: 720/463, loss: 0.12324699014425278 2023-01-24 02:31:08.196261: step: 722/463, loss: 0.0270185898989439 2023-01-24 02:31:08.859154: step: 724/463, loss: 0.12142333388328552 2023-01-24 02:31:09.495518: step: 726/463, loss: 0.03840349242091179 2023-01-24 02:31:10.119878: step: 728/463, loss: 0.0963706523180008 2023-01-24 02:31:10.752844: step: 730/463, loss: 0.180616095662117 2023-01-24 02:31:11.376061: step: 732/463, loss: 0.027679789811372757 2023-01-24 02:31:12.020627: step: 734/463, loss: 0.10157615691423416 2023-01-24 02:31:12.653712: step: 736/463, loss: 0.13992363214492798 2023-01-24 02:31:13.194517: step: 738/463, loss: 0.10379081964492798 2023-01-24 02:31:13.703707: step: 740/463, loss: 0.32321059703826904 2023-01-24 02:31:14.288943: step: 742/463, loss: 0.0481429360806942 2023-01-24 02:31:14.908606: step: 744/463, loss: 0.27094048261642456 2023-01-24 02:31:15.527813: step: 746/463, loss: 0.061478398740291595 2023-01-24 02:31:16.141956: step: 748/463, loss: 0.11156271398067474 2023-01-24 02:31:16.787688: step: 750/463, loss: 0.13841915130615234 2023-01-24 02:31:17.475257: step: 752/463, loss: 0.06979890912771225 2023-01-24 02:31:18.116508: step: 754/463, loss: 0.0640893206000328 2023-01-24 02:31:18.779697: step: 756/463, loss: 0.10758936405181885 2023-01-24 02:31:19.401752: step: 758/463, loss: 0.41406506299972534 2023-01-24 02:31:20.041761: step: 760/463, loss: 0.07428723573684692 2023-01-24 02:31:20.639923: step: 762/463, loss: 0.10503371804952621 2023-01-24 02:31:21.250360: step: 764/463, loss: 0.029797162860631943 2023-01-24 02:31:21.853352: step: 766/463, loss: 0.05734733119606972 2023-01-24 02:31:22.494057: step: 768/463, loss: 0.054886799305677414 2023-01-24 02:31:23.205202: step: 770/463, loss: 0.03895945847034454 2023-01-24 02:31:23.835436: step: 772/463, loss: 0.10972396284341812 2023-01-24 02:31:24.389690: step: 774/463, loss: 0.03161252662539482 2023-01-24 02:31:25.020958: step: 776/463, loss: 0.03737922012805939 2023-01-24 02:31:25.569581: step: 778/463, loss: 0.08636228740215302 2023-01-24 02:31:26.199112: step: 780/463, loss: 0.08123937249183655 2023-01-24 02:31:26.839583: step: 782/463, loss: 0.005904227960854769 2023-01-24 02:31:27.457282: step: 784/463, loss: 0.023396696895360947 2023-01-24 02:31:28.025489: step: 786/463, loss: 0.020909195765852928 2023-01-24 02:31:28.613382: step: 788/463, loss: 0.010047647170722485 2023-01-24 02:31:29.223797: step: 790/463, loss: 0.040804263204336166 2023-01-24 02:31:29.864003: step: 792/463, loss: 0.06735529005527496 2023-01-24 02:31:30.453725: step: 794/463, loss: 0.029419753700494766 2023-01-24 02:31:31.077920: step: 796/463, loss: 0.03022507205605507 2023-01-24 02:31:31.729178: step: 798/463, loss: 0.13375067710876465 2023-01-24 02:31:32.376554: step: 800/463, loss: 0.006585591472685337 2023-01-24 02:31:32.982677: step: 802/463, loss: 0.03674134984612465 2023-01-24 02:31:33.600718: step: 804/463, loss: 0.04075662046670914 2023-01-24 02:31:34.191749: step: 806/463, loss: 0.0020204675383865833 2023-01-24 02:31:34.810789: step: 808/463, loss: 0.029373684898018837 2023-01-24 02:31:35.368385: step: 810/463, loss: 0.017697075381875038 2023-01-24 02:31:35.962524: step: 812/463, loss: 0.021982727572321892 2023-01-24 02:31:36.613581: step: 814/463, loss: 0.040190767496824265 2023-01-24 02:31:37.231453: step: 816/463, loss: 0.1584051251411438 2023-01-24 02:31:37.763246: step: 818/463, loss: 0.007187770213931799 2023-01-24 02:31:38.434484: step: 820/463, loss: 0.06534581631422043 2023-01-24 02:31:39.084119: step: 822/463, loss: 0.07006745040416718 2023-01-24 02:31:39.730065: step: 824/463, loss: 0.055165085941553116 2023-01-24 02:31:40.381149: step: 826/463, loss: 0.05553819239139557 2023-01-24 02:31:41.050071: step: 828/463, loss: 0.05101989582180977 2023-01-24 02:31:41.691912: step: 830/463, loss: 0.012289288453757763 2023-01-24 02:31:42.380909: step: 832/463, loss: 0.04550894349813461 2023-01-24 02:31:42.974827: step: 834/463, loss: 1.0833523273468018 2023-01-24 02:31:43.575489: step: 836/463, loss: 0.012613041326403618 2023-01-24 02:31:44.252949: step: 838/463, loss: 0.08594474196434021 2023-01-24 02:31:44.882227: step: 840/463, loss: 0.006145032122731209 2023-01-24 02:31:45.438956: step: 842/463, loss: 0.02410002052783966 2023-01-24 02:31:46.062492: step: 844/463, loss: 0.08066446334123611 2023-01-24 02:31:46.696317: step: 846/463, loss: 0.048206426203250885 2023-01-24 02:31:47.315523: step: 848/463, loss: 0.0734802708029747 2023-01-24 02:31:47.943838: step: 850/463, loss: 0.41602522134780884 2023-01-24 02:31:48.581567: step: 852/463, loss: 0.028285112231969833 2023-01-24 02:31:49.185944: step: 854/463, loss: 0.5295729041099548 2023-01-24 02:31:49.855659: step: 856/463, loss: 0.022632591426372528 2023-01-24 02:31:50.537838: step: 858/463, loss: 0.052682146430015564 2023-01-24 02:31:51.329135: step: 860/463, loss: 0.2440948784351349 2023-01-24 02:31:51.954298: step: 862/463, loss: 0.005891554988920689 2023-01-24 02:31:52.599907: step: 864/463, loss: 0.23199871182441711 2023-01-24 02:31:53.184731: step: 866/463, loss: 0.04123353585600853 2023-01-24 02:31:53.865645: step: 868/463, loss: 0.04443017765879631 2023-01-24 02:31:54.437155: step: 870/463, loss: 0.00010082490189233795 2023-01-24 02:31:55.081532: step: 872/463, loss: 0.027467235922813416 2023-01-24 02:31:55.696685: step: 874/463, loss: 0.011563832871615887 2023-01-24 02:31:56.297885: step: 876/463, loss: 0.14248572289943695 2023-01-24 02:31:56.867540: step: 878/463, loss: 0.10942830890417099 2023-01-24 02:31:57.482086: step: 880/463, loss: 0.06527270376682281 2023-01-24 02:31:58.138233: step: 882/463, loss: 0.014301876537501812 2023-01-24 02:31:58.836976: step: 884/463, loss: 0.045024458318948746 2023-01-24 02:31:59.483275: step: 886/463, loss: 0.15976426005363464 2023-01-24 02:32:00.065758: step: 888/463, loss: 0.03387608006596565 2023-01-24 02:32:00.672711: step: 890/463, loss: 0.1103304773569107 2023-01-24 02:32:01.267026: step: 892/463, loss: 0.08368083089590073 2023-01-24 02:32:01.877999: step: 894/463, loss: 0.04028048366308212 2023-01-24 02:32:02.497847: step: 896/463, loss: 0.12643633782863617 2023-01-24 02:32:03.069118: step: 898/463, loss: 0.06794684380292892 2023-01-24 02:32:03.660993: step: 900/463, loss: 0.06601326912641525 2023-01-24 02:32:04.222576: step: 902/463, loss: 0.06228647381067276 2023-01-24 02:32:04.784501: step: 904/463, loss: 0.07344299554824829 2023-01-24 02:32:05.338501: step: 906/463, loss: 0.051804061979055405 2023-01-24 02:32:05.937175: step: 908/463, loss: 0.07039500772953033 2023-01-24 02:32:06.552295: step: 910/463, loss: 0.060677725821733475 2023-01-24 02:32:07.127446: step: 912/463, loss: 0.08781065791845322 2023-01-24 02:32:07.694168: step: 914/463, loss: 0.004770986270159483 2023-01-24 02:32:08.293164: step: 916/463, loss: 0.0813886970281601 2023-01-24 02:32:08.952537: step: 918/463, loss: 0.02861078456044197 2023-01-24 02:32:09.572451: step: 920/463, loss: 0.03386520966887474 2023-01-24 02:32:10.208316: step: 922/463, loss: 0.043594326823949814 2023-01-24 02:32:10.826416: step: 924/463, loss: 0.06822507828474045 2023-01-24 02:32:11.462798: step: 926/463, loss: 0.07693836092948914 ================================================== Loss: 0.111 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34951213080168775, 'r': 0.314361954459203, 'f1': 0.33100649350649347}, 'combined': 0.24389952153110045, 'epoch': 20} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34893632029761135, 'r': 0.37908339208802994, 'f1': 0.36338566571962694}, 'combined': 0.28166735811760557, 'epoch': 20} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33023681640625, 'r': 0.32083728652751425, 'f1': 0.3254692011549567}, 'combined': 0.2398194113773365, 'epoch': 20} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3364888190997896, 'r': 0.3875188330257687, 'f1': 0.36020545948913835}, 'combined': 0.2792023178815331, 'epoch': 20} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33432160702772407, 'r': 0.32797774351676157, 'f1': 0.33111929278416347}, 'combined': 0.24398263678833096, 'epoch': 20} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34009457238529556, 'r': 0.37697982931678903, 'f1': 0.3575885390555069}, 'combined': 0.27717389151670874, 'epoch': 20} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31048387096774194, 'r': 0.275, 'f1': 0.2916666666666667}, 'combined': 0.19444444444444445, 'epoch': 20} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.29, 'r': 0.31521739130434784, 'f1': 0.3020833333333333}, 'combined': 0.15104166666666666, 'epoch': 20} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4791666666666667, 'r': 0.19827586206896552, 'f1': 0.28048780487804875}, 'combined': 0.18699186991869915, 'epoch': 20} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35531381856540084, 'r': 0.3195801707779886, 'f1': 0.336500999000999}, 'combined': 0.24794810452705188, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35077406976066583, 'r': 0.38269193088778525, 'f1': 0.3660385237854157}, 'combined': 0.2837236404461117, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.3142857142857143, 'f1': 0.360655737704918}, 'combined': 0.24043715846994532, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3556843389529725, 'r': 0.32598773380319873, 'f1': 0.340189179632249}, 'combined': 0.25066571130797294, 'epoch': 19} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35235259195938173, 'r': 0.3698406801632481, 'f1': 0.3608848968767838}, 'combined': 0.2797289631293731, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.41304347826086957, 'r': 0.41304347826086957, 'f1': 0.41304347826086957}, 'combined': 0.20652173913043478, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33842201576576575, 'r': 0.3326425126502214, 'f1': 0.3355073763955343}, 'combined': 0.2472159615546042, 'epoch': 14} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33644544699224593, 'r': 0.3627302475385151, 'f1': 0.3490937720671424}, 'combined': 0.2705894309802731, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.20689655172413793, 'f1': 0.2926829268292683}, 'combined': 0.19512195121951217, 'epoch': 14} ****************************** Epoch: 21 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:34:47.173859: step: 2/463, loss: 0.10842946171760559 2023-01-24 02:34:47.858553: step: 4/463, loss: 0.32674673199653625 2023-01-24 02:34:48.489244: step: 6/463, loss: 0.1941383183002472 2023-01-24 02:34:49.175806: step: 8/463, loss: 0.14849162101745605 2023-01-24 02:34:49.779279: step: 10/463, loss: 0.0326245054602623 2023-01-24 02:34:50.424489: step: 12/463, loss: 0.03208509460091591 2023-01-24 02:34:51.053002: step: 14/463, loss: 0.04421968013048172 2023-01-24 02:34:51.595164: step: 16/463, loss: 0.011603240855038166 2023-01-24 02:34:52.205118: step: 18/463, loss: 0.5193854570388794 2023-01-24 02:34:52.761567: step: 20/463, loss: 0.13878828287124634 2023-01-24 02:34:53.360479: step: 22/463, loss: 0.04849619045853615 2023-01-24 02:34:53.961984: step: 24/463, loss: 0.03346783295273781 2023-01-24 02:34:54.600176: step: 26/463, loss: 0.018788419663906097 2023-01-24 02:34:55.198592: step: 28/463, loss: 0.011331932619214058 2023-01-24 02:34:55.818313: step: 30/463, loss: 0.015411270782351494 2023-01-24 02:34:56.449284: step: 32/463, loss: 0.11832278221845627 2023-01-24 02:34:57.020967: step: 34/463, loss: 0.006202243268489838 2023-01-24 02:34:57.594623: step: 36/463, loss: 0.1826816201210022 2023-01-24 02:34:58.188592: step: 38/463, loss: 0.025419119745492935 2023-01-24 02:34:58.786035: step: 40/463, loss: 0.030588358640670776 2023-01-24 02:34:59.381987: step: 42/463, loss: 0.05161495879292488 2023-01-24 02:35:00.025642: step: 44/463, loss: 0.03308122605085373 2023-01-24 02:35:00.648428: step: 46/463, loss: 0.026122014969587326 2023-01-24 02:35:01.282094: step: 48/463, loss: 0.013910619542002678 2023-01-24 02:35:01.966510: step: 50/463, loss: 1.746422529220581 2023-01-24 02:35:02.579824: step: 52/463, loss: 0.09099697321653366 2023-01-24 02:35:03.218604: step: 54/463, loss: 0.12887287139892578 2023-01-24 02:35:03.854813: step: 56/463, loss: 0.02142437919974327 2023-01-24 02:35:04.434352: step: 58/463, loss: 0.013874801807105541 2023-01-24 02:35:05.073901: step: 60/463, loss: 0.03262742981314659 2023-01-24 02:35:05.675531: step: 62/463, loss: 0.24533799290657043 2023-01-24 02:35:06.440814: step: 64/463, loss: 0.02403237856924534 2023-01-24 02:35:07.029612: step: 66/463, loss: 0.0285759586840868 2023-01-24 02:35:07.683638: step: 68/463, loss: 0.20021891593933105 2023-01-24 02:35:08.288071: step: 70/463, loss: 0.02089514583349228 2023-01-24 02:35:08.887601: step: 72/463, loss: 0.19935274124145508 2023-01-24 02:35:09.508630: step: 74/463, loss: 0.014258578419685364 2023-01-24 02:35:10.142230: step: 76/463, loss: 0.009998084045946598 2023-01-24 02:35:10.756736: step: 78/463, loss: 0.010598234832286835 2023-01-24 02:35:11.407043: step: 80/463, loss: 0.3295132517814636 2023-01-24 02:35:12.042091: step: 82/463, loss: 0.06613471359014511 2023-01-24 02:35:12.669971: step: 84/463, loss: 0.05376961827278137 2023-01-24 02:35:13.356426: step: 86/463, loss: 0.17067649960517883 2023-01-24 02:35:14.026122: step: 88/463, loss: 0.010756397619843483 2023-01-24 02:35:14.732197: step: 90/463, loss: 0.06448723375797272 2023-01-24 02:35:15.377044: step: 92/463, loss: 0.08899065107107162 2023-01-24 02:35:16.026225: step: 94/463, loss: 0.010376730002462864 2023-01-24 02:35:16.609123: step: 96/463, loss: 0.0562828928232193 2023-01-24 02:35:17.225813: step: 98/463, loss: 0.07136587053537369 2023-01-24 02:35:17.818207: step: 100/463, loss: 0.025876399129629135 2023-01-24 02:35:18.455670: step: 102/463, loss: 0.04205651208758354 2023-01-24 02:35:19.056654: step: 104/463, loss: 0.0895906314253807 2023-01-24 02:35:19.637247: step: 106/463, loss: 0.4598084092140198 2023-01-24 02:35:20.274489: step: 108/463, loss: 0.021469302475452423 2023-01-24 02:35:20.906789: step: 110/463, loss: 0.12939992547035217 2023-01-24 02:35:21.493507: step: 112/463, loss: 0.026458879932761192 2023-01-24 02:35:22.099078: step: 114/463, loss: 0.03705986589193344 2023-01-24 02:35:22.722332: step: 116/463, loss: 0.07023543119430542 2023-01-24 02:35:23.274949: step: 118/463, loss: 0.0032180838752537966 2023-01-24 02:35:23.896016: step: 120/463, loss: 0.052397649735212326 2023-01-24 02:35:24.606171: step: 122/463, loss: 0.022886991500854492 2023-01-24 02:35:25.228625: step: 124/463, loss: 0.1497543901205063 2023-01-24 02:35:25.862487: step: 126/463, loss: 0.17021460831165314 2023-01-24 02:35:26.471501: step: 128/463, loss: 0.036867544054985046 2023-01-24 02:35:27.058469: step: 130/463, loss: 0.38709887862205505 2023-01-24 02:35:27.672054: step: 132/463, loss: 0.0445268377661705 2023-01-24 02:35:28.297647: step: 134/463, loss: 0.05580996349453926 2023-01-24 02:35:28.916338: step: 136/463, loss: 0.02718171663582325 2023-01-24 02:35:29.457741: step: 138/463, loss: 0.037013012915849686 2023-01-24 02:35:30.035736: step: 140/463, loss: 0.07311011105775833 2023-01-24 02:35:30.701273: step: 142/463, loss: 0.03917272388935089 2023-01-24 02:35:31.302217: step: 144/463, loss: 0.017371615394949913 2023-01-24 02:35:31.917992: step: 146/463, loss: 0.36177748441696167 2023-01-24 02:35:32.504937: step: 148/463, loss: 0.03513642027974129 2023-01-24 02:35:33.166205: step: 150/463, loss: 0.041847191751003265 2023-01-24 02:35:33.785213: step: 152/463, loss: 0.2530669569969177 2023-01-24 02:35:34.397323: step: 154/463, loss: 0.05493073910474777 2023-01-24 02:35:35.003934: step: 156/463, loss: 10.177796363830566 2023-01-24 02:35:35.623887: step: 158/463, loss: 0.06300440430641174 2023-01-24 02:35:36.206471: step: 160/463, loss: 0.0221159178763628 2023-01-24 02:35:36.785073: step: 162/463, loss: 0.04124729707837105 2023-01-24 02:35:37.327436: step: 164/463, loss: 0.02808583527803421 2023-01-24 02:35:37.851433: step: 166/463, loss: 0.008351719006896019 2023-01-24 02:35:38.465609: step: 168/463, loss: 0.07916621118783951 2023-01-24 02:35:39.127535: step: 170/463, loss: 0.08258962631225586 2023-01-24 02:35:39.854273: step: 172/463, loss: 0.03964458405971527 2023-01-24 02:35:40.485063: step: 174/463, loss: 0.022951669991016388 2023-01-24 02:35:41.145618: step: 176/463, loss: 0.029361989349126816 2023-01-24 02:35:41.776861: step: 178/463, loss: 0.35478201508522034 2023-01-24 02:35:42.355939: step: 180/463, loss: 0.02392745018005371 2023-01-24 02:35:42.973788: step: 182/463, loss: 0.0789598599076271 2023-01-24 02:35:43.563292: step: 184/463, loss: 0.019819023087620735 2023-01-24 02:35:44.233143: step: 186/463, loss: 0.02444101870059967 2023-01-24 02:35:44.785744: step: 188/463, loss: 0.002599931787699461 2023-01-24 02:35:45.488359: step: 190/463, loss: 0.1535947620868683 2023-01-24 02:35:46.119194: step: 192/463, loss: 0.24095189571380615 2023-01-24 02:35:46.680999: step: 194/463, loss: 0.019466117024421692 2023-01-24 02:35:47.335270: step: 196/463, loss: 0.11408529430627823 2023-01-24 02:35:47.900228: step: 198/463, loss: 0.05276991054415703 2023-01-24 02:35:48.500898: step: 200/463, loss: 0.012610968202352524 2023-01-24 02:35:49.144216: step: 202/463, loss: 0.02691648341715336 2023-01-24 02:35:49.789305: step: 204/463, loss: 0.12883315980434418 2023-01-24 02:35:50.394426: step: 206/463, loss: 0.006722901947796345 2023-01-24 02:35:50.985038: step: 208/463, loss: 0.006243908312171698 2023-01-24 02:35:51.554119: step: 210/463, loss: 0.027313238009810448 2023-01-24 02:35:52.206666: step: 212/463, loss: 0.04436987265944481 2023-01-24 02:35:52.853433: step: 214/463, loss: 0.663070559501648 2023-01-24 02:35:53.578393: step: 216/463, loss: 0.003890047548338771 2023-01-24 02:35:54.200325: step: 218/463, loss: 0.26263242959976196 2023-01-24 02:35:54.792557: step: 220/463, loss: 3.3298776149749756 2023-01-24 02:35:55.406717: step: 222/463, loss: 0.03333837166428566 2023-01-24 02:35:56.042313: step: 224/463, loss: 0.03925684466958046 2023-01-24 02:35:56.646896: step: 226/463, loss: 0.0636269748210907 2023-01-24 02:35:57.212011: step: 228/463, loss: 0.04158621281385422 2023-01-24 02:35:57.786014: step: 230/463, loss: 0.09085356444120407 2023-01-24 02:35:58.469772: step: 232/463, loss: 0.06672497093677521 2023-01-24 02:35:59.091678: step: 234/463, loss: 0.02141600102186203 2023-01-24 02:35:59.741429: step: 236/463, loss: 0.04261079058051109 2023-01-24 02:36:00.327123: step: 238/463, loss: 0.004412642680108547 2023-01-24 02:36:00.971599: step: 240/463, loss: 0.02278420701622963 2023-01-24 02:36:01.628742: step: 242/463, loss: 0.18767501413822174 2023-01-24 02:36:02.253646: step: 244/463, loss: 0.04235748201608658 2023-01-24 02:36:02.935934: step: 246/463, loss: 0.03558219596743584 2023-01-24 02:36:03.512420: step: 248/463, loss: 0.10046972334384918 2023-01-24 02:36:04.134825: step: 250/463, loss: 0.1349455863237381 2023-01-24 02:36:04.754854: step: 252/463, loss: 0.07888194173574448 2023-01-24 02:36:05.408609: step: 254/463, loss: 0.021533489227294922 2023-01-24 02:36:05.960569: step: 256/463, loss: 0.003026509890332818 2023-01-24 02:36:06.534792: step: 258/463, loss: 0.02129051648080349 2023-01-24 02:36:07.081116: step: 260/463, loss: 0.5292913913726807 2023-01-24 02:36:07.700576: step: 262/463, loss: 0.05269792675971985 2023-01-24 02:36:08.307505: step: 264/463, loss: 0.07120180130004883 2023-01-24 02:36:08.904082: step: 266/463, loss: 0.18456529080867767 2023-01-24 02:36:09.672599: step: 268/463, loss: 0.029535286128520966 2023-01-24 02:36:10.354886: step: 270/463, loss: 0.011946989223361015 2023-01-24 02:36:11.001989: step: 272/463, loss: 0.034383513033390045 2023-01-24 02:36:11.699700: step: 274/463, loss: 0.21425393223762512 2023-01-24 02:36:12.287958: step: 276/463, loss: 0.10387646406888962 2023-01-24 02:36:12.949539: step: 278/463, loss: 0.03382065147161484 2023-01-24 02:36:13.625310: step: 280/463, loss: 0.09736025333404541 2023-01-24 02:36:14.271831: step: 282/463, loss: 0.26629090309143066 2023-01-24 02:36:14.821320: step: 284/463, loss: 0.02002924680709839 2023-01-24 02:36:15.429573: step: 286/463, loss: 0.07871459424495697 2023-01-24 02:36:16.070354: step: 288/463, loss: 0.0659560114145279 2023-01-24 02:36:16.694473: step: 290/463, loss: 0.2082950621843338 2023-01-24 02:36:17.406128: step: 292/463, loss: 0.07427112013101578 2023-01-24 02:36:18.056448: step: 294/463, loss: 0.133941188454628 2023-01-24 02:36:18.644192: step: 296/463, loss: 0.41010451316833496 2023-01-24 02:36:19.250485: step: 298/463, loss: 0.5334764122962952 2023-01-24 02:36:19.860125: step: 300/463, loss: 0.08961945027112961 2023-01-24 02:36:20.604903: step: 302/463, loss: 0.03199724107980728 2023-01-24 02:36:21.169861: step: 304/463, loss: 0.011130590923130512 2023-01-24 02:36:21.869092: step: 306/463, loss: 0.025210335850715637 2023-01-24 02:36:22.556823: step: 308/463, loss: 0.013276624493300915 2023-01-24 02:36:23.186205: step: 310/463, loss: 0.09065524488687515 2023-01-24 02:36:23.831268: step: 312/463, loss: 0.14581745862960815 2023-01-24 02:36:24.459695: step: 314/463, loss: 0.0038363952189683914 2023-01-24 02:36:25.082968: step: 316/463, loss: 0.06042252480983734 2023-01-24 02:36:25.662383: step: 318/463, loss: 0.05053623020648956 2023-01-24 02:36:26.287195: step: 320/463, loss: 0.27989882230758667 2023-01-24 02:36:26.920653: step: 322/463, loss: 0.036740854382514954 2023-01-24 02:36:27.577643: step: 324/463, loss: 0.014833400957286358 2023-01-24 02:36:28.189411: step: 326/463, loss: 0.024892011657357216 2023-01-24 02:36:28.743547: step: 328/463, loss: 0.03105071187019348 2023-01-24 02:36:29.310349: step: 330/463, loss: 0.004904492292553186 2023-01-24 02:36:29.921854: step: 332/463, loss: 0.05492234230041504 2023-01-24 02:36:30.542914: step: 334/463, loss: 0.02544984593987465 2023-01-24 02:36:31.308840: step: 336/463, loss: 0.04948979243636131 2023-01-24 02:36:31.922540: step: 338/463, loss: 0.06087706610560417 2023-01-24 02:36:32.544722: step: 340/463, loss: 0.058417242020368576 2023-01-24 02:36:33.276889: step: 342/463, loss: 0.041188452392816544 2023-01-24 02:36:33.904072: step: 344/463, loss: 0.08050759881734848 2023-01-24 02:36:34.539823: step: 346/463, loss: 0.07650867104530334 2023-01-24 02:36:35.113176: step: 348/463, loss: 0.02573893405497074 2023-01-24 02:36:35.748461: step: 350/463, loss: 0.05392170324921608 2023-01-24 02:36:36.338100: step: 352/463, loss: 0.05369649454951286 2023-01-24 02:36:36.930343: step: 354/463, loss: 0.01662009209394455 2023-01-24 02:36:37.616891: step: 356/463, loss: 0.17258664965629578 2023-01-24 02:36:38.310116: step: 358/463, loss: 0.03924667462706566 2023-01-24 02:36:38.943978: step: 360/463, loss: 0.06593135744333267 2023-01-24 02:36:39.624640: step: 362/463, loss: 0.08732613176107407 2023-01-24 02:36:40.238085: step: 364/463, loss: 0.04834168031811714 2023-01-24 02:36:40.930843: step: 366/463, loss: 0.11759504675865173 2023-01-24 02:36:41.543803: step: 368/463, loss: 0.053821392357349396 2023-01-24 02:36:42.235351: step: 370/463, loss: 0.3536268174648285 2023-01-24 02:36:42.842203: step: 372/463, loss: 0.11019283533096313 2023-01-24 02:36:43.489002: step: 374/463, loss: 0.16470111906528473 2023-01-24 02:36:44.078380: step: 376/463, loss: 0.02005312591791153 2023-01-24 02:36:44.764456: step: 378/463, loss: 0.07273054867982864 2023-01-24 02:36:45.308568: step: 380/463, loss: 0.012648736126720905 2023-01-24 02:36:45.959513: step: 382/463, loss: 0.03662315383553505 2023-01-24 02:36:46.610484: step: 384/463, loss: 0.026740530505776405 2023-01-24 02:36:47.150179: step: 386/463, loss: 0.016627658158540726 2023-01-24 02:36:47.821780: step: 388/463, loss: 0.03042297065258026 2023-01-24 02:36:48.442208: step: 390/463, loss: 0.04296514019370079 2023-01-24 02:36:49.058349: step: 392/463, loss: 0.029243730008602142 2023-01-24 02:36:49.725868: step: 394/463, loss: 0.022035405039787292 2023-01-24 02:36:50.368001: step: 396/463, loss: 0.031127285212278366 2023-01-24 02:36:50.945257: step: 398/463, loss: 0.03823459520936012 2023-01-24 02:36:51.566931: step: 400/463, loss: 0.06997408717870712 2023-01-24 02:36:52.190904: step: 402/463, loss: 0.9979336857795715 2023-01-24 02:36:52.808778: step: 404/463, loss: 0.02430730313062668 2023-01-24 02:36:53.428031: step: 406/463, loss: 0.019080815836787224 2023-01-24 02:36:54.030957: step: 408/463, loss: 0.011614823713898659 2023-01-24 02:36:54.695372: step: 410/463, loss: 0.04508303105831146 2023-01-24 02:36:55.254364: step: 412/463, loss: 0.05451278015971184 2023-01-24 02:36:55.834833: step: 414/463, loss: 0.019044803455471992 2023-01-24 02:36:56.383575: step: 416/463, loss: 0.008757064118981361 2023-01-24 02:36:56.974008: step: 418/463, loss: 0.006078549660742283 2023-01-24 02:36:57.607504: step: 420/463, loss: 0.031054750084877014 2023-01-24 02:36:58.230742: step: 422/463, loss: 0.010163325816392899 2023-01-24 02:36:58.792082: step: 424/463, loss: 0.26389050483703613 2023-01-24 02:36:59.413252: step: 426/463, loss: 0.016442103311419487 2023-01-24 02:37:00.007174: step: 428/463, loss: 0.09762738645076752 2023-01-24 02:37:00.655156: step: 430/463, loss: 0.08909660577774048 2023-01-24 02:37:01.302610: step: 432/463, loss: 0.027438441291451454 2023-01-24 02:37:02.015460: step: 434/463, loss: 0.0046327016316354275 2023-01-24 02:37:02.672607: step: 436/463, loss: 0.10689191520214081 2023-01-24 02:37:03.305266: step: 438/463, loss: 0.061745233833789825 2023-01-24 02:37:03.871358: step: 440/463, loss: 0.017984019592404366 2023-01-24 02:37:04.472666: step: 442/463, loss: 0.010926088318228722 2023-01-24 02:37:05.109031: step: 444/463, loss: 0.09590668976306915 2023-01-24 02:37:05.673662: step: 446/463, loss: 0.01594839245080948 2023-01-24 02:37:06.325180: step: 448/463, loss: 0.04712144285440445 2023-01-24 02:37:06.926315: step: 450/463, loss: 0.024126214906573296 2023-01-24 02:37:07.616782: step: 452/463, loss: 0.08683865517377853 2023-01-24 02:37:08.270547: step: 454/463, loss: 0.00911947526037693 2023-01-24 02:37:08.917215: step: 456/463, loss: 0.019975975155830383 2023-01-24 02:37:09.500388: step: 458/463, loss: 0.07355379313230515 2023-01-24 02:37:10.144204: step: 460/463, loss: 0.04055371508002281 2023-01-24 02:37:10.776200: step: 462/463, loss: 0.07014424353837967 2023-01-24 02:37:11.445299: step: 464/463, loss: 0.46137484908103943 2023-01-24 02:37:12.046180: step: 466/463, loss: 0.04279604181647301 2023-01-24 02:37:12.693543: step: 468/463, loss: 0.02388113923370838 2023-01-24 02:37:13.289258: step: 470/463, loss: 0.07741028070449829 2023-01-24 02:37:13.880898: step: 472/463, loss: 0.04212034493684769 2023-01-24 02:37:14.505042: step: 474/463, loss: 0.033438291400671005 2023-01-24 02:37:15.145897: step: 476/463, loss: 0.011724998243153095 2023-01-24 02:37:15.757443: step: 478/463, loss: 0.038654226809740067 2023-01-24 02:37:16.385436: step: 480/463, loss: 0.04388156160712242 2023-01-24 02:37:17.039913: step: 482/463, loss: 0.028641149401664734 2023-01-24 02:37:17.656947: step: 484/463, loss: 0.14782431721687317 2023-01-24 02:37:18.272833: step: 486/463, loss: 0.14872324466705322 2023-01-24 02:37:18.891876: step: 488/463, loss: 0.19952279329299927 2023-01-24 02:37:19.460643: step: 490/463, loss: 0.05978836119174957 2023-01-24 02:37:20.046825: step: 492/463, loss: 0.007217899430543184 2023-01-24 02:37:20.628151: step: 494/463, loss: 0.011317849159240723 2023-01-24 02:37:21.262092: step: 496/463, loss: 0.05255355313420296 2023-01-24 02:37:21.887890: step: 498/463, loss: 0.04206959530711174 2023-01-24 02:37:22.450741: step: 500/463, loss: 0.08046004176139832 2023-01-24 02:37:23.094040: step: 502/463, loss: 0.27565500140190125 2023-01-24 02:37:23.763510: step: 504/463, loss: 0.0955238789319992 2023-01-24 02:37:24.337517: step: 506/463, loss: 0.22641132771968842 2023-01-24 02:37:24.929170: step: 508/463, loss: 0.03411635383963585 2023-01-24 02:37:25.594172: step: 510/463, loss: 0.010817231610417366 2023-01-24 02:37:26.193199: step: 512/463, loss: 0.09243965893983841 2023-01-24 02:37:26.787823: step: 514/463, loss: 0.0375051274895668 2023-01-24 02:37:27.520601: step: 516/463, loss: 0.10578754544258118 2023-01-24 02:37:28.170828: step: 518/463, loss: 0.031016208231449127 2023-01-24 02:37:28.822371: step: 520/463, loss: 0.037243012338876724 2023-01-24 02:37:29.422609: step: 522/463, loss: 0.044066064059734344 2023-01-24 02:37:30.047363: step: 524/463, loss: 0.013921495527029037 2023-01-24 02:37:30.673449: step: 526/463, loss: 0.14074905216693878 2023-01-24 02:37:31.386733: step: 528/463, loss: 0.02002963051199913 2023-01-24 02:37:31.962013: step: 530/463, loss: 0.038184359669685364 2023-01-24 02:37:32.553201: step: 532/463, loss: 0.01914183236658573 2023-01-24 02:37:33.140728: step: 534/463, loss: 0.09031576663255692 2023-01-24 02:37:33.708934: step: 536/463, loss: 0.06424997746944427 2023-01-24 02:37:34.309602: step: 538/463, loss: 0.046822234988212585 2023-01-24 02:37:34.898439: step: 540/463, loss: 0.19398356974124908 2023-01-24 02:37:35.482429: step: 542/463, loss: 0.005893049295991659 2023-01-24 02:37:36.120289: step: 544/463, loss: 0.02531193383038044 2023-01-24 02:37:36.766979: step: 546/463, loss: 0.0798172876238823 2023-01-24 02:37:37.312712: step: 548/463, loss: 0.008896934799849987 2023-01-24 02:37:37.953042: step: 550/463, loss: 0.04455339536070824 2023-01-24 02:37:38.541784: step: 552/463, loss: 1.1011356115341187 2023-01-24 02:37:39.122036: step: 554/463, loss: 0.0317896269261837 2023-01-24 02:37:39.723344: step: 556/463, loss: 0.05265568569302559 2023-01-24 02:37:40.331531: step: 558/463, loss: 0.11968901753425598 2023-01-24 02:37:40.908378: step: 560/463, loss: 0.026453066617250443 2023-01-24 02:37:41.585205: step: 562/463, loss: 0.01327671017497778 2023-01-24 02:37:42.239117: step: 564/463, loss: 0.008031188510358334 2023-01-24 02:37:42.903689: step: 566/463, loss: 0.07956541329622269 2023-01-24 02:37:43.498632: step: 568/463, loss: 0.0658179223537445 2023-01-24 02:37:44.168881: step: 570/463, loss: 0.034820958971977234 2023-01-24 02:37:44.810780: step: 572/463, loss: 0.04435864835977554 2023-01-24 02:37:45.480657: step: 574/463, loss: 0.06307648122310638 2023-01-24 02:37:46.138847: step: 576/463, loss: 0.08670160919427872 2023-01-24 02:37:46.786209: step: 578/463, loss: 0.247022807598114 2023-01-24 02:37:47.448345: step: 580/463, loss: 0.04984545707702637 2023-01-24 02:37:48.094671: step: 582/463, loss: 0.013959272764623165 2023-01-24 02:37:48.734285: step: 584/463, loss: 0.03683488443493843 2023-01-24 02:37:49.372701: step: 586/463, loss: 0.07650494575500488 2023-01-24 02:37:49.960568: step: 588/463, loss: 0.19359926879405975 2023-01-24 02:37:50.535222: step: 590/463, loss: 0.01624019630253315 2023-01-24 02:37:51.189865: step: 592/463, loss: 0.01648665778338909 2023-01-24 02:37:51.881193: step: 594/463, loss: 0.022035900503396988 2023-01-24 02:37:52.515030: step: 596/463, loss: 0.04287360981106758 2023-01-24 02:37:53.081292: step: 598/463, loss: 0.02396414428949356 2023-01-24 02:37:53.672525: step: 600/463, loss: 0.05802258849143982 2023-01-24 02:37:54.292291: step: 602/463, loss: 0.12769955396652222 2023-01-24 02:37:54.934745: step: 604/463, loss: 0.0201233122497797 2023-01-24 02:37:55.576197: step: 606/463, loss: 0.1204633116722107 2023-01-24 02:37:56.226588: step: 608/463, loss: 0.004853670485317707 2023-01-24 02:37:56.939768: step: 610/463, loss: 0.0698622316122055 2023-01-24 02:37:57.553955: step: 612/463, loss: 0.050907183438539505 2023-01-24 02:37:58.163434: step: 614/463, loss: 0.0035860727075487375 2023-01-24 02:37:58.840737: step: 616/463, loss: 0.06869044899940491 2023-01-24 02:37:59.458399: step: 618/463, loss: 0.059697411954402924 2023-01-24 02:38:00.095653: step: 620/463, loss: 0.015558258630335331 2023-01-24 02:38:00.780119: step: 622/463, loss: 0.044381264597177505 2023-01-24 02:38:01.357126: step: 624/463, loss: 0.03939526528120041 2023-01-24 02:38:01.934531: step: 626/463, loss: 0.04848700761795044 2023-01-24 02:38:02.514982: step: 628/463, loss: 0.09361636638641357 2023-01-24 02:38:03.182320: step: 630/463, loss: 0.017986519262194633 2023-01-24 02:38:03.864229: step: 632/463, loss: 0.047542911022901535 2023-01-24 02:38:04.527305: step: 634/463, loss: 0.286553293466568 2023-01-24 02:38:05.171693: step: 636/463, loss: 0.05607404559850693 2023-01-24 02:38:05.862024: step: 638/463, loss: 0.07178696990013123 2023-01-24 02:38:06.491871: step: 640/463, loss: 0.45497363805770874 2023-01-24 02:38:07.133531: step: 642/463, loss: 0.055788442492485046 2023-01-24 02:38:07.745836: step: 644/463, loss: 0.36743780970573425 2023-01-24 02:38:08.420362: step: 646/463, loss: 0.03997711464762688 2023-01-24 02:38:08.995021: step: 648/463, loss: 0.01330278068780899 2023-01-24 02:38:09.681331: step: 650/463, loss: 0.07009090483188629 2023-01-24 02:38:10.282070: step: 652/463, loss: 0.06956334412097931 2023-01-24 02:38:10.996608: step: 654/463, loss: 0.3698880970478058 2023-01-24 02:38:11.649282: step: 656/463, loss: 0.10815276205539703 2023-01-24 02:38:12.201361: step: 658/463, loss: 0.06989423930644989 2023-01-24 02:38:12.794687: step: 660/463, loss: 0.015422336757183075 2023-01-24 02:38:13.385857: step: 662/463, loss: 0.07557134330272675 2023-01-24 02:38:13.996837: step: 664/463, loss: 0.0898468941450119 2023-01-24 02:38:14.581282: step: 666/463, loss: 0.024765724316239357 2023-01-24 02:38:15.219463: step: 668/463, loss: 0.032509028911590576 2023-01-24 02:38:15.851202: step: 670/463, loss: 0.0723862424492836 2023-01-24 02:38:16.468199: step: 672/463, loss: 0.0674487054347992 2023-01-24 02:38:17.021633: step: 674/463, loss: 0.3608826696872711 2023-01-24 02:38:17.650558: step: 676/463, loss: 0.019310543313622475 2023-01-24 02:38:18.324251: step: 678/463, loss: 0.05626765638589859 2023-01-24 02:38:18.928060: step: 680/463, loss: 0.01838117279112339 2023-01-24 02:38:19.558379: step: 682/463, loss: 0.24786485731601715 2023-01-24 02:38:20.141052: step: 684/463, loss: 0.08619832992553711 2023-01-24 02:38:20.775290: step: 686/463, loss: 0.04205776005983353 2023-01-24 02:38:21.340468: step: 688/463, loss: 0.04023507609963417 2023-01-24 02:38:21.950281: step: 690/463, loss: 0.06675650924444199 2023-01-24 02:38:22.490041: step: 692/463, loss: 0.023906243965029716 2023-01-24 02:38:23.093865: step: 694/463, loss: 0.08106192201375961 2023-01-24 02:38:23.695687: step: 696/463, loss: 0.030015112832188606 2023-01-24 02:38:24.391176: step: 698/463, loss: 0.09135319292545319 2023-01-24 02:38:25.062381: step: 700/463, loss: 0.0055384705774486065 2023-01-24 02:38:25.718357: step: 702/463, loss: 0.1287613809108734 2023-01-24 02:38:26.321484: step: 704/463, loss: 0.11701430380344391 2023-01-24 02:38:26.913128: step: 706/463, loss: 0.009921292774379253 2023-01-24 02:38:27.546564: step: 708/463, loss: 0.03269552066922188 2023-01-24 02:38:28.189645: step: 710/463, loss: 0.15854285657405853 2023-01-24 02:38:28.753447: step: 712/463, loss: 0.054304350167512894 2023-01-24 02:38:29.330678: step: 714/463, loss: 0.00865735299885273 2023-01-24 02:38:29.931635: step: 716/463, loss: 0.09294646233320236 2023-01-24 02:38:30.604470: step: 718/463, loss: 0.10276513546705246 2023-01-24 02:38:31.265830: step: 720/463, loss: 0.40639618039131165 2023-01-24 02:38:31.843627: step: 722/463, loss: 0.14518621563911438 2023-01-24 02:38:32.506950: step: 724/463, loss: 0.06630782037973404 2023-01-24 02:38:33.119242: step: 726/463, loss: 0.05585341528058052 2023-01-24 02:38:33.720590: step: 728/463, loss: 0.008550072088837624 2023-01-24 02:38:34.314767: step: 730/463, loss: 0.017883699387311935 2023-01-24 02:38:34.992091: step: 732/463, loss: 0.03385250270366669 2023-01-24 02:38:35.644537: step: 734/463, loss: 0.2274763584136963 2023-01-24 02:38:36.272530: step: 736/463, loss: 0.007525038439780474 2023-01-24 02:38:36.896932: step: 738/463, loss: 0.06738711148500443 2023-01-24 02:38:37.478239: step: 740/463, loss: 0.10399018228054047 2023-01-24 02:38:38.079477: step: 742/463, loss: 0.3323141038417816 2023-01-24 02:38:38.677336: step: 744/463, loss: 0.051415394991636276 2023-01-24 02:38:39.332678: step: 746/463, loss: 0.11459618806838989 2023-01-24 02:38:39.948001: step: 748/463, loss: 0.0683571845293045 2023-01-24 02:38:40.582135: step: 750/463, loss: 0.04533589631319046 2023-01-24 02:38:41.242805: step: 752/463, loss: 0.02209385484457016 2023-01-24 02:38:41.899144: step: 754/463, loss: 0.012202427722513676 2023-01-24 02:38:42.517439: step: 756/463, loss: 0.028971416875720024 2023-01-24 02:38:43.055306: step: 758/463, loss: 0.052366968244314194 2023-01-24 02:38:43.698876: step: 760/463, loss: 0.046881161630153656 2023-01-24 02:38:44.335034: step: 762/463, loss: 0.051236484199762344 2023-01-24 02:38:44.983202: step: 764/463, loss: 0.147416353225708 2023-01-24 02:38:45.585334: step: 766/463, loss: 0.01865893229842186 2023-01-24 02:38:46.132761: step: 768/463, loss: 0.01721896044909954 2023-01-24 02:38:46.789732: step: 770/463, loss: 0.02192249521613121 2023-01-24 02:38:47.492245: step: 772/463, loss: 0.20242924988269806 2023-01-24 02:38:48.139875: step: 774/463, loss: 0.0168751310557127 2023-01-24 02:38:48.763649: step: 776/463, loss: 0.05086857080459595 2023-01-24 02:38:49.324317: step: 778/463, loss: 0.04998449608683586 2023-01-24 02:38:49.983101: step: 780/463, loss: 0.10142890363931656 2023-01-24 02:38:50.601061: step: 782/463, loss: 0.10507799685001373 2023-01-24 02:38:51.204866: step: 784/463, loss: 0.015436510555446148 2023-01-24 02:38:51.909504: step: 786/463, loss: 7.141009330749512 2023-01-24 02:38:52.494225: step: 788/463, loss: 0.02300797775387764 2023-01-24 02:38:53.200157: step: 790/463, loss: 0.025129621848464012 2023-01-24 02:38:53.847077: step: 792/463, loss: 0.2690727412700653 2023-01-24 02:38:54.461470: step: 794/463, loss: 0.07071518152952194 2023-01-24 02:38:55.154765: step: 796/463, loss: 0.2657509446144104 2023-01-24 02:38:55.802828: step: 798/463, loss: 0.01510376762598753 2023-01-24 02:38:56.498456: step: 800/463, loss: 0.05102163180708885 2023-01-24 02:38:57.103728: step: 802/463, loss: 0.3241473436355591 2023-01-24 02:38:57.721247: step: 804/463, loss: 0.01089604664593935 2023-01-24 02:38:58.301588: step: 806/463, loss: 0.01734016090631485 2023-01-24 02:38:58.916760: step: 808/463, loss: 0.4902510941028595 2023-01-24 02:38:59.537889: step: 810/463, loss: 0.04687965661287308 2023-01-24 02:39:00.159824: step: 812/463, loss: 0.05287744104862213 2023-01-24 02:39:00.885418: step: 814/463, loss: 0.0641651302576065 2023-01-24 02:39:01.459603: step: 816/463, loss: 0.6312924027442932 2023-01-24 02:39:02.057962: step: 818/463, loss: 0.05096278712153435 2023-01-24 02:39:02.655200: step: 820/463, loss: 0.23992712795734406 2023-01-24 02:39:03.251769: step: 822/463, loss: 0.05443326383829117 2023-01-24 02:39:03.900011: step: 824/463, loss: 0.04396352916955948 2023-01-24 02:39:04.488308: step: 826/463, loss: 0.04003969579935074 2023-01-24 02:39:05.089729: step: 828/463, loss: 0.06067924574017525 2023-01-24 02:39:05.697239: step: 830/463, loss: 0.19403262436389923 2023-01-24 02:39:06.317136: step: 832/463, loss: 0.012331809848546982 2023-01-24 02:39:06.928341: step: 834/463, loss: 0.2989160418510437 2023-01-24 02:39:07.525384: step: 836/463, loss: 0.10023240000009537 2023-01-24 02:39:08.137442: step: 838/463, loss: 0.06966780126094818 2023-01-24 02:39:08.749735: step: 840/463, loss: 0.045513153076171875 2023-01-24 02:39:09.394208: step: 842/463, loss: 0.018077896907925606 2023-01-24 02:39:10.080181: step: 844/463, loss: 0.05654609948396683 2023-01-24 02:39:10.778080: step: 846/463, loss: 0.07529649883508682 2023-01-24 02:39:11.470638: step: 848/463, loss: 0.04621685668826103 2023-01-24 02:39:12.053581: step: 850/463, loss: 0.04698904976248741 2023-01-24 02:39:12.705838: step: 852/463, loss: 0.035035859793424606 2023-01-24 02:39:13.335328: step: 854/463, loss: 0.030948083847761154 2023-01-24 02:39:13.980843: step: 856/463, loss: 0.033439572900533676 2023-01-24 02:39:14.644982: step: 858/463, loss: 0.054421015083789825 2023-01-24 02:39:15.303128: step: 860/463, loss: 0.07792049646377563 2023-01-24 02:39:15.921317: step: 862/463, loss: 0.04047496244311333 2023-01-24 02:39:16.538632: step: 864/463, loss: 0.045256830751895905 2023-01-24 02:39:17.144191: step: 866/463, loss: 0.186228409409523 2023-01-24 02:39:17.770496: step: 868/463, loss: 0.4676240384578705 2023-01-24 02:39:18.454239: step: 870/463, loss: 0.33627933263778687 2023-01-24 02:39:19.059628: step: 872/463, loss: 0.04321770742535591 2023-01-24 02:39:19.737831: step: 874/463, loss: 0.02136065997183323 2023-01-24 02:39:20.292491: step: 876/463, loss: 0.14712314307689667 2023-01-24 02:39:20.906128: step: 878/463, loss: 0.25933483242988586 2023-01-24 02:39:21.725222: step: 880/463, loss: 0.18477784097194672 2023-01-24 02:39:22.364883: step: 882/463, loss: 0.2101510763168335 2023-01-24 02:39:22.981524: step: 884/463, loss: 0.04477240517735481 2023-01-24 02:39:23.578563: step: 886/463, loss: 0.030523166060447693 2023-01-24 02:39:24.205912: step: 888/463, loss: 0.07675585150718689 2023-01-24 02:39:24.845760: step: 890/463, loss: 0.02099417708814144 2023-01-24 02:39:25.433617: step: 892/463, loss: 0.06957526504993439 2023-01-24 02:39:26.061187: step: 894/463, loss: 0.006169005297124386 2023-01-24 02:39:26.689981: step: 896/463, loss: 0.04441026225686073 2023-01-24 02:39:27.271438: step: 898/463, loss: 0.020933441817760468 2023-01-24 02:39:27.921259: step: 900/463, loss: 0.03602498024702072 2023-01-24 02:39:28.560167: step: 902/463, loss: 0.45958322286605835 2023-01-24 02:39:29.162505: step: 904/463, loss: 0.0485958494246006 2023-01-24 02:39:29.763093: step: 906/463, loss: 0.015496374107897282 2023-01-24 02:39:30.386547: step: 908/463, loss: 0.038306329399347305 2023-01-24 02:39:31.120109: step: 910/463, loss: 0.008287390694022179 2023-01-24 02:39:31.729822: step: 912/463, loss: 0.06858644634485245 2023-01-24 02:39:32.330082: step: 914/463, loss: 0.04479951784014702 2023-01-24 02:39:32.934950: step: 916/463, loss: 0.037823405116796494 2023-01-24 02:39:33.530273: step: 918/463, loss: 0.07592744380235672 2023-01-24 02:39:34.238399: step: 920/463, loss: 0.10402562469244003 2023-01-24 02:39:34.778156: step: 922/463, loss: 0.2988271713256836 2023-01-24 02:39:35.393893: step: 924/463, loss: 0.013648463413119316 2023-01-24 02:39:35.954046: step: 926/463, loss: 0.031043766066432 ================================================== Loss: 0.136 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35920572916666665, 'r': 0.3271703036053131, 'f1': 0.342440417080437}, 'combined': 0.25232451784874305, 'epoch': 21} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3534402534163098, 'r': 0.3856007176518012, 'f1': 0.36882073037816243}, 'combined': 0.2858801833553221, 'epoch': 21} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3320297270955166, 'r': 0.32320920303605316, 'f1': 0.3275600961538462}, 'combined': 0.24136007085020245, 'epoch': 21} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34383429267357074, 'r': 0.39123791758261084, 'f1': 0.36600761335329374}, 'combined': 0.28369968116379707, 'epoch': 21} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3364937542999656, 'r': 0.32563911706448284, 'f1': 0.3309774632458678}, 'combined': 0.24387813081274468, 'epoch': 21} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3412411297968264, 'r': 0.3769961746468615, 'f1': 0.3582286794897689}, 'combined': 0.27767007692508405, 'epoch': 21} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3235294117647059, 'r': 0.3142857142857143, 'f1': 0.31884057971014496}, 'combined': 0.21256038647342995, 'epoch': 21} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3055555555555556, 'r': 0.358695652173913, 'f1': 0.32999999999999996}, 'combined': 0.16499999999999998, 'epoch': 21} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4423076923076923, 'r': 0.19827586206896552, 'f1': 0.2738095238095238}, 'combined': 0.1825396825396825, 'epoch': 21} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35531381856540084, 'r': 0.3195801707779886, 'f1': 0.336500999000999}, 'combined': 0.24794810452705188, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35077406976066583, 'r': 0.38269193088778525, 'f1': 0.3660385237854157}, 'combined': 0.2837236404461117, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.3142857142857143, 'f1': 0.360655737704918}, 'combined': 0.24043715846994532, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3556843389529725, 'r': 0.32598773380319873, 'f1': 0.340189179632249}, 'combined': 0.25066571130797294, 'epoch': 19} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35235259195938173, 'r': 0.3698406801632481, 'f1': 0.3608848968767838}, 'combined': 0.2797289631293731, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.41304347826086957, 'r': 0.41304347826086957, 'f1': 0.41304347826086957}, 'combined': 0.20652173913043478, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33842201576576575, 'r': 0.3326425126502214, 'f1': 0.3355073763955343}, 'combined': 0.2472159615546042, 'epoch': 14} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33644544699224593, 'r': 0.3627302475385151, 'f1': 0.3490937720671424}, 'combined': 0.2705894309802731, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.20689655172413793, 'f1': 0.2926829268292683}, 'combined': 0.19512195121951217, 'epoch': 14} ****************************** Epoch: 22 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:42:12.584714: step: 2/463, loss: 0.0845867320895195 2023-01-24 02:42:13.250259: step: 4/463, loss: 0.055136144161224365 2023-01-24 02:42:13.910361: step: 6/463, loss: 0.10939284414052963 2023-01-24 02:42:14.527631: step: 8/463, loss: 0.008023769594728947 2023-01-24 02:42:15.212207: step: 10/463, loss: 0.22477668523788452 2023-01-24 02:42:15.850078: step: 12/463, loss: 0.01714356429874897 2023-01-24 02:42:16.553516: step: 14/463, loss: 0.02461835741996765 2023-01-24 02:42:17.205085: step: 16/463, loss: 0.010595309548079967 2023-01-24 02:42:17.814700: step: 18/463, loss: 0.025409379974007607 2023-01-24 02:42:18.430422: step: 20/463, loss: 0.010673393495380878 2023-01-24 02:42:19.049560: step: 22/463, loss: 0.1065206527709961 2023-01-24 02:42:19.645536: step: 24/463, loss: 0.024548014625906944 2023-01-24 02:42:20.319189: step: 26/463, loss: 0.06019691377878189 2023-01-24 02:42:20.957023: step: 28/463, loss: 0.049314118921756744 2023-01-24 02:42:21.539847: step: 30/463, loss: 0.02850780449807644 2023-01-24 02:42:22.209153: step: 32/463, loss: 0.07022781670093536 2023-01-24 02:42:22.858389: step: 34/463, loss: 0.012563971802592278 2023-01-24 02:42:23.502117: step: 36/463, loss: 0.046172142028808594 2023-01-24 02:42:24.068189: step: 38/463, loss: 0.2738581895828247 2023-01-24 02:42:24.698781: step: 40/463, loss: 0.02885405160486698 2023-01-24 02:42:25.362587: step: 42/463, loss: 0.03181665018200874 2023-01-24 02:42:25.959867: step: 44/463, loss: 0.029062461107969284 2023-01-24 02:42:26.516086: step: 46/463, loss: 0.002969985129311681 2023-01-24 02:42:27.124856: step: 48/463, loss: 0.017055639997124672 2023-01-24 02:42:27.730715: step: 50/463, loss: 0.03472554683685303 2023-01-24 02:42:28.328040: step: 52/463, loss: 0.6524677276611328 2023-01-24 02:42:29.019247: step: 54/463, loss: 0.06620614975690842 2023-01-24 02:42:29.639488: step: 56/463, loss: 0.005133425816893578 2023-01-24 02:42:30.230581: step: 58/463, loss: 0.36377400159835815 2023-01-24 02:42:30.806792: step: 60/463, loss: 0.07737980037927628 2023-01-24 02:42:31.479576: step: 62/463, loss: 0.033549461513757706 2023-01-24 02:42:32.128105: step: 64/463, loss: 0.011558969505131245 2023-01-24 02:42:32.784167: step: 66/463, loss: 0.12483873218297958 2023-01-24 02:42:33.391325: step: 68/463, loss: 0.013231134973466396 2023-01-24 02:42:33.999090: step: 70/463, loss: 0.007984398864209652 2023-01-24 02:42:34.564164: step: 72/463, loss: 0.0069300392642617226 2023-01-24 02:42:35.234552: step: 74/463, loss: 0.02041485719382763 2023-01-24 02:42:35.846205: step: 76/463, loss: 0.015719905495643616 2023-01-24 02:42:36.429149: step: 78/463, loss: 0.10667863488197327 2023-01-24 02:42:37.074720: step: 80/463, loss: 0.05963480472564697 2023-01-24 02:42:37.678238: step: 82/463, loss: 0.06292406469583511 2023-01-24 02:42:38.321593: step: 84/463, loss: 0.04972478002309799 2023-01-24 02:42:39.016490: step: 86/463, loss: 0.019618812948465347 2023-01-24 02:42:39.615048: step: 88/463, loss: 0.055557891726493835 2023-01-24 02:42:40.123762: step: 90/463, loss: 0.0877290740609169 2023-01-24 02:42:40.768818: step: 92/463, loss: 0.005274410359561443 2023-01-24 02:42:41.368258: step: 94/463, loss: 0.08651704341173172 2023-01-24 02:42:41.951836: step: 96/463, loss: 0.061573419719934464 2023-01-24 02:42:42.623530: step: 98/463, loss: 0.03633836284279823 2023-01-24 02:42:43.239043: step: 100/463, loss: 0.05434788763523102 2023-01-24 02:42:43.829917: step: 102/463, loss: 0.053024712949991226 2023-01-24 02:42:44.407128: step: 104/463, loss: 1.0160967111587524 2023-01-24 02:42:45.071162: step: 106/463, loss: 0.07882880419492722 2023-01-24 02:42:45.638240: step: 108/463, loss: 0.2912052273750305 2023-01-24 02:42:46.207293: step: 110/463, loss: 0.05966493487358093 2023-01-24 02:42:46.897440: step: 112/463, loss: 0.010922667570412159 2023-01-24 02:42:47.457561: step: 114/463, loss: 14.686832427978516 2023-01-24 02:42:47.974274: step: 116/463, loss: 0.0011662731412798166 2023-01-24 02:42:49.251310: step: 118/463, loss: 0.0961623340845108 2023-01-24 02:42:49.985003: step: 120/463, loss: 0.5014403462409973 2023-01-24 02:42:50.666390: step: 122/463, loss: 0.750453531742096 2023-01-24 02:42:51.255669: step: 124/463, loss: 0.2161422073841095 2023-01-24 02:42:51.836103: step: 126/463, loss: 0.12633661925792694 2023-01-24 02:42:52.436253: step: 128/463, loss: 0.012642335146665573 2023-01-24 02:42:53.070108: step: 130/463, loss: 0.0704883486032486 2023-01-24 02:42:53.707290: step: 132/463, loss: 0.01433706097304821 2023-01-24 02:42:54.367921: step: 134/463, loss: 0.001600269926711917 2023-01-24 02:42:54.999785: step: 136/463, loss: 0.04002080857753754 2023-01-24 02:42:55.632339: step: 138/463, loss: 0.17413103580474854 2023-01-24 02:42:56.286435: step: 140/463, loss: 0.030261939391493797 2023-01-24 02:42:56.975931: step: 142/463, loss: 0.002093089744448662 2023-01-24 02:42:57.550329: step: 144/463, loss: 0.05285588651895523 2023-01-24 02:42:58.152875: step: 146/463, loss: 0.03583652526140213 2023-01-24 02:42:58.772114: step: 148/463, loss: 0.01902391016483307 2023-01-24 02:42:59.338226: step: 150/463, loss: 0.004319375846534967 2023-01-24 02:43:00.090101: step: 152/463, loss: 0.18238073587417603 2023-01-24 02:43:00.754549: step: 154/463, loss: 0.022598445415496826 2023-01-24 02:43:01.357633: step: 156/463, loss: 0.04447271302342415 2023-01-24 02:43:01.986573: step: 158/463, loss: 0.015802208334207535 2023-01-24 02:43:02.648285: step: 160/463, loss: 0.4238336980342865 2023-01-24 02:43:03.283714: step: 162/463, loss: 0.0012278840877115726 2023-01-24 02:43:03.921443: step: 164/463, loss: 0.023539898917078972 2023-01-24 02:43:04.516780: step: 166/463, loss: 0.07528958469629288 2023-01-24 02:43:05.164409: step: 168/463, loss: 0.07155793905258179 2023-01-24 02:43:05.802270: step: 170/463, loss: 0.18612246215343475 2023-01-24 02:43:06.482859: step: 172/463, loss: 0.016407344490289688 2023-01-24 02:43:07.125430: step: 174/463, loss: 0.17127607762813568 2023-01-24 02:43:07.712091: step: 176/463, loss: 0.049892738461494446 2023-01-24 02:43:08.393339: step: 178/463, loss: 0.03789215534925461 2023-01-24 02:43:08.959046: step: 180/463, loss: 0.002035361248999834 2023-01-24 02:43:09.622094: step: 182/463, loss: 0.051167696714401245 2023-01-24 02:43:10.285538: step: 184/463, loss: 0.041595276445150375 2023-01-24 02:43:10.895338: step: 186/463, loss: 0.062073949724435806 2023-01-24 02:43:11.485869: step: 188/463, loss: 0.0916956290602684 2023-01-24 02:43:12.105260: step: 190/463, loss: 0.03840848430991173 2023-01-24 02:43:12.698580: step: 192/463, loss: 0.2149547040462494 2023-01-24 02:43:13.356331: step: 194/463, loss: 0.035140734165906906 2023-01-24 02:43:13.944063: step: 196/463, loss: 0.009174899198114872 2023-01-24 02:43:14.605620: step: 198/463, loss: 0.03373410925269127 2023-01-24 02:43:15.233764: step: 200/463, loss: 0.05076950788497925 2023-01-24 02:43:15.874318: step: 202/463, loss: 0.04133080318570137 2023-01-24 02:43:16.500621: step: 204/463, loss: 0.07168225944042206 2023-01-24 02:43:17.159445: step: 206/463, loss: 0.017578035593032837 2023-01-24 02:43:17.774047: step: 208/463, loss: 0.05091509595513344 2023-01-24 02:43:18.411883: step: 210/463, loss: 0.14064140617847443 2023-01-24 02:43:19.015623: step: 212/463, loss: 0.014854749664664268 2023-01-24 02:43:19.584134: step: 214/463, loss: 0.018066704273223877 2023-01-24 02:43:20.258815: step: 216/463, loss: 0.077924445271492 2023-01-24 02:43:20.837051: step: 218/463, loss: 0.045320775359869 2023-01-24 02:43:21.538609: step: 220/463, loss: 0.02095767669379711 2023-01-24 02:43:22.173318: step: 222/463, loss: 0.11336018145084381 2023-01-24 02:43:22.886991: step: 224/463, loss: 0.06618199497461319 2023-01-24 02:43:23.513427: step: 226/463, loss: 0.032144319266080856 2023-01-24 02:43:24.196249: step: 228/463, loss: 0.01780652441084385 2023-01-24 02:43:24.748591: step: 230/463, loss: 0.02497301995754242 2023-01-24 02:43:25.340657: step: 232/463, loss: 0.056469835340976715 2023-01-24 02:43:26.020343: step: 234/463, loss: 0.01768629625439644 2023-01-24 02:43:26.659940: step: 236/463, loss: 0.04045190289616585 2023-01-24 02:43:27.299789: step: 238/463, loss: 0.08207670599222183 2023-01-24 02:43:27.902052: step: 240/463, loss: 0.10335548222064972 2023-01-24 02:43:28.494006: step: 242/463, loss: 0.003417340340092778 2023-01-24 02:43:29.133942: step: 244/463, loss: 0.0884276032447815 2023-01-24 02:43:29.657582: step: 246/463, loss: 0.20539116859436035 2023-01-24 02:43:30.272720: step: 248/463, loss: 0.04645257815718651 2023-01-24 02:43:30.921059: step: 250/463, loss: 0.008458147756755352 2023-01-24 02:43:31.594952: step: 252/463, loss: 0.024484852328896523 2023-01-24 02:43:32.217221: step: 254/463, loss: 0.05897795781493187 2023-01-24 02:43:32.840903: step: 256/463, loss: 0.10937213897705078 2023-01-24 02:43:33.464178: step: 258/463, loss: 0.026765946298837662 2023-01-24 02:43:34.067402: step: 260/463, loss: 0.009288814850151539 2023-01-24 02:43:34.695597: step: 262/463, loss: 0.0523076094686985 2023-01-24 02:43:35.312609: step: 264/463, loss: 0.05381142720580101 2023-01-24 02:43:35.963439: step: 266/463, loss: 0.35319727659225464 2023-01-24 02:43:36.625085: step: 268/463, loss: 0.10190211236476898 2023-01-24 02:43:37.280311: step: 270/463, loss: 0.018330315127968788 2023-01-24 02:43:37.865335: step: 272/463, loss: 0.07109789550304413 2023-01-24 02:43:38.448145: step: 274/463, loss: 0.016778625547885895 2023-01-24 02:43:39.035681: step: 276/463, loss: 0.01825481280684471 2023-01-24 02:43:39.623033: step: 278/463, loss: 0.013317803852260113 2023-01-24 02:43:40.258548: step: 280/463, loss: 0.0818164125084877 2023-01-24 02:43:40.880013: step: 282/463, loss: 0.04808545857667923 2023-01-24 02:43:41.552690: step: 284/463, loss: 0.010904652997851372 2023-01-24 02:43:42.154667: step: 286/463, loss: 0.075593963265419 2023-01-24 02:43:42.749697: step: 288/463, loss: 0.3891074061393738 2023-01-24 02:43:43.353338: step: 290/463, loss: 0.01172602642327547 2023-01-24 02:43:43.931251: step: 292/463, loss: 0.007304557599127293 2023-01-24 02:43:44.577920: step: 294/463, loss: 0.024865420535206795 2023-01-24 02:43:45.240718: step: 296/463, loss: 0.41033780574798584 2023-01-24 02:43:45.822843: step: 298/463, loss: 0.030303271487355232 2023-01-24 02:43:46.470136: step: 300/463, loss: 0.07504013925790787 2023-01-24 02:43:47.039001: step: 302/463, loss: 0.0629776269197464 2023-01-24 02:43:47.663330: step: 304/463, loss: 0.024631312116980553 2023-01-24 02:43:48.312860: step: 306/463, loss: 0.3940405547618866 2023-01-24 02:43:48.907900: step: 308/463, loss: 0.011382722295820713 2023-01-24 02:43:49.535420: step: 310/463, loss: 0.008826471865177155 2023-01-24 02:43:50.153207: step: 312/463, loss: 0.021369939669966698 2023-01-24 02:43:50.805340: step: 314/463, loss: 0.022341836243867874 2023-01-24 02:43:51.339547: step: 316/463, loss: 0.021838104352355003 2023-01-24 02:43:51.930573: step: 318/463, loss: 0.031893715262413025 2023-01-24 02:43:52.579407: step: 320/463, loss: 0.02486550249159336 2023-01-24 02:43:53.178395: step: 322/463, loss: 0.033111006021499634 2023-01-24 02:43:53.785746: step: 324/463, loss: 0.00798120815306902 2023-01-24 02:43:54.470529: step: 326/463, loss: 0.010326381772756577 2023-01-24 02:43:55.058178: step: 328/463, loss: 0.0234376210719347 2023-01-24 02:43:55.628462: step: 330/463, loss: 0.02511567622423172 2023-01-24 02:43:56.186933: step: 332/463, loss: 0.010141482576727867 2023-01-24 02:43:56.776898: step: 334/463, loss: 0.14301179349422455 2023-01-24 02:43:57.431055: step: 336/463, loss: 0.0386841744184494 2023-01-24 02:43:58.026189: step: 338/463, loss: 0.018559901043772697 2023-01-24 02:43:58.670704: step: 340/463, loss: 0.07188437879085541 2023-01-24 02:43:59.291903: step: 342/463, loss: 0.035321999341249466 2023-01-24 02:43:59.868958: step: 344/463, loss: 0.01174361165612936 2023-01-24 02:44:00.439596: step: 346/463, loss: 0.01247880794107914 2023-01-24 02:44:01.108529: step: 348/463, loss: 0.020676346495747566 2023-01-24 02:44:01.715690: step: 350/463, loss: 0.04405326768755913 2023-01-24 02:44:02.269838: step: 352/463, loss: 0.026704121381044388 2023-01-24 02:44:02.946124: step: 354/463, loss: 0.04153049737215042 2023-01-24 02:44:03.579016: step: 356/463, loss: 0.034516237676143646 2023-01-24 02:44:04.183641: step: 358/463, loss: 0.04867745563387871 2023-01-24 02:44:04.741760: step: 360/463, loss: 0.03840473294258118 2023-01-24 02:44:05.321061: step: 362/463, loss: 0.04419498145580292 2023-01-24 02:44:06.043334: step: 364/463, loss: 0.10025046765804291 2023-01-24 02:44:06.673968: step: 366/463, loss: 0.008328991942107677 2023-01-24 02:44:07.321630: step: 368/463, loss: 0.4963262677192688 2023-01-24 02:44:07.962330: step: 370/463, loss: 0.023153891786932945 2023-01-24 02:44:08.620682: step: 372/463, loss: 0.06186239793896675 2023-01-24 02:44:09.258678: step: 374/463, loss: 0.07661768794059753 2023-01-24 02:44:09.892407: step: 376/463, loss: 0.03685622289776802 2023-01-24 02:44:10.448888: step: 378/463, loss: 0.5296953916549683 2023-01-24 02:44:11.021064: step: 380/463, loss: 0.04537447914481163 2023-01-24 02:44:11.725337: step: 382/463, loss: 0.07284008711576462 2023-01-24 02:44:12.312018: step: 384/463, loss: 0.0784781202673912 2023-01-24 02:44:12.933907: step: 386/463, loss: 0.004668379668146372 2023-01-24 02:44:13.620538: step: 388/463, loss: 0.04326092451810837 2023-01-24 02:44:14.278126: step: 390/463, loss: 0.29147958755493164 2023-01-24 02:44:14.878336: step: 392/463, loss: 0.07475181668996811 2023-01-24 02:44:15.561206: step: 394/463, loss: 0.036760374903678894 2023-01-24 02:44:16.168669: step: 396/463, loss: 0.033690277487039566 2023-01-24 02:44:16.843134: step: 398/463, loss: 0.045227665454149246 2023-01-24 02:44:17.405715: step: 400/463, loss: 0.0044243293814361095 2023-01-24 02:44:18.032412: step: 402/463, loss: 0.0560561865568161 2023-01-24 02:44:18.663440: step: 404/463, loss: 0.08145659416913986 2023-01-24 02:44:19.250798: step: 406/463, loss: 0.05191114544868469 2023-01-24 02:44:19.853656: step: 408/463, loss: 0.016322242096066475 2023-01-24 02:44:20.508906: step: 410/463, loss: 0.054643746465444565 2023-01-24 02:44:21.118294: step: 412/463, loss: 0.5975512266159058 2023-01-24 02:44:21.765784: step: 414/463, loss: 0.05992380157113075 2023-01-24 02:44:22.395356: step: 416/463, loss: 0.08229263126850128 2023-01-24 02:44:23.119426: step: 418/463, loss: 0.2635195851325989 2023-01-24 02:44:23.708274: step: 420/463, loss: 0.027859417721629143 2023-01-24 02:44:24.323577: step: 422/463, loss: 0.9496694207191467 2023-01-24 02:44:24.976634: step: 424/463, loss: 0.09949032962322235 2023-01-24 02:44:25.584548: step: 426/463, loss: 0.019798634573817253 2023-01-24 02:44:26.251642: step: 428/463, loss: 0.2418743222951889 2023-01-24 02:44:26.854737: step: 430/463, loss: 0.04883335158228874 2023-01-24 02:44:27.357400: step: 432/463, loss: 0.015203049406409264 2023-01-24 02:44:27.998842: step: 434/463, loss: 0.09791691601276398 2023-01-24 02:44:28.597675: step: 436/463, loss: 0.027008995413780212 2023-01-24 02:44:29.260566: step: 438/463, loss: 0.0531264990568161 2023-01-24 02:44:29.911522: step: 440/463, loss: 0.027465471997857094 2023-01-24 02:44:30.598805: step: 442/463, loss: 0.04369494691491127 2023-01-24 02:44:31.174824: step: 444/463, loss: 1.1143391132354736 2023-01-24 02:44:31.848054: step: 446/463, loss: 0.02195393294095993 2023-01-24 02:44:32.426975: step: 448/463, loss: 0.030329877510666847 2023-01-24 02:44:32.973379: step: 450/463, loss: 0.17108339071273804 2023-01-24 02:44:33.628419: step: 452/463, loss: 0.0349968820810318 2023-01-24 02:44:34.239285: step: 454/463, loss: 0.07262814044952393 2023-01-24 02:44:34.873790: step: 456/463, loss: 0.007454673293977976 2023-01-24 02:44:35.461477: step: 458/463, loss: 0.054338742047548294 2023-01-24 02:44:36.103246: step: 460/463, loss: 0.01876160316169262 2023-01-24 02:44:36.747397: step: 462/463, loss: 0.03726070001721382 2023-01-24 02:44:37.381299: step: 464/463, loss: 0.0870211124420166 2023-01-24 02:44:37.908821: step: 466/463, loss: 0.016749411821365356 2023-01-24 02:44:38.564480: step: 468/463, loss: 0.026697583496570587 2023-01-24 02:44:39.233126: step: 470/463, loss: 0.0754585713148117 2023-01-24 02:44:39.834143: step: 472/463, loss: 0.030796436592936516 2023-01-24 02:44:40.499248: step: 474/463, loss: 0.04457510635256767 2023-01-24 02:44:41.128911: step: 476/463, loss: 0.07331067323684692 2023-01-24 02:44:41.749993: step: 478/463, loss: 0.019977036863565445 2023-01-24 02:44:42.384784: step: 480/463, loss: 0.017528826370835304 2023-01-24 02:44:43.009818: step: 482/463, loss: 0.04221212863922119 2023-01-24 02:44:43.638774: step: 484/463, loss: 0.015823766589164734 2023-01-24 02:44:44.275848: step: 486/463, loss: 0.03506346419453621 2023-01-24 02:44:44.876360: step: 488/463, loss: 0.09809498488903046 2023-01-24 02:44:45.515502: step: 490/463, loss: 0.2927551865577698 2023-01-24 02:44:46.087365: step: 492/463, loss: 0.6947611570358276 2023-01-24 02:44:46.721422: step: 494/463, loss: 0.7500637769699097 2023-01-24 02:44:47.387391: step: 496/463, loss: 0.13056710362434387 2023-01-24 02:44:48.084519: step: 498/463, loss: 0.013169165700674057 2023-01-24 02:44:48.669706: step: 500/463, loss: 0.01657673716545105 2023-01-24 02:44:49.277223: step: 502/463, loss: 0.08607902377843857 2023-01-24 02:44:49.927232: step: 504/463, loss: 0.04229207709431648 2023-01-24 02:44:50.559818: step: 506/463, loss: 0.01435808651149273 2023-01-24 02:44:51.248705: step: 508/463, loss: 0.0861494243144989 2023-01-24 02:44:51.889994: step: 510/463, loss: 0.03750699758529663 2023-01-24 02:44:52.517442: step: 512/463, loss: 0.20145943760871887 2023-01-24 02:44:53.132703: step: 514/463, loss: 0.20735104382038116 2023-01-24 02:44:53.697217: step: 516/463, loss: 0.06900104135274887 2023-01-24 02:44:54.286767: step: 518/463, loss: 0.02022702246904373 2023-01-24 02:44:54.924259: step: 520/463, loss: 0.1689458042383194 2023-01-24 02:44:55.589194: step: 522/463, loss: 0.0423530638217926 2023-01-24 02:44:56.154840: step: 524/463, loss: 0.02521570771932602 2023-01-24 02:44:56.768270: step: 526/463, loss: 0.019922278821468353 2023-01-24 02:44:57.362124: step: 528/463, loss: 0.5382683873176575 2023-01-24 02:44:57.938691: step: 530/463, loss: 8.46787166595459 2023-01-24 02:44:58.552964: step: 532/463, loss: 0.08376821875572205 2023-01-24 02:44:59.231121: step: 534/463, loss: 0.0022371658124029636 2023-01-24 02:44:59.890539: step: 536/463, loss: 0.08728696405887604 2023-01-24 02:45:00.524156: step: 538/463, loss: 0.06019327789545059 2023-01-24 02:45:01.134018: step: 540/463, loss: 0.06385639309883118 2023-01-24 02:45:01.791199: step: 542/463, loss: 0.03904721513390541 2023-01-24 02:45:02.410308: step: 544/463, loss: 0.15966156125068665 2023-01-24 02:45:02.995030: step: 546/463, loss: 0.06001008674502373 2023-01-24 02:45:03.536342: step: 548/463, loss: 0.01917579211294651 2023-01-24 02:45:04.167726: step: 550/463, loss: 0.057754792273044586 2023-01-24 02:45:04.886988: step: 552/463, loss: 0.21190311014652252 2023-01-24 02:45:05.611809: step: 554/463, loss: 0.021517438814044 2023-01-24 02:45:06.306370: step: 556/463, loss: 0.05135061591863632 2023-01-24 02:45:06.962910: step: 558/463, loss: 0.4718446731567383 2023-01-24 02:45:07.602147: step: 560/463, loss: 0.029273126274347305 2023-01-24 02:45:08.223483: step: 562/463, loss: 0.028569400310516357 2023-01-24 02:45:08.870710: step: 564/463, loss: 0.25605496764183044 2023-01-24 02:45:09.418309: step: 566/463, loss: 0.016277015209197998 2023-01-24 02:45:10.006132: step: 568/463, loss: 0.06891877204179764 2023-01-24 02:45:10.600987: step: 570/463, loss: 0.3750668168067932 2023-01-24 02:45:11.271064: step: 572/463, loss: 0.0220035370439291 2023-01-24 02:45:11.880032: step: 574/463, loss: 0.040341295301914215 2023-01-24 02:45:12.475056: step: 576/463, loss: 0.10551276803016663 2023-01-24 02:45:13.074927: step: 578/463, loss: 0.06054641678929329 2023-01-24 02:45:13.710247: step: 580/463, loss: 0.11303631961345673 2023-01-24 02:45:14.389210: step: 582/463, loss: 0.012128137052059174 2023-01-24 02:45:14.987287: step: 584/463, loss: 0.0325421467423439 2023-01-24 02:45:15.621989: step: 586/463, loss: 0.03865436464548111 2023-01-24 02:45:16.296148: step: 588/463, loss: 0.021554769948124886 2023-01-24 02:45:16.951824: step: 590/463, loss: 0.060039252042770386 2023-01-24 02:45:17.607613: step: 592/463, loss: 0.1342383623123169 2023-01-24 02:45:18.196739: step: 594/463, loss: 0.023065274581313133 2023-01-24 02:45:18.783415: step: 596/463, loss: 0.043455976992845535 2023-01-24 02:45:19.406723: step: 598/463, loss: 0.05807381495833397 2023-01-24 02:45:20.028355: step: 600/463, loss: 0.18533416092395782 2023-01-24 02:45:20.565937: step: 602/463, loss: 0.059211406856775284 2023-01-24 02:45:21.202282: step: 604/463, loss: 0.011857549659907818 2023-01-24 02:45:21.842419: step: 606/463, loss: 0.09485988318920135 2023-01-24 02:45:22.493313: step: 608/463, loss: 0.08637198805809021 2023-01-24 02:45:23.066333: step: 610/463, loss: 0.041609227657318115 2023-01-24 02:45:23.707055: step: 612/463, loss: 0.06289152055978775 2023-01-24 02:45:24.321738: step: 614/463, loss: 0.14114490151405334 2023-01-24 02:45:24.917668: step: 616/463, loss: 0.07924644649028778 2023-01-24 02:45:25.424080: step: 618/463, loss: 0.013395792804658413 2023-01-24 02:45:26.007005: step: 620/463, loss: 0.027372779324650764 2023-01-24 02:45:26.706956: step: 622/463, loss: 0.012691386975347996 2023-01-24 02:45:27.337126: step: 624/463, loss: 0.18363018333911896 2023-01-24 02:45:27.872271: step: 626/463, loss: 0.021132204681634903 2023-01-24 02:45:28.554016: step: 628/463, loss: 0.06695834547281265 2023-01-24 02:45:29.138669: step: 630/463, loss: 0.10878331959247589 2023-01-24 02:45:29.783938: step: 632/463, loss: 0.05908231437206268 2023-01-24 02:45:30.351353: step: 634/463, loss: 0.04573296383023262 2023-01-24 02:45:30.943555: step: 636/463, loss: 0.06109517440199852 2023-01-24 02:45:31.628848: step: 638/463, loss: 0.047686848789453506 2023-01-24 02:45:32.177404: step: 640/463, loss: 0.04905715957283974 2023-01-24 02:45:32.809522: step: 642/463, loss: 0.017701629549264908 2023-01-24 02:45:33.456666: step: 644/463, loss: 0.01931239850819111 2023-01-24 02:45:34.059077: step: 646/463, loss: 0.014090669341385365 2023-01-24 02:45:34.684755: step: 648/463, loss: 0.46045982837677 2023-01-24 02:45:35.282368: step: 650/463, loss: 0.011598912999033928 2023-01-24 02:45:35.962610: step: 652/463, loss: 0.4908623993396759 2023-01-24 02:45:36.655095: step: 654/463, loss: 0.04658355563879013 2023-01-24 02:45:37.298073: step: 656/463, loss: 0.03780944272875786 2023-01-24 02:45:37.906533: step: 658/463, loss: 0.043316230177879333 2023-01-24 02:45:38.523369: step: 660/463, loss: 0.03826010972261429 2023-01-24 02:45:39.262122: step: 662/463, loss: 0.25017714500427246 2023-01-24 02:45:39.849201: step: 664/463, loss: 0.06542626023292542 2023-01-24 02:45:40.493960: step: 666/463, loss: 0.01193663664162159 2023-01-24 02:45:41.111401: step: 668/463, loss: 0.7353683710098267 2023-01-24 02:45:41.722942: step: 670/463, loss: 0.11667991429567337 2023-01-24 02:45:42.313333: step: 672/463, loss: 0.09642888605594635 2023-01-24 02:45:42.927231: step: 674/463, loss: 0.0075606112368404865 2023-01-24 02:45:43.584698: step: 676/463, loss: 0.012751021422445774 2023-01-24 02:45:44.151676: step: 678/463, loss: 0.09923321008682251 2023-01-24 02:45:44.864478: step: 680/463, loss: 0.004397675395011902 2023-01-24 02:45:45.527304: step: 682/463, loss: 0.1764136701822281 2023-01-24 02:45:46.146238: step: 684/463, loss: 0.0556979775428772 2023-01-24 02:45:46.702817: step: 686/463, loss: 0.15813681483268738 2023-01-24 02:45:47.235770: step: 688/463, loss: 0.02768089435994625 2023-01-24 02:45:47.836804: step: 690/463, loss: 0.5395810008049011 2023-01-24 02:45:48.500406: step: 692/463, loss: 0.06268332153558731 2023-01-24 02:45:49.115174: step: 694/463, loss: 0.01339671015739441 2023-01-24 02:45:49.692973: step: 696/463, loss: 0.025722624734044075 2023-01-24 02:45:50.288523: step: 698/463, loss: 0.06820755451917648 2023-01-24 02:45:50.926794: step: 700/463, loss: 0.09104981273412704 2023-01-24 02:45:51.540184: step: 702/463, loss: 0.7487509846687317 2023-01-24 02:45:52.191243: step: 704/463, loss: 0.2617780566215515 2023-01-24 02:45:52.766548: step: 706/463, loss: 0.004109368193894625 2023-01-24 02:45:53.410483: step: 708/463, loss: 0.15713359415531158 2023-01-24 02:45:54.008496: step: 710/463, loss: 0.9107391238212585 2023-01-24 02:45:54.686594: step: 712/463, loss: 0.13343945145606995 2023-01-24 02:45:55.219727: step: 714/463, loss: 0.006756212562322617 2023-01-24 02:45:55.861366: step: 716/463, loss: 0.01382492296397686 2023-01-24 02:45:56.503030: step: 718/463, loss: 0.030257657170295715 2023-01-24 02:45:57.127819: step: 720/463, loss: 0.06503091752529144 2023-01-24 02:45:57.749509: step: 722/463, loss: 0.012537148781120777 2023-01-24 02:45:58.357420: step: 724/463, loss: 0.025673488155007362 2023-01-24 02:45:58.899451: step: 726/463, loss: 0.03498581424355507 2023-01-24 02:45:59.500418: step: 728/463, loss: 0.0922277495265007 2023-01-24 02:46:00.088210: step: 730/463, loss: 0.01603466272354126 2023-01-24 02:46:00.693304: step: 732/463, loss: 0.10051598399877548 2023-01-24 02:46:01.329907: step: 734/463, loss: 0.04973762854933739 2023-01-24 02:46:02.029748: step: 736/463, loss: 0.04688718169927597 2023-01-24 02:46:02.635422: step: 738/463, loss: 0.12407837063074112 2023-01-24 02:46:03.223207: step: 740/463, loss: 0.005452557932585478 2023-01-24 02:46:03.829819: step: 742/463, loss: 0.039612412452697754 2023-01-24 02:46:04.480384: step: 744/463, loss: 0.0330551452934742 2023-01-24 02:46:05.096959: step: 746/463, loss: 0.038212452083826065 2023-01-24 02:46:05.700584: step: 748/463, loss: 0.0795302540063858 2023-01-24 02:46:06.295119: step: 750/463, loss: 0.01583004929125309 2023-01-24 02:46:06.887660: step: 752/463, loss: 0.019439078867435455 2023-01-24 02:46:07.483155: step: 754/463, loss: 0.11109906435012817 2023-01-24 02:46:08.117311: step: 756/463, loss: 0.10784592479467392 2023-01-24 02:46:08.793949: step: 758/463, loss: 0.04472780600190163 2023-01-24 02:46:09.430912: step: 760/463, loss: 0.05449187755584717 2023-01-24 02:46:10.041072: step: 762/463, loss: 0.022585907950997353 2023-01-24 02:46:10.885430: step: 764/463, loss: 1.8076122999191284 2023-01-24 02:46:11.438626: step: 766/463, loss: 0.01235184259712696 2023-01-24 02:46:12.136787: step: 768/463, loss: 0.03791683912277222 2023-01-24 02:46:12.720330: step: 770/463, loss: 0.038350652903318405 2023-01-24 02:46:13.318252: step: 772/463, loss: 0.015076812356710434 2023-01-24 02:46:13.903518: step: 774/463, loss: 0.05062127113342285 2023-01-24 02:46:14.545783: step: 776/463, loss: 0.11897704005241394 2023-01-24 02:46:15.162526: step: 778/463, loss: 0.025867735967040062 2023-01-24 02:46:15.738123: step: 780/463, loss: 0.010863203555345535 2023-01-24 02:46:16.348734: step: 782/463, loss: 0.0549979992210865 2023-01-24 02:46:17.013832: step: 784/463, loss: 0.07117492705583572 2023-01-24 02:46:17.639463: step: 786/463, loss: 0.05387565866112709 2023-01-24 02:46:18.326310: step: 788/463, loss: 0.024039993062615395 2023-01-24 02:46:19.071608: step: 790/463, loss: 0.04744725301861763 2023-01-24 02:46:19.712519: step: 792/463, loss: 0.09313289821147919 2023-01-24 02:46:20.342491: step: 794/463, loss: 0.020370492711663246 2023-01-24 02:46:20.957363: step: 796/463, loss: 0.1677919179201126 2023-01-24 02:46:21.537879: step: 798/463, loss: 0.015784895047545433 2023-01-24 02:46:22.122509: step: 800/463, loss: 0.045689504593610764 2023-01-24 02:46:22.740168: step: 802/463, loss: 0.0012720375088974833 2023-01-24 02:46:23.307799: step: 804/463, loss: 0.040277477353811264 2023-01-24 02:46:23.952288: step: 806/463, loss: 0.05956878513097763 2023-01-24 02:46:24.564449: step: 808/463, loss: 0.07988844066858292 2023-01-24 02:46:25.207103: step: 810/463, loss: 0.024668551981449127 2023-01-24 02:46:25.825993: step: 812/463, loss: 0.027776291593909264 2023-01-24 02:46:26.496384: step: 814/463, loss: 0.06997717171907425 2023-01-24 02:46:27.159503: step: 816/463, loss: 0.04439396783709526 2023-01-24 02:46:27.760250: step: 818/463, loss: 0.10873216390609741 2023-01-24 02:46:28.488098: step: 820/463, loss: 0.3765268921852112 2023-01-24 02:46:29.065303: step: 822/463, loss: 0.089633509516716 2023-01-24 02:46:29.651456: step: 824/463, loss: 0.012148439884185791 2023-01-24 02:46:30.255037: step: 826/463, loss: 0.018590301275253296 2023-01-24 02:46:30.908466: step: 828/463, loss: 0.03449949622154236 2023-01-24 02:46:31.496586: step: 830/463, loss: 0.048406459391117096 2023-01-24 02:46:32.153818: step: 832/463, loss: 0.013000254519283772 2023-01-24 02:46:32.805541: step: 834/463, loss: 0.016134848818182945 2023-01-24 02:46:33.480962: step: 836/463, loss: 0.023261623457074165 2023-01-24 02:46:34.089795: step: 838/463, loss: 0.04515976086258888 2023-01-24 02:46:34.737648: step: 840/463, loss: 0.04853673651814461 2023-01-24 02:46:35.469169: step: 842/463, loss: 0.03299034386873245 2023-01-24 02:46:36.105725: step: 844/463, loss: 0.0691247284412384 2023-01-24 02:46:36.792379: step: 846/463, loss: 0.03056233562529087 2023-01-24 02:46:37.373758: step: 848/463, loss: 0.023293346166610718 2023-01-24 02:46:38.047890: step: 850/463, loss: 0.22583970427513123 2023-01-24 02:46:38.658457: step: 852/463, loss: 0.07189960032701492 2023-01-24 02:46:39.265364: step: 854/463, loss: 0.010108246468007565 2023-01-24 02:46:39.883424: step: 856/463, loss: 0.025587771087884903 2023-01-24 02:46:40.526583: step: 858/463, loss: 0.01010210532695055 2023-01-24 02:46:41.267584: step: 860/463, loss: 1.3255618810653687 2023-01-24 02:46:41.925637: step: 862/463, loss: 0.10920163244009018 2023-01-24 02:46:42.555195: step: 864/463, loss: 0.042911626398563385 2023-01-24 02:46:43.164806: step: 866/463, loss: 0.0149594871327281 2023-01-24 02:46:43.780873: step: 868/463, loss: 0.026271982118487358 2023-01-24 02:46:44.410473: step: 870/463, loss: 0.1612677127122879 2023-01-24 02:46:45.005625: step: 872/463, loss: 0.011072476394474506 2023-01-24 02:46:45.674103: step: 874/463, loss: 0.02402637153863907 2023-01-24 02:46:46.279212: step: 876/463, loss: 0.011492603458464146 2023-01-24 02:46:46.876091: step: 878/463, loss: 0.04561467096209526 2023-01-24 02:46:47.492085: step: 880/463, loss: 0.03588796406984329 2023-01-24 02:46:48.135534: step: 882/463, loss: 0.00598631938919425 2023-01-24 02:46:48.717061: step: 884/463, loss: 0.01217762753367424 2023-01-24 02:46:49.277731: step: 886/463, loss: 0.033257659524679184 2023-01-24 02:46:49.866306: step: 888/463, loss: 0.02424241602420807 2023-01-24 02:46:50.535228: step: 890/463, loss: 0.14332902431488037 2023-01-24 02:46:51.021392: step: 892/463, loss: 0.08317562937736511 2023-01-24 02:46:51.598260: step: 894/463, loss: 0.010957978665828705 2023-01-24 02:46:52.231125: step: 896/463, loss: 0.25936129689216614 2023-01-24 02:46:52.921705: step: 898/463, loss: 0.3190154731273651 2023-01-24 02:46:53.563900: step: 900/463, loss: 0.02404329553246498 2023-01-24 02:46:54.248110: step: 902/463, loss: 0.006931992247700691 2023-01-24 02:46:54.852229: step: 904/463, loss: 0.05773597210645676 2023-01-24 02:46:55.477826: step: 906/463, loss: 0.06446735560894012 2023-01-24 02:46:56.101964: step: 908/463, loss: 0.0477750338613987 2023-01-24 02:46:56.696468: step: 910/463, loss: 0.018832825124263763 2023-01-24 02:46:57.292004: step: 912/463, loss: 0.16611790657043457 2023-01-24 02:46:57.914785: step: 914/463, loss: 0.11244844645261765 2023-01-24 02:46:58.488071: step: 916/463, loss: 0.06754232197999954 2023-01-24 02:46:59.186483: step: 918/463, loss: 0.5503419637680054 2023-01-24 02:46:59.796383: step: 920/463, loss: 0.01725826971232891 2023-01-24 02:47:00.472779: step: 922/463, loss: 0.09535322338342667 2023-01-24 02:47:01.078335: step: 924/463, loss: 0.029331080615520477 2023-01-24 02:47:01.704643: step: 926/463, loss: 0.01061540748924017 ================================================== Loss: 0.146 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35059529795425814, 'r': 0.300700331452229, 'f1': 0.3237366183356991}, 'combined': 0.23854277140525196, 'epoch': 22} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3443169240464443, 'r': 0.3816601198529521, 'f1': 0.36202808230166683}, 'combined': 0.2806150685783255, 'epoch': 22} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3213695843630054, 'r': 0.3012458722491929, 'f1': 0.310982516504064}, 'combined': 0.22914501216088926, 'epoch': 22} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3304969484889256, 'r': 0.38031450322438864, 'f1': 0.35365998248558533}, 'combined': 0.27412879025198483, 'epoch': 22} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33381760805897326, 'r': 0.3040463982320819, 'f1': 0.3182372430353667}, 'combined': 0.23449060013132283, 'epoch': 22} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3245692844136627, 'r': 0.3663337143933619, 'f1': 0.3441891893436768}, 'combined': 0.2667877926970127, 'epoch': 22} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39, 'r': 0.2785714285714286, 'f1': 0.325}, 'combined': 0.21666666666666667, 'epoch': 22} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2826086956521739, 'r': 0.2826086956521739, 'f1': 0.2826086956521739}, 'combined': 0.14130434782608695, 'epoch': 22} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.1724137931034483, 'f1': 0.25641025641025644}, 'combined': 0.17094017094017094, 'epoch': 22} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35531381856540084, 'r': 0.3195801707779886, 'f1': 0.336500999000999}, 'combined': 0.24794810452705188, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35077406976066583, 'r': 0.38269193088778525, 'f1': 0.3660385237854157}, 'combined': 0.2837236404461117, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.3142857142857143, 'f1': 0.360655737704918}, 'combined': 0.24043715846994532, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3556843389529725, 'r': 0.32598773380319873, 'f1': 0.340189179632249}, 'combined': 0.25066571130797294, 'epoch': 19} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35235259195938173, 'r': 0.3698406801632481, 'f1': 0.3608848968767838}, 'combined': 0.2797289631293731, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.41304347826086957, 'r': 0.41304347826086957, 'f1': 0.41304347826086957}, 'combined': 0.20652173913043478, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33842201576576575, 'r': 0.3326425126502214, 'f1': 0.3355073763955343}, 'combined': 0.2472159615546042, 'epoch': 14} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33644544699224593, 'r': 0.3627302475385151, 'f1': 0.3490937720671424}, 'combined': 0.2705894309802731, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.20689655172413793, 'f1': 0.2926829268292683}, 'combined': 0.19512195121951217, 'epoch': 14} ****************************** Epoch: 23 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:49:39.183053: step: 2/463, loss: 0.014017235487699509 2023-01-24 02:49:39.823578: step: 4/463, loss: 0.03739186003804207 2023-01-24 02:49:40.389753: step: 6/463, loss: 0.005126425065100193 2023-01-24 02:49:41.093698: step: 8/463, loss: 0.17005835473537445 2023-01-24 02:49:41.719098: step: 10/463, loss: 0.05091765895485878 2023-01-24 02:49:42.384094: step: 12/463, loss: 0.10212653875350952 2023-01-24 02:49:42.968179: step: 14/463, loss: 0.10305936634540558 2023-01-24 02:49:43.565344: step: 16/463, loss: 0.05862724408507347 2023-01-24 02:49:44.136992: step: 18/463, loss: 0.1589479297399521 2023-01-24 02:49:44.765856: step: 20/463, loss: 0.07201798260211945 2023-01-24 02:49:45.365703: step: 22/463, loss: 0.014142402447760105 2023-01-24 02:49:45.996921: step: 24/463, loss: 0.16887494921684265 2023-01-24 02:49:46.623408: step: 26/463, loss: 0.024210674688220024 2023-01-24 02:49:47.261336: step: 28/463, loss: 0.03990819677710533 2023-01-24 02:49:47.869136: step: 30/463, loss: 0.045158859342336655 2023-01-24 02:49:48.484524: step: 32/463, loss: 0.033288709819316864 2023-01-24 02:49:49.073437: step: 34/463, loss: 0.016043180599808693 2023-01-24 02:49:49.701382: step: 36/463, loss: 0.011747236363589764 2023-01-24 02:49:50.334522: step: 38/463, loss: 0.4941384494304657 2023-01-24 02:49:51.013341: step: 40/463, loss: 0.013136547058820724 2023-01-24 02:49:51.682542: step: 42/463, loss: 0.02664070390164852 2023-01-24 02:49:52.326050: step: 44/463, loss: 0.0012856022221967578 2023-01-24 02:49:52.959641: step: 46/463, loss: 0.049490366131067276 2023-01-24 02:49:53.582206: step: 48/463, loss: 0.2771098017692566 2023-01-24 02:49:54.213025: step: 50/463, loss: 0.13030368089675903 2023-01-24 02:49:54.871343: step: 52/463, loss: 0.27063220739364624 2023-01-24 02:49:55.512425: step: 54/463, loss: 0.36873123049736023 2023-01-24 02:49:56.118762: step: 56/463, loss: 0.006062122527509928 2023-01-24 02:49:56.761459: step: 58/463, loss: 0.053145766258239746 2023-01-24 02:49:57.476390: step: 60/463, loss: 0.02280028536915779 2023-01-24 02:49:58.071155: step: 62/463, loss: 0.04428766295313835 2023-01-24 02:49:58.682902: step: 64/463, loss: 0.03361102566123009 2023-01-24 02:49:59.266633: step: 66/463, loss: 0.07194822281599045 2023-01-24 02:49:59.874341: step: 68/463, loss: 0.021001052111387253 2023-01-24 02:50:00.528400: step: 70/463, loss: 0.037330612540245056 2023-01-24 02:50:01.221172: step: 72/463, loss: 0.040480900555849075 2023-01-24 02:50:01.875453: step: 74/463, loss: 0.036421723663806915 2023-01-24 02:50:02.505431: step: 76/463, loss: 0.18305079638957977 2023-01-24 02:50:03.152739: step: 78/463, loss: 0.029274918138980865 2023-01-24 02:50:03.819458: step: 80/463, loss: 0.04791848734021187 2023-01-24 02:50:04.453801: step: 82/463, loss: 0.042708199471235275 2023-01-24 02:50:05.031322: step: 84/463, loss: 0.00914548896253109 2023-01-24 02:50:05.620476: step: 86/463, loss: 0.09204190969467163 2023-01-24 02:50:06.167183: step: 88/463, loss: 0.07539556175470352 2023-01-24 02:50:06.794546: step: 90/463, loss: 0.05940885469317436 2023-01-24 02:50:07.525363: step: 92/463, loss: 0.02225893922150135 2023-01-24 02:50:08.173023: step: 94/463, loss: 0.013536185957491398 2023-01-24 02:50:08.789469: step: 96/463, loss: 0.028028864413499832 2023-01-24 02:50:09.365161: step: 98/463, loss: 0.03247951716184616 2023-01-24 02:50:10.049447: step: 100/463, loss: 0.17127849161624908 2023-01-24 02:50:10.632529: step: 102/463, loss: 0.014644253998994827 2023-01-24 02:50:11.271950: step: 104/463, loss: 0.03772616758942604 2023-01-24 02:50:11.950975: step: 106/463, loss: 0.05379296839237213 2023-01-24 02:50:12.545945: step: 108/463, loss: 0.052070584148168564 2023-01-24 02:50:13.115642: step: 110/463, loss: 0.02765987627208233 2023-01-24 02:50:13.754815: step: 112/463, loss: 0.006331499200314283 2023-01-24 02:50:14.403500: step: 114/463, loss: 0.006876499857753515 2023-01-24 02:50:15.000913: step: 116/463, loss: 0.02339460887014866 2023-01-24 02:50:15.597866: step: 118/463, loss: 0.01504096295684576 2023-01-24 02:50:16.220908: step: 120/463, loss: 0.011009931564331055 2023-01-24 02:50:16.912775: step: 122/463, loss: 0.06653671711683273 2023-01-24 02:50:17.505098: step: 124/463, loss: 0.06888808310031891 2023-01-24 02:50:18.136661: step: 126/463, loss: 0.09010745584964752 2023-01-24 02:50:18.752281: step: 128/463, loss: 0.02575107105076313 2023-01-24 02:50:19.369385: step: 130/463, loss: 0.03347647190093994 2023-01-24 02:50:20.024093: step: 132/463, loss: 0.03668920695781708 2023-01-24 02:50:20.614069: step: 134/463, loss: 0.24982771277427673 2023-01-24 02:50:21.272207: step: 136/463, loss: 0.013085935264825821 2023-01-24 02:50:21.898589: step: 138/463, loss: 0.017120778560638428 2023-01-24 02:50:22.501730: step: 140/463, loss: 0.005012511275708675 2023-01-24 02:50:23.131202: step: 142/463, loss: 0.022262532263994217 2023-01-24 02:50:23.822110: step: 144/463, loss: 0.026318036019802094 2023-01-24 02:50:24.382977: step: 146/463, loss: 0.10604467242956161 2023-01-24 02:50:24.961331: step: 148/463, loss: 0.022634495049715042 2023-01-24 02:50:25.520194: step: 150/463, loss: 0.02065012790262699 2023-01-24 02:50:26.133305: step: 152/463, loss: 0.014927259646356106 2023-01-24 02:50:26.801580: step: 154/463, loss: 0.052358001470565796 2023-01-24 02:50:27.472129: step: 156/463, loss: 0.0679406225681305 2023-01-24 02:50:28.140717: step: 158/463, loss: 0.0314582921564579 2023-01-24 02:50:28.764354: step: 160/463, loss: 0.19728310406208038 2023-01-24 02:50:29.331255: step: 162/463, loss: 0.03380490839481354 2023-01-24 02:50:29.956246: step: 164/463, loss: 0.037048306316137314 2023-01-24 02:50:30.517732: step: 166/463, loss: 0.06547125428915024 2023-01-24 02:50:31.113465: step: 168/463, loss: 0.019977785646915436 2023-01-24 02:50:31.775282: step: 170/463, loss: 0.0842510312795639 2023-01-24 02:50:32.416711: step: 172/463, loss: 0.01584748551249504 2023-01-24 02:50:33.025730: step: 174/463, loss: 0.01813764125108719 2023-01-24 02:50:33.768056: step: 176/463, loss: 0.03895140811800957 2023-01-24 02:50:34.350995: step: 178/463, loss: 0.16105754673480988 2023-01-24 02:50:34.972603: step: 180/463, loss: 0.00824294425547123 2023-01-24 02:50:35.494371: step: 182/463, loss: 0.011547056958079338 2023-01-24 02:50:36.133623: step: 184/463, loss: 0.047988519072532654 2023-01-24 02:50:36.723535: step: 186/463, loss: 0.07069321721792221 2023-01-24 02:50:37.369198: step: 188/463, loss: 0.011686659418046474 2023-01-24 02:50:37.979658: step: 190/463, loss: 0.03281664475798607 2023-01-24 02:50:38.586668: step: 192/463, loss: 0.021711131557822227 2023-01-24 02:50:39.187341: step: 194/463, loss: 0.00901354942470789 2023-01-24 02:50:39.842965: step: 196/463, loss: 0.0044713797979056835 2023-01-24 02:50:40.456003: step: 198/463, loss: 0.06275229156017303 2023-01-24 02:50:41.127101: step: 200/463, loss: 0.09739736467599869 2023-01-24 02:50:41.761901: step: 202/463, loss: 0.024020083248615265 2023-01-24 02:50:42.385403: step: 204/463, loss: 0.029642995446920395 2023-01-24 02:50:43.027556: step: 206/463, loss: 0.08722873777151108 2023-01-24 02:50:43.613147: step: 208/463, loss: 0.04675403982400894 2023-01-24 02:50:44.178336: step: 210/463, loss: 0.0379650853574276 2023-01-24 02:50:44.787296: step: 212/463, loss: 0.05018343776464462 2023-01-24 02:50:45.424067: step: 214/463, loss: 0.00901576317846775 2023-01-24 02:50:45.972178: step: 216/463, loss: 0.0041709220968186855 2023-01-24 02:50:46.618583: step: 218/463, loss: 0.049517132341861725 2023-01-24 02:50:47.284448: step: 220/463, loss: 0.049965955317020416 2023-01-24 02:50:47.910754: step: 222/463, loss: 0.04325241968035698 2023-01-24 02:50:48.519205: step: 224/463, loss: 0.0521477572619915 2023-01-24 02:50:49.197789: step: 226/463, loss: 0.012771560810506344 2023-01-24 02:50:49.737791: step: 228/463, loss: 0.06846003234386444 2023-01-24 02:50:50.351942: step: 230/463, loss: 0.04135928675532341 2023-01-24 02:50:50.942051: step: 232/463, loss: 0.039707936346530914 2023-01-24 02:50:51.604858: step: 234/463, loss: 0.860537588596344 2023-01-24 02:50:52.286189: step: 236/463, loss: 0.4724205434322357 2023-01-24 02:50:52.832433: step: 238/463, loss: 0.11152902245521545 2023-01-24 02:50:53.437591: step: 240/463, loss: 0.2870498299598694 2023-01-24 02:50:54.079605: step: 242/463, loss: 0.08885440230369568 2023-01-24 02:50:54.785132: step: 244/463, loss: 0.025001036003232002 2023-01-24 02:50:55.300756: step: 246/463, loss: 0.016671447083353996 2023-01-24 02:50:55.919984: step: 248/463, loss: 0.02292332425713539 2023-01-24 02:50:56.558537: step: 250/463, loss: 0.058331362903118134 2023-01-24 02:50:57.213740: step: 252/463, loss: 0.4987170696258545 2023-01-24 02:50:57.810889: step: 254/463, loss: 0.008618383668363094 2023-01-24 02:50:58.381642: step: 256/463, loss: 0.0457315631210804 2023-01-24 02:50:59.062515: step: 258/463, loss: 0.046473924070596695 2023-01-24 02:50:59.711904: step: 260/463, loss: 0.028331683948636055 2023-01-24 02:51:00.361937: step: 262/463, loss: 0.014770310372114182 2023-01-24 02:51:01.007557: step: 264/463, loss: 0.11662633717060089 2023-01-24 02:51:01.617472: step: 266/463, loss: 0.023096852004528046 2023-01-24 02:51:02.422207: step: 268/463, loss: 0.015185844153165817 2023-01-24 02:51:03.060239: step: 270/463, loss: 0.026402989402413368 2023-01-24 02:51:03.763995: step: 272/463, loss: 0.007479024585336447 2023-01-24 02:51:04.351819: step: 274/463, loss: 0.03724350035190582 2023-01-24 02:51:05.099623: step: 276/463, loss: 0.0006274238112382591 2023-01-24 02:51:05.742081: step: 278/463, loss: 0.01200056355446577 2023-01-24 02:51:06.335440: step: 280/463, loss: 0.07404100149869919 2023-01-24 02:51:06.929902: step: 282/463, loss: 0.052078571170568466 2023-01-24 02:51:07.554958: step: 284/463, loss: 0.05625596269965172 2023-01-24 02:51:08.197004: step: 286/463, loss: 0.08361687511205673 2023-01-24 02:51:08.805897: step: 288/463, loss: 0.0884314775466919 2023-01-24 02:51:09.387627: step: 290/463, loss: 0.006218751892447472 2023-01-24 02:51:09.977346: step: 292/463, loss: 0.01155450101941824 2023-01-24 02:51:10.604037: step: 294/463, loss: 0.02805587835609913 2023-01-24 02:51:11.199120: step: 296/463, loss: 0.01961454004049301 2023-01-24 02:51:11.850830: step: 298/463, loss: 0.03049517422914505 2023-01-24 02:51:12.413775: step: 300/463, loss: 0.012401064857840538 2023-01-24 02:51:13.004920: step: 302/463, loss: 0.06635325402021408 2023-01-24 02:51:13.591373: step: 304/463, loss: 0.04234880208969116 2023-01-24 02:51:14.154596: step: 306/463, loss: 0.24127665162086487 2023-01-24 02:51:14.744368: step: 308/463, loss: 0.03843383118510246 2023-01-24 02:51:15.391350: step: 310/463, loss: 0.0485689714550972 2023-01-24 02:51:16.032103: step: 312/463, loss: 0.05422547459602356 2023-01-24 02:51:16.741748: step: 314/463, loss: 0.020614638924598694 2023-01-24 02:51:17.325843: step: 316/463, loss: 0.04707881063222885 2023-01-24 02:51:17.972902: step: 318/463, loss: 0.1666991263628006 2023-01-24 02:51:18.584821: step: 320/463, loss: 0.023372367024421692 2023-01-24 02:51:19.177126: step: 322/463, loss: 0.013147193938493729 2023-01-24 02:51:19.822117: step: 324/463, loss: 0.40948766469955444 2023-01-24 02:51:20.412028: step: 326/463, loss: 0.11535700410604477 2023-01-24 02:51:21.007978: step: 328/463, loss: 0.04028856009244919 2023-01-24 02:51:21.594990: step: 330/463, loss: 1.2974011898040771 2023-01-24 02:51:22.220018: step: 332/463, loss: 0.1290600448846817 2023-01-24 02:51:22.842981: step: 334/463, loss: 0.022823981940746307 2023-01-24 02:51:23.666653: step: 336/463, loss: 0.007736002095043659 2023-01-24 02:51:24.276416: step: 338/463, loss: 0.008360418491065502 2023-01-24 02:51:24.888522: step: 340/463, loss: 0.0717049241065979 2023-01-24 02:51:25.438929: step: 342/463, loss: 0.009852655231952667 2023-01-24 02:51:26.052159: step: 344/463, loss: 0.039784081280231476 2023-01-24 02:51:26.748595: step: 346/463, loss: 0.0010238605318590999 2023-01-24 02:51:27.390814: step: 348/463, loss: 0.01217496208846569 2023-01-24 02:51:27.959236: step: 350/463, loss: 0.03211135417222977 2023-01-24 02:51:28.632016: step: 352/463, loss: 0.007680355105549097 2023-01-24 02:51:29.235732: step: 354/463, loss: 0.022676914930343628 2023-01-24 02:51:29.827753: step: 356/463, loss: 0.1013747975230217 2023-01-24 02:51:30.388838: step: 358/463, loss: 0.0569462925195694 2023-01-24 02:51:30.970620: step: 360/463, loss: 0.038382649421691895 2023-01-24 02:51:31.627017: step: 362/463, loss: 0.027897289022803307 2023-01-24 02:51:32.283305: step: 364/463, loss: 0.012575906701385975 2023-01-24 02:51:32.969945: step: 366/463, loss: 1.4045130014419556 2023-01-24 02:51:33.599542: step: 368/463, loss: 0.03215888887643814 2023-01-24 02:51:34.209506: step: 370/463, loss: 0.042204152792692184 2023-01-24 02:51:34.799478: step: 372/463, loss: 0.08064927160739899 2023-01-24 02:51:35.431253: step: 374/463, loss: 0.030682578682899475 2023-01-24 02:51:36.081038: step: 376/463, loss: 0.020798666402697563 2023-01-24 02:51:36.744634: step: 378/463, loss: 0.013296670280396938 2023-01-24 02:51:37.406086: step: 380/463, loss: 0.009307530708611012 2023-01-24 02:51:38.026201: step: 382/463, loss: 0.050121158361434937 2023-01-24 02:51:38.644844: step: 384/463, loss: 0.033195070922374725 2023-01-24 02:51:39.296526: step: 386/463, loss: 0.09362410008907318 2023-01-24 02:51:39.951908: step: 388/463, loss: 0.024581091478466988 2023-01-24 02:51:40.617873: step: 390/463, loss: 0.015465802513062954 2023-01-24 02:51:41.291668: step: 392/463, loss: 0.00540462089702487 2023-01-24 02:51:41.918777: step: 394/463, loss: 0.011925828643143177 2023-01-24 02:51:42.551274: step: 396/463, loss: 0.06564971059560776 2023-01-24 02:51:43.154369: step: 398/463, loss: 0.3053116500377655 2023-01-24 02:51:43.729287: step: 400/463, loss: 0.015455009415745735 2023-01-24 02:51:44.327258: step: 402/463, loss: 0.04941024258732796 2023-01-24 02:51:44.949956: step: 404/463, loss: 0.34712353348731995 2023-01-24 02:51:45.568533: step: 406/463, loss: 0.029072783887386322 2023-01-24 02:51:46.135042: step: 408/463, loss: 0.01681593991816044 2023-01-24 02:51:46.750596: step: 410/463, loss: 0.7202590107917786 2023-01-24 02:51:47.411421: step: 412/463, loss: 0.6915916204452515 2023-01-24 02:51:48.076841: step: 414/463, loss: 0.005883147940039635 2023-01-24 02:51:48.663335: step: 416/463, loss: 0.9827921986579895 2023-01-24 02:51:49.254650: step: 418/463, loss: 0.03431384637951851 2023-01-24 02:51:49.884498: step: 420/463, loss: 0.052616264671087265 2023-01-24 02:51:50.477156: step: 422/463, loss: 0.1803695410490036 2023-01-24 02:51:51.125441: step: 424/463, loss: 0.13254913687705994 2023-01-24 02:51:51.733814: step: 426/463, loss: 0.0035180964041501284 2023-01-24 02:51:52.357851: step: 428/463, loss: 0.026401324197649956 2023-01-24 02:51:52.956074: step: 430/463, loss: 0.044410549104213715 2023-01-24 02:51:53.574949: step: 432/463, loss: 0.049371637403964996 2023-01-24 02:51:54.157334: step: 434/463, loss: 0.041617993265390396 2023-01-24 02:51:54.776156: step: 436/463, loss: 0.015298226848244667 2023-01-24 02:51:55.442854: step: 438/463, loss: 0.011941668577492237 2023-01-24 02:51:56.081737: step: 440/463, loss: 0.056415703147649765 2023-01-24 02:51:56.717686: step: 442/463, loss: 0.1737140566110611 2023-01-24 02:51:57.281364: step: 444/463, loss: 0.05153241753578186 2023-01-24 02:51:57.871951: step: 446/463, loss: 0.01888580061495304 2023-01-24 02:51:58.468174: step: 448/463, loss: 0.029533660039305687 2023-01-24 02:51:59.015187: step: 450/463, loss: 0.10582781583070755 2023-01-24 02:51:59.592377: step: 452/463, loss: 1.2449015378952026 2023-01-24 02:52:00.167468: step: 454/463, loss: 0.017602896317839622 2023-01-24 02:52:00.733024: step: 456/463, loss: 0.10646458715200424 2023-01-24 02:52:01.360945: step: 458/463, loss: 0.08195698261260986 2023-01-24 02:52:02.007866: step: 460/463, loss: 0.002595989964902401 2023-01-24 02:52:02.612066: step: 462/463, loss: 0.047766849398612976 2023-01-24 02:52:03.214142: step: 464/463, loss: 0.02091420255601406 2023-01-24 02:52:03.809922: step: 466/463, loss: 0.014015436172485352 2023-01-24 02:52:04.430259: step: 468/463, loss: 0.016696209087967873 2023-01-24 02:52:05.080437: step: 470/463, loss: 0.10650330781936646 2023-01-24 02:52:05.694069: step: 472/463, loss: 0.04598746821284294 2023-01-24 02:52:06.272414: step: 474/463, loss: 0.045018021017313004 2023-01-24 02:52:06.870135: step: 476/463, loss: 0.01934024691581726 2023-01-24 02:52:07.492522: step: 478/463, loss: 0.16567553579807281 2023-01-24 02:52:08.151347: step: 480/463, loss: 0.05272384732961655 2023-01-24 02:52:08.806791: step: 482/463, loss: 0.007351537235081196 2023-01-24 02:52:09.498447: step: 484/463, loss: 0.11825446784496307 2023-01-24 02:52:10.158794: step: 486/463, loss: 0.049544557929039 2023-01-24 02:52:10.786887: step: 488/463, loss: 0.3693927526473999 2023-01-24 02:52:11.399455: step: 490/463, loss: 0.03016485832631588 2023-01-24 02:52:12.037160: step: 492/463, loss: 0.036366693675518036 2023-01-24 02:52:12.643304: step: 494/463, loss: 0.09071648120880127 2023-01-24 02:52:13.161753: step: 496/463, loss: 0.012566464953124523 2023-01-24 02:52:13.792637: step: 498/463, loss: 0.010331925936043262 2023-01-24 02:52:14.381198: step: 500/463, loss: 0.028316188603639603 2023-01-24 02:52:15.012451: step: 502/463, loss: 0.022236144170165062 2023-01-24 02:52:15.635618: step: 504/463, loss: 0.016863614320755005 2023-01-24 02:52:16.230933: step: 506/463, loss: 0.10762310773134232 2023-01-24 02:52:16.847704: step: 508/463, loss: 0.0328141450881958 2023-01-24 02:52:17.438881: step: 510/463, loss: 0.014507602900266647 2023-01-24 02:52:18.056702: step: 512/463, loss: 0.3011797368526459 2023-01-24 02:52:18.666823: step: 514/463, loss: 0.01493346318602562 2023-01-24 02:52:19.259896: step: 516/463, loss: 0.005622792057693005 2023-01-24 02:52:19.823050: step: 518/463, loss: 0.5354915261268616 2023-01-24 02:52:20.367382: step: 520/463, loss: 0.015625547617673874 2023-01-24 02:52:20.938495: step: 522/463, loss: 0.0063484408892691135 2023-01-24 02:52:21.574028: step: 524/463, loss: 0.14776206016540527 2023-01-24 02:52:22.195476: step: 526/463, loss: 0.0332455188035965 2023-01-24 02:52:22.793356: step: 528/463, loss: 0.03845450282096863 2023-01-24 02:52:23.508585: step: 530/463, loss: 0.11550804227590561 2023-01-24 02:52:24.205699: step: 532/463, loss: 0.11595935374498367 2023-01-24 02:52:24.862933: step: 534/463, loss: 0.02594004198908806 2023-01-24 02:52:25.442139: step: 536/463, loss: 0.04169909283518791 2023-01-24 02:52:26.073378: step: 538/463, loss: 0.027592720463871956 2023-01-24 02:52:26.710622: step: 540/463, loss: 0.07582306116819382 2023-01-24 02:52:27.393654: step: 542/463, loss: 0.013903995975852013 2023-01-24 02:52:27.969368: step: 544/463, loss: 0.00185700214933604 2023-01-24 02:52:28.619734: step: 546/463, loss: 0.030083443969488144 2023-01-24 02:52:29.208071: step: 548/463, loss: 0.019753124564886093 2023-01-24 02:52:29.826398: step: 550/463, loss: 0.0333632193505764 2023-01-24 02:52:30.450571: step: 552/463, loss: 0.13638849556446075 2023-01-24 02:52:31.116064: step: 554/463, loss: 0.05351114273071289 2023-01-24 02:52:31.740364: step: 556/463, loss: 0.0046550920233130455 2023-01-24 02:52:32.448748: step: 558/463, loss: 0.018831003457307816 2023-01-24 02:52:33.031558: step: 560/463, loss: 0.010704170912504196 2023-01-24 02:52:33.667353: step: 562/463, loss: 0.09039629250764847 2023-01-24 02:52:34.301661: step: 564/463, loss: 0.0254190806299448 2023-01-24 02:52:34.885940: step: 566/463, loss: 0.055956847965717316 2023-01-24 02:52:35.471136: step: 568/463, loss: 0.05081801488995552 2023-01-24 02:52:36.089717: step: 570/463, loss: 0.043163515627384186 2023-01-24 02:52:36.747620: step: 572/463, loss: 0.01678292080760002 2023-01-24 02:52:37.395801: step: 574/463, loss: 0.023207440972328186 2023-01-24 02:52:38.004397: step: 576/463, loss: 0.4035690724849701 2023-01-24 02:52:38.600478: step: 578/463, loss: 0.1154913380742073 2023-01-24 02:52:39.245729: step: 580/463, loss: 0.12089009582996368 2023-01-24 02:52:39.942168: step: 582/463, loss: 0.17366036772727966 2023-01-24 02:52:40.591314: step: 584/463, loss: 0.2882901728153229 2023-01-24 02:52:41.156890: step: 586/463, loss: 0.05161074548959732 2023-01-24 02:52:41.777052: step: 588/463, loss: 0.35056570172309875 2023-01-24 02:52:42.414932: step: 590/463, loss: 0.0208967886865139 2023-01-24 02:52:43.058069: step: 592/463, loss: 0.20979025959968567 2023-01-24 02:52:43.683110: step: 594/463, loss: 0.06464440375566483 2023-01-24 02:52:44.277049: step: 596/463, loss: 0.03202921897172928 2023-01-24 02:52:44.816935: step: 598/463, loss: 0.07376474887132645 2023-01-24 02:52:45.390724: step: 600/463, loss: 0.04743989184498787 2023-01-24 02:52:45.997076: step: 602/463, loss: 0.055407650768756866 2023-01-24 02:52:46.725163: step: 604/463, loss: 0.05285326763987541 2023-01-24 02:52:47.418205: step: 606/463, loss: 0.10778897255659103 2023-01-24 02:52:48.064902: step: 608/463, loss: 0.038896068930625916 2023-01-24 02:52:48.708713: step: 610/463, loss: 0.08925910294055939 2023-01-24 02:52:49.332957: step: 612/463, loss: 0.1635931432247162 2023-01-24 02:52:49.927664: step: 614/463, loss: 0.8457882404327393 2023-01-24 02:52:50.521074: step: 616/463, loss: 0.03410865738987923 2023-01-24 02:52:51.085009: step: 618/463, loss: 0.015545263886451721 2023-01-24 02:52:51.738003: step: 620/463, loss: 0.06418374925851822 2023-01-24 02:52:52.372133: step: 622/463, loss: 0.009307267144322395 2023-01-24 02:52:52.943402: step: 624/463, loss: 0.00849175825715065 2023-01-24 02:52:53.539228: step: 626/463, loss: 0.028681280091404915 2023-01-24 02:52:54.148230: step: 628/463, loss: 0.0491660051047802 2023-01-24 02:52:54.793491: step: 630/463, loss: 0.47927841544151306 2023-01-24 02:52:55.394343: step: 632/463, loss: 0.009912054054439068 2023-01-24 02:52:55.999695: step: 634/463, loss: 0.0031794968526810408 2023-01-24 02:52:56.639756: step: 636/463, loss: 0.03298630937933922 2023-01-24 02:52:57.271366: step: 638/463, loss: 0.5963751077651978 2023-01-24 02:52:57.901188: step: 640/463, loss: 1.1689939498901367 2023-01-24 02:52:58.526340: step: 642/463, loss: 0.008170985616743565 2023-01-24 02:52:59.131373: step: 644/463, loss: 0.04075191915035248 2023-01-24 02:52:59.726465: step: 646/463, loss: 0.00829495582729578 2023-01-24 02:53:00.334922: step: 648/463, loss: 0.4498771131038666 2023-01-24 02:53:00.953000: step: 650/463, loss: 0.14536608755588531 2023-01-24 02:53:01.560440: step: 652/463, loss: 0.024806663393974304 2023-01-24 02:53:02.230838: step: 654/463, loss: 0.05230482667684555 2023-01-24 02:53:02.827358: step: 656/463, loss: 0.16959525644779205 2023-01-24 02:53:03.535224: step: 658/463, loss: 0.026518968865275383 2023-01-24 02:53:04.217482: step: 660/463, loss: 0.010883325710892677 2023-01-24 02:53:04.845031: step: 662/463, loss: 0.039032381027936935 2023-01-24 02:53:05.496765: step: 664/463, loss: 0.014130848459899426 2023-01-24 02:53:06.202795: step: 666/463, loss: 0.07402977347373962 2023-01-24 02:53:06.890216: step: 668/463, loss: 0.14945565164089203 2023-01-24 02:53:07.499753: step: 670/463, loss: 0.311570405960083 2023-01-24 02:53:08.069669: step: 672/463, loss: 0.8940517902374268 2023-01-24 02:53:08.671857: step: 674/463, loss: 0.06886366754770279 2023-01-24 02:53:09.281166: step: 676/463, loss: 0.10833122581243515 2023-01-24 02:53:09.905865: step: 678/463, loss: 0.09355586022138596 2023-01-24 02:53:10.548376: step: 680/463, loss: 0.06463313847780228 2023-01-24 02:53:11.107484: step: 682/463, loss: 0.014167838729918003 2023-01-24 02:53:11.753354: step: 684/463, loss: 0.06615443527698517 2023-01-24 02:53:12.439904: step: 686/463, loss: 0.3571716248989105 2023-01-24 02:53:13.011949: step: 688/463, loss: 0.07164973020553589 2023-01-24 02:53:13.647572: step: 690/463, loss: 0.894069254398346 2023-01-24 02:53:14.230600: step: 692/463, loss: 0.010855305008590221 2023-01-24 02:53:14.785373: step: 694/463, loss: 0.019030166789889336 2023-01-24 02:53:15.417758: step: 696/463, loss: 0.044320087879896164 2023-01-24 02:53:16.021828: step: 698/463, loss: 0.0061429706402122974 2023-01-24 02:53:16.627308: step: 700/463, loss: 0.01441947091370821 2023-01-24 02:53:17.222779: step: 702/463, loss: 0.050518304109573364 2023-01-24 02:53:17.814229: step: 704/463, loss: 0.013496499508619308 2023-01-24 02:53:18.479118: step: 706/463, loss: 0.04813800007104874 2023-01-24 02:53:19.072310: step: 708/463, loss: 0.09693578630685806 2023-01-24 02:53:19.662355: step: 710/463, loss: 0.03551693633198738 2023-01-24 02:53:20.215775: step: 712/463, loss: 0.007124726660549641 2023-01-24 02:53:20.846871: step: 714/463, loss: 0.006127381697297096 2023-01-24 02:53:21.430183: step: 716/463, loss: 0.023651884868741035 2023-01-24 02:53:22.048529: step: 718/463, loss: 0.042162906378507614 2023-01-24 02:53:22.693287: step: 720/463, loss: 0.011224256828427315 2023-01-24 02:53:23.251772: step: 722/463, loss: 0.4536055326461792 2023-01-24 02:53:23.882896: step: 724/463, loss: 0.06699256598949432 2023-01-24 02:53:24.519520: step: 726/463, loss: 0.08349325507879257 2023-01-24 02:53:25.090632: step: 728/463, loss: 0.009899456053972244 2023-01-24 02:53:25.702770: step: 730/463, loss: 0.010055750608444214 2023-01-24 02:53:26.322680: step: 732/463, loss: 0.05743797495961189 2023-01-24 02:53:27.063970: step: 734/463, loss: 0.023769082501530647 2023-01-24 02:53:27.740230: step: 736/463, loss: 0.10105634480714798 2023-01-24 02:53:28.404854: step: 738/463, loss: 0.5656456351280212 2023-01-24 02:53:29.042043: step: 740/463, loss: 0.0425361767411232 2023-01-24 02:53:29.641493: step: 742/463, loss: 0.07228134572505951 2023-01-24 02:53:30.280105: step: 744/463, loss: 0.003149681957438588 2023-01-24 02:53:30.864431: step: 746/463, loss: 0.003421169938519597 2023-01-24 02:53:31.495134: step: 748/463, loss: 0.42067599296569824 2023-01-24 02:53:32.082308: step: 750/463, loss: 0.1624079793691635 2023-01-24 02:53:32.689469: step: 752/463, loss: 0.04837449640035629 2023-01-24 02:53:33.292579: step: 754/463, loss: 0.012449271976947784 2023-01-24 02:53:33.930428: step: 756/463, loss: 0.0254416074603796 2023-01-24 02:53:34.560566: step: 758/463, loss: 0.023787543177604675 2023-01-24 02:53:35.198417: step: 760/463, loss: 0.06165437400341034 2023-01-24 02:53:35.800585: step: 762/463, loss: 0.20485974848270416 2023-01-24 02:53:36.417219: step: 764/463, loss: 0.28936567902565 2023-01-24 02:53:37.033624: step: 766/463, loss: 0.113552987575531 2023-01-24 02:53:37.621678: step: 768/463, loss: 0.040712811052799225 2023-01-24 02:53:38.265422: step: 770/463, loss: 0.02676175907254219 2023-01-24 02:53:38.861250: step: 772/463, loss: 0.06688287109136581 2023-01-24 02:53:39.423324: step: 774/463, loss: 0.7004122138023376 2023-01-24 02:53:39.944689: step: 776/463, loss: 0.07582084089517593 2023-01-24 02:53:40.562261: step: 778/463, loss: 0.1933680921792984 2023-01-24 02:53:41.120814: step: 780/463, loss: 0.04180564358830452 2023-01-24 02:53:41.755436: step: 782/463, loss: 0.055355582386255264 2023-01-24 02:53:42.364097: step: 784/463, loss: 0.08811771124601364 2023-01-24 02:53:42.959870: step: 786/463, loss: 0.028482133522629738 2023-01-24 02:53:43.557559: step: 788/463, loss: 0.004225490614771843 2023-01-24 02:53:44.149595: step: 790/463, loss: 0.036629583686590195 2023-01-24 02:53:44.755239: step: 792/463, loss: 0.054877351969480515 2023-01-24 02:53:45.462995: step: 794/463, loss: 0.3363019824028015 2023-01-24 02:53:46.107375: step: 796/463, loss: 0.05422777682542801 2023-01-24 02:53:46.675837: step: 798/463, loss: 0.006331595126539469 2023-01-24 02:53:47.281068: step: 800/463, loss: 0.07655809819698334 2023-01-24 02:53:47.947705: step: 802/463, loss: 0.07453346997499466 2023-01-24 02:53:48.584270: step: 804/463, loss: 0.04395703598856926 2023-01-24 02:53:49.147387: step: 806/463, loss: 0.045428112149238586 2023-01-24 02:53:49.811081: step: 808/463, loss: 0.01732235588133335 2023-01-24 02:53:50.497326: step: 810/463, loss: 0.0650506392121315 2023-01-24 02:53:51.086433: step: 812/463, loss: 0.009408474899828434 2023-01-24 02:53:51.723977: step: 814/463, loss: 0.06196252629160881 2023-01-24 02:53:52.307347: step: 816/463, loss: 0.4552536606788635 2023-01-24 02:53:53.009397: step: 818/463, loss: 0.15773415565490723 2023-01-24 02:53:53.686333: step: 820/463, loss: 0.03790159150958061 2023-01-24 02:53:54.298346: step: 822/463, loss: 0.037578023970127106 2023-01-24 02:53:54.979936: step: 824/463, loss: 0.011477263644337654 2023-01-24 02:53:55.601140: step: 826/463, loss: 0.09522224217653275 2023-01-24 02:53:56.262574: step: 828/463, loss: 0.03931977599859238 2023-01-24 02:53:56.893127: step: 830/463, loss: 0.00780304754152894 2023-01-24 02:53:57.504266: step: 832/463, loss: 0.03427712991833687 2023-01-24 02:53:58.158443: step: 834/463, loss: 0.035402603447437286 2023-01-24 02:53:58.796012: step: 836/463, loss: 0.2521076202392578 2023-01-24 02:53:59.433218: step: 838/463, loss: 0.03218855708837509 2023-01-24 02:54:00.075681: step: 840/463, loss: 0.09992952644824982 2023-01-24 02:54:00.757204: step: 842/463, loss: 0.044545166194438934 2023-01-24 02:54:01.466305: step: 844/463, loss: 0.028844941407442093 2023-01-24 02:54:02.147338: step: 846/463, loss: 0.003648433368653059 2023-01-24 02:54:02.801452: step: 848/463, loss: 0.08368342369794846 2023-01-24 02:54:03.432300: step: 850/463, loss: 0.048617392778396606 2023-01-24 02:54:04.076906: step: 852/463, loss: 0.01440340280532837 2023-01-24 02:54:04.643861: step: 854/463, loss: 0.025879204273223877 2023-01-24 02:54:05.254374: step: 856/463, loss: 1.2511944770812988 2023-01-24 02:54:05.840312: step: 858/463, loss: 0.04545079171657562 2023-01-24 02:54:06.448964: step: 860/463, loss: 0.017826208844780922 2023-01-24 02:54:07.079831: step: 862/463, loss: 0.05402884632349014 2023-01-24 02:54:07.751590: step: 864/463, loss: 0.16573858261108398 2023-01-24 02:54:08.325278: step: 866/463, loss: 0.00812308769673109 2023-01-24 02:54:08.935563: step: 868/463, loss: 0.026841744780540466 2023-01-24 02:54:09.507441: step: 870/463, loss: 0.02524542063474655 2023-01-24 02:54:10.095976: step: 872/463, loss: 0.0005836548516526818 2023-01-24 02:54:10.690953: step: 874/463, loss: 0.022158058360219002 2023-01-24 02:54:11.360217: step: 876/463, loss: 0.0849095955491066 2023-01-24 02:54:11.917950: step: 878/463, loss: 0.08241454511880875 2023-01-24 02:54:12.519099: step: 880/463, loss: 0.021352414041757584 2023-01-24 02:54:13.240876: step: 882/463, loss: 0.10853380709886551 2023-01-24 02:54:13.858593: step: 884/463, loss: 0.0011751658748835325 2023-01-24 02:54:14.502989: step: 886/463, loss: 0.032426316291093826 2023-01-24 02:54:15.115226: step: 888/463, loss: 0.9668465256690979 2023-01-24 02:54:15.714473: step: 890/463, loss: 0.004351734183728695 2023-01-24 02:54:16.372189: step: 892/463, loss: 0.058757588267326355 2023-01-24 02:54:17.039316: step: 894/463, loss: 0.015361789613962173 2023-01-24 02:54:17.666221: step: 896/463, loss: 0.01097427774220705 2023-01-24 02:54:18.307642: step: 898/463, loss: 0.09028850495815277 2023-01-24 02:54:18.914443: step: 900/463, loss: 0.10042666643857956 2023-01-24 02:54:19.589805: step: 902/463, loss: 0.04647265374660492 2023-01-24 02:54:20.221290: step: 904/463, loss: 0.021175991743803024 2023-01-24 02:54:20.837478: step: 906/463, loss: 0.004938920494168997 2023-01-24 02:54:21.424116: step: 908/463, loss: 0.050417378544807434 2023-01-24 02:54:22.095429: step: 910/463, loss: 0.13690118491649628 2023-01-24 02:54:22.711725: step: 912/463, loss: 0.11915592849254608 2023-01-24 02:54:23.379683: step: 914/463, loss: 0.023038644343614578 2023-01-24 02:54:23.959994: step: 916/463, loss: 0.10941333323717117 2023-01-24 02:54:24.637096: step: 918/463, loss: 0.04674834758043289 2023-01-24 02:54:25.235956: step: 920/463, loss: 0.015698300674557686 2023-01-24 02:54:25.851248: step: 922/463, loss: 0.0308549664914608 2023-01-24 02:54:26.445648: step: 924/463, loss: 0.022881057113409042 2023-01-24 02:54:27.050116: step: 926/463, loss: 0.022768784314393997 ================================================== Loss: 0.098 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3604937661276728, 'r': 0.32765941930769504, 'f1': 0.34329326834026896}, 'combined': 0.25295293456651397, 'epoch': 23} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3512447537428383, 'r': 0.37965425588380314, 'f1': 0.36489737667983907}, 'combined': 0.2828391149384399, 'epoch': 23} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3380266932860101, 'r': 0.33674385953539904, 'f1': 0.3373840569869872}, 'combined': 0.24859877883251688, 'epoch': 23} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3313832893857388, 'r': 0.38285734812304567, 'f1': 0.35526549659520146}, 'combined': 0.2753732557340796, 'epoch': 23} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3408145075575787, 'r': 0.33434743910297565, 'f1': 0.33755000078020725}, 'combined': 0.24872105320646848, 'epoch': 23} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33616725949496234, 'r': 0.3723175989810934, 'f1': 0.3533201462637851}, 'combined': 0.2738653765298239, 'epoch': 23} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32575757575757575, 'r': 0.30714285714285716, 'f1': 0.31617647058823534}, 'combined': 0.21078431372549022, 'epoch': 23} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37962962962962965, 'r': 0.44565217391304346, 'f1': 0.41000000000000003}, 'combined': 0.20500000000000002, 'epoch': 23} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.13793103448275862, 'f1': 0.2162162162162162}, 'combined': 0.14414414414414412, 'epoch': 23} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35531381856540084, 'r': 0.3195801707779886, 'f1': 0.336500999000999}, 'combined': 0.24794810452705188, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35077406976066583, 'r': 0.38269193088778525, 'f1': 0.3660385237854157}, 'combined': 0.2837236404461117, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.3142857142857143, 'f1': 0.360655737704918}, 'combined': 0.24043715846994532, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3556843389529725, 'r': 0.32598773380319873, 'f1': 0.340189179632249}, 'combined': 0.25066571130797294, 'epoch': 19} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35235259195938173, 'r': 0.3698406801632481, 'f1': 0.3608848968767838}, 'combined': 0.2797289631293731, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.41304347826086957, 'r': 0.41304347826086957, 'f1': 0.41304347826086957}, 'combined': 0.20652173913043478, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33842201576576575, 'r': 0.3326425126502214, 'f1': 0.3355073763955343}, 'combined': 0.2472159615546042, 'epoch': 14} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33644544699224593, 'r': 0.3627302475385151, 'f1': 0.3490937720671424}, 'combined': 0.2705894309802731, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.20689655172413793, 'f1': 0.2926829268292683}, 'combined': 0.19512195121951217, 'epoch': 14} ****************************** Epoch: 24 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:57:01.485698: step: 2/463, loss: 0.061294905841350555 2023-01-24 02:57:02.071671: step: 4/463, loss: 0.006343673914670944 2023-01-24 02:57:02.685412: step: 6/463, loss: 0.020606111735105515 2023-01-24 02:57:03.266074: step: 8/463, loss: 0.16978049278259277 2023-01-24 02:57:03.860638: step: 10/463, loss: 0.024464059621095657 2023-01-24 02:57:04.510700: step: 12/463, loss: 0.06635589152574539 2023-01-24 02:57:05.076635: step: 14/463, loss: 0.019664715975522995 2023-01-24 02:57:05.660319: step: 16/463, loss: 0.009384295903146267 2023-01-24 02:57:06.272284: step: 18/463, loss: 0.015013431198894978 2023-01-24 02:57:06.885442: step: 20/463, loss: 0.010599148459732533 2023-01-24 02:57:07.479158: step: 22/463, loss: 0.7978048324584961 2023-01-24 02:57:08.064543: step: 24/463, loss: 0.050290998071432114 2023-01-24 02:57:08.709427: step: 26/463, loss: 0.028274554759263992 2023-01-24 02:57:09.304469: step: 28/463, loss: 0.02509673498570919 2023-01-24 02:57:09.868323: step: 30/463, loss: 0.04342924430966377 2023-01-24 02:57:10.501756: step: 32/463, loss: 0.21507760882377625 2023-01-24 02:57:11.070460: step: 34/463, loss: 0.010677285492420197 2023-01-24 02:57:11.669764: step: 36/463, loss: 0.04876670613884926 2023-01-24 02:57:12.401323: step: 38/463, loss: 0.0214841291308403 2023-01-24 02:57:13.049131: step: 40/463, loss: 0.3389042615890503 2023-01-24 02:57:13.699992: step: 42/463, loss: 0.001654837396927178 2023-01-24 02:57:14.264862: step: 44/463, loss: 0.01829889416694641 2023-01-24 02:57:14.845779: step: 46/463, loss: 0.031863272190093994 2023-01-24 02:57:15.476291: step: 48/463, loss: 0.3395855128765106 2023-01-24 02:57:16.108829: step: 50/463, loss: 0.01945796236395836 2023-01-24 02:57:16.673991: step: 52/463, loss: 0.011326232925057411 2023-01-24 02:57:17.266386: step: 54/463, loss: 0.060719557106494904 2023-01-24 02:57:17.826766: step: 56/463, loss: 0.27859875559806824 2023-01-24 02:57:18.524038: step: 58/463, loss: 0.04889040067791939 2023-01-24 02:57:19.200922: step: 60/463, loss: 0.01434397604316473 2023-01-24 02:57:19.831686: step: 62/463, loss: 0.06446627527475357 2023-01-24 02:57:20.398841: step: 64/463, loss: 0.0687406063079834 2023-01-24 02:57:20.941888: step: 66/463, loss: 0.009840857237577438 2023-01-24 02:57:21.556064: step: 68/463, loss: 0.03942156583070755 2023-01-24 02:57:22.132272: step: 70/463, loss: 7.957068010000512e-05 2023-01-24 02:57:22.702433: step: 72/463, loss: 0.1568048745393753 2023-01-24 02:57:23.406030: step: 74/463, loss: 0.018056249246001244 2023-01-24 02:57:24.045215: step: 76/463, loss: 0.16042578220367432 2023-01-24 02:57:24.758556: step: 78/463, loss: 0.02998526021838188 2023-01-24 02:57:25.426103: step: 80/463, loss: 0.03995296731591225 2023-01-24 02:57:26.021629: step: 82/463, loss: 0.07112596929073334 2023-01-24 02:57:26.647547: step: 84/463, loss: 0.45406466722488403 2023-01-24 02:57:27.285754: step: 86/463, loss: 0.07411360740661621 2023-01-24 02:57:27.893122: step: 88/463, loss: 0.06382579356431961 2023-01-24 02:57:28.515155: step: 90/463, loss: 0.05234390124678612 2023-01-24 02:57:29.147438: step: 92/463, loss: 0.02708696387708187 2023-01-24 02:57:29.711680: step: 94/463, loss: 0.06781347841024399 2023-01-24 02:57:30.290295: step: 96/463, loss: 0.0020681589376181364 2023-01-24 02:57:30.908190: step: 98/463, loss: 0.010786955244839191 2023-01-24 02:57:31.478628: step: 100/463, loss: 0.007816801778972149 2023-01-24 02:57:32.126194: step: 102/463, loss: 0.11301437020301819 2023-01-24 02:57:32.859908: step: 104/463, loss: 0.005041416268795729 2023-01-24 02:57:33.447559: step: 106/463, loss: 0.09249261766672134 2023-01-24 02:57:33.994102: step: 108/463, loss: 0.0031062893103808165 2023-01-24 02:57:34.595985: step: 110/463, loss: 0.0008614999242126942 2023-01-24 02:57:35.237018: step: 112/463, loss: 0.03815028816461563 2023-01-24 02:57:35.909643: step: 114/463, loss: 1.0728869438171387 2023-01-24 02:57:36.613726: step: 116/463, loss: 0.04436250776052475 2023-01-24 02:57:37.234565: step: 118/463, loss: 0.023092331364750862 2023-01-24 02:57:37.871094: step: 120/463, loss: 0.026097899302840233 2023-01-24 02:57:38.499944: step: 122/463, loss: 0.03697734698653221 2023-01-24 02:57:39.115887: step: 124/463, loss: 0.06943757086992264 2023-01-24 02:57:39.653286: step: 126/463, loss: 0.09368494153022766 2023-01-24 02:57:40.198523: step: 128/463, loss: 0.1767715960741043 2023-01-24 02:57:40.856026: step: 130/463, loss: 0.020567482337355614 2023-01-24 02:57:41.440456: step: 132/463, loss: 0.0010978621430695057 2023-01-24 02:57:42.066341: step: 134/463, loss: 0.0038466467522084713 2023-01-24 02:57:42.682426: step: 136/463, loss: 0.002895973389968276 2023-01-24 02:57:43.295647: step: 138/463, loss: 0.6470154523849487 2023-01-24 02:57:43.887616: step: 140/463, loss: 0.023963846266269684 2023-01-24 02:57:44.461838: step: 142/463, loss: 0.05351338908076286 2023-01-24 02:57:45.042890: step: 144/463, loss: 0.007462936453521252 2023-01-24 02:57:45.643727: step: 146/463, loss: 0.04259811341762543 2023-01-24 02:57:46.259519: step: 148/463, loss: 0.07926907390356064 2023-01-24 02:57:46.857690: step: 150/463, loss: 0.005697930231690407 2023-01-24 02:57:47.454108: step: 152/463, loss: 0.01964966580271721 2023-01-24 02:57:48.023859: step: 154/463, loss: 0.07333017140626907 2023-01-24 02:57:48.658609: step: 156/463, loss: 0.11011946201324463 2023-01-24 02:57:49.312075: step: 158/463, loss: 0.16936245560646057 2023-01-24 02:57:49.969456: step: 160/463, loss: 0.07704948633909225 2023-01-24 02:57:50.560011: step: 162/463, loss: 0.02215109020471573 2023-01-24 02:57:51.148602: step: 164/463, loss: 0.1850002408027649 2023-01-24 02:57:51.744053: step: 166/463, loss: 0.06054436042904854 2023-01-24 02:57:52.362835: step: 168/463, loss: 0.07248638570308685 2023-01-24 02:57:52.973491: step: 170/463, loss: 0.021736355498433113 2023-01-24 02:57:53.578988: step: 172/463, loss: 0.057320334017276764 2023-01-24 02:57:54.221993: step: 174/463, loss: 0.03244979307055473 2023-01-24 02:57:54.797826: step: 176/463, loss: 0.07547648251056671 2023-01-24 02:57:55.485777: step: 178/463, loss: 0.05297822877764702 2023-01-24 02:57:56.066717: step: 180/463, loss: 0.02737613581120968 2023-01-24 02:57:56.708432: step: 182/463, loss: 0.12709006667137146 2023-01-24 02:57:57.267266: step: 184/463, loss: 0.014109165407717228 2023-01-24 02:57:57.820981: step: 186/463, loss: 0.009420432150363922 2023-01-24 02:57:58.430277: step: 188/463, loss: 0.08026494830846786 2023-01-24 02:57:59.027288: step: 190/463, loss: 0.014953376725316048 2023-01-24 02:57:59.654976: step: 192/463, loss: 0.01593739725649357 2023-01-24 02:58:00.278632: step: 194/463, loss: 0.04336761310696602 2023-01-24 02:58:00.873392: step: 196/463, loss: 0.004452051594853401 2023-01-24 02:58:01.488547: step: 198/463, loss: 0.019622813910245895 2023-01-24 02:58:02.105999: step: 200/463, loss: 0.5644239187240601 2023-01-24 02:58:02.749269: step: 202/463, loss: 0.03331039473414421 2023-01-24 02:58:03.383082: step: 204/463, loss: 0.042713724076747894 2023-01-24 02:58:04.049105: step: 206/463, loss: 0.0993862971663475 2023-01-24 02:58:04.691855: step: 208/463, loss: 0.012197350151836872 2023-01-24 02:58:05.259943: step: 210/463, loss: 0.04980878159403801 2023-01-24 02:58:05.829771: step: 212/463, loss: 0.012405703775584698 2023-01-24 02:58:06.415333: step: 214/463, loss: 0.014293259009718895 2023-01-24 02:58:07.008975: step: 216/463, loss: 0.08689762651920319 2023-01-24 02:58:07.622126: step: 218/463, loss: 0.009880058467388153 2023-01-24 02:58:08.269131: step: 220/463, loss: 0.03535918518900871 2023-01-24 02:58:08.874781: step: 222/463, loss: 0.007327070459723473 2023-01-24 02:58:09.565690: step: 224/463, loss: 0.0013676685048267245 2023-01-24 02:58:10.128387: step: 226/463, loss: 4.08041524887085 2023-01-24 02:58:10.733302: step: 228/463, loss: 0.18339861929416656 2023-01-24 02:58:11.323311: step: 230/463, loss: 0.004719461780041456 2023-01-24 02:58:11.993230: step: 232/463, loss: 0.005354912020266056 2023-01-24 02:58:12.555038: step: 234/463, loss: 0.009334675036370754 2023-01-24 02:58:13.128341: step: 236/463, loss: 0.006596957799047232 2023-01-24 02:58:13.767851: step: 238/463, loss: 0.108518585562706 2023-01-24 02:58:14.423401: step: 240/463, loss: 0.01934421807527542 2023-01-24 02:58:15.029996: step: 242/463, loss: 0.007274204865098 2023-01-24 02:58:15.696411: step: 244/463, loss: 0.002430056221783161 2023-01-24 02:58:16.347664: step: 246/463, loss: 0.018906325101852417 2023-01-24 02:58:16.979507: step: 248/463, loss: 0.09841140359640121 2023-01-24 02:58:17.627226: step: 250/463, loss: 0.2465820610523224 2023-01-24 02:58:18.282233: step: 252/463, loss: 0.09582491964101791 2023-01-24 02:58:18.921658: step: 254/463, loss: 0.023469191044569016 2023-01-24 02:58:19.569558: step: 256/463, loss: 0.01208808645606041 2023-01-24 02:58:20.159984: step: 258/463, loss: 0.1477283090353012 2023-01-24 02:58:20.855197: step: 260/463, loss: 0.03751266375184059 2023-01-24 02:58:21.452810: step: 262/463, loss: 0.05043237656354904 2023-01-24 02:58:22.056417: step: 264/463, loss: 0.0037228099536150694 2023-01-24 02:58:22.698195: step: 266/463, loss: 0.027014896273612976 2023-01-24 02:58:23.332363: step: 268/463, loss: 0.4177056550979614 2023-01-24 02:58:23.984793: step: 270/463, loss: 0.032803405076265335 2023-01-24 02:58:24.631648: step: 272/463, loss: 0.12205017358064651 2023-01-24 02:58:25.200409: step: 274/463, loss: 0.010484627448022366 2023-01-24 02:58:25.822640: step: 276/463, loss: 0.020143119618296623 2023-01-24 02:58:26.463012: step: 278/463, loss: 0.06068652868270874 2023-01-24 02:58:27.064960: step: 280/463, loss: 0.007905379869043827 2023-01-24 02:58:27.654306: step: 282/463, loss: 0.10386954993009567 2023-01-24 02:58:28.283579: step: 284/463, loss: 0.02009562961757183 2023-01-24 02:58:28.883893: step: 286/463, loss: 0.06188122555613518 2023-01-24 02:58:29.483294: step: 288/463, loss: 0.03297923877835274 2023-01-24 02:58:30.087337: step: 290/463, loss: 0.006055665202438831 2023-01-24 02:58:30.683335: step: 292/463, loss: 0.018160022795200348 2023-01-24 02:58:31.201674: step: 294/463, loss: 0.12376287579536438 2023-01-24 02:58:31.838999: step: 296/463, loss: 0.14398975670337677 2023-01-24 02:58:32.426786: step: 298/463, loss: 0.024244364351034164 2023-01-24 02:58:32.962668: step: 300/463, loss: 0.16585154831409454 2023-01-24 02:58:33.557965: step: 302/463, loss: 0.031117623671889305 2023-01-24 02:58:34.188004: step: 304/463, loss: 1.4300962686538696 2023-01-24 02:58:34.911877: step: 306/463, loss: 0.16099242866039276 2023-01-24 02:58:35.526316: step: 308/463, loss: 0.01632041484117508 2023-01-24 02:58:36.134265: step: 310/463, loss: 0.09779483824968338 2023-01-24 02:58:36.774683: step: 312/463, loss: 0.0809023380279541 2023-01-24 02:58:37.371818: step: 314/463, loss: 0.02666088379919529 2023-01-24 02:58:37.931957: step: 316/463, loss: 0.0623222254216671 2023-01-24 02:58:38.508296: step: 318/463, loss: 0.007449913304299116 2023-01-24 02:58:39.096409: step: 320/463, loss: 0.0750175192952156 2023-01-24 02:58:39.761659: step: 322/463, loss: 0.00797069538384676 2023-01-24 02:58:40.415649: step: 324/463, loss: 0.016828155145049095 2023-01-24 02:58:41.032603: step: 326/463, loss: 2.867417097091675 2023-01-24 02:58:41.768208: step: 328/463, loss: 0.07930314540863037 2023-01-24 02:58:42.376111: step: 330/463, loss: 0.016846250742673874 2023-01-24 02:58:43.016592: step: 332/463, loss: 0.01605255901813507 2023-01-24 02:58:43.633802: step: 334/463, loss: 0.46677348017692566 2023-01-24 02:58:44.194010: step: 336/463, loss: 0.0068283299915492535 2023-01-24 02:58:44.806828: step: 338/463, loss: 0.03346501290798187 2023-01-24 02:58:45.599043: step: 340/463, loss: 0.020151332020759583 2023-01-24 02:58:46.156352: step: 342/463, loss: 0.009587912820279598 2023-01-24 02:58:46.763458: step: 344/463, loss: 0.012885798700153828 2023-01-24 02:58:47.517156: step: 346/463, loss: 0.06680378317832947 2023-01-24 02:58:48.162062: step: 348/463, loss: 0.03617333620786667 2023-01-24 02:58:48.832754: step: 350/463, loss: 0.03804966062307358 2023-01-24 02:58:49.424811: step: 352/463, loss: 0.20497827231884003 2023-01-24 02:58:50.056499: step: 354/463, loss: 0.0541839562356472 2023-01-24 02:58:50.732785: step: 356/463, loss: 0.06272148340940475 2023-01-24 02:58:51.373680: step: 358/463, loss: 0.6745994091033936 2023-01-24 02:58:52.020800: step: 360/463, loss: 0.0856032744050026 2023-01-24 02:58:52.574366: step: 362/463, loss: 0.0008191489614546299 2023-01-24 02:58:53.197766: step: 364/463, loss: 0.007314196787774563 2023-01-24 02:58:53.784073: step: 366/463, loss: 0.8927024006843567 2023-01-24 02:58:54.476089: step: 368/463, loss: 0.025360634550452232 2023-01-24 02:58:55.159621: step: 370/463, loss: 0.015439298935234547 2023-01-24 02:58:55.824605: step: 372/463, loss: 0.012043795548379421 2023-01-24 02:58:56.491961: step: 374/463, loss: 0.002002270892262459 2023-01-24 02:58:57.135732: step: 376/463, loss: 0.10055901855230331 2023-01-24 02:58:57.702739: step: 378/463, loss: 0.03908461332321167 2023-01-24 02:58:58.333571: step: 380/463, loss: 0.7719851136207581 2023-01-24 02:58:59.028338: step: 382/463, loss: 0.09145240485668182 2023-01-24 02:58:59.691386: step: 384/463, loss: 0.15544719994068146 2023-01-24 02:59:00.277032: step: 386/463, loss: 0.03723986819386482 2023-01-24 02:59:00.889054: step: 388/463, loss: 0.00631159171462059 2023-01-24 02:59:01.505789: step: 390/463, loss: 0.06047888845205307 2023-01-24 02:59:02.119144: step: 392/463, loss: 0.012118219397962093 2023-01-24 02:59:02.702514: step: 394/463, loss: 0.03209967911243439 2023-01-24 02:59:03.326207: step: 396/463, loss: 0.022075003013014793 2023-01-24 02:59:04.048655: step: 398/463, loss: 0.013022121042013168 2023-01-24 02:59:04.642458: step: 400/463, loss: 0.009034481830894947 2023-01-24 02:59:05.316117: step: 402/463, loss: 0.1878408044576645 2023-01-24 02:59:05.915765: step: 404/463, loss: 0.03177250549197197 2023-01-24 02:59:06.535028: step: 406/463, loss: 0.05853579193353653 2023-01-24 02:59:07.109370: step: 408/463, loss: 0.0261134784668684 2023-01-24 02:59:07.738013: step: 410/463, loss: 1.5228782892227173 2023-01-24 02:59:08.303326: step: 412/463, loss: 0.052909981459379196 2023-01-24 02:59:08.942312: step: 414/463, loss: 0.03807179629802704 2023-01-24 02:59:09.613731: step: 416/463, loss: 0.16231660544872284 2023-01-24 02:59:10.168245: step: 418/463, loss: 0.03277049958705902 2023-01-24 02:59:10.709107: step: 420/463, loss: 0.0011334748705849051 2023-01-24 02:59:11.284813: step: 422/463, loss: 0.03694593161344528 2023-01-24 02:59:11.905598: step: 424/463, loss: 0.06627807021141052 2023-01-24 02:59:12.468883: step: 426/463, loss: 0.009900666773319244 2023-01-24 02:59:13.109631: step: 428/463, loss: 0.027718110010027885 2023-01-24 02:59:13.716117: step: 430/463, loss: 0.07971540838479996 2023-01-24 02:59:14.339516: step: 432/463, loss: 0.01798867993056774 2023-01-24 02:59:14.951430: step: 434/463, loss: 0.03339134529232979 2023-01-24 02:59:15.599338: step: 436/463, loss: 0.01760207675397396 2023-01-24 02:59:16.214773: step: 438/463, loss: 0.01757189631462097 2023-01-24 02:59:16.857053: step: 440/463, loss: 0.022632887586951256 2023-01-24 02:59:17.540150: step: 442/463, loss: 0.009646367281675339 2023-01-24 02:59:18.127782: step: 444/463, loss: 0.035784099251031876 2023-01-24 02:59:18.797609: step: 446/463, loss: 0.041638486087322235 2023-01-24 02:59:19.415946: step: 448/463, loss: 0.03877909481525421 2023-01-24 02:59:20.055345: step: 450/463, loss: 0.007765759713947773 2023-01-24 02:59:20.676800: step: 452/463, loss: 0.01499869953840971 2023-01-24 02:59:21.292441: step: 454/463, loss: 0.11714904010295868 2023-01-24 02:59:21.894354: step: 456/463, loss: 0.0636599063873291 2023-01-24 02:59:22.516301: step: 458/463, loss: 0.05196675285696983 2023-01-24 02:59:23.093074: step: 460/463, loss: 0.028296956792473793 2023-01-24 02:59:23.735091: step: 462/463, loss: 0.04651957377791405 2023-01-24 02:59:24.340865: step: 464/463, loss: 0.028146252036094666 2023-01-24 02:59:24.956319: step: 466/463, loss: 0.14111503958702087 2023-01-24 02:59:25.487520: step: 468/463, loss: 0.05156390741467476 2023-01-24 02:59:26.108350: step: 470/463, loss: 0.013155822642147541 2023-01-24 02:59:26.756901: step: 472/463, loss: 0.06433604657649994 2023-01-24 02:59:27.356678: step: 474/463, loss: 0.0026400901842862368 2023-01-24 02:59:27.963653: step: 476/463, loss: 0.5728375911712646 2023-01-24 02:59:28.538652: step: 478/463, loss: 0.04358116537332535 2023-01-24 02:59:29.195256: step: 480/463, loss: 0.01163279078900814 2023-01-24 02:59:29.852861: step: 482/463, loss: 0.05154048651456833 2023-01-24 02:59:30.452451: step: 484/463, loss: 0.013692816719412804 2023-01-24 02:59:31.019885: step: 486/463, loss: 0.0017231665551662445 2023-01-24 02:59:31.599990: step: 488/463, loss: 0.020986400544643402 2023-01-24 02:59:32.190029: step: 490/463, loss: 0.0148360850289464 2023-01-24 02:59:32.770580: step: 492/463, loss: 0.037350621074438095 2023-01-24 02:59:33.419510: step: 494/463, loss: 0.023542063310742378 2023-01-24 02:59:34.016234: step: 496/463, loss: 0.0386778898537159 2023-01-24 02:59:34.655379: step: 498/463, loss: 0.12982696294784546 2023-01-24 02:59:35.311904: step: 500/463, loss: 0.015460880473256111 2023-01-24 02:59:35.952917: step: 502/463, loss: 0.03337129205465317 2023-01-24 02:59:36.550068: step: 504/463, loss: 0.014453509822487831 2023-01-24 02:59:37.087925: step: 506/463, loss: 0.20806880295276642 2023-01-24 02:59:37.683206: step: 508/463, loss: 0.02205553837120533 2023-01-24 02:59:38.288983: step: 510/463, loss: 0.06790025532245636 2023-01-24 02:59:38.906207: step: 512/463, loss: 0.03640223667025566 2023-01-24 02:59:39.489509: step: 514/463, loss: 0.04309734329581261 2023-01-24 02:59:40.044234: step: 516/463, loss: 0.0029864616226404905 2023-01-24 02:59:40.674311: step: 518/463, loss: 0.13067884743213654 2023-01-24 02:59:41.390995: step: 520/463, loss: 0.009787706658244133 2023-01-24 02:59:41.959436: step: 522/463, loss: 0.010587071068584919 2023-01-24 02:59:42.564652: step: 524/463, loss: 0.03710310533642769 2023-01-24 02:59:43.179111: step: 526/463, loss: 0.008763926103711128 2023-01-24 02:59:43.808190: step: 528/463, loss: 0.05441180616617203 2023-01-24 02:59:44.392602: step: 530/463, loss: 0.06790116429328918 2023-01-24 02:59:45.002419: step: 532/463, loss: 0.04022807627916336 2023-01-24 02:59:45.685862: step: 534/463, loss: 0.037353623658418655 2023-01-24 02:59:46.320208: step: 536/463, loss: 0.07474343478679657 2023-01-24 02:59:46.907215: step: 538/463, loss: 0.01485541369765997 2023-01-24 02:59:47.572147: step: 540/463, loss: 0.07869409024715424 2023-01-24 02:59:48.269785: step: 542/463, loss: 0.018076535314321518 2023-01-24 02:59:48.859886: step: 544/463, loss: 0.013396735303103924 2023-01-24 02:59:49.494344: step: 546/463, loss: 0.019285796210169792 2023-01-24 02:59:50.108577: step: 548/463, loss: 0.003920732531696558 2023-01-24 02:59:50.733541: step: 550/463, loss: 0.34737080335617065 2023-01-24 02:59:51.308376: step: 552/463, loss: 0.006014332640916109 2023-01-24 02:59:51.945595: step: 554/463, loss: 0.06717444956302643 2023-01-24 02:59:52.581425: step: 556/463, loss: 0.007173856254667044 2023-01-24 02:59:53.239706: step: 558/463, loss: 0.026601284742355347 2023-01-24 02:59:53.897075: step: 560/463, loss: 0.14075852930545807 2023-01-24 02:59:54.522359: step: 562/463, loss: 0.4079218804836273 2023-01-24 02:59:55.211006: step: 564/463, loss: 0.01909816823899746 2023-01-24 02:59:55.832603: step: 566/463, loss: 0.09817704558372498 2023-01-24 02:59:56.451670: step: 568/463, loss: 0.006945817265659571 2023-01-24 02:59:57.101389: step: 570/463, loss: 0.033449966460466385 2023-01-24 02:59:57.817491: step: 572/463, loss: 0.08662756532430649 2023-01-24 02:59:58.422182: step: 574/463, loss: 0.04582451656460762 2023-01-24 02:59:59.015380: step: 576/463, loss: 0.09477768838405609 2023-01-24 02:59:59.648888: step: 578/463, loss: 0.042833440005779266 2023-01-24 03:00:00.273591: step: 580/463, loss: 0.012643870897591114 2023-01-24 03:00:00.827033: step: 582/463, loss: 0.03557109832763672 2023-01-24 03:00:01.400729: step: 584/463, loss: 0.007770916912704706 2023-01-24 03:00:02.008440: step: 586/463, loss: 0.011807246133685112 2023-01-24 03:00:02.675133: step: 588/463, loss: 0.11939933151006699 2023-01-24 03:00:03.318206: step: 590/463, loss: 0.016973499208688736 2023-01-24 03:00:03.890524: step: 592/463, loss: 0.01595240831375122 2023-01-24 03:00:04.529229: step: 594/463, loss: 0.01475546695291996 2023-01-24 03:00:05.030919: step: 596/463, loss: 0.022738425061106682 2023-01-24 03:00:05.618974: step: 598/463, loss: 0.00842673797160387 2023-01-24 03:00:06.283524: step: 600/463, loss: 0.04169292002916336 2023-01-24 03:00:06.952206: step: 602/463, loss: 0.023298628628253937 2023-01-24 03:00:07.611086: step: 604/463, loss: 0.008476962335407734 2023-01-24 03:00:08.183842: step: 606/463, loss: 0.037734705954790115 2023-01-24 03:00:08.806511: step: 608/463, loss: 0.03686380758881569 2023-01-24 03:00:09.425430: step: 610/463, loss: 0.24258795380592346 2023-01-24 03:00:10.024628: step: 612/463, loss: 0.009392541833221912 2023-01-24 03:00:10.715919: step: 614/463, loss: 0.03928908705711365 2023-01-24 03:00:11.372771: step: 616/463, loss: 0.05161019787192345 2023-01-24 03:00:12.026201: step: 618/463, loss: 0.04276500269770622 2023-01-24 03:00:12.600230: step: 620/463, loss: 0.01665964536368847 2023-01-24 03:00:13.191595: step: 622/463, loss: 0.02709885872900486 2023-01-24 03:00:13.728873: step: 624/463, loss: 0.003725471207872033 2023-01-24 03:00:14.310140: step: 626/463, loss: 0.007334047928452492 2023-01-24 03:00:14.947125: step: 628/463, loss: 0.06844276934862137 2023-01-24 03:00:15.529755: step: 630/463, loss: 0.0035458628553897142 2023-01-24 03:00:16.150732: step: 632/463, loss: 0.022977720946073532 2023-01-24 03:00:16.882389: step: 634/463, loss: 0.1353224813938141 2023-01-24 03:00:17.547676: step: 636/463, loss: 0.04656210541725159 2023-01-24 03:00:18.238924: step: 638/463, loss: 0.004388928879052401 2023-01-24 03:00:18.847997: step: 640/463, loss: 0.014378667809069157 2023-01-24 03:00:19.418836: step: 642/463, loss: 0.08394616842269897 2023-01-24 03:00:20.134782: step: 644/463, loss: 0.06383013725280762 2023-01-24 03:00:20.780057: step: 646/463, loss: 0.0629916712641716 2023-01-24 03:00:21.351126: step: 648/463, loss: 0.02984936162829399 2023-01-24 03:00:21.934596: step: 650/463, loss: 0.010352713987231255 2023-01-24 03:00:22.672646: step: 652/463, loss: 0.01131836324930191 2023-01-24 03:00:23.341111: step: 654/463, loss: 0.053480587899684906 2023-01-24 03:00:23.964188: step: 656/463, loss: 0.021115224808454514 2023-01-24 03:00:24.558258: step: 658/463, loss: 0.38840916752815247 2023-01-24 03:00:25.170769: step: 660/463, loss: 0.031535156071186066 2023-01-24 03:00:25.786323: step: 662/463, loss: 0.007785444613546133 2023-01-24 03:00:26.421691: step: 664/463, loss: 0.016515525057911873 2023-01-24 03:00:27.033389: step: 666/463, loss: 0.04285736009478569 2023-01-24 03:00:27.566768: step: 668/463, loss: 0.1247667446732521 2023-01-24 03:00:28.199823: step: 670/463, loss: 0.027118753641843796 2023-01-24 03:00:28.786953: step: 672/463, loss: 0.011570295318961143 2023-01-24 03:00:29.393348: step: 674/463, loss: 0.03898542374372482 2023-01-24 03:00:29.990007: step: 676/463, loss: 0.059527862817049026 2023-01-24 03:00:30.516645: step: 678/463, loss: 0.007822910323739052 2023-01-24 03:00:31.163301: step: 680/463, loss: 0.02750343270599842 2023-01-24 03:00:31.795622: step: 682/463, loss: 0.053470540791749954 2023-01-24 03:00:32.396354: step: 684/463, loss: 0.00998302735388279 2023-01-24 03:00:33.032801: step: 686/463, loss: 0.04914086312055588 2023-01-24 03:00:33.687325: step: 688/463, loss: 0.049978163093328476 2023-01-24 03:00:34.311534: step: 690/463, loss: 0.003041710937395692 2023-01-24 03:00:34.889095: step: 692/463, loss: 0.010977196507155895 2023-01-24 03:00:35.483874: step: 694/463, loss: 0.18136140704154968 2023-01-24 03:00:36.141469: step: 696/463, loss: 0.053340643644332886 2023-01-24 03:00:36.750538: step: 698/463, loss: 0.03233860060572624 2023-01-24 03:00:37.377867: step: 700/463, loss: 0.029896695166826248 2023-01-24 03:00:38.025871: step: 702/463, loss: 0.07791373133659363 2023-01-24 03:00:38.636049: step: 704/463, loss: 0.033302806317806244 2023-01-24 03:00:39.323733: step: 706/463, loss: 0.01567240059375763 2023-01-24 03:00:39.958453: step: 708/463, loss: 0.07613213360309601 2023-01-24 03:00:40.677425: step: 710/463, loss: 0.00995422713458538 2023-01-24 03:00:41.253016: step: 712/463, loss: 0.016865411773324013 2023-01-24 03:00:41.877332: step: 714/463, loss: 0.04179783910512924 2023-01-24 03:00:42.500531: step: 716/463, loss: 0.0659717470407486 2023-01-24 03:00:43.133195: step: 718/463, loss: 0.014638642780482769 2023-01-24 03:00:43.774541: step: 720/463, loss: 0.13202062249183655 2023-01-24 03:00:44.375682: step: 722/463, loss: 0.013809260912239552 2023-01-24 03:00:45.019304: step: 724/463, loss: 0.051346078515052795 2023-01-24 03:00:45.663761: step: 726/463, loss: 0.08912888914346695 2023-01-24 03:00:46.334037: step: 728/463, loss: 0.03262588009238243 2023-01-24 03:00:46.951222: step: 730/463, loss: 0.04851076379418373 2023-01-24 03:00:47.529309: step: 732/463, loss: 0.001667431672103703 2023-01-24 03:00:48.240368: step: 734/463, loss: 0.010037771426141262 2023-01-24 03:00:48.848485: step: 736/463, loss: 0.00821502972394228 2023-01-24 03:00:49.479215: step: 738/463, loss: 0.0088306600227952 2023-01-24 03:00:50.121645: step: 740/463, loss: 0.161117285490036 2023-01-24 03:00:50.760346: step: 742/463, loss: 0.07299044728279114 2023-01-24 03:00:51.454351: step: 744/463, loss: 0.01481643971055746 2023-01-24 03:00:52.114069: step: 746/463, loss: 0.07599938660860062 2023-01-24 03:00:52.796714: step: 748/463, loss: 0.07450322061777115 2023-01-24 03:00:53.362875: step: 750/463, loss: 0.00329537782818079 2023-01-24 03:00:53.975815: step: 752/463, loss: 0.4358955919742584 2023-01-24 03:00:54.576493: step: 754/463, loss: 0.17394433915615082 2023-01-24 03:00:55.122931: step: 756/463, loss: 0.05408826470375061 2023-01-24 03:00:55.774138: step: 758/463, loss: 0.1048794835805893 2023-01-24 03:00:56.410406: step: 760/463, loss: 0.4868112802505493 2023-01-24 03:00:57.062534: step: 762/463, loss: 0.07624334841966629 2023-01-24 03:00:57.695414: step: 764/463, loss: 0.04283759370446205 2023-01-24 03:00:58.305520: step: 766/463, loss: 0.0119438786059618 2023-01-24 03:00:58.948455: step: 768/463, loss: 0.07219856977462769 2023-01-24 03:00:59.519824: step: 770/463, loss: 0.026810215786099434 2023-01-24 03:01:00.120740: step: 772/463, loss: 0.011506829410791397 2023-01-24 03:01:00.752433: step: 774/463, loss: 0.07864825427532196 2023-01-24 03:01:01.373067: step: 776/463, loss: 0.04946066811680794 2023-01-24 03:01:01.963667: step: 778/463, loss: 0.007479182444512844 2023-01-24 03:01:02.616565: step: 780/463, loss: 0.023926807567477226 2023-01-24 03:01:03.247951: step: 782/463, loss: 0.07274968177080154 2023-01-24 03:01:03.838242: step: 784/463, loss: 0.00914521049708128 2023-01-24 03:01:04.432825: step: 786/463, loss: 0.021941477432847023 2023-01-24 03:01:04.997812: step: 788/463, loss: 0.020317258313298225 2023-01-24 03:01:05.578968: step: 790/463, loss: 0.028045807033777237 2023-01-24 03:01:06.282931: step: 792/463, loss: 0.07886797189712524 2023-01-24 03:01:07.041843: step: 794/463, loss: 0.028771137818694115 2023-01-24 03:01:07.727015: step: 796/463, loss: 0.09927427768707275 2023-01-24 03:01:08.287767: step: 798/463, loss: 0.027209876105189323 2023-01-24 03:01:08.925783: step: 800/463, loss: 0.02445671521127224 2023-01-24 03:01:09.531772: step: 802/463, loss: 0.04573247954249382 2023-01-24 03:01:10.173795: step: 804/463, loss: 0.045140888541936874 2023-01-24 03:01:10.794731: step: 806/463, loss: 0.00567480456084013 2023-01-24 03:01:11.456868: step: 808/463, loss: 0.017955539748072624 2023-01-24 03:01:12.099700: step: 810/463, loss: 0.03771361708641052 2023-01-24 03:01:12.736455: step: 812/463, loss: 0.05207766219973564 2023-01-24 03:01:13.332850: step: 814/463, loss: 0.49033501744270325 2023-01-24 03:01:13.905157: step: 816/463, loss: 0.09133315831422806 2023-01-24 03:01:14.487338: step: 818/463, loss: 0.04222963750362396 2023-01-24 03:01:15.141329: step: 820/463, loss: 0.012394051998853683 2023-01-24 03:01:15.706555: step: 822/463, loss: 0.06862370669841766 2023-01-24 03:01:16.365091: step: 824/463, loss: 0.3051982820034027 2023-01-24 03:01:17.002196: step: 826/463, loss: 0.02007221430540085 2023-01-24 03:01:17.630023: step: 828/463, loss: 0.01869572326540947 2023-01-24 03:01:18.243011: step: 830/463, loss: 2.292128801345825 2023-01-24 03:01:18.881863: step: 832/463, loss: 0.12170281261205673 2023-01-24 03:01:19.471908: step: 834/463, loss: 0.027322562411427498 2023-01-24 03:01:20.134223: step: 836/463, loss: 0.02945788949728012 2023-01-24 03:01:20.719149: step: 838/463, loss: 0.0032249148935079575 2023-01-24 03:01:21.292384: step: 840/463, loss: 0.1998729556798935 2023-01-24 03:01:21.947624: step: 842/463, loss: 0.01237794104963541 2023-01-24 03:01:22.536795: step: 844/463, loss: 0.051994867622852325 2023-01-24 03:01:23.121097: step: 846/463, loss: 0.030856935307383537 2023-01-24 03:01:23.751620: step: 848/463, loss: 0.025783058255910873 2023-01-24 03:01:24.399967: step: 850/463, loss: 0.3581508696079254 2023-01-24 03:01:25.059615: step: 852/463, loss: 0.6835121512413025 2023-01-24 03:01:25.660856: step: 854/463, loss: 0.04245099052786827 2023-01-24 03:01:26.302403: step: 856/463, loss: 0.006474511232227087 2023-01-24 03:01:26.907308: step: 858/463, loss: 0.0609377883374691 2023-01-24 03:01:27.463661: step: 860/463, loss: 0.025864077731966972 2023-01-24 03:01:28.058093: step: 862/463, loss: 0.006679542362689972 2023-01-24 03:01:28.666464: step: 864/463, loss: 0.014713131822645664 2023-01-24 03:01:29.262178: step: 866/463, loss: 0.028294630348682404 2023-01-24 03:01:29.922580: step: 868/463, loss: 0.0871676504611969 2023-01-24 03:01:30.456453: step: 870/463, loss: 0.004147926811128855 2023-01-24 03:01:31.061118: step: 872/463, loss: 0.038954958319664 2023-01-24 03:01:31.626421: step: 874/463, loss: 0.0804627537727356 2023-01-24 03:01:32.252962: step: 876/463, loss: 0.3137335181236267 2023-01-24 03:01:32.865486: step: 878/463, loss: 0.03683378919959068 2023-01-24 03:01:33.535004: step: 880/463, loss: 0.008188140578567982 2023-01-24 03:01:34.252188: step: 882/463, loss: 0.005750198848545551 2023-01-24 03:01:34.932301: step: 884/463, loss: 0.006674158852547407 2023-01-24 03:01:35.573300: step: 886/463, loss: 0.08540567010641098 2023-01-24 03:01:36.209698: step: 888/463, loss: 0.017784301191568375 2023-01-24 03:01:36.821666: step: 890/463, loss: 0.07429122179746628 2023-01-24 03:01:37.421727: step: 892/463, loss: 0.029044259339571 2023-01-24 03:01:38.093948: step: 894/463, loss: 0.04374846816062927 2023-01-24 03:01:38.789620: step: 896/463, loss: 0.020005352795124054 2023-01-24 03:01:39.428825: step: 898/463, loss: 0.01138212624937296 2023-01-24 03:01:40.035917: step: 900/463, loss: 0.13206884264945984 2023-01-24 03:01:40.606352: step: 902/463, loss: 0.0037485677748918533 2023-01-24 03:01:41.216858: step: 904/463, loss: 0.05094145983457565 2023-01-24 03:01:41.910185: step: 906/463, loss: 0.0736050233244896 2023-01-24 03:01:42.539142: step: 908/463, loss: 0.05084287375211716 2023-01-24 03:01:43.176939: step: 910/463, loss: 0.057781118899583817 2023-01-24 03:01:43.774617: step: 912/463, loss: 0.016191350296139717 2023-01-24 03:01:44.343439: step: 914/463, loss: 0.025147559121251106 2023-01-24 03:01:44.905362: step: 916/463, loss: 0.019232554361224174 2023-01-24 03:01:45.484385: step: 918/463, loss: 0.05542212352156639 2023-01-24 03:01:46.087622: step: 920/463, loss: 0.010585951618850231 2023-01-24 03:01:46.775110: step: 922/463, loss: 0.03650703653693199 2023-01-24 03:01:47.370724: step: 924/463, loss: 0.03709341958165169 2023-01-24 03:01:47.971190: step: 926/463, loss: 0.01807141862809658 ================================================== Loss: 0.096 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36603739705037347, 'r': 0.31533392459368037, 'f1': 0.33879914018525903}, 'combined': 0.249641471715454, 'epoch': 24} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35037406624003964, 'r': 0.38032332006386654, 'f1': 0.36473492483868386}, 'combined': 0.2827131953295062, 'epoch': 24} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3517167289808356, 'r': 0.32635575042054765, 'f1': 0.3385619694323398}, 'combined': 0.24946671431856615, 'epoch': 24} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3356653730437814, 'r': 0.38410237999954766, 'f1': 0.35825408438877654}, 'combined': 0.27768976876067847, 'epoch': 24} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34476681362378975, 'r': 0.3153275221378874, 'f1': 0.3293906921043938}, 'combined': 0.24270893102429017, 'epoch': 24} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3410159543746614, 'r': 0.38144891220033356, 'f1': 0.3601010121249136}, 'combined': 0.27912135868055504, 'epoch': 24} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3485576923076923, 'r': 0.25892857142857145, 'f1': 0.29713114754098363}, 'combined': 0.19808743169398907, 'epoch': 24} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.34615384615384615, 'r': 0.391304347826087, 'f1': 0.36734693877551017}, 'combined': 0.18367346938775508, 'epoch': 24} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.625, 'r': 0.1724137931034483, 'f1': 0.2702702702702703}, 'combined': 0.18018018018018017, 'epoch': 24} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35531381856540084, 'r': 0.3195801707779886, 'f1': 0.336500999000999}, 'combined': 0.24794810452705188, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35077406976066583, 'r': 0.38269193088778525, 'f1': 0.3660385237854157}, 'combined': 0.2837236404461117, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.3142857142857143, 'f1': 0.360655737704918}, 'combined': 0.24043715846994532, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3556843389529725, 'r': 0.32598773380319873, 'f1': 0.340189179632249}, 'combined': 0.25066571130797294, 'epoch': 19} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35235259195938173, 'r': 0.3698406801632481, 'f1': 0.3608848968767838}, 'combined': 0.2797289631293731, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.41304347826086957, 'r': 0.41304347826086957, 'f1': 0.41304347826086957}, 'combined': 0.20652173913043478, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33842201576576575, 'r': 0.3326425126502214, 'f1': 0.3355073763955343}, 'combined': 0.2472159615546042, 'epoch': 14} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33644544699224593, 'r': 0.3627302475385151, 'f1': 0.3490937720671424}, 'combined': 0.2705894309802731, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.20689655172413793, 'f1': 0.2926829268292683}, 'combined': 0.19512195121951217, 'epoch': 14} ****************************** Epoch: 25 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:04:24.333088: step: 2/463, loss: 0.021109038963913918 2023-01-24 03:04:24.918774: step: 4/463, loss: 0.015032060444355011 2023-01-24 03:04:25.524741: step: 6/463, loss: 0.004206069279462099 2023-01-24 03:04:26.100144: step: 8/463, loss: 0.06405909359455109 2023-01-24 03:04:26.810098: step: 10/463, loss: 0.031621113419532776 2023-01-24 03:04:27.395813: step: 12/463, loss: 0.024908708408474922 2023-01-24 03:04:27.933192: step: 14/463, loss: 0.006067849230021238 2023-01-24 03:04:28.555696: step: 16/463, loss: 0.007873881608247757 2023-01-24 03:04:29.214126: step: 18/463, loss: 0.05631072819232941 2023-01-24 03:04:29.781683: step: 20/463, loss: 0.05310551077127457 2023-01-24 03:04:30.381372: step: 22/463, loss: 0.008314433507621288 2023-01-24 03:04:30.940791: step: 24/463, loss: 0.021149680018424988 2023-01-24 03:04:31.537778: step: 26/463, loss: 0.03197513520717621 2023-01-24 03:04:32.145779: step: 28/463, loss: 0.037717562168836594 2023-01-24 03:04:32.767972: step: 30/463, loss: 0.07420859485864639 2023-01-24 03:04:33.379889: step: 32/463, loss: 0.018345043063163757 2023-01-24 03:04:33.935421: step: 34/463, loss: 0.003830455709248781 2023-01-24 03:04:34.555868: step: 36/463, loss: 0.05385967344045639 2023-01-24 03:04:35.288114: step: 38/463, loss: 0.00831892341375351 2023-01-24 03:04:35.872123: step: 40/463, loss: 0.0027974292170256376 2023-01-24 03:04:36.516994: step: 42/463, loss: 0.01534788403660059 2023-01-24 03:04:37.317301: step: 44/463, loss: 0.0005573664093390107 2023-01-24 03:04:37.916957: step: 46/463, loss: 0.010175914503633976 2023-01-24 03:04:38.556949: step: 48/463, loss: 0.08881822228431702 2023-01-24 03:04:39.127094: step: 50/463, loss: 0.02033882401883602 2023-01-24 03:04:39.701447: step: 52/463, loss: 0.016143348067998886 2023-01-24 03:04:40.324465: step: 54/463, loss: 0.7632724046707153 2023-01-24 03:04:40.922624: step: 56/463, loss: 0.14287510514259338 2023-01-24 03:04:41.507893: step: 58/463, loss: 0.10106900334358215 2023-01-24 03:04:42.161435: step: 60/463, loss: 0.23967446386814117 2023-01-24 03:04:42.813640: step: 62/463, loss: 0.047041356563568115 2023-01-24 03:04:43.406981: step: 64/463, loss: 0.005142926704138517 2023-01-24 03:04:44.077718: step: 66/463, loss: 0.013033051043748856 2023-01-24 03:04:44.766181: step: 68/463, loss: 0.015337551012635231 2023-01-24 03:04:45.352684: step: 70/463, loss: 0.0034709018655121326 2023-01-24 03:04:45.970305: step: 72/463, loss: 0.024042075499892235 2023-01-24 03:04:46.537372: step: 74/463, loss: 0.12544138729572296 2023-01-24 03:04:47.134829: step: 76/463, loss: 0.080272376537323 2023-01-24 03:04:47.695145: step: 78/463, loss: 0.006036247126758099 2023-01-24 03:04:48.278772: step: 80/463, loss: 0.06610135734081268 2023-01-24 03:04:48.911003: step: 82/463, loss: 0.0021496752742677927 2023-01-24 03:04:49.546786: step: 84/463, loss: 0.014056055806577206 2023-01-24 03:04:50.184307: step: 86/463, loss: 0.1626308709383011 2023-01-24 03:04:50.756203: step: 88/463, loss: 0.03438613563776016 2023-01-24 03:04:51.362649: step: 90/463, loss: 0.001762519939802587 2023-01-24 03:04:51.932392: step: 92/463, loss: 0.34651169180870056 2023-01-24 03:04:52.592916: step: 94/463, loss: 0.0530976839363575 2023-01-24 03:04:53.162267: step: 96/463, loss: 0.006734954193234444 2023-01-24 03:04:53.838980: step: 98/463, loss: 0.03821629285812378 2023-01-24 03:04:54.426294: step: 100/463, loss: 0.03260607272386551 2023-01-24 03:04:55.072872: step: 102/463, loss: 0.005948987323790789 2023-01-24 03:04:55.654708: step: 104/463, loss: 0.006378699094057083 2023-01-24 03:04:56.284997: step: 106/463, loss: 0.018693476915359497 2023-01-24 03:04:56.900324: step: 108/463, loss: 0.32867875695228577 2023-01-24 03:04:57.529245: step: 110/463, loss: 0.05938457325100899 2023-01-24 03:04:58.131768: step: 112/463, loss: 0.19775789976119995 2023-01-24 03:04:58.810180: step: 114/463, loss: 0.041698336601257324 2023-01-24 03:04:59.396215: step: 116/463, loss: 0.00804909598082304 2023-01-24 03:05:00.049635: step: 118/463, loss: 0.003847773652523756 2023-01-24 03:05:00.699343: step: 120/463, loss: 0.02876480296254158 2023-01-24 03:05:01.342582: step: 122/463, loss: 0.08666608482599258 2023-01-24 03:05:02.020602: step: 124/463, loss: 0.01279232744127512 2023-01-24 03:05:02.615379: step: 126/463, loss: 0.004803666844964027 2023-01-24 03:05:03.277137: step: 128/463, loss: 0.05707241967320442 2023-01-24 03:05:03.878854: step: 130/463, loss: 0.007230275776237249 2023-01-24 03:05:04.519721: step: 132/463, loss: 0.01328516285866499 2023-01-24 03:05:05.113799: step: 134/463, loss: 0.017243817448616028 2023-01-24 03:05:05.760275: step: 136/463, loss: 0.08714798837900162 2023-01-24 03:05:06.346192: step: 138/463, loss: 0.008012217469513416 2023-01-24 03:05:06.957216: step: 140/463, loss: 0.012429896742105484 2023-01-24 03:05:07.609180: step: 142/463, loss: 0.04306786507368088 2023-01-24 03:05:08.239776: step: 144/463, loss: 0.014547363854944706 2023-01-24 03:05:08.877762: step: 146/463, loss: 0.04231032729148865 2023-01-24 03:05:09.509705: step: 148/463, loss: 0.019984044134616852 2023-01-24 03:05:10.100372: step: 150/463, loss: 0.08253974467515945 2023-01-24 03:05:10.654685: step: 152/463, loss: 0.0031620506197214127 2023-01-24 03:05:11.296982: step: 154/463, loss: 0.21046596765518188 2023-01-24 03:05:11.937640: step: 156/463, loss: 0.36317431926727295 2023-01-24 03:05:12.567084: step: 158/463, loss: 0.014552735723555088 2023-01-24 03:05:13.187428: step: 160/463, loss: 0.03379379212856293 2023-01-24 03:05:13.744800: step: 162/463, loss: 0.007758683990687132 2023-01-24 03:05:14.364048: step: 164/463, loss: 0.037862636148929596 2023-01-24 03:05:14.914601: step: 166/463, loss: 0.11231420934200287 2023-01-24 03:05:15.503596: step: 168/463, loss: 0.7467936873435974 2023-01-24 03:05:16.095114: step: 170/463, loss: 0.036791544407606125 2023-01-24 03:05:16.723491: step: 172/463, loss: 0.06604337692260742 2023-01-24 03:05:17.356219: step: 174/463, loss: 0.05442347005009651 2023-01-24 03:05:18.014613: step: 176/463, loss: 0.018999068066477776 2023-01-24 03:05:18.604642: step: 178/463, loss: 0.027576731517910957 2023-01-24 03:05:19.283879: step: 180/463, loss: 0.013368839398026466 2023-01-24 03:05:19.814139: step: 182/463, loss: 0.03161909058690071 2023-01-24 03:05:20.597338: step: 184/463, loss: 0.39545440673828125 2023-01-24 03:05:21.212590: step: 186/463, loss: 0.024257712066173553 2023-01-24 03:05:21.820122: step: 188/463, loss: 0.013289108872413635 2023-01-24 03:05:22.457521: step: 190/463, loss: 0.02358366549015045 2023-01-24 03:05:23.103465: step: 192/463, loss: 0.023323222994804382 2023-01-24 03:05:23.717975: step: 194/463, loss: 0.03617088869214058 2023-01-24 03:05:24.301917: step: 196/463, loss: 0.4803995192050934 2023-01-24 03:05:24.922421: step: 198/463, loss: 0.012015839107334614 2023-01-24 03:05:25.530958: step: 200/463, loss: 0.009398750960826874 2023-01-24 03:05:26.137770: step: 202/463, loss: 0.0317588709294796 2023-01-24 03:05:26.778686: step: 204/463, loss: 0.026876982301473618 2023-01-24 03:05:27.438158: step: 206/463, loss: 0.014062246307730675 2023-01-24 03:05:28.033535: step: 208/463, loss: 0.01816861890256405 2023-01-24 03:05:28.692634: step: 210/463, loss: 0.005815466400235891 2023-01-24 03:05:29.315160: step: 212/463, loss: 0.0068742139264941216 2023-01-24 03:05:29.975341: step: 214/463, loss: 0.10584881156682968 2023-01-24 03:05:30.564311: step: 216/463, loss: 0.0007289816858246922 2023-01-24 03:05:31.127344: step: 218/463, loss: 0.004263969603925943 2023-01-24 03:05:31.727969: step: 220/463, loss: 0.24089030921459198 2023-01-24 03:05:32.282455: step: 222/463, loss: 0.003271252615377307 2023-01-24 03:05:32.964156: step: 224/463, loss: 0.007085755467414856 2023-01-24 03:05:33.628237: step: 226/463, loss: 0.05333036184310913 2023-01-24 03:05:34.261511: step: 228/463, loss: 0.019386721774935722 2023-01-24 03:05:34.876043: step: 230/463, loss: 0.005321658216416836 2023-01-24 03:05:35.458478: step: 232/463, loss: 0.075748972594738 2023-01-24 03:05:36.012084: step: 234/463, loss: 0.005093716084957123 2023-01-24 03:05:36.662466: step: 236/463, loss: 0.03959015756845474 2023-01-24 03:05:37.225749: step: 238/463, loss: 0.03758808225393295 2023-01-24 03:05:37.834936: step: 240/463, loss: 0.023736048489809036 2023-01-24 03:05:38.527705: step: 242/463, loss: 0.007279905490577221 2023-01-24 03:05:39.101508: step: 244/463, loss: 0.01607891544699669 2023-01-24 03:05:39.741060: step: 246/463, loss: 0.052973464131355286 2023-01-24 03:05:40.326240: step: 248/463, loss: 0.1929202824831009 2023-01-24 03:05:40.917943: step: 250/463, loss: 0.009210491552948952 2023-01-24 03:05:41.529273: step: 252/463, loss: 0.012920624576508999 2023-01-24 03:05:42.164784: step: 254/463, loss: 0.062179360538721085 2023-01-24 03:05:42.830647: step: 256/463, loss: 0.03255145251750946 2023-01-24 03:05:43.423654: step: 258/463, loss: 0.03749460354447365 2023-01-24 03:05:44.023323: step: 260/463, loss: 0.05946195498108864 2023-01-24 03:05:44.627316: step: 262/463, loss: 0.014497810043394566 2023-01-24 03:05:45.206888: step: 264/463, loss: 0.007724605035036802 2023-01-24 03:05:45.846073: step: 266/463, loss: 0.015686891973018646 2023-01-24 03:05:46.542929: step: 268/463, loss: 0.0695074126124382 2023-01-24 03:05:47.210362: step: 270/463, loss: 0.07650492340326309 2023-01-24 03:05:47.742924: step: 272/463, loss: 0.01807987131178379 2023-01-24 03:05:48.400640: step: 274/463, loss: 0.04756436124444008 2023-01-24 03:05:49.022952: step: 276/463, loss: 0.055589038878679276 2023-01-24 03:05:49.592767: step: 278/463, loss: 0.054017044603824615 2023-01-24 03:05:50.193693: step: 280/463, loss: 0.06430082768201828 2023-01-24 03:05:50.812714: step: 282/463, loss: 0.006243611220270395 2023-01-24 03:05:51.394355: step: 284/463, loss: 0.05633990466594696 2023-01-24 03:05:52.066649: step: 286/463, loss: 0.005521700717508793 2023-01-24 03:05:52.667647: step: 288/463, loss: 0.04734700173139572 2023-01-24 03:05:53.286106: step: 290/463, loss: 0.0034006533678621054 2023-01-24 03:05:53.948944: step: 292/463, loss: 0.032506439834833145 2023-01-24 03:05:54.586230: step: 294/463, loss: 0.010944775305688381 2023-01-24 03:05:55.152974: step: 296/463, loss: 0.21538224816322327 2023-01-24 03:05:55.763424: step: 298/463, loss: 0.002352335723116994 2023-01-24 03:05:56.373746: step: 300/463, loss: 0.018740560859441757 2023-01-24 03:05:57.046047: step: 302/463, loss: 0.03530663996934891 2023-01-24 03:05:57.642357: step: 304/463, loss: 0.015676328912377357 2023-01-24 03:05:58.312980: step: 306/463, loss: 0.018409233540296555 2023-01-24 03:05:58.939610: step: 308/463, loss: 0.006620985455811024 2023-01-24 03:05:59.592463: step: 310/463, loss: 0.2368318885564804 2023-01-24 03:06:00.177849: step: 312/463, loss: 0.08788546919822693 2023-01-24 03:06:00.806633: step: 314/463, loss: 0.49794620275497437 2023-01-24 03:06:01.392658: step: 316/463, loss: 0.1598878800868988 2023-01-24 03:06:02.046831: step: 318/463, loss: 0.024894490838050842 2023-01-24 03:06:02.688725: step: 320/463, loss: 0.004775006789714098 2023-01-24 03:06:03.373744: step: 322/463, loss: 0.08404680341482162 2023-01-24 03:06:03.971723: step: 324/463, loss: 0.0188343096524477 2023-01-24 03:06:04.541101: step: 326/463, loss: 0.10189911723136902 2023-01-24 03:06:05.235564: step: 328/463, loss: 1.1384214162826538 2023-01-24 03:06:05.934527: step: 330/463, loss: 0.12365762144327164 2023-01-24 03:06:06.577465: step: 332/463, loss: 0.01701941154897213 2023-01-24 03:06:07.171405: step: 334/463, loss: 0.13388878107070923 2023-01-24 03:06:07.734318: step: 336/463, loss: 0.018471360206604004 2023-01-24 03:06:08.325314: step: 338/463, loss: 0.002054607030004263 2023-01-24 03:06:08.953506: step: 340/463, loss: 0.11944381892681122 2023-01-24 03:06:09.497633: step: 342/463, loss: 0.0077651264145970345 2023-01-24 03:06:10.138905: step: 344/463, loss: 0.020249146968126297 2023-01-24 03:06:10.737654: step: 346/463, loss: 0.022314513102173805 2023-01-24 03:06:11.337187: step: 348/463, loss: 0.04259047284722328 2023-01-24 03:06:11.890799: step: 350/463, loss: 0.025952594354748726 2023-01-24 03:06:12.580638: step: 352/463, loss: 0.06064560264348984 2023-01-24 03:06:13.164912: step: 354/463, loss: 0.021432967856526375 2023-01-24 03:06:13.851526: step: 356/463, loss: 0.055798523128032684 2023-01-24 03:06:14.491737: step: 358/463, loss: 0.008654449135065079 2023-01-24 03:06:15.115868: step: 360/463, loss: 0.017986932769417763 2023-01-24 03:06:15.733082: step: 362/463, loss: 0.017297813668847084 2023-01-24 03:06:16.353277: step: 364/463, loss: 0.06619615107774734 2023-01-24 03:06:16.920918: step: 366/463, loss: 0.07104445993900299 2023-01-24 03:06:17.554708: step: 368/463, loss: 0.0013265646994113922 2023-01-24 03:06:18.215283: step: 370/463, loss: 0.0579276867210865 2023-01-24 03:06:18.789120: step: 372/463, loss: 0.29696202278137207 2023-01-24 03:06:19.442914: step: 374/463, loss: 0.011779862456023693 2023-01-24 03:06:20.058876: step: 376/463, loss: 0.0030276374891400337 2023-01-24 03:06:20.687357: step: 378/463, loss: 0.04143163561820984 2023-01-24 03:06:21.302953: step: 380/463, loss: 0.046900972723960876 2023-01-24 03:06:21.875541: step: 382/463, loss: 0.009964827448129654 2023-01-24 03:06:22.514649: step: 384/463, loss: 0.05939556285738945 2023-01-24 03:06:23.109093: step: 386/463, loss: 0.018957680091261864 2023-01-24 03:06:23.672867: step: 388/463, loss: 0.033746786415576935 2023-01-24 03:06:24.299188: step: 390/463, loss: 0.15031953155994415 2023-01-24 03:06:24.874845: step: 392/463, loss: 0.063644178211689 2023-01-24 03:06:25.530570: step: 394/463, loss: 0.03243599832057953 2023-01-24 03:06:26.103602: step: 396/463, loss: 0.1770835518836975 2023-01-24 03:06:26.718590: step: 398/463, loss: 0.010382718406617641 2023-01-24 03:06:27.292316: step: 400/463, loss: 0.06889568269252777 2023-01-24 03:06:27.961362: step: 402/463, loss: 0.02138284407556057 2023-01-24 03:06:28.645646: step: 404/463, loss: 0.11205138266086578 2023-01-24 03:06:29.222440: step: 406/463, loss: 0.0032653072848916054 2023-01-24 03:06:29.846490: step: 408/463, loss: 0.00014596592518500984 2023-01-24 03:06:30.512640: step: 410/463, loss: 0.04450834542512894 2023-01-24 03:06:31.139593: step: 412/463, loss: 0.06521690636873245 2023-01-24 03:06:31.761481: step: 414/463, loss: 0.00714716874063015 2023-01-24 03:06:32.389913: step: 416/463, loss: 0.0194182638078928 2023-01-24 03:06:33.021300: step: 418/463, loss: 0.0040181344375014305 2023-01-24 03:06:33.624220: step: 420/463, loss: 0.012683051638305187 2023-01-24 03:06:34.202455: step: 422/463, loss: 0.013107290491461754 2023-01-24 03:06:34.821915: step: 424/463, loss: 0.013452489860355854 2023-01-24 03:06:35.371555: step: 426/463, loss: 0.0151299349963665 2023-01-24 03:06:35.948904: step: 428/463, loss: 0.011891954578459263 2023-01-24 03:06:36.497003: step: 430/463, loss: 0.04470183327794075 2023-01-24 03:06:37.130965: step: 432/463, loss: 0.02655380591750145 2023-01-24 03:06:37.717872: step: 434/463, loss: 0.0958181768655777 2023-01-24 03:06:38.313528: step: 436/463, loss: 0.018091343343257904 2023-01-24 03:06:38.896346: step: 438/463, loss: 0.044184423983097076 2023-01-24 03:06:39.476138: step: 440/463, loss: 0.014248855412006378 2023-01-24 03:06:40.015902: step: 442/463, loss: 0.01624976098537445 2023-01-24 03:06:40.612510: step: 444/463, loss: 0.04363221302628517 2023-01-24 03:06:41.263194: step: 446/463, loss: 0.00930438656359911 2023-01-24 03:06:41.912344: step: 448/463, loss: 0.047498274594545364 2023-01-24 03:06:42.494022: step: 450/463, loss: 0.017569968476891518 2023-01-24 03:06:43.077855: step: 452/463, loss: 0.012686186470091343 2023-01-24 03:06:43.694883: step: 454/463, loss: 0.003199036465957761 2023-01-24 03:06:44.297059: step: 456/463, loss: 0.02025398425757885 2023-01-24 03:06:44.903563: step: 458/463, loss: 0.010163580998778343 2023-01-24 03:06:45.540520: step: 460/463, loss: 0.11155115813016891 2023-01-24 03:06:46.227070: step: 462/463, loss: 0.023705290630459785 2023-01-24 03:06:46.780449: step: 464/463, loss: 0.012525953352451324 2023-01-24 03:06:47.402158: step: 466/463, loss: 0.025331133976578712 2023-01-24 03:06:48.025028: step: 468/463, loss: 0.012545017525553703 2023-01-24 03:06:48.686855: step: 470/463, loss: 0.0689067617058754 2023-01-24 03:06:49.283125: step: 472/463, loss: 0.0471944734454155 2023-01-24 03:06:49.903255: step: 474/463, loss: 0.04190956801176071 2023-01-24 03:06:50.519975: step: 476/463, loss: 0.03935954347252846 2023-01-24 03:06:51.185779: step: 478/463, loss: 0.8625763058662415 2023-01-24 03:06:51.757866: step: 480/463, loss: 0.058481764048337936 2023-01-24 03:06:52.372895: step: 482/463, loss: 0.06489741802215576 2023-01-24 03:06:52.995865: step: 484/463, loss: 0.032589737325906754 2023-01-24 03:06:53.601166: step: 486/463, loss: 0.006202941294759512 2023-01-24 03:06:54.165220: step: 488/463, loss: 0.04396497458219528 2023-01-24 03:06:54.820086: step: 490/463, loss: 0.012699656188488007 2023-01-24 03:06:55.437183: step: 492/463, loss: 0.005220841150730848 2023-01-24 03:06:55.999884: step: 494/463, loss: 0.09663787484169006 2023-01-24 03:06:56.625392: step: 496/463, loss: 0.04988407343626022 2023-01-24 03:06:57.291857: step: 498/463, loss: 0.020586606115102768 2023-01-24 03:06:57.857626: step: 500/463, loss: 0.01781546324491501 2023-01-24 03:06:58.512123: step: 502/463, loss: 0.01299281232059002 2023-01-24 03:06:59.138051: step: 504/463, loss: 0.015273337252438068 2023-01-24 03:06:59.721523: step: 506/463, loss: 0.1717660129070282 2023-01-24 03:07:00.333173: step: 508/463, loss: 0.1285363733768463 2023-01-24 03:07:00.905359: step: 510/463, loss: 0.03423415124416351 2023-01-24 03:07:01.593252: step: 512/463, loss: 0.014066099189221859 2023-01-24 03:07:02.195521: step: 514/463, loss: 0.03062060847878456 2023-01-24 03:07:02.749867: step: 516/463, loss: 0.042481083422899246 2023-01-24 03:07:03.411838: step: 518/463, loss: 0.04198916628956795 2023-01-24 03:07:04.007724: step: 520/463, loss: 0.004535003565251827 2023-01-24 03:07:04.623850: step: 522/463, loss: 0.007059819996356964 2023-01-24 03:07:05.204364: step: 524/463, loss: 0.027343953028321266 2023-01-24 03:07:05.849121: step: 526/463, loss: 0.030763663351535797 2023-01-24 03:07:06.438803: step: 528/463, loss: 0.08413953334093094 2023-01-24 03:07:07.051367: step: 530/463, loss: 0.002163316821679473 2023-01-24 03:07:07.712316: step: 532/463, loss: 0.041637860238552094 2023-01-24 03:07:08.318365: step: 534/463, loss: 0.19003233313560486 2023-01-24 03:07:08.911004: step: 536/463, loss: 0.04156087338924408 2023-01-24 03:07:09.517787: step: 538/463, loss: 0.02915462851524353 2023-01-24 03:07:10.118277: step: 540/463, loss: 0.000522002053912729 2023-01-24 03:07:10.735868: step: 542/463, loss: 0.1276184618473053 2023-01-24 03:07:11.366912: step: 544/463, loss: 0.0008053510682657361 2023-01-24 03:07:11.987805: step: 546/463, loss: 0.0077027203515172005 2023-01-24 03:07:12.597721: step: 548/463, loss: 0.042398128658533096 2023-01-24 03:07:13.241771: step: 550/463, loss: 0.019852623343467712 2023-01-24 03:07:13.832175: step: 552/463, loss: 0.6999385952949524 2023-01-24 03:07:14.433664: step: 554/463, loss: 0.026163244619965553 2023-01-24 03:07:14.980702: step: 556/463, loss: 0.019119925796985626 2023-01-24 03:07:15.580826: step: 558/463, loss: 0.03396812453866005 2023-01-24 03:07:16.260906: step: 560/463, loss: 0.02410472184419632 2023-01-24 03:07:16.908769: step: 562/463, loss: 0.37036454677581787 2023-01-24 03:07:17.524112: step: 564/463, loss: 0.05091991648077965 2023-01-24 03:07:18.210759: step: 566/463, loss: 0.046076055616140366 2023-01-24 03:07:18.845433: step: 568/463, loss: 0.017489347606897354 2023-01-24 03:07:19.432848: step: 570/463, loss: 0.0038825231604278088 2023-01-24 03:07:20.015172: step: 572/463, loss: 0.19779585301876068 2023-01-24 03:07:20.622320: step: 574/463, loss: 1.21391761302948 2023-01-24 03:07:21.234893: step: 576/463, loss: 0.11828425526618958 2023-01-24 03:07:21.898676: step: 578/463, loss: 0.038361556828022 2023-01-24 03:07:22.575800: step: 580/463, loss: 0.11542002856731415 2023-01-24 03:07:23.155714: step: 582/463, loss: 0.4270938038825989 2023-01-24 03:07:23.789452: step: 584/463, loss: 0.02628210000693798 2023-01-24 03:07:24.344204: step: 586/463, loss: 0.006752349901944399 2023-01-24 03:07:24.990290: step: 588/463, loss: 0.0343557707965374 2023-01-24 03:07:25.641351: step: 590/463, loss: 0.013685652986168861 2023-01-24 03:07:26.270074: step: 592/463, loss: 0.1740642935037613 2023-01-24 03:07:26.902624: step: 594/463, loss: 0.40346136689186096 2023-01-24 03:07:27.563749: step: 596/463, loss: 0.01499965600669384 2023-01-24 03:07:28.202936: step: 598/463, loss: 0.049805257469415665 2023-01-24 03:07:28.892426: step: 600/463, loss: 0.017432285472750664 2023-01-24 03:07:29.497211: step: 602/463, loss: 0.05167895182967186 2023-01-24 03:07:30.102083: step: 604/463, loss: 0.08549889177083969 2023-01-24 03:07:30.832487: step: 606/463, loss: 0.027763158082962036 2023-01-24 03:07:31.452458: step: 608/463, loss: 0.18631170690059662 2023-01-24 03:07:32.034791: step: 610/463, loss: 0.019247034564614296 2023-01-24 03:07:32.708069: step: 612/463, loss: 0.012513337656855583 2023-01-24 03:07:33.283775: step: 614/463, loss: 0.09164801239967346 2023-01-24 03:07:33.882428: step: 616/463, loss: 0.01599210686981678 2023-01-24 03:07:34.588920: step: 618/463, loss: 0.01972571574151516 2023-01-24 03:07:35.158262: step: 620/463, loss: 0.05408007279038429 2023-01-24 03:07:35.769609: step: 622/463, loss: 0.016336945816874504 2023-01-24 03:07:36.390357: step: 624/463, loss: 0.002914158161729574 2023-01-24 03:07:36.991584: step: 626/463, loss: 0.03858227655291557 2023-01-24 03:07:37.674777: step: 628/463, loss: 0.0351007878780365 2023-01-24 03:07:38.251315: step: 630/463, loss: 0.038395386189222336 2023-01-24 03:07:38.862538: step: 632/463, loss: 0.07069990783929825 2023-01-24 03:07:39.442233: step: 634/463, loss: 0.10475531220436096 2023-01-24 03:07:40.072953: step: 636/463, loss: 6.333949565887451 2023-01-24 03:07:40.696988: step: 638/463, loss: 0.04134509339928627 2023-01-24 03:07:41.370166: step: 640/463, loss: 0.04865711182355881 2023-01-24 03:07:42.022266: step: 642/463, loss: 0.019447248429059982 2023-01-24 03:07:42.616867: step: 644/463, loss: 0.01640874147415161 2023-01-24 03:07:43.257622: step: 646/463, loss: 0.015553612262010574 2023-01-24 03:07:43.838111: step: 648/463, loss: 0.004172757733613253 2023-01-24 03:07:44.445933: step: 650/463, loss: 0.0319741889834404 2023-01-24 03:07:44.999296: step: 652/463, loss: 0.02179543673992157 2023-01-24 03:07:45.704121: step: 654/463, loss: 0.014115767553448677 2023-01-24 03:07:46.301601: step: 656/463, loss: 0.0225237924605608 2023-01-24 03:07:46.922120: step: 658/463, loss: 0.05737101659178734 2023-01-24 03:07:47.572689: step: 660/463, loss: 0.09486011415719986 2023-01-24 03:07:48.208305: step: 662/463, loss: 0.04127555713057518 2023-01-24 03:07:48.837339: step: 664/463, loss: 0.019895924255251884 2023-01-24 03:07:49.443121: step: 666/463, loss: 0.0013534717727452517 2023-01-24 03:07:50.044309: step: 668/463, loss: 0.01699584722518921 2023-01-24 03:07:50.658400: step: 670/463, loss: 0.026871955022215843 2023-01-24 03:07:51.280174: step: 672/463, loss: 0.021039508283138275 2023-01-24 03:07:51.908669: step: 674/463, loss: 0.42728284001350403 2023-01-24 03:07:52.520723: step: 676/463, loss: 0.38508719205856323 2023-01-24 03:07:53.083150: step: 678/463, loss: 0.01717173308134079 2023-01-24 03:07:53.640369: step: 680/463, loss: 0.032114140689373016 2023-01-24 03:07:54.254011: step: 682/463, loss: 0.03041275404393673 2023-01-24 03:07:54.883401: step: 684/463, loss: 0.15438437461853027 2023-01-24 03:07:55.456173: step: 686/463, loss: 0.05008997395634651 2023-01-24 03:07:56.039425: step: 688/463, loss: 0.006459483411163092 2023-01-24 03:07:56.760096: step: 690/463, loss: 0.014323408715426922 2023-01-24 03:07:57.374682: step: 692/463, loss: 0.05100478231906891 2023-01-24 03:07:57.997197: step: 694/463, loss: 0.01353941299021244 2023-01-24 03:07:58.577659: step: 696/463, loss: 0.06035999208688736 2023-01-24 03:07:59.174081: step: 698/463, loss: 0.264396607875824 2023-01-24 03:07:59.826079: step: 700/463, loss: 1.3326385021209717 2023-01-24 03:08:00.454059: step: 702/463, loss: 0.01582072116434574 2023-01-24 03:08:01.067766: step: 704/463, loss: 0.04864613711833954 2023-01-24 03:08:01.664337: step: 706/463, loss: 0.07334981113672256 2023-01-24 03:08:02.281734: step: 708/463, loss: 0.066672183573246 2023-01-24 03:08:02.923136: step: 710/463, loss: 0.10230937600135803 2023-01-24 03:08:03.566899: step: 712/463, loss: 0.05611540749669075 2023-01-24 03:08:04.174567: step: 714/463, loss: 0.0199806597083807 2023-01-24 03:08:04.783336: step: 716/463, loss: 0.0004946466651745141 2023-01-24 03:08:05.382116: step: 718/463, loss: 0.008347051218152046 2023-01-24 03:08:06.021696: step: 720/463, loss: 0.03771701082587242 2023-01-24 03:08:06.636497: step: 722/463, loss: 0.04547347500920296 2023-01-24 03:08:07.254783: step: 724/463, loss: 0.008449878543615341 2023-01-24 03:08:07.875308: step: 726/463, loss: 0.07088819146156311 2023-01-24 03:08:08.486268: step: 728/463, loss: 0.0038683980237692595 2023-01-24 03:08:09.125353: step: 730/463, loss: 0.022889219224452972 2023-01-24 03:08:09.759149: step: 732/463, loss: 0.03200816363096237 2023-01-24 03:08:10.326618: step: 734/463, loss: 0.08053930848836899 2023-01-24 03:08:11.013423: step: 736/463, loss: 0.10302331298589706 2023-01-24 03:08:11.664770: step: 738/463, loss: 0.012389500625431538 2023-01-24 03:08:12.373312: step: 740/463, loss: 0.09445179253816605 2023-01-24 03:08:12.900475: step: 742/463, loss: 0.011306442320346832 2023-01-24 03:08:13.552630: step: 744/463, loss: 0.027445048093795776 2023-01-24 03:08:14.244263: step: 746/463, loss: 0.0002367985580349341 2023-01-24 03:08:14.913675: step: 748/463, loss: 0.03331078588962555 2023-01-24 03:08:15.636492: step: 750/463, loss: 0.07283668965101242 2023-01-24 03:08:16.204966: step: 752/463, loss: 0.011207148432731628 2023-01-24 03:08:16.821083: step: 754/463, loss: 0.05406389385461807 2023-01-24 03:08:17.425256: step: 756/463, loss: 0.06374567747116089 2023-01-24 03:08:18.030693: step: 758/463, loss: 0.11186698079109192 2023-01-24 03:08:18.635917: step: 760/463, loss: 0.0008981148712337017 2023-01-24 03:08:19.338559: step: 762/463, loss: 0.03608124330639839 2023-01-24 03:08:19.950075: step: 764/463, loss: 0.062166716903448105 2023-01-24 03:08:20.597300: step: 766/463, loss: 0.010939487256109715 2023-01-24 03:08:21.265545: step: 768/463, loss: 0.025811409577727318 2023-01-24 03:08:21.821484: step: 770/463, loss: 0.006605848204344511 2023-01-24 03:08:22.372131: step: 772/463, loss: 0.023401781916618347 2023-01-24 03:08:23.012628: step: 774/463, loss: 0.06945550441741943 2023-01-24 03:08:23.569630: step: 776/463, loss: 0.005378463305532932 2023-01-24 03:08:24.169220: step: 778/463, loss: 0.0723014697432518 2023-01-24 03:08:24.820100: step: 780/463, loss: 0.015718448907136917 2023-01-24 03:08:25.387123: step: 782/463, loss: 0.007162286899983883 2023-01-24 03:08:26.012271: step: 784/463, loss: 0.13311530649662018 2023-01-24 03:08:26.670851: step: 786/463, loss: 0.0475311279296875 2023-01-24 03:08:27.263832: step: 788/463, loss: 0.5126577615737915 2023-01-24 03:08:27.878939: step: 790/463, loss: 0.014258397743105888 2023-01-24 03:08:28.521485: step: 792/463, loss: 0.014141318388283253 2023-01-24 03:08:29.107520: step: 794/463, loss: 0.00015304111002478749 2023-01-24 03:08:29.713002: step: 796/463, loss: 0.5294798016548157 2023-01-24 03:08:30.298859: step: 798/463, loss: 0.10145343095064163 2023-01-24 03:08:30.963750: step: 800/463, loss: 0.0023837059270590544 2023-01-24 03:08:31.503948: step: 802/463, loss: 0.021642489358782768 2023-01-24 03:08:32.200708: step: 804/463, loss: 0.095314159989357 2023-01-24 03:08:32.858373: step: 806/463, loss: 0.036659009754657745 2023-01-24 03:08:33.499185: step: 808/463, loss: 0.035530366003513336 2023-01-24 03:08:34.091665: step: 810/463, loss: 0.11219654977321625 2023-01-24 03:08:34.720290: step: 812/463, loss: 0.011211954988539219 2023-01-24 03:08:35.306929: step: 814/463, loss: 0.06999460607767105 2023-01-24 03:08:35.961727: step: 816/463, loss: 0.670013964176178 2023-01-24 03:08:36.630186: step: 818/463, loss: 0.057076532393693924 2023-01-24 03:08:37.312768: step: 820/463, loss: 0.021547608077526093 2023-01-24 03:08:37.916908: step: 822/463, loss: 0.0015010681236162782 2023-01-24 03:08:38.526511: step: 824/463, loss: 0.029652509838342667 2023-01-24 03:08:39.140300: step: 826/463, loss: 0.04036566987633705 2023-01-24 03:08:39.748448: step: 828/463, loss: 0.021810103207826614 2023-01-24 03:08:40.358119: step: 830/463, loss: 0.2934378683567047 2023-01-24 03:08:40.967010: step: 832/463, loss: 0.05934227257966995 2023-01-24 03:08:41.564047: step: 834/463, loss: 0.04169616103172302 2023-01-24 03:08:42.206463: step: 836/463, loss: 0.06327860802412033 2023-01-24 03:08:42.851948: step: 838/463, loss: 0.04495702311396599 2023-01-24 03:08:43.482580: step: 840/463, loss: 1.1065717935562134 2023-01-24 03:08:44.098597: step: 842/463, loss: 1.1235878467559814 2023-01-24 03:08:44.769622: step: 844/463, loss: 0.01518157310783863 2023-01-24 03:08:45.455380: step: 846/463, loss: 0.042465049773454666 2023-01-24 03:08:46.020630: step: 848/463, loss: 0.020969387143850327 2023-01-24 03:08:46.598122: step: 850/463, loss: 0.024549081921577454 2023-01-24 03:08:47.321988: step: 852/463, loss: 0.7430814504623413 2023-01-24 03:08:47.934311: step: 854/463, loss: 0.014579113572835922 2023-01-24 03:08:48.587289: step: 856/463, loss: 0.03826327621936798 2023-01-24 03:08:49.162994: step: 858/463, loss: 0.03686225414276123 2023-01-24 03:08:49.761252: step: 860/463, loss: 0.06891360878944397 2023-01-24 03:08:50.357650: step: 862/463, loss: 0.0070788320153951645 2023-01-24 03:08:50.968558: step: 864/463, loss: 0.007806743495166302 2023-01-24 03:08:51.567987: step: 866/463, loss: 0.06779699772596359 2023-01-24 03:08:52.243906: step: 868/463, loss: 0.012989659793674946 2023-01-24 03:08:52.852480: step: 870/463, loss: 0.08983132243156433 2023-01-24 03:08:53.501794: step: 872/463, loss: 0.0053309425711631775 2023-01-24 03:08:54.171122: step: 874/463, loss: 0.0993824377655983 2023-01-24 03:08:54.844000: step: 876/463, loss: 0.010437862016260624 2023-01-24 03:08:55.412001: step: 878/463, loss: 0.02426217496395111 2023-01-24 03:08:56.020111: step: 880/463, loss: 0.0006782116834074259 2023-01-24 03:08:56.619211: step: 882/463, loss: 0.04411047697067261 2023-01-24 03:08:57.225244: step: 884/463, loss: 0.45282459259033203 2023-01-24 03:08:57.828326: step: 886/463, loss: 0.20769591629505157 2023-01-24 03:08:58.430830: step: 888/463, loss: 0.05739240348339081 2023-01-24 03:08:59.056848: step: 890/463, loss: 0.04293574020266533 2023-01-24 03:08:59.707906: step: 892/463, loss: 0.012517374008893967 2023-01-24 03:09:00.340902: step: 894/463, loss: 0.09229957312345505 2023-01-24 03:09:00.974351: step: 896/463, loss: 0.03911591321229935 2023-01-24 03:09:01.603662: step: 898/463, loss: 0.10301031172275543 2023-01-24 03:09:02.239090: step: 900/463, loss: 0.021950650960206985 2023-01-24 03:09:02.830243: step: 902/463, loss: 0.08346524089574814 2023-01-24 03:09:03.463638: step: 904/463, loss: 0.011329183354973793 2023-01-24 03:09:04.066860: step: 906/463, loss: 0.025982363149523735 2023-01-24 03:09:04.688059: step: 908/463, loss: 0.011318699456751347 2023-01-24 03:09:05.302125: step: 910/463, loss: 0.0502050444483757 2023-01-24 03:09:06.038840: step: 912/463, loss: 0.031187161803245544 2023-01-24 03:09:06.700152: step: 914/463, loss: 0.5288580656051636 2023-01-24 03:09:07.335654: step: 916/463, loss: 0.030677052214741707 2023-01-24 03:09:07.929565: step: 918/463, loss: 0.010374422185122967 2023-01-24 03:09:08.512679: step: 920/463, loss: 0.07955756783485413 2023-01-24 03:09:09.161546: step: 922/463, loss: 0.04105282574892044 2023-01-24 03:09:09.778751: step: 924/463, loss: 0.03244278207421303 2023-01-24 03:09:10.395770: step: 926/463, loss: 0.07540711015462875 ================================================== Loss: 0.091 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3583098177954659, 'r': 0.3127561976962321, 'f1': 0.3339868615722681}, 'combined': 0.2460955822111449, 'epoch': 25} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35485330877914667, 'r': 0.3734439692574659, 'f1': 0.3639113645787039}, 'combined': 0.28207483761602886, 'epoch': 25} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34476385751768435, 'r': 0.3244836306048794, 'f1': 0.33431646789593633}, 'combined': 0.24633845002858465, 'epoch': 25} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3404743997104098, 'r': 0.38053021144104626, 'f1': 0.3593896441387659}, 'combined': 0.2785699633994262, 'epoch': 25} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3257337520588276, 'r': 0.30348249005860406, 'f1': 0.3142146802767865}, 'combined': 0.23152660651973742, 'epoch': 25} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.336910374253117, 'r': 0.3619928561598288, 'f1': 0.34900153079476626}, 'combined': 0.27051793296053656, 'epoch': 25} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3161764705882353, 'r': 0.30714285714285716, 'f1': 0.3115942028985507}, 'combined': 0.2077294685990338, 'epoch': 25} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.32608695652173914, 'f1': 0.3191489361702128}, 'combined': 0.1595744680851064, 'epoch': 25} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.1724137931034483, 'f1': 0.25641025641025644}, 'combined': 0.17094017094017094, 'epoch': 25} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35531381856540084, 'r': 0.3195801707779886, 'f1': 0.336500999000999}, 'combined': 0.24794810452705188, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35077406976066583, 'r': 0.38269193088778525, 'f1': 0.3660385237854157}, 'combined': 0.2837236404461117, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.3142857142857143, 'f1': 0.360655737704918}, 'combined': 0.24043715846994532, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3556843389529725, 'r': 0.32598773380319873, 'f1': 0.340189179632249}, 'combined': 0.25066571130797294, 'epoch': 19} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35235259195938173, 'r': 0.3698406801632481, 'f1': 0.3608848968767838}, 'combined': 0.2797289631293731, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.41304347826086957, 'r': 0.41304347826086957, 'f1': 0.41304347826086957}, 'combined': 0.20652173913043478, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33842201576576575, 'r': 0.3326425126502214, 'f1': 0.3355073763955343}, 'combined': 0.2472159615546042, 'epoch': 14} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33644544699224593, 'r': 0.3627302475385151, 'f1': 0.3490937720671424}, 'combined': 0.2705894309802731, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.20689655172413793, 'f1': 0.2926829268292683}, 'combined': 0.19512195121951217, 'epoch': 14} ****************************** Epoch: 26 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:11:45.390901: step: 2/463, loss: 0.060164835304021835 2023-01-24 03:11:46.004395: step: 4/463, loss: 0.02558067813515663 2023-01-24 03:11:46.722472: step: 6/463, loss: 0.025403395295143127 2023-01-24 03:11:47.333145: step: 8/463, loss: 0.03196694701910019 2023-01-24 03:11:47.983874: step: 10/463, loss: 0.03795040398836136 2023-01-24 03:11:48.614729: step: 12/463, loss: 0.01664900593459606 2023-01-24 03:11:49.245270: step: 14/463, loss: 0.06506693363189697 2023-01-24 03:11:49.867073: step: 16/463, loss: 0.00960465706884861 2023-01-24 03:11:50.494588: step: 18/463, loss: 0.04162848740816116 2023-01-24 03:11:51.125704: step: 20/463, loss: 0.012118957936763763 2023-01-24 03:11:51.732627: step: 22/463, loss: 0.01889442279934883 2023-01-24 03:11:52.289618: step: 24/463, loss: 0.0547652430832386 2023-01-24 03:11:52.904860: step: 26/463, loss: 0.02215997315943241 2023-01-24 03:11:53.491008: step: 28/463, loss: 0.0023843380622565746 2023-01-24 03:11:54.104319: step: 30/463, loss: 0.005318155977874994 2023-01-24 03:11:54.673206: step: 32/463, loss: 0.016406558454036713 2023-01-24 03:11:55.285038: step: 34/463, loss: 0.003785031149163842 2023-01-24 03:11:55.856641: step: 36/463, loss: 0.0027621760964393616 2023-01-24 03:11:56.506890: step: 38/463, loss: 0.012590077705681324 2023-01-24 03:11:57.155034: step: 40/463, loss: 0.012336171232163906 2023-01-24 03:11:57.723657: step: 42/463, loss: 0.02353564463555813 2023-01-24 03:11:58.362142: step: 44/463, loss: 0.010895566083490849 2023-01-24 03:11:58.997603: step: 46/463, loss: 0.023744475096464157 2023-01-24 03:11:59.673336: step: 48/463, loss: 0.020291510969400406 2023-01-24 03:12:00.233096: step: 50/463, loss: 0.15573060512542725 2023-01-24 03:12:00.904388: step: 52/463, loss: 0.023421891033649445 2023-01-24 03:12:01.573688: step: 54/463, loss: 0.01674387790262699 2023-01-24 03:12:02.149721: step: 56/463, loss: 0.04184565320611 2023-01-24 03:12:02.771291: step: 58/463, loss: 0.016686532646417618 2023-01-24 03:12:03.391188: step: 60/463, loss: 0.023892097175121307 2023-01-24 03:12:04.037147: step: 62/463, loss: 0.05974258854985237 2023-01-24 03:12:04.640653: step: 64/463, loss: 0.09813424944877625 2023-01-24 03:12:05.228345: step: 66/463, loss: 0.018530558794736862 2023-01-24 03:12:05.862983: step: 68/463, loss: 0.05598936975002289 2023-01-24 03:12:06.449541: step: 70/463, loss: 0.005706106312572956 2023-01-24 03:12:07.097522: step: 72/463, loss: 0.02134627103805542 2023-01-24 03:12:07.690166: step: 74/463, loss: 0.10911321640014648 2023-01-24 03:12:08.285538: step: 76/463, loss: 0.12562420964241028 2023-01-24 03:12:09.135455: step: 78/463, loss: 0.008204949088394642 2023-01-24 03:12:09.802779: step: 80/463, loss: 0.020367205142974854 2023-01-24 03:12:10.472209: step: 82/463, loss: 0.028903182595968246 2023-01-24 03:12:11.030314: step: 84/463, loss: 0.009179981425404549 2023-01-24 03:12:11.631231: step: 86/463, loss: 0.011012246832251549 2023-01-24 03:12:12.218543: step: 88/463, loss: 0.001943445298820734 2023-01-24 03:12:12.815636: step: 90/463, loss: 0.000122124794870615 2023-01-24 03:12:13.392838: step: 92/463, loss: 0.0072256457060575485 2023-01-24 03:12:14.052840: step: 94/463, loss: 0.02838139794766903 2023-01-24 03:12:14.690771: step: 96/463, loss: 0.5995569825172424 2023-01-24 03:12:15.352155: step: 98/463, loss: 0.017027055844664574 2023-01-24 03:12:16.027119: step: 100/463, loss: 0.4004693031311035 2023-01-24 03:12:16.786211: step: 102/463, loss: 0.014569812454283237 2023-01-24 03:12:17.370605: step: 104/463, loss: 0.01985294185578823 2023-01-24 03:12:17.968523: step: 106/463, loss: 0.001956199062988162 2023-01-24 03:12:18.653331: step: 108/463, loss: 0.3121906518936157 2023-01-24 03:12:19.281763: step: 110/463, loss: 0.024625033140182495 2023-01-24 03:12:19.921911: step: 112/463, loss: 0.0011772587895393372 2023-01-24 03:12:20.532509: step: 114/463, loss: 0.012746881693601608 2023-01-24 03:12:21.175823: step: 116/463, loss: 0.011900834739208221 2023-01-24 03:12:21.807332: step: 118/463, loss: 0.021981066092848778 2023-01-24 03:12:22.423648: step: 120/463, loss: 0.022768914699554443 2023-01-24 03:12:23.077985: step: 122/463, loss: 0.033613648265600204 2023-01-24 03:12:23.675796: step: 124/463, loss: 0.0035845350939780474 2023-01-24 03:12:24.279451: step: 126/463, loss: 0.497938871383667 2023-01-24 03:12:24.896727: step: 128/463, loss: 0.03241952508687973 2023-01-24 03:12:25.470058: step: 130/463, loss: 0.007313554175198078 2023-01-24 03:12:26.100947: step: 132/463, loss: 0.04734067991375923 2023-01-24 03:12:26.749313: step: 134/463, loss: 0.06459705531597137 2023-01-24 03:12:27.496099: step: 136/463, loss: 0.01574254035949707 2023-01-24 03:12:28.093844: step: 138/463, loss: 0.023338835686445236 2023-01-24 03:12:28.701038: step: 140/463, loss: 0.01584446057677269 2023-01-24 03:12:29.227161: step: 142/463, loss: 0.09646377712488174 2023-01-24 03:12:29.788913: step: 144/463, loss: 0.011550257913768291 2023-01-24 03:12:30.422099: step: 146/463, loss: 0.03745417296886444 2023-01-24 03:12:31.011966: step: 148/463, loss: 0.057967957109212875 2023-01-24 03:12:31.583045: step: 150/463, loss: 0.023272907361388206 2023-01-24 03:12:32.188172: step: 152/463, loss: 0.10781530290842056 2023-01-24 03:12:32.792440: step: 154/463, loss: 0.09519363939762115 2023-01-24 03:12:33.384534: step: 156/463, loss: 0.04367026686668396 2023-01-24 03:12:33.986205: step: 158/463, loss: 0.052015941590070724 2023-01-24 03:12:34.567870: step: 160/463, loss: 0.02402150258421898 2023-01-24 03:12:35.182989: step: 162/463, loss: 0.021290911361575127 2023-01-24 03:12:35.783247: step: 164/463, loss: 0.0218874029815197 2023-01-24 03:12:36.477214: step: 166/463, loss: 0.010724851861596107 2023-01-24 03:12:37.090606: step: 168/463, loss: 0.021581370383501053 2023-01-24 03:12:37.685979: step: 170/463, loss: 0.006486351136118174 2023-01-24 03:12:38.301968: step: 172/463, loss: 0.06674645841121674 2023-01-24 03:12:38.969160: step: 174/463, loss: 0.024344950914382935 2023-01-24 03:12:39.620517: step: 176/463, loss: 0.0017576382961124182 2023-01-24 03:12:40.228468: step: 178/463, loss: 0.03176195174455643 2023-01-24 03:12:40.862206: step: 180/463, loss: 0.013579603284597397 2023-01-24 03:12:41.448791: step: 182/463, loss: 0.08059349656105042 2023-01-24 03:12:42.030357: step: 184/463, loss: 0.04349227622151375 2023-01-24 03:12:42.604385: step: 186/463, loss: 0.06103519722819328 2023-01-24 03:12:43.186264: step: 188/463, loss: 0.02008599415421486 2023-01-24 03:12:43.766275: step: 190/463, loss: 0.01783793978393078 2023-01-24 03:12:44.389602: step: 192/463, loss: 0.05660560354590416 2023-01-24 03:12:45.026438: step: 194/463, loss: 0.02937992662191391 2023-01-24 03:12:45.688493: step: 196/463, loss: 0.11923783272504807 2023-01-24 03:12:46.333905: step: 198/463, loss: 0.03090454638004303 2023-01-24 03:12:46.927985: step: 200/463, loss: 0.02430352382361889 2023-01-24 03:12:47.541246: step: 202/463, loss: 0.014538311399519444 2023-01-24 03:12:48.219402: step: 204/463, loss: 0.005214911885559559 2023-01-24 03:12:48.832622: step: 206/463, loss: 0.06654265522956848 2023-01-24 03:12:49.408636: step: 208/463, loss: 0.007305143866688013 2023-01-24 03:12:49.986364: step: 210/463, loss: 0.022058090195059776 2023-01-24 03:12:50.557921: step: 212/463, loss: 1.6268070936203003 2023-01-24 03:12:51.113048: step: 214/463, loss: 0.9395773410797119 2023-01-24 03:12:51.742505: step: 216/463, loss: 0.050523996353149414 2023-01-24 03:12:52.383570: step: 218/463, loss: 0.040161505341529846 2023-01-24 03:12:52.968102: step: 220/463, loss: 0.0412868969142437 2023-01-24 03:12:53.514494: step: 222/463, loss: 0.019668761640787125 2023-01-24 03:12:54.158122: step: 224/463, loss: 0.004019651561975479 2023-01-24 03:12:54.840039: step: 226/463, loss: 0.043066561222076416 2023-01-24 03:12:55.424165: step: 228/463, loss: 0.1460483968257904 2023-01-24 03:12:56.023900: step: 230/463, loss: 0.00818039383739233 2023-01-24 03:12:56.599452: step: 232/463, loss: 0.006487493868917227 2023-01-24 03:12:57.174082: step: 234/463, loss: 0.0703156590461731 2023-01-24 03:12:57.839236: step: 236/463, loss: 0.008411692455410957 2023-01-24 03:12:58.384384: step: 238/463, loss: 0.011830761097371578 2023-01-24 03:12:59.000678: step: 240/463, loss: 0.16915813088417053 2023-01-24 03:12:59.586564: step: 242/463, loss: 0.002356280107051134 2023-01-24 03:13:00.203745: step: 244/463, loss: 0.00916910357773304 2023-01-24 03:13:00.807475: step: 246/463, loss: 0.040827035903930664 2023-01-24 03:13:01.513337: step: 248/463, loss: 0.012625100091099739 2023-01-24 03:13:02.126414: step: 250/463, loss: 0.011709875427186489 2023-01-24 03:13:02.801914: step: 252/463, loss: 0.002782276598736644 2023-01-24 03:13:03.457041: step: 254/463, loss: 0.019311297684907913 2023-01-24 03:13:04.135813: step: 256/463, loss: 0.014067348092794418 2023-01-24 03:13:04.757527: step: 258/463, loss: 0.0077292839996516705 2023-01-24 03:13:05.454882: step: 260/463, loss: 0.07472358644008636 2023-01-24 03:13:06.108631: step: 262/463, loss: 0.5500505566596985 2023-01-24 03:13:06.719423: step: 264/463, loss: 0.01905016414821148 2023-01-24 03:13:07.314352: step: 266/463, loss: 0.002901636529713869 2023-01-24 03:13:07.905934: step: 268/463, loss: 0.31093186140060425 2023-01-24 03:13:08.499240: step: 270/463, loss: 0.04626436159014702 2023-01-24 03:13:09.152859: step: 272/463, loss: 0.07923506200313568 2023-01-24 03:13:09.767944: step: 274/463, loss: 0.1362936645746231 2023-01-24 03:13:10.400116: step: 276/463, loss: 0.052740342915058136 2023-01-24 03:13:11.015272: step: 278/463, loss: 0.031582821160554886 2023-01-24 03:13:11.589369: step: 280/463, loss: 0.020128924399614334 2023-01-24 03:13:12.155773: step: 282/463, loss: 0.11607906222343445 2023-01-24 03:13:12.842842: step: 284/463, loss: 0.0046155150048434734 2023-01-24 03:13:13.518856: step: 286/463, loss: 0.0035585558507591486 2023-01-24 03:13:14.178863: step: 288/463, loss: 0.01247965358197689 2023-01-24 03:13:14.708679: step: 290/463, loss: 0.020329592749476433 2023-01-24 03:13:15.432768: step: 292/463, loss: 0.045249029994010925 2023-01-24 03:13:15.952781: step: 294/463, loss: 0.005409767851233482 2023-01-24 03:13:16.546083: step: 296/463, loss: 0.019977781921625137 2023-01-24 03:13:17.191823: step: 298/463, loss: 0.01569727249443531 2023-01-24 03:13:17.846290: step: 300/463, loss: 0.07623255252838135 2023-01-24 03:13:18.486782: step: 302/463, loss: 0.045567549765110016 2023-01-24 03:13:19.090665: step: 304/463, loss: 0.020051337778568268 2023-01-24 03:13:19.717812: step: 306/463, loss: 0.026783756911754608 2023-01-24 03:13:20.332166: step: 308/463, loss: 0.02466309443116188 2023-01-24 03:13:20.894043: step: 310/463, loss: 0.11113037168979645 2023-01-24 03:13:21.463540: step: 312/463, loss: 0.002726368373259902 2023-01-24 03:13:22.088344: step: 314/463, loss: 0.012237334623932838 2023-01-24 03:13:22.674655: step: 316/463, loss: 0.23267483711242676 2023-01-24 03:13:23.279463: step: 318/463, loss: 0.018688658252358437 2023-01-24 03:13:23.836645: step: 320/463, loss: 0.00025364590692333877 2023-01-24 03:13:24.444721: step: 322/463, loss: 0.04208621755242348 2023-01-24 03:13:25.112640: step: 324/463, loss: 0.0012273071333765984 2023-01-24 03:13:25.766024: step: 326/463, loss: 0.0229034423828125 2023-01-24 03:13:26.429636: step: 328/463, loss: 0.004773963242769241 2023-01-24 03:13:27.063544: step: 330/463, loss: 0.04242382198572159 2023-01-24 03:13:27.655425: step: 332/463, loss: 0.10915303975343704 2023-01-24 03:13:28.315821: step: 334/463, loss: 0.0036423001438379288 2023-01-24 03:13:28.941646: step: 336/463, loss: 0.021851707249879837 2023-01-24 03:13:29.579060: step: 338/463, loss: 0.04234820604324341 2023-01-24 03:13:30.228301: step: 340/463, loss: 0.0048447963781654835 2023-01-24 03:13:30.821627: step: 342/463, loss: 0.012705449014902115 2023-01-24 03:13:31.440460: step: 344/463, loss: 0.000829791824799031 2023-01-24 03:13:32.042132: step: 346/463, loss: 0.055353257805109024 2023-01-24 03:13:32.684511: step: 348/463, loss: 0.009019903838634491 2023-01-24 03:13:33.312523: step: 350/463, loss: 0.020258044824004173 2023-01-24 03:13:34.011547: step: 352/463, loss: 0.06482817977666855 2023-01-24 03:13:34.579443: step: 354/463, loss: 0.01929132454097271 2023-01-24 03:13:35.198612: step: 356/463, loss: 0.0025611575692892075 2023-01-24 03:13:35.835441: step: 358/463, loss: 0.04649610444903374 2023-01-24 03:13:36.510250: step: 360/463, loss: 0.016384338960051537 2023-01-24 03:13:37.136036: step: 362/463, loss: 0.08392742276191711 2023-01-24 03:13:37.709483: step: 364/463, loss: 0.024707302451133728 2023-01-24 03:13:38.316272: step: 366/463, loss: 0.0018695153994485736 2023-01-24 03:13:38.923455: step: 368/463, loss: 0.02039937488734722 2023-01-24 03:13:39.557151: step: 370/463, loss: 0.0023016519844532013 2023-01-24 03:13:40.266032: step: 372/463, loss: 0.061753854155540466 2023-01-24 03:13:40.878758: step: 374/463, loss: 0.018502971157431602 2023-01-24 03:13:41.525160: step: 376/463, loss: 0.22654902935028076 2023-01-24 03:13:42.104957: step: 378/463, loss: 0.05308860167860985 2023-01-24 03:13:42.730954: step: 380/463, loss: 0.04791240394115448 2023-01-24 03:13:43.336001: step: 382/463, loss: 0.008300859481096268 2023-01-24 03:13:43.954433: step: 384/463, loss: 0.016533823683857918 2023-01-24 03:13:44.533011: step: 386/463, loss: 0.10109324753284454 2023-01-24 03:13:45.125083: step: 388/463, loss: 0.0027686231769621372 2023-01-24 03:13:45.739322: step: 390/463, loss: 0.13375160098075867 2023-01-24 03:13:46.321685: step: 392/463, loss: 0.031212475150823593 2023-01-24 03:13:46.866454: step: 394/463, loss: 0.02566165290772915 2023-01-24 03:13:47.502611: step: 396/463, loss: 0.020588349550962448 2023-01-24 03:13:48.086177: step: 398/463, loss: 0.0033577983267605305 2023-01-24 03:13:48.747740: step: 400/463, loss: 0.01071096584200859 2023-01-24 03:13:49.382958: step: 402/463, loss: 0.005720159038901329 2023-01-24 03:13:50.050333: step: 404/463, loss: 0.14227540791034698 2023-01-24 03:13:50.669343: step: 406/463, loss: 0.006035880651324987 2023-01-24 03:13:51.294876: step: 408/463, loss: 0.01138804666697979 2023-01-24 03:13:51.884797: step: 410/463, loss: 0.06716389954090118 2023-01-24 03:13:52.526136: step: 412/463, loss: 0.018787872046232224 2023-01-24 03:13:53.297055: step: 414/463, loss: 0.13190680742263794 2023-01-24 03:13:53.904482: step: 416/463, loss: 0.06670462340116501 2023-01-24 03:13:54.537070: step: 418/463, loss: 0.07727526873350143 2023-01-24 03:13:55.149019: step: 420/463, loss: 0.01599184423685074 2023-01-24 03:13:55.746724: step: 422/463, loss: 0.06036541610956192 2023-01-24 03:13:56.265955: step: 424/463, loss: 0.013064800761640072 2023-01-24 03:13:56.819388: step: 426/463, loss: 0.006833591032773256 2023-01-24 03:13:57.454602: step: 428/463, loss: 0.031750891357660294 2023-01-24 03:13:58.121729: step: 430/463, loss: 0.02114529348909855 2023-01-24 03:13:58.776913: step: 432/463, loss: 0.03230300918221474 2023-01-24 03:13:59.370554: step: 434/463, loss: 0.010033736936748028 2023-01-24 03:14:00.006314: step: 436/463, loss: 0.01874353177845478 2023-01-24 03:14:00.623438: step: 438/463, loss: 0.004949798341840506 2023-01-24 03:14:01.246437: step: 440/463, loss: 0.01905561238527298 2023-01-24 03:14:01.881496: step: 442/463, loss: 0.025847643613815308 2023-01-24 03:14:02.453612: step: 444/463, loss: 0.0005596587434411049 2023-01-24 03:14:03.058213: step: 446/463, loss: 0.025552352890372276 2023-01-24 03:14:03.694922: step: 448/463, loss: 0.00875480379909277 2023-01-24 03:14:04.299636: step: 450/463, loss: 0.010331067256629467 2023-01-24 03:14:04.848510: step: 452/463, loss: 0.00190172647126019 2023-01-24 03:14:05.469968: step: 454/463, loss: 0.037141069769859314 2023-01-24 03:14:06.097997: step: 456/463, loss: 0.03838363289833069 2023-01-24 03:14:06.686918: step: 458/463, loss: 0.059563618153333664 2023-01-24 03:14:07.250676: step: 460/463, loss: 0.0016895380103960633 2023-01-24 03:14:07.863402: step: 462/463, loss: 0.06795158237218857 2023-01-24 03:14:08.476534: step: 464/463, loss: 0.007092796266078949 2023-01-24 03:14:09.084338: step: 466/463, loss: 0.06822799891233444 2023-01-24 03:14:09.747047: step: 468/463, loss: 0.020045703276991844 2023-01-24 03:14:10.394285: step: 470/463, loss: 0.03994634747505188 2023-01-24 03:14:10.970223: step: 472/463, loss: 0.007352486252784729 2023-01-24 03:14:11.534487: step: 474/463, loss: 0.08837493509054184 2023-01-24 03:14:12.109856: step: 476/463, loss: 0.031150689348578453 2023-01-24 03:14:12.693458: step: 478/463, loss: 0.06251810491085052 2023-01-24 03:14:13.299948: step: 480/463, loss: 0.031099554151296616 2023-01-24 03:14:13.912972: step: 482/463, loss: 0.07929482311010361 2023-01-24 03:14:14.480143: step: 484/463, loss: 0.005624769255518913 2023-01-24 03:14:15.106576: step: 486/463, loss: 0.5399792790412903 2023-01-24 03:14:15.814188: step: 488/463, loss: 0.04697442799806595 2023-01-24 03:14:16.510882: step: 490/463, loss: 0.07591967284679413 2023-01-24 03:14:17.121231: step: 492/463, loss: 0.04184211418032646 2023-01-24 03:14:17.760409: step: 494/463, loss: 0.014643094502389431 2023-01-24 03:14:18.319889: step: 496/463, loss: 0.007793131750077009 2023-01-24 03:14:18.975041: step: 498/463, loss: 0.09160103648900986 2023-01-24 03:14:19.604740: step: 500/463, loss: 0.035016074776649475 2023-01-24 03:14:20.202465: step: 502/463, loss: 0.024046001955866814 2023-01-24 03:14:20.822629: step: 504/463, loss: 0.009723913855850697 2023-01-24 03:14:21.423544: step: 506/463, loss: 0.004270180594176054 2023-01-24 03:14:22.071431: step: 508/463, loss: 0.016087761148810387 2023-01-24 03:14:22.614245: step: 510/463, loss: 0.006169023457914591 2023-01-24 03:14:23.253557: step: 512/463, loss: 0.05757845193147659 2023-01-24 03:14:23.858498: step: 514/463, loss: 0.00573571166023612 2023-01-24 03:14:24.427859: step: 516/463, loss: 0.07166961580514908 2023-01-24 03:14:24.996356: step: 518/463, loss: 0.004224637523293495 2023-01-24 03:14:25.642050: step: 520/463, loss: 0.0032673669047653675 2023-01-24 03:14:26.279955: step: 522/463, loss: 0.025617776438593864 2023-01-24 03:14:26.884905: step: 524/463, loss: 0.06886880844831467 2023-01-24 03:14:27.436323: step: 526/463, loss: 0.004455070476979017 2023-01-24 03:14:28.118514: step: 528/463, loss: 0.006259072571992874 2023-01-24 03:14:28.748094: step: 530/463, loss: 0.04033125936985016 2023-01-24 03:14:29.356814: step: 532/463, loss: 0.053200703114271164 2023-01-24 03:14:29.926279: step: 534/463, loss: 0.004216287285089493 2023-01-24 03:14:30.649567: step: 536/463, loss: 0.06604675203561783 2023-01-24 03:14:31.268387: step: 538/463, loss: 0.05718152970075607 2023-01-24 03:14:31.812705: step: 540/463, loss: 0.00031880661845207214 2023-01-24 03:14:32.397895: step: 542/463, loss: 0.011241381987929344 2023-01-24 03:14:33.038335: step: 544/463, loss: 0.32326120138168335 2023-01-24 03:14:33.709376: step: 546/463, loss: 0.04331723600625992 2023-01-24 03:14:34.333559: step: 548/463, loss: 0.015126056037843227 2023-01-24 03:14:34.941068: step: 550/463, loss: 0.01892212964594364 2023-01-24 03:14:35.548043: step: 552/463, loss: 0.017604444175958633 2023-01-24 03:14:36.114329: step: 554/463, loss: 0.019526148214936256 2023-01-24 03:14:36.675984: step: 556/463, loss: 0.6796872019767761 2023-01-24 03:14:37.288079: step: 558/463, loss: 0.03267296403646469 2023-01-24 03:14:37.872656: step: 560/463, loss: 0.08375424891710281 2023-01-24 03:14:38.463855: step: 562/463, loss: 0.04032605141401291 2023-01-24 03:14:39.119418: step: 564/463, loss: 0.03152478113770485 2023-01-24 03:14:39.713820: step: 566/463, loss: 0.009987886995077133 2023-01-24 03:14:40.275484: step: 568/463, loss: 0.0707743912935257 2023-01-24 03:14:40.929513: step: 570/463, loss: 0.02383466064929962 2023-01-24 03:14:41.510780: step: 572/463, loss: 0.032773490995168686 2023-01-24 03:14:42.072435: step: 574/463, loss: 0.0003904775658156723 2023-01-24 03:14:42.642664: step: 576/463, loss: 0.003928330261260271 2023-01-24 03:14:43.200217: step: 578/463, loss: 0.33052366971969604 2023-01-24 03:14:43.884309: step: 580/463, loss: 0.0347343273460865 2023-01-24 03:14:44.500516: step: 582/463, loss: 0.05461281165480614 2023-01-24 03:14:45.113436: step: 584/463, loss: 0.1299811154603958 2023-01-24 03:14:45.718439: step: 586/463, loss: 0.007315927185118198 2023-01-24 03:14:46.357691: step: 588/463, loss: 0.0334121473133564 2023-01-24 03:14:46.931019: step: 590/463, loss: 0.08426441997289658 2023-01-24 03:14:47.545981: step: 592/463, loss: 0.050887297838926315 2023-01-24 03:14:48.155581: step: 594/463, loss: 0.10563686490058899 2023-01-24 03:14:48.727205: step: 596/463, loss: 0.07363232970237732 2023-01-24 03:14:49.376588: step: 598/463, loss: 0.054273054003715515 2023-01-24 03:14:49.953594: step: 600/463, loss: 0.07518825680017471 2023-01-24 03:14:50.570757: step: 602/463, loss: 0.04854172468185425 2023-01-24 03:14:51.180743: step: 604/463, loss: 0.007249117363244295 2023-01-24 03:14:51.818633: step: 606/463, loss: 0.06041597202420235 2023-01-24 03:14:52.374791: step: 608/463, loss: 0.5194256901741028 2023-01-24 03:14:52.975730: step: 610/463, loss: 0.039012517780065536 2023-01-24 03:14:53.616910: step: 612/463, loss: 0.028161583468317986 2023-01-24 03:14:54.266046: step: 614/463, loss: 0.0001226312160724774 2023-01-24 03:14:54.853517: step: 616/463, loss: 0.018892131745815277 2023-01-24 03:14:55.466567: step: 618/463, loss: 0.009805514477193356 2023-01-24 03:14:56.064065: step: 620/463, loss: 0.06061761453747749 2023-01-24 03:14:56.656040: step: 622/463, loss: 0.014063299633562565 2023-01-24 03:14:57.290925: step: 624/463, loss: 0.01469547487795353 2023-01-24 03:14:57.927979: step: 626/463, loss: 0.1260693520307541 2023-01-24 03:14:58.462422: step: 628/463, loss: 0.015741469338536263 2023-01-24 03:14:59.135092: step: 630/463, loss: 0.03137549012899399 2023-01-24 03:14:59.723063: step: 632/463, loss: 0.32331040501594543 2023-01-24 03:15:00.317803: step: 634/463, loss: 0.009754701517522335 2023-01-24 03:15:01.005620: step: 636/463, loss: 0.05197267234325409 2023-01-24 03:15:01.618018: step: 638/463, loss: 0.005566580221056938 2023-01-24 03:15:02.226237: step: 640/463, loss: 0.012628620490431786 2023-01-24 03:15:02.986760: step: 642/463, loss: 0.20186926424503326 2023-01-24 03:15:03.638700: step: 644/463, loss: 0.028702430427074432 2023-01-24 03:15:04.225034: step: 646/463, loss: 0.048512354493141174 2023-01-24 03:15:04.880545: step: 648/463, loss: 0.002821398666128516 2023-01-24 03:15:05.495436: step: 650/463, loss: 0.03064820170402527 2023-01-24 03:15:06.123561: step: 652/463, loss: 0.004929949529469013 2023-01-24 03:15:06.773682: step: 654/463, loss: 0.042304202914237976 2023-01-24 03:15:07.357603: step: 656/463, loss: 0.11738227307796478 2023-01-24 03:15:07.922510: step: 658/463, loss: 0.011186796240508556 2023-01-24 03:15:08.561158: step: 660/463, loss: 0.09208541363477707 2023-01-24 03:15:09.278117: step: 662/463, loss: 0.07006802409887314 2023-01-24 03:15:09.817729: step: 664/463, loss: 0.04129285365343094 2023-01-24 03:15:10.415395: step: 666/463, loss: 0.03904534876346588 2023-01-24 03:15:11.009976: step: 668/463, loss: 0.060648489743471146 2023-01-24 03:15:11.595870: step: 670/463, loss: 0.8541082143783569 2023-01-24 03:15:12.197906: step: 672/463, loss: 0.023084085434675217 2023-01-24 03:15:12.786900: step: 674/463, loss: 0.026811780408024788 2023-01-24 03:15:13.429916: step: 676/463, loss: 0.049302905797958374 2023-01-24 03:15:14.079751: step: 678/463, loss: 0.06594472378492355 2023-01-24 03:15:14.669074: step: 680/463, loss: 0.1358116716146469 2023-01-24 03:15:15.264759: step: 682/463, loss: 0.000393335911212489 2023-01-24 03:15:15.920527: step: 684/463, loss: 0.04331202059984207 2023-01-24 03:15:16.486347: step: 686/463, loss: 0.0031838964205235243 2023-01-24 03:15:17.144583: step: 688/463, loss: 0.15263943374156952 2023-01-24 03:15:17.779636: step: 690/463, loss: 0.032084375619888306 2023-01-24 03:15:18.439270: step: 692/463, loss: 0.033500246703624725 2023-01-24 03:15:19.078430: step: 694/463, loss: 0.02280724234879017 2023-01-24 03:15:19.697644: step: 696/463, loss: 0.006036567501723766 2023-01-24 03:15:20.447951: step: 698/463, loss: 0.027374595403671265 2023-01-24 03:15:21.110426: step: 700/463, loss: 0.029568640515208244 2023-01-24 03:15:21.695680: step: 702/463, loss: 0.011715746484696865 2023-01-24 03:15:22.362146: step: 704/463, loss: 0.012809795327484608 2023-01-24 03:15:23.048573: step: 706/463, loss: 0.01322891004383564 2023-01-24 03:15:23.713373: step: 708/463, loss: 0.10790175199508667 2023-01-24 03:15:24.371692: step: 710/463, loss: 0.01220192015171051 2023-01-24 03:15:24.996448: step: 712/463, loss: 0.01692129299044609 2023-01-24 03:15:25.663655: step: 714/463, loss: 0.020427130162715912 2023-01-24 03:15:26.333020: step: 716/463, loss: 0.3314805030822754 2023-01-24 03:15:26.982352: step: 718/463, loss: 0.11914760619401932 2023-01-24 03:15:27.548805: step: 720/463, loss: 0.021928012371063232 2023-01-24 03:15:28.191844: step: 722/463, loss: 0.019401831552386284 2023-01-24 03:15:28.800273: step: 724/463, loss: 0.0754886195063591 2023-01-24 03:15:29.372382: step: 726/463, loss: 0.022181248292326927 2023-01-24 03:15:29.978024: step: 728/463, loss: 0.08377305418252945 2023-01-24 03:15:30.574917: step: 730/463, loss: 0.0491647943854332 2023-01-24 03:15:31.255712: step: 732/463, loss: 0.30081477761268616 2023-01-24 03:15:31.914025: step: 734/463, loss: 0.009934273548424244 2023-01-24 03:15:32.583590: step: 736/463, loss: 0.1489568054676056 2023-01-24 03:15:33.147328: step: 738/463, loss: 0.016113916411995888 2023-01-24 03:15:33.749254: step: 740/463, loss: 0.011813441291451454 2023-01-24 03:15:34.332646: step: 742/463, loss: 0.0539797767996788 2023-01-24 03:15:34.938378: step: 744/463, loss: 0.024049753323197365 2023-01-24 03:15:35.538405: step: 746/463, loss: 0.019513430073857307 2023-01-24 03:15:36.155957: step: 748/463, loss: 0.00860277097672224 2023-01-24 03:15:36.781964: step: 750/463, loss: 0.008631817065179348 2023-01-24 03:15:37.494260: step: 752/463, loss: 0.027108194306492805 2023-01-24 03:15:38.131022: step: 754/463, loss: 0.13367898762226105 2023-01-24 03:15:38.812625: step: 756/463, loss: 0.0070381248369812965 2023-01-24 03:15:39.396743: step: 758/463, loss: 0.053361233323812485 2023-01-24 03:15:39.981284: step: 760/463, loss: 0.06090757995843887 2023-01-24 03:15:40.614442: step: 762/463, loss: 0.007821215316653252 2023-01-24 03:15:41.252298: step: 764/463, loss: 0.1697133630514145 2023-01-24 03:15:41.846315: step: 766/463, loss: 0.008045196533203125 2023-01-24 03:15:42.513937: step: 768/463, loss: 0.02766299992799759 2023-01-24 03:15:43.147424: step: 770/463, loss: 0.008898507803678513 2023-01-24 03:15:43.747736: step: 772/463, loss: 0.0013090830761939287 2023-01-24 03:15:44.387471: step: 774/463, loss: 0.02816668152809143 2023-01-24 03:15:45.011434: step: 776/463, loss: 0.021331140771508217 2023-01-24 03:15:45.598424: step: 778/463, loss: 0.10147601366043091 2023-01-24 03:15:46.365750: step: 780/463, loss: 0.04615384712815285 2023-01-24 03:15:46.950676: step: 782/463, loss: 0.04704257845878601 2023-01-24 03:15:47.592734: step: 784/463, loss: 0.005019376985728741 2023-01-24 03:15:48.347504: step: 786/463, loss: 0.04884747788310051 2023-01-24 03:15:48.911442: step: 788/463, loss: 0.015131869353353977 2023-01-24 03:15:49.484836: step: 790/463, loss: 0.000488363322801888 2023-01-24 03:15:50.076080: step: 792/463, loss: 0.045728642493486404 2023-01-24 03:15:50.685265: step: 794/463, loss: 0.05794370174407959 2023-01-24 03:15:51.354659: step: 796/463, loss: 0.05631436035037041 2023-01-24 03:15:51.977470: step: 798/463, loss: 0.012864883989095688 2023-01-24 03:15:52.572733: step: 800/463, loss: 0.036105845123529434 2023-01-24 03:15:53.269811: step: 802/463, loss: 0.012082905508577824 2023-01-24 03:15:53.923689: step: 804/463, loss: 0.01934638060629368 2023-01-24 03:15:54.608168: step: 806/463, loss: 0.032570015639066696 2023-01-24 03:15:55.292025: step: 808/463, loss: 0.17673109471797943 2023-01-24 03:15:55.841856: step: 810/463, loss: 0.008372608572244644 2023-01-24 03:15:56.485554: step: 812/463, loss: 0.03140426427125931 2023-01-24 03:15:57.118671: step: 814/463, loss: 0.03963291272521019 2023-01-24 03:15:57.740067: step: 816/463, loss: 0.04781262204051018 2023-01-24 03:15:58.375608: step: 818/463, loss: 0.0009222657536156476 2023-01-24 03:15:58.985566: step: 820/463, loss: 0.04003676399588585 2023-01-24 03:15:59.602507: step: 822/463, loss: 0.001807502587325871 2023-01-24 03:16:00.263805: step: 824/463, loss: 0.02812432125210762 2023-01-24 03:16:00.909897: step: 826/463, loss: 0.0046024015173316 2023-01-24 03:16:01.486488: step: 828/463, loss: 0.05028145760297775 2023-01-24 03:16:02.092337: step: 830/463, loss: 0.00679796701297164 2023-01-24 03:16:02.657143: step: 832/463, loss: 0.02829374186694622 2023-01-24 03:16:03.344386: step: 834/463, loss: 0.03555215522646904 2023-01-24 03:16:04.004879: step: 836/463, loss: 0.024895858019590378 2023-01-24 03:16:04.623564: step: 838/463, loss: 0.03511698544025421 2023-01-24 03:16:05.295679: step: 840/463, loss: 0.02095201052725315 2023-01-24 03:16:05.907986: step: 842/463, loss: 0.011551769450306892 2023-01-24 03:16:06.506531: step: 844/463, loss: 0.001830400782637298 2023-01-24 03:16:07.140320: step: 846/463, loss: 0.019419299438595772 2023-01-24 03:16:07.721584: step: 848/463, loss: 0.12913839519023895 2023-01-24 03:16:08.323032: step: 850/463, loss: 0.06914029270410538 2023-01-24 03:16:08.961573: step: 852/463, loss: 0.04706982150673866 2023-01-24 03:16:09.593337: step: 854/463, loss: 0.04811506345868111 2023-01-24 03:16:10.138925: step: 856/463, loss: 0.02079155296087265 2023-01-24 03:16:10.729202: step: 858/463, loss: 0.02727232128381729 2023-01-24 03:16:11.348960: step: 860/463, loss: 0.018009401857852936 2023-01-24 03:16:11.950246: step: 862/463, loss: 0.0789589062333107 2023-01-24 03:16:12.535055: step: 864/463, loss: 0.003665331983938813 2023-01-24 03:16:13.194725: step: 866/463, loss: 0.004751025699079037 2023-01-24 03:16:13.734077: step: 868/463, loss: 0.03943350166082382 2023-01-24 03:16:14.404003: step: 870/463, loss: 0.010919580236077309 2023-01-24 03:16:14.965865: step: 872/463, loss: 0.011350066401064396 2023-01-24 03:16:15.657919: step: 874/463, loss: 0.007064160890877247 2023-01-24 03:16:16.256876: step: 876/463, loss: 0.028873734176158905 2023-01-24 03:16:16.867693: step: 878/463, loss: 0.021631957963109016 2023-01-24 03:16:17.498518: step: 880/463, loss: 0.12724556028842926 2023-01-24 03:16:18.129229: step: 882/463, loss: 0.04799959436058998 2023-01-24 03:16:18.720349: step: 884/463, loss: 0.0035819008480757475 2023-01-24 03:16:19.356002: step: 886/463, loss: 0.006701014004647732 2023-01-24 03:16:19.985357: step: 888/463, loss: 0.08809933811426163 2023-01-24 03:16:20.557744: step: 890/463, loss: 0.018438981845974922 2023-01-24 03:16:21.203308: step: 892/463, loss: 0.27498531341552734 2023-01-24 03:16:21.822866: step: 894/463, loss: 0.05050842463970184 2023-01-24 03:16:22.310042: step: 896/463, loss: 0.018362948670983315 2023-01-24 03:16:22.937890: step: 898/463, loss: 0.028361402451992035 2023-01-24 03:16:23.495214: step: 900/463, loss: 0.01950116828083992 2023-01-24 03:16:24.082788: step: 902/463, loss: 0.02170678600668907 2023-01-24 03:16:24.697129: step: 904/463, loss: 0.056740038096904755 2023-01-24 03:16:25.351991: step: 906/463, loss: 0.018573565408587456 2023-01-24 03:16:25.913487: step: 908/463, loss: 0.023815959692001343 2023-01-24 03:16:26.521404: step: 910/463, loss: 0.09343187510967255 2023-01-24 03:16:27.121019: step: 912/463, loss: 0.11053943634033203 2023-01-24 03:16:27.787829: step: 914/463, loss: 0.0008950205519795418 2023-01-24 03:16:28.427075: step: 916/463, loss: 0.015571742318570614 2023-01-24 03:16:29.030876: step: 918/463, loss: 0.045694444328546524 2023-01-24 03:16:29.633346: step: 920/463, loss: 0.73042231798172 2023-01-24 03:16:30.254053: step: 922/463, loss: 0.013180899433791637 2023-01-24 03:16:30.851344: step: 924/463, loss: 0.014922033064067364 2023-01-24 03:16:31.405048: step: 926/463, loss: 0.015136092901229858 ================================================== Loss: 0.057 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3560656968733024, 'r': 0.31755384730636077, 'f1': 0.33570888170602237}, 'combined': 0.24736443915180595, 'epoch': 26} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3670230289289477, 'r': 0.3845645707527577, 'f1': 0.37558909603141866}, 'combined': 0.29112647634971206, 'epoch': 26} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34207173619823156, 'r': 0.33558081141268636, 'f1': 0.3387951870009305}, 'combined': 0.24963855884279088, 'epoch': 26} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3534175500942465, 'r': 0.38849943925801367, 'f1': 0.3701290629708572}, 'combined': 0.2868942976137745, 'epoch': 26} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33202795669940555, 'r': 0.31942727523073744, 'f1': 0.3256057525079277}, 'combined': 0.2399200281637362, 'epoch': 26} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3537936566390201, 'r': 0.37297915823984934, 'f1': 0.36313317598653794}, 'combined': 0.28147164837234045, 'epoch': 26} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39285714285714285, 'r': 0.3142857142857143, 'f1': 0.34920634920634924}, 'combined': 0.23280423280423282, 'epoch': 26} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3541666666666667, 'r': 0.3695652173913043, 'f1': 0.3617021276595745}, 'combined': 0.18085106382978725, 'epoch': 26} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.13793103448275862, 'f1': 0.20512820512820515}, 'combined': 0.13675213675213677, 'epoch': 26} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35531381856540084, 'r': 0.3195801707779886, 'f1': 0.336500999000999}, 'combined': 0.24794810452705188, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35077406976066583, 'r': 0.38269193088778525, 'f1': 0.3660385237854157}, 'combined': 0.2837236404461117, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.3142857142857143, 'f1': 0.360655737704918}, 'combined': 0.24043715846994532, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3556843389529725, 'r': 0.32598773380319873, 'f1': 0.340189179632249}, 'combined': 0.25066571130797294, 'epoch': 19} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35235259195938173, 'r': 0.3698406801632481, 'f1': 0.3608848968767838}, 'combined': 0.2797289631293731, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.41304347826086957, 'r': 0.41304347826086957, 'f1': 0.41304347826086957}, 'combined': 0.20652173913043478, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33842201576576575, 'r': 0.3326425126502214, 'f1': 0.3355073763955343}, 'combined': 0.2472159615546042, 'epoch': 14} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33644544699224593, 'r': 0.3627302475385151, 'f1': 0.3490937720671424}, 'combined': 0.2705894309802731, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.20689655172413793, 'f1': 0.2926829268292683}, 'combined': 0.19512195121951217, 'epoch': 14} ****************************** Epoch: 27 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:19:06.461249: step: 2/463, loss: 0.006801618728786707 2023-01-24 03:19:07.080575: step: 4/463, loss: 0.024481989443302155 2023-01-24 03:19:07.742892: step: 6/463, loss: 0.0005853949696756899 2023-01-24 03:19:08.296284: step: 8/463, loss: 0.017126750200986862 2023-01-24 03:19:08.960028: step: 10/463, loss: 0.0030214719008654356 2023-01-24 03:19:09.569184: step: 12/463, loss: 0.013345425017178059 2023-01-24 03:19:10.155668: step: 14/463, loss: 0.5065322518348694 2023-01-24 03:19:10.727725: step: 16/463, loss: 0.004352352116256952 2023-01-24 03:19:11.352548: step: 18/463, loss: 0.002745497040450573 2023-01-24 03:19:11.977465: step: 20/463, loss: 0.038777854293584824 2023-01-24 03:19:12.589855: step: 22/463, loss: 0.02615339122712612 2023-01-24 03:19:13.201102: step: 24/463, loss: 0.012106833979487419 2023-01-24 03:19:13.792652: step: 26/463, loss: 0.0317654013633728 2023-01-24 03:19:14.374962: step: 28/463, loss: 0.05028447136282921 2023-01-24 03:19:14.959709: step: 30/463, loss: 0.015668125823140144 2023-01-24 03:19:15.606900: step: 32/463, loss: 0.0029103541746735573 2023-01-24 03:19:16.253468: step: 34/463, loss: 0.0645848885178566 2023-01-24 03:19:16.868579: step: 36/463, loss: 0.03229474648833275 2023-01-24 03:19:17.490606: step: 38/463, loss: 0.0023157589603215456 2023-01-24 03:19:18.017088: step: 40/463, loss: 0.02975877933204174 2023-01-24 03:19:18.602488: step: 42/463, loss: 0.004943448584526777 2023-01-24 03:19:19.210916: step: 44/463, loss: 0.0011191468220204115 2023-01-24 03:19:19.824108: step: 46/463, loss: 0.0014116100501269102 2023-01-24 03:19:20.441129: step: 48/463, loss: 0.0008793671731837094 2023-01-24 03:19:21.099549: step: 50/463, loss: 1.3605868816375732 2023-01-24 03:19:21.688975: step: 52/463, loss: 0.02973097562789917 2023-01-24 03:19:22.310084: step: 54/463, loss: 0.0024194391444325447 2023-01-24 03:19:22.942619: step: 56/463, loss: 0.016222387552261353 2023-01-24 03:19:23.572657: step: 58/463, loss: 0.025964708998799324 2023-01-24 03:19:24.163636: step: 60/463, loss: 0.02551180124282837 2023-01-24 03:19:24.741106: step: 62/463, loss: 0.0044490871950984 2023-01-24 03:19:25.330153: step: 64/463, loss: 0.0005965310847386718 2023-01-24 03:19:25.972853: step: 66/463, loss: 0.019502514973282814 2023-01-24 03:19:26.652755: step: 68/463, loss: 0.08221524208784103 2023-01-24 03:19:27.287138: step: 70/463, loss: 0.008652446791529655 2023-01-24 03:19:28.017913: step: 72/463, loss: 0.018750254064798355 2023-01-24 03:19:28.626457: step: 74/463, loss: 0.01460979226976633 2023-01-24 03:19:29.293896: step: 76/463, loss: 0.019064562395215034 2023-01-24 03:19:29.903786: step: 78/463, loss: 0.7012366652488708 2023-01-24 03:19:30.589407: step: 80/463, loss: 0.0074412464164197445 2023-01-24 03:19:31.245485: step: 82/463, loss: 0.018528498709201813 2023-01-24 03:19:31.832583: step: 84/463, loss: 0.11513300985097885 2023-01-24 03:19:32.397071: step: 86/463, loss: 0.007294955663383007 2023-01-24 03:19:32.982166: step: 88/463, loss: 0.013756808824837208 2023-01-24 03:19:33.607380: step: 90/463, loss: 0.013349073007702827 2023-01-24 03:19:34.196270: step: 92/463, loss: 0.0031716362573206425 2023-01-24 03:19:34.803770: step: 94/463, loss: 0.04813292250037193 2023-01-24 03:19:35.514501: step: 96/463, loss: 2.49385404586792 2023-01-24 03:19:36.160774: step: 98/463, loss: 0.012211144901812077 2023-01-24 03:19:36.789448: step: 100/463, loss: 0.04815521091222763 2023-01-24 03:19:37.432749: step: 102/463, loss: 0.0015425365418195724 2023-01-24 03:19:38.053038: step: 104/463, loss: 0.0004060573410242796 2023-01-24 03:19:38.720667: step: 106/463, loss: 0.011621109209954739 2023-01-24 03:19:39.335459: step: 108/463, loss: 0.21835660934448242 2023-01-24 03:19:39.895473: step: 110/463, loss: 0.002669976558536291 2023-01-24 03:19:40.449421: step: 112/463, loss: 0.0043038190342485905 2023-01-24 03:19:41.047000: step: 114/463, loss: 0.36173519492149353 2023-01-24 03:19:41.634766: step: 116/463, loss: 0.0244632326066494 2023-01-24 03:19:42.259943: step: 118/463, loss: 0.004608567804098129 2023-01-24 03:19:42.887924: step: 120/463, loss: 0.045676980167627335 2023-01-24 03:19:43.488352: step: 122/463, loss: 0.008071781136095524 2023-01-24 03:19:44.109848: step: 124/463, loss: 0.0537240132689476 2023-01-24 03:19:44.680311: step: 126/463, loss: 0.022706232964992523 2023-01-24 03:19:45.295089: step: 128/463, loss: 0.08451388776302338 2023-01-24 03:19:45.849525: step: 130/463, loss: 0.005262545309960842 2023-01-24 03:19:46.488518: step: 132/463, loss: 0.018080025911331177 2023-01-24 03:19:47.066380: step: 134/463, loss: 0.12312135845422745 2023-01-24 03:19:47.641174: step: 136/463, loss: 0.016220148652791977 2023-01-24 03:19:48.254648: step: 138/463, loss: 0.01777135580778122 2023-01-24 03:19:48.843714: step: 140/463, loss: 0.005331530701369047 2023-01-24 03:19:49.474773: step: 142/463, loss: 0.09677737206220627 2023-01-24 03:19:50.066251: step: 144/463, loss: 0.26746582984924316 2023-01-24 03:19:50.664785: step: 146/463, loss: 0.007170252501964569 2023-01-24 03:19:51.337283: step: 148/463, loss: 0.001276511582545936 2023-01-24 03:19:51.989787: step: 150/463, loss: 0.07480557262897491 2023-01-24 03:19:52.671774: step: 152/463, loss: 0.017730310559272766 2023-01-24 03:19:53.295179: step: 154/463, loss: 0.054459985345602036 2023-01-24 03:19:53.921178: step: 156/463, loss: 0.03287053108215332 2023-01-24 03:19:54.613154: step: 158/463, loss: 0.08141745626926422 2023-01-24 03:19:55.237566: step: 160/463, loss: 0.09254834800958633 2023-01-24 03:19:55.854919: step: 162/463, loss: 0.0539998933672905 2023-01-24 03:19:56.457502: step: 164/463, loss: 0.0006075088167563081 2023-01-24 03:19:57.046862: step: 166/463, loss: 0.2539507746696472 2023-01-24 03:19:57.672908: step: 168/463, loss: 0.4138997495174408 2023-01-24 03:19:58.277315: step: 170/463, loss: 0.055280543863773346 2023-01-24 03:19:58.971260: step: 172/463, loss: 0.013681205920875072 2023-01-24 03:19:59.616288: step: 174/463, loss: 0.04357951879501343 2023-01-24 03:20:00.304300: step: 176/463, loss: 0.05259454995393753 2023-01-24 03:20:00.886105: step: 178/463, loss: 0.029853422194719315 2023-01-24 03:20:01.521937: step: 180/463, loss: 0.03497356176376343 2023-01-24 03:20:02.165888: step: 182/463, loss: 0.03157559037208557 2023-01-24 03:20:02.741827: step: 184/463, loss: 0.011175544932484627 2023-01-24 03:20:03.405270: step: 186/463, loss: 0.06846903264522552 2023-01-24 03:20:04.011836: step: 188/463, loss: 0.013638272881507874 2023-01-24 03:20:04.635518: step: 190/463, loss: 0.007281886879354715 2023-01-24 03:20:05.239669: step: 192/463, loss: 0.018111051991581917 2023-01-24 03:20:05.783185: step: 194/463, loss: 0.027280418202280998 2023-01-24 03:20:06.390714: step: 196/463, loss: 0.024845674633979797 2023-01-24 03:20:07.059816: step: 198/463, loss: 0.01672469824552536 2023-01-24 03:20:07.659263: step: 200/463, loss: 0.11909183114767075 2023-01-24 03:20:08.253914: step: 202/463, loss: 1.0849922895431519 2023-01-24 03:20:08.852854: step: 204/463, loss: 0.01760043203830719 2023-01-24 03:20:09.408107: step: 206/463, loss: 0.03172234073281288 2023-01-24 03:20:10.083149: step: 208/463, loss: 0.1025736853480339 2023-01-24 03:20:10.730245: step: 210/463, loss: 0.03677133843302727 2023-01-24 03:20:11.412762: step: 212/463, loss: 0.03349140286445618 2023-01-24 03:20:12.063207: step: 214/463, loss: 0.16007618606090546 2023-01-24 03:20:12.651234: step: 216/463, loss: 0.006242662202566862 2023-01-24 03:20:13.281562: step: 218/463, loss: 0.02215118333697319 2023-01-24 03:20:13.874722: step: 220/463, loss: 0.028634389862418175 2023-01-24 03:20:14.438644: step: 222/463, loss: 0.01322889793664217 2023-01-24 03:20:15.064615: step: 224/463, loss: 0.011774728074669838 2023-01-24 03:20:15.650536: step: 226/463, loss: 0.0705314353108406 2023-01-24 03:20:16.298729: step: 228/463, loss: 0.025055989623069763 2023-01-24 03:20:16.922099: step: 230/463, loss: 0.023890916258096695 2023-01-24 03:20:17.523963: step: 232/463, loss: 0.032594770193099976 2023-01-24 03:20:18.152727: step: 234/463, loss: 0.11032899469137192 2023-01-24 03:20:18.825185: step: 236/463, loss: 0.008940963074564934 2023-01-24 03:20:19.461917: step: 238/463, loss: 0.02393309399485588 2023-01-24 03:20:20.036847: step: 240/463, loss: 5.845835039508529e-05 2023-01-24 03:20:20.695792: step: 242/463, loss: 0.02399780973792076 2023-01-24 03:20:21.346939: step: 244/463, loss: 0.03954317048192024 2023-01-24 03:20:21.956385: step: 246/463, loss: 0.0921514555811882 2023-01-24 03:20:22.560454: step: 248/463, loss: 0.0035555388312786818 2023-01-24 03:20:23.167648: step: 250/463, loss: 0.008145295083522797 2023-01-24 03:20:23.787989: step: 252/463, loss: 0.08888237923383713 2023-01-24 03:20:24.481101: step: 254/463, loss: 0.16362272202968597 2023-01-24 03:20:25.164443: step: 256/463, loss: 0.0011100545525550842 2023-01-24 03:20:25.833180: step: 258/463, loss: 0.00926621351391077 2023-01-24 03:20:26.355832: step: 260/463, loss: 0.02183162048459053 2023-01-24 03:20:26.952950: step: 262/463, loss: 0.014006501995027065 2023-01-24 03:20:27.556089: step: 264/463, loss: 0.05721067637205124 2023-01-24 03:20:28.239957: step: 266/463, loss: 0.005617084912955761 2023-01-24 03:20:28.984748: step: 268/463, loss: 0.0004247408942319453 2023-01-24 03:20:29.587680: step: 270/463, loss: 0.022351911291480064 2023-01-24 03:20:30.182033: step: 272/463, loss: 0.07162696123123169 2023-01-24 03:20:30.816293: step: 274/463, loss: 0.09420433640480042 2023-01-24 03:20:31.391333: step: 276/463, loss: 0.052588921040296555 2023-01-24 03:20:31.988017: step: 278/463, loss: 0.05290602520108223 2023-01-24 03:20:32.663250: step: 280/463, loss: 0.0029987809248268604 2023-01-24 03:20:33.307103: step: 282/463, loss: 0.01152290590107441 2023-01-24 03:20:33.876822: step: 284/463, loss: 0.011828247457742691 2023-01-24 03:20:34.522902: step: 286/463, loss: 0.02860492654144764 2023-01-24 03:20:35.113865: step: 288/463, loss: 0.049362607300281525 2023-01-24 03:20:35.748973: step: 290/463, loss: 0.007479547057300806 2023-01-24 03:20:36.348936: step: 292/463, loss: 0.13382774591445923 2023-01-24 03:20:36.987324: step: 294/463, loss: 0.017522688955068588 2023-01-24 03:20:37.568423: step: 296/463, loss: 0.019173359498381615 2023-01-24 03:20:38.214826: step: 298/463, loss: 0.026090065017342567 2023-01-24 03:20:38.837970: step: 300/463, loss: 0.21056485176086426 2023-01-24 03:20:39.477897: step: 302/463, loss: 0.08407945930957794 2023-01-24 03:20:40.092162: step: 304/463, loss: 0.0062902262434363365 2023-01-24 03:20:40.687166: step: 306/463, loss: 0.02413555048406124 2023-01-24 03:20:41.223428: step: 308/463, loss: 0.0023375474847853184 2023-01-24 03:20:41.852892: step: 310/463, loss: 0.063972607254982 2023-01-24 03:20:42.511951: step: 312/463, loss: 0.024366719648241997 2023-01-24 03:20:43.138267: step: 314/463, loss: 0.05372471734881401 2023-01-24 03:20:43.751929: step: 316/463, loss: 0.012292707338929176 2023-01-24 03:20:44.349549: step: 318/463, loss: 0.055671609938144684 2023-01-24 03:20:44.937034: step: 320/463, loss: 0.0017355423187837005 2023-01-24 03:20:45.625438: step: 322/463, loss: 0.6892048120498657 2023-01-24 03:20:46.244140: step: 324/463, loss: 0.003017947543412447 2023-01-24 03:20:46.849429: step: 326/463, loss: 0.03591708466410637 2023-01-24 03:20:47.480879: step: 328/463, loss: 0.06136185675859451 2023-01-24 03:20:48.111292: step: 330/463, loss: 0.3679247200489044 2023-01-24 03:20:48.693584: step: 332/463, loss: 0.05334608629345894 2023-01-24 03:20:49.262207: step: 334/463, loss: 0.012763542123138905 2023-01-24 03:20:49.861878: step: 336/463, loss: 0.02279418334364891 2023-01-24 03:20:50.479355: step: 338/463, loss: 0.24126674234867096 2023-01-24 03:20:51.130709: step: 340/463, loss: 0.024322649464011192 2023-01-24 03:20:51.694239: step: 342/463, loss: 0.07952488958835602 2023-01-24 03:20:52.299728: step: 344/463, loss: 0.0027727605774998665 2023-01-24 03:20:52.981661: step: 346/463, loss: 0.01889275386929512 2023-01-24 03:20:53.652196: step: 348/463, loss: 0.02207927778363228 2023-01-24 03:20:54.235314: step: 350/463, loss: 0.05525800958275795 2023-01-24 03:20:54.878971: step: 352/463, loss: 0.01295787189155817 2023-01-24 03:20:55.513675: step: 354/463, loss: 0.08429592847824097 2023-01-24 03:20:56.115445: step: 356/463, loss: 0.0337245836853981 2023-01-24 03:20:56.703341: step: 358/463, loss: 0.007017158437520266 2023-01-24 03:20:57.319504: step: 360/463, loss: 0.018636619672179222 2023-01-24 03:20:57.870288: step: 362/463, loss: 0.03909965604543686 2023-01-24 03:20:58.469425: step: 364/463, loss: 0.037853535264730453 2023-01-24 03:20:59.146051: step: 366/463, loss: 0.047538187354803085 2023-01-24 03:20:59.777437: step: 368/463, loss: 0.013282271102070808 2023-01-24 03:21:00.437957: step: 370/463, loss: 0.007747524883598089 2023-01-24 03:21:01.004438: step: 372/463, loss: 0.04123048111796379 2023-01-24 03:21:01.638122: step: 374/463, loss: 0.01996202953159809 2023-01-24 03:21:02.211002: step: 376/463, loss: 0.01084333285689354 2023-01-24 03:21:02.820726: step: 378/463, loss: 0.006899149622768164 2023-01-24 03:21:03.476347: step: 380/463, loss: 0.00422108406201005 2023-01-24 03:21:04.151419: step: 382/463, loss: 0.07936445623636246 2023-01-24 03:21:04.906015: step: 384/463, loss: 0.04219507798552513 2023-01-24 03:21:05.501545: step: 386/463, loss: 0.19513475894927979 2023-01-24 03:21:06.094205: step: 388/463, loss: 0.006555625703185797 2023-01-24 03:21:06.723798: step: 390/463, loss: 0.018704913556575775 2023-01-24 03:21:07.265772: step: 392/463, loss: 0.0010827347869053483 2023-01-24 03:21:07.847870: step: 394/463, loss: 0.013838604092597961 2023-01-24 03:21:08.388824: step: 396/463, loss: 0.0020397063344717026 2023-01-24 03:21:09.075863: step: 398/463, loss: 0.05091743916273117 2023-01-24 03:21:09.736288: step: 400/463, loss: 0.04766228422522545 2023-01-24 03:21:10.307373: step: 402/463, loss: 0.04693634435534477 2023-01-24 03:21:10.891460: step: 404/463, loss: 0.01709311082959175 2023-01-24 03:21:11.482385: step: 406/463, loss: 0.046637218445539474 2023-01-24 03:21:12.105376: step: 408/463, loss: 0.009840174578130245 2023-01-24 03:21:12.684215: step: 410/463, loss: 0.06984955072402954 2023-01-24 03:21:13.302488: step: 412/463, loss: 0.0005389424622990191 2023-01-24 03:21:13.979936: step: 414/463, loss: 0.02151060476899147 2023-01-24 03:21:14.573027: step: 416/463, loss: 0.004965255036950111 2023-01-24 03:21:15.220721: step: 418/463, loss: 0.01524600200355053 2023-01-24 03:21:15.808466: step: 420/463, loss: 0.01070441398769617 2023-01-24 03:21:16.423697: step: 422/463, loss: 0.030057663097977638 2023-01-24 03:21:17.058479: step: 424/463, loss: 0.20159301161766052 2023-01-24 03:21:17.654448: step: 426/463, loss: 0.000649317807983607 2023-01-24 03:21:18.262006: step: 428/463, loss: 0.015819711610674858 2023-01-24 03:21:18.895775: step: 430/463, loss: 0.006689096800982952 2023-01-24 03:21:19.490184: step: 432/463, loss: 0.01107083074748516 2023-01-24 03:21:20.146003: step: 434/463, loss: 0.0056923553347587585 2023-01-24 03:21:20.769989: step: 436/463, loss: 0.17163553833961487 2023-01-24 03:21:21.378157: step: 438/463, loss: 0.008319463580846786 2023-01-24 03:21:22.017147: step: 440/463, loss: 0.015703219920396805 2023-01-24 03:21:22.723371: step: 442/463, loss: 0.02817375399172306 2023-01-24 03:21:23.433424: step: 444/463, loss: 0.019657757133245468 2023-01-24 03:21:24.099318: step: 446/463, loss: 0.020900066941976547 2023-01-24 03:21:24.756387: step: 448/463, loss: 0.00202009966596961 2023-01-24 03:21:25.341905: step: 450/463, loss: 0.017271751537919044 2023-01-24 03:21:25.979879: step: 452/463, loss: 0.03575164079666138 2023-01-24 03:21:26.611237: step: 454/463, loss: 0.18235991895198822 2023-01-24 03:21:27.245611: step: 456/463, loss: 0.02527477778494358 2023-01-24 03:21:27.849175: step: 458/463, loss: 0.0724487230181694 2023-01-24 03:21:28.480481: step: 460/463, loss: 0.011069240048527718 2023-01-24 03:21:29.219888: step: 462/463, loss: 0.03572274371981621 2023-01-24 03:21:29.831470: step: 464/463, loss: 0.0008663491462357342 2023-01-24 03:21:30.468779: step: 466/463, loss: 0.04608675464987755 2023-01-24 03:21:31.063587: step: 468/463, loss: 0.0616801381111145 2023-01-24 03:21:31.721264: step: 470/463, loss: 0.007470866199582815 2023-01-24 03:21:32.359577: step: 472/463, loss: 0.00344840157777071 2023-01-24 03:21:32.940074: step: 474/463, loss: 0.10380856692790985 2023-01-24 03:21:33.653429: step: 476/463, loss: 0.0004235657979734242 2023-01-24 03:21:34.326000: step: 478/463, loss: 0.017793817445635796 2023-01-24 03:21:34.979878: step: 480/463, loss: 0.003294572001323104 2023-01-24 03:21:35.604751: step: 482/463, loss: 0.06237761676311493 2023-01-24 03:21:36.219072: step: 484/463, loss: 0.06818075478076935 2023-01-24 03:21:36.812168: step: 486/463, loss: 0.1589372605085373 2023-01-24 03:21:37.452264: step: 488/463, loss: 0.08697634190320969 2023-01-24 03:21:38.055365: step: 490/463, loss: 0.07513870298862457 2023-01-24 03:21:38.629465: step: 492/463, loss: 0.010979725979268551 2023-01-24 03:21:39.296699: step: 494/463, loss: 0.08214328438043594 2023-01-24 03:21:39.952365: step: 496/463, loss: 0.0057365428656339645 2023-01-24 03:21:40.534856: step: 498/463, loss: 0.02907068096101284 2023-01-24 03:21:41.141401: step: 500/463, loss: 0.17985635995864868 2023-01-24 03:21:41.730425: step: 502/463, loss: 0.02707492746412754 2023-01-24 03:21:42.357164: step: 504/463, loss: 0.045984383672475815 2023-01-24 03:21:42.967797: step: 506/463, loss: 0.02451905980706215 2023-01-24 03:21:43.559807: step: 508/463, loss: 0.5614974498748779 2023-01-24 03:21:44.165144: step: 510/463, loss: 0.0032363124191761017 2023-01-24 03:21:44.747166: step: 512/463, loss: 1.7188669443130493 2023-01-24 03:21:45.334534: step: 514/463, loss: 0.06953755021095276 2023-01-24 03:21:45.946300: step: 516/463, loss: 0.013105669990181923 2023-01-24 03:21:46.629381: step: 518/463, loss: 0.2076960951089859 2023-01-24 03:21:47.192077: step: 520/463, loss: 0.02279459685087204 2023-01-24 03:21:47.787396: step: 522/463, loss: 0.02158598229289055 2023-01-24 03:21:48.372079: step: 524/463, loss: 0.038456112146377563 2023-01-24 03:21:48.942791: step: 526/463, loss: 0.37574759125709534 2023-01-24 03:21:49.513314: step: 528/463, loss: 0.025516856461763382 2023-01-24 03:21:50.197149: step: 530/463, loss: 0.013479000888764858 2023-01-24 03:21:50.847984: step: 532/463, loss: 0.015561453998088837 2023-01-24 03:21:51.398321: step: 534/463, loss: 0.012920614331960678 2023-01-24 03:21:52.025717: step: 536/463, loss: 0.03763693571090698 2023-01-24 03:21:52.658207: step: 538/463, loss: 0.025825072079896927 2023-01-24 03:21:53.206574: step: 540/463, loss: 0.015384487807750702 2023-01-24 03:21:53.833701: step: 542/463, loss: 0.06949108839035034 2023-01-24 03:21:54.484559: step: 544/463, loss: 0.0169373769313097 2023-01-24 03:21:55.092832: step: 546/463, loss: 0.0032806831877678633 2023-01-24 03:21:55.705825: step: 548/463, loss: 0.012654570862650871 2023-01-24 03:21:56.257800: step: 550/463, loss: 0.02029086835682392 2023-01-24 03:21:56.870005: step: 552/463, loss: 0.0030985043849796057 2023-01-24 03:21:57.504611: step: 554/463, loss: 0.08785047382116318 2023-01-24 03:21:58.142480: step: 556/463, loss: 0.01286221481859684 2023-01-24 03:21:58.813674: step: 558/463, loss: 0.007285997737199068 2023-01-24 03:21:59.375864: step: 560/463, loss: 0.004571579862385988 2023-01-24 03:22:00.075638: step: 562/463, loss: 0.005525792948901653 2023-01-24 03:22:00.779065: step: 564/463, loss: 0.047876108437776566 2023-01-24 03:22:01.406832: step: 566/463, loss: 2.84997296333313 2023-01-24 03:22:02.013404: step: 568/463, loss: 0.00041170447366312146 2023-01-24 03:22:02.599298: step: 570/463, loss: 0.0005570273497141898 2023-01-24 03:22:03.173823: step: 572/463, loss: 0.4013631343841553 2023-01-24 03:22:03.818005: step: 574/463, loss: 0.13628098368644714 2023-01-24 03:22:04.421496: step: 576/463, loss: 0.17943322658538818 2023-01-24 03:22:05.043716: step: 578/463, loss: 0.02283461205661297 2023-01-24 03:22:05.677895: step: 580/463, loss: 0.3710094690322876 2023-01-24 03:22:06.255680: step: 582/463, loss: 0.10611604899168015 2023-01-24 03:22:06.912435: step: 584/463, loss: 0.013936948962509632 2023-01-24 03:22:07.602944: step: 586/463, loss: 0.05231943726539612 2023-01-24 03:22:08.275781: step: 588/463, loss: 0.0753253623843193 2023-01-24 03:22:08.882425: step: 590/463, loss: 0.001737541169859469 2023-01-24 03:22:09.507905: step: 592/463, loss: 0.0053075747564435005 2023-01-24 03:22:10.152113: step: 594/463, loss: 0.006139205768704414 2023-01-24 03:22:10.798741: step: 596/463, loss: 0.036545101553201675 2023-01-24 03:22:11.407598: step: 598/463, loss: 0.0047314902767539024 2023-01-24 03:22:11.961314: step: 600/463, loss: 0.01959819905459881 2023-01-24 03:22:12.613419: step: 602/463, loss: 0.1848900467157364 2023-01-24 03:22:13.252774: step: 604/463, loss: 0.03643307834863663 2023-01-24 03:22:13.841632: step: 606/463, loss: 0.030170494690537453 2023-01-24 03:22:14.505650: step: 608/463, loss: 0.022433185949921608 2023-01-24 03:22:15.103192: step: 610/463, loss: 0.008484968915581703 2023-01-24 03:22:15.681542: step: 612/463, loss: 0.0015296782366931438 2023-01-24 03:22:16.304819: step: 614/463, loss: 0.03289848938584328 2023-01-24 03:22:16.891009: step: 616/463, loss: 0.05979551747441292 2023-01-24 03:22:17.501139: step: 618/463, loss: 0.04051965847611427 2023-01-24 03:22:18.256268: step: 620/463, loss: 0.035239219665527344 2023-01-24 03:22:18.897586: step: 622/463, loss: 0.06116524338722229 2023-01-24 03:22:19.529400: step: 624/463, loss: 0.026527203619480133 2023-01-24 03:22:20.138031: step: 626/463, loss: 0.016044294461607933 2023-01-24 03:22:20.694673: step: 628/463, loss: 0.012501475401222706 2023-01-24 03:22:21.271992: step: 630/463, loss: 0.2921847701072693 2023-01-24 03:22:21.870844: step: 632/463, loss: 0.04030440375208855 2023-01-24 03:22:22.495339: step: 634/463, loss: 0.010312661528587341 2023-01-24 03:22:23.062343: step: 636/463, loss: 0.025754885748028755 2023-01-24 03:22:23.678084: step: 638/463, loss: 0.04597608745098114 2023-01-24 03:22:24.267053: step: 640/463, loss: 0.009888525120913982 2023-01-24 03:22:24.917683: step: 642/463, loss: 0.018977848812937737 2023-01-24 03:22:25.518291: step: 644/463, loss: 0.025373727083206177 2023-01-24 03:22:26.104619: step: 646/463, loss: 0.013036086224019527 2023-01-24 03:22:26.717937: step: 648/463, loss: 0.006871116813272238 2023-01-24 03:22:27.328719: step: 650/463, loss: 0.030983146280050278 2023-01-24 03:22:27.916861: step: 652/463, loss: 0.014170871116220951 2023-01-24 03:22:28.515423: step: 654/463, loss: 0.027246158570051193 2023-01-24 03:22:29.033094: step: 656/463, loss: 0.052943792194128036 2023-01-24 03:22:29.595580: step: 658/463, loss: 0.0004469984851311892 2023-01-24 03:22:30.161026: step: 660/463, loss: 0.011634543538093567 2023-01-24 03:22:30.766694: step: 662/463, loss: 0.009565292857587337 2023-01-24 03:22:31.415404: step: 664/463, loss: 0.021424278616905212 2023-01-24 03:22:32.002700: step: 666/463, loss: 0.06298372894525528 2023-01-24 03:22:32.617423: step: 668/463, loss: 0.0040954649448394775 2023-01-24 03:22:33.215167: step: 670/463, loss: 0.01814587414264679 2023-01-24 03:22:33.863051: step: 672/463, loss: 0.03335180506110191 2023-01-24 03:22:34.472089: step: 674/463, loss: 0.004504937678575516 2023-01-24 03:22:35.129879: step: 676/463, loss: 0.22467023134231567 2023-01-24 03:22:35.756989: step: 678/463, loss: 0.048772286623716354 2023-01-24 03:22:36.442661: step: 680/463, loss: 0.18958498537540436 2023-01-24 03:22:37.051404: step: 682/463, loss: 0.06526894122362137 2023-01-24 03:22:37.562710: step: 684/463, loss: 0.0037250046152621508 2023-01-24 03:22:38.136434: step: 686/463, loss: 0.02711670473217964 2023-01-24 03:22:38.722721: step: 688/463, loss: 0.02521621808409691 2023-01-24 03:22:39.287972: step: 690/463, loss: 0.009328854270279408 2023-01-24 03:22:39.890424: step: 692/463, loss: 0.009633393958210945 2023-01-24 03:22:40.536402: step: 694/463, loss: 0.007632388733327389 2023-01-24 03:22:41.121288: step: 696/463, loss: 0.010496960952877998 2023-01-24 03:22:41.895568: step: 698/463, loss: 0.014901124872267246 2023-01-24 03:22:42.535257: step: 700/463, loss: 0.007905877195298672 2023-01-24 03:22:43.148489: step: 702/463, loss: 0.05746883526444435 2023-01-24 03:22:43.762485: step: 704/463, loss: 0.036977849900722504 2023-01-24 03:22:44.395688: step: 706/463, loss: 0.029813863337039948 2023-01-24 03:22:44.971549: step: 708/463, loss: 0.027757974341511726 2023-01-24 03:22:45.541767: step: 710/463, loss: 0.027523649856448174 2023-01-24 03:22:46.125514: step: 712/463, loss: 0.15209656953811646 2023-01-24 03:22:46.807298: step: 714/463, loss: 0.0017175053944811225 2023-01-24 03:22:47.425141: step: 716/463, loss: 0.03918301686644554 2023-01-24 03:22:48.014178: step: 718/463, loss: 0.03300347179174423 2023-01-24 03:22:48.657511: step: 720/463, loss: 0.02014309912919998 2023-01-24 03:22:49.285095: step: 722/463, loss: 0.0004547167045529932 2023-01-24 03:22:49.841608: step: 724/463, loss: 0.017180675640702248 2023-01-24 03:22:50.448768: step: 726/463, loss: 0.04160762578248978 2023-01-24 03:22:51.058674: step: 728/463, loss: 0.027141336351633072 2023-01-24 03:22:51.666140: step: 730/463, loss: 0.015554307028651237 2023-01-24 03:22:52.264073: step: 732/463, loss: 0.10072067379951477 2023-01-24 03:22:52.848599: step: 734/463, loss: 0.00099479709751904 2023-01-24 03:22:53.433330: step: 736/463, loss: 0.07003404945135117 2023-01-24 03:22:54.071689: step: 738/463, loss: 0.11261841654777527 2023-01-24 03:22:54.665074: step: 740/463, loss: 0.010142209939658642 2023-01-24 03:22:55.232937: step: 742/463, loss: 0.01635134406387806 2023-01-24 03:22:55.788655: step: 744/463, loss: 0.005189536605030298 2023-01-24 03:22:56.385874: step: 746/463, loss: 0.007973375730216503 2023-01-24 03:22:56.931047: step: 748/463, loss: 0.033141378313302994 2023-01-24 03:22:57.582930: step: 750/463, loss: 0.022291144356131554 2023-01-24 03:22:58.245043: step: 752/463, loss: 0.04240809381008148 2023-01-24 03:22:58.842309: step: 754/463, loss: 0.02537102811038494 2023-01-24 03:22:59.434797: step: 756/463, loss: 0.017941009253263474 2023-01-24 03:23:00.069868: step: 758/463, loss: 0.009395868517458439 2023-01-24 03:23:00.774148: step: 760/463, loss: 0.2223188430070877 2023-01-24 03:23:01.453498: step: 762/463, loss: 0.1926819384098053 2023-01-24 03:23:02.013333: step: 764/463, loss: 0.0008233282715082169 2023-01-24 03:23:02.661845: step: 766/463, loss: 0.4056546688079834 2023-01-24 03:23:03.309972: step: 768/463, loss: 0.01469552330672741 2023-01-24 03:23:03.940994: step: 770/463, loss: 0.06593307852745056 2023-01-24 03:23:04.589797: step: 772/463, loss: 0.002064867876470089 2023-01-24 03:23:05.160448: step: 774/463, loss: 0.01166010182350874 2023-01-24 03:23:05.834849: step: 776/463, loss: 0.14776545763015747 2023-01-24 03:23:06.430124: step: 778/463, loss: 0.07784542441368103 2023-01-24 03:23:07.048501: step: 780/463, loss: 0.014094438403844833 2023-01-24 03:23:07.687830: step: 782/463, loss: 0.050974804908037186 2023-01-24 03:23:08.364350: step: 784/463, loss: 0.022339699789881706 2023-01-24 03:23:09.014183: step: 786/463, loss: 0.008100991137325764 2023-01-24 03:23:09.654026: step: 788/463, loss: 0.02096448838710785 2023-01-24 03:23:10.271044: step: 790/463, loss: 0.03886687755584717 2023-01-24 03:23:10.867442: step: 792/463, loss: 0.0005544420564547181 2023-01-24 03:23:11.426246: step: 794/463, loss: 0.0037787563633173704 2023-01-24 03:23:12.080912: step: 796/463, loss: 0.00595829589292407 2023-01-24 03:23:12.751615: step: 798/463, loss: 0.023152174428105354 2023-01-24 03:23:13.407573: step: 800/463, loss: 0.06829645484685898 2023-01-24 03:23:14.009733: step: 802/463, loss: 0.025881821289658546 2023-01-24 03:23:14.606449: step: 804/463, loss: 0.08149004727602005 2023-01-24 03:23:15.341904: step: 806/463, loss: 0.0035178614780306816 2023-01-24 03:23:16.012884: step: 808/463, loss: 0.010716564022004604 2023-01-24 03:23:16.626273: step: 810/463, loss: 0.11442716419696808 2023-01-24 03:23:17.295714: step: 812/463, loss: 0.003705686191096902 2023-01-24 03:23:17.862356: step: 814/463, loss: 0.03979633376002312 2023-01-24 03:23:18.492882: step: 816/463, loss: 0.12115366011857986 2023-01-24 03:23:19.028184: step: 818/463, loss: 0.0043911864049732685 2023-01-24 03:23:19.652943: step: 820/463, loss: 0.0011728814570233226 2023-01-24 03:23:20.276437: step: 822/463, loss: 0.18231798708438873 2023-01-24 03:23:20.767439: step: 824/463, loss: 0.012082516215741634 2023-01-24 03:23:21.341816: step: 826/463, loss: 0.05960536748170853 2023-01-24 03:23:22.042334: step: 828/463, loss: 0.02781391330063343 2023-01-24 03:23:22.674478: step: 830/463, loss: 0.02283107303082943 2023-01-24 03:23:23.313354: step: 832/463, loss: 0.03302788361907005 2023-01-24 03:23:24.058333: step: 834/463, loss: 0.018646804615855217 2023-01-24 03:23:24.698962: step: 836/463, loss: 0.0058774747885763645 2023-01-24 03:23:25.343684: step: 838/463, loss: 0.01898062415421009 2023-01-24 03:23:26.042054: step: 840/463, loss: 0.006672234740108252 2023-01-24 03:23:26.616213: step: 842/463, loss: 0.037453487515449524 2023-01-24 03:23:27.263764: step: 844/463, loss: 0.07097356021404266 2023-01-24 03:23:27.931083: step: 846/463, loss: 0.0280532855540514 2023-01-24 03:23:28.600626: step: 848/463, loss: 0.09504487365484238 2023-01-24 03:23:29.279661: step: 850/463, loss: 0.03309900313615799 2023-01-24 03:23:29.819720: step: 852/463, loss: 0.028237462043762207 2023-01-24 03:23:30.446443: step: 854/463, loss: 0.009738100692629814 2023-01-24 03:23:31.058075: step: 856/463, loss: 0.006108644884079695 2023-01-24 03:23:31.699567: step: 858/463, loss: 0.05223897472023964 2023-01-24 03:23:32.434930: step: 860/463, loss: 0.05805351957678795 2023-01-24 03:23:33.033453: step: 862/463, loss: 0.028307832777500153 2023-01-24 03:23:33.714292: step: 864/463, loss: 0.015082805417478085 2023-01-24 03:23:34.337178: step: 866/463, loss: 0.011649715714156628 2023-01-24 03:23:35.031158: step: 868/463, loss: 0.004242930095642805 2023-01-24 03:23:35.627764: step: 870/463, loss: 0.04554912820458412 2023-01-24 03:23:36.189463: step: 872/463, loss: 0.014860392548143864 2023-01-24 03:23:36.815098: step: 874/463, loss: 0.06247008591890335 2023-01-24 03:23:37.477884: step: 876/463, loss: 0.07359500229358673 2023-01-24 03:23:38.139857: step: 878/463, loss: 0.017384247854351997 2023-01-24 03:23:38.794354: step: 880/463, loss: 0.20369195938110352 2023-01-24 03:23:39.402934: step: 882/463, loss: 0.03452306240797043 2023-01-24 03:23:40.029248: step: 884/463, loss: 0.002270209603011608 2023-01-24 03:23:40.735090: step: 886/463, loss: 0.011534066870808601 2023-01-24 03:23:41.297285: step: 888/463, loss: 0.0021770147141069174 2023-01-24 03:23:41.928121: step: 890/463, loss: 0.004283386282622814 2023-01-24 03:23:42.481484: step: 892/463, loss: 0.01173703558743 2023-01-24 03:23:43.063572: step: 894/463, loss: 0.005979097448289394 2023-01-24 03:23:43.678699: step: 896/463, loss: 0.004212734289467335 2023-01-24 03:23:44.290744: step: 898/463, loss: 0.016311684623360634 2023-01-24 03:23:44.925452: step: 900/463, loss: 0.002907142508774996 2023-01-24 03:23:45.533193: step: 902/463, loss: 0.035801492631435394 2023-01-24 03:23:46.128529: step: 904/463, loss: 0.01777712069451809 2023-01-24 03:23:46.807709: step: 906/463, loss: 0.07303626835346222 2023-01-24 03:23:47.566721: step: 908/463, loss: 0.011776966974139214 2023-01-24 03:23:48.214280: step: 910/463, loss: 0.010873616673052311 2023-01-24 03:23:48.809939: step: 912/463, loss: 0.03174722567200661 2023-01-24 03:23:49.465051: step: 914/463, loss: 0.016076233237981796 2023-01-24 03:23:50.065267: step: 916/463, loss: 0.014363965019583702 2023-01-24 03:23:50.661786: step: 918/463, loss: 0.036554571241140366 2023-01-24 03:23:51.322776: step: 920/463, loss: 0.015861066058278084 2023-01-24 03:23:51.958229: step: 922/463, loss: 0.012140725739300251 2023-01-24 03:23:52.527344: step: 924/463, loss: 0.032698407769203186 2023-01-24 03:23:53.127641: step: 926/463, loss: 0.013550748117268085 ================================================== Loss: 0.068 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34350105269088993, 'r': 0.33307217822589136, 'f1': 0.33820623877657946}, 'combined': 0.24920459699326905, 'epoch': 27} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3383324329044517, 'r': 0.3815568889464726, 'f1': 0.3586469936706369}, 'combined': 0.2779943204528382, 'epoch': 27} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.326763069358178, 'r': 0.34970468902848656, 'f1': 0.3378448599780246}, 'combined': 0.24893831787854442, 'epoch': 27} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32708585047416266, 'r': 0.38450625253350557, 'f1': 0.3534793432669659}, 'combined': 0.2739887732499927, 'epoch': 27} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3160580554219969, 'r': 0.33045159115278994, 'f1': 0.3230945983998521}, 'combined': 0.23806970408410155, 'epoch': 27} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33094568259027224, 'r': 0.3799183433412225, 'f1': 0.3537451070220369}, 'combined': 0.27419477195009556, 'epoch': 27} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3225806451612903, 'r': 0.2857142857142857, 'f1': 0.303030303030303}, 'combined': 0.202020202020202, 'epoch': 27} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.30357142857142855, 'r': 0.3695652173913043, 'f1': 0.3333333333333333}, 'combined': 0.16666666666666666, 'epoch': 27} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.20689655172413793, 'f1': 0.2926829268292683}, 'combined': 0.19512195121951217, 'epoch': 27} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35531381856540084, 'r': 0.3195801707779886, 'f1': 0.336500999000999}, 'combined': 0.24794810452705188, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35077406976066583, 'r': 0.38269193088778525, 'f1': 0.3660385237854157}, 'combined': 0.2837236404461117, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.3142857142857143, 'f1': 0.360655737704918}, 'combined': 0.24043715846994532, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3556843389529725, 'r': 0.32598773380319873, 'f1': 0.340189179632249}, 'combined': 0.25066571130797294, 'epoch': 19} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35235259195938173, 'r': 0.3698406801632481, 'f1': 0.3608848968767838}, 'combined': 0.2797289631293731, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.41304347826086957, 'r': 0.41304347826086957, 'f1': 0.41304347826086957}, 'combined': 0.20652173913043478, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33842201576576575, 'r': 0.3326425126502214, 'f1': 0.3355073763955343}, 'combined': 0.2472159615546042, 'epoch': 14} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33644544699224593, 'r': 0.3627302475385151, 'f1': 0.3490937720671424}, 'combined': 0.2705894309802731, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.20689655172413793, 'f1': 0.2926829268292683}, 'combined': 0.19512195121951217, 'epoch': 14} ****************************** Epoch: 28 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:26:28.066633: step: 2/463, loss: 0.29374492168426514 2023-01-24 03:26:28.708406: step: 4/463, loss: 0.008950339630246162 2023-01-24 03:26:29.214709: step: 6/463, loss: 0.0025884020142257214 2023-01-24 03:26:29.770905: step: 8/463, loss: 0.01560173463076353 2023-01-24 03:26:30.324739: step: 10/463, loss: 0.0012035038089379668 2023-01-24 03:26:30.868817: step: 12/463, loss: 0.013082488439977169 2023-01-24 03:26:31.505801: step: 14/463, loss: 0.0173418577760458 2023-01-24 03:26:32.223804: step: 16/463, loss: 0.030459564179182053 2023-01-24 03:26:32.808283: step: 18/463, loss: 0.0023555199149996042 2023-01-24 03:26:33.424357: step: 20/463, loss: 0.017555763944983482 2023-01-24 03:26:34.062954: step: 22/463, loss: 3.182292938232422 2023-01-24 03:26:34.688122: step: 24/463, loss: 0.0032870571594685316 2023-01-24 03:26:35.413591: step: 26/463, loss: 0.04132572188973427 2023-01-24 03:26:36.016622: step: 28/463, loss: 0.006120634730905294 2023-01-24 03:26:36.597725: step: 30/463, loss: 0.10707765817642212 2023-01-24 03:26:37.203767: step: 32/463, loss: 0.011916041374206543 2023-01-24 03:26:37.842184: step: 34/463, loss: 0.011357552371919155 2023-01-24 03:26:38.446926: step: 36/463, loss: 0.04864846169948578 2023-01-24 03:26:39.084903: step: 38/463, loss: 0.18586254119873047 2023-01-24 03:26:39.688751: step: 40/463, loss: 0.07336920499801636 2023-01-24 03:26:40.275893: step: 42/463, loss: 0.01845753751695156 2023-01-24 03:26:40.871721: step: 44/463, loss: 0.021225254982709885 2023-01-24 03:26:41.442090: step: 46/463, loss: 0.002981279045343399 2023-01-24 03:26:41.973631: step: 48/463, loss: 0.0034501601476222277 2023-01-24 03:26:42.504880: step: 50/463, loss: 0.025557447224855423 2023-01-24 03:26:43.162298: step: 52/463, loss: 0.005544594023376703 2023-01-24 03:26:43.850041: step: 54/463, loss: 0.022663110867142677 2023-01-24 03:26:44.509596: step: 56/463, loss: 0.03357920050621033 2023-01-24 03:26:45.175615: step: 58/463, loss: 0.03633473441004753 2023-01-24 03:26:45.823903: step: 60/463, loss: 0.041334718465805054 2023-01-24 03:26:46.460376: step: 62/463, loss: 0.00767884636297822 2023-01-24 03:26:47.044565: step: 64/463, loss: 0.030544543638825417 2023-01-24 03:26:47.657078: step: 66/463, loss: 0.024337437003850937 2023-01-24 03:26:48.289308: step: 68/463, loss: 0.030278237536549568 2023-01-24 03:26:48.935447: step: 70/463, loss: 0.004849262069910765 2023-01-24 03:26:49.565277: step: 72/463, loss: 0.003848511492833495 2023-01-24 03:26:50.162802: step: 74/463, loss: 0.01226518303155899 2023-01-24 03:26:50.851784: step: 76/463, loss: 0.04755386337637901 2023-01-24 03:26:51.525002: step: 78/463, loss: 0.033736661076545715 2023-01-24 03:26:52.145104: step: 80/463, loss: 0.007528581656515598 2023-01-24 03:26:52.748273: step: 82/463, loss: 0.06172417104244232 2023-01-24 03:26:53.340226: step: 84/463, loss: 0.05943514406681061 2023-01-24 03:26:54.030637: step: 86/463, loss: 0.0009893669048324227 2023-01-24 03:26:54.765499: step: 88/463, loss: 0.0063304295763373375 2023-01-24 03:26:55.328105: step: 90/463, loss: 0.007008117623627186 2023-01-24 03:26:55.975840: step: 92/463, loss: 0.06716267019510269 2023-01-24 03:26:56.570433: step: 94/463, loss: 0.007413616869598627 2023-01-24 03:26:57.237229: step: 96/463, loss: 0.12252495437860489 2023-01-24 03:26:57.860456: step: 98/463, loss: 0.0070847985334694386 2023-01-24 03:26:58.433514: step: 100/463, loss: 0.020843129605054855 2023-01-24 03:26:59.052272: step: 102/463, loss: 0.005596775561571121 2023-01-24 03:26:59.618658: step: 104/463, loss: 0.007876403629779816 2023-01-24 03:27:00.410142: step: 106/463, loss: 4.0363924199482426e-05 2023-01-24 03:27:01.119867: step: 108/463, loss: 0.07214424014091492 2023-01-24 03:27:01.791297: step: 110/463, loss: 0.005892504006624222 2023-01-24 03:27:02.461394: step: 112/463, loss: 0.031808339059352875 2023-01-24 03:27:03.104597: step: 114/463, loss: 0.016272246837615967 2023-01-24 03:27:03.740135: step: 116/463, loss: 0.030042799189686775 2023-01-24 03:27:04.376605: step: 118/463, loss: 0.01700497791171074 2023-01-24 03:27:04.999535: step: 120/463, loss: 0.013594040647149086 2023-01-24 03:27:05.632409: step: 122/463, loss: 0.010613398626446724 2023-01-24 03:27:06.233541: step: 124/463, loss: 0.007190784905105829 2023-01-24 03:27:06.894459: step: 126/463, loss: 0.011261575855314732 2023-01-24 03:27:07.597276: step: 128/463, loss: 0.006488234270364046 2023-01-24 03:27:08.184486: step: 130/463, loss: 0.06378525495529175 2023-01-24 03:27:08.803422: step: 132/463, loss: 0.01844719611108303 2023-01-24 03:27:09.414092: step: 134/463, loss: 0.0003200969658792019 2023-01-24 03:27:10.055336: step: 136/463, loss: 0.04771817475557327 2023-01-24 03:27:10.705995: step: 138/463, loss: 0.02819829247891903 2023-01-24 03:27:11.384686: step: 140/463, loss: 0.0033267666585743427 2023-01-24 03:27:11.981078: step: 142/463, loss: 0.03953809663653374 2023-01-24 03:27:12.640040: step: 144/463, loss: 0.04431101679801941 2023-01-24 03:27:13.232760: step: 146/463, loss: 0.005385186523199081 2023-01-24 03:27:13.775486: step: 148/463, loss: 0.0013555175391957164 2023-01-24 03:27:14.313093: step: 150/463, loss: 0.003086766693741083 2023-01-24 03:27:14.946270: step: 152/463, loss: 0.06461112946271896 2023-01-24 03:27:15.608054: step: 154/463, loss: 0.011391893960535526 2023-01-24 03:27:16.202276: step: 156/463, loss: 0.0019549208227545023 2023-01-24 03:27:16.825116: step: 158/463, loss: 0.053492654114961624 2023-01-24 03:27:17.450242: step: 160/463, loss: 0.0008113268995657563 2023-01-24 03:27:18.060917: step: 162/463, loss: 0.004094821400940418 2023-01-24 03:27:18.676403: step: 164/463, loss: 0.21801036596298218 2023-01-24 03:27:19.282025: step: 166/463, loss: 0.01630508340895176 2023-01-24 03:27:19.893698: step: 168/463, loss: 0.0029705564957112074 2023-01-24 03:27:20.492248: step: 170/463, loss: 0.23823057115077972 2023-01-24 03:27:21.051386: step: 172/463, loss: 0.01696964167058468 2023-01-24 03:27:21.628760: step: 174/463, loss: 0.0002627323556225747 2023-01-24 03:27:22.232126: step: 176/463, loss: 0.03307105600833893 2023-01-24 03:27:22.798912: step: 178/463, loss: 0.31701192259788513 2023-01-24 03:27:23.440178: step: 180/463, loss: 0.003947002813220024 2023-01-24 03:27:24.042334: step: 182/463, loss: 0.34261614084243774 2023-01-24 03:27:24.677324: step: 184/463, loss: 0.0035143662244081497 2023-01-24 03:27:25.373948: step: 186/463, loss: 0.013044084422290325 2023-01-24 03:27:26.015547: step: 188/463, loss: 0.0020450023002922535 2023-01-24 03:27:26.649273: step: 190/463, loss: 0.018712855875492096 2023-01-24 03:27:27.292420: step: 192/463, loss: 0.01126101054251194 2023-01-24 03:27:27.914192: step: 194/463, loss: 0.006964448373764753 2023-01-24 03:27:28.514175: step: 196/463, loss: 0.02322833426296711 2023-01-24 03:27:29.111751: step: 198/463, loss: 0.005938366986811161 2023-01-24 03:27:29.697455: step: 200/463, loss: 0.0042057582177221775 2023-01-24 03:27:30.321340: step: 202/463, loss: 0.028879977762699127 2023-01-24 03:27:31.061316: step: 204/463, loss: 0.012913842685520649 2023-01-24 03:27:31.715824: step: 206/463, loss: 0.009799282997846603 2023-01-24 03:27:32.258417: step: 208/463, loss: 0.03414357081055641 2023-01-24 03:27:32.919799: step: 210/463, loss: 0.14262324571609497 2023-01-24 03:27:33.558834: step: 212/463, loss: 0.029071707278490067 2023-01-24 03:27:34.114857: step: 214/463, loss: 0.00010934104648185894 2023-01-24 03:27:34.746236: step: 216/463, loss: 0.07483761012554169 2023-01-24 03:27:35.428083: step: 218/463, loss: 0.01736161857843399 2023-01-24 03:27:36.032138: step: 220/463, loss: 0.0006832100916653872 2023-01-24 03:27:36.635681: step: 222/463, loss: 0.04779037833213806 2023-01-24 03:27:37.259287: step: 224/463, loss: 0.006258423440158367 2023-01-24 03:27:37.891399: step: 226/463, loss: 0.012496327050030231 2023-01-24 03:27:38.499807: step: 228/463, loss: 0.02091159299015999 2023-01-24 03:27:39.151261: step: 230/463, loss: 0.009413516148924828 2023-01-24 03:27:39.744124: step: 232/463, loss: 0.003302746918052435 2023-01-24 03:27:40.369782: step: 234/463, loss: 0.019160253927111626 2023-01-24 03:27:40.911464: step: 236/463, loss: 0.009196114726364613 2023-01-24 03:27:41.581458: step: 238/463, loss: 0.004442290402948856 2023-01-24 03:27:42.158029: step: 240/463, loss: 0.028344832360744476 2023-01-24 03:27:42.777076: step: 242/463, loss: 0.0005288837128318846 2023-01-24 03:27:43.391600: step: 244/463, loss: 0.03255986422300339 2023-01-24 03:27:43.987061: step: 246/463, loss: 0.05627922713756561 2023-01-24 03:27:44.610587: step: 248/463, loss: 0.21724070608615875 2023-01-24 03:27:45.212569: step: 250/463, loss: 0.025468379259109497 2023-01-24 03:27:45.896100: step: 252/463, loss: 0.03477541729807854 2023-01-24 03:27:46.586667: step: 254/463, loss: 0.009381677024066448 2023-01-24 03:27:47.235785: step: 256/463, loss: 0.02722826972603798 2023-01-24 03:27:47.865442: step: 258/463, loss: 0.01369366142898798 2023-01-24 03:27:48.592924: step: 260/463, loss: 0.00316370720975101 2023-01-24 03:27:49.190404: step: 262/463, loss: 0.009699487127363682 2023-01-24 03:27:49.836117: step: 264/463, loss: 0.018967123702168465 2023-01-24 03:27:50.466528: step: 266/463, loss: 0.013150692917406559 2023-01-24 03:27:51.040634: step: 268/463, loss: 0.0017656952841207385 2023-01-24 03:27:51.611177: step: 270/463, loss: 0.005274574737995863 2023-01-24 03:27:52.235678: step: 272/463, loss: 0.04965560510754585 2023-01-24 03:27:52.808336: step: 274/463, loss: 0.0036977967247366905 2023-01-24 03:27:53.405181: step: 276/463, loss: 0.02303214557468891 2023-01-24 03:27:53.997667: step: 278/463, loss: 0.0029469032306224108 2023-01-24 03:27:54.620861: step: 280/463, loss: 0.02842818759381771 2023-01-24 03:27:55.183181: step: 282/463, loss: 0.007984843105077744 2023-01-24 03:27:55.811851: step: 284/463, loss: 0.00031438929727301 2023-01-24 03:27:56.443256: step: 286/463, loss: 0.00906501803547144 2023-01-24 03:27:57.011678: step: 288/463, loss: 0.0017406274564564228 2023-01-24 03:27:57.588004: step: 290/463, loss: 0.036236152052879333 2023-01-24 03:27:58.176079: step: 292/463, loss: 0.004267120733857155 2023-01-24 03:27:58.799892: step: 294/463, loss: 0.011889545246958733 2023-01-24 03:27:59.453677: step: 296/463, loss: 0.008746356703341007 2023-01-24 03:28:00.177164: step: 298/463, loss: 0.02594771794974804 2023-01-24 03:28:00.828411: step: 300/463, loss: 0.0295711699873209 2023-01-24 03:28:01.424857: step: 302/463, loss: 0.06577367335557938 2023-01-24 03:28:02.100277: step: 304/463, loss: 0.007691263686865568 2023-01-24 03:28:02.755174: step: 306/463, loss: 0.026845639571547508 2023-01-24 03:28:03.438057: step: 308/463, loss: 0.0009184012887999415 2023-01-24 03:28:04.059171: step: 310/463, loss: 0.04784321039915085 2023-01-24 03:28:04.770892: step: 312/463, loss: 0.8103326559066772 2023-01-24 03:28:05.375496: step: 314/463, loss: 0.04451471194624901 2023-01-24 03:28:05.984253: step: 316/463, loss: 0.20168262720108032 2023-01-24 03:28:06.577439: step: 318/463, loss: 0.007925400510430336 2023-01-24 03:28:07.197614: step: 320/463, loss: 0.01127647515386343 2023-01-24 03:28:07.868287: step: 322/463, loss: 0.08415776491165161 2023-01-24 03:28:08.499843: step: 324/463, loss: 0.03440093994140625 2023-01-24 03:28:09.112774: step: 326/463, loss: 0.05367375910282135 2023-01-24 03:28:09.649161: step: 328/463, loss: 0.0526382252573967 2023-01-24 03:28:10.251151: step: 330/463, loss: 0.026704581454396248 2023-01-24 03:28:10.867887: step: 332/463, loss: 0.13789011538028717 2023-01-24 03:28:11.469471: step: 334/463, loss: 0.036408569663763046 2023-01-24 03:28:12.160352: step: 336/463, loss: 0.013833659701049328 2023-01-24 03:28:12.723778: step: 338/463, loss: 0.008015003986656666 2023-01-24 03:28:13.350132: step: 340/463, loss: 0.04638589173555374 2023-01-24 03:28:13.953216: step: 342/463, loss: 0.011765706352889538 2023-01-24 03:28:14.571830: step: 344/463, loss: 0.004654136952012777 2023-01-24 03:28:15.143094: step: 346/463, loss: 0.01899045892059803 2023-01-24 03:28:15.674807: step: 348/463, loss: 0.02097213640809059 2023-01-24 03:28:16.306908: step: 350/463, loss: 0.014730531722307205 2023-01-24 03:28:16.857406: step: 352/463, loss: 0.010041667148470879 2023-01-24 03:28:17.458397: step: 354/463, loss: 0.03531422093510628 2023-01-24 03:28:18.043289: step: 356/463, loss: 0.009784921072423458 2023-01-24 03:28:18.597369: step: 358/463, loss: 0.006354726385325193 2023-01-24 03:28:19.194347: step: 360/463, loss: 0.04932844638824463 2023-01-24 03:28:19.925325: step: 362/463, loss: 0.04441177099943161 2023-01-24 03:28:20.572409: step: 364/463, loss: 0.3677092492580414 2023-01-24 03:28:21.209019: step: 366/463, loss: 0.19194917380809784 2023-01-24 03:28:22.000520: step: 368/463, loss: 0.0059125991538167 2023-01-24 03:28:22.581133: step: 370/463, loss: 0.05111651495099068 2023-01-24 03:28:23.159752: step: 372/463, loss: 0.029977114871144295 2023-01-24 03:28:23.769316: step: 374/463, loss: 0.03323173522949219 2023-01-24 03:28:24.411229: step: 376/463, loss: 0.008927146904170513 2023-01-24 03:28:25.035496: step: 378/463, loss: 0.005805583670735359 2023-01-24 03:28:25.638353: step: 380/463, loss: 0.03207562118768692 2023-01-24 03:28:26.286695: step: 382/463, loss: 0.00576637452468276 2023-01-24 03:28:26.895998: step: 384/463, loss: 0.09505612403154373 2023-01-24 03:28:27.526778: step: 386/463, loss: 0.27356088161468506 2023-01-24 03:28:28.126381: step: 388/463, loss: 0.0012583467178046703 2023-01-24 03:28:28.722216: step: 390/463, loss: 0.020944418385624886 2023-01-24 03:28:29.339406: step: 392/463, loss: 0.10776685178279877 2023-01-24 03:28:29.926700: step: 394/463, loss: 0.009196275845170021 2023-01-24 03:28:30.574326: step: 396/463, loss: 0.01127785537391901 2023-01-24 03:28:31.224390: step: 398/463, loss: 0.014327704906463623 2023-01-24 03:28:31.860213: step: 400/463, loss: 0.008281395770609379 2023-01-24 03:28:32.499575: step: 402/463, loss: 0.0006575637962669134 2023-01-24 03:28:33.122393: step: 404/463, loss: 0.21968667209148407 2023-01-24 03:28:33.818580: step: 406/463, loss: 0.1207488626241684 2023-01-24 03:28:34.428583: step: 408/463, loss: 0.06901227682828903 2023-01-24 03:28:35.041634: step: 410/463, loss: 0.021813420578837395 2023-01-24 03:28:35.640192: step: 412/463, loss: 0.005912352818995714 2023-01-24 03:28:36.254261: step: 414/463, loss: 0.024966519325971603 2023-01-24 03:28:36.865147: step: 416/463, loss: 0.05034935846924782 2023-01-24 03:28:37.516906: step: 418/463, loss: 0.012702380307018757 2023-01-24 03:28:38.104734: step: 420/463, loss: 0.021927962079644203 2023-01-24 03:28:38.778614: step: 422/463, loss: 0.08018112182617188 2023-01-24 03:28:39.452324: step: 424/463, loss: 0.21924570202827454 2023-01-24 03:28:40.061171: step: 426/463, loss: 0.10328881442546844 2023-01-24 03:28:40.639121: step: 428/463, loss: 0.05289173871278763 2023-01-24 03:28:41.199655: step: 430/463, loss: 0.002373060444369912 2023-01-24 03:28:41.789050: step: 432/463, loss: 0.005188320763409138 2023-01-24 03:28:42.382649: step: 434/463, loss: 0.019843287765979767 2023-01-24 03:28:43.044126: step: 436/463, loss: 0.0008458858937956393 2023-01-24 03:28:43.684357: step: 438/463, loss: 0.06668374687433243 2023-01-24 03:28:44.309295: step: 440/463, loss: 0.037590451538562775 2023-01-24 03:28:44.822900: step: 442/463, loss: 0.028690610080957413 2023-01-24 03:28:45.415576: step: 444/463, loss: 0.017187584191560745 2023-01-24 03:28:46.058241: step: 446/463, loss: 0.08459227532148361 2023-01-24 03:28:46.614775: step: 448/463, loss: 0.07163655757904053 2023-01-24 03:28:47.146212: step: 450/463, loss: 0.005111805163323879 2023-01-24 03:28:47.770717: step: 452/463, loss: 0.01331639476120472 2023-01-24 03:28:48.403006: step: 454/463, loss: 0.009346513077616692 2023-01-24 03:28:48.990505: step: 456/463, loss: 0.020700331777334213 2023-01-24 03:28:49.668578: step: 458/463, loss: 0.00588577426970005 2023-01-24 03:28:50.275648: step: 460/463, loss: 0.0042933207005262375 2023-01-24 03:28:50.954650: step: 462/463, loss: 0.023354649543762207 2023-01-24 03:28:51.545233: step: 464/463, loss: 0.5120725035667419 2023-01-24 03:28:52.086972: step: 466/463, loss: 0.0026860798243433237 2023-01-24 03:28:52.713274: step: 468/463, loss: 0.0032239265274256468 2023-01-24 03:28:53.404707: step: 470/463, loss: 0.059078581631183624 2023-01-24 03:28:54.047864: step: 472/463, loss: 0.011114481836557388 2023-01-24 03:28:54.632310: step: 474/463, loss: 0.00791896041482687 2023-01-24 03:28:55.353919: step: 476/463, loss: 0.012504028156399727 2023-01-24 03:28:55.890591: step: 478/463, loss: 0.0002504971926100552 2023-01-24 03:28:56.501583: step: 480/463, loss: 0.012548841536045074 2023-01-24 03:28:56.989783: step: 482/463, loss: 0.0019528002012521029 2023-01-24 03:28:57.565792: step: 484/463, loss: 0.013973971828818321 2023-01-24 03:28:58.234316: step: 486/463, loss: 1.9603378772735596 2023-01-24 03:28:58.893357: step: 488/463, loss: 0.029213057830929756 2023-01-24 03:28:59.489506: step: 490/463, loss: 0.007589337415993214 2023-01-24 03:29:00.112717: step: 492/463, loss: 0.0017818623455241323 2023-01-24 03:29:00.705627: step: 494/463, loss: 0.06031782552599907 2023-01-24 03:29:01.286711: step: 496/463, loss: 0.007109189406037331 2023-01-24 03:29:01.906738: step: 498/463, loss: 0.03351876884698868 2023-01-24 03:29:02.448617: step: 500/463, loss: 0.0027931872755289078 2023-01-24 03:29:03.182628: step: 502/463, loss: 4.954287528991699 2023-01-24 03:29:03.786912: step: 504/463, loss: 0.08425292372703552 2023-01-24 03:29:04.336888: step: 506/463, loss: 0.04355937987565994 2023-01-24 03:29:04.961876: step: 508/463, loss: 0.024976037442684174 2023-01-24 03:29:05.562191: step: 510/463, loss: 0.017483292147517204 2023-01-24 03:29:06.122076: step: 512/463, loss: 0.007971568964421749 2023-01-24 03:29:06.687773: step: 514/463, loss: 0.003153902478516102 2023-01-24 03:29:07.296008: step: 516/463, loss: 0.014386521652340889 2023-01-24 03:29:07.978432: step: 518/463, loss: 0.016516188159585 2023-01-24 03:29:08.645854: step: 520/463, loss: 0.05810907483100891 2023-01-24 03:29:09.228930: step: 522/463, loss: 0.02266695536673069 2023-01-24 03:29:09.848060: step: 524/463, loss: 0.0251521784812212 2023-01-24 03:29:10.494922: step: 526/463, loss: 0.4154609143733978 2023-01-24 03:29:11.068317: step: 528/463, loss: 0.0012287950376048684 2023-01-24 03:29:11.749202: step: 530/463, loss: 0.014101866632699966 2023-01-24 03:29:12.373719: step: 532/463, loss: 0.03135546296834946 2023-01-24 03:29:12.905325: step: 534/463, loss: 0.022536693140864372 2023-01-24 03:29:13.487476: step: 536/463, loss: 0.004082603845745325 2023-01-24 03:29:14.078793: step: 538/463, loss: 0.008292187005281448 2023-01-24 03:29:14.708678: step: 540/463, loss: 0.011333576403558254 2023-01-24 03:29:15.346364: step: 542/463, loss: 0.0026401199866086245 2023-01-24 03:29:15.962866: step: 544/463, loss: 0.49489450454711914 2023-01-24 03:29:16.619273: step: 546/463, loss: 0.0016717016696929932 2023-01-24 03:29:17.195171: step: 548/463, loss: 0.005759804509580135 2023-01-24 03:29:17.827945: step: 550/463, loss: 0.05441486835479736 2023-01-24 03:29:18.397472: step: 552/463, loss: 0.012170879170298576 2023-01-24 03:29:18.920804: step: 554/463, loss: 1.1469770470284857e-05 2023-01-24 03:29:19.518354: step: 556/463, loss: 0.1692005842924118 2023-01-24 03:29:20.158670: step: 558/463, loss: 0.0076745604164898396 2023-01-24 03:29:20.779488: step: 560/463, loss: 0.23432189226150513 2023-01-24 03:29:21.382969: step: 562/463, loss: 0.028413010761141777 2023-01-24 03:29:21.967653: step: 564/463, loss: 0.03602062910795212 2023-01-24 03:29:22.602247: step: 566/463, loss: 0.003380877897143364 2023-01-24 03:29:23.213257: step: 568/463, loss: 0.0471382662653923 2023-01-24 03:29:23.792941: step: 570/463, loss: 0.012956744059920311 2023-01-24 03:29:24.444861: step: 572/463, loss: 0.015133941546082497 2023-01-24 03:29:24.983302: step: 574/463, loss: 0.03462939336895943 2023-01-24 03:29:25.570015: step: 576/463, loss: 0.009973033331334591 2023-01-24 03:29:26.224055: step: 578/463, loss: 0.004711265210062265 2023-01-24 03:29:26.768437: step: 580/463, loss: 0.005789301358163357 2023-01-24 03:29:27.315409: step: 582/463, loss: 0.035336632281541824 2023-01-24 03:29:27.911453: step: 584/463, loss: 0.15327401459217072 2023-01-24 03:29:28.489022: step: 586/463, loss: 0.2596665620803833 2023-01-24 03:29:29.092571: step: 588/463, loss: 0.01266113668680191 2023-01-24 03:29:29.727322: step: 590/463, loss: 0.00028796232072636485 2023-01-24 03:29:30.357235: step: 592/463, loss: 0.05749647319316864 2023-01-24 03:29:31.014939: step: 594/463, loss: 0.06660754233598709 2023-01-24 03:29:31.593035: step: 596/463, loss: 0.006729778368026018 2023-01-24 03:29:32.259275: step: 598/463, loss: 0.006399865727871656 2023-01-24 03:29:32.869845: step: 600/463, loss: 0.04109904170036316 2023-01-24 03:29:33.510883: step: 602/463, loss: 0.004981099162250757 2023-01-24 03:29:34.053996: step: 604/463, loss: 0.04519573971629143 2023-01-24 03:29:34.615492: step: 606/463, loss: 0.009017454460263252 2023-01-24 03:29:35.165137: step: 608/463, loss: 0.00037248252192512155 2023-01-24 03:29:35.752253: step: 610/463, loss: 0.13918378949165344 2023-01-24 03:29:36.388717: step: 612/463, loss: 0.00314846052788198 2023-01-24 03:29:37.012132: step: 614/463, loss: 0.06371580064296722 2023-01-24 03:29:37.560003: step: 616/463, loss: 0.016104543581604958 2023-01-24 03:29:38.176299: step: 618/463, loss: 0.00015639570483472198 2023-01-24 03:29:38.810582: step: 620/463, loss: 0.036811381578445435 2023-01-24 03:29:39.412700: step: 622/463, loss: 0.012018936686217785 2023-01-24 03:29:40.090803: step: 624/463, loss: 0.028855066746473312 2023-01-24 03:29:40.716926: step: 626/463, loss: 0.006511532701551914 2023-01-24 03:29:41.316966: step: 628/463, loss: 0.032886989414691925 2023-01-24 03:29:41.908663: step: 630/463, loss: 0.006207230035215616 2023-01-24 03:29:42.546230: step: 632/463, loss: 0.043157126754522324 2023-01-24 03:29:43.189247: step: 634/463, loss: 0.015785163268446922 2023-01-24 03:29:43.771448: step: 636/463, loss: 0.008867550641298294 2023-01-24 03:29:44.467829: step: 638/463, loss: 0.01249113492667675 2023-01-24 03:29:45.082407: step: 640/463, loss: 0.0402577668428421 2023-01-24 03:29:45.646357: step: 642/463, loss: 0.04310372471809387 2023-01-24 03:29:46.318746: step: 644/463, loss: 0.6034498810768127 2023-01-24 03:29:46.945714: step: 646/463, loss: 0.0632987916469574 2023-01-24 03:29:47.524935: step: 648/463, loss: 0.026047512888908386 2023-01-24 03:29:48.141851: step: 650/463, loss: 0.0029075623024255037 2023-01-24 03:29:48.739837: step: 652/463, loss: 0.007010980974882841 2023-01-24 03:29:49.283432: step: 654/463, loss: 0.0363411121070385 2023-01-24 03:29:50.005900: step: 656/463, loss: 0.0012821756536141038 2023-01-24 03:29:50.628801: step: 658/463, loss: 0.005994519684463739 2023-01-24 03:29:51.206959: step: 660/463, loss: 0.0018810039618983865 2023-01-24 03:29:51.799016: step: 662/463, loss: 0.009131263941526413 2023-01-24 03:29:52.409156: step: 664/463, loss: 0.029725870117545128 2023-01-24 03:29:53.002985: step: 666/463, loss: 0.002402053214609623 2023-01-24 03:29:53.606854: step: 668/463, loss: 0.00358913978561759 2023-01-24 03:29:54.221926: step: 670/463, loss: 0.02485659159719944 2023-01-24 03:29:54.840983: step: 672/463, loss: 0.02293873205780983 2023-01-24 03:29:55.497762: step: 674/463, loss: 0.0862886905670166 2023-01-24 03:29:56.145400: step: 676/463, loss: 0.005062241107225418 2023-01-24 03:29:56.805811: step: 678/463, loss: 0.06858204305171967 2023-01-24 03:29:57.346205: step: 680/463, loss: 0.027368837967514992 2023-01-24 03:29:57.967803: step: 682/463, loss: 0.11823952198028564 2023-01-24 03:29:58.570019: step: 684/463, loss: 0.009590024128556252 2023-01-24 03:29:59.194314: step: 686/463, loss: 0.09056219458580017 2023-01-24 03:29:59.802321: step: 688/463, loss: 0.012177771888673306 2023-01-24 03:30:00.431965: step: 690/463, loss: 0.013546637259423733 2023-01-24 03:30:01.023873: step: 692/463, loss: 0.06701210886240005 2023-01-24 03:30:01.691785: step: 694/463, loss: 0.05578470230102539 2023-01-24 03:30:02.242321: step: 696/463, loss: 0.007104128133505583 2023-01-24 03:30:02.827241: step: 698/463, loss: 0.011056576855480671 2023-01-24 03:30:03.449390: step: 700/463, loss: 0.02336994744837284 2023-01-24 03:30:04.096231: step: 702/463, loss: 0.03981660306453705 2023-01-24 03:30:04.771426: step: 704/463, loss: 0.008403674699366093 2023-01-24 03:30:05.371544: step: 706/463, loss: 0.22430363297462463 2023-01-24 03:30:05.945163: step: 708/463, loss: 0.020615549758076668 2023-01-24 03:30:06.584395: step: 710/463, loss: 0.005166787654161453 2023-01-24 03:30:07.167816: step: 712/463, loss: 0.02336127869784832 2023-01-24 03:30:07.884582: step: 714/463, loss: 0.03210056200623512 2023-01-24 03:30:08.499397: step: 716/463, loss: 0.03605251759290695 2023-01-24 03:30:09.095879: step: 718/463, loss: 0.09759293496608734 2023-01-24 03:30:09.721407: step: 720/463, loss: 0.05771316960453987 2023-01-24 03:30:10.352397: step: 722/463, loss: 0.00856007169932127 2023-01-24 03:30:10.947045: step: 724/463, loss: 0.023083670064806938 2023-01-24 03:30:11.656286: step: 726/463, loss: 0.04058995470404625 2023-01-24 03:30:12.227314: step: 728/463, loss: 0.01586988754570484 2023-01-24 03:30:12.821367: step: 730/463, loss: 0.0280936136841774 2023-01-24 03:30:13.450867: step: 732/463, loss: 0.11945941299200058 2023-01-24 03:30:14.072794: step: 734/463, loss: 0.1073726937174797 2023-01-24 03:30:14.638405: step: 736/463, loss: 0.0005320303607732058 2023-01-24 03:30:15.236332: step: 738/463, loss: 0.005495231598615646 2023-01-24 03:30:15.818067: step: 740/463, loss: 0.010837437584996223 2023-01-24 03:30:16.524287: step: 742/463, loss: 0.008254644460976124 2023-01-24 03:30:17.117185: step: 744/463, loss: 0.013994758017361164 2023-01-24 03:30:17.726483: step: 746/463, loss: 0.004982369020581245 2023-01-24 03:30:18.361957: step: 748/463, loss: 0.07093297690153122 2023-01-24 03:30:18.986968: step: 750/463, loss: 0.06783630698919296 2023-01-24 03:30:19.620214: step: 752/463, loss: 0.010502041317522526 2023-01-24 03:30:20.268204: step: 754/463, loss: 0.025041887536644936 2023-01-24 03:30:20.874397: step: 756/463, loss: 0.01032335963100195 2023-01-24 03:30:21.488161: step: 758/463, loss: 0.02496275119483471 2023-01-24 03:30:22.103210: step: 760/463, loss: 0.023167282342910767 2023-01-24 03:30:22.761892: step: 762/463, loss: 0.014368785545229912 2023-01-24 03:30:23.372760: step: 764/463, loss: 0.0056557306088507175 2023-01-24 03:30:24.004318: step: 766/463, loss: 0.06064826622605324 2023-01-24 03:30:24.573191: step: 768/463, loss: 0.02322997897863388 2023-01-24 03:30:25.148759: step: 770/463, loss: 0.0703640952706337 2023-01-24 03:30:25.750349: step: 772/463, loss: 0.015143807977437973 2023-01-24 03:30:26.370101: step: 774/463, loss: 0.009394689463078976 2023-01-24 03:30:26.958871: step: 776/463, loss: 0.0042193131521344185 2023-01-24 03:30:27.587764: step: 778/463, loss: 0.019165704026818275 2023-01-24 03:30:28.185868: step: 780/463, loss: 0.11084091663360596 2023-01-24 03:30:28.852378: step: 782/463, loss: 0.05546008422970772 2023-01-24 03:30:29.505562: step: 784/463, loss: 0.06199384480714798 2023-01-24 03:30:30.217307: step: 786/463, loss: 0.00725259305909276 2023-01-24 03:30:30.930458: step: 788/463, loss: 0.07570730149745941 2023-01-24 03:30:31.526301: step: 790/463, loss: 0.0014948367606848478 2023-01-24 03:30:32.097914: step: 792/463, loss: 0.0061547704972326756 2023-01-24 03:30:32.683168: step: 794/463, loss: 0.003681823378428817 2023-01-24 03:30:33.258120: step: 796/463, loss: 0.015273437835276127 2023-01-24 03:30:33.895370: step: 798/463, loss: 0.044249873608350754 2023-01-24 03:30:34.657675: step: 800/463, loss: 0.002889686729758978 2023-01-24 03:30:35.275572: step: 802/463, loss: 0.0030433328356593847 2023-01-24 03:30:35.908570: step: 804/463, loss: 0.004954787902534008 2023-01-24 03:30:36.527082: step: 806/463, loss: 0.002291971119120717 2023-01-24 03:30:37.218262: step: 808/463, loss: 0.13242027163505554 2023-01-24 03:30:37.831614: step: 810/463, loss: 0.0024529832880944014 2023-01-24 03:30:38.452456: step: 812/463, loss: 0.34052878618240356 2023-01-24 03:30:39.037113: step: 814/463, loss: 1.5564850568771362 2023-01-24 03:30:39.652527: step: 816/463, loss: 0.007235885597765446 2023-01-24 03:30:40.268608: step: 818/463, loss: 0.023396898061037064 2023-01-24 03:30:40.833933: step: 820/463, loss: 0.01937183551490307 2023-01-24 03:30:41.398009: step: 822/463, loss: 0.0014902740949764848 2023-01-24 03:30:42.071808: step: 824/463, loss: 0.008005850948393345 2023-01-24 03:30:42.746993: step: 826/463, loss: 0.0597454309463501 2023-01-24 03:30:43.340672: step: 828/463, loss: 0.028225086629390717 2023-01-24 03:30:43.977781: step: 830/463, loss: 0.0008724422659724951 2023-01-24 03:30:44.626287: step: 832/463, loss: 0.05529513582587242 2023-01-24 03:30:45.300276: step: 834/463, loss: 0.05840693414211273 2023-01-24 03:30:45.872145: step: 836/463, loss: 0.014748592860996723 2023-01-24 03:30:46.500997: step: 838/463, loss: 0.026627134531736374 2023-01-24 03:30:47.132767: step: 840/463, loss: 0.06271857023239136 2023-01-24 03:30:47.783851: step: 842/463, loss: 0.02222890965640545 2023-01-24 03:30:48.351373: step: 844/463, loss: 0.0014154906384646893 2023-01-24 03:30:49.006991: step: 846/463, loss: 0.003977329470217228 2023-01-24 03:30:49.713101: step: 848/463, loss: 0.032589104026556015 2023-01-24 03:30:50.283417: step: 850/463, loss: 0.001300859497860074 2023-01-24 03:30:50.872086: step: 852/463, loss: 0.006473367568105459 2023-01-24 03:30:51.448006: step: 854/463, loss: 0.007052626460790634 2023-01-24 03:30:52.046321: step: 856/463, loss: 0.25654274225234985 2023-01-24 03:30:52.606479: step: 858/463, loss: 0.0019829077646136284 2023-01-24 03:30:53.229620: step: 860/463, loss: 0.044510066509246826 2023-01-24 03:30:53.814957: step: 862/463, loss: 0.001918377005495131 2023-01-24 03:30:54.421719: step: 864/463, loss: 0.0053252531215548515 2023-01-24 03:30:55.030464: step: 866/463, loss: 0.014237024821341038 2023-01-24 03:30:55.681989: step: 868/463, loss: 0.08253347128629684 2023-01-24 03:30:56.275145: step: 870/463, loss: 0.01173979789018631 2023-01-24 03:30:56.897408: step: 872/463, loss: 0.09331776201725006 2023-01-24 03:30:57.536219: step: 874/463, loss: 0.009608241729438305 2023-01-24 03:30:58.110564: step: 876/463, loss: 0.0012645330280065536 2023-01-24 03:30:58.749679: step: 878/463, loss: 0.007763568311929703 2023-01-24 03:30:59.405626: step: 880/463, loss: 0.03193031623959541 2023-01-24 03:31:00.053389: step: 882/463, loss: 0.004222292453050613 2023-01-24 03:31:00.661797: step: 884/463, loss: 0.03477559611201286 2023-01-24 03:31:01.293149: step: 886/463, loss: 0.01894582062959671 2023-01-24 03:31:01.948933: step: 888/463, loss: 0.7207598686218262 2023-01-24 03:31:02.551056: step: 890/463, loss: 0.09832815825939178 2023-01-24 03:31:03.113821: step: 892/463, loss: 0.0027617882005870342 2023-01-24 03:31:03.707667: step: 894/463, loss: 0.01734338328242302 2023-01-24 03:31:04.427760: step: 896/463, loss: 0.3503348231315613 2023-01-24 03:31:05.177159: step: 898/463, loss: 0.038323625922203064 2023-01-24 03:31:05.792922: step: 900/463, loss: 0.30789369344711304 2023-01-24 03:31:06.421340: step: 902/463, loss: 0.11568040400743484 2023-01-24 03:31:07.002535: step: 904/463, loss: 0.002566554583609104 2023-01-24 03:31:07.521607: step: 906/463, loss: 0.053460948169231415 2023-01-24 03:31:08.103658: step: 908/463, loss: 0.016339968889951706 2023-01-24 03:31:08.703084: step: 910/463, loss: 0.027080947533249855 2023-01-24 03:31:09.347627: step: 912/463, loss: 0.026154812425374985 2023-01-24 03:31:09.983759: step: 914/463, loss: 0.058754950761795044 2023-01-24 03:31:10.629756: step: 916/463, loss: 0.14254778623580933 2023-01-24 03:31:11.253459: step: 918/463, loss: 0.010557337664067745 2023-01-24 03:31:11.868694: step: 920/463, loss: 0.05688267946243286 2023-01-24 03:31:12.543275: step: 922/463, loss: 0.06220068410038948 2023-01-24 03:31:13.101072: step: 924/463, loss: 0.09934665262699127 2023-01-24 03:31:13.683221: step: 926/463, loss: 0.047787394374608994 ================================================== Loss: 0.069 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3588128306878307, 'r': 0.33089760097587423, 'f1': 0.34429029756028773}, 'combined': 0.25368758767600147, 'epoch': 28} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.36163548207276075, 'r': 0.38756155523606534, 'f1': 0.37414993087563353}, 'combined': 0.29001095120503656, 'epoch': 28} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3275172681516385, 'r': 0.32440989369099676, 'f1': 0.32595617535777943}, 'combined': 0.24017823447415326, 'epoch': 28} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34552489358256466, 'r': 0.38681003711724615, 'f1': 0.3650037470802808}, 'combined': 0.28292156472251434, 'epoch': 28} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32427473737024554, 'r': 0.32119812695876315, 'f1': 0.3227290999185285}, 'combined': 0.23780038941365256, 'epoch': 28} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34684570583490104, 'r': 0.3761745706665287, 'f1': 0.36091528473120216}, 'combined': 0.2797525173514582, 'epoch': 28} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.325, 'r': 0.2785714285714286, 'f1': 0.3}, 'combined': 0.19999999999999998, 'epoch': 28} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.36607142857142855, 'r': 0.44565217391304346, 'f1': 0.4019607843137254}, 'combined': 0.2009803921568627, 'epoch': 28} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.1724137931034483, 'f1': 0.25641025641025644}, 'combined': 0.17094017094017094, 'epoch': 28} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35531381856540084, 'r': 0.3195801707779886, 'f1': 0.336500999000999}, 'combined': 0.24794810452705188, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35077406976066583, 'r': 0.38269193088778525, 'f1': 0.3660385237854157}, 'combined': 0.2837236404461117, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.3142857142857143, 'f1': 0.360655737704918}, 'combined': 0.24043715846994532, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3556843389529725, 'r': 0.32598773380319873, 'f1': 0.340189179632249}, 'combined': 0.25066571130797294, 'epoch': 19} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35235259195938173, 'r': 0.3698406801632481, 'f1': 0.3608848968767838}, 'combined': 0.2797289631293731, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.41304347826086957, 'r': 0.41304347826086957, 'f1': 0.41304347826086957}, 'combined': 0.20652173913043478, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33842201576576575, 'r': 0.3326425126502214, 'f1': 0.3355073763955343}, 'combined': 0.2472159615546042, 'epoch': 14} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33644544699224593, 'r': 0.3627302475385151, 'f1': 0.3490937720671424}, 'combined': 0.2705894309802731, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.20689655172413793, 'f1': 0.2926829268292683}, 'combined': 0.19512195121951217, 'epoch': 14} ****************************** Epoch: 29 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:33:48.353732: step: 2/463, loss: 0.1550900638103485 2023-01-24 03:33:49.006629: step: 4/463, loss: 0.01312115229666233 2023-01-24 03:33:49.571230: step: 6/463, loss: 0.04941786453127861 2023-01-24 03:33:50.229504: step: 8/463, loss: 0.008064012974500656 2023-01-24 03:33:50.831591: step: 10/463, loss: 0.0057556587271392345 2023-01-24 03:33:51.461864: step: 12/463, loss: 0.08062035590410233 2023-01-24 03:33:52.062141: step: 14/463, loss: 0.018098358064889908 2023-01-24 03:33:52.641909: step: 16/463, loss: 0.018659701570868492 2023-01-24 03:33:53.246525: step: 18/463, loss: 4.444612979888916 2023-01-24 03:33:53.811937: step: 20/463, loss: 0.005970236379653215 2023-01-24 03:33:54.478377: step: 22/463, loss: 0.011970184743404388 2023-01-24 03:33:55.129635: step: 24/463, loss: 0.004882423672825098 2023-01-24 03:33:55.725197: step: 26/463, loss: 0.10401653498411179 2023-01-24 03:33:56.300410: step: 28/463, loss: 0.10145227611064911 2023-01-24 03:33:56.898562: step: 30/463, loss: 0.0015595429576933384 2023-01-24 03:33:57.489143: step: 32/463, loss: 0.03525329753756523 2023-01-24 03:33:58.045452: step: 34/463, loss: 0.1612314134836197 2023-01-24 03:33:58.669972: step: 36/463, loss: 0.010630753822624683 2023-01-24 03:33:59.318239: step: 38/463, loss: 0.01335727795958519 2023-01-24 03:33:59.901647: step: 40/463, loss: 0.00010043101792689413 2023-01-24 03:34:00.541669: step: 42/463, loss: 0.01780151203274727 2023-01-24 03:34:01.171176: step: 44/463, loss: 0.024259736761450768 2023-01-24 03:34:01.700866: step: 46/463, loss: 0.03141922876238823 2023-01-24 03:34:02.336712: step: 48/463, loss: 0.03314659744501114 2023-01-24 03:34:02.984337: step: 50/463, loss: 0.012423048727214336 2023-01-24 03:34:03.552536: step: 52/463, loss: 0.016100626438856125 2023-01-24 03:34:04.124200: step: 54/463, loss: 0.07544917613267899 2023-01-24 03:34:04.729642: step: 56/463, loss: 0.026606986299157143 2023-01-24 03:34:05.421355: step: 58/463, loss: 0.0028888958040624857 2023-01-24 03:34:06.028306: step: 60/463, loss: 0.061041779816150665 2023-01-24 03:34:06.670351: step: 62/463, loss: 0.00510053988546133 2023-01-24 03:34:07.242007: step: 64/463, loss: 0.030810287222266197 2023-01-24 03:34:07.880579: step: 66/463, loss: 0.003582959994673729 2023-01-24 03:34:08.438931: step: 68/463, loss: 0.0012058792635798454 2023-01-24 03:34:09.103360: step: 70/463, loss: 0.028774576261639595 2023-01-24 03:34:09.704916: step: 72/463, loss: 0.03866603225469589 2023-01-24 03:34:10.299364: step: 74/463, loss: 0.7688112258911133 2023-01-24 03:34:10.979572: step: 76/463, loss: 0.018393585458397865 2023-01-24 03:34:11.600907: step: 78/463, loss: 0.009106939658522606 2023-01-24 03:34:12.240907: step: 80/463, loss: 0.05523703992366791 2023-01-24 03:34:12.832388: step: 82/463, loss: 0.612266480922699 2023-01-24 03:34:13.434408: step: 84/463, loss: 0.0021509667858481407 2023-01-24 03:34:14.080351: step: 86/463, loss: 0.040496937930583954 2023-01-24 03:34:14.631162: step: 88/463, loss: 0.052959710359573364 2023-01-24 03:34:15.203771: step: 90/463, loss: 0.060240283608436584 2023-01-24 03:34:15.856385: step: 92/463, loss: 0.008552342653274536 2023-01-24 03:34:16.480466: step: 94/463, loss: 0.0068692597560584545 2023-01-24 03:34:17.032609: step: 96/463, loss: 0.030129818245768547 2023-01-24 03:34:17.628702: step: 98/463, loss: 0.004517947323620319 2023-01-24 03:34:18.251024: step: 100/463, loss: 0.32829606533050537 2023-01-24 03:34:18.820214: step: 102/463, loss: 0.03064805269241333 2023-01-24 03:34:19.428597: step: 104/463, loss: 0.0008148181368596852 2023-01-24 03:34:20.077327: step: 106/463, loss: 0.03350914642214775 2023-01-24 03:34:20.657250: step: 108/463, loss: 0.038696784526109695 2023-01-24 03:34:21.269140: step: 110/463, loss: 0.20961901545524597 2023-01-24 03:34:21.832549: step: 112/463, loss: 0.007900345139205456 2023-01-24 03:34:22.456075: step: 114/463, loss: 0.024062421172857285 2023-01-24 03:34:23.067754: step: 116/463, loss: 8.284940849989653e-05 2023-01-24 03:34:23.664536: step: 118/463, loss: 0.0025021443143486977 2023-01-24 03:34:24.252184: step: 120/463, loss: 0.5800390243530273 2023-01-24 03:34:24.840608: step: 122/463, loss: 0.10917577147483826 2023-01-24 03:34:25.470467: step: 124/463, loss: 0.02627609856426716 2023-01-24 03:34:26.038604: step: 126/463, loss: 0.0077028400264680386 2023-01-24 03:34:26.649525: step: 128/463, loss: 0.0035240240395069122 2023-01-24 03:34:27.197646: step: 130/463, loss: 0.008725050836801529 2023-01-24 03:34:27.793620: step: 132/463, loss: 0.0005206424393691123 2023-01-24 03:34:28.389370: step: 134/463, loss: 0.012773975729942322 2023-01-24 03:34:28.995922: step: 136/463, loss: 0.069736048579216 2023-01-24 03:34:29.581296: step: 138/463, loss: 0.008612376637756824 2023-01-24 03:34:30.161023: step: 140/463, loss: 0.017149263992905617 2023-01-24 03:34:30.769748: step: 142/463, loss: 0.0033292314037680626 2023-01-24 03:34:31.385895: step: 144/463, loss: 0.032853949815034866 2023-01-24 03:34:31.982097: step: 146/463, loss: 0.00012632431753445417 2023-01-24 03:34:32.555435: step: 148/463, loss: 0.06823589652776718 2023-01-24 03:34:33.151516: step: 150/463, loss: 0.009149501100182533 2023-01-24 03:34:33.753294: step: 152/463, loss: 0.04792388528585434 2023-01-24 03:34:34.322875: step: 154/463, loss: 0.034675247967243195 2023-01-24 03:34:35.014125: step: 156/463, loss: 0.019604550674557686 2023-01-24 03:34:35.602846: step: 158/463, loss: 0.02220567688345909 2023-01-24 03:34:36.227176: step: 160/463, loss: 0.05470237880945206 2023-01-24 03:34:36.849957: step: 162/463, loss: 0.027703681960701942 2023-01-24 03:34:37.423371: step: 164/463, loss: 0.0797567367553711 2023-01-24 03:34:38.005929: step: 166/463, loss: 0.5837057828903198 2023-01-24 03:34:38.556938: step: 168/463, loss: 0.00628327764570713 2023-01-24 03:34:39.173710: step: 170/463, loss: 0.004783878568559885 2023-01-24 03:34:39.749299: step: 172/463, loss: 0.003920532763004303 2023-01-24 03:34:40.551013: step: 174/463, loss: 0.0012897155247628689 2023-01-24 03:34:41.075489: step: 176/463, loss: 0.0040885829366743565 2023-01-24 03:34:41.657647: step: 178/463, loss: 0.03434412181377411 2023-01-24 03:34:42.310768: step: 180/463, loss: 0.018764395266771317 2023-01-24 03:34:42.931513: step: 182/463, loss: 0.03970245271921158 2023-01-24 03:34:43.595877: step: 184/463, loss: 0.024454912170767784 2023-01-24 03:34:44.183241: step: 186/463, loss: 0.04383327439427376 2023-01-24 03:34:44.807662: step: 188/463, loss: 0.024958115071058273 2023-01-24 03:34:45.475138: step: 190/463, loss: 0.045921605080366135 2023-01-24 03:34:46.077844: step: 192/463, loss: 0.015539822168648243 2023-01-24 03:34:46.685508: step: 194/463, loss: 0.039451587945222855 2023-01-24 03:34:47.299215: step: 196/463, loss: 0.007012828718870878 2023-01-24 03:34:47.890160: step: 198/463, loss: 0.0016368953511118889 2023-01-24 03:34:48.572973: step: 200/463, loss: 0.027939356863498688 2023-01-24 03:34:49.197368: step: 202/463, loss: 0.02487771026790142 2023-01-24 03:34:49.808013: step: 204/463, loss: 0.02883930876851082 2023-01-24 03:34:50.400054: step: 206/463, loss: 0.035411231219768524 2023-01-24 03:34:51.062798: step: 208/463, loss: 0.001753342105075717 2023-01-24 03:34:51.661604: step: 210/463, loss: 0.034231409430503845 2023-01-24 03:34:52.244964: step: 212/463, loss: 0.013961263000965118 2023-01-24 03:34:52.826528: step: 214/463, loss: 0.022274188697338104 2023-01-24 03:34:53.389067: step: 216/463, loss: 0.0030902153812348843 2023-01-24 03:34:53.978095: step: 218/463, loss: 0.002822666894644499 2023-01-24 03:34:54.611419: step: 220/463, loss: 0.04084483161568642 2023-01-24 03:34:55.137572: step: 222/463, loss: 0.1427854746580124 2023-01-24 03:34:55.801658: step: 224/463, loss: 0.028082339093089104 2023-01-24 03:34:56.420118: step: 226/463, loss: 0.008980338461697102 2023-01-24 03:34:56.997766: step: 228/463, loss: 0.005404004827141762 2023-01-24 03:34:57.545487: step: 230/463, loss: 0.004749061074107885 2023-01-24 03:34:58.204999: step: 232/463, loss: 0.021711250767111778 2023-01-24 03:34:58.842337: step: 234/463, loss: 0.19020888209342957 2023-01-24 03:34:59.438626: step: 236/463, loss: 0.7447059750556946 2023-01-24 03:35:00.002374: step: 238/463, loss: 0.0014882652321830392 2023-01-24 03:35:00.596659: step: 240/463, loss: 0.013305017724633217 2023-01-24 03:35:01.179478: step: 242/463, loss: 0.016160206869244576 2023-01-24 03:35:01.805957: step: 244/463, loss: 0.015276189893484116 2023-01-24 03:35:02.433566: step: 246/463, loss: 0.004023570567369461 2023-01-24 03:35:03.026418: step: 248/463, loss: 0.014682869426906109 2023-01-24 03:35:03.620423: step: 250/463, loss: 0.0011476946529000998 2023-01-24 03:35:04.238797: step: 252/463, loss: 0.08572657406330109 2023-01-24 03:35:04.882497: step: 254/463, loss: 0.13576018810272217 2023-01-24 03:35:05.465066: step: 256/463, loss: 0.00667849974706769 2023-01-24 03:35:06.117527: step: 258/463, loss: 0.0002772319712676108 2023-01-24 03:35:06.847349: step: 260/463, loss: 0.05342680960893631 2023-01-24 03:35:07.474324: step: 262/463, loss: 4.617326736450195 2023-01-24 03:35:08.034508: step: 264/463, loss: 0.0006627269322052598 2023-01-24 03:35:08.614419: step: 266/463, loss: 0.001987112918868661 2023-01-24 03:35:09.192996: step: 268/463, loss: 0.008364694193005562 2023-01-24 03:35:09.779685: step: 270/463, loss: 0.0023677332792431116 2023-01-24 03:35:10.406007: step: 272/463, loss: 0.002349935006350279 2023-01-24 03:35:11.043067: step: 274/463, loss: 0.03869195654988289 2023-01-24 03:35:11.667839: step: 276/463, loss: 0.06180592626333237 2023-01-24 03:35:12.447025: step: 278/463, loss: 0.01338766235858202 2023-01-24 03:35:13.107831: step: 280/463, loss: 0.009493292309343815 2023-01-24 03:35:13.669496: step: 282/463, loss: 0.002042797626927495 2023-01-24 03:35:14.332729: step: 284/463, loss: 0.016817327588796616 2023-01-24 03:35:14.928882: step: 286/463, loss: 0.0006671757437288761 2023-01-24 03:35:15.498411: step: 288/463, loss: 0.08631105720996857 2023-01-24 03:35:16.077397: step: 290/463, loss: 0.057567253708839417 2023-01-24 03:35:16.722013: step: 292/463, loss: 0.05429114028811455 2023-01-24 03:35:17.325169: step: 294/463, loss: 0.055227115750312805 2023-01-24 03:35:17.938548: step: 296/463, loss: 0.01286325789988041 2023-01-24 03:35:18.575090: step: 298/463, loss: 0.03145575523376465 2023-01-24 03:35:19.194974: step: 300/463, loss: 0.02212996408343315 2023-01-24 03:35:19.790317: step: 302/463, loss: 0.05742984637618065 2023-01-24 03:35:20.396321: step: 304/463, loss: 0.034636858850717545 2023-01-24 03:35:20.961468: step: 306/463, loss: 0.0009654642199166119 2023-01-24 03:35:21.574435: step: 308/463, loss: 0.0017407169798389077 2023-01-24 03:35:22.164267: step: 310/463, loss: 0.019746888428926468 2023-01-24 03:35:22.737689: step: 312/463, loss: 0.0014645576011389494 2023-01-24 03:35:23.327486: step: 314/463, loss: 0.0012500026496127248 2023-01-24 03:35:23.995928: step: 316/463, loss: 0.028996281325817108 2023-01-24 03:35:24.725724: step: 318/463, loss: 0.004588013049215078 2023-01-24 03:35:25.319888: step: 320/463, loss: 0.006899316795170307 2023-01-24 03:35:25.921684: step: 322/463, loss: 0.04672044515609741 2023-01-24 03:35:26.600489: step: 324/463, loss: 0.039173007011413574 2023-01-24 03:35:27.249961: step: 326/463, loss: 0.011062702164053917 2023-01-24 03:35:27.865869: step: 328/463, loss: 0.00412320950999856 2023-01-24 03:35:28.440290: step: 330/463, loss: 0.06949309259653091 2023-01-24 03:35:28.980706: step: 332/463, loss: 0.019062811508774757 2023-01-24 03:35:29.607264: step: 334/463, loss: 0.008088046684861183 2023-01-24 03:35:30.280742: step: 336/463, loss: 0.02030049078166485 2023-01-24 03:35:30.955969: step: 338/463, loss: 0.015328459441661835 2023-01-24 03:35:31.454189: step: 340/463, loss: 0.01732882484793663 2023-01-24 03:35:32.127711: step: 342/463, loss: 0.018980398774147034 2023-01-24 03:35:32.726883: step: 344/463, loss: 0.01901767961680889 2023-01-24 03:35:33.308366: step: 346/463, loss: 0.028678152710199356 2023-01-24 03:35:33.897765: step: 348/463, loss: 0.009891662746667862 2023-01-24 03:35:34.592750: step: 350/463, loss: 0.030378203839063644 2023-01-24 03:35:35.230244: step: 352/463, loss: 0.011372922919690609 2023-01-24 03:35:35.803589: step: 354/463, loss: 0.002102686557918787 2023-01-24 03:35:36.406219: step: 356/463, loss: 0.00036071351496502757 2023-01-24 03:35:36.971459: step: 358/463, loss: 0.00859138835221529 2023-01-24 03:35:37.541216: step: 360/463, loss: 0.006079711951315403 2023-01-24 03:35:38.136968: step: 362/463, loss: 0.0011085874866694212 2023-01-24 03:35:38.768302: step: 364/463, loss: 0.007413227576762438 2023-01-24 03:35:39.389257: step: 366/463, loss: 0.004759788513183594 2023-01-24 03:35:39.973942: step: 368/463, loss: 0.20699399709701538 2023-01-24 03:35:40.605808: step: 370/463, loss: 0.272332102060318 2023-01-24 03:35:41.276104: step: 372/463, loss: 0.0013226158916950226 2023-01-24 03:35:41.956147: step: 374/463, loss: 0.008595649152994156 2023-01-24 03:35:42.608589: step: 376/463, loss: 0.009411418810486794 2023-01-24 03:35:43.205005: step: 378/463, loss: 0.03951879218220711 2023-01-24 03:35:43.827368: step: 380/463, loss: 0.06510946154594421 2023-01-24 03:35:44.480718: step: 382/463, loss: 0.23156484961509705 2023-01-24 03:35:45.124296: step: 384/463, loss: 0.000822294969111681 2023-01-24 03:35:45.748811: step: 386/463, loss: 0.00846689473837614 2023-01-24 03:35:46.307467: step: 388/463, loss: 1.6453962326049805 2023-01-24 03:35:46.996708: step: 390/463, loss: 0.021790912374854088 2023-01-24 03:35:47.585762: step: 392/463, loss: 0.13698720932006836 2023-01-24 03:35:48.142463: step: 394/463, loss: 0.008768951520323753 2023-01-24 03:35:48.725876: step: 396/463, loss: 0.028732653707265854 2023-01-24 03:35:49.328856: step: 398/463, loss: 0.012918967753648758 2023-01-24 03:35:50.014094: step: 400/463, loss: 0.008159270510077477 2023-01-24 03:35:50.725098: step: 402/463, loss: 0.004104298539459705 2023-01-24 03:35:51.301136: step: 404/463, loss: 0.3714562952518463 2023-01-24 03:35:51.955507: step: 406/463, loss: 0.0265846885740757 2023-01-24 03:35:52.511118: step: 408/463, loss: 0.05175428092479706 2023-01-24 03:35:53.091723: step: 410/463, loss: 0.5105317831039429 2023-01-24 03:35:53.719155: step: 412/463, loss: 0.0030236411839723587 2023-01-24 03:35:54.336844: step: 414/463, loss: 0.016782555729150772 2023-01-24 03:35:55.008337: step: 416/463, loss: 0.016550594940781593 2023-01-24 03:35:55.640799: step: 418/463, loss: 0.07024864107370377 2023-01-24 03:35:56.306344: step: 420/463, loss: 0.05589844658970833 2023-01-24 03:35:56.862150: step: 422/463, loss: 0.005209112074226141 2023-01-24 03:35:57.496702: step: 424/463, loss: 0.002344045089557767 2023-01-24 03:35:58.122879: step: 426/463, loss: 0.02918173559010029 2023-01-24 03:35:58.731881: step: 428/463, loss: 0.006644498091191053 2023-01-24 03:35:59.306343: step: 430/463, loss: 0.011588048189878464 2023-01-24 03:35:59.922243: step: 432/463, loss: 0.04694873467087746 2023-01-24 03:36:00.462836: step: 434/463, loss: 0.10165604948997498 2023-01-24 03:36:01.060635: step: 436/463, loss: 0.12336350977420807 2023-01-24 03:36:01.659937: step: 438/463, loss: 0.0033318425994366407 2023-01-24 03:36:02.245470: step: 440/463, loss: 0.016334472224116325 2023-01-24 03:36:02.882881: step: 442/463, loss: 0.0026598761323839426 2023-01-24 03:36:03.468935: step: 444/463, loss: 0.0016191492322832346 2023-01-24 03:36:04.143260: step: 446/463, loss: 0.01846923679113388 2023-01-24 03:36:04.702393: step: 448/463, loss: 0.027185291051864624 2023-01-24 03:36:05.366722: step: 450/463, loss: 0.004145144019275904 2023-01-24 03:36:05.954654: step: 452/463, loss: 0.0006502980249933898 2023-01-24 03:36:06.565221: step: 454/463, loss: 0.0021698183845728636 2023-01-24 03:36:07.203386: step: 456/463, loss: 0.051979027688503265 2023-01-24 03:36:07.820509: step: 458/463, loss: 0.006482423283159733 2023-01-24 03:36:08.441438: step: 460/463, loss: 0.012627340853214264 2023-01-24 03:36:09.127242: step: 462/463, loss: 0.0426507294178009 2023-01-24 03:36:09.791907: step: 464/463, loss: 0.032700929790735245 2023-01-24 03:36:10.391268: step: 466/463, loss: 0.01059667207300663 2023-01-24 03:36:10.987354: step: 468/463, loss: 0.0033148638904094696 2023-01-24 03:36:11.546861: step: 470/463, loss: 0.010824110358953476 2023-01-24 03:36:12.162387: step: 472/463, loss: 0.007498008664697409 2023-01-24 03:36:12.783917: step: 474/463, loss: 0.007538353092968464 2023-01-24 03:36:13.393997: step: 476/463, loss: 0.056987062096595764 2023-01-24 03:36:13.985030: step: 478/463, loss: 0.07131024450063705 2023-01-24 03:36:14.602434: step: 480/463, loss: 0.045813802629709244 2023-01-24 03:36:15.220122: step: 482/463, loss: 0.003239908954128623 2023-01-24 03:36:15.765548: step: 484/463, loss: 0.0014419176150113344 2023-01-24 03:36:16.386468: step: 486/463, loss: 0.018764080479741096 2023-01-24 03:36:17.013017: step: 488/463, loss: 0.006720618344843388 2023-01-24 03:36:17.580171: step: 490/463, loss: 0.008544464595615864 2023-01-24 03:36:18.252605: step: 492/463, loss: 0.00428227661177516 2023-01-24 03:36:18.843440: step: 494/463, loss: 0.007183075416833162 2023-01-24 03:36:19.470263: step: 496/463, loss: 0.012095246464014053 2023-01-24 03:36:20.060171: step: 498/463, loss: 0.01244945079088211 2023-01-24 03:36:20.573934: step: 500/463, loss: 0.0004592906916514039 2023-01-24 03:36:21.229681: step: 502/463, loss: 0.00928113330155611 2023-01-24 03:36:21.854635: step: 504/463, loss: 0.007732453290373087 2023-01-24 03:36:22.428380: step: 506/463, loss: 0.03434942662715912 2023-01-24 03:36:23.079314: step: 508/463, loss: 0.00024358944210689515 2023-01-24 03:36:23.723633: step: 510/463, loss: 0.03508301079273224 2023-01-24 03:36:24.396729: step: 512/463, loss: 0.0077947345562279224 2023-01-24 03:36:25.056367: step: 514/463, loss: 0.025396818295121193 2023-01-24 03:36:25.702619: step: 516/463, loss: 0.029606034979224205 2023-01-24 03:36:26.320620: step: 518/463, loss: 0.002479645423591137 2023-01-24 03:36:26.910626: step: 520/463, loss: 0.0024734127800911665 2023-01-24 03:36:27.567035: step: 522/463, loss: 0.07300721853971481 2023-01-24 03:36:28.155159: step: 524/463, loss: 0.013177592307329178 2023-01-24 03:36:28.778072: step: 526/463, loss: 0.7319537997245789 2023-01-24 03:36:29.398006: step: 528/463, loss: 0.0023846549447625875 2023-01-24 03:36:29.999203: step: 530/463, loss: 0.015529816038906574 2023-01-24 03:36:30.623495: step: 532/463, loss: 0.062036339193582535 2023-01-24 03:36:31.271163: step: 534/463, loss: 0.000389710912713781 2023-01-24 03:36:31.859218: step: 536/463, loss: 0.0019996482878923416 2023-01-24 03:36:32.504386: step: 538/463, loss: 0.009069595485925674 2023-01-24 03:36:33.118721: step: 540/463, loss: 0.001913677086122334 2023-01-24 03:36:33.676911: step: 542/463, loss: 0.014776908792555332 2023-01-24 03:36:34.307910: step: 544/463, loss: 0.02986527979373932 2023-01-24 03:36:34.934922: step: 546/463, loss: 0.010200761258602142 2023-01-24 03:36:35.579363: step: 548/463, loss: 2.508096218109131 2023-01-24 03:36:36.240767: step: 550/463, loss: 0.011398224160075188 2023-01-24 03:36:36.860917: step: 552/463, loss: 0.00230087386444211 2023-01-24 03:36:37.501394: step: 554/463, loss: 0.07720857113599777 2023-01-24 03:36:38.151713: step: 556/463, loss: 0.010546802543103695 2023-01-24 03:36:38.869546: step: 558/463, loss: 0.012195480056107044 2023-01-24 03:36:39.442878: step: 560/463, loss: 0.02231784351170063 2023-01-24 03:36:40.225282: step: 562/463, loss: 0.061512671411037445 2023-01-24 03:36:40.845146: step: 564/463, loss: 0.03533143177628517 2023-01-24 03:36:41.502597: step: 566/463, loss: 0.06323465704917908 2023-01-24 03:36:42.143020: step: 568/463, loss: 0.0459849089384079 2023-01-24 03:36:42.782299: step: 570/463, loss: 0.01714775711297989 2023-01-24 03:36:43.438586: step: 572/463, loss: 0.013263450004160404 2023-01-24 03:36:44.074393: step: 574/463, loss: 0.4613925814628601 2023-01-24 03:36:44.680396: step: 576/463, loss: 0.003999463748186827 2023-01-24 03:36:45.332447: step: 578/463, loss: 0.026372535154223442 2023-01-24 03:36:45.930812: step: 580/463, loss: 0.004054812714457512 2023-01-24 03:36:46.512379: step: 582/463, loss: 0.03685910999774933 2023-01-24 03:36:47.171382: step: 584/463, loss: 0.005764555651694536 2023-01-24 03:36:47.802189: step: 586/463, loss: 0.01615178771317005 2023-01-24 03:36:48.429173: step: 588/463, loss: 0.0005658544250763953 2023-01-24 03:36:49.004819: step: 590/463, loss: 0.06997364014387131 2023-01-24 03:36:49.587317: step: 592/463, loss: 0.014365759678184986 2023-01-24 03:36:50.239546: step: 594/463, loss: 0.033954013139009476 2023-01-24 03:36:50.811216: step: 596/463, loss: 0.04654618725180626 2023-01-24 03:36:51.422601: step: 598/463, loss: 0.007194723468273878 2023-01-24 03:36:52.040572: step: 600/463, loss: 0.025961844250559807 2023-01-24 03:36:52.714538: step: 602/463, loss: 0.020418139174580574 2023-01-24 03:36:53.249857: step: 604/463, loss: 0.07165495306253433 2023-01-24 03:36:53.931097: step: 606/463, loss: 0.0035638357512652874 2023-01-24 03:36:54.503818: step: 608/463, loss: 0.005681503098458052 2023-01-24 03:36:55.083091: step: 610/463, loss: 0.01035415381193161 2023-01-24 03:36:55.661580: step: 612/463, loss: 0.14534080028533936 2023-01-24 03:36:56.305674: step: 614/463, loss: 0.007295891176909208 2023-01-24 03:36:56.916143: step: 616/463, loss: 0.011274606920778751 2023-01-24 03:36:57.571814: step: 618/463, loss: 0.13457202911376953 2023-01-24 03:36:58.145386: step: 620/463, loss: 0.015402178280055523 2023-01-24 03:36:58.805460: step: 622/463, loss: 0.03315766900777817 2023-01-24 03:36:59.409855: step: 624/463, loss: 0.024579806253314018 2023-01-24 03:37:00.153183: step: 626/463, loss: 0.022329552099108696 2023-01-24 03:37:00.770757: step: 628/463, loss: 0.14533448219299316 2023-01-24 03:37:01.341525: step: 630/463, loss: 0.009475641883909702 2023-01-24 03:37:02.013958: step: 632/463, loss: 0.029815059155225754 2023-01-24 03:37:02.616705: step: 634/463, loss: 0.008774718269705772 2023-01-24 03:37:03.284141: step: 636/463, loss: 0.01674095168709755 2023-01-24 03:37:03.880593: step: 638/463, loss: 0.09528885781764984 2023-01-24 03:37:04.567903: step: 640/463, loss: 0.013047860935330391 2023-01-24 03:37:05.197433: step: 642/463, loss: 0.05572964996099472 2023-01-24 03:37:05.823177: step: 644/463, loss: 0.016993409022688866 2023-01-24 03:37:06.464323: step: 646/463, loss: 0.012161463499069214 2023-01-24 03:37:07.104056: step: 648/463, loss: 0.02520204707980156 2023-01-24 03:37:07.739446: step: 650/463, loss: 0.02108667604625225 2023-01-24 03:37:08.333775: step: 652/463, loss: 0.001307966187596321 2023-01-24 03:37:09.020594: step: 654/463, loss: 0.00036701816134154797 2023-01-24 03:37:09.628485: step: 656/463, loss: 0.37352892756462097 2023-01-24 03:37:10.283974: step: 658/463, loss: 0.006581631489098072 2023-01-24 03:37:10.939650: step: 660/463, loss: 0.12856628000736237 2023-01-24 03:37:11.579243: step: 662/463, loss: 0.0007741264998912811 2023-01-24 03:37:12.182046: step: 664/463, loss: 0.0006795075605623424 2023-01-24 03:37:12.753410: step: 666/463, loss: 0.030199136584997177 2023-01-24 03:37:13.295485: step: 668/463, loss: 0.00982176885008812 2023-01-24 03:37:13.912720: step: 670/463, loss: 0.4164646863937378 2023-01-24 03:37:14.570362: step: 672/463, loss: 0.016944773495197296 2023-01-24 03:37:15.196572: step: 674/463, loss: 0.022225484251976013 2023-01-24 03:37:15.792237: step: 676/463, loss: 0.24771860241889954 2023-01-24 03:37:16.418536: step: 678/463, loss: 0.015846656635403633 2023-01-24 03:37:17.051104: step: 680/463, loss: 0.03963213413953781 2023-01-24 03:37:17.688428: step: 682/463, loss: 0.04629233852028847 2023-01-24 03:37:18.338625: step: 684/463, loss: 0.032785721123218536 2023-01-24 03:37:18.891445: step: 686/463, loss: 0.010960950516164303 2023-01-24 03:37:19.553175: step: 688/463, loss: 0.008271569386124611 2023-01-24 03:37:20.190354: step: 690/463, loss: 0.09276773035526276 2023-01-24 03:37:20.781770: step: 692/463, loss: 0.002968868240714073 2023-01-24 03:37:21.405326: step: 694/463, loss: 0.003816205309703946 2023-01-24 03:37:22.004604: step: 696/463, loss: 0.00024717082851566374 2023-01-24 03:37:22.616830: step: 698/463, loss: 0.024036243557929993 2023-01-24 03:37:23.252778: step: 700/463, loss: 0.007068546023219824 2023-01-24 03:37:23.838112: step: 702/463, loss: 0.06692715734243393 2023-01-24 03:37:24.490265: step: 704/463, loss: 0.028790920972824097 2023-01-24 03:37:25.129741: step: 706/463, loss: 0.0812610536813736 2023-01-24 03:37:25.682664: step: 708/463, loss: 0.011899742297828197 2023-01-24 03:37:26.305626: step: 710/463, loss: 0.02381330542266369 2023-01-24 03:37:26.902403: step: 712/463, loss: 0.0275272186845541 2023-01-24 03:37:27.473632: step: 714/463, loss: 0.006281886249780655 2023-01-24 03:37:28.087242: step: 716/463, loss: 0.029312219470739365 2023-01-24 03:37:28.751654: step: 718/463, loss: 0.1676856428384781 2023-01-24 03:37:29.388127: step: 720/463, loss: 0.008076364174485207 2023-01-24 03:37:30.049578: step: 722/463, loss: 0.03715268522500992 2023-01-24 03:37:30.710637: step: 724/463, loss: 0.02680104970932007 2023-01-24 03:37:31.320106: step: 726/463, loss: 0.0076037440448999405 2023-01-24 03:37:32.038717: step: 728/463, loss: 0.0314769484102726 2023-01-24 03:37:32.678669: step: 730/463, loss: 0.007067324593663216 2023-01-24 03:37:33.310330: step: 732/463, loss: 0.28651463985443115 2023-01-24 03:37:33.849267: step: 734/463, loss: 0.05160336568951607 2023-01-24 03:37:34.535388: step: 736/463, loss: 0.09703442454338074 2023-01-24 03:37:35.147620: step: 738/463, loss: 0.0030085614416748285 2023-01-24 03:37:35.733491: step: 740/463, loss: 0.05477362871170044 2023-01-24 03:37:36.369177: step: 742/463, loss: 0.040129974484443665 2023-01-24 03:37:36.973909: step: 744/463, loss: 0.003152980701997876 2023-01-24 03:37:37.630033: step: 746/463, loss: 0.0012924366164952517 2023-01-24 03:37:38.252932: step: 748/463, loss: 0.019745944067835808 2023-01-24 03:37:38.833623: step: 750/463, loss: 0.006218594033271074 2023-01-24 03:37:39.481676: step: 752/463, loss: 0.0668272152543068 2023-01-24 03:37:40.071307: step: 754/463, loss: 0.016391780227422714 2023-01-24 03:37:40.735694: step: 756/463, loss: 0.012321342714130878 2023-01-24 03:37:41.348500: step: 758/463, loss: 0.15765640139579773 2023-01-24 03:37:41.955797: step: 760/463, loss: 0.05147123709321022 2023-01-24 03:37:42.594024: step: 762/463, loss: 0.001761647523380816 2023-01-24 03:37:43.285558: step: 764/463, loss: 0.017148515209555626 2023-01-24 03:37:43.999711: step: 766/463, loss: 0.04036860913038254 2023-01-24 03:37:44.585266: step: 768/463, loss: 0.01573687233030796 2023-01-24 03:37:45.169082: step: 770/463, loss: 0.010533246211707592 2023-01-24 03:37:45.791629: step: 772/463, loss: 0.0003301176184322685 2023-01-24 03:37:46.479851: step: 774/463, loss: 0.0724468007683754 2023-01-24 03:37:47.085708: step: 776/463, loss: 0.021009590476751328 2023-01-24 03:37:47.682276: step: 778/463, loss: 0.007623524870723486 2023-01-24 03:37:48.296481: step: 780/463, loss: 0.0017408073181286454 2023-01-24 03:37:48.859518: step: 782/463, loss: 0.0005291840643621981 2023-01-24 03:37:49.478177: step: 784/463, loss: 0.032468684017658234 2023-01-24 03:37:50.093372: step: 786/463, loss: 0.005144279915839434 2023-01-24 03:37:50.680390: step: 788/463, loss: 0.03454132750630379 2023-01-24 03:37:51.280922: step: 790/463, loss: 0.03987123444676399 2023-01-24 03:37:51.847030: step: 792/463, loss: 0.03480389714241028 2023-01-24 03:37:52.455116: step: 794/463, loss: 0.0020555350929498672 2023-01-24 03:37:53.075145: step: 796/463, loss: 0.07491395622491837 2023-01-24 03:37:53.655762: step: 798/463, loss: 6.228529673535377e-05 2023-01-24 03:37:54.325557: step: 800/463, loss: 0.00911690853536129 2023-01-24 03:37:54.958329: step: 802/463, loss: 0.1268131285905838 2023-01-24 03:37:55.569807: step: 804/463, loss: 0.018176700919866562 2023-01-24 03:37:56.200793: step: 806/463, loss: 0.013455672189593315 2023-01-24 03:37:56.787586: step: 808/463, loss: 0.014742767438292503 2023-01-24 03:37:57.460403: step: 810/463, loss: 0.006980289705097675 2023-01-24 03:37:58.101318: step: 812/463, loss: 0.10349062085151672 2023-01-24 03:37:58.730470: step: 814/463, loss: 0.02058652602136135 2023-01-24 03:37:59.362823: step: 816/463, loss: 0.07814081758260727 2023-01-24 03:37:59.948815: step: 818/463, loss: 0.0005949281039647758 2023-01-24 03:38:00.554228: step: 820/463, loss: 0.010980096645653248 2023-01-24 03:38:01.216975: step: 822/463, loss: 0.15577933192253113 2023-01-24 03:38:01.780153: step: 824/463, loss: 0.004505002871155739 2023-01-24 03:38:02.440018: step: 826/463, loss: 0.0313376858830452 2023-01-24 03:38:03.123338: step: 828/463, loss: 0.08885578066110611 2023-01-24 03:38:03.757067: step: 830/463, loss: 0.0018728503491729498 2023-01-24 03:38:04.388085: step: 832/463, loss: 0.33356672525405884 2023-01-24 03:38:05.023097: step: 834/463, loss: 0.012734198942780495 2023-01-24 03:38:05.623285: step: 836/463, loss: 0.005018147639930248 2023-01-24 03:38:06.229789: step: 838/463, loss: 0.07090814411640167 2023-01-24 03:38:06.808714: step: 840/463, loss: 0.004562025424093008 2023-01-24 03:38:07.398514: step: 842/463, loss: 0.03343694657087326 2023-01-24 03:38:07.971062: step: 844/463, loss: 0.013465807773172855 2023-01-24 03:38:08.614862: step: 846/463, loss: 0.03526826575398445 2023-01-24 03:38:09.311227: step: 848/463, loss: 0.02908763289451599 2023-01-24 03:38:09.952970: step: 850/463, loss: 0.01684403233230114 2023-01-24 03:38:10.540853: step: 852/463, loss: 0.007814344018697739 2023-01-24 03:38:11.189668: step: 854/463, loss: 0.009764028713107109 2023-01-24 03:38:11.833013: step: 856/463, loss: 0.057583849877119064 2023-01-24 03:38:12.384115: step: 858/463, loss: 0.006579817272722721 2023-01-24 03:38:13.009294: step: 860/463, loss: 0.030978145077824593 2023-01-24 03:38:13.703273: step: 862/463, loss: 0.012535871006548405 2023-01-24 03:38:14.314851: step: 864/463, loss: 0.002370886504650116 2023-01-24 03:38:14.943246: step: 866/463, loss: 0.021539144217967987 2023-01-24 03:38:15.589503: step: 868/463, loss: 0.047405119985342026 2023-01-24 03:38:16.185332: step: 870/463, loss: 0.029941538348793983 2023-01-24 03:38:16.782657: step: 872/463, loss: 0.010385973379015923 2023-01-24 03:38:17.407640: step: 874/463, loss: 0.25071820616722107 2023-01-24 03:38:18.012209: step: 876/463, loss: 0.05144646018743515 2023-01-24 03:38:18.591497: step: 878/463, loss: 0.00984541792422533 2023-01-24 03:38:19.226765: step: 880/463, loss: 0.0525452122092247 2023-01-24 03:38:19.868916: step: 882/463, loss: 0.032475486397743225 2023-01-24 03:38:20.469894: step: 884/463, loss: 0.0006611873395740986 2023-01-24 03:38:21.072980: step: 886/463, loss: 0.006387923378497362 2023-01-24 03:38:21.656780: step: 888/463, loss: 0.01817561313509941 2023-01-24 03:38:22.258544: step: 890/463, loss: 0.014133309945464134 2023-01-24 03:38:22.879761: step: 892/463, loss: 0.060384996235370636 2023-01-24 03:38:23.478554: step: 894/463, loss: 0.015683840960264206 2023-01-24 03:38:24.099437: step: 896/463, loss: 0.013177204877138138 2023-01-24 03:38:24.788039: step: 898/463, loss: 0.026812294498085976 2023-01-24 03:38:25.447617: step: 900/463, loss: 0.13442784547805786 2023-01-24 03:38:26.079194: step: 902/463, loss: 0.03376253321766853 2023-01-24 03:38:26.662351: step: 904/463, loss: 0.017914265394210815 2023-01-24 03:38:27.313103: step: 906/463, loss: 0.7043275833129883 2023-01-24 03:38:27.897764: step: 908/463, loss: 0.16737906634807587 2023-01-24 03:38:28.594610: step: 910/463, loss: 0.0629877895116806 2023-01-24 03:38:29.231788: step: 912/463, loss: 0.03859868273139 2023-01-24 03:38:29.846859: step: 914/463, loss: 0.03236795961856842 2023-01-24 03:38:30.483647: step: 916/463, loss: 0.0013772927923128009 2023-01-24 03:38:31.122119: step: 918/463, loss: 0.055881962180137634 2023-01-24 03:38:31.731592: step: 920/463, loss: 0.006493183318525553 2023-01-24 03:38:32.343012: step: 922/463, loss: 0.024610666558146477 2023-01-24 03:38:32.951098: step: 924/463, loss: 0.019501943141222 2023-01-24 03:38:33.517469: step: 926/463, loss: 0.026174984872341156 ================================================== Loss: 0.075 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3647809278350515, 'r': 0.3357092030360531, 'f1': 0.34964179841897225}, 'combined': 0.25763079883503215, 'epoch': 29} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3500148473816059, 'r': 0.3722124801659173, 'f1': 0.36077254202273323}, 'combined': 0.2796418746779081, 'epoch': 29} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34148299390826564, 'r': 0.3369471666647023, 'f1': 0.33919991754020656}, 'combined': 0.24993678134541536, 'epoch': 29} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3393104119093392, 'r': 0.37985301627350654, 'f1': 0.358438926023916}, 'combined': 0.27783304313815504, 'epoch': 29} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34001213859732793, 'r': 0.32968918941790243, 'f1': 0.33477110370565427}, 'combined': 0.24667344483574524, 'epoch': 29} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34157155234650616, 'r': 0.36386160769264764, 'f1': 0.3523644229368942}, 'combined': 0.2731245766305113, 'epoch': 29} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.385, 'r': 0.275, 'f1': 0.32083333333333336}, 'combined': 0.2138888888888889, 'epoch': 29} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.37, 'r': 0.40217391304347827, 'f1': 0.38541666666666663}, 'combined': 0.19270833333333331, 'epoch': 29} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6666666666666666, 'r': 0.20689655172413793, 'f1': 0.31578947368421056}, 'combined': 0.2105263157894737, 'epoch': 29} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35531381856540084, 'r': 0.3195801707779886, 'f1': 0.336500999000999}, 'combined': 0.24794810452705188, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35077406976066583, 'r': 0.38269193088778525, 'f1': 0.3660385237854157}, 'combined': 0.2837236404461117, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.3142857142857143, 'f1': 0.360655737704918}, 'combined': 0.24043715846994532, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3556843389529725, 'r': 0.32598773380319873, 'f1': 0.340189179632249}, 'combined': 0.25066571130797294, 'epoch': 19} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35235259195938173, 'r': 0.3698406801632481, 'f1': 0.3608848968767838}, 'combined': 0.2797289631293731, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.41304347826086957, 'r': 0.41304347826086957, 'f1': 0.41304347826086957}, 'combined': 0.20652173913043478, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34001213859732793, 'r': 0.32968918941790243, 'f1': 0.33477110370565427}, 'combined': 0.24667344483574524, 'epoch': 29} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34157155234650616, 'r': 0.36386160769264764, 'f1': 0.3523644229368942}, 'combined': 0.2731245766305113, 'epoch': 29} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6666666666666666, 'r': 0.20689655172413793, 'f1': 0.31578947368421056}, 'combined': 0.2105263157894737, 'epoch': 29} ****************************** Epoch: 30 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:41:14.467791: step: 2/463, loss: 0.003348055062815547 2023-01-24 03:41:15.150209: step: 4/463, loss: 0.046765830367803574 2023-01-24 03:41:15.831616: step: 6/463, loss: 0.031149689108133316 2023-01-24 03:41:16.436424: step: 8/463, loss: 0.012425611726939678 2023-01-24 03:41:17.090225: step: 10/463, loss: 0.0006921354215592146 2023-01-24 03:41:17.713006: step: 12/463, loss: 0.06397475302219391 2023-01-24 03:41:18.384325: step: 14/463, loss: 0.08447199314832687 2023-01-24 03:41:19.007091: step: 16/463, loss: 0.006886500399559736 2023-01-24 03:41:19.679160: step: 18/463, loss: 0.010859581641852856 2023-01-24 03:41:20.279098: step: 20/463, loss: 0.06130557134747505 2023-01-24 03:41:20.842675: step: 22/463, loss: 0.0675460547208786 2023-01-24 03:41:21.509920: step: 24/463, loss: 0.007639497518539429 2023-01-24 03:41:22.092448: step: 26/463, loss: 0.001132871606387198 2023-01-24 03:41:22.748866: step: 28/463, loss: 0.00405964395031333 2023-01-24 03:41:23.364984: step: 30/463, loss: 0.04965794086456299 2023-01-24 03:41:23.955277: step: 32/463, loss: 0.011324339546263218 2023-01-24 03:41:24.598043: step: 34/463, loss: 0.048247307538986206 2023-01-24 03:41:25.185654: step: 36/463, loss: 0.008319674991071224 2023-01-24 03:41:25.781775: step: 38/463, loss: 0.015220372937619686 2023-01-24 03:41:26.354229: step: 40/463, loss: 0.08465173840522766 2023-01-24 03:41:26.982741: step: 42/463, loss: 0.01284420769661665 2023-01-24 03:41:27.624312: step: 44/463, loss: 0.07054094225168228 2023-01-24 03:41:28.307253: step: 46/463, loss: 0.04635622352361679 2023-01-24 03:41:28.873660: step: 48/463, loss: 0.007711169775575399 2023-01-24 03:41:29.520243: step: 50/463, loss: 0.009285827167332172 2023-01-24 03:41:30.095395: step: 52/463, loss: 0.012208811938762665 2023-01-24 03:41:30.717099: step: 54/463, loss: 0.0004118823853787035 2023-01-24 03:41:31.316486: step: 56/463, loss: 0.24255159497261047 2023-01-24 03:41:31.941103: step: 58/463, loss: 0.028750255703926086 2023-01-24 03:41:32.537333: step: 60/463, loss: 0.021993907168507576 2023-01-24 03:41:33.265798: step: 62/463, loss: 0.014323953539133072 2023-01-24 03:41:33.848516: step: 64/463, loss: 0.003018668619915843 2023-01-24 03:41:34.462026: step: 66/463, loss: 0.02776459790766239 2023-01-24 03:41:35.081046: step: 68/463, loss: 0.0007424212526530027 2023-01-24 03:41:35.686957: step: 70/463, loss: 0.009312381967902184 2023-01-24 03:41:36.326641: step: 72/463, loss: 0.00380902411416173 2023-01-24 03:41:37.013720: step: 74/463, loss: 0.011261909268796444 2023-01-24 03:41:37.712811: step: 76/463, loss: 0.029278485104441643 2023-01-24 03:41:38.346995: step: 78/463, loss: 0.018031824380159378 2023-01-24 03:41:39.038878: step: 80/463, loss: 0.023070020601153374 2023-01-24 03:41:39.604726: step: 82/463, loss: 0.0025981683284044266 2023-01-24 03:41:40.285893: step: 84/463, loss: 0.030428972095251083 2023-01-24 03:41:40.891429: step: 86/463, loss: 0.002118075033649802 2023-01-24 03:41:41.514255: step: 88/463, loss: 0.13838376104831696 2023-01-24 03:41:42.150405: step: 90/463, loss: 0.5376948714256287 2023-01-24 03:41:42.822263: step: 92/463, loss: 0.007030895445495844 2023-01-24 03:41:43.388990: step: 94/463, loss: 0.0059221284464001656 2023-01-24 03:41:44.006044: step: 96/463, loss: 0.02351825311779976 2023-01-24 03:41:44.592819: step: 98/463, loss: 0.0036061201244592667 2023-01-24 03:41:45.226898: step: 100/463, loss: 0.018816350027918816 2023-01-24 03:41:45.868968: step: 102/463, loss: 0.014140629209578037 2023-01-24 03:41:46.482747: step: 104/463, loss: 0.008114437572658062 2023-01-24 03:41:47.065003: step: 106/463, loss: 0.05228383466601372 2023-01-24 03:41:47.700811: step: 108/463, loss: 0.004296346101909876 2023-01-24 03:41:48.267708: step: 110/463, loss: 0.017928671091794968 2023-01-24 03:41:48.822891: step: 112/463, loss: 0.03456459566950798 2023-01-24 03:41:49.482336: step: 114/463, loss: 0.3317072093486786 2023-01-24 03:41:50.151248: step: 116/463, loss: 0.0032351817935705185 2023-01-24 03:41:50.850949: step: 118/463, loss: 0.005768848583102226 2023-01-24 03:41:51.478197: step: 120/463, loss: 0.00055794877698645 2023-01-24 03:41:52.091398: step: 122/463, loss: 0.022919733077287674 2023-01-24 03:41:52.682920: step: 124/463, loss: 0.020538704469799995 2023-01-24 03:41:53.230536: step: 126/463, loss: 0.003932601306587458 2023-01-24 03:41:53.851250: step: 128/463, loss: 0.028072243556380272 2023-01-24 03:41:54.433725: step: 130/463, loss: 0.0019172925967723131 2023-01-24 03:41:55.071734: step: 132/463, loss: 0.020364809781312943 2023-01-24 03:41:55.658253: step: 134/463, loss: 0.00419685710221529 2023-01-24 03:41:56.246717: step: 136/463, loss: 0.017741378396749496 2023-01-24 03:41:56.945412: step: 138/463, loss: 0.005836400203406811 2023-01-24 03:41:57.529436: step: 140/463, loss: 0.003543427214026451 2023-01-24 03:41:58.126536: step: 142/463, loss: 0.012756900861859322 2023-01-24 03:41:58.744038: step: 144/463, loss: 0.39636000990867615 2023-01-24 03:41:59.344339: step: 146/463, loss: 0.017988160252571106 2023-01-24 03:42:00.000895: step: 148/463, loss: 0.02115839160978794 2023-01-24 03:42:00.603786: step: 150/463, loss: 0.005877521354705095 2023-01-24 03:42:01.232679: step: 152/463, loss: 0.3393757939338684 2023-01-24 03:42:01.802918: step: 154/463, loss: 0.01985025405883789 2023-01-24 03:42:02.403272: step: 156/463, loss: 0.0031463720370084047 2023-01-24 03:42:02.984904: step: 158/463, loss: 0.017887214198708534 2023-01-24 03:42:03.577255: step: 160/463, loss: 0.003854408860206604 2023-01-24 03:42:04.168470: step: 162/463, loss: 0.0056409770622849464 2023-01-24 03:42:04.734582: step: 164/463, loss: 0.08588533103466034 2023-01-24 03:42:05.330069: step: 166/463, loss: 0.014338796958327293 2023-01-24 03:42:05.984056: step: 168/463, loss: 0.040356457233428955 2023-01-24 03:42:06.580320: step: 170/463, loss: 0.022640036419034004 2023-01-24 03:42:07.194224: step: 172/463, loss: 0.02104882337152958 2023-01-24 03:42:07.782457: step: 174/463, loss: 0.0011047740699723363 2023-01-24 03:42:08.390935: step: 176/463, loss: 0.011188049800693989 2023-01-24 03:42:09.016192: step: 178/463, loss: 0.0005825217231176794 2023-01-24 03:42:09.680637: step: 180/463, loss: 0.012243734672665596 2023-01-24 03:42:10.308627: step: 182/463, loss: 0.04197511076927185 2023-01-24 03:42:10.897194: step: 184/463, loss: 0.035589613020420074 2023-01-24 03:42:11.448386: step: 186/463, loss: 0.01662350259721279 2023-01-24 03:42:12.082301: step: 188/463, loss: 0.0038081782404333353 2023-01-24 03:42:12.648583: step: 190/463, loss: 0.011021867394447327 2023-01-24 03:42:13.192696: step: 192/463, loss: 8.040751708904281e-05 2023-01-24 03:42:13.781429: step: 194/463, loss: 0.036171261221170425 2023-01-24 03:42:14.382699: step: 196/463, loss: 0.05074598640203476 2023-01-24 03:42:14.994263: step: 198/463, loss: 0.004794722888618708 2023-01-24 03:42:15.559042: step: 200/463, loss: 0.005619929172098637 2023-01-24 03:42:16.124558: step: 202/463, loss: 0.02185971662402153 2023-01-24 03:42:16.708752: step: 204/463, loss: 0.006833468563854694 2023-01-24 03:42:17.275254: step: 206/463, loss: 0.003926296252757311 2023-01-24 03:42:17.895999: step: 208/463, loss: 0.008927463553845882 2023-01-24 03:42:18.487971: step: 210/463, loss: 0.01127215102314949 2023-01-24 03:42:19.151439: step: 212/463, loss: 1.1639524698257446 2023-01-24 03:42:19.791162: step: 214/463, loss: 0.04122161865234375 2023-01-24 03:42:20.384340: step: 216/463, loss: 0.07138665020465851 2023-01-24 03:42:20.994693: step: 218/463, loss: 0.00013343404862098396 2023-01-24 03:42:21.583630: step: 220/463, loss: 0.0028880152385681868 2023-01-24 03:42:22.208053: step: 222/463, loss: 0.04937922582030296 2023-01-24 03:42:22.815768: step: 224/463, loss: 0.029749706387519836 2023-01-24 03:42:23.405917: step: 226/463, loss: 0.08724698424339294 2023-01-24 03:42:24.060230: step: 228/463, loss: 0.32674795389175415 2023-01-24 03:42:24.735393: step: 230/463, loss: 0.0015274988254532218 2023-01-24 03:42:25.324647: step: 232/463, loss: 0.004527280107140541 2023-01-24 03:42:25.916088: step: 234/463, loss: 0.1155090406537056 2023-01-24 03:42:26.616956: step: 236/463, loss: 0.025656316429376602 2023-01-24 03:42:27.198150: step: 238/463, loss: 0.03855903819203377 2023-01-24 03:42:27.811788: step: 240/463, loss: 0.00015012556104920805 2023-01-24 03:42:28.441569: step: 242/463, loss: 0.009756055660545826 2023-01-24 03:42:29.160981: step: 244/463, loss: 0.2989869713783264 2023-01-24 03:42:29.797262: step: 246/463, loss: 0.02781127206981182 2023-01-24 03:42:30.402943: step: 248/463, loss: 0.029746340587735176 2023-01-24 03:42:31.124565: step: 250/463, loss: 0.015172197483479977 2023-01-24 03:42:31.807888: step: 252/463, loss: 0.003679451998323202 2023-01-24 03:42:32.429634: step: 254/463, loss: 0.000615073717199266 2023-01-24 03:42:33.037908: step: 256/463, loss: 0.014317273162305355 2023-01-24 03:42:33.685948: step: 258/463, loss: 0.0034707149025052786 2023-01-24 03:42:34.258163: step: 260/463, loss: 0.06441783159971237 2023-01-24 03:42:34.873776: step: 262/463, loss: 0.0009400406270287931 2023-01-24 03:42:35.429395: step: 264/463, loss: 0.020266558974981308 2023-01-24 03:42:36.009829: step: 266/463, loss: 0.0024563022889196873 2023-01-24 03:42:36.621278: step: 268/463, loss: 0.010296055115759373 2023-01-24 03:42:37.270615: step: 270/463, loss: 0.011542663909494877 2023-01-24 03:42:37.886218: step: 272/463, loss: 0.10954266041517258 2023-01-24 03:42:38.455981: step: 274/463, loss: 0.005849192384630442 2023-01-24 03:42:39.118538: step: 276/463, loss: 0.010167590342462063 2023-01-24 03:42:39.747380: step: 278/463, loss: 0.003906392026692629 2023-01-24 03:42:40.371061: step: 280/463, loss: 0.02463553659617901 2023-01-24 03:42:41.039606: step: 282/463, loss: 0.020623067393898964 2023-01-24 03:42:41.652165: step: 284/463, loss: 0.0025998291093856096 2023-01-24 03:42:42.181523: step: 286/463, loss: 0.004346669185906649 2023-01-24 03:42:42.752290: step: 288/463, loss: 0.03985128179192543 2023-01-24 03:42:43.378313: step: 290/463, loss: 0.02054804004728794 2023-01-24 03:42:44.009988: step: 292/463, loss: 0.02744104154407978 2023-01-24 03:42:44.610274: step: 294/463, loss: 0.0021334406919777393 2023-01-24 03:42:45.160818: step: 296/463, loss: 0.005076550878584385 2023-01-24 03:42:45.878565: step: 298/463, loss: 0.013938345946371555 2023-01-24 03:42:46.479255: step: 300/463, loss: 0.032830264419317245 2023-01-24 03:42:47.091044: step: 302/463, loss: 0.008370975032448769 2023-01-24 03:42:47.628969: step: 304/463, loss: 0.0020224181935191154 2023-01-24 03:42:48.281870: step: 306/463, loss: 0.04459505155682564 2023-01-24 03:42:48.937977: step: 308/463, loss: 0.013771029189229012 2023-01-24 03:42:49.578687: step: 310/463, loss: 0.015615686774253845 2023-01-24 03:42:50.201858: step: 312/463, loss: 0.03839581832289696 2023-01-24 03:42:50.825291: step: 314/463, loss: 0.01638411357998848 2023-01-24 03:42:51.407662: step: 316/463, loss: 0.021316595375537872 2023-01-24 03:42:51.995128: step: 318/463, loss: 0.15528523921966553 2023-01-24 03:42:52.618831: step: 320/463, loss: 0.00015696136688347906 2023-01-24 03:42:53.198416: step: 322/463, loss: 0.004515354055911303 2023-01-24 03:42:53.894559: step: 324/463, loss: 0.00995574425905943 2023-01-24 03:42:54.513417: step: 326/463, loss: 0.021659819409251213 2023-01-24 03:42:55.163341: step: 328/463, loss: 0.007264286279678345 2023-01-24 03:42:55.750325: step: 330/463, loss: 0.01879897527396679 2023-01-24 03:42:56.348057: step: 332/463, loss: 0.019653448835015297 2023-01-24 03:42:56.943404: step: 334/463, loss: 0.021654561161994934 2023-01-24 03:42:57.560448: step: 336/463, loss: 0.004446979146450758 2023-01-24 03:42:58.160616: step: 338/463, loss: 0.010496840812265873 2023-01-24 03:42:58.832319: step: 340/463, loss: 0.007603057660162449 2023-01-24 03:42:59.527798: step: 342/463, loss: 0.005591457709670067 2023-01-24 03:43:00.124672: step: 344/463, loss: 0.005775043275207281 2023-01-24 03:43:00.789257: step: 346/463, loss: 0.022768402472138405 2023-01-24 03:43:01.407333: step: 348/463, loss: 0.00039576523704454303 2023-01-24 03:43:02.033057: step: 350/463, loss: 0.018127653747797012 2023-01-24 03:43:02.656814: step: 352/463, loss: 0.001632579485885799 2023-01-24 03:43:03.240580: step: 354/463, loss: 0.011940563097596169 2023-01-24 03:43:03.834437: step: 356/463, loss: 0.030893150717020035 2023-01-24 03:43:04.472997: step: 358/463, loss: 0.01903459057211876 2023-01-24 03:43:05.144386: step: 360/463, loss: 0.015471003018319607 2023-01-24 03:43:05.707895: step: 362/463, loss: 0.05312786251306534 2023-01-24 03:43:06.295993: step: 364/463, loss: 0.004520105198025703 2023-01-24 03:43:06.935221: step: 366/463, loss: 0.00021048002236057073 2023-01-24 03:43:07.561009: step: 368/463, loss: 0.06298676878213882 2023-01-24 03:43:08.310595: step: 370/463, loss: 0.05269232019782066 2023-01-24 03:43:08.879832: step: 372/463, loss: 0.03298819810152054 2023-01-24 03:43:09.409299: step: 374/463, loss: 0.0004433818394318223 2023-01-24 03:43:09.982403: step: 376/463, loss: 0.016590416431427002 2023-01-24 03:43:10.584528: step: 378/463, loss: 0.08264383673667908 2023-01-24 03:43:11.200070: step: 380/463, loss: 0.02032715082168579 2023-01-24 03:43:11.868407: step: 382/463, loss: 0.0247122123837471 2023-01-24 03:43:12.484235: step: 384/463, loss: 0.00149907183367759 2023-01-24 03:43:13.106891: step: 386/463, loss: 0.053618915379047394 2023-01-24 03:43:13.736699: step: 388/463, loss: 0.014058658853173256 2023-01-24 03:43:14.361130: step: 390/463, loss: 0.015364853665232658 2023-01-24 03:43:14.964440: step: 392/463, loss: 0.013582360930740833 2023-01-24 03:43:15.608718: step: 394/463, loss: 0.0031859998125582933 2023-01-24 03:43:16.246824: step: 396/463, loss: 0.021418336778879166 2023-01-24 03:43:16.829298: step: 398/463, loss: 0.005219413433223963 2023-01-24 03:43:17.437685: step: 400/463, loss: 0.09055787324905396 2023-01-24 03:43:18.080971: step: 402/463, loss: 0.0020374557934701443 2023-01-24 03:43:18.693006: step: 404/463, loss: 0.003488131333142519 2023-01-24 03:43:19.326610: step: 406/463, loss: 0.015424426645040512 2023-01-24 03:43:20.070119: step: 408/463, loss: 0.006992554757744074 2023-01-24 03:43:20.667326: step: 410/463, loss: 0.001312085660174489 2023-01-24 03:43:21.345155: step: 412/463, loss: 0.1532529592514038 2023-01-24 03:43:21.959157: step: 414/463, loss: 0.03673427551984787 2023-01-24 03:43:22.620521: step: 416/463, loss: 0.10569197684526443 2023-01-24 03:43:23.249684: step: 418/463, loss: 0.03797304630279541 2023-01-24 03:43:23.836352: step: 420/463, loss: 0.002807617885991931 2023-01-24 03:43:24.423927: step: 422/463, loss: 0.1057528704404831 2023-01-24 03:43:24.996248: step: 424/463, loss: 0.014239491894841194 2023-01-24 03:43:25.636862: step: 426/463, loss: 0.01293495949357748 2023-01-24 03:43:26.298085: step: 428/463, loss: 0.023818783462047577 2023-01-24 03:43:26.914339: step: 430/463, loss: 0.14168071746826172 2023-01-24 03:43:27.508883: step: 432/463, loss: 0.04936476796865463 2023-01-24 03:43:28.141671: step: 434/463, loss: 0.618794858455658 2023-01-24 03:43:28.758150: step: 436/463, loss: 0.003481074469164014 2023-01-24 03:43:29.406723: step: 438/463, loss: 1.2794694900512695 2023-01-24 03:43:29.998001: step: 440/463, loss: 0.013472139835357666 2023-01-24 03:43:30.656348: step: 442/463, loss: 0.0053153629414737225 2023-01-24 03:43:31.234075: step: 444/463, loss: 0.009222879074513912 2023-01-24 03:43:31.846823: step: 446/463, loss: 0.004356713034212589 2023-01-24 03:43:32.660445: step: 448/463, loss: 0.034813858568668365 2023-01-24 03:43:33.256901: step: 450/463, loss: 0.002519553294405341 2023-01-24 03:43:33.902188: step: 452/463, loss: 0.01727062091231346 2023-01-24 03:43:34.527639: step: 454/463, loss: 0.008006599731743336 2023-01-24 03:43:35.110577: step: 456/463, loss: 0.37737908959388733 2023-01-24 03:43:35.716089: step: 458/463, loss: 0.010549294762313366 2023-01-24 03:43:36.311561: step: 460/463, loss: 0.007140059489756823 2023-01-24 03:43:36.999710: step: 462/463, loss: 0.004069739021360874 2023-01-24 03:43:37.580094: step: 464/463, loss: 0.002762184012681246 2023-01-24 03:43:38.210457: step: 466/463, loss: 0.03549811616539955 2023-01-24 03:43:38.828187: step: 468/463, loss: 0.0002543667796999216 2023-01-24 03:43:39.435508: step: 470/463, loss: 0.592303991317749 2023-01-24 03:43:40.031098: step: 472/463, loss: 0.013420642353594303 2023-01-24 03:43:40.674343: step: 474/463, loss: 0.12643814086914062 2023-01-24 03:43:41.264496: step: 476/463, loss: 0.0009326456347480416 2023-01-24 03:43:41.901125: step: 478/463, loss: 0.2759227752685547 2023-01-24 03:43:42.451403: step: 480/463, loss: 0.06930850446224213 2023-01-24 03:43:43.067978: step: 482/463, loss: 0.0386422723531723 2023-01-24 03:43:43.656726: step: 484/463, loss: 14.120134353637695 2023-01-24 03:43:44.243634: step: 486/463, loss: 0.05389359965920448 2023-01-24 03:43:44.908965: step: 488/463, loss: 0.07896668463945389 2023-01-24 03:43:45.524220: step: 490/463, loss: 0.04704621061682701 2023-01-24 03:43:46.104652: step: 492/463, loss: 0.012464272789657116 2023-01-24 03:43:46.687874: step: 494/463, loss: 0.00142408418469131 2023-01-24 03:43:47.293662: step: 496/463, loss: 0.07210279256105423 2023-01-24 03:43:47.912723: step: 498/463, loss: 0.009681577794253826 2023-01-24 03:43:48.623444: step: 500/463, loss: 0.08222053200006485 2023-01-24 03:43:49.273480: step: 502/463, loss: 0.0025594925973564386 2023-01-24 03:43:49.849994: step: 504/463, loss: 0.012416169978678226 2023-01-24 03:43:50.410008: step: 506/463, loss: 0.027304846793413162 2023-01-24 03:43:51.013419: step: 508/463, loss: 0.056054968386888504 2023-01-24 03:43:51.651309: step: 510/463, loss: 0.03493053838610649 2023-01-24 03:43:52.317866: step: 512/463, loss: 0.011331530287861824 2023-01-24 03:43:52.937851: step: 514/463, loss: 0.017196929082274437 2023-01-24 03:43:53.579438: step: 516/463, loss: 0.1067686527967453 2023-01-24 03:43:54.174518: step: 518/463, loss: 0.0014026375720277429 2023-01-24 03:43:54.809997: step: 520/463, loss: 0.9607723951339722 2023-01-24 03:43:55.521756: step: 522/463, loss: 0.01363272126764059 2023-01-24 03:43:56.131149: step: 524/463, loss: 0.015428583137691021 2023-01-24 03:43:56.765053: step: 526/463, loss: 0.012432626448571682 2023-01-24 03:43:57.430560: step: 528/463, loss: 0.05134475976228714 2023-01-24 03:43:58.072152: step: 530/463, loss: 0.0033621059264987707 2023-01-24 03:43:58.667564: step: 532/463, loss: 0.0013458256144076586 2023-01-24 03:43:59.247318: step: 534/463, loss: 0.015177948400378227 2023-01-24 03:43:59.853564: step: 536/463, loss: 0.11847135424613953 2023-01-24 03:44:00.416812: step: 538/463, loss: 1.146825889009051e-05 2023-01-24 03:44:01.062180: step: 540/463, loss: 0.01634560152888298 2023-01-24 03:44:01.686808: step: 542/463, loss: 0.01712670549750328 2023-01-24 03:44:02.302207: step: 544/463, loss: 0.0015381629345938563 2023-01-24 03:44:02.963663: step: 546/463, loss: 1.8892041444778442 2023-01-24 03:44:03.579775: step: 548/463, loss: 0.00919247604906559 2023-01-24 03:44:04.155681: step: 550/463, loss: 0.004723436664789915 2023-01-24 03:44:04.730494: step: 552/463, loss: 0.00408986397087574 2023-01-24 03:44:05.280842: step: 554/463, loss: 0.00014379460480995476 2023-01-24 03:44:05.861728: step: 556/463, loss: 0.01637520268559456 2023-01-24 03:44:06.462975: step: 558/463, loss: 0.011019648984074593 2023-01-24 03:44:07.066092: step: 560/463, loss: 0.0492401085793972 2023-01-24 03:44:07.671341: step: 562/463, loss: 0.09095236659049988 2023-01-24 03:44:08.271357: step: 564/463, loss: 0.008499075658619404 2023-01-24 03:44:08.968498: step: 566/463, loss: 0.03640542924404144 2023-01-24 03:44:09.621723: step: 568/463, loss: 0.0003638894122559577 2023-01-24 03:44:10.258431: step: 570/463, loss: 0.0014382230583578348 2023-01-24 03:44:10.877990: step: 572/463, loss: 0.05100620165467262 2023-01-24 03:44:11.562684: step: 574/463, loss: 0.007858376018702984 2023-01-24 03:44:12.232019: step: 576/463, loss: 0.055623859167099 2023-01-24 03:44:12.894761: step: 578/463, loss: 0.0013281474821269512 2023-01-24 03:44:13.505447: step: 580/463, loss: 0.019869204610586166 2023-01-24 03:44:14.126258: step: 582/463, loss: 0.0028137112967669964 2023-01-24 03:44:14.805417: step: 584/463, loss: 0.2470996379852295 2023-01-24 03:44:15.472090: step: 586/463, loss: 0.5812039375305176 2023-01-24 03:44:16.085701: step: 588/463, loss: 0.03611539304256439 2023-01-24 03:44:16.768928: step: 590/463, loss: 0.011885772459208965 2023-01-24 03:44:17.371469: step: 592/463, loss: 0.01397947408258915 2023-01-24 03:44:18.120643: step: 594/463, loss: 0.12583327293395996 2023-01-24 03:44:18.718744: step: 596/463, loss: 0.0013787749921903014 2023-01-24 03:44:19.356012: step: 598/463, loss: 4.441771507263184 2023-01-24 03:44:19.994603: step: 600/463, loss: 0.07939635217189789 2023-01-24 03:44:20.588399: step: 602/463, loss: 0.04025167599320412 2023-01-24 03:44:21.173928: step: 604/463, loss: 0.061051249504089355 2023-01-24 03:44:21.805465: step: 606/463, loss: 0.7362342476844788 2023-01-24 03:44:22.359681: step: 608/463, loss: 0.06517346203327179 2023-01-24 03:44:23.009379: step: 610/463, loss: 0.046322111040353775 2023-01-24 03:44:23.576439: step: 612/463, loss: 0.031612757593393326 2023-01-24 03:44:24.174713: step: 614/463, loss: 0.02285258285701275 2023-01-24 03:44:24.746202: step: 616/463, loss: 0.0009958171285688877 2023-01-24 03:44:25.357858: step: 618/463, loss: 0.45432621240615845 2023-01-24 03:44:25.957023: step: 620/463, loss: 0.00349188013933599 2023-01-24 03:44:26.558036: step: 622/463, loss: 0.0024540708400309086 2023-01-24 03:44:27.184056: step: 624/463, loss: 0.008386512286961079 2023-01-24 03:44:27.870546: step: 626/463, loss: 0.015294750221073627 2023-01-24 03:44:28.493286: step: 628/463, loss: 0.0017503045964986086 2023-01-24 03:44:29.094161: step: 630/463, loss: 0.08070531487464905 2023-01-24 03:44:29.704757: step: 632/463, loss: 0.0030635695438832045 2023-01-24 03:44:30.329847: step: 634/463, loss: 0.08945896476507187 2023-01-24 03:44:30.964895: step: 636/463, loss: 0.022937500849366188 2023-01-24 03:44:31.573554: step: 638/463, loss: 0.052769217640161514 2023-01-24 03:44:32.215987: step: 640/463, loss: 0.042901817709207535 2023-01-24 03:44:32.775890: step: 642/463, loss: 0.07390157133340836 2023-01-24 03:44:33.409851: step: 644/463, loss: 0.047893062233924866 2023-01-24 03:44:33.987055: step: 646/463, loss: 0.05383450910449028 2023-01-24 03:44:34.551022: step: 648/463, loss: 0.008620602078735828 2023-01-24 03:44:35.284666: step: 650/463, loss: 0.18091782927513123 2023-01-24 03:44:35.924866: step: 652/463, loss: 0.021911179646849632 2023-01-24 03:44:36.547496: step: 654/463, loss: 0.000647700042463839 2023-01-24 03:44:37.105953: step: 656/463, loss: 0.001276754424907267 2023-01-24 03:44:37.680561: step: 658/463, loss: 0.03981545567512512 2023-01-24 03:44:38.266896: step: 660/463, loss: 0.005832474213093519 2023-01-24 03:44:38.932846: step: 662/463, loss: 0.011179996654391289 2023-01-24 03:44:39.536778: step: 664/463, loss: 0.03047151304781437 2023-01-24 03:44:40.145814: step: 666/463, loss: 0.0040741669945418835 2023-01-24 03:44:40.702697: step: 668/463, loss: 0.00031125021632760763 2023-01-24 03:44:41.355111: step: 670/463, loss: 0.06128876283764839 2023-01-24 03:44:41.952651: step: 672/463, loss: 0.03009014017879963 2023-01-24 03:44:42.527305: step: 674/463, loss: 0.0019778709392994642 2023-01-24 03:44:43.155378: step: 676/463, loss: 0.03799960017204285 2023-01-24 03:44:43.762189: step: 678/463, loss: 0.008525410667061806 2023-01-24 03:44:44.305527: step: 680/463, loss: 0.013243120163679123 2023-01-24 03:44:44.924501: step: 682/463, loss: 0.05975394323468208 2023-01-24 03:44:45.547596: step: 684/463, loss: 0.036906905472278595 2023-01-24 03:44:46.211574: step: 686/463, loss: 0.0022359774447977543 2023-01-24 03:44:46.813960: step: 688/463, loss: 0.023090695962309837 2023-01-24 03:44:47.386363: step: 690/463, loss: 0.022579804062843323 2023-01-24 03:44:47.950578: step: 692/463, loss: 0.03391774743795395 2023-01-24 03:44:48.572607: step: 694/463, loss: 0.03190687671303749 2023-01-24 03:44:49.180100: step: 696/463, loss: 0.0039575244300067425 2023-01-24 03:44:49.787373: step: 698/463, loss: 0.005288866814225912 2023-01-24 03:44:50.419074: step: 700/463, loss: 0.03183300420641899 2023-01-24 03:44:51.043112: step: 702/463, loss: 0.002325284993276 2023-01-24 03:44:51.720994: step: 704/463, loss: 0.010124661028385162 2023-01-24 03:44:52.360913: step: 706/463, loss: 0.027480168268084526 2023-01-24 03:44:53.027150: step: 708/463, loss: 0.01256866380572319 2023-01-24 03:44:53.693503: step: 710/463, loss: 0.03388833627104759 2023-01-24 03:44:54.323121: step: 712/463, loss: 0.07260380685329437 2023-01-24 03:44:54.954704: step: 714/463, loss: 0.005551299545913935 2023-01-24 03:44:55.607590: step: 716/463, loss: 0.020944247022271156 2023-01-24 03:44:56.185197: step: 718/463, loss: 0.01053232979029417 2023-01-24 03:44:56.783962: step: 720/463, loss: 0.0008032767800614238 2023-01-24 03:44:57.411309: step: 722/463, loss: 0.14156877994537354 2023-01-24 03:44:58.025916: step: 724/463, loss: 0.004391709342598915 2023-01-24 03:44:58.637107: step: 726/463, loss: 0.08217251300811768 2023-01-24 03:44:59.352384: step: 728/463, loss: 0.005677036941051483 2023-01-24 03:45:00.014339: step: 730/463, loss: 0.002708585700020194 2023-01-24 03:45:00.634209: step: 732/463, loss: 0.002403578255325556 2023-01-24 03:45:01.280660: step: 734/463, loss: 0.01208692230284214 2023-01-24 03:45:01.830358: step: 736/463, loss: 0.060735706239938736 2023-01-24 03:45:02.441790: step: 738/463, loss: 0.012099682353436947 2023-01-24 03:45:03.031588: step: 740/463, loss: 0.001410757889971137 2023-01-24 03:45:03.689153: step: 742/463, loss: 0.012133477255702019 2023-01-24 03:45:04.312069: step: 744/463, loss: 0.01876804232597351 2023-01-24 03:45:04.964706: step: 746/463, loss: 0.02310679480433464 2023-01-24 03:45:05.528961: step: 748/463, loss: 0.029150746762752533 2023-01-24 03:45:06.164942: step: 750/463, loss: 0.008996223099529743 2023-01-24 03:45:06.799867: step: 752/463, loss: 0.01346584502607584 2023-01-24 03:45:07.409210: step: 754/463, loss: 0.0063035194762051105 2023-01-24 03:45:08.026043: step: 756/463, loss: 0.07119446247816086 2023-01-24 03:45:08.613027: step: 758/463, loss: 0.007020119111984968 2023-01-24 03:45:09.267154: step: 760/463, loss: 0.04800300672650337 2023-01-24 03:45:09.856566: step: 762/463, loss: 0.06737812608480453 2023-01-24 03:45:10.545032: step: 764/463, loss: 0.020925460383296013 2023-01-24 03:45:11.129815: step: 766/463, loss: 0.012368453666567802 2023-01-24 03:45:11.736076: step: 768/463, loss: 0.006261227186769247 2023-01-24 03:45:12.329509: step: 770/463, loss: 0.0013962461380288005 2023-01-24 03:45:12.928174: step: 772/463, loss: 0.045866724103689194 2023-01-24 03:45:13.555161: step: 774/463, loss: 0.00868605449795723 2023-01-24 03:45:14.168777: step: 776/463, loss: 0.04266854003071785 2023-01-24 03:45:14.794957: step: 778/463, loss: 0.012076734565198421 2023-01-24 03:45:15.380521: step: 780/463, loss: 0.016845114529132843 2023-01-24 03:45:16.014001: step: 782/463, loss: 0.022558672353625298 2023-01-24 03:45:16.654016: step: 784/463, loss: 0.004800360184162855 2023-01-24 03:45:17.273337: step: 786/463, loss: 0.04612700641155243 2023-01-24 03:45:17.873667: step: 788/463, loss: 0.008846086449921131 2023-01-24 03:45:18.489259: step: 790/463, loss: 0.11185453832149506 2023-01-24 03:45:19.108030: step: 792/463, loss: 0.02231520414352417 2023-01-24 03:45:19.711299: step: 794/463, loss: 0.2917740046977997 2023-01-24 03:45:20.403691: step: 796/463, loss: 0.21534432470798492 2023-01-24 03:45:21.045693: step: 798/463, loss: 0.07309827208518982 2023-01-24 03:45:21.642744: step: 800/463, loss: 0.0015493407845497131 2023-01-24 03:45:22.374345: step: 802/463, loss: 0.006521427072584629 2023-01-24 03:45:23.014061: step: 804/463, loss: 0.015714384615421295 2023-01-24 03:45:23.661275: step: 806/463, loss: 0.008010848425328732 2023-01-24 03:45:24.280757: step: 808/463, loss: 0.006601187400519848 2023-01-24 03:45:24.844171: step: 810/463, loss: 0.035502608865499496 2023-01-24 03:45:25.433965: step: 812/463, loss: 0.03248879685997963 2023-01-24 03:45:26.050145: step: 814/463, loss: 0.025749389082193375 2023-01-24 03:45:26.709458: step: 816/463, loss: 0.04278761148452759 2023-01-24 03:45:27.266606: step: 818/463, loss: 0.008416598662734032 2023-01-24 03:45:27.872269: step: 820/463, loss: 0.13345743715763092 2023-01-24 03:45:28.460467: step: 822/463, loss: 0.014655270613729954 2023-01-24 03:45:29.037146: step: 824/463, loss: 0.0012383426073938608 2023-01-24 03:45:29.634609: step: 826/463, loss: 0.09054864197969437 2023-01-24 03:45:30.299901: step: 828/463, loss: 0.05785893276333809 2023-01-24 03:45:30.873722: step: 830/463, loss: 0.002178038004785776 2023-01-24 03:45:31.494942: step: 832/463, loss: 0.1174110695719719 2023-01-24 03:45:32.146169: step: 834/463, loss: 0.11237432062625885 2023-01-24 03:45:32.794077: step: 836/463, loss: 0.033386338502168655 2023-01-24 03:45:33.436582: step: 838/463, loss: 0.016350561752915382 2023-01-24 03:45:34.052818: step: 840/463, loss: 0.006214508321136236 2023-01-24 03:45:34.635161: step: 842/463, loss: 0.16392134130001068 2023-01-24 03:45:35.277409: step: 844/463, loss: 0.01186341792345047 2023-01-24 03:45:35.886563: step: 846/463, loss: 0.0068565248511731625 2023-01-24 03:45:36.486242: step: 848/463, loss: 0.11053220182657242 2023-01-24 03:45:37.084182: step: 850/463, loss: 0.05240786820650101 2023-01-24 03:45:37.637260: step: 852/463, loss: 0.035946860909461975 2023-01-24 03:45:38.251058: step: 854/463, loss: 0.13795296847820282 2023-01-24 03:45:38.870378: step: 856/463, loss: 0.029736429452896118 2023-01-24 03:45:39.591783: step: 858/463, loss: 0.028719386085867882 2023-01-24 03:45:40.134080: step: 860/463, loss: 0.0007364210323430598 2023-01-24 03:45:40.793591: step: 862/463, loss: 0.018374288454651833 2023-01-24 03:45:41.385691: step: 864/463, loss: 0.02100134640932083 2023-01-24 03:45:42.024140: step: 866/463, loss: 0.044303975999355316 2023-01-24 03:45:42.603418: step: 868/463, loss: 0.5984532833099365 2023-01-24 03:45:43.206181: step: 870/463, loss: 0.0006451236549764872 2023-01-24 03:45:43.867799: step: 872/463, loss: 0.0909319743514061 2023-01-24 03:45:44.540265: step: 874/463, loss: 0.0290652085095644 2023-01-24 03:45:45.204761: step: 876/463, loss: 0.00032575963996350765 2023-01-24 03:45:45.833499: step: 878/463, loss: 0.015450532548129559 2023-01-24 03:45:46.485606: step: 880/463, loss: 0.038711994886398315 2023-01-24 03:45:47.184463: step: 882/463, loss: 0.011028628796339035 2023-01-24 03:45:47.777379: step: 884/463, loss: 0.031230472028255463 2023-01-24 03:45:48.481532: step: 886/463, loss: 0.005912075750529766 2023-01-24 03:45:49.067569: step: 888/463, loss: 0.4749009311199188 2023-01-24 03:45:49.752231: step: 890/463, loss: 0.0033143230248242617 2023-01-24 03:45:50.268166: step: 892/463, loss: 0.011290629394352436 2023-01-24 03:45:50.852058: step: 894/463, loss: 0.017845887690782547 2023-01-24 03:45:51.424320: step: 896/463, loss: 0.040140196681022644 2023-01-24 03:45:52.009117: step: 898/463, loss: 0.4968484044075012 2023-01-24 03:45:52.583195: step: 900/463, loss: 0.022448042407631874 2023-01-24 03:45:53.218832: step: 902/463, loss: 0.00402583135291934 2023-01-24 03:45:53.852679: step: 904/463, loss: 0.07864254713058472 2023-01-24 03:45:54.453843: step: 906/463, loss: 0.004549562931060791 2023-01-24 03:45:55.042222: step: 908/463, loss: 0.003151519689708948 2023-01-24 03:45:55.629573: step: 910/463, loss: 0.015122473239898682 2023-01-24 03:45:56.233105: step: 912/463, loss: 0.013166156597435474 2023-01-24 03:45:56.919874: step: 914/463, loss: 0.07530917227268219 2023-01-24 03:45:57.531979: step: 916/463, loss: 0.013531996868550777 2023-01-24 03:45:58.085261: step: 918/463, loss: 0.03069126419723034 2023-01-24 03:45:58.712690: step: 920/463, loss: 0.0023237536661326885 2023-01-24 03:45:59.365292: step: 922/463, loss: 0.04988205060362816 2023-01-24 03:46:00.037149: step: 924/463, loss: 0.0030852099880576134 2023-01-24 03:46:00.713067: step: 926/463, loss: 0.02364422008395195 ================================================== Loss: 0.095 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36864482899763507, 'r': 0.33436855457470505, 'f1': 0.35067110101665583}, 'combined': 0.25838923232806216, 'epoch': 30} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3549289298648862, 'r': 0.3813528667390184, 'f1': 0.3676667425893239}, 'combined': 0.2849857047821554, 'epoch': 30} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3366472126152833, 'r': 0.3372860118802079, 'f1': 0.3369663094992788}, 'combined': 0.24829096489420543, 'epoch': 30} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33731414202757226, 'r': 0.3850589746307397, 'f1': 0.35960872480536027}, 'combined': 0.2787397771218582, 'epoch': 30} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3271700119096379, 'r': 0.3190994044052256, 'f1': 0.3230843153151852}, 'combined': 0.23806212707434699, 'epoch': 30} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3378778705583633, 'r': 0.37079611897673326, 'f1': 0.35357246051418556}, 'combined': 0.27406095025501465, 'epoch': 30} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3577586206896552, 'r': 0.29642857142857143, 'f1': 0.32421875}, 'combined': 0.21614583333333331, 'epoch': 30} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.35, 'r': 0.3804347826086957, 'f1': 0.36458333333333337}, 'combined': 0.18229166666666669, 'epoch': 30} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45454545454545453, 'r': 0.1724137931034483, 'f1': 0.25000000000000006}, 'combined': 0.16666666666666669, 'epoch': 30} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35531381856540084, 'r': 0.3195801707779886, 'f1': 0.336500999000999}, 'combined': 0.24794810452705188, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35077406976066583, 'r': 0.38269193088778525, 'f1': 0.3660385237854157}, 'combined': 0.2837236404461117, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.3142857142857143, 'f1': 0.360655737704918}, 'combined': 0.24043715846994532, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3556843389529725, 'r': 0.32598773380319873, 'f1': 0.340189179632249}, 'combined': 0.25066571130797294, 'epoch': 19} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35235259195938173, 'r': 0.3698406801632481, 'f1': 0.3608848968767838}, 'combined': 0.2797289631293731, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.41304347826086957, 'r': 0.41304347826086957, 'f1': 0.41304347826086957}, 'combined': 0.20652173913043478, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34001213859732793, 'r': 0.32968918941790243, 'f1': 0.33477110370565427}, 'combined': 0.24667344483574524, 'epoch': 29} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34157155234650616, 'r': 0.36386160769264764, 'f1': 0.3523644229368942}, 'combined': 0.2731245766305113, 'epoch': 29} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6666666666666666, 'r': 0.20689655172413793, 'f1': 0.31578947368421056}, 'combined': 0.2105263157894737, 'epoch': 29} ****************************** Epoch: 31 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:48:34.884037: step: 2/463, loss: 0.004422870930284262 2023-01-24 03:48:35.462920: step: 4/463, loss: 0.0476013720035553 2023-01-24 03:48:36.038016: step: 6/463, loss: 0.03670978173613548 2023-01-24 03:48:36.667386: step: 8/463, loss: 0.01566462032496929 2023-01-24 03:48:37.249490: step: 10/463, loss: 0.013408592902123928 2023-01-24 03:48:37.910398: step: 12/463, loss: 0.025252968072891235 2023-01-24 03:48:38.482438: step: 14/463, loss: 0.006519390270113945 2023-01-24 03:48:39.032543: step: 16/463, loss: 0.007219681050628424 2023-01-24 03:48:39.696340: step: 18/463, loss: 0.03251561149954796 2023-01-24 03:48:40.354537: step: 20/463, loss: 0.03944979980587959 2023-01-24 03:48:40.953606: step: 22/463, loss: 0.0031611749436706305 2023-01-24 03:48:41.531360: step: 24/463, loss: 0.014545641839504242 2023-01-24 03:48:42.111600: step: 26/463, loss: 0.011168266646564007 2023-01-24 03:48:42.737696: step: 28/463, loss: 0.0022661034017801285 2023-01-24 03:48:43.395170: step: 30/463, loss: 0.032148342579603195 2023-01-24 03:48:43.963636: step: 32/463, loss: 0.001172112999483943 2023-01-24 03:48:44.581103: step: 34/463, loss: 0.012198339216411114 2023-01-24 03:48:45.308604: step: 36/463, loss: 0.002454516477882862 2023-01-24 03:48:45.911757: step: 38/463, loss: 0.051385894417762756 2023-01-24 03:48:46.585958: step: 40/463, loss: 0.02402893267571926 2023-01-24 03:48:47.201991: step: 42/463, loss: 0.13218927383422852 2023-01-24 03:48:47.933201: step: 44/463, loss: 0.0474386140704155 2023-01-24 03:48:48.621510: step: 46/463, loss: 0.002726589096710086 2023-01-24 03:48:49.198028: step: 48/463, loss: 0.005372941493988037 2023-01-24 03:48:49.800169: step: 50/463, loss: 0.04420856386423111 2023-01-24 03:48:50.443757: step: 52/463, loss: 0.01096365787088871 2023-01-24 03:48:51.065963: step: 54/463, loss: 0.016338463872671127 2023-01-24 03:48:51.712621: step: 56/463, loss: 0.013118507340550423 2023-01-24 03:48:52.365156: step: 58/463, loss: 0.0031445943750441074 2023-01-24 03:48:52.950037: step: 60/463, loss: 0.004941543098539114 2023-01-24 03:48:53.591441: step: 62/463, loss: 0.022115424275398254 2023-01-24 03:48:54.278738: step: 64/463, loss: 0.03489331528544426 2023-01-24 03:48:54.909324: step: 66/463, loss: 0.0438426248729229 2023-01-24 03:48:55.490917: step: 68/463, loss: 0.3694595396518707 2023-01-24 03:48:56.119068: step: 70/463, loss: 0.008282233029603958 2023-01-24 03:48:56.712826: step: 72/463, loss: 0.009963375516235828 2023-01-24 03:48:57.286376: step: 74/463, loss: 0.047709763050079346 2023-01-24 03:48:57.854241: step: 76/463, loss: 0.34061020612716675 2023-01-24 03:48:58.420877: step: 78/463, loss: 0.0006715833442285657 2023-01-24 03:48:59.011334: step: 80/463, loss: 0.0015674851601943374 2023-01-24 03:48:59.658676: step: 82/463, loss: 0.07726532965898514 2023-01-24 03:49:00.245791: step: 84/463, loss: 0.03366328775882721 2023-01-24 03:49:00.874433: step: 86/463, loss: 0.004537722561508417 2023-01-24 03:49:01.556891: step: 88/463, loss: 0.0054290262050926685 2023-01-24 03:49:02.156327: step: 90/463, loss: 0.029591098427772522 2023-01-24 03:49:02.724430: step: 92/463, loss: 0.0003665934782475233 2023-01-24 03:49:03.384793: step: 94/463, loss: 0.009631955996155739 2023-01-24 03:49:03.955329: step: 96/463, loss: 0.027263300493359566 2023-01-24 03:49:04.573352: step: 98/463, loss: 0.007477788254618645 2023-01-24 03:49:05.171831: step: 100/463, loss: 0.012094361707568169 2023-01-24 03:49:05.854181: step: 102/463, loss: 0.0550723671913147 2023-01-24 03:49:06.500537: step: 104/463, loss: 0.10447590053081512 2023-01-24 03:49:07.067915: step: 106/463, loss: 0.008263484574854374 2023-01-24 03:49:07.674956: step: 108/463, loss: 0.2188422679901123 2023-01-24 03:49:08.306216: step: 110/463, loss: 0.005995317827910185 2023-01-24 03:49:08.866855: step: 112/463, loss: 0.06964045763015747 2023-01-24 03:49:09.460650: step: 114/463, loss: 7.80285699875094e-05 2023-01-24 03:49:10.121177: step: 116/463, loss: 0.005388882476836443 2023-01-24 03:49:10.725676: step: 118/463, loss: 0.030760588124394417 2023-01-24 03:49:11.378703: step: 120/463, loss: 0.00666368193924427 2023-01-24 03:49:11.931826: step: 122/463, loss: 0.03842487558722496 2023-01-24 03:49:12.544822: step: 124/463, loss: 0.007111882790923119 2023-01-24 03:49:13.165381: step: 126/463, loss: 0.003897252958267927 2023-01-24 03:49:13.756724: step: 128/463, loss: 0.0006882630405016243 2023-01-24 03:49:14.368459: step: 130/463, loss: 0.007725379429757595 2023-01-24 03:49:14.951880: step: 132/463, loss: 0.0006098590674810112 2023-01-24 03:49:15.547436: step: 134/463, loss: 0.013623587787151337 2023-01-24 03:49:16.140268: step: 136/463, loss: 0.0449017770588398 2023-01-24 03:49:16.742638: step: 138/463, loss: 0.017932362854480743 2023-01-24 03:49:17.306381: step: 140/463, loss: 0.027936631813645363 2023-01-24 03:49:17.894857: step: 142/463, loss: 0.011372219771146774 2023-01-24 03:49:18.581238: step: 144/463, loss: 0.0019866484217345715 2023-01-24 03:49:19.248838: step: 146/463, loss: 0.023730548098683357 2023-01-24 03:49:19.843222: step: 148/463, loss: 0.11961495876312256 2023-01-24 03:49:20.504294: step: 150/463, loss: 0.07656265795230865 2023-01-24 03:49:21.065652: step: 152/463, loss: 0.31471946835517883 2023-01-24 03:49:21.699482: step: 154/463, loss: 0.035964835435152054 2023-01-24 03:49:22.311260: step: 156/463, loss: 0.019084136933088303 2023-01-24 03:49:22.940856: step: 158/463, loss: 0.039367157965898514 2023-01-24 03:49:23.587934: step: 160/463, loss: 0.07852606475353241 2023-01-24 03:49:24.261023: step: 162/463, loss: 0.024555031210184097 2023-01-24 03:49:24.889653: step: 164/463, loss: 0.0023580596316605806 2023-01-24 03:49:25.523937: step: 166/463, loss: 0.05921948328614235 2023-01-24 03:49:26.145616: step: 168/463, loss: 0.06778226792812347 2023-01-24 03:49:26.782204: step: 170/463, loss: 0.0036057790275663137 2023-01-24 03:49:27.372524: step: 172/463, loss: 0.0017839828506112099 2023-01-24 03:49:27.931845: step: 174/463, loss: 0.0013491861755028367 2023-01-24 03:49:28.591676: step: 176/463, loss: 0.08490173518657684 2023-01-24 03:49:29.158760: step: 178/463, loss: 0.0032500780653208494 2023-01-24 03:49:29.794851: step: 180/463, loss: 0.014830099418759346 2023-01-24 03:49:30.397123: step: 182/463, loss: 0.02372978813946247 2023-01-24 03:49:31.028870: step: 184/463, loss: 0.2949571907520294 2023-01-24 03:49:31.720613: step: 186/463, loss: 0.019025932997465134 2023-01-24 03:49:32.301482: step: 188/463, loss: 0.04316798970103264 2023-01-24 03:49:32.863145: step: 190/463, loss: 0.0016627575969323516 2023-01-24 03:49:33.456348: step: 192/463, loss: 0.0056515843607485294 2023-01-24 03:49:34.149295: step: 194/463, loss: 0.0047281705774366856 2023-01-24 03:49:34.859555: step: 196/463, loss: 0.003192935371771455 2023-01-24 03:49:35.519347: step: 198/463, loss: 0.20383906364440918 2023-01-24 03:49:36.141794: step: 200/463, loss: 0.03514109551906586 2023-01-24 03:49:36.793701: step: 202/463, loss: 0.010624373331665993 2023-01-24 03:49:37.382681: step: 204/463, loss: 0.03294515237212181 2023-01-24 03:49:37.984462: step: 206/463, loss: 0.017936188727617264 2023-01-24 03:49:38.529286: step: 208/463, loss: 0.0686739832162857 2023-01-24 03:49:39.119456: step: 210/463, loss: 0.33181145787239075 2023-01-24 03:49:39.660634: step: 212/463, loss: 0.010371187701821327 2023-01-24 03:49:40.339446: step: 214/463, loss: 0.036026500165462494 2023-01-24 03:49:40.915905: step: 216/463, loss: 0.03120010532438755 2023-01-24 03:49:41.552500: step: 218/463, loss: 0.03980020806193352 2023-01-24 03:49:42.210642: step: 220/463, loss: 0.01664537750184536 2023-01-24 03:49:42.844626: step: 222/463, loss: 0.006049699615687132 2023-01-24 03:49:43.460687: step: 224/463, loss: 0.040885474532842636 2023-01-24 03:49:44.156566: step: 226/463, loss: 0.003250849200412631 2023-01-24 03:49:44.758711: step: 228/463, loss: 0.013019710779190063 2023-01-24 03:49:45.318763: step: 230/463, loss: 0.014770979061722755 2023-01-24 03:49:46.006674: step: 232/463, loss: 0.008929251693189144 2023-01-24 03:49:46.707711: step: 234/463, loss: 0.00699266605079174 2023-01-24 03:49:47.334888: step: 236/463, loss: 0.0014869315782561898 2023-01-24 03:49:47.953356: step: 238/463, loss: 0.02677600085735321 2023-01-24 03:49:48.588256: step: 240/463, loss: 0.004532995633780956 2023-01-24 03:49:49.206055: step: 242/463, loss: 0.014747112058103085 2023-01-24 03:49:49.841994: step: 244/463, loss: 0.0004476590547710657 2023-01-24 03:49:50.474893: step: 246/463, loss: 0.008005515672266483 2023-01-24 03:49:51.069716: step: 248/463, loss: 0.029232745990157127 2023-01-24 03:49:51.649752: step: 250/463, loss: 0.006342190317809582 2023-01-24 03:49:52.289329: step: 252/463, loss: 0.007136001251637936 2023-01-24 03:49:52.875612: step: 254/463, loss: 0.026874519884586334 2023-01-24 03:49:53.461274: step: 256/463, loss: 0.01919802464544773 2023-01-24 03:49:53.996356: step: 258/463, loss: 0.0003610389831010252 2023-01-24 03:49:54.689439: step: 260/463, loss: 0.037108078598976135 2023-01-24 03:49:55.344286: step: 262/463, loss: 0.0019849385134875774 2023-01-24 03:49:55.977097: step: 264/463, loss: 0.029811395332217216 2023-01-24 03:49:56.652491: step: 266/463, loss: 0.0082953330129385 2023-01-24 03:49:57.232729: step: 268/463, loss: 0.005881347693502903 2023-01-24 03:49:57.832446: step: 270/463, loss: 0.0014427980640903115 2023-01-24 03:49:58.441380: step: 272/463, loss: 0.023032061755657196 2023-01-24 03:49:59.068290: step: 274/463, loss: 0.004619150422513485 2023-01-24 03:49:59.708572: step: 276/463, loss: 0.0014236380811780691 2023-01-24 03:50:00.455314: step: 278/463, loss: 0.09615522623062134 2023-01-24 03:50:01.164365: step: 280/463, loss: 0.04531494528055191 2023-01-24 03:50:01.746778: step: 282/463, loss: 0.014902645722031593 2023-01-24 03:50:02.380020: step: 284/463, loss: 0.013980317860841751 2023-01-24 03:50:02.993505: step: 286/463, loss: 0.015187329612672329 2023-01-24 03:50:03.583660: step: 288/463, loss: 0.0742793008685112 2023-01-24 03:50:04.183756: step: 290/463, loss: 0.006699662655591965 2023-01-24 03:50:04.804080: step: 292/463, loss: 0.00824943371117115 2023-01-24 03:50:05.449951: step: 294/463, loss: 0.09274782985448837 2023-01-24 03:50:06.151741: step: 296/463, loss: 0.013319441117346287 2023-01-24 03:50:06.888314: step: 298/463, loss: 0.005755189340561628 2023-01-24 03:50:07.496248: step: 300/463, loss: 0.007902318611741066 2023-01-24 03:50:08.091666: step: 302/463, loss: 8.287282253149897e-05 2023-01-24 03:50:08.852074: step: 304/463, loss: 0.002167182043194771 2023-01-24 03:50:09.435827: step: 306/463, loss: 0.001773059950210154 2023-01-24 03:50:10.068563: step: 308/463, loss: 0.008001173846423626 2023-01-24 03:50:10.750189: step: 310/463, loss: 0.18523503839969635 2023-01-24 03:50:11.366533: step: 312/463, loss: 0.009070301428437233 2023-01-24 03:50:11.915133: step: 314/463, loss: 0.07790570706129074 2023-01-24 03:50:12.592890: step: 316/463, loss: 0.01883280836045742 2023-01-24 03:50:13.236848: step: 318/463, loss: 0.05135779827833176 2023-01-24 03:50:13.868313: step: 320/463, loss: 0.023418454453349113 2023-01-24 03:50:14.450277: step: 322/463, loss: 0.0054235984571278095 2023-01-24 03:50:15.019202: step: 324/463, loss: 0.0016468078829348087 2023-01-24 03:50:15.587689: step: 326/463, loss: 0.0018365428550168872 2023-01-24 03:50:16.219046: step: 328/463, loss: 0.028370002284646034 2023-01-24 03:50:16.810682: step: 330/463, loss: 0.06369420886039734 2023-01-24 03:50:17.399359: step: 332/463, loss: 0.0007014954462647438 2023-01-24 03:50:18.039791: step: 334/463, loss: 0.0654231384396553 2023-01-24 03:50:18.612461: step: 336/463, loss: 0.041736625134944916 2023-01-24 03:50:19.168468: step: 338/463, loss: 0.24929679930210114 2023-01-24 03:50:19.818190: step: 340/463, loss: 0.04755621403455734 2023-01-24 03:50:20.373660: step: 342/463, loss: 0.005220194347202778 2023-01-24 03:50:21.064567: step: 344/463, loss: 0.012860847637057304 2023-01-24 03:50:21.673109: step: 346/463, loss: 0.0004501325893215835 2023-01-24 03:50:22.273775: step: 348/463, loss: 0.004493432585150003 2023-01-24 03:50:22.897953: step: 350/463, loss: 0.018364235758781433 2023-01-24 03:50:23.484232: step: 352/463, loss: 0.00035102470428682864 2023-01-24 03:50:24.095151: step: 354/463, loss: 0.10908031463623047 2023-01-24 03:50:24.654132: step: 356/463, loss: 0.0040270364843308926 2023-01-24 03:50:25.248912: step: 358/463, loss: 0.04088394716382027 2023-01-24 03:50:25.867907: step: 360/463, loss: 0.003437537234276533 2023-01-24 03:50:26.493594: step: 362/463, loss: 0.006508524529635906 2023-01-24 03:50:27.072098: step: 364/463, loss: 0.49140796065330505 2023-01-24 03:50:27.759735: step: 366/463, loss: 0.014118066988885403 2023-01-24 03:50:28.372024: step: 368/463, loss: 0.35202130675315857 2023-01-24 03:50:28.996118: step: 370/463, loss: 0.1096949651837349 2023-01-24 03:50:29.608244: step: 372/463, loss: 0.007475042250007391 2023-01-24 03:50:30.237140: step: 374/463, loss: 0.008727510459721088 2023-01-24 03:50:30.818366: step: 376/463, loss: 0.014625866897404194 2023-01-24 03:50:31.394970: step: 378/463, loss: 0.01204013917595148 2023-01-24 03:50:32.016414: step: 380/463, loss: 0.03378046303987503 2023-01-24 03:50:32.591661: step: 382/463, loss: 0.002222589449957013 2023-01-24 03:50:33.186687: step: 384/463, loss: 0.005115623585879803 2023-01-24 03:50:33.823267: step: 386/463, loss: 0.006413538008928299 2023-01-24 03:50:34.441521: step: 388/463, loss: 0.008121415972709656 2023-01-24 03:50:35.065128: step: 390/463, loss: 0.015697646886110306 2023-01-24 03:50:35.667070: step: 392/463, loss: 0.03854619711637497 2023-01-24 03:50:36.263258: step: 394/463, loss: 0.010431385599076748 2023-01-24 03:50:36.854812: step: 396/463, loss: 0.004541287198662758 2023-01-24 03:50:37.476466: step: 398/463, loss: 0.0363098606467247 2023-01-24 03:50:38.026034: step: 400/463, loss: 0.00254978914745152 2023-01-24 03:50:38.658947: step: 402/463, loss: 0.04078345745801926 2023-01-24 03:50:39.228442: step: 404/463, loss: 0.03281532973051071 2023-01-24 03:50:39.866402: step: 406/463, loss: 0.013361765071749687 2023-01-24 03:50:40.491804: step: 408/463, loss: 0.012684313580393791 2023-01-24 03:50:41.181471: step: 410/463, loss: 0.022267932072281837 2023-01-24 03:50:41.778797: step: 412/463, loss: 0.0008494913927279413 2023-01-24 03:50:42.364706: step: 414/463, loss: 0.003827937413007021 2023-01-24 03:50:42.940787: step: 416/463, loss: 0.00808730162680149 2023-01-24 03:50:43.562499: step: 418/463, loss: 0.080779068171978 2023-01-24 03:50:44.216717: step: 420/463, loss: 0.0078439861536026 2023-01-24 03:50:44.967485: step: 422/463, loss: 0.06151185929775238 2023-01-24 03:50:45.557919: step: 424/463, loss: 0.0015386121813207865 2023-01-24 03:50:46.200267: step: 426/463, loss: 0.07064806669950485 2023-01-24 03:50:46.881757: step: 428/463, loss: 0.023667573928833008 2023-01-24 03:50:47.536888: step: 430/463, loss: 0.049251630902290344 2023-01-24 03:50:48.183158: step: 432/463, loss: 0.2999231219291687 2023-01-24 03:50:48.736902: step: 434/463, loss: 0.004364494699984789 2023-01-24 03:50:49.318485: step: 436/463, loss: 0.08532516658306122 2023-01-24 03:50:49.926015: step: 438/463, loss: 0.020749501883983612 2023-01-24 03:50:50.548576: step: 440/463, loss: 0.02663409151136875 2023-01-24 03:50:51.154020: step: 442/463, loss: 0.003548748092725873 2023-01-24 03:50:51.772590: step: 444/463, loss: 0.0015679626958444715 2023-01-24 03:50:52.397738: step: 446/463, loss: 0.0156058045104146 2023-01-24 03:50:53.027344: step: 448/463, loss: 0.041782036423683167 2023-01-24 03:50:53.685748: step: 450/463, loss: 0.0058049894869327545 2023-01-24 03:50:54.325356: step: 452/463, loss: 0.0071624526754021645 2023-01-24 03:50:54.995135: step: 454/463, loss: 0.038687292486429214 2023-01-24 03:50:55.582873: step: 456/463, loss: 0.0023365935776382685 2023-01-24 03:50:56.124154: step: 458/463, loss: 0.020690549165010452 2023-01-24 03:50:56.729820: step: 460/463, loss: 2.011099338531494 2023-01-24 03:50:57.334720: step: 462/463, loss: 0.0026375912129878998 2023-01-24 03:50:58.108415: step: 464/463, loss: 0.04222738370299339 2023-01-24 03:50:58.709732: step: 466/463, loss: 0.011043574661016464 2023-01-24 03:50:59.306167: step: 468/463, loss: 0.008948219940066338 2023-01-24 03:50:59.937035: step: 470/463, loss: 0.0036710118874907494 2023-01-24 03:51:00.550029: step: 472/463, loss: 0.027389252558350563 2023-01-24 03:51:01.177359: step: 474/463, loss: 0.007042410783469677 2023-01-24 03:51:01.828321: step: 476/463, loss: 0.054590508341789246 2023-01-24 03:51:02.428844: step: 478/463, loss: 0.01330722589045763 2023-01-24 03:51:03.018421: step: 480/463, loss: 0.01824328489601612 2023-01-24 03:51:03.619814: step: 482/463, loss: 0.010179009288549423 2023-01-24 03:51:04.253120: step: 484/463, loss: 0.0037826818879693747 2023-01-24 03:51:04.825342: step: 486/463, loss: 0.00548544293269515 2023-01-24 03:51:05.463623: step: 488/463, loss: 0.03571353852748871 2023-01-24 03:51:06.107081: step: 490/463, loss: 0.030300496146082878 2023-01-24 03:51:06.830144: step: 492/463, loss: 0.0065094707533717155 2023-01-24 03:51:07.467359: step: 494/463, loss: 0.004390700254589319 2023-01-24 03:51:08.023993: step: 496/463, loss: 0.001701120170764625 2023-01-24 03:51:08.683636: step: 498/463, loss: 0.001852832967415452 2023-01-24 03:51:09.362659: step: 500/463, loss: 0.011258398182690144 2023-01-24 03:51:09.955454: step: 502/463, loss: 0.004814359825104475 2023-01-24 03:51:10.579968: step: 504/463, loss: 0.0061867572367191315 2023-01-24 03:51:11.192258: step: 506/463, loss: 0.04522324353456497 2023-01-24 03:51:11.859929: step: 508/463, loss: 0.006622238550335169 2023-01-24 03:51:12.447881: step: 510/463, loss: 0.02634294144809246 2023-01-24 03:51:13.124317: step: 512/463, loss: 0.0019054700387641788 2023-01-24 03:51:13.722687: step: 514/463, loss: 0.006115755066275597 2023-01-24 03:51:14.396415: step: 516/463, loss: 0.042282894253730774 2023-01-24 03:51:15.017847: step: 518/463, loss: 0.008722420781850815 2023-01-24 03:51:15.592667: step: 520/463, loss: 0.11657079309225082 2023-01-24 03:51:16.209483: step: 522/463, loss: 0.13663256168365479 2023-01-24 03:51:16.844637: step: 524/463, loss: 0.028521951287984848 2023-01-24 03:51:17.529163: step: 526/463, loss: 0.03692437335848808 2023-01-24 03:51:18.148749: step: 528/463, loss: 0.008721224963665009 2023-01-24 03:51:18.766304: step: 530/463, loss: 0.05696066468954086 2023-01-24 03:51:19.367644: step: 532/463, loss: 1.4876073598861694 2023-01-24 03:51:19.966020: step: 534/463, loss: 0.007431971374899149 2023-01-24 03:51:20.614968: step: 536/463, loss: 0.02266770415008068 2023-01-24 03:51:21.279538: step: 538/463, loss: 0.1841118484735489 2023-01-24 03:51:21.877772: step: 540/463, loss: 0.016357596963644028 2023-01-24 03:51:22.498009: step: 542/463, loss: 0.006929911207407713 2023-01-24 03:51:23.097718: step: 544/463, loss: 0.022365828976035118 2023-01-24 03:51:23.690043: step: 546/463, loss: 0.0352771133184433 2023-01-24 03:51:24.352873: step: 548/463, loss: 0.0016185512067750096 2023-01-24 03:51:25.028776: step: 550/463, loss: 0.0004608993185684085 2023-01-24 03:51:25.632262: step: 552/463, loss: 0.012112148106098175 2023-01-24 03:51:26.224807: step: 554/463, loss: 0.01999003067612648 2023-01-24 03:51:26.879677: step: 556/463, loss: 0.03824234753847122 2023-01-24 03:51:27.529190: step: 558/463, loss: 0.001486281049437821 2023-01-24 03:51:28.217301: step: 560/463, loss: 0.0161641426384449 2023-01-24 03:51:28.792400: step: 562/463, loss: 0.013917661271989346 2023-01-24 03:51:29.374857: step: 564/463, loss: 0.015233626589179039 2023-01-24 03:51:29.987468: step: 566/463, loss: 0.009183204732835293 2023-01-24 03:51:30.619221: step: 568/463, loss: 0.011217552237212658 2023-01-24 03:51:31.264716: step: 570/463, loss: 0.0045104562304914 2023-01-24 03:51:31.883717: step: 572/463, loss: 0.0038712089881300926 2023-01-24 03:51:32.567006: step: 574/463, loss: 0.00613539619371295 2023-01-24 03:51:33.185436: step: 576/463, loss: 0.021922443062067032 2023-01-24 03:51:33.790242: step: 578/463, loss: 0.030069440603256226 2023-01-24 03:51:34.447455: step: 580/463, loss: 0.01946062408387661 2023-01-24 03:51:35.032990: step: 582/463, loss: 0.015328926965594292 2023-01-24 03:51:35.602146: step: 584/463, loss: 0.00012223079102113843 2023-01-24 03:51:36.182962: step: 586/463, loss: 0.033645614981651306 2023-01-24 03:51:36.771075: step: 588/463, loss: 0.03122386895120144 2023-01-24 03:51:37.417049: step: 590/463, loss: 0.03320504352450371 2023-01-24 03:51:38.036468: step: 592/463, loss: 0.0273686982691288 2023-01-24 03:51:38.713503: step: 594/463, loss: 0.05744955688714981 2023-01-24 03:51:39.333187: step: 596/463, loss: 0.022972291335463524 2023-01-24 03:51:39.988468: step: 598/463, loss: 0.02768358401954174 2023-01-24 03:51:40.579904: step: 600/463, loss: 0.006396604236215353 2023-01-24 03:51:41.205961: step: 602/463, loss: 0.046035610139369965 2023-01-24 03:51:41.831860: step: 604/463, loss: 0.0030810264870524406 2023-01-24 03:51:42.466918: step: 606/463, loss: 0.010916823521256447 2023-01-24 03:51:43.062568: step: 608/463, loss: 0.04350392892956734 2023-01-24 03:51:43.725799: step: 610/463, loss: 0.0021778522059321404 2023-01-24 03:51:44.270900: step: 612/463, loss: 0.002617267891764641 2023-01-24 03:51:44.955840: step: 614/463, loss: 0.009265408851206303 2023-01-24 03:51:45.538881: step: 616/463, loss: 0.005866225343197584 2023-01-24 03:51:46.238624: step: 618/463, loss: 0.015919432044029236 2023-01-24 03:51:46.874907: step: 620/463, loss: 0.011642727069556713 2023-01-24 03:51:47.563321: step: 622/463, loss: 0.005333344917744398 2023-01-24 03:51:48.178368: step: 624/463, loss: 0.016595320776104927 2023-01-24 03:51:48.907768: step: 626/463, loss: 0.01663575880229473 2023-01-24 03:51:49.523122: step: 628/463, loss: 0.003091169521212578 2023-01-24 03:51:50.101596: step: 630/463, loss: 0.0015520612942054868 2023-01-24 03:51:50.710084: step: 632/463, loss: 0.02732008509337902 2023-01-24 03:51:51.347990: step: 634/463, loss: 0.006296942010521889 2023-01-24 03:51:51.960263: step: 636/463, loss: 0.02850566804409027 2023-01-24 03:51:52.631995: step: 638/463, loss: 0.008473414927721024 2023-01-24 03:51:53.266013: step: 640/463, loss: 0.016142599284648895 2023-01-24 03:51:53.874163: step: 642/463, loss: 0.06030401214957237 2023-01-24 03:51:54.515040: step: 644/463, loss: 0.05786483362317085 2023-01-24 03:51:55.113018: step: 646/463, loss: 0.0005453546764329076 2023-01-24 03:51:55.728905: step: 648/463, loss: 0.07799121737480164 2023-01-24 03:51:56.316419: step: 650/463, loss: 0.13648895919322968 2023-01-24 03:51:56.914490: step: 652/463, loss: 0.006396754644811153 2023-01-24 03:51:57.556781: step: 654/463, loss: 0.04120725393295288 2023-01-24 03:51:58.149277: step: 656/463, loss: 0.001183871878311038 2023-01-24 03:51:58.785034: step: 658/463, loss: 0.0656622126698494 2023-01-24 03:51:59.404577: step: 660/463, loss: 0.0038908233400434256 2023-01-24 03:52:00.106227: step: 662/463, loss: 0.04772219806909561 2023-01-24 03:52:00.787438: step: 664/463, loss: 0.01838582754135132 2023-01-24 03:52:01.382603: step: 666/463, loss: 0.004813347943127155 2023-01-24 03:52:01.922448: step: 668/463, loss: 0.008449632674455643 2023-01-24 03:52:02.520114: step: 670/463, loss: 0.002942825900390744 2023-01-24 03:52:03.135729: step: 672/463, loss: 0.00027331389719620347 2023-01-24 03:52:03.745349: step: 674/463, loss: 0.03727958723902702 2023-01-24 03:52:04.398137: step: 676/463, loss: 0.0025262413546442986 2023-01-24 03:52:04.956302: step: 678/463, loss: 0.0007347504724748433 2023-01-24 03:52:05.525468: step: 680/463, loss: 0.00021508800273295492 2023-01-24 03:52:06.132514: step: 682/463, loss: 0.020790673792362213 2023-01-24 03:52:06.809439: step: 684/463, loss: 0.012854392640292645 2023-01-24 03:52:07.430697: step: 686/463, loss: 0.0023949958849698305 2023-01-24 03:52:08.156896: step: 688/463, loss: 0.015197242610156536 2023-01-24 03:52:08.768086: step: 690/463, loss: 0.02898489311337471 2023-01-24 03:52:09.400227: step: 692/463, loss: 0.46489542722702026 2023-01-24 03:52:10.015189: step: 694/463, loss: 0.006251441780477762 2023-01-24 03:52:10.530560: step: 696/463, loss: 0.00031785733881406486 2023-01-24 03:52:11.131944: step: 698/463, loss: 0.03094170242547989 2023-01-24 03:52:11.826265: step: 700/463, loss: 2.71785044670105 2023-01-24 03:52:12.441411: step: 702/463, loss: 0.02963750995695591 2023-01-24 03:52:13.039105: step: 704/463, loss: 0.028155282139778137 2023-01-24 03:52:13.649718: step: 706/463, loss: 0.054499782621860504 2023-01-24 03:52:14.274355: step: 708/463, loss: 0.12207955121994019 2023-01-24 03:52:14.882514: step: 710/463, loss: 0.062202781438827515 2023-01-24 03:52:15.512358: step: 712/463, loss: 0.003921740688383579 2023-01-24 03:52:16.099665: step: 714/463, loss: 0.009068318642675877 2023-01-24 03:52:16.755889: step: 716/463, loss: 0.001717057777568698 2023-01-24 03:52:17.442049: step: 718/463, loss: 0.2314022332429886 2023-01-24 03:52:18.046345: step: 720/463, loss: 0.003480197163298726 2023-01-24 03:52:18.648318: step: 722/463, loss: 0.010539277456700802 2023-01-24 03:52:19.221678: step: 724/463, loss: 0.0001856078888522461 2023-01-24 03:52:19.976045: step: 726/463, loss: 0.1042601689696312 2023-01-24 03:52:20.619641: step: 728/463, loss: 0.20359423756599426 2023-01-24 03:52:21.233870: step: 730/463, loss: 0.06622110307216644 2023-01-24 03:52:21.829298: step: 732/463, loss: 0.021832749247550964 2023-01-24 03:52:22.434360: step: 734/463, loss: 0.028036857023835182 2023-01-24 03:52:23.133142: step: 736/463, loss: 0.00857492070645094 2023-01-24 03:52:23.712556: step: 738/463, loss: 0.024369752034544945 2023-01-24 03:52:24.350274: step: 740/463, loss: 0.9981862902641296 2023-01-24 03:52:24.939092: step: 742/463, loss: 0.26254791021347046 2023-01-24 03:52:25.512693: step: 744/463, loss: 0.008379360660910606 2023-01-24 03:52:26.133892: step: 746/463, loss: 0.006708393804728985 2023-01-24 03:52:26.739722: step: 748/463, loss: 0.027649573981761932 2023-01-24 03:52:27.342149: step: 750/463, loss: 0.021381065249443054 2023-01-24 03:52:27.951761: step: 752/463, loss: 0.010988865047693253 2023-01-24 03:52:28.590502: step: 754/463, loss: 0.11998961120843887 2023-01-24 03:52:29.142834: step: 756/463, loss: 0.004171561915427446 2023-01-24 03:52:29.767282: step: 758/463, loss: 0.014124194160103798 2023-01-24 03:52:30.455246: step: 760/463, loss: 0.036059629172086716 2023-01-24 03:52:31.028047: step: 762/463, loss: 0.010031620971858501 2023-01-24 03:52:31.687439: step: 764/463, loss: 0.02956785447895527 2023-01-24 03:52:32.264877: step: 766/463, loss: 0.014752618037164211 2023-01-24 03:52:32.888730: step: 768/463, loss: 0.009555048309266567 2023-01-24 03:52:33.547125: step: 770/463, loss: 0.027097180485725403 2023-01-24 03:52:34.131867: step: 772/463, loss: 0.0016095516039058566 2023-01-24 03:52:34.725766: step: 774/463, loss: 0.32477274537086487 2023-01-24 03:52:35.340643: step: 776/463, loss: 0.03107057884335518 2023-01-24 03:52:35.964449: step: 778/463, loss: 0.0038266393821686506 2023-01-24 03:52:36.574707: step: 780/463, loss: 0.0018915997352451086 2023-01-24 03:52:37.150738: step: 782/463, loss: 0.025805462151765823 2023-01-24 03:52:37.701232: step: 784/463, loss: 0.009442784823477268 2023-01-24 03:52:38.300077: step: 786/463, loss: 0.3690575361251831 2023-01-24 03:52:38.859743: step: 788/463, loss: 0.002227095188573003 2023-01-24 03:52:39.480258: step: 790/463, loss: 0.007028195075690746 2023-01-24 03:52:40.092935: step: 792/463, loss: 0.02309034764766693 2023-01-24 03:52:40.735757: step: 794/463, loss: 0.021725161001086235 2023-01-24 03:52:41.307624: step: 796/463, loss: 0.02373715490102768 2023-01-24 03:52:41.882441: step: 798/463, loss: 0.0081197964027524 2023-01-24 03:52:42.488386: step: 800/463, loss: 0.019046910107135773 2023-01-24 03:52:43.006412: step: 802/463, loss: 0.010289816185832024 2023-01-24 03:52:43.601863: step: 804/463, loss: 0.010623391717672348 2023-01-24 03:52:44.247120: step: 806/463, loss: 0.19675587117671967 2023-01-24 03:52:44.838546: step: 808/463, loss: 0.004776483401656151 2023-01-24 03:52:45.441207: step: 810/463, loss: 0.039210911840200424 2023-01-24 03:52:46.042468: step: 812/463, loss: 0.17494267225265503 2023-01-24 03:52:46.660445: step: 814/463, loss: 0.004393057897686958 2023-01-24 03:52:47.303776: step: 816/463, loss: 0.05353159457445145 2023-01-24 03:52:47.931588: step: 818/463, loss: 0.018934201449155807 2023-01-24 03:52:48.608286: step: 820/463, loss: 0.003956741653382778 2023-01-24 03:52:49.252080: step: 822/463, loss: 0.002721026772633195 2023-01-24 03:52:49.864728: step: 824/463, loss: 0.12289454787969589 2023-01-24 03:52:50.463175: step: 826/463, loss: 0.009979347698390484 2023-01-24 03:52:51.102118: step: 828/463, loss: 0.031114686280488968 2023-01-24 03:52:51.748507: step: 830/463, loss: 0.002735384739935398 2023-01-24 03:52:52.354746: step: 832/463, loss: 0.033943697810173035 2023-01-24 03:52:52.893366: step: 834/463, loss: 0.05121907591819763 2023-01-24 03:52:53.493149: step: 836/463, loss: 0.014786218293011189 2023-01-24 03:52:54.079118: step: 838/463, loss: 0.044448621571063995 2023-01-24 03:52:54.608575: step: 840/463, loss: 6.978730380069464e-05 2023-01-24 03:52:55.265878: step: 842/463, loss: 0.022355465218424797 2023-01-24 03:52:55.927937: step: 844/463, loss: 0.03591838479042053 2023-01-24 03:52:56.578232: step: 846/463, loss: 3.351704799570143e-05 2023-01-24 03:52:57.203707: step: 848/463, loss: 0.022055551409721375 2023-01-24 03:52:57.790071: step: 850/463, loss: 0.07801473140716553 2023-01-24 03:52:58.397723: step: 852/463, loss: 0.6784647703170776 2023-01-24 03:52:59.032971: step: 854/463, loss: 0.02263505384325981 2023-01-24 03:52:59.708848: step: 856/463, loss: 0.045020654797554016 2023-01-24 03:53:00.284889: step: 858/463, loss: 0.05555715411901474 2023-01-24 03:53:00.937684: step: 860/463, loss: 0.03509535640478134 2023-01-24 03:53:01.496460: step: 862/463, loss: 0.0019300265703350306 2023-01-24 03:53:02.112030: step: 864/463, loss: 0.03359898179769516 2023-01-24 03:53:02.807605: step: 866/463, loss: 0.03566916286945343 2023-01-24 03:53:03.448753: step: 868/463, loss: 0.048562973737716675 2023-01-24 03:53:04.058843: step: 870/463, loss: 0.017183849588036537 2023-01-24 03:53:04.669689: step: 872/463, loss: 0.09051021933555603 2023-01-24 03:53:05.393706: step: 874/463, loss: 0.037447940558195114 2023-01-24 03:53:06.030074: step: 876/463, loss: 0.09816146641969681 2023-01-24 03:53:06.654567: step: 878/463, loss: 0.21727986633777618 2023-01-24 03:53:07.298335: step: 880/463, loss: 0.03683961182832718 2023-01-24 03:53:07.871428: step: 882/463, loss: 0.0132490498945117 2023-01-24 03:53:08.551123: step: 884/463, loss: 0.06462587416172028 2023-01-24 03:53:09.165016: step: 886/463, loss: 0.019473182037472725 2023-01-24 03:53:09.757592: step: 888/463, loss: 0.012223903089761734 2023-01-24 03:53:10.451391: step: 890/463, loss: 0.0008236998110078275 2023-01-24 03:53:11.046421: step: 892/463, loss: 0.904689371585846 2023-01-24 03:53:11.603014: step: 894/463, loss: 0.06158997491002083 2023-01-24 03:53:12.211083: step: 896/463, loss: 0.12429679930210114 2023-01-24 03:53:12.960518: step: 898/463, loss: 0.04664970189332962 2023-01-24 03:53:13.543852: step: 900/463, loss: 0.0627560168504715 2023-01-24 03:53:14.151413: step: 902/463, loss: 0.0368766151368618 2023-01-24 03:53:14.714574: step: 904/463, loss: 0.006379998754709959 2023-01-24 03:53:15.453883: step: 906/463, loss: 1.0913007259368896 2023-01-24 03:53:16.070965: step: 908/463, loss: 0.009168403223156929 2023-01-24 03:53:16.727394: step: 910/463, loss: 0.03069346956908703 2023-01-24 03:53:17.322527: step: 912/463, loss: 0.0005233348929323256 2023-01-24 03:53:17.900438: step: 914/463, loss: 0.009611212648451328 2023-01-24 03:53:18.483720: step: 916/463, loss: 0.01006375439465046 2023-01-24 03:53:19.096182: step: 918/463, loss: 0.0023787294048815966 2023-01-24 03:53:19.656699: step: 920/463, loss: 0.005271725356578827 2023-01-24 03:53:20.294235: step: 922/463, loss: 0.0032884960528463125 2023-01-24 03:53:20.848855: step: 924/463, loss: 0.01920892857015133 2023-01-24 03:53:21.454138: step: 926/463, loss: 0.1331748217344284 ================================================== Loss: 0.057 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3662576002509289, 'r': 0.33150830231440814, 'f1': 0.34801767991970733}, 'combined': 0.25643407994083695, 'epoch': 31} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3472176659136165, 'r': 0.3746631799472296, 'f1': 0.36041869123128717}, 'combined': 0.2793675979878877, 'epoch': 31} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3254711853978671, 'r': 0.32732396254434454, 'f1': 0.32639494467524993}, 'combined': 0.2405015381817631, 'epoch': 31} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3346731701423155, 'r': 0.38388981281030304, 'f1': 0.35759599001507686}, 'combined': 0.27717966690163853, 'epoch': 31} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3291481073793433, 'r': 0.32102870435101033, 'f1': 0.32503770834386636}, 'combined': 0.23950146930600677, 'epoch': 31} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3351216017018834, 'r': 0.37208354306606173, 'f1': 0.3526366679929226}, 'combined': 0.2733355991141314, 'epoch': 31} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31451612903225806, 'r': 0.2785714285714286, 'f1': 0.2954545454545454}, 'combined': 0.19696969696969693, 'epoch': 31} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.358695652173913, 'r': 0.358695652173913, 'f1': 0.358695652173913}, 'combined': 0.1793478260869565, 'epoch': 31} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5227272727272727, 'r': 0.19827586206896552, 'f1': 0.28750000000000003}, 'combined': 0.19166666666666668, 'epoch': 31} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35531381856540084, 'r': 0.3195801707779886, 'f1': 0.336500999000999}, 'combined': 0.24794810452705188, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35077406976066583, 'r': 0.38269193088778525, 'f1': 0.3660385237854157}, 'combined': 0.2837236404461117, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.3142857142857143, 'f1': 0.360655737704918}, 'combined': 0.24043715846994532, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3556843389529725, 'r': 0.32598773380319873, 'f1': 0.340189179632249}, 'combined': 0.25066571130797294, 'epoch': 19} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35235259195938173, 'r': 0.3698406801632481, 'f1': 0.3608848968767838}, 'combined': 0.2797289631293731, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.41304347826086957, 'r': 0.41304347826086957, 'f1': 0.41304347826086957}, 'combined': 0.20652173913043478, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34001213859732793, 'r': 0.32968918941790243, 'f1': 0.33477110370565427}, 'combined': 0.24667344483574524, 'epoch': 29} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34157155234650616, 'r': 0.36386160769264764, 'f1': 0.3523644229368942}, 'combined': 0.2731245766305113, 'epoch': 29} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6666666666666666, 'r': 0.20689655172413793, 'f1': 0.31578947368421056}, 'combined': 0.2105263157894737, 'epoch': 29} ****************************** Epoch: 32 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:55:56.983791: step: 2/463, loss: 0.007539997808635235 2023-01-24 03:55:57.575791: step: 4/463, loss: 0.0285597313195467 2023-01-24 03:55:58.157182: step: 6/463, loss: 0.016809236258268356 2023-01-24 03:55:58.787419: step: 8/463, loss: 0.015142922289669514 2023-01-24 03:55:59.364973: step: 10/463, loss: 0.034306954592466354 2023-01-24 03:55:59.965497: step: 12/463, loss: 0.012067035771906376 2023-01-24 03:56:00.614351: step: 14/463, loss: 0.0035218135453760624 2023-01-24 03:56:01.161758: step: 16/463, loss: 0.04193190485239029 2023-01-24 03:56:01.775429: step: 18/463, loss: 0.1488228589296341 2023-01-24 03:56:02.394521: step: 20/463, loss: 0.08529949933290482 2023-01-24 03:56:02.989138: step: 22/463, loss: 0.000689376553054899 2023-01-24 03:56:03.608770: step: 24/463, loss: 0.008331475779414177 2023-01-24 03:56:04.222526: step: 26/463, loss: 0.019964156672358513 2023-01-24 03:56:04.825205: step: 28/463, loss: 0.009994626976549625 2023-01-24 03:56:05.464316: step: 30/463, loss: 0.049466222524642944 2023-01-24 03:56:06.119649: step: 32/463, loss: 0.004443508572876453 2023-01-24 03:56:06.695022: step: 34/463, loss: 0.02194046787917614 2023-01-24 03:56:07.357757: step: 36/463, loss: 0.03573227301239967 2023-01-24 03:56:07.919870: step: 38/463, loss: 0.022319650277495384 2023-01-24 03:56:08.535039: step: 40/463, loss: 0.0014002065872773528 2023-01-24 03:56:09.121246: step: 42/463, loss: 0.0004496430919971317 2023-01-24 03:56:09.701934: step: 44/463, loss: 0.015456403605639935 2023-01-24 03:56:10.289719: step: 46/463, loss: 0.028172114863991737 2023-01-24 03:56:10.877107: step: 48/463, loss: 0.011922353878617287 2023-01-24 03:56:11.454107: step: 50/463, loss: 0.003793282201513648 2023-01-24 03:56:12.145865: step: 52/463, loss: 0.10658351331949234 2023-01-24 03:56:12.786842: step: 54/463, loss: 0.03181418031454086 2023-01-24 03:56:13.339702: step: 56/463, loss: 0.002492751693353057 2023-01-24 03:56:13.924138: step: 58/463, loss: 0.02101484127342701 2023-01-24 03:56:14.510345: step: 60/463, loss: 0.0030175780411809683 2023-01-24 03:56:15.116314: step: 62/463, loss: 0.05303362384438515 2023-01-24 03:56:15.768238: step: 64/463, loss: 0.0007619561511091888 2023-01-24 03:56:16.406580: step: 66/463, loss: 0.011478719301521778 2023-01-24 03:56:16.990629: step: 68/463, loss: 0.016095656901597977 2023-01-24 03:56:17.547772: step: 70/463, loss: 0.14240337908267975 2023-01-24 03:56:18.195321: step: 72/463, loss: 0.00042851909529417753 2023-01-24 03:56:18.805237: step: 74/463, loss: 0.04007928818464279 2023-01-24 03:56:19.459040: step: 76/463, loss: 0.0029200424905866385 2023-01-24 03:56:20.069115: step: 78/463, loss: 0.002364977030083537 2023-01-24 03:56:20.710610: step: 80/463, loss: 0.0036983764730393887 2023-01-24 03:56:21.354114: step: 82/463, loss: 0.02411513216793537 2023-01-24 03:56:21.973930: step: 84/463, loss: 0.0014074050122871995 2023-01-24 03:56:22.723976: step: 86/463, loss: 0.0342196524143219 2023-01-24 03:56:23.395514: step: 88/463, loss: 0.0018354646163061261 2023-01-24 03:56:23.982936: step: 90/463, loss: 0.02250245213508606 2023-01-24 03:56:24.675431: step: 92/463, loss: 0.033345721662044525 2023-01-24 03:56:25.222190: step: 94/463, loss: 0.0336601585149765 2023-01-24 03:56:25.866216: step: 96/463, loss: 0.04659218713641167 2023-01-24 03:56:26.529308: step: 98/463, loss: 0.0011301238555461168 2023-01-24 03:56:27.185789: step: 100/463, loss: 0.004545257892459631 2023-01-24 03:56:27.786766: step: 102/463, loss: 0.009769748896360397 2023-01-24 03:56:28.442254: step: 104/463, loss: 0.003323189914226532 2023-01-24 03:56:29.064480: step: 106/463, loss: 0.08884399384260178 2023-01-24 03:56:29.689145: step: 108/463, loss: 0.024232499301433563 2023-01-24 03:56:30.327908: step: 110/463, loss: 0.006647381000220776 2023-01-24 03:56:31.016792: step: 112/463, loss: 0.005637145601212978 2023-01-24 03:56:31.608586: step: 114/463, loss: 0.011465215124189854 2023-01-24 03:56:32.166625: step: 116/463, loss: 0.20188608765602112 2023-01-24 03:56:32.804539: step: 118/463, loss: 0.016808904707431793 2023-01-24 03:56:33.405901: step: 120/463, loss: 0.000454298424301669 2023-01-24 03:56:34.056833: step: 122/463, loss: 0.017502423375844955 2023-01-24 03:56:34.627356: step: 124/463, loss: 0.005748111288994551 2023-01-24 03:56:35.199319: step: 126/463, loss: 0.035578783601522446 2023-01-24 03:56:35.838373: step: 128/463, loss: 0.2723863124847412 2023-01-24 03:56:36.473649: step: 130/463, loss: 0.03454357758164406 2023-01-24 03:56:37.089740: step: 132/463, loss: 4.4352535041980445e-05 2023-01-24 03:56:37.705947: step: 134/463, loss: 0.4914063811302185 2023-01-24 03:56:38.262830: step: 136/463, loss: 0.035584814846515656 2023-01-24 03:56:38.866430: step: 138/463, loss: 0.16930851340293884 2023-01-24 03:56:39.526213: step: 140/463, loss: 0.03407140076160431 2023-01-24 03:56:40.096541: step: 142/463, loss: 0.0005401013768278062 2023-01-24 03:56:40.752028: step: 144/463, loss: 0.0006070762756280601 2023-01-24 03:56:41.372009: step: 146/463, loss: 0.029754312708973885 2023-01-24 03:56:42.038639: step: 148/463, loss: 0.010098290629684925 2023-01-24 03:56:42.572009: step: 150/463, loss: 0.023425571620464325 2023-01-24 03:56:43.140101: step: 152/463, loss: 0.0023472870234400034 2023-01-24 03:56:43.734747: step: 154/463, loss: 0.007180437911301851 2023-01-24 03:56:44.310419: step: 156/463, loss: 0.000614872551523149 2023-01-24 03:56:44.856737: step: 158/463, loss: 0.0002431828179396689 2023-01-24 03:56:45.436499: step: 160/463, loss: 0.5747070908546448 2023-01-24 03:56:46.023895: step: 162/463, loss: 0.003977107349783182 2023-01-24 03:56:46.620474: step: 164/463, loss: 0.006228742189705372 2023-01-24 03:56:47.255335: step: 166/463, loss: 0.02033989131450653 2023-01-24 03:56:47.968497: step: 168/463, loss: 0.026744529604911804 2023-01-24 03:56:48.571859: step: 170/463, loss: 0.00286267907358706 2023-01-24 03:56:49.187120: step: 172/463, loss: 0.009174594655632973 2023-01-24 03:56:49.767807: step: 174/463, loss: 0.001380595494993031 2023-01-24 03:56:50.346539: step: 176/463, loss: 0.015228061936795712 2023-01-24 03:56:50.970439: step: 178/463, loss: 0.009595110081136227 2023-01-24 03:56:51.577415: step: 180/463, loss: 0.0075636752881109715 2023-01-24 03:56:52.211739: step: 182/463, loss: 0.01381948497146368 2023-01-24 03:56:52.773953: step: 184/463, loss: 0.006805033423006535 2023-01-24 03:56:53.455139: step: 186/463, loss: 0.014521433971822262 2023-01-24 03:56:53.977157: step: 188/463, loss: 0.0005430255550891161 2023-01-24 03:56:54.608697: step: 190/463, loss: 0.11562582850456238 2023-01-24 03:56:55.235703: step: 192/463, loss: 0.006288902834057808 2023-01-24 03:56:55.875684: step: 194/463, loss: 0.024572215974330902 2023-01-24 03:56:56.564981: step: 196/463, loss: 0.42376944422721863 2023-01-24 03:56:57.179610: step: 198/463, loss: 0.028134725987911224 2023-01-24 03:56:57.793287: step: 200/463, loss: 0.06974295526742935 2023-01-24 03:56:58.380251: step: 202/463, loss: 0.028500178828835487 2023-01-24 03:56:58.992621: step: 204/463, loss: 0.033555179834365845 2023-01-24 03:56:59.601194: step: 206/463, loss: 0.017418064177036285 2023-01-24 03:57:00.187692: step: 208/463, loss: 0.005509042646735907 2023-01-24 03:57:00.777265: step: 210/463, loss: 0.00871426984667778 2023-01-24 03:57:01.403936: step: 212/463, loss: 0.009560792706906796 2023-01-24 03:57:02.018803: step: 214/463, loss: 0.08527348935604095 2023-01-24 03:57:02.635815: step: 216/463, loss: 0.0003343084827065468 2023-01-24 03:57:03.232576: step: 218/463, loss: 0.007383414078503847 2023-01-24 03:57:03.828717: step: 220/463, loss: 0.006665619555860758 2023-01-24 03:57:04.434074: step: 222/463, loss: 0.020200714468955994 2023-01-24 03:57:05.011479: step: 224/463, loss: 0.0008119925041683018 2023-01-24 03:57:05.584979: step: 226/463, loss: 0.005870304070413113 2023-01-24 03:57:06.207135: step: 228/463, loss: 0.0017156031681224704 2023-01-24 03:57:06.830175: step: 230/463, loss: 0.016523635014891624 2023-01-24 03:57:07.435845: step: 232/463, loss: 0.20136921107769012 2023-01-24 03:57:08.037535: step: 234/463, loss: 0.020561877638101578 2023-01-24 03:57:08.708756: step: 236/463, loss: 0.008616182021796703 2023-01-24 03:57:09.366228: step: 238/463, loss: 0.0021359645761549473 2023-01-24 03:57:09.966431: step: 240/463, loss: 0.03449198231101036 2023-01-24 03:57:10.580544: step: 242/463, loss: 0.006085831671953201 2023-01-24 03:57:11.249839: step: 244/463, loss: 0.00014921504771336913 2023-01-24 03:57:11.878136: step: 246/463, loss: 0.005383210722357035 2023-01-24 03:57:12.495656: step: 248/463, loss: 3.426999092102051 2023-01-24 03:57:13.069708: step: 250/463, loss: 0.0036993036046624184 2023-01-24 03:57:13.695648: step: 252/463, loss: 0.0005066048470325768 2023-01-24 03:57:14.215279: step: 254/463, loss: 0.008969089947640896 2023-01-24 03:57:14.859168: step: 256/463, loss: 0.006283185910433531 2023-01-24 03:57:15.496627: step: 258/463, loss: 0.003469745861366391 2023-01-24 03:57:16.091698: step: 260/463, loss: 0.004944317974150181 2023-01-24 03:57:16.716523: step: 262/463, loss: 0.012042064219713211 2023-01-24 03:57:17.291316: step: 264/463, loss: 0.001941087655723095 2023-01-24 03:57:17.854969: step: 266/463, loss: 0.0346544124186039 2023-01-24 03:57:18.439133: step: 268/463, loss: 0.13292662799358368 2023-01-24 03:57:19.030674: step: 270/463, loss: 0.2711741626262665 2023-01-24 03:57:19.637810: step: 272/463, loss: 0.10146824270486832 2023-01-24 03:57:20.337025: step: 274/463, loss: 0.035470522940158844 2023-01-24 03:57:20.919730: step: 276/463, loss: 0.023216169327497482 2023-01-24 03:57:21.511672: step: 278/463, loss: 0.00018035976972896606 2023-01-24 03:57:22.032965: step: 280/463, loss: 0.0031747580505907536 2023-01-24 03:57:22.578044: step: 282/463, loss: 0.020259961485862732 2023-01-24 03:57:23.200462: step: 284/463, loss: 0.047636549919843674 2023-01-24 03:57:23.722801: step: 286/463, loss: 0.0012870192294940352 2023-01-24 03:57:24.308555: step: 288/463, loss: 0.028955060988664627 2023-01-24 03:57:24.911811: step: 290/463, loss: 0.004549533594399691 2023-01-24 03:57:25.528960: step: 292/463, loss: 0.0027782809920608997 2023-01-24 03:57:26.252681: step: 294/463, loss: 0.05999428778886795 2023-01-24 03:57:26.816671: step: 296/463, loss: 0.09269710630178452 2023-01-24 03:57:27.429252: step: 298/463, loss: 0.0128501420840621 2023-01-24 03:57:28.074038: step: 300/463, loss: 0.008558955043554306 2023-01-24 03:57:28.636603: step: 302/463, loss: 0.030647819861769676 2023-01-24 03:57:29.242161: step: 304/463, loss: 0.0019651339389383793 2023-01-24 03:57:29.889160: step: 306/463, loss: 0.026631858199834824 2023-01-24 03:57:30.472998: step: 308/463, loss: 0.0030862498097121716 2023-01-24 03:57:31.142652: step: 310/463, loss: 0.026625145226716995 2023-01-24 03:57:31.828669: step: 312/463, loss: 0.0040268730372190475 2023-01-24 03:57:32.337399: step: 314/463, loss: 0.013235168531537056 2023-01-24 03:57:32.980372: step: 316/463, loss: 0.0004697859985753894 2023-01-24 03:57:33.614305: step: 318/463, loss: 0.0053980364464223385 2023-01-24 03:57:34.214152: step: 320/463, loss: 0.007824977859854698 2023-01-24 03:57:34.909147: step: 322/463, loss: 0.014681877568364143 2023-01-24 03:57:35.512796: step: 324/463, loss: 0.003200782462954521 2023-01-24 03:57:36.127476: step: 326/463, loss: 0.01203584298491478 2023-01-24 03:57:36.770757: step: 328/463, loss: 0.0701024979352951 2023-01-24 03:57:37.402889: step: 330/463, loss: 0.017137421295046806 2023-01-24 03:57:38.012700: step: 332/463, loss: 0.01446082629263401 2023-01-24 03:57:38.571149: step: 334/463, loss: 0.006402396131306887 2023-01-24 03:57:39.201936: step: 336/463, loss: 0.016582287847995758 2023-01-24 03:57:39.805873: step: 338/463, loss: 0.05337226018309593 2023-01-24 03:57:40.474079: step: 340/463, loss: 0.004474237561225891 2023-01-24 03:57:41.028557: step: 342/463, loss: 0.0021448079496622086 2023-01-24 03:57:41.648685: step: 344/463, loss: 0.00017759088950697333 2023-01-24 03:57:42.278820: step: 346/463, loss: 0.004703349433839321 2023-01-24 03:57:42.886533: step: 348/463, loss: 0.005637302063405514 2023-01-24 03:57:43.516123: step: 350/463, loss: 0.01383545808494091 2023-01-24 03:57:44.161892: step: 352/463, loss: 0.051064323633909225 2023-01-24 03:57:44.751221: step: 354/463, loss: 0.0009390169871039689 2023-01-24 03:57:45.535623: step: 356/463, loss: 0.2315608263015747 2023-01-24 03:57:46.144959: step: 358/463, loss: 0.0003287666477262974 2023-01-24 03:57:46.751155: step: 360/463, loss: 0.03099549002945423 2023-01-24 03:57:47.321538: step: 362/463, loss: 0.7973887920379639 2023-01-24 03:57:47.981222: step: 364/463, loss: 0.01659395545721054 2023-01-24 03:57:48.657747: step: 366/463, loss: 0.005600487347692251 2023-01-24 03:57:49.237809: step: 368/463, loss: 0.0022889862302690744 2023-01-24 03:57:49.865452: step: 370/463, loss: 0.0028396525885909796 2023-01-24 03:57:50.529309: step: 372/463, loss: 0.2733951807022095 2023-01-24 03:57:51.157565: step: 374/463, loss: 0.0029625236056745052 2023-01-24 03:57:51.764521: step: 376/463, loss: 0.04764530435204506 2023-01-24 03:57:52.391655: step: 378/463, loss: 0.0473158173263073 2023-01-24 03:57:53.022524: step: 380/463, loss: 0.021352553740143776 2023-01-24 03:57:53.610540: step: 382/463, loss: 0.003747256938368082 2023-01-24 03:57:54.186815: step: 384/463, loss: 7.466435636160895e-05 2023-01-24 03:57:54.861533: step: 386/463, loss: 0.007939601317048073 2023-01-24 03:57:55.438408: step: 388/463, loss: 0.07944590598344803 2023-01-24 03:57:56.045153: step: 390/463, loss: 0.000556295330170542 2023-01-24 03:57:56.678781: step: 392/463, loss: 0.03293980285525322 2023-01-24 03:57:57.254785: step: 394/463, loss: 0.01663028448820114 2023-01-24 03:57:57.819870: step: 396/463, loss: 0.02180313877761364 2023-01-24 03:57:58.384255: step: 398/463, loss: 0.00047571311006322503 2023-01-24 03:57:59.029657: step: 400/463, loss: 0.018062826246023178 2023-01-24 03:57:59.600788: step: 402/463, loss: 0.001344798132777214 2023-01-24 03:58:00.207713: step: 404/463, loss: 0.005027064122259617 2023-01-24 03:58:00.842322: step: 406/463, loss: 0.7167328596115112 2023-01-24 03:58:01.471452: step: 408/463, loss: 0.08638081699609756 2023-01-24 03:58:02.092782: step: 410/463, loss: 0.002496791537851095 2023-01-24 03:58:02.700170: step: 412/463, loss: 6.884579488541931e-05 2023-01-24 03:58:03.316513: step: 414/463, loss: 0.02883158251643181 2023-01-24 03:58:03.880720: step: 416/463, loss: 0.01060231402516365 2023-01-24 03:58:04.446467: step: 418/463, loss: 0.008587300777435303 2023-01-24 03:58:04.988047: step: 420/463, loss: 0.0019782360177487135 2023-01-24 03:58:05.635975: step: 422/463, loss: 0.1591232419013977 2023-01-24 03:58:06.172879: step: 424/463, loss: 0.010507761500775814 2023-01-24 03:58:06.774125: step: 426/463, loss: 0.011206735856831074 2023-01-24 03:58:07.467515: step: 428/463, loss: 0.04298040270805359 2023-01-24 03:58:08.129985: step: 430/463, loss: 0.000632758135907352 2023-01-24 03:58:08.820749: step: 432/463, loss: 0.012523945420980453 2023-01-24 03:58:09.441156: step: 434/463, loss: 0.36266952753067017 2023-01-24 03:58:10.046788: step: 436/463, loss: 0.011708064936101437 2023-01-24 03:58:10.658946: step: 438/463, loss: 0.001885719713754952 2023-01-24 03:58:11.282604: step: 440/463, loss: 0.014458074234426022 2023-01-24 03:58:11.920074: step: 442/463, loss: 0.026317961513996124 2023-01-24 03:58:12.593397: step: 444/463, loss: 0.004992858041077852 2023-01-24 03:58:13.193853: step: 446/463, loss: 0.0016601731767877936 2023-01-24 03:58:13.804495: step: 448/463, loss: 0.05608716979622841 2023-01-24 03:58:14.435704: step: 450/463, loss: 0.012247040867805481 2023-01-24 03:58:15.051095: step: 452/463, loss: 0.039441097527742386 2023-01-24 03:58:15.623997: step: 454/463, loss: 0.02453218773007393 2023-01-24 03:58:16.170958: step: 456/463, loss: 0.0062887161038815975 2023-01-24 03:58:16.856696: step: 458/463, loss: 0.01657373271882534 2023-01-24 03:58:17.428583: step: 460/463, loss: 0.012005730532109737 2023-01-24 03:58:18.088800: step: 462/463, loss: 0.028833623975515366 2023-01-24 03:58:18.687126: step: 464/463, loss: 0.03986469656229019 2023-01-24 03:58:19.255084: step: 466/463, loss: 0.0006596514140255749 2023-01-24 03:58:19.851202: step: 468/463, loss: 0.009200339205563068 2023-01-24 03:58:20.446180: step: 470/463, loss: 0.15731744468212128 2023-01-24 03:58:21.084416: step: 472/463, loss: 0.0037326219025999308 2023-01-24 03:58:21.681727: step: 474/463, loss: 0.0013432613341137767 2023-01-24 03:58:22.297314: step: 476/463, loss: 0.004083434119820595 2023-01-24 03:58:22.911054: step: 478/463, loss: 0.02273726649582386 2023-01-24 03:58:23.572099: step: 480/463, loss: 0.011660085991024971 2023-01-24 03:58:24.117975: step: 482/463, loss: 0.02291260100901127 2023-01-24 03:58:24.777401: step: 484/463, loss: 0.016993245109915733 2023-01-24 03:58:25.377344: step: 486/463, loss: 0.0034471338149160147 2023-01-24 03:58:26.047251: step: 488/463, loss: 0.02825026400387287 2023-01-24 03:58:26.675531: step: 490/463, loss: 0.0016568704741075635 2023-01-24 03:58:27.266918: step: 492/463, loss: 0.35882633924484253 2023-01-24 03:58:27.885670: step: 494/463, loss: 0.039255015552043915 2023-01-24 03:58:28.528825: step: 496/463, loss: 0.012035921216011047 2023-01-24 03:58:29.141671: step: 498/463, loss: 0.05582023039460182 2023-01-24 03:58:29.836847: step: 500/463, loss: 0.005246494431048632 2023-01-24 03:58:30.432673: step: 502/463, loss: 0.008546480908989906 2023-01-24 03:58:31.011293: step: 504/463, loss: 0.001535041257739067 2023-01-24 03:58:31.606176: step: 506/463, loss: 0.026231754571199417 2023-01-24 03:58:32.264151: step: 508/463, loss: 0.00016216209041886032 2023-01-24 03:58:32.922743: step: 510/463, loss: 0.0019158277427777648 2023-01-24 03:58:33.543179: step: 512/463, loss: 0.020430097356438637 2023-01-24 03:58:34.190009: step: 514/463, loss: 0.0023506670258939266 2023-01-24 03:58:34.749506: step: 516/463, loss: 0.0008170974906533957 2023-01-24 03:58:35.347558: step: 518/463, loss: 0.016291765496134758 2023-01-24 03:58:35.909483: step: 520/463, loss: 6.906566704856232e-05 2023-01-24 03:58:36.580471: step: 522/463, loss: 0.005249998532235622 2023-01-24 03:58:37.201588: step: 524/463, loss: 0.01802987977862358 2023-01-24 03:58:37.729207: step: 526/463, loss: 0.02856077067553997 2023-01-24 03:58:38.313041: step: 528/463, loss: 0.007953857071697712 2023-01-24 03:58:38.906376: step: 530/463, loss: 0.0005640205927193165 2023-01-24 03:58:39.573086: step: 532/463, loss: 0.03563994541764259 2023-01-24 03:58:40.256397: step: 534/463, loss: 0.0012998809106647968 2023-01-24 03:58:40.887793: step: 536/463, loss: 0.04176727682352066 2023-01-24 03:58:41.474591: step: 538/463, loss: 0.0002324201923329383 2023-01-24 03:58:42.099973: step: 540/463, loss: 0.0004154304333496839 2023-01-24 03:58:42.696671: step: 542/463, loss: 0.023231608793139458 2023-01-24 03:58:43.342056: step: 544/463, loss: 0.03646976128220558 2023-01-24 03:58:43.963389: step: 546/463, loss: 0.05267562344670296 2023-01-24 03:58:44.582671: step: 548/463, loss: 0.011600497178733349 2023-01-24 03:58:45.214576: step: 550/463, loss: 0.015198652632534504 2023-01-24 03:58:45.834465: step: 552/463, loss: 0.6468573808670044 2023-01-24 03:58:46.358953: step: 554/463, loss: 0.006744857877492905 2023-01-24 03:58:46.966785: step: 556/463, loss: 0.005127535201609135 2023-01-24 03:58:47.552043: step: 558/463, loss: 0.0048785884864628315 2023-01-24 03:58:48.127351: step: 560/463, loss: 0.003410136792808771 2023-01-24 03:58:48.772229: step: 562/463, loss: 0.005176797974854708 2023-01-24 03:58:49.395079: step: 564/463, loss: 0.001908926758915186 2023-01-24 03:58:50.000217: step: 566/463, loss: 1.9047214664169587e-05 2023-01-24 03:58:50.610475: step: 568/463, loss: 0.06040994077920914 2023-01-24 03:58:51.194503: step: 570/463, loss: 0.0005264796782284975 2023-01-24 03:58:51.833585: step: 572/463, loss: 0.00658460333943367 2023-01-24 03:58:52.418060: step: 574/463, loss: 0.0541391484439373 2023-01-24 03:58:53.087178: step: 576/463, loss: 0.03383643925189972 2023-01-24 03:58:53.751061: step: 578/463, loss: 0.035475488752126694 2023-01-24 03:58:54.353508: step: 580/463, loss: 0.00944242812693119 2023-01-24 03:58:54.953105: step: 582/463, loss: 0.005249816458672285 2023-01-24 03:58:55.542391: step: 584/463, loss: 0.010450259782373905 2023-01-24 03:58:56.145509: step: 586/463, loss: 0.0034519657492637634 2023-01-24 03:58:56.678757: step: 588/463, loss: 0.00016075785970315337 2023-01-24 03:58:57.357382: step: 590/463, loss: 0.06734348833560944 2023-01-24 03:58:57.945632: step: 592/463, loss: 0.005445543210953474 2023-01-24 03:58:58.504252: step: 594/463, loss: 0.00023987282474990934 2023-01-24 03:58:59.115932: step: 596/463, loss: 0.07066363096237183 2023-01-24 03:58:59.699395: step: 598/463, loss: 0.009559571743011475 2023-01-24 03:59:00.289472: step: 600/463, loss: 0.019260212779045105 2023-01-24 03:59:00.876080: step: 602/463, loss: 0.054603107273578644 2023-01-24 03:59:01.478607: step: 604/463, loss: 0.0011515539372339845 2023-01-24 03:59:02.023784: step: 606/463, loss: 0.005755016580224037 2023-01-24 03:59:02.629858: step: 608/463, loss: 0.012383447960019112 2023-01-24 03:59:03.260744: step: 610/463, loss: 0.0011383414966985583 2023-01-24 03:59:03.853214: step: 612/463, loss: 0.031573131680488586 2023-01-24 03:59:04.374245: step: 614/463, loss: 0.000995947397314012 2023-01-24 03:59:05.056602: step: 616/463, loss: 0.7395897507667542 2023-01-24 03:59:05.691775: step: 618/463, loss: 0.048117369413375854 2023-01-24 03:59:06.218152: step: 620/463, loss: 0.03181912750005722 2023-01-24 03:59:06.808833: step: 622/463, loss: 0.0028155629988759756 2023-01-24 03:59:07.417135: step: 624/463, loss: 0.16174601018428802 2023-01-24 03:59:08.019391: step: 626/463, loss: 0.04744167998433113 2023-01-24 03:59:08.664365: step: 628/463, loss: 0.013348712585866451 2023-01-24 03:59:09.268346: step: 630/463, loss: 0.027135038748383522 2023-01-24 03:59:09.951406: step: 632/463, loss: 0.03360970318317413 2023-01-24 03:59:10.634086: step: 634/463, loss: 0.03294328972697258 2023-01-24 03:59:11.303332: step: 636/463, loss: 0.03222019597887993 2023-01-24 03:59:11.898036: step: 638/463, loss: 0.0018262427765876055 2023-01-24 03:59:12.475644: step: 640/463, loss: 0.02675807476043701 2023-01-24 03:59:13.097232: step: 642/463, loss: 0.025562116876244545 2023-01-24 03:59:13.709369: step: 644/463, loss: 0.06224594637751579 2023-01-24 03:59:14.304685: step: 646/463, loss: 0.04108177497982979 2023-01-24 03:59:14.885805: step: 648/463, loss: 0.0028384001925587654 2023-01-24 03:59:15.536131: step: 650/463, loss: 0.0018289118306711316 2023-01-24 03:59:16.211944: step: 652/463, loss: 0.014852159656584263 2023-01-24 03:59:16.838193: step: 654/463, loss: 0.001475636730901897 2023-01-24 03:59:17.402809: step: 656/463, loss: 0.02210184745490551 2023-01-24 03:59:17.981133: step: 658/463, loss: 0.01853053830564022 2023-01-24 03:59:18.580762: step: 660/463, loss: 0.09320429712533951 2023-01-24 03:59:19.137067: step: 662/463, loss: 0.015808576717972755 2023-01-24 03:59:19.771581: step: 664/463, loss: 0.00013743633462581784 2023-01-24 03:59:20.335674: step: 666/463, loss: 0.0068207248114049435 2023-01-24 03:59:20.964484: step: 668/463, loss: 0.029963258653879166 2023-01-24 03:59:21.549066: step: 670/463, loss: 0.008672508411109447 2023-01-24 03:59:22.184317: step: 672/463, loss: 0.0331050269305706 2023-01-24 03:59:22.795524: step: 674/463, loss: 0.018555283546447754 2023-01-24 03:59:23.472358: step: 676/463, loss: 0.0025362588930875063 2023-01-24 03:59:24.175452: step: 678/463, loss: 0.01964394934475422 2023-01-24 03:59:24.729092: step: 680/463, loss: 0.009995434433221817 2023-01-24 03:59:25.314845: step: 682/463, loss: 0.0027733054012060165 2023-01-24 03:59:25.906730: step: 684/463, loss: 0.022071074694395065 2023-01-24 03:59:26.503132: step: 686/463, loss: 0.025674713775515556 2023-01-24 03:59:27.155249: step: 688/463, loss: 0.007691400125622749 2023-01-24 03:59:27.894271: step: 690/463, loss: 0.07100996375083923 2023-01-24 03:59:28.561062: step: 692/463, loss: 0.026908760890364647 2023-01-24 03:59:29.130090: step: 694/463, loss: 0.004746427293866873 2023-01-24 03:59:29.784798: step: 696/463, loss: 0.030488725751638412 2023-01-24 03:59:30.397755: step: 698/463, loss: 0.0661323070526123 2023-01-24 03:59:31.034549: step: 700/463, loss: 0.016116736456751823 2023-01-24 03:59:31.635018: step: 702/463, loss: 0.03909580036997795 2023-01-24 03:59:32.177367: step: 704/463, loss: 0.016833040863275528 2023-01-24 03:59:32.804982: step: 706/463, loss: 0.005295882932841778 2023-01-24 03:59:33.450564: step: 708/463, loss: 0.00467087235301733 2023-01-24 03:59:34.087558: step: 710/463, loss: 0.025982731953263283 2023-01-24 03:59:34.676441: step: 712/463, loss: 0.02441459521651268 2023-01-24 03:59:35.241367: step: 714/463, loss: 0.007864666171371937 2023-01-24 03:59:35.867078: step: 716/463, loss: 0.10547102987766266 2023-01-24 03:59:36.466703: step: 718/463, loss: 0.004200815688818693 2023-01-24 03:59:37.063991: step: 720/463, loss: 0.0004409697721712291 2023-01-24 03:59:37.634870: step: 722/463, loss: 0.08174954354763031 2023-01-24 03:59:38.318916: step: 724/463, loss: 0.07840759307146072 2023-01-24 03:59:38.856704: step: 726/463, loss: 0.0043959952890872955 2023-01-24 03:59:39.504990: step: 728/463, loss: 0.18816347420215607 2023-01-24 03:59:40.119344: step: 730/463, loss: 0.03758756443858147 2023-01-24 03:59:40.709780: step: 732/463, loss: 0.01197047159075737 2023-01-24 03:59:41.314401: step: 734/463, loss: 0.009181518107652664 2023-01-24 03:59:41.944256: step: 736/463, loss: 0.03412233665585518 2023-01-24 03:59:42.526687: step: 738/463, loss: 0.02147078327834606 2023-01-24 03:59:43.104870: step: 740/463, loss: 0.00813200417906046 2023-01-24 03:59:43.801638: step: 742/463, loss: 0.0014224787009879947 2023-01-24 03:59:44.484351: step: 744/463, loss: 0.015566750429570675 2023-01-24 03:59:45.058532: step: 746/463, loss: 0.0007399892783723772 2023-01-24 03:59:45.641659: step: 748/463, loss: 0.009280719794332981 2023-01-24 03:59:46.267503: step: 750/463, loss: 0.013851113617420197 2023-01-24 03:59:46.849608: step: 752/463, loss: 0.016852153465151787 2023-01-24 03:59:47.452152: step: 754/463, loss: 0.00749478954821825 2023-01-24 03:59:48.167681: step: 756/463, loss: 0.0030878072138875723 2023-01-24 03:59:48.804812: step: 758/463, loss: 0.04691128432750702 2023-01-24 03:59:49.443385: step: 760/463, loss: 0.009341364726424217 2023-01-24 03:59:50.098423: step: 762/463, loss: 0.05508425459265709 2023-01-24 03:59:50.889317: step: 764/463, loss: 0.03854438289999962 2023-01-24 03:59:51.474468: step: 766/463, loss: 0.01581127941608429 2023-01-24 03:59:52.218263: step: 768/463, loss: 0.005890332628041506 2023-01-24 03:59:52.830257: step: 770/463, loss: 0.05794880911707878 2023-01-24 03:59:53.456938: step: 772/463, loss: 0.009432896040380001 2023-01-24 03:59:54.114502: step: 774/463, loss: 0.021078072488307953 2023-01-24 03:59:54.744418: step: 776/463, loss: 0.007599270902574062 2023-01-24 03:59:55.370334: step: 778/463, loss: 0.0005113847437314689 2023-01-24 03:59:55.979175: step: 780/463, loss: 0.006401469931006432 2023-01-24 03:59:56.602828: step: 782/463, loss: 0.014508014544844627 2023-01-24 03:59:57.209337: step: 784/463, loss: 0.002345010172575712 2023-01-24 03:59:57.986076: step: 786/463, loss: 0.007189466618001461 2023-01-24 03:59:58.630200: step: 788/463, loss: 0.009035010822117329 2023-01-24 03:59:59.184769: step: 790/463, loss: 0.005614686757326126 2023-01-24 03:59:59.843458: step: 792/463, loss: 0.07986856997013092 2023-01-24 04:00:00.439660: step: 794/463, loss: 0.12394262850284576 2023-01-24 04:00:01.083252: step: 796/463, loss: 0.005549500696361065 2023-01-24 04:00:01.670073: step: 798/463, loss: 0.04039314389228821 2023-01-24 04:00:02.281579: step: 800/463, loss: 0.001079953508451581 2023-01-24 04:00:02.924646: step: 802/463, loss: 0.03734450787305832 2023-01-24 04:00:03.554962: step: 804/463, loss: 0.0039762272499501705 2023-01-24 04:00:04.155278: step: 806/463, loss: 0.010790260508656502 2023-01-24 04:00:04.823018: step: 808/463, loss: 0.01745694875717163 2023-01-24 04:00:05.452087: step: 810/463, loss: 0.5384841561317444 2023-01-24 04:00:06.084559: step: 812/463, loss: 0.0416187047958374 2023-01-24 04:00:06.761909: step: 814/463, loss: 0.06309138238430023 2023-01-24 04:00:07.284210: step: 816/463, loss: 0.000431559921707958 2023-01-24 04:00:07.903128: step: 818/463, loss: 0.012816688977181911 2023-01-24 04:00:08.564829: step: 820/463, loss: 0.05416051298379898 2023-01-24 04:00:09.185754: step: 822/463, loss: 0.0023913108743727207 2023-01-24 04:00:09.815785: step: 824/463, loss: 0.3894469439983368 2023-01-24 04:00:10.420764: step: 826/463, loss: 0.012212458997964859 2023-01-24 04:00:11.134272: step: 828/463, loss: 0.01997094601392746 2023-01-24 04:00:11.797000: step: 830/463, loss: 0.00045535824028775096 2023-01-24 04:00:12.352279: step: 832/463, loss: 0.02898307703435421 2023-01-24 04:00:12.976171: step: 834/463, loss: 0.22472518682479858 2023-01-24 04:00:13.620072: step: 836/463, loss: 0.18519264459609985 2023-01-24 04:00:14.248228: step: 838/463, loss: 0.005664360243827105 2023-01-24 04:00:14.894055: step: 840/463, loss: 0.012907752767205238 2023-01-24 04:00:15.491504: step: 842/463, loss: 0.00020149107149336487 2023-01-24 04:00:16.133238: step: 844/463, loss: 0.017091726884245872 2023-01-24 04:00:16.779167: step: 846/463, loss: 0.0016688171308487654 2023-01-24 04:00:17.421931: step: 848/463, loss: 0.004266593139618635 2023-01-24 04:00:18.101998: step: 850/463, loss: 0.012012452818453312 2023-01-24 04:00:18.750544: step: 852/463, loss: 0.06989787518978119 2023-01-24 04:00:19.391407: step: 854/463, loss: 0.0033829433377832174 2023-01-24 04:00:20.031238: step: 856/463, loss: 0.0267151091247797 2023-01-24 04:00:20.643554: step: 858/463, loss: 0.008524909615516663 2023-01-24 04:00:21.191077: step: 860/463, loss: 0.03437764570116997 2023-01-24 04:00:21.745244: step: 862/463, loss: 0.009047313593327999 2023-01-24 04:00:22.485264: step: 864/463, loss: 0.01516711339354515 2023-01-24 04:00:23.061678: step: 866/463, loss: 0.0020795678719878197 2023-01-24 04:00:23.666818: step: 868/463, loss: 0.034122999757528305 2023-01-24 04:00:24.224703: step: 870/463, loss: 0.1948269158601761 2023-01-24 04:00:24.841885: step: 872/463, loss: 0.31432220339775085 2023-01-24 04:00:25.444656: step: 874/463, loss: 0.010344895534217358 2023-01-24 04:00:26.038864: step: 876/463, loss: 0.001328377635218203 2023-01-24 04:00:26.662554: step: 878/463, loss: 0.0018691495060920715 2023-01-24 04:00:27.292368: step: 880/463, loss: 0.0036268578842282295 2023-01-24 04:00:27.841654: step: 882/463, loss: 0.06314323842525482 2023-01-24 04:00:28.425926: step: 884/463, loss: 0.0009191217832267284 2023-01-24 04:00:29.092603: step: 886/463, loss: 0.010080519132316113 2023-01-24 04:00:29.723554: step: 888/463, loss: 0.019825082272291183 2023-01-24 04:00:30.294431: step: 890/463, loss: 0.001594318775460124 2023-01-24 04:00:30.954446: step: 892/463, loss: 0.008323898538947105 2023-01-24 04:00:31.605057: step: 894/463, loss: 0.13878990709781647 2023-01-24 04:00:32.238342: step: 896/463, loss: 0.008268610574305058 2023-01-24 04:00:32.868713: step: 898/463, loss: 0.06482666730880737 2023-01-24 04:00:33.485587: step: 900/463, loss: 0.016729217022657394 2023-01-24 04:00:34.100244: step: 902/463, loss: 0.02992217428982258 2023-01-24 04:00:34.738022: step: 904/463, loss: 0.07099413871765137 2023-01-24 04:00:35.331294: step: 906/463, loss: 0.01644017919898033 2023-01-24 04:00:35.939089: step: 908/463, loss: 0.03739346191287041 2023-01-24 04:00:36.608798: step: 910/463, loss: 0.04760941490530968 2023-01-24 04:00:37.220563: step: 912/463, loss: 0.0301815215498209 2023-01-24 04:00:37.806022: step: 914/463, loss: 0.00023451166634913534 2023-01-24 04:00:38.407686: step: 916/463, loss: 0.0017771677812561393 2023-01-24 04:00:39.101142: step: 918/463, loss: 0.4552406668663025 2023-01-24 04:00:39.806105: step: 920/463, loss: 0.03320131450891495 2023-01-24 04:00:40.457954: step: 922/463, loss: 0.00185881566721946 2023-01-24 04:00:41.176869: step: 924/463, loss: 0.33790406584739685 2023-01-24 04:00:41.777870: step: 926/463, loss: 0.016135603189468384 ================================================== Loss: 0.048 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3544842262631194, 'r': 0.32421517468467465, 'f1': 0.33867472162303974}, 'combined': 0.24954979488013454, 'epoch': 32} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3615383998139277, 'r': 0.38014699392199747, 'f1': 0.3706092557232378}, 'combined': 0.2872665044361939, 'epoch': 32} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32957963408562896, 'r': 0.32770346918571075, 'f1': 0.3286388739502751}, 'combined': 0.24215495975283424, 'epoch': 32} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34066324761966477, 'r': 0.3769839615202908, 'f1': 0.3579044940087927}, 'combined': 0.2774187943991599, 'epoch': 32} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32748465971442603, 'r': 0.3249990076482824, 'f1': 0.32623709910599014}, 'combined': 0.24038523092020325, 'epoch': 32} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.348137098259148, 'r': 0.36957568794973894, 'f1': 0.35853620016880605}, 'combined': 0.27790844223610806, 'epoch': 32} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3020833333333333, 'r': 0.25892857142857145, 'f1': 0.27884615384615385}, 'combined': 0.1858974358974359, 'epoch': 32} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.32608695652173914, 'f1': 0.3191489361702128}, 'combined': 0.1595744680851064, 'epoch': 32} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45454545454545453, 'r': 0.1724137931034483, 'f1': 0.25000000000000006}, 'combined': 0.16666666666666669, 'epoch': 32} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35531381856540084, 'r': 0.3195801707779886, 'f1': 0.336500999000999}, 'combined': 0.24794810452705188, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35077406976066583, 'r': 0.38269193088778525, 'f1': 0.3660385237854157}, 'combined': 0.2837236404461117, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.3142857142857143, 'f1': 0.360655737704918}, 'combined': 0.24043715846994532, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3556843389529725, 'r': 0.32598773380319873, 'f1': 0.340189179632249}, 'combined': 0.25066571130797294, 'epoch': 19} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35235259195938173, 'r': 0.3698406801632481, 'f1': 0.3608848968767838}, 'combined': 0.2797289631293731, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.41304347826086957, 'r': 0.41304347826086957, 'f1': 0.41304347826086957}, 'combined': 0.20652173913043478, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34001213859732793, 'r': 0.32968918941790243, 'f1': 0.33477110370565427}, 'combined': 0.24667344483574524, 'epoch': 29} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34157155234650616, 'r': 0.36386160769264764, 'f1': 0.3523644229368942}, 'combined': 0.2731245766305113, 'epoch': 29} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6666666666666666, 'r': 0.20689655172413793, 'f1': 0.31578947368421056}, 'combined': 0.2105263157894737, 'epoch': 29} ****************************** Epoch: 33 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:03:16.717622: step: 2/463, loss: 0.013177735731005669 2023-01-24 04:03:17.349639: step: 4/463, loss: 0.029422033578157425 2023-01-24 04:03:17.943307: step: 6/463, loss: 0.0521036721765995 2023-01-24 04:03:18.628033: step: 8/463, loss: 0.7831087112426758 2023-01-24 04:03:19.209555: step: 10/463, loss: 0.0006630992284044623 2023-01-24 04:03:19.918981: step: 12/463, loss: 0.0023785794619470835 2023-01-24 04:03:20.518477: step: 14/463, loss: 0.0017757429741322994 2023-01-24 04:03:21.137759: step: 16/463, loss: 0.0009827989852055907 2023-01-24 04:03:21.838461: step: 18/463, loss: 0.0023622713051736355 2023-01-24 04:03:22.417287: step: 20/463, loss: 0.01516252662986517 2023-01-24 04:03:22.968297: step: 22/463, loss: 0.003957367967814207 2023-01-24 04:03:23.610318: step: 24/463, loss: 0.027441132813692093 2023-01-24 04:03:24.164640: step: 26/463, loss: 0.004595894832164049 2023-01-24 04:03:24.828649: step: 28/463, loss: 0.009913764894008636 2023-01-24 04:03:25.392871: step: 30/463, loss: 0.0001937171764438972 2023-01-24 04:03:26.008533: step: 32/463, loss: 0.0073243496008217335 2023-01-24 04:03:26.662289: step: 34/463, loss: 0.06665915250778198 2023-01-24 04:03:27.230120: step: 36/463, loss: 0.21806226670742035 2023-01-24 04:03:27.846445: step: 38/463, loss: 0.019458046182990074 2023-01-24 04:03:28.507449: step: 40/463, loss: 0.04964243993163109 2023-01-24 04:03:29.150883: step: 42/463, loss: 0.05178474262356758 2023-01-24 04:03:29.732386: step: 44/463, loss: 0.012613000348210335 2023-01-24 04:03:30.366009: step: 46/463, loss: 0.0006787462625652552 2023-01-24 04:03:30.988031: step: 48/463, loss: 0.03951142728328705 2023-01-24 04:03:31.643603: step: 50/463, loss: 0.06550993770360947 2023-01-24 04:03:32.224339: step: 52/463, loss: 0.0031946974340826273 2023-01-24 04:03:32.842845: step: 54/463, loss: 0.0009893204551190138 2023-01-24 04:03:33.477244: step: 56/463, loss: 0.030004192143678665 2023-01-24 04:03:34.067646: step: 58/463, loss: 0.07743649929761887 2023-01-24 04:03:34.681186: step: 60/463, loss: 0.004632330033928156 2023-01-24 04:03:35.322439: step: 62/463, loss: 0.005031968932598829 2023-01-24 04:03:35.920095: step: 64/463, loss: 0.012747881934046745 2023-01-24 04:03:36.472137: step: 66/463, loss: 0.002852066420018673 2023-01-24 04:03:37.050601: step: 68/463, loss: 0.03280583769083023 2023-01-24 04:03:37.666150: step: 70/463, loss: 0.02015034481883049 2023-01-24 04:03:38.305004: step: 72/463, loss: 0.004480517003685236 2023-01-24 04:03:38.907064: step: 74/463, loss: 0.020288977771997452 2023-01-24 04:03:39.506051: step: 76/463, loss: 0.0014101208653301 2023-01-24 04:03:40.074757: step: 78/463, loss: 0.013872583396732807 2023-01-24 04:03:40.693680: step: 80/463, loss: 0.023355277255177498 2023-01-24 04:03:41.253653: step: 82/463, loss: 0.009263689629733562 2023-01-24 04:03:41.900369: step: 84/463, loss: 0.012693917378783226 2023-01-24 04:03:42.502942: step: 86/463, loss: 0.011706424877047539 2023-01-24 04:03:43.133162: step: 88/463, loss: 0.051203593611717224 2023-01-24 04:03:43.740608: step: 90/463, loss: 0.005326269660145044 2023-01-24 04:03:44.373563: step: 92/463, loss: 0.006566681899130344 2023-01-24 04:03:44.985966: step: 94/463, loss: 0.010242292657494545 2023-01-24 04:03:45.568275: step: 96/463, loss: 0.02074410952627659 2023-01-24 04:03:46.241734: step: 98/463, loss: 0.0004857122548855841 2023-01-24 04:03:46.940630: step: 100/463, loss: 0.023802557960152626 2023-01-24 04:03:47.536935: step: 102/463, loss: 0.027592146769165993 2023-01-24 04:03:48.141287: step: 104/463, loss: 0.005277467425912619 2023-01-24 04:03:48.795249: step: 106/463, loss: 0.09126009047031403 2023-01-24 04:03:49.401701: step: 108/463, loss: 0.05373068153858185 2023-01-24 04:03:49.972833: step: 110/463, loss: 0.002197599271312356 2023-01-24 04:03:50.604333: step: 112/463, loss: 0.004238425754010677 2023-01-24 04:03:51.234979: step: 114/463, loss: 0.02249746024608612 2023-01-24 04:03:51.817476: step: 116/463, loss: 0.0004886161768808961 2023-01-24 04:03:52.454055: step: 118/463, loss: 0.14667724072933197 2023-01-24 04:03:53.030789: step: 120/463, loss: 0.0007660912815481424 2023-01-24 04:03:53.623695: step: 122/463, loss: 0.001688626827672124 2023-01-24 04:03:54.156487: step: 124/463, loss: 0.12359911948442459 2023-01-24 04:03:54.837611: step: 126/463, loss: 0.0017821916844695807 2023-01-24 04:03:55.406176: step: 128/463, loss: 0.0020403775852173567 2023-01-24 04:03:56.090039: step: 130/463, loss: 0.0015391093911603093 2023-01-24 04:03:56.719833: step: 132/463, loss: 0.02669435925781727 2023-01-24 04:03:57.321595: step: 134/463, loss: 0.0019884684588760138 2023-01-24 04:03:58.003155: step: 136/463, loss: 0.006303318776190281 2023-01-24 04:03:58.546932: step: 138/463, loss: 0.01015305146574974 2023-01-24 04:03:59.139100: step: 140/463, loss: 0.027442781254649162 2023-01-24 04:03:59.733029: step: 142/463, loss: 0.07229649275541306 2023-01-24 04:04:00.263050: step: 144/463, loss: 0.0019841294270008802 2023-01-24 04:04:00.879773: step: 146/463, loss: 0.045641813427209854 2023-01-24 04:04:01.472244: step: 148/463, loss: 0.004566526971757412 2023-01-24 04:04:02.089282: step: 150/463, loss: 0.030943425372242928 2023-01-24 04:04:02.683305: step: 152/463, loss: 0.0009443388553336263 2023-01-24 04:04:03.345234: step: 154/463, loss: 0.004768942482769489 2023-01-24 04:04:03.971549: step: 156/463, loss: 0.06695771962404251 2023-01-24 04:04:04.621490: step: 158/463, loss: 0.1569249927997589 2023-01-24 04:04:05.210121: step: 160/463, loss: 0.12606506049633026 2023-01-24 04:04:05.838887: step: 162/463, loss: 0.03515082970261574 2023-01-24 04:04:06.526650: step: 164/463, loss: 0.0030049022752791643 2023-01-24 04:04:07.091623: step: 166/463, loss: 0.00029705179622396827 2023-01-24 04:04:07.659388: step: 168/463, loss: 0.06618739664554596 2023-01-24 04:04:08.249780: step: 170/463, loss: 0.006797961890697479 2023-01-24 04:04:08.876247: step: 172/463, loss: 0.0024814188946038485 2023-01-24 04:04:09.494576: step: 174/463, loss: 0.0435347855091095 2023-01-24 04:04:10.209377: step: 176/463, loss: 0.025983229279518127 2023-01-24 04:04:10.809941: step: 178/463, loss: 0.01597868651151657 2023-01-24 04:04:11.437337: step: 180/463, loss: 0.005939070601016283 2023-01-24 04:04:12.054607: step: 182/463, loss: 0.03352585807442665 2023-01-24 04:04:12.675175: step: 184/463, loss: 0.038901034742593765 2023-01-24 04:04:13.311864: step: 186/463, loss: 0.01623605191707611 2023-01-24 04:04:13.891435: step: 188/463, loss: 0.012403161264955997 2023-01-24 04:04:14.497267: step: 190/463, loss: 0.0018404530128464103 2023-01-24 04:04:14.997363: step: 192/463, loss: 0.009095106273889542 2023-01-24 04:04:15.678317: step: 194/463, loss: 0.011725467629730701 2023-01-24 04:04:16.324912: step: 196/463, loss: 0.03635198622941971 2023-01-24 04:04:16.961997: step: 198/463, loss: 0.03714846819639206 2023-01-24 04:04:17.621097: step: 200/463, loss: 0.05019973963499069 2023-01-24 04:04:18.220484: step: 202/463, loss: 0.028682127594947815 2023-01-24 04:04:18.833631: step: 204/463, loss: 0.0020879965741187334 2023-01-24 04:04:19.437125: step: 206/463, loss: 0.007942724972963333 2023-01-24 04:04:20.036893: step: 208/463, loss: 0.01245945319533348 2023-01-24 04:04:20.625638: step: 210/463, loss: 0.04298003390431404 2023-01-24 04:04:21.217274: step: 212/463, loss: 0.02037498727440834 2023-01-24 04:04:21.889733: step: 214/463, loss: 0.09421339631080627 2023-01-24 04:04:22.446660: step: 216/463, loss: 0.4100435674190521 2023-01-24 04:04:23.030153: step: 218/463, loss: 0.0009400646667927504 2023-01-24 04:04:23.605135: step: 220/463, loss: 0.025177719071507454 2023-01-24 04:04:24.215009: step: 222/463, loss: 0.012517690658569336 2023-01-24 04:04:24.837705: step: 224/463, loss: 0.2530026137828827 2023-01-24 04:04:25.397647: step: 226/463, loss: 0.012043198570609093 2023-01-24 04:04:26.020229: step: 228/463, loss: 0.007266404572874308 2023-01-24 04:04:26.657099: step: 230/463, loss: 0.09906034171581268 2023-01-24 04:04:27.278794: step: 232/463, loss: 0.02197418175637722 2023-01-24 04:04:27.953795: step: 234/463, loss: 0.008904719725251198 2023-01-24 04:04:28.544358: step: 236/463, loss: 0.05396690219640732 2023-01-24 04:04:29.154262: step: 238/463, loss: 0.002286656992509961 2023-01-24 04:04:29.663072: step: 240/463, loss: 0.008480419404804707 2023-01-24 04:04:30.326715: step: 242/463, loss: 0.010825510136783123 2023-01-24 04:04:30.978096: step: 244/463, loss: 0.03129807114601135 2023-01-24 04:04:31.511486: step: 246/463, loss: 0.0008733254508115351 2023-01-24 04:04:32.128755: step: 248/463, loss: 0.02321523427963257 2023-01-24 04:04:32.660453: step: 250/463, loss: 0.001570650259964168 2023-01-24 04:04:33.312284: step: 252/463, loss: 0.03918491676449776 2023-01-24 04:04:33.862943: step: 254/463, loss: 0.16565479338169098 2023-01-24 04:04:34.482243: step: 256/463, loss: 1.4607250690460205 2023-01-24 04:04:35.085027: step: 258/463, loss: 0.044743604958057404 2023-01-24 04:04:35.662632: step: 260/463, loss: 0.007310402113944292 2023-01-24 04:04:36.293714: step: 262/463, loss: 0.01301309373229742 2023-01-24 04:04:36.890325: step: 264/463, loss: 0.019325660541653633 2023-01-24 04:04:37.551444: step: 266/463, loss: 0.047689154744148254 2023-01-24 04:04:38.144529: step: 268/463, loss: 0.009946894831955433 2023-01-24 04:04:38.772749: step: 270/463, loss: 0.005585205275565386 2023-01-24 04:04:39.315634: step: 272/463, loss: 0.0007765499758534133 2023-01-24 04:04:39.975523: step: 274/463, loss: 0.01617380604147911 2023-01-24 04:04:40.620714: step: 276/463, loss: 0.038956161588430405 2023-01-24 04:04:41.185225: step: 278/463, loss: 0.015469358302652836 2023-01-24 04:04:41.755705: step: 280/463, loss: 0.007868226617574692 2023-01-24 04:04:42.431723: step: 282/463, loss: 0.016742585226893425 2023-01-24 04:04:42.997727: step: 284/463, loss: 0.004956222604960203 2023-01-24 04:04:43.557047: step: 286/463, loss: 1.3458346984407399e-05 2023-01-24 04:04:44.163146: step: 288/463, loss: 0.0016780121950432658 2023-01-24 04:04:44.769494: step: 290/463, loss: 0.003546732710674405 2023-01-24 04:04:45.394342: step: 292/463, loss: 0.12040197104215622 2023-01-24 04:04:46.170814: step: 294/463, loss: 0.08213011175394058 2023-01-24 04:04:46.757931: step: 296/463, loss: 0.0020931046456098557 2023-01-24 04:04:47.391853: step: 298/463, loss: 0.0006639169296249747 2023-01-24 04:04:48.075065: step: 300/463, loss: 0.0018088988726958632 2023-01-24 04:04:48.733508: step: 302/463, loss: 0.06725714355707169 2023-01-24 04:04:49.379100: step: 304/463, loss: 0.0026506278663873672 2023-01-24 04:04:50.061273: step: 306/463, loss: 0.0436609648168087 2023-01-24 04:04:50.633966: step: 308/463, loss: 0.02149411477148533 2023-01-24 04:04:51.129289: step: 310/463, loss: 0.005827387794852257 2023-01-24 04:04:51.743803: step: 312/463, loss: 0.016611739993095398 2023-01-24 04:04:52.309564: step: 314/463, loss: 0.003579403506591916 2023-01-24 04:04:52.909471: step: 316/463, loss: 0.004192912019789219 2023-01-24 04:04:53.507590: step: 318/463, loss: 0.013725695200264454 2023-01-24 04:04:54.177103: step: 320/463, loss: 0.03245076164603233 2023-01-24 04:04:54.928885: step: 322/463, loss: 0.11209096759557724 2023-01-24 04:04:55.573410: step: 324/463, loss: 0.0012780773686245084 2023-01-24 04:04:56.223045: step: 326/463, loss: 0.04920756816864014 2023-01-24 04:04:56.972670: step: 328/463, loss: 0.00143470021430403 2023-01-24 04:04:57.564275: step: 330/463, loss: 1.839549258875195e-05 2023-01-24 04:04:58.256886: step: 332/463, loss: 0.032383084297180176 2023-01-24 04:04:58.916116: step: 334/463, loss: 0.020892543718218803 2023-01-24 04:04:59.517745: step: 336/463, loss: 0.004676688928157091 2023-01-24 04:05:00.152505: step: 338/463, loss: 0.012989516369998455 2023-01-24 04:05:00.821126: step: 340/463, loss: 0.011859335005283356 2023-01-24 04:05:01.383394: step: 342/463, loss: 0.019382981583476067 2023-01-24 04:05:02.035145: step: 344/463, loss: 0.020520128309726715 2023-01-24 04:05:02.616882: step: 346/463, loss: 0.009408780373632908 2023-01-24 04:05:03.177884: step: 348/463, loss: 0.0027982189785689116 2023-01-24 04:05:03.854049: step: 350/463, loss: 0.015885649248957634 2023-01-24 04:05:04.426465: step: 352/463, loss: 0.016582250595092773 2023-01-24 04:05:05.069699: step: 354/463, loss: 0.010317642241716385 2023-01-24 04:05:05.728769: step: 356/463, loss: 0.033750079572200775 2023-01-24 04:05:06.335700: step: 358/463, loss: 0.491613507270813 2023-01-24 04:05:07.019888: step: 360/463, loss: 0.009647327475249767 2023-01-24 04:05:07.669388: step: 362/463, loss: 0.045069120824337006 2023-01-24 04:05:08.299309: step: 364/463, loss: 0.0022171225864440203 2023-01-24 04:05:08.904968: step: 366/463, loss: 0.0004882703360635787 2023-01-24 04:05:09.478896: step: 368/463, loss: 0.007154977414757013 2023-01-24 04:05:10.100164: step: 370/463, loss: 0.010284720920026302 2023-01-24 04:05:10.672988: step: 372/463, loss: 9.433650120627135e-05 2023-01-24 04:05:11.360881: step: 374/463, loss: 0.024721626192331314 2023-01-24 04:05:11.961056: step: 376/463, loss: 0.002616564277559519 2023-01-24 04:05:12.601428: step: 378/463, loss: 0.008903161622583866 2023-01-24 04:05:13.219887: step: 380/463, loss: 0.0018138960003852844 2023-01-24 04:05:13.807396: step: 382/463, loss: 0.0036056144163012505 2023-01-24 04:05:14.354121: step: 384/463, loss: 0.020847296342253685 2023-01-24 04:05:14.960562: step: 386/463, loss: 0.0006662294617854059 2023-01-24 04:05:15.536938: step: 388/463, loss: 0.009713188745081425 2023-01-24 04:05:16.159779: step: 390/463, loss: 0.0009053823887370527 2023-01-24 04:05:16.775517: step: 392/463, loss: 0.023109719157218933 2023-01-24 04:05:17.378304: step: 394/463, loss: 0.001229922752827406 2023-01-24 04:05:18.023061: step: 396/463, loss: 0.004341547377407551 2023-01-24 04:05:18.572497: step: 398/463, loss: 0.0014307390665635467 2023-01-24 04:05:19.205539: step: 400/463, loss: 0.002218321431428194 2023-01-24 04:05:19.811892: step: 402/463, loss: 0.0005069616017863154 2023-01-24 04:05:20.462468: step: 404/463, loss: 2.783818483352661 2023-01-24 04:05:21.058363: step: 406/463, loss: 0.012074127793312073 2023-01-24 04:05:21.683449: step: 408/463, loss: 0.0005894795758649707 2023-01-24 04:05:22.304795: step: 410/463, loss: 0.00010849148384295404 2023-01-24 04:05:22.881330: step: 412/463, loss: 0.05550036579370499 2023-01-24 04:05:23.488092: step: 414/463, loss: 0.0010107363341376185 2023-01-24 04:05:24.101451: step: 416/463, loss: 0.0031266873702406883 2023-01-24 04:05:24.684509: step: 418/463, loss: 0.0002638222649693489 2023-01-24 04:05:25.360361: step: 420/463, loss: 0.016629505902528763 2023-01-24 04:05:25.994313: step: 422/463, loss: 0.06914753466844559 2023-01-24 04:05:26.558011: step: 424/463, loss: 0.001353125786408782 2023-01-24 04:05:27.120699: step: 426/463, loss: 0.10459199547767639 2023-01-24 04:05:27.707775: step: 428/463, loss: 0.004527949262410402 2023-01-24 04:05:28.277886: step: 430/463, loss: 0.00016903478535823524 2023-01-24 04:05:28.846773: step: 432/463, loss: 0.0052298265509307384 2023-01-24 04:05:29.483080: step: 434/463, loss: 1.5333068859035848e-06 2023-01-24 04:05:30.090625: step: 436/463, loss: 0.013857504352927208 2023-01-24 04:05:30.714415: step: 438/463, loss: 0.041202887892723083 2023-01-24 04:05:31.300922: step: 440/463, loss: 0.002106701722368598 2023-01-24 04:05:31.858381: step: 442/463, loss: 0.017782218754291534 2023-01-24 04:05:32.427774: step: 444/463, loss: 0.00529123842716217 2023-01-24 04:05:33.089858: step: 446/463, loss: 0.00023902471002656966 2023-01-24 04:05:33.712396: step: 448/463, loss: 0.012297023087739944 2023-01-24 04:05:34.347644: step: 450/463, loss: 0.9898707270622253 2023-01-24 04:05:34.879002: step: 452/463, loss: 0.0028778291307389736 2023-01-24 04:05:35.568212: step: 454/463, loss: 0.0004048977862112224 2023-01-24 04:05:36.131512: step: 456/463, loss: 0.010129190981388092 2023-01-24 04:05:36.755539: step: 458/463, loss: 0.0011601498117670417 2023-01-24 04:05:37.384138: step: 460/463, loss: 0.05444043502211571 2023-01-24 04:05:37.925573: step: 462/463, loss: 0.8219467401504517 2023-01-24 04:05:38.488814: step: 464/463, loss: 0.021931078284978867 2023-01-24 04:05:39.108481: step: 466/463, loss: 0.0034460548777133226 2023-01-24 04:05:39.769974: step: 468/463, loss: 0.004666443448513746 2023-01-24 04:05:40.448880: step: 470/463, loss: 0.005805822089314461 2023-01-24 04:05:41.067311: step: 472/463, loss: 0.007446239236742258 2023-01-24 04:05:41.652589: step: 474/463, loss: 0.006875197868794203 2023-01-24 04:05:42.208728: step: 476/463, loss: 0.10706232488155365 2023-01-24 04:05:42.799924: step: 478/463, loss: 0.03519926592707634 2023-01-24 04:05:43.353869: step: 480/463, loss: 0.0004966319538652897 2023-01-24 04:05:43.956332: step: 482/463, loss: 0.033550385385751724 2023-01-24 04:05:44.512958: step: 484/463, loss: 0.011099251918494701 2023-01-24 04:05:45.119897: step: 486/463, loss: 0.006425037980079651 2023-01-24 04:05:45.741945: step: 488/463, loss: 0.00516635412350297 2023-01-24 04:05:46.427375: step: 490/463, loss: 0.020119614899158478 2023-01-24 04:05:47.050171: step: 492/463, loss: 0.03843187540769577 2023-01-24 04:05:47.658438: step: 494/463, loss: 0.02273445948958397 2023-01-24 04:05:48.256179: step: 496/463, loss: 0.004197875503450632 2023-01-24 04:05:48.901096: step: 498/463, loss: 0.007444774731993675 2023-01-24 04:05:49.518050: step: 500/463, loss: 0.007977718487381935 2023-01-24 04:05:50.078719: step: 502/463, loss: 1.159813642501831 2023-01-24 04:05:50.665089: step: 504/463, loss: 0.12344076484441757 2023-01-24 04:05:51.286976: step: 506/463, loss: 0.010689151473343372 2023-01-24 04:05:51.959231: step: 508/463, loss: 0.02402939647436142 2023-01-24 04:05:52.596019: step: 510/463, loss: 0.000801203481387347 2023-01-24 04:05:53.166106: step: 512/463, loss: 0.0002643251500558108 2023-01-24 04:05:53.788326: step: 514/463, loss: 0.007294789422303438 2023-01-24 04:05:54.372980: step: 516/463, loss: 0.001394484774209559 2023-01-24 04:05:54.945223: step: 518/463, loss: 0.01273674238473177 2023-01-24 04:05:55.567899: step: 520/463, loss: 0.024614671245217323 2023-01-24 04:05:56.176583: step: 522/463, loss: 0.03714652359485626 2023-01-24 04:05:56.727645: step: 524/463, loss: 0.0001383195776725188 2023-01-24 04:05:57.359208: step: 526/463, loss: 0.022074243053793907 2023-01-24 04:05:57.910540: step: 528/463, loss: 0.03157607093453407 2023-01-24 04:05:58.458655: step: 530/463, loss: 0.012684871442615986 2023-01-24 04:05:59.041687: step: 532/463, loss: 0.029379574581980705 2023-01-24 04:05:59.605378: step: 534/463, loss: 0.0035063489340245724 2023-01-24 04:06:00.266865: step: 536/463, loss: 0.0034502989146858454 2023-01-24 04:06:00.843595: step: 538/463, loss: 0.018242251127958298 2023-01-24 04:06:01.460337: step: 540/463, loss: 0.004189246334135532 2023-01-24 04:06:02.056413: step: 542/463, loss: 0.02560761198401451 2023-01-24 04:06:02.662128: step: 544/463, loss: 0.025203125551342964 2023-01-24 04:06:03.313532: step: 546/463, loss: 0.02217089757323265 2023-01-24 04:06:03.892328: step: 548/463, loss: 0.011151999235153198 2023-01-24 04:06:04.475938: step: 550/463, loss: 0.010360317304730415 2023-01-24 04:06:05.077807: step: 552/463, loss: 0.001353154773823917 2023-01-24 04:06:05.682959: step: 554/463, loss: 0.0024906164035201073 2023-01-24 04:06:06.273912: step: 556/463, loss: 0.021636279299855232 2023-01-24 04:06:06.908942: step: 558/463, loss: 0.09633377939462662 2023-01-24 04:06:07.504676: step: 560/463, loss: 0.0014069689204916358 2023-01-24 04:06:08.181466: step: 562/463, loss: 0.0036347832065075636 2023-01-24 04:06:08.845063: step: 564/463, loss: 0.018188487738370895 2023-01-24 04:06:09.421236: step: 566/463, loss: 0.05478993430733681 2023-01-24 04:06:10.012396: step: 568/463, loss: 0.005565541796386242 2023-01-24 04:06:10.635586: step: 570/463, loss: 0.3302651643753052 2023-01-24 04:06:11.247145: step: 572/463, loss: 0.0002009569579968229 2023-01-24 04:06:11.944110: step: 574/463, loss: 0.5271614789962769 2023-01-24 04:06:12.555981: step: 576/463, loss: 0.009887102991342545 2023-01-24 04:06:13.173710: step: 578/463, loss: 0.09758669137954712 2023-01-24 04:06:13.743565: step: 580/463, loss: 0.015284329652786255 2023-01-24 04:06:14.262828: step: 582/463, loss: 0.0014495070790871978 2023-01-24 04:06:14.908258: step: 584/463, loss: 0.018039032816886902 2023-01-24 04:06:15.505489: step: 586/463, loss: 0.04768936708569527 2023-01-24 04:06:16.123762: step: 588/463, loss: 0.01392241008579731 2023-01-24 04:06:16.675326: step: 590/463, loss: 0.004238658584654331 2023-01-24 04:06:17.264173: step: 592/463, loss: 0.0014836931368336082 2023-01-24 04:06:17.876876: step: 594/463, loss: 0.0010989391012117267 2023-01-24 04:06:18.485362: step: 596/463, loss: 0.0038590440526604652 2023-01-24 04:06:19.145677: step: 598/463, loss: 0.010744186118245125 2023-01-24 04:06:19.738637: step: 600/463, loss: 0.0113844508305192 2023-01-24 04:06:20.387306: step: 602/463, loss: 0.017788993194699287 2023-01-24 04:06:21.056953: step: 604/463, loss: 0.0002349230198888108 2023-01-24 04:06:21.684862: step: 606/463, loss: 0.010457243770360947 2023-01-24 04:06:22.285901: step: 608/463, loss: 0.0058177197352051735 2023-01-24 04:06:22.943249: step: 610/463, loss: 0.08693305402994156 2023-01-24 04:06:23.520241: step: 612/463, loss: 0.0022323522716760635 2023-01-24 04:06:24.124587: step: 614/463, loss: 0.002958253724500537 2023-01-24 04:06:24.736558: step: 616/463, loss: 5.9136058553121984e-05 2023-01-24 04:06:25.274663: step: 618/463, loss: 0.003142335917800665 2023-01-24 04:06:25.903356: step: 620/463, loss: 0.02074419893324375 2023-01-24 04:06:26.466584: step: 622/463, loss: 0.030147427693009377 2023-01-24 04:06:27.090779: step: 624/463, loss: 0.2404526174068451 2023-01-24 04:06:27.741120: step: 626/463, loss: 0.01802827976644039 2023-01-24 04:06:28.392412: step: 628/463, loss: 0.0025942681822925806 2023-01-24 04:06:28.937522: step: 630/463, loss: 0.006616254802793264 2023-01-24 04:06:29.587726: step: 632/463, loss: 0.07031940668821335 2023-01-24 04:06:30.177373: step: 634/463, loss: 0.023251071572303772 2023-01-24 04:06:30.760761: step: 636/463, loss: 0.008291074074804783 2023-01-24 04:06:31.401424: step: 638/463, loss: 0.04736214131116867 2023-01-24 04:06:31.968525: step: 640/463, loss: 0.005038055125623941 2023-01-24 04:06:32.550615: step: 642/463, loss: 0.12091568857431412 2023-01-24 04:06:33.099727: step: 644/463, loss: 0.005785045213997364 2023-01-24 04:06:33.733484: step: 646/463, loss: 0.0006044826004654169 2023-01-24 04:06:34.427762: step: 648/463, loss: 0.2230464369058609 2023-01-24 04:06:35.070720: step: 650/463, loss: 0.00014197845302987844 2023-01-24 04:06:35.666802: step: 652/463, loss: 0.014181282371282578 2023-01-24 04:06:36.391284: step: 654/463, loss: 0.009210537187755108 2023-01-24 04:06:36.995385: step: 656/463, loss: 0.06659893691539764 2023-01-24 04:06:37.710899: step: 658/463, loss: 0.0014637090498581529 2023-01-24 04:06:38.346835: step: 660/463, loss: 0.015872491523623466 2023-01-24 04:06:38.987728: step: 662/463, loss: 0.02204708941280842 2023-01-24 04:06:39.594396: step: 664/463, loss: 0.00498984893783927 2023-01-24 04:06:40.198316: step: 666/463, loss: 0.0013688835315406322 2023-01-24 04:06:40.824541: step: 668/463, loss: 0.0034113817382603884 2023-01-24 04:06:41.466309: step: 670/463, loss: 0.02099032513797283 2023-01-24 04:06:42.030124: step: 672/463, loss: 0.016249027103185654 2023-01-24 04:06:42.648985: step: 674/463, loss: 0.014061027206480503 2023-01-24 04:06:43.293325: step: 676/463, loss: 0.00711113540455699 2023-01-24 04:06:43.918852: step: 678/463, loss: 0.0922115370631218 2023-01-24 04:06:44.497103: step: 680/463, loss: 0.015617083758115768 2023-01-24 04:06:45.102846: step: 682/463, loss: 0.01940792053937912 2023-01-24 04:06:45.720190: step: 684/463, loss: 0.002145026344805956 2023-01-24 04:06:46.365111: step: 686/463, loss: 0.025647325441241264 2023-01-24 04:06:46.952507: step: 688/463, loss: 0.0068283346481621265 2023-01-24 04:06:47.517851: step: 690/463, loss: 0.04125862568616867 2023-01-24 04:06:48.176742: step: 692/463, loss: 0.07212526351213455 2023-01-24 04:06:48.706752: step: 694/463, loss: 0.04558970034122467 2023-01-24 04:06:49.330106: step: 696/463, loss: 0.03964276611804962 2023-01-24 04:06:49.959908: step: 698/463, loss: 0.05120779573917389 2023-01-24 04:06:50.512336: step: 700/463, loss: 0.003935744054615498 2023-01-24 04:06:51.107732: step: 702/463, loss: 0.0038848421536386013 2023-01-24 04:06:51.786037: step: 704/463, loss: 0.00033737451303750277 2023-01-24 04:06:52.404231: step: 706/463, loss: 0.007344974670559168 2023-01-24 04:06:53.107907: step: 708/463, loss: 0.04298728331923485 2023-01-24 04:06:53.709471: step: 710/463, loss: 0.002970857312902808 2023-01-24 04:06:54.323784: step: 712/463, loss: 0.003636470763012767 2023-01-24 04:06:54.904165: step: 714/463, loss: 0.001263256766833365 2023-01-24 04:06:55.497570: step: 716/463, loss: 0.0005735816084779799 2023-01-24 04:06:56.155569: step: 718/463, loss: 0.002978523960337043 2023-01-24 04:06:56.722325: step: 720/463, loss: 0.003500348189845681 2023-01-24 04:06:57.343580: step: 722/463, loss: 0.02365230955183506 2023-01-24 04:06:57.997414: step: 724/463, loss: 8.246102515840903e-05 2023-01-24 04:06:58.696201: step: 726/463, loss: 0.005365605466067791 2023-01-24 04:06:59.312076: step: 728/463, loss: 0.0237798560410738 2023-01-24 04:06:59.946533: step: 730/463, loss: 0.008263921365141869 2023-01-24 04:07:00.607798: step: 732/463, loss: 0.005609151907265186 2023-01-24 04:07:01.184884: step: 734/463, loss: 0.01481957919895649 2023-01-24 04:07:01.829244: step: 736/463, loss: 0.0956425666809082 2023-01-24 04:07:02.446738: step: 738/463, loss: 0.013071027584373951 2023-01-24 04:07:03.032042: step: 740/463, loss: 0.019585823640227318 2023-01-24 04:07:03.675003: step: 742/463, loss: 0.0014239312149584293 2023-01-24 04:07:04.272325: step: 744/463, loss: 0.006012816447764635 2023-01-24 04:07:04.880219: step: 746/463, loss: 0.02948002889752388 2023-01-24 04:07:05.445016: step: 748/463, loss: 0.006546752993017435 2023-01-24 04:07:06.067357: step: 750/463, loss: 0.012195901945233345 2023-01-24 04:07:06.687140: step: 752/463, loss: 0.015468014404177666 2023-01-24 04:07:07.314570: step: 754/463, loss: 0.006080780643969774 2023-01-24 04:07:07.958102: step: 756/463, loss: 0.0003518761077430099 2023-01-24 04:07:08.588060: step: 758/463, loss: 0.009662225842475891 2023-01-24 04:07:09.125670: step: 760/463, loss: 2.3864381546445657e-06 2023-01-24 04:07:09.733206: step: 762/463, loss: 0.060901302844285965 2023-01-24 04:07:10.406185: step: 764/463, loss: 0.009654792957007885 2023-01-24 04:07:11.008366: step: 766/463, loss: 0.037627194076776505 2023-01-24 04:07:11.597079: step: 768/463, loss: 0.026824548840522766 2023-01-24 04:07:12.148488: step: 770/463, loss: 0.03738008067011833 2023-01-24 04:07:12.707121: step: 772/463, loss: 0.0005081394920125604 2023-01-24 04:07:13.351489: step: 774/463, loss: 0.04401644691824913 2023-01-24 04:07:14.034603: step: 776/463, loss: 0.02724134363234043 2023-01-24 04:07:14.666578: step: 778/463, loss: 0.010808688588440418 2023-01-24 04:07:15.279452: step: 780/463, loss: 0.0009761190740391612 2023-01-24 04:07:15.856502: step: 782/463, loss: 0.00019191816681995988 2023-01-24 04:07:16.555779: step: 784/463, loss: 0.01264650747179985 2023-01-24 04:07:17.135856: step: 786/463, loss: 0.009386629797518253 2023-01-24 04:07:17.766994: step: 788/463, loss: 0.03513404726982117 2023-01-24 04:07:18.438656: step: 790/463, loss: 0.012704877182841301 2023-01-24 04:07:19.179217: step: 792/463, loss: 0.004062521271407604 2023-01-24 04:07:19.822838: step: 794/463, loss: 0.010036393068730831 2023-01-24 04:07:20.443177: step: 796/463, loss: 0.01922842487692833 2023-01-24 04:07:21.044863: step: 798/463, loss: 0.000656516058370471 2023-01-24 04:07:21.707290: step: 800/463, loss: 0.04428530111908913 2023-01-24 04:07:22.325872: step: 802/463, loss: 0.00021473036031238735 2023-01-24 04:07:22.906313: step: 804/463, loss: 0.00013105737161822617 2023-01-24 04:07:23.532923: step: 806/463, loss: 0.040083661675453186 2023-01-24 04:07:24.125068: step: 808/463, loss: 0.058017369359731674 2023-01-24 04:07:24.720864: step: 810/463, loss: 0.010729657486081123 2023-01-24 04:07:25.373943: step: 812/463, loss: 0.02432204596698284 2023-01-24 04:07:25.971114: step: 814/463, loss: 0.002297374652698636 2023-01-24 04:07:26.582298: step: 816/463, loss: 0.012257283553481102 2023-01-24 04:07:27.216938: step: 818/463, loss: 0.003023503115400672 2023-01-24 04:07:27.856077: step: 820/463, loss: 0.023690316826105118 2023-01-24 04:07:28.531497: step: 822/463, loss: 0.00591715844348073 2023-01-24 04:07:29.123742: step: 824/463, loss: 0.041522618383169174 2023-01-24 04:07:29.736805: step: 826/463, loss: 1.5101227760314941 2023-01-24 04:07:30.342043: step: 828/463, loss: 0.005976432468742132 2023-01-24 04:07:31.014051: step: 830/463, loss: 0.025869490578770638 2023-01-24 04:07:31.603353: step: 832/463, loss: 0.023571673780679703 2023-01-24 04:07:32.268751: step: 834/463, loss: 0.014984600245952606 2023-01-24 04:07:32.870207: step: 836/463, loss: 0.005688313394784927 2023-01-24 04:07:33.471671: step: 838/463, loss: 0.0009162913192994893 2023-01-24 04:07:34.059333: step: 840/463, loss: 0.0007104834076017141 2023-01-24 04:07:34.654071: step: 842/463, loss: 0.027130533009767532 2023-01-24 04:07:35.339597: step: 844/463, loss: 0.00908284168690443 2023-01-24 04:07:35.950835: step: 846/463, loss: 0.006472413428127766 2023-01-24 04:07:36.583397: step: 848/463, loss: 0.0055686431005597115 2023-01-24 04:07:37.177431: step: 850/463, loss: 0.0002357563644181937 2023-01-24 04:07:37.818867: step: 852/463, loss: 0.009973691776394844 2023-01-24 04:07:38.482385: step: 854/463, loss: 0.06778901815414429 2023-01-24 04:07:39.131158: step: 856/463, loss: 0.01029406487941742 2023-01-24 04:07:39.706746: step: 858/463, loss: 0.002801164984703064 2023-01-24 04:07:40.379309: step: 860/463, loss: 0.8698673844337463 2023-01-24 04:07:40.952304: step: 862/463, loss: 0.05457659065723419 2023-01-24 04:07:41.612974: step: 864/463, loss: 0.0004166988655924797 2023-01-24 04:07:42.194433: step: 866/463, loss: 0.003854450536891818 2023-01-24 04:07:42.884737: step: 868/463, loss: 1.1777788400650024 2023-01-24 04:07:43.469537: step: 870/463, loss: 0.005094530060887337 2023-01-24 04:07:44.065258: step: 872/463, loss: 0.0005814541364088655 2023-01-24 04:07:44.688189: step: 874/463, loss: 0.01394410990178585 2023-01-24 04:07:45.321418: step: 876/463, loss: 0.0012392376083880663 2023-01-24 04:07:45.910985: step: 878/463, loss: 13.929421424865723 2023-01-24 04:07:46.488731: step: 880/463, loss: 0.014130426570773125 2023-01-24 04:07:47.057506: step: 882/463, loss: 0.011859637685120106 2023-01-24 04:07:47.662044: step: 884/463, loss: 0.0065087019465863705 2023-01-24 04:07:48.268669: step: 886/463, loss: 0.02387521229684353 2023-01-24 04:07:48.906907: step: 888/463, loss: 0.0002580659056548029 2023-01-24 04:07:49.513904: step: 890/463, loss: 0.003160901367664337 2023-01-24 04:07:50.071595: step: 892/463, loss: 0.0016409298405051231 2023-01-24 04:07:50.620179: step: 894/463, loss: 0.012187356129288673 2023-01-24 04:07:51.268307: step: 896/463, loss: 0.004704783670604229 2023-01-24 04:07:51.862923: step: 898/463, loss: 0.031524937599897385 2023-01-24 04:07:52.495887: step: 900/463, loss: 0.05365930497646332 2023-01-24 04:07:53.141484: step: 902/463, loss: 0.001847764360718429 2023-01-24 04:07:53.726702: step: 904/463, loss: 0.015729987993836403 2023-01-24 04:07:54.369829: step: 906/463, loss: 0.08213606476783752 2023-01-24 04:07:54.911778: step: 908/463, loss: 0.3502179980278015 2023-01-24 04:07:55.461091: step: 910/463, loss: 0.02215544693171978 2023-01-24 04:07:56.077080: step: 912/463, loss: 0.05643599107861519 2023-01-24 04:07:56.733383: step: 914/463, loss: 0.04383004456758499 2023-01-24 04:07:57.304851: step: 916/463, loss: 0.01738683693110943 2023-01-24 04:07:57.955680: step: 918/463, loss: 0.037004098296165466 2023-01-24 04:07:58.609508: step: 920/463, loss: 0.03255252540111542 2023-01-24 04:07:59.250321: step: 922/463, loss: 0.05243566259741783 2023-01-24 04:07:59.873389: step: 924/463, loss: 0.00585405807942152 2023-01-24 04:08:00.478404: step: 926/463, loss: 0.003993358928710222 ================================================== Loss: 0.081 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3559108922101449, 'r': 0.3241693135879878, 'f1': 0.3392993609947757}, 'combined': 0.25001005546983474, 'epoch': 33} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3619627786154952, 'r': 0.3915718662044465, 'f1': 0.3761855986140731}, 'combined': 0.29158883720325285, 'epoch': 33} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3285123751209744, 'r': 0.3266422856990334, 'f1': 0.3275746613956053}, 'combined': 0.2413708031336039, 'epoch': 33} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34460953649499937, 'r': 0.3943371993899579, 'f1': 0.36780014825227103}, 'combined': 0.2850891101285546, 'epoch': 33} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33509816969174167, 'r': 0.33319058997812645, 'f1': 0.3341416573139346}, 'combined': 0.2462096422313202, 'epoch': 33} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3456790471198534, 'r': 0.3799927760618977, 'f1': 0.3620246412918955}, 'combined': 0.28061240138414867, 'epoch': 33} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.30514705882352944, 'r': 0.29642857142857143, 'f1': 0.3007246376811594}, 'combined': 0.20048309178743962, 'epoch': 33} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2833333333333333, 'r': 0.3695652173913043, 'f1': 0.32075471698113206}, 'combined': 0.16037735849056603, 'epoch': 33} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5555555555555556, 'r': 0.1724137931034483, 'f1': 0.26315789473684215}, 'combined': 0.1754385964912281, 'epoch': 33} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35531381856540084, 'r': 0.3195801707779886, 'f1': 0.336500999000999}, 'combined': 0.24794810452705188, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35077406976066583, 'r': 0.38269193088778525, 'f1': 0.3660385237854157}, 'combined': 0.2837236404461117, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.3142857142857143, 'f1': 0.360655737704918}, 'combined': 0.24043715846994532, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3556843389529725, 'r': 0.32598773380319873, 'f1': 0.340189179632249}, 'combined': 0.25066571130797294, 'epoch': 19} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35235259195938173, 'r': 0.3698406801632481, 'f1': 0.3608848968767838}, 'combined': 0.2797289631293731, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.41304347826086957, 'r': 0.41304347826086957, 'f1': 0.41304347826086957}, 'combined': 0.20652173913043478, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34001213859732793, 'r': 0.32968918941790243, 'f1': 0.33477110370565427}, 'combined': 0.24667344483574524, 'epoch': 29} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34157155234650616, 'r': 0.36386160769264764, 'f1': 0.3523644229368942}, 'combined': 0.2731245766305113, 'epoch': 29} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6666666666666666, 'r': 0.20689655172413793, 'f1': 0.31578947368421056}, 'combined': 0.2105263157894737, 'epoch': 29} ****************************** Epoch: 34 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:10:35.927316: step: 2/463, loss: 0.009739723987877369 2023-01-24 04:10:36.567081: step: 4/463, loss: 0.007610445376485586 2023-01-24 04:10:37.138566: step: 6/463, loss: 0.0008657005964778364 2023-01-24 04:10:37.802967: step: 8/463, loss: 0.02146882191300392 2023-01-24 04:10:38.479985: step: 10/463, loss: 0.0009552372503094375 2023-01-24 04:10:39.031107: step: 12/463, loss: 0.005236999597400427 2023-01-24 04:10:39.642533: step: 14/463, loss: 0.007812581956386566 2023-01-24 04:10:40.280144: step: 16/463, loss: 0.06353195011615753 2023-01-24 04:10:40.931082: step: 18/463, loss: 0.01044398732483387 2023-01-24 04:10:41.543879: step: 20/463, loss: 0.026223251596093178 2023-01-24 04:10:42.172945: step: 22/463, loss: 1.2370123863220215 2023-01-24 04:10:42.800334: step: 24/463, loss: 0.3983534276485443 2023-01-24 04:10:43.444260: step: 26/463, loss: 0.005513183772563934 2023-01-24 04:10:44.011814: step: 28/463, loss: 0.04135580733418465 2023-01-24 04:10:44.545182: step: 30/463, loss: 0.010369330644607544 2023-01-24 04:10:45.129923: step: 32/463, loss: 0.057855088263750076 2023-01-24 04:10:45.696449: step: 34/463, loss: 0.010785293765366077 2023-01-24 04:10:46.324537: step: 36/463, loss: 0.01930118165910244 2023-01-24 04:10:47.004085: step: 38/463, loss: 0.0038408352993428707 2023-01-24 04:10:47.614020: step: 40/463, loss: 0.008800194598734379 2023-01-24 04:10:48.199094: step: 42/463, loss: 0.0015701227821409702 2023-01-24 04:10:48.939771: step: 44/463, loss: 0.05451464280486107 2023-01-24 04:10:49.495519: step: 46/463, loss: 0.0008218620205298066 2023-01-24 04:10:50.011148: step: 48/463, loss: 0.002668748376891017 2023-01-24 04:10:50.581807: step: 50/463, loss: 0.0015461058355867863 2023-01-24 04:10:51.239203: step: 52/463, loss: 0.0013470182893797755 2023-01-24 04:10:51.845370: step: 54/463, loss: 0.00822441466152668 2023-01-24 04:10:52.493665: step: 56/463, loss: 0.0028365219477564096 2023-01-24 04:10:53.163054: step: 58/463, loss: 0.00044708087807521224 2023-01-24 04:10:53.789178: step: 60/463, loss: 0.010490639135241508 2023-01-24 04:10:54.373372: step: 62/463, loss: 0.0020631244406104088 2023-01-24 04:10:55.023624: step: 64/463, loss: 0.022615423426032066 2023-01-24 04:10:55.600306: step: 66/463, loss: 0.015612557530403137 2023-01-24 04:10:56.190172: step: 68/463, loss: 0.0027821483090519905 2023-01-24 04:10:56.750564: step: 70/463, loss: 0.013376089744269848 2023-01-24 04:10:57.344591: step: 72/463, loss: 0.021571798250079155 2023-01-24 04:10:57.897695: step: 74/463, loss: 0.0012288036523386836 2023-01-24 04:10:58.467049: step: 76/463, loss: 0.006507739424705505 2023-01-24 04:10:59.140708: step: 78/463, loss: 0.008792250417172909 2023-01-24 04:10:59.681121: step: 80/463, loss: 0.13713964819908142 2023-01-24 04:11:00.308347: step: 82/463, loss: 0.010746333748102188 2023-01-24 04:11:00.936306: step: 84/463, loss: 0.025266578420996666 2023-01-24 04:11:01.443395: step: 86/463, loss: 0.005215545650571585 2023-01-24 04:11:02.110077: step: 88/463, loss: 0.2075408399105072 2023-01-24 04:11:02.709910: step: 90/463, loss: 0.0064244866371154785 2023-01-24 04:11:03.432758: step: 92/463, loss: 0.035964082926511765 2023-01-24 04:11:04.044766: step: 94/463, loss: 0.002193576656281948 2023-01-24 04:11:04.616182: step: 96/463, loss: 0.03399713337421417 2023-01-24 04:11:05.249935: step: 98/463, loss: 0.06357075273990631 2023-01-24 04:11:05.794798: step: 100/463, loss: 0.00032459720387123525 2023-01-24 04:11:06.426398: step: 102/463, loss: 0.017216861248016357 2023-01-24 04:11:07.073139: step: 104/463, loss: 0.0002555028477218002 2023-01-24 04:11:07.705384: step: 106/463, loss: 0.0009637364419177175 2023-01-24 04:11:08.350170: step: 108/463, loss: 0.030441882088780403 2023-01-24 04:11:08.952037: step: 110/463, loss: 9.589681576471776e-05 2023-01-24 04:11:09.760773: step: 112/463, loss: 0.011581837199628353 2023-01-24 04:11:10.352199: step: 114/463, loss: 0.004575759172439575 2023-01-24 04:11:10.981717: step: 116/463, loss: 0.0029651059303432703 2023-01-24 04:11:11.556523: step: 118/463, loss: 0.003387508448213339 2023-01-24 04:11:12.246720: step: 120/463, loss: 0.013092845678329468 2023-01-24 04:11:12.898957: step: 122/463, loss: 0.03617576137185097 2023-01-24 04:11:13.531731: step: 124/463, loss: 0.0005085245938971639 2023-01-24 04:11:14.208965: step: 126/463, loss: 0.03192896023392677 2023-01-24 04:11:14.820165: step: 128/463, loss: 0.058753203600645065 2023-01-24 04:11:15.416333: step: 130/463, loss: 0.00109693908598274 2023-01-24 04:11:15.982600: step: 132/463, loss: 0.0025991476140916348 2023-01-24 04:11:16.620054: step: 134/463, loss: 0.006514101754873991 2023-01-24 04:11:17.308381: step: 136/463, loss: 0.014389179646968842 2023-01-24 04:11:17.940562: step: 138/463, loss: 0.029964737594127655 2023-01-24 04:11:18.567231: step: 140/463, loss: 0.010847127996385098 2023-01-24 04:11:19.162476: step: 142/463, loss: 4.811858889297582e-05 2023-01-24 04:11:19.752049: step: 144/463, loss: 0.0007950214785523713 2023-01-24 04:11:20.406802: step: 146/463, loss: 0.0016385347116738558 2023-01-24 04:11:20.992196: step: 148/463, loss: 0.00019960342615377158 2023-01-24 04:11:21.626799: step: 150/463, loss: 0.013654787093400955 2023-01-24 04:11:22.235528: step: 152/463, loss: 0.25668656826019287 2023-01-24 04:11:22.882032: step: 154/463, loss: 0.008343420922756195 2023-01-24 04:11:23.555000: step: 156/463, loss: 0.006150003056973219 2023-01-24 04:11:24.149122: step: 158/463, loss: 0.01569363847374916 2023-01-24 04:11:24.778476: step: 160/463, loss: 0.0003534180286806077 2023-01-24 04:11:25.364235: step: 162/463, loss: 0.01329941488802433 2023-01-24 04:11:25.992150: step: 164/463, loss: 0.04652642458677292 2023-01-24 04:11:26.556651: step: 166/463, loss: 0.0017869491130113602 2023-01-24 04:11:27.121782: step: 168/463, loss: 0.0011533185606822371 2023-01-24 04:11:27.710694: step: 170/463, loss: 0.001133265090174973 2023-01-24 04:11:28.314206: step: 172/463, loss: 0.0019344929605722427 2023-01-24 04:11:28.881254: step: 174/463, loss: 0.0038246966432780027 2023-01-24 04:11:29.536806: step: 176/463, loss: 0.00142365088686347 2023-01-24 04:11:30.152375: step: 178/463, loss: 0.006401105783879757 2023-01-24 04:11:30.755933: step: 180/463, loss: 0.04276920482516289 2023-01-24 04:11:31.373126: step: 182/463, loss: 0.008806237019598484 2023-01-24 04:11:32.022083: step: 184/463, loss: 0.0005914178327657282 2023-01-24 04:11:32.613431: step: 186/463, loss: 0.0068437280133366585 2023-01-24 04:11:33.237089: step: 188/463, loss: 0.0007700577261857688 2023-01-24 04:11:33.869479: step: 190/463, loss: 0.0026511407922953367 2023-01-24 04:11:34.492489: step: 192/463, loss: 0.007346590049564838 2023-01-24 04:11:35.097619: step: 194/463, loss: 0.01857823133468628 2023-01-24 04:11:35.674272: step: 196/463, loss: 0.004577023442834616 2023-01-24 04:11:36.319559: step: 198/463, loss: 0.0009512401884421706 2023-01-24 04:11:37.039919: step: 200/463, loss: 0.01460027415305376 2023-01-24 04:11:37.621115: step: 202/463, loss: 0.014259354211390018 2023-01-24 04:11:38.179759: step: 204/463, loss: 0.006603836547583342 2023-01-24 04:11:38.783249: step: 206/463, loss: 0.0009632774745114148 2023-01-24 04:11:39.388952: step: 208/463, loss: 0.20120222866535187 2023-01-24 04:11:39.998453: step: 210/463, loss: 0.053990691900253296 2023-01-24 04:11:40.636159: step: 212/463, loss: 0.0005672593251802027 2023-01-24 04:11:41.284968: step: 214/463, loss: 0.06538698822259903 2023-01-24 04:11:41.950921: step: 216/463, loss: 0.006212085485458374 2023-01-24 04:11:42.548285: step: 218/463, loss: 0.013693749904632568 2023-01-24 04:11:43.166804: step: 220/463, loss: 0.5488004088401794 2023-01-24 04:11:43.759154: step: 222/463, loss: 0.0010441800113767385 2023-01-24 04:11:44.352762: step: 224/463, loss: 0.0062617091462016106 2023-01-24 04:11:44.958083: step: 226/463, loss: 0.003725644899532199 2023-01-24 04:11:45.556503: step: 228/463, loss: 0.0015154535649344325 2023-01-24 04:11:46.119873: step: 230/463, loss: 0.006395978387445211 2023-01-24 04:11:46.748470: step: 232/463, loss: 0.0021662330254912376 2023-01-24 04:11:47.460958: step: 234/463, loss: 0.002501111011952162 2023-01-24 04:11:48.090758: step: 236/463, loss: 0.013906202279031277 2023-01-24 04:11:48.668260: step: 238/463, loss: 0.05204048380255699 2023-01-24 04:11:49.288424: step: 240/463, loss: 4.068772614118643e-05 2023-01-24 04:11:49.885755: step: 242/463, loss: 0.005519125144928694 2023-01-24 04:11:50.528043: step: 244/463, loss: 0.316930890083313 2023-01-24 04:11:51.089723: step: 246/463, loss: 0.004956568591296673 2023-01-24 04:11:51.708308: step: 248/463, loss: 0.0038417638279497623 2023-01-24 04:11:52.334391: step: 250/463, loss: 0.006842820439487696 2023-01-24 04:11:53.033115: step: 252/463, loss: 0.07526186853647232 2023-01-24 04:11:53.646204: step: 254/463, loss: 0.3961915969848633 2023-01-24 04:11:54.304339: step: 256/463, loss: 0.12554974853992462 2023-01-24 04:11:54.905997: step: 258/463, loss: 0.0163657795637846 2023-01-24 04:11:55.576458: step: 260/463, loss: 0.02212616056203842 2023-01-24 04:11:56.235138: step: 262/463, loss: 0.0003563922073226422 2023-01-24 04:11:56.842159: step: 264/463, loss: 0.09829600155353546 2023-01-24 04:11:57.464003: step: 266/463, loss: 0.08193520456552505 2023-01-24 04:11:58.068077: step: 268/463, loss: 0.018553458154201508 2023-01-24 04:11:58.683220: step: 270/463, loss: 0.006240359507501125 2023-01-24 04:11:59.231235: step: 272/463, loss: 0.006647813133895397 2023-01-24 04:11:59.749991: step: 274/463, loss: 0.0011024614796042442 2023-01-24 04:12:00.353326: step: 276/463, loss: 0.003351131919771433 2023-01-24 04:12:00.887987: step: 278/463, loss: 0.01782275177538395 2023-01-24 04:12:01.465313: step: 280/463, loss: 0.008393857628107071 2023-01-24 04:12:02.093515: step: 282/463, loss: 0.00037038218579255044 2023-01-24 04:12:02.764646: step: 284/463, loss: 0.008358017541468143 2023-01-24 04:12:03.397710: step: 286/463, loss: 0.08788734674453735 2023-01-24 04:12:03.999845: step: 288/463, loss: 0.006910989992320538 2023-01-24 04:12:04.643547: step: 290/463, loss: 0.022601911798119545 2023-01-24 04:12:05.244933: step: 292/463, loss: 0.003383188508450985 2023-01-24 04:12:05.844245: step: 294/463, loss: 0.005388925317674875 2023-01-24 04:12:06.423495: step: 296/463, loss: 0.010944236069917679 2023-01-24 04:12:07.086713: step: 298/463, loss: 0.0010097564663738012 2023-01-24 04:12:07.716744: step: 300/463, loss: 0.1455574780702591 2023-01-24 04:12:08.317221: step: 302/463, loss: 0.011903516948223114 2023-01-24 04:12:08.924737: step: 304/463, loss: 0.006360805593430996 2023-01-24 04:12:09.467788: step: 306/463, loss: 0.001116794184781611 2023-01-24 04:12:10.148305: step: 308/463, loss: 0.041386678814888 2023-01-24 04:12:10.773926: step: 310/463, loss: 0.002185546327382326 2023-01-24 04:12:11.437234: step: 312/463, loss: 0.032803475856781006 2023-01-24 04:12:12.007138: step: 314/463, loss: 0.00047534273471683264 2023-01-24 04:12:12.603133: step: 316/463, loss: 0.0035968010779470205 2023-01-24 04:12:13.266413: step: 318/463, loss: 0.0023719794116914272 2023-01-24 04:12:13.932397: step: 320/463, loss: 0.017731424421072006 2023-01-24 04:12:14.501149: step: 322/463, loss: 0.001555209862999618 2023-01-24 04:12:15.123865: step: 324/463, loss: 0.19333000481128693 2023-01-24 04:12:15.754030: step: 326/463, loss: 0.0023012254387140274 2023-01-24 04:12:16.383343: step: 328/463, loss: 0.8035910129547119 2023-01-24 04:12:16.970507: step: 330/463, loss: 0.020183803513646126 2023-01-24 04:12:17.619776: step: 332/463, loss: 0.0036327107809484005 2023-01-24 04:12:18.164489: step: 334/463, loss: 0.00017435017798561603 2023-01-24 04:12:18.833708: step: 336/463, loss: 0.020087696611881256 2023-01-24 04:12:19.419664: step: 338/463, loss: 0.004522798117250204 2023-01-24 04:12:19.981197: step: 340/463, loss: 0.0332925021648407 2023-01-24 04:12:20.580215: step: 342/463, loss: 0.0007199611281976104 2023-01-24 04:12:21.189869: step: 344/463, loss: 0.01485811360180378 2023-01-24 04:12:21.828506: step: 346/463, loss: 0.008959410712122917 2023-01-24 04:12:22.452033: step: 348/463, loss: 0.0005478111561387777 2023-01-24 04:12:23.047417: step: 350/463, loss: 0.0010155013296753168 2023-01-24 04:12:23.688485: step: 352/463, loss: 0.1405760496854782 2023-01-24 04:12:24.319470: step: 354/463, loss: 0.0029731285758316517 2023-01-24 04:12:24.922567: step: 356/463, loss: 0.0006213237647898495 2023-01-24 04:12:25.561502: step: 358/463, loss: 0.032526131719350815 2023-01-24 04:12:26.212207: step: 360/463, loss: 0.002632910618558526 2023-01-24 04:12:26.844814: step: 362/463, loss: 0.7473871111869812 2023-01-24 04:12:27.431391: step: 364/463, loss: 0.0013244155561551452 2023-01-24 04:12:28.083187: step: 366/463, loss: 0.011469478718936443 2023-01-24 04:12:28.786886: step: 368/463, loss: 0.014940064400434494 2023-01-24 04:12:29.309720: step: 370/463, loss: 0.012037776410579681 2023-01-24 04:12:29.934003: step: 372/463, loss: 0.006952646188437939 2023-01-24 04:12:30.524989: step: 374/463, loss: 0.00018836453091353178 2023-01-24 04:12:31.201016: step: 376/463, loss: 0.013855385594069958 2023-01-24 04:12:31.771350: step: 378/463, loss: 0.0077673266641795635 2023-01-24 04:12:32.434122: step: 380/463, loss: 0.004257932770997286 2023-01-24 04:12:33.076992: step: 382/463, loss: 0.0026532199699431658 2023-01-24 04:12:33.717635: step: 384/463, loss: 0.021064486354589462 2023-01-24 04:12:34.333312: step: 386/463, loss: 0.022865070030093193 2023-01-24 04:12:34.920766: step: 388/463, loss: 0.00334391207434237 2023-01-24 04:12:35.574269: step: 390/463, loss: 0.002264923881739378 2023-01-24 04:12:36.167873: step: 392/463, loss: 0.036474183201789856 2023-01-24 04:12:36.780511: step: 394/463, loss: 0.0005822824314236641 2023-01-24 04:12:37.345625: step: 396/463, loss: 0.008036978542804718 2023-01-24 04:12:37.922449: step: 398/463, loss: 0.0007134778425097466 2023-01-24 04:12:38.526634: step: 400/463, loss: 0.0007486168760806322 2023-01-24 04:12:39.121029: step: 402/463, loss: 0.15983781218528748 2023-01-24 04:12:39.703322: step: 404/463, loss: 0.0007222112035378814 2023-01-24 04:12:40.297541: step: 406/463, loss: 0.0030092766974121332 2023-01-24 04:12:40.972780: step: 408/463, loss: 0.0007156338542699814 2023-01-24 04:12:41.624328: step: 410/463, loss: 0.001254756934940815 2023-01-24 04:12:42.215710: step: 412/463, loss: 0.004104102496057749 2023-01-24 04:12:42.827821: step: 414/463, loss: 0.0383087657392025 2023-01-24 04:12:43.415418: step: 416/463, loss: 0.00833128485828638 2023-01-24 04:12:44.043508: step: 418/463, loss: 0.0017676475690677762 2023-01-24 04:12:44.696402: step: 420/463, loss: 0.0016550406580790877 2023-01-24 04:12:45.318974: step: 422/463, loss: 0.0021914620883762836 2023-01-24 04:12:45.989519: step: 424/463, loss: 0.0013504276284947991 2023-01-24 04:12:46.739435: step: 426/463, loss: 0.0062805647030472755 2023-01-24 04:12:47.311825: step: 428/463, loss: 0.03294497728347778 2023-01-24 04:12:47.980061: step: 430/463, loss: 0.03719580918550491 2023-01-24 04:12:48.642280: step: 432/463, loss: 0.07113736867904663 2023-01-24 04:12:49.201758: step: 434/463, loss: 0.006984752602875233 2023-01-24 04:12:49.828872: step: 436/463, loss: 0.009843391366302967 2023-01-24 04:12:50.448152: step: 438/463, loss: 0.00597524456679821 2023-01-24 04:12:51.071933: step: 440/463, loss: 0.07690596580505371 2023-01-24 04:12:51.757298: step: 442/463, loss: 0.005051633808761835 2023-01-24 04:12:52.333389: step: 444/463, loss: 0.001145934802480042 2023-01-24 04:12:52.962633: step: 446/463, loss: 0.018852099776268005 2023-01-24 04:12:53.528751: step: 448/463, loss: 0.00020774270524270833 2023-01-24 04:12:54.158453: step: 450/463, loss: 0.0006223213276825845 2023-01-24 04:12:54.707331: step: 452/463, loss: 0.01759417913854122 2023-01-24 04:12:55.336329: step: 454/463, loss: 0.013798830099403858 2023-01-24 04:12:55.991454: step: 456/463, loss: 0.046021368354558945 2023-01-24 04:12:56.664023: step: 458/463, loss: 0.03674422949552536 2023-01-24 04:12:57.281229: step: 460/463, loss: 0.009432390332221985 2023-01-24 04:12:57.836447: step: 462/463, loss: 0.012854049913585186 2023-01-24 04:12:58.475573: step: 464/463, loss: 0.0790523886680603 2023-01-24 04:12:59.101366: step: 466/463, loss: 0.023337973281741142 2023-01-24 04:12:59.717344: step: 468/463, loss: 0.021093090996146202 2023-01-24 04:13:00.316183: step: 470/463, loss: 0.026403164491057396 2023-01-24 04:13:00.884227: step: 472/463, loss: 0.0011704692151397467 2023-01-24 04:13:01.555742: step: 474/463, loss: 0.06086888164281845 2023-01-24 04:13:02.137187: step: 476/463, loss: 0.0010551728773862123 2023-01-24 04:13:02.757618: step: 478/463, loss: 0.0008148958440870047 2023-01-24 04:13:03.384756: step: 480/463, loss: 0.009969083592295647 2023-01-24 04:13:03.956965: step: 482/463, loss: 0.01283843070268631 2023-01-24 04:13:04.656384: step: 484/463, loss: 0.0034997870679944754 2023-01-24 04:13:05.250913: step: 486/463, loss: 0.00028669112361967564 2023-01-24 04:13:05.863814: step: 488/463, loss: 0.09110712260007858 2023-01-24 04:13:06.499105: step: 490/463, loss: 0.009743470698595047 2023-01-24 04:13:07.196108: step: 492/463, loss: 0.003664525458589196 2023-01-24 04:13:07.786945: step: 494/463, loss: 0.004117617383599281 2023-01-24 04:13:08.439341: step: 496/463, loss: 0.019161734730005264 2023-01-24 04:13:09.061564: step: 498/463, loss: 0.0022390701342374086 2023-01-24 04:13:09.656179: step: 500/463, loss: 0.03654737398028374 2023-01-24 04:13:10.299685: step: 502/463, loss: 0.04710760712623596 2023-01-24 04:13:10.868454: step: 504/463, loss: 0.00016607699217274785 2023-01-24 04:13:11.482258: step: 506/463, loss: 0.26908519864082336 2023-01-24 04:13:12.091572: step: 508/463, loss: 0.06100417673587799 2023-01-24 04:13:12.679732: step: 510/463, loss: 0.06180068850517273 2023-01-24 04:13:13.292647: step: 512/463, loss: 0.01284338440746069 2023-01-24 04:13:13.850310: step: 514/463, loss: 0.004898529965430498 2023-01-24 04:13:14.356982: step: 516/463, loss: 0.004370218142867088 2023-01-24 04:13:15.009866: step: 518/463, loss: 0.011034929193556309 2023-01-24 04:13:15.637888: step: 520/463, loss: 0.007362432312220335 2023-01-24 04:13:16.250761: step: 522/463, loss: 0.025243492797017097 2023-01-24 04:13:16.819744: step: 524/463, loss: 0.029760627076029778 2023-01-24 04:13:17.377667: step: 526/463, loss: 0.013672089204192162 2023-01-24 04:13:17.944249: step: 528/463, loss: 0.026861751452088356 2023-01-24 04:13:18.556402: step: 530/463, loss: 0.0014564846642315388 2023-01-24 04:13:19.210447: step: 532/463, loss: 0.0023695307318121195 2023-01-24 04:13:19.798009: step: 534/463, loss: 0.010567440651357174 2023-01-24 04:13:20.401288: step: 536/463, loss: 0.03477469086647034 2023-01-24 04:13:20.984827: step: 538/463, loss: 0.014735058881342411 2023-01-24 04:13:21.678511: step: 540/463, loss: 0.0010662655113264918 2023-01-24 04:13:22.308681: step: 542/463, loss: 0.023383846506476402 2023-01-24 04:13:22.980388: step: 544/463, loss: 0.02319318614900112 2023-01-24 04:13:23.595602: step: 546/463, loss: 0.07221105694770813 2023-01-24 04:13:24.193513: step: 548/463, loss: 0.08004879206418991 2023-01-24 04:13:24.818959: step: 550/463, loss: 0.03057803213596344 2023-01-24 04:13:25.498252: step: 552/463, loss: 0.005712749902158976 2023-01-24 04:13:26.116375: step: 554/463, loss: 0.008013799786567688 2023-01-24 04:13:26.765457: step: 556/463, loss: 0.013723742216825485 2023-01-24 04:13:27.364574: step: 558/463, loss: 0.004704783204942942 2023-01-24 04:13:27.936778: step: 560/463, loss: 0.012912776321172714 2023-01-24 04:13:28.464065: step: 562/463, loss: 0.005185188725590706 2023-01-24 04:13:29.059082: step: 564/463, loss: 0.0004636362427845597 2023-01-24 04:13:29.652532: step: 566/463, loss: 0.028054652735590935 2023-01-24 04:13:30.202278: step: 568/463, loss: 0.030527614057064056 2023-01-24 04:13:30.847070: step: 570/463, loss: 0.002444436540827155 2023-01-24 04:13:31.399487: step: 572/463, loss: 0.004083929117769003 2023-01-24 04:13:31.980278: step: 574/463, loss: 0.01851549558341503 2023-01-24 04:13:32.579853: step: 576/463, loss: 0.005885877180844545 2023-01-24 04:13:33.274229: step: 578/463, loss: 0.03820318728685379 2023-01-24 04:13:33.908857: step: 580/463, loss: 0.0353037491440773 2023-01-24 04:13:34.513875: step: 582/463, loss: 0.0684768557548523 2023-01-24 04:13:35.057924: step: 584/463, loss: 0.006563975475728512 2023-01-24 04:13:35.693166: step: 586/463, loss: 0.04160820692777634 2023-01-24 04:13:36.220840: step: 588/463, loss: 0.007843821309506893 2023-01-24 04:13:36.747311: step: 590/463, loss: 0.0005108764162287116 2023-01-24 04:13:37.376215: step: 592/463, loss: 0.15797743201255798 2023-01-24 04:13:37.963791: step: 594/463, loss: 0.006693197879940271 2023-01-24 04:13:38.553635: step: 596/463, loss: 0.062018558382987976 2023-01-24 04:13:39.161415: step: 598/463, loss: 0.010066436603665352 2023-01-24 04:13:39.816130: step: 600/463, loss: 0.048210203647613525 2023-01-24 04:13:40.406790: step: 602/463, loss: 0.0025097534526139498 2023-01-24 04:13:41.025173: step: 604/463, loss: 0.02489396557211876 2023-01-24 04:13:41.628406: step: 606/463, loss: 0.0004875862505286932 2023-01-24 04:13:42.208536: step: 608/463, loss: 0.012104896828532219 2023-01-24 04:13:42.842234: step: 610/463, loss: 0.03129550814628601 2023-01-24 04:13:43.384205: step: 612/463, loss: 0.0008676177822053432 2023-01-24 04:13:43.915798: step: 614/463, loss: 0.0003353909414727241 2023-01-24 04:13:44.588568: step: 616/463, loss: 0.025637736544013023 2023-01-24 04:13:45.132480: step: 618/463, loss: 0.0005053762579336762 2023-01-24 04:13:45.734267: step: 620/463, loss: 0.01377609558403492 2023-01-24 04:13:46.361274: step: 622/463, loss: 0.0078073893673717976 2023-01-24 04:13:47.030058: step: 624/463, loss: 0.008490349166095257 2023-01-24 04:13:47.583627: step: 626/463, loss: 0.0005791570292785764 2023-01-24 04:13:48.167807: step: 628/463, loss: 0.009469408541917801 2023-01-24 04:13:48.770689: step: 630/463, loss: 0.005361108109354973 2023-01-24 04:13:49.341558: step: 632/463, loss: 0.0005024028359912336 2023-01-24 04:13:49.959540: step: 634/463, loss: 0.016048002988100052 2023-01-24 04:13:50.492388: step: 636/463, loss: 0.0012524072080850601 2023-01-24 04:13:51.058255: step: 638/463, loss: 0.0008066810551099479 2023-01-24 04:13:51.638166: step: 640/463, loss: 0.0015481916489079595 2023-01-24 04:13:52.229014: step: 642/463, loss: 0.021330559626221657 2023-01-24 04:13:52.845200: step: 644/463, loss: 0.010048109106719494 2023-01-24 04:13:53.453536: step: 646/463, loss: 0.4652419090270996 2023-01-24 04:13:54.114977: step: 648/463, loss: 0.0006253863102756441 2023-01-24 04:13:54.699330: step: 650/463, loss: 0.009285411797463894 2023-01-24 04:13:55.369409: step: 652/463, loss: 0.03028927370905876 2023-01-24 04:13:56.032397: step: 654/463, loss: 0.014444654807448387 2023-01-24 04:13:56.669601: step: 656/463, loss: 0.0228570569306612 2023-01-24 04:13:57.333832: step: 658/463, loss: 0.021325791254639626 2023-01-24 04:13:57.946178: step: 660/463, loss: 0.005649307277053595 2023-01-24 04:13:58.609041: step: 662/463, loss: 0.1185615211725235 2023-01-24 04:13:59.189169: step: 664/463, loss: 0.001825980027206242 2023-01-24 04:13:59.772269: step: 666/463, loss: 0.0521569699048996 2023-01-24 04:14:00.318387: step: 668/463, loss: 0.005966990254819393 2023-01-24 04:14:00.912075: step: 670/463, loss: 0.006369201000779867 2023-01-24 04:14:01.535556: step: 672/463, loss: 0.010017817839980125 2023-01-24 04:14:02.220282: step: 674/463, loss: 0.029045892879366875 2023-01-24 04:14:02.828772: step: 676/463, loss: 0.00931037962436676 2023-01-24 04:14:03.491716: step: 678/463, loss: 0.004967473912984133 2023-01-24 04:14:04.075964: step: 680/463, loss: 0.01018843986093998 2023-01-24 04:14:04.631454: step: 682/463, loss: 0.0005618171999230981 2023-01-24 04:14:05.322548: step: 684/463, loss: 0.02629307471215725 2023-01-24 04:14:05.881779: step: 686/463, loss: 0.0011716255685314536 2023-01-24 04:14:06.515235: step: 688/463, loss: 4.1036284528672695e-05 2023-01-24 04:14:07.240908: step: 690/463, loss: 0.05460178479552269 2023-01-24 04:14:07.808101: step: 692/463, loss: 0.0008286880329251289 2023-01-24 04:14:08.437809: step: 694/463, loss: 0.007452541496604681 2023-01-24 04:14:09.108532: step: 696/463, loss: 0.019342167302966118 2023-01-24 04:14:09.705785: step: 698/463, loss: 0.0008071481715887785 2023-01-24 04:14:10.307040: step: 700/463, loss: 0.000634913332760334 2023-01-24 04:14:10.885354: step: 702/463, loss: 0.0025818785652518272 2023-01-24 04:14:11.490778: step: 704/463, loss: 0.0005449080490507185 2023-01-24 04:14:12.098227: step: 706/463, loss: 0.0052023292519152164 2023-01-24 04:14:12.715042: step: 708/463, loss: 0.0008114945376291871 2023-01-24 04:14:13.378135: step: 710/463, loss: 0.007314126007258892 2023-01-24 04:14:13.965919: step: 712/463, loss: 0.0020018103532493114 2023-01-24 04:14:14.529013: step: 714/463, loss: 0.02496105618774891 2023-01-24 04:14:15.172958: step: 716/463, loss: 0.06659258902072906 2023-01-24 04:14:15.794305: step: 718/463, loss: 0.04558748006820679 2023-01-24 04:14:16.398423: step: 720/463, loss: 0.009435946121811867 2023-01-24 04:14:16.977177: step: 722/463, loss: 0.009029042907059193 2023-01-24 04:14:17.615009: step: 724/463, loss: 0.0017209950601682067 2023-01-24 04:14:18.253682: step: 726/463, loss: 0.021001620218157768 2023-01-24 04:14:18.871855: step: 728/463, loss: 0.008627827279269695 2023-01-24 04:14:19.479482: step: 730/463, loss: 0.006505152210593224 2023-01-24 04:14:20.093107: step: 732/463, loss: 0.005757859442383051 2023-01-24 04:14:20.702459: step: 734/463, loss: 0.04773740842938423 2023-01-24 04:14:21.321775: step: 736/463, loss: 0.011440160684287548 2023-01-24 04:14:21.944468: step: 738/463, loss: 0.004779912065714598 2023-01-24 04:14:22.521708: step: 740/463, loss: 0.045252975076436996 2023-01-24 04:14:23.159742: step: 742/463, loss: 0.07623853534460068 2023-01-24 04:14:23.724894: step: 744/463, loss: 0.008411399088799953 2023-01-24 04:14:24.342478: step: 746/463, loss: 0.0043028355576097965 2023-01-24 04:14:24.937980: step: 748/463, loss: 0.005836684722453356 2023-01-24 04:14:25.595093: step: 750/463, loss: 0.005340093281120062 2023-01-24 04:14:26.229160: step: 752/463, loss: 0.0005660206661559641 2023-01-24 04:14:26.811062: step: 754/463, loss: 0.008960151113569736 2023-01-24 04:14:27.421472: step: 756/463, loss: 0.0411030612885952 2023-01-24 04:14:28.015172: step: 758/463, loss: 0.0014925183495506644 2023-01-24 04:14:28.572049: step: 760/463, loss: 0.0026543105486780405 2023-01-24 04:14:29.202126: step: 762/463, loss: 0.0003035521076526493 2023-01-24 04:14:29.814406: step: 764/463, loss: 0.021044567227363586 2023-01-24 04:14:30.468109: step: 766/463, loss: 0.011688812635838985 2023-01-24 04:14:31.137621: step: 768/463, loss: 0.10110897570848465 2023-01-24 04:14:31.749919: step: 770/463, loss: 0.04459425061941147 2023-01-24 04:14:32.313504: step: 772/463, loss: 7.603244011988863e-05 2023-01-24 04:14:32.950618: step: 774/463, loss: 0.019078094512224197 2023-01-24 04:14:33.564762: step: 776/463, loss: 0.002768618753179908 2023-01-24 04:14:34.192035: step: 778/463, loss: 0.004745963960886002 2023-01-24 04:14:34.754085: step: 780/463, loss: 0.00015188485849648714 2023-01-24 04:14:35.391548: step: 782/463, loss: 0.012416007928550243 2023-01-24 04:14:35.985952: step: 784/463, loss: 0.0016538889613002539 2023-01-24 04:14:36.539846: step: 786/463, loss: 0.36856821179389954 2023-01-24 04:14:37.144026: step: 788/463, loss: 0.00014135270612314343 2023-01-24 04:14:37.741996: step: 790/463, loss: 0.019952071830630302 2023-01-24 04:14:38.367523: step: 792/463, loss: 0.0041428254917263985 2023-01-24 04:14:38.926519: step: 794/463, loss: 0.031157664954662323 2023-01-24 04:14:39.532672: step: 796/463, loss: 0.0029931424651294947 2023-01-24 04:14:40.171690: step: 798/463, loss: 0.060080792754888535 2023-01-24 04:14:40.790053: step: 800/463, loss: 0.00038428761763498187 2023-01-24 04:14:41.400154: step: 802/463, loss: 0.0035635759122669697 2023-01-24 04:14:42.120971: step: 804/463, loss: 0.0027083682361990213 2023-01-24 04:14:42.745920: step: 806/463, loss: 0.002922641346231103 2023-01-24 04:14:43.338851: step: 808/463, loss: 0.0036797085776925087 2023-01-24 04:14:43.935395: step: 810/463, loss: 0.0494624488055706 2023-01-24 04:14:44.544144: step: 812/463, loss: 0.02796754613518715 2023-01-24 04:14:45.086533: step: 814/463, loss: 0.1730279177427292 2023-01-24 04:14:45.671435: step: 816/463, loss: 0.010082573629915714 2023-01-24 04:14:46.275407: step: 818/463, loss: 0.09070724248886108 2023-01-24 04:14:46.888073: step: 820/463, loss: 0.00033572097891010344 2023-01-24 04:14:47.571850: step: 822/463, loss: 0.013764219358563423 2023-01-24 04:14:48.198086: step: 824/463, loss: 0.01487722061574459 2023-01-24 04:14:48.779226: step: 826/463, loss: 0.0014253434492275119 2023-01-24 04:14:49.331387: step: 828/463, loss: 0.006183678284287453 2023-01-24 04:14:49.962292: step: 830/463, loss: 0.033008165657520294 2023-01-24 04:14:50.627780: step: 832/463, loss: 0.0062687755562365055 2023-01-24 04:14:51.247973: step: 834/463, loss: 0.0012203037040308118 2023-01-24 04:14:51.798654: step: 836/463, loss: 0.0011219758307561278 2023-01-24 04:14:52.388144: step: 838/463, loss: 0.0019976417534053326 2023-01-24 04:14:53.002980: step: 840/463, loss: 0.01087773498147726 2023-01-24 04:14:53.613338: step: 842/463, loss: 0.0008014105260372162 2023-01-24 04:14:54.163287: step: 844/463, loss: 0.009464616887271404 2023-01-24 04:14:54.721234: step: 846/463, loss: 0.10054533183574677 2023-01-24 04:14:55.322311: step: 848/463, loss: 0.007601532619446516 2023-01-24 04:14:55.926636: step: 850/463, loss: 0.010330067947506905 2023-01-24 04:14:56.595764: step: 852/463, loss: 0.03854396939277649 2023-01-24 04:14:57.215574: step: 854/463, loss: 0.005770996678620577 2023-01-24 04:14:57.845004: step: 856/463, loss: 0.005867260042577982 2023-01-24 04:14:58.452327: step: 858/463, loss: 6.179288175189868e-05 2023-01-24 04:14:59.076540: step: 860/463, loss: 0.10213794559240341 2023-01-24 04:14:59.694571: step: 862/463, loss: 0.5764427185058594 2023-01-24 04:15:00.325916: step: 864/463, loss: 0.0631270632147789 2023-01-24 04:15:00.930384: step: 866/463, loss: 0.00036338967038318515 2023-01-24 04:15:01.527890: step: 868/463, loss: 0.00947169866412878 2023-01-24 04:15:02.221493: step: 870/463, loss: 0.0015089331427589059 2023-01-24 04:15:02.835903: step: 872/463, loss: 0.07484561204910278 2023-01-24 04:15:03.432140: step: 874/463, loss: 0.2011224776506424 2023-01-24 04:15:04.031318: step: 876/463, loss: 0.026650454849004745 2023-01-24 04:15:04.636051: step: 878/463, loss: 0.01840878464281559 2023-01-24 04:15:05.309632: step: 880/463, loss: 0.00026987181627191603 2023-01-24 04:15:05.946938: step: 882/463, loss: 0.011966696009039879 2023-01-24 04:15:06.622910: step: 884/463, loss: 0.0032956497743725777 2023-01-24 04:15:07.205646: step: 886/463, loss: 1.5728992366348393e-05 2023-01-24 04:15:07.820576: step: 888/463, loss: 0.043862234801054 2023-01-24 04:15:08.452824: step: 890/463, loss: 0.009606434032320976 2023-01-24 04:15:09.078514: step: 892/463, loss: 0.004375309217721224 2023-01-24 04:15:09.677056: step: 894/463, loss: 0.00282529485411942 2023-01-24 04:15:10.345653: step: 896/463, loss: 0.009064910933375359 2023-01-24 04:15:10.955329: step: 898/463, loss: 0.007233513984829187 2023-01-24 04:15:11.590181: step: 900/463, loss: 0.017115803435444832 2023-01-24 04:15:12.189294: step: 902/463, loss: 0.008231788873672485 2023-01-24 04:15:12.766259: step: 904/463, loss: 0.027875155210494995 2023-01-24 04:15:13.458183: step: 906/463, loss: 0.00670216279104352 2023-01-24 04:15:14.027737: step: 908/463, loss: 0.8182143568992615 2023-01-24 04:15:14.661248: step: 910/463, loss: 0.021769888699054718 2023-01-24 04:15:15.385291: step: 912/463, loss: 0.021472467109560966 2023-01-24 04:15:16.071654: step: 914/463, loss: 0.01789284497499466 2023-01-24 04:15:16.698227: step: 916/463, loss: 0.014903604052960873 2023-01-24 04:15:17.270279: step: 918/463, loss: 0.019243378192186356 2023-01-24 04:15:17.922158: step: 920/463, loss: 0.0014249717351049185 2023-01-24 04:15:18.544876: step: 922/463, loss: 0.021726321429014206 2023-01-24 04:15:19.124282: step: 924/463, loss: 0.08095753192901611 2023-01-24 04:15:19.789083: step: 926/463, loss: 0.006877733860164881 ================================================== Loss: 0.035 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3683147122485358, 'r': 0.32288690143989285, 'f1': 0.3441079819187533}, 'combined': 0.2535532498348708, 'epoch': 34} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.367072877312721, 'r': 0.3812429700031017, 'f1': 0.3740237613736471}, 'combined': 0.28991315474895135, 'epoch': 34} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3341578777442962, 'r': 0.32020821301872787, 'f1': 0.3270343570947085}, 'combined': 0.24097268417504836, 'epoch': 34} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35019548189286664, 'r': 0.38753250018291496, 'f1': 0.36791916247732237}, 'combined': 0.28518136038912073, 'epoch': 34} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3322066015733779, 'r': 0.3177080212390559, 'f1': 0.3247955910630116}, 'combined': 0.23932306709906115, 'epoch': 34} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35171187016747246, 'r': 0.37531018498569446, 'f1': 0.363128040252944}, 'combined': 0.28146766756448294, 'epoch': 34} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3448275862068966, 'r': 0.2857142857142857, 'f1': 0.3125}, 'combined': 0.20833333333333331, 'epoch': 34} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.33653846153846156, 'r': 0.3804347826086957, 'f1': 0.35714285714285715}, 'combined': 0.17857142857142858, 'epoch': 34} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45, 'r': 0.15517241379310345, 'f1': 0.23076923076923075}, 'combined': 0.15384615384615383, 'epoch': 34} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35531381856540084, 'r': 0.3195801707779886, 'f1': 0.336500999000999}, 'combined': 0.24794810452705188, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35077406976066583, 'r': 0.38269193088778525, 'f1': 0.3660385237854157}, 'combined': 0.2837236404461117, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.3142857142857143, 'f1': 0.360655737704918}, 'combined': 0.24043715846994532, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3556843389529725, 'r': 0.32598773380319873, 'f1': 0.340189179632249}, 'combined': 0.25066571130797294, 'epoch': 19} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35235259195938173, 'r': 0.3698406801632481, 'f1': 0.3608848968767838}, 'combined': 0.2797289631293731, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.41304347826086957, 'r': 0.41304347826086957, 'f1': 0.41304347826086957}, 'combined': 0.20652173913043478, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34001213859732793, 'r': 0.32968918941790243, 'f1': 0.33477110370565427}, 'combined': 0.24667344483574524, 'epoch': 29} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34157155234650616, 'r': 0.36386160769264764, 'f1': 0.3523644229368942}, 'combined': 0.2731245766305113, 'epoch': 29} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6666666666666666, 'r': 0.20689655172413793, 'f1': 0.31578947368421056}, 'combined': 0.2105263157894737, 'epoch': 29} ****************************** Epoch: 35 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:17:53.444170: step: 2/463, loss: 0.024554675444960594 2023-01-24 04:17:54.012359: step: 4/463, loss: 0.000228421893552877 2023-01-24 04:17:54.690782: step: 6/463, loss: 0.006230027414858341 2023-01-24 04:17:55.328138: step: 8/463, loss: 0.0010267156176269054 2023-01-24 04:17:55.931333: step: 10/463, loss: 0.006440572906285524 2023-01-24 04:17:56.514670: step: 12/463, loss: 0.00042961008148267865 2023-01-24 04:17:57.178319: step: 14/463, loss: 0.006021533627063036 2023-01-24 04:17:57.787218: step: 16/463, loss: 0.00038502324605360627 2023-01-24 04:17:58.410466: step: 18/463, loss: 0.0016179465455934405 2023-01-24 04:17:58.968845: step: 20/463, loss: 0.012197596952319145 2023-01-24 04:17:59.565384: step: 22/463, loss: 0.0105760982260108 2023-01-24 04:18:00.284011: step: 24/463, loss: 0.031051844358444214 2023-01-24 04:18:00.898668: step: 26/463, loss: 0.04035607725381851 2023-01-24 04:18:01.480114: step: 28/463, loss: 0.0010999601799994707 2023-01-24 04:18:02.140313: step: 30/463, loss: 0.022077837958931923 2023-01-24 04:18:02.848807: step: 32/463, loss: 1.580573320388794 2023-01-24 04:18:03.482777: step: 34/463, loss: 0.0020611651707440615 2023-01-24 04:18:04.125924: step: 36/463, loss: 0.1768808364868164 2023-01-24 04:18:04.796749: step: 38/463, loss: 0.004586328286677599 2023-01-24 04:18:05.447728: step: 40/463, loss: 0.01226328406482935 2023-01-24 04:18:06.050952: step: 42/463, loss: 0.014630187302827835 2023-01-24 04:18:06.651910: step: 44/463, loss: 0.005935985129326582 2023-01-24 04:18:07.232598: step: 46/463, loss: 3.65638334187679e-05 2023-01-24 04:18:07.886049: step: 48/463, loss: 0.0007069005514495075 2023-01-24 04:18:08.514488: step: 50/463, loss: 0.006781348492950201 2023-01-24 04:18:09.109813: step: 52/463, loss: 0.03647290915250778 2023-01-24 04:18:09.755973: step: 54/463, loss: 0.08398403227329254 2023-01-24 04:18:10.284698: step: 56/463, loss: 0.003984568174928427 2023-01-24 04:18:10.835316: step: 58/463, loss: 0.06392472982406616 2023-01-24 04:18:11.433478: step: 60/463, loss: 0.04403117671608925 2023-01-24 04:18:12.051946: step: 62/463, loss: 0.022905055433511734 2023-01-24 04:18:12.658031: step: 64/463, loss: 0.02600308135151863 2023-01-24 04:18:13.200709: step: 66/463, loss: 0.00486516859382391 2023-01-24 04:18:13.814295: step: 68/463, loss: 0.005585775710642338 2023-01-24 04:18:14.338055: step: 70/463, loss: 0.014749941416084766 2023-01-24 04:18:14.972386: step: 72/463, loss: 0.006160380784422159 2023-01-24 04:18:15.530429: step: 74/463, loss: 0.11902791261672974 2023-01-24 04:18:16.138218: step: 76/463, loss: 0.0756763368844986 2023-01-24 04:18:16.808888: step: 78/463, loss: 0.00040072196861729026 2023-01-24 04:18:17.452034: step: 80/463, loss: 0.011726079508662224 2023-01-24 04:18:18.167448: step: 82/463, loss: 0.00554349971935153 2023-01-24 04:18:18.756526: step: 84/463, loss: 0.0021053783129900694 2023-01-24 04:18:19.385144: step: 86/463, loss: 0.02026268094778061 2023-01-24 04:18:20.070185: step: 88/463, loss: 0.04527315869927406 2023-01-24 04:18:20.665861: step: 90/463, loss: 0.0014820119831711054 2023-01-24 04:18:21.274677: step: 92/463, loss: 0.013923609629273415 2023-01-24 04:18:21.925039: step: 94/463, loss: 0.0008256182773038745 2023-01-24 04:18:22.594993: step: 96/463, loss: 0.03240247815847397 2023-01-24 04:18:23.200429: step: 98/463, loss: 0.009753512218594551 2023-01-24 04:18:23.803070: step: 100/463, loss: 0.0011146693723276258 2023-01-24 04:18:24.380233: step: 102/463, loss: 0.009076748043298721 2023-01-24 04:18:25.041118: step: 104/463, loss: 0.005524345673620701 2023-01-24 04:18:25.625563: step: 106/463, loss: 0.002493618056178093 2023-01-24 04:18:26.204850: step: 108/463, loss: 0.019862966611981392 2023-01-24 04:18:26.844311: step: 110/463, loss: 0.014839885756373405 2023-01-24 04:18:27.395270: step: 112/463, loss: 0.007156794890761375 2023-01-24 04:18:28.037164: step: 114/463, loss: 0.004859243985265493 2023-01-24 04:18:28.655865: step: 116/463, loss: 0.0010114931501448154 2023-01-24 04:18:29.210037: step: 118/463, loss: 0.00018777334480546415 2023-01-24 04:18:29.827865: step: 120/463, loss: 0.00024580300669185817 2023-01-24 04:18:30.469481: step: 122/463, loss: 0.0009292989270761609 2023-01-24 04:18:31.073788: step: 124/463, loss: 0.00553923798725009 2023-01-24 04:18:31.670062: step: 126/463, loss: 0.018627328798174858 2023-01-24 04:18:32.271916: step: 128/463, loss: 0.02359108440577984 2023-01-24 04:18:32.808384: step: 130/463, loss: 0.0004572428879328072 2023-01-24 04:18:33.450633: step: 132/463, loss: 0.00232084677554667 2023-01-24 04:18:34.016967: step: 134/463, loss: 0.01168471947312355 2023-01-24 04:18:34.603450: step: 136/463, loss: 0.03794407844543457 2023-01-24 04:18:35.268789: step: 138/463, loss: 0.005098619032651186 2023-01-24 04:18:35.885185: step: 140/463, loss: 0.03334835544228554 2023-01-24 04:18:36.494854: step: 142/463, loss: 0.01427743211388588 2023-01-24 04:18:37.093387: step: 144/463, loss: 0.031748294830322266 2023-01-24 04:18:37.720380: step: 146/463, loss: 0.0023708692751824856 2023-01-24 04:18:38.382250: step: 148/463, loss: 0.03166327625513077 2023-01-24 04:18:39.006974: step: 150/463, loss: 0.0005548236658796668 2023-01-24 04:18:39.668075: step: 152/463, loss: 6.9699868618045e-05 2023-01-24 04:18:40.274450: step: 154/463, loss: 0.05512380972504616 2023-01-24 04:18:40.838423: step: 156/463, loss: 0.00031485213548876345 2023-01-24 04:18:41.445747: step: 158/463, loss: 0.9138661623001099 2023-01-24 04:18:42.061683: step: 160/463, loss: 0.02591072954237461 2023-01-24 04:18:42.774807: step: 162/463, loss: 0.0016724716406315565 2023-01-24 04:18:43.346477: step: 164/463, loss: 0.005577743519097567 2023-01-24 04:18:43.958408: step: 166/463, loss: 0.010732623748481274 2023-01-24 04:18:44.587811: step: 168/463, loss: 0.39056822657585144 2023-01-24 04:18:45.220525: step: 170/463, loss: 0.006436503026634455 2023-01-24 04:18:45.808189: step: 172/463, loss: 0.025078184902668 2023-01-24 04:18:46.394467: step: 174/463, loss: 0.005097771529108286 2023-01-24 04:18:47.013451: step: 176/463, loss: 0.00323680997826159 2023-01-24 04:18:47.622505: step: 178/463, loss: 0.005976242944598198 2023-01-24 04:18:48.238326: step: 180/463, loss: 0.003433573991060257 2023-01-24 04:18:48.837822: step: 182/463, loss: 0.008844452910125256 2023-01-24 04:18:49.440422: step: 184/463, loss: 0.03760220482945442 2023-01-24 04:18:50.006707: step: 186/463, loss: 0.008496008813381195 2023-01-24 04:18:50.696134: step: 188/463, loss: 0.049681950360536575 2023-01-24 04:18:51.279426: step: 190/463, loss: 0.12096449732780457 2023-01-24 04:18:51.852629: step: 192/463, loss: 0.003335256827995181 2023-01-24 04:18:52.490562: step: 194/463, loss: 0.004286882467567921 2023-01-24 04:18:53.124014: step: 196/463, loss: 0.009320305660367012 2023-01-24 04:18:53.839414: step: 198/463, loss: 0.01455577090382576 2023-01-24 04:18:54.408660: step: 200/463, loss: 0.026991579681634903 2023-01-24 04:18:55.094546: step: 202/463, loss: 0.05739283189177513 2023-01-24 04:18:55.674275: step: 204/463, loss: 0.0010014408035203815 2023-01-24 04:18:56.211263: step: 206/463, loss: 6.723425030941144e-05 2023-01-24 04:18:56.800364: step: 208/463, loss: 0.0018492756644263864 2023-01-24 04:18:57.436631: step: 210/463, loss: 0.054891977459192276 2023-01-24 04:18:58.070392: step: 212/463, loss: 0.024136517196893692 2023-01-24 04:18:58.611286: step: 214/463, loss: 0.00943446159362793 2023-01-24 04:18:59.253606: step: 216/463, loss: 0.0013909173430874944 2023-01-24 04:18:59.838402: step: 218/463, loss: 0.0015011630021035671 2023-01-24 04:19:00.464544: step: 220/463, loss: 0.006773713510483503 2023-01-24 04:19:01.060561: step: 222/463, loss: 0.007433029823005199 2023-01-24 04:19:01.652989: step: 224/463, loss: 0.016462130472064018 2023-01-24 04:19:02.238758: step: 226/463, loss: 0.021360553801059723 2023-01-24 04:19:02.885258: step: 228/463, loss: 0.02648688107728958 2023-01-24 04:19:03.554547: step: 230/463, loss: 0.007667865138500929 2023-01-24 04:19:04.179349: step: 232/463, loss: 0.008582384325563908 2023-01-24 04:19:04.817789: step: 234/463, loss: 0.0007704205927439034 2023-01-24 04:19:05.412980: step: 236/463, loss: 0.018951408565044403 2023-01-24 04:19:06.043314: step: 238/463, loss: 0.012252001091837883 2023-01-24 04:19:06.641453: step: 240/463, loss: 0.008671257644891739 2023-01-24 04:19:07.214843: step: 242/463, loss: 0.01242708321660757 2023-01-24 04:19:07.821617: step: 244/463, loss: 0.062470290809869766 2023-01-24 04:19:08.416876: step: 246/463, loss: 9.429272722627502e-06 2023-01-24 04:19:09.048036: step: 248/463, loss: 0.05481576547026634 2023-01-24 04:19:09.641794: step: 250/463, loss: 0.001671632518991828 2023-01-24 04:19:10.323970: step: 252/463, loss: 0.015048501081764698 2023-01-24 04:19:10.910724: step: 254/463, loss: 0.015687121078372 2023-01-24 04:19:11.503178: step: 256/463, loss: 0.009678141213953495 2023-01-24 04:19:12.154585: step: 258/463, loss: 0.03176980838179588 2023-01-24 04:19:12.798315: step: 260/463, loss: 0.0023526581935584545 2023-01-24 04:19:13.425317: step: 262/463, loss: 0.0009741898975335062 2023-01-24 04:19:14.068769: step: 264/463, loss: 0.001743873581290245 2023-01-24 04:19:14.712417: step: 266/463, loss: 0.016783457249403 2023-01-24 04:19:15.338055: step: 268/463, loss: 0.01479868683964014 2023-01-24 04:19:15.899530: step: 270/463, loss: 0.011904990300536156 2023-01-24 04:19:16.578930: step: 272/463, loss: 0.016465744003653526 2023-01-24 04:19:17.231559: step: 274/463, loss: 0.0030503838788717985 2023-01-24 04:19:17.882153: step: 276/463, loss: 0.0041530681774020195 2023-01-24 04:19:18.484823: step: 278/463, loss: 0.030611081048846245 2023-01-24 04:19:19.093865: step: 280/463, loss: 0.004559692461043596 2023-01-24 04:19:19.738693: step: 282/463, loss: 0.0004207395249977708 2023-01-24 04:19:20.302273: step: 284/463, loss: 0.014515679329633713 2023-01-24 04:19:20.916620: step: 286/463, loss: 0.1311478465795517 2023-01-24 04:19:21.533387: step: 288/463, loss: 0.0012332580517977476 2023-01-24 04:19:22.163067: step: 290/463, loss: 0.008443433791399002 2023-01-24 04:19:22.846529: step: 292/463, loss: 0.0008047773153521121 2023-01-24 04:19:23.466523: step: 294/463, loss: 0.006280484143644571 2023-01-24 04:19:24.042292: step: 296/463, loss: 0.0015236276667565107 2023-01-24 04:19:24.640871: step: 298/463, loss: 0.0010327985510230064 2023-01-24 04:19:25.221083: step: 300/463, loss: 0.01540662907063961 2023-01-24 04:19:25.809195: step: 302/463, loss: 0.01582682505249977 2023-01-24 04:19:26.415863: step: 304/463, loss: 0.008314600214362144 2023-01-24 04:19:27.017444: step: 306/463, loss: 0.00023417093325406313 2023-01-24 04:19:27.614692: step: 308/463, loss: 0.009640461765229702 2023-01-24 04:19:28.250524: step: 310/463, loss: 0.02170552685856819 2023-01-24 04:19:28.869555: step: 312/463, loss: 9.597272872924805 2023-01-24 04:19:29.520084: step: 314/463, loss: 0.0086945416405797 2023-01-24 04:19:30.196634: step: 316/463, loss: 0.018993010744452477 2023-01-24 04:19:30.808479: step: 318/463, loss: 0.04802653193473816 2023-01-24 04:19:31.408368: step: 320/463, loss: 0.0020790782291442156 2023-01-24 04:19:31.969852: step: 322/463, loss: 0.003995911683887243 2023-01-24 04:19:32.565746: step: 324/463, loss: 0.001561065437272191 2023-01-24 04:19:33.236160: step: 326/463, loss: 0.017352307215332985 2023-01-24 04:19:33.842816: step: 328/463, loss: 0.019160201773047447 2023-01-24 04:19:34.506339: step: 330/463, loss: 0.0292675644159317 2023-01-24 04:19:35.178099: step: 332/463, loss: 0.002015606965869665 2023-01-24 04:19:35.772185: step: 334/463, loss: 0.008558913134038448 2023-01-24 04:19:36.320917: step: 336/463, loss: 0.45045584440231323 2023-01-24 04:19:36.883807: step: 338/463, loss: 0.002873100806027651 2023-01-24 04:19:37.513366: step: 340/463, loss: 0.006066231522709131 2023-01-24 04:19:38.095315: step: 342/463, loss: 0.004225987941026688 2023-01-24 04:19:38.926494: step: 344/463, loss: 3.483574982965365e-05 2023-01-24 04:19:39.524118: step: 346/463, loss: 0.002040430437773466 2023-01-24 04:19:40.172537: step: 348/463, loss: 0.009336970746517181 2023-01-24 04:19:40.814899: step: 350/463, loss: 0.026241572573781013 2023-01-24 04:19:41.409558: step: 352/463, loss: 0.007362271659076214 2023-01-24 04:19:42.056259: step: 354/463, loss: 0.027371464297175407 2023-01-24 04:19:42.635688: step: 356/463, loss: 0.00485308887436986 2023-01-24 04:19:43.248946: step: 358/463, loss: 0.01370034459978342 2023-01-24 04:19:43.866323: step: 360/463, loss: 0.004669187590479851 2023-01-24 04:19:44.417281: step: 362/463, loss: 7.050875865388662e-05 2023-01-24 04:19:45.066206: step: 364/463, loss: 0.017405999824404716 2023-01-24 04:19:45.715499: step: 366/463, loss: 5.289023101795465e-05 2023-01-24 04:19:46.284214: step: 368/463, loss: 0.04082733020186424 2023-01-24 04:19:46.960240: step: 370/463, loss: 0.0022130284924060106 2023-01-24 04:19:47.590617: step: 372/463, loss: 0.019310006871819496 2023-01-24 04:19:48.159114: step: 374/463, loss: 0.017915597185492516 2023-01-24 04:19:48.742098: step: 376/463, loss: 0.32642772793769836 2023-01-24 04:19:49.398491: step: 378/463, loss: 0.044260960072278976 2023-01-24 04:19:50.040098: step: 380/463, loss: 0.05304685980081558 2023-01-24 04:19:50.714800: step: 382/463, loss: 0.005499101709574461 2023-01-24 04:19:51.296464: step: 384/463, loss: 0.00102675158996135 2023-01-24 04:19:51.882110: step: 386/463, loss: 0.006113287061452866 2023-01-24 04:19:52.418984: step: 388/463, loss: 0.012271015904843807 2023-01-24 04:19:53.023166: step: 390/463, loss: 0.39034149050712585 2023-01-24 04:19:53.716189: step: 392/463, loss: 0.2743619978427887 2023-01-24 04:19:54.289157: step: 394/463, loss: 0.0016630636528134346 2023-01-24 04:19:54.922752: step: 396/463, loss: 0.005295955576002598 2023-01-24 04:19:55.520830: step: 398/463, loss: 0.0025687047746032476 2023-01-24 04:19:56.071266: step: 400/463, loss: 0.00913708470761776 2023-01-24 04:19:56.676309: step: 402/463, loss: 0.048686228692531586 2023-01-24 04:19:57.282303: step: 404/463, loss: 0.00530043663457036 2023-01-24 04:19:57.960080: step: 406/463, loss: 0.0024528519716113806 2023-01-24 04:19:58.568087: step: 408/463, loss: 0.019931841641664505 2023-01-24 04:19:59.251402: step: 410/463, loss: 0.004248816054314375 2023-01-24 04:19:59.829972: step: 412/463, loss: 0.017514139413833618 2023-01-24 04:20:00.442835: step: 414/463, loss: 0.004406094551086426 2023-01-24 04:20:01.037154: step: 416/463, loss: 0.062271326780319214 2023-01-24 04:20:01.642759: step: 418/463, loss: 0.040909670293331146 2023-01-24 04:20:02.293279: step: 420/463, loss: 0.055211763828992844 2023-01-24 04:20:02.880359: step: 422/463, loss: 0.015441077761352062 2023-01-24 04:20:03.582767: step: 424/463, loss: 0.00951111875474453 2023-01-24 04:20:04.193829: step: 426/463, loss: 0.055415406823158264 2023-01-24 04:20:04.775478: step: 428/463, loss: 0.0016636957880109549 2023-01-24 04:20:05.390362: step: 430/463, loss: 0.010604572482407093 2023-01-24 04:20:05.991260: step: 432/463, loss: 0.0010946291731670499 2023-01-24 04:20:06.703029: step: 434/463, loss: 0.0017120030242949724 2023-01-24 04:20:07.296837: step: 436/463, loss: 0.0030200621113181114 2023-01-24 04:20:07.904122: step: 438/463, loss: 0.03694145008921623 2023-01-24 04:20:08.532798: step: 440/463, loss: 0.028448956087231636 2023-01-24 04:20:09.145671: step: 442/463, loss: 0.001407848671078682 2023-01-24 04:20:09.797768: step: 444/463, loss: 0.022016361355781555 2023-01-24 04:20:10.416264: step: 446/463, loss: 0.04089091345667839 2023-01-24 04:20:11.027862: step: 448/463, loss: 0.02491391822695732 2023-01-24 04:20:11.628012: step: 450/463, loss: 0.0032255356200039387 2023-01-24 04:20:12.195729: step: 452/463, loss: 0.5869053602218628 2023-01-24 04:20:12.856682: step: 454/463, loss: 0.029924217611551285 2023-01-24 04:20:13.427180: step: 456/463, loss: 0.002704784506931901 2023-01-24 04:20:14.003120: step: 458/463, loss: 0.0017000801162794232 2023-01-24 04:20:14.640649: step: 460/463, loss: 0.12426872551441193 2023-01-24 04:20:15.345406: step: 462/463, loss: 0.018536772578954697 2023-01-24 04:20:15.999646: step: 464/463, loss: 0.0425092987716198 2023-01-24 04:20:16.605502: step: 466/463, loss: 0.006782358046621084 2023-01-24 04:20:17.162071: step: 468/463, loss: 0.0001791364193195477 2023-01-24 04:20:17.770943: step: 470/463, loss: 0.0036689655389636755 2023-01-24 04:20:18.389525: step: 472/463, loss: 0.0036370190791785717 2023-01-24 04:20:19.027609: step: 474/463, loss: 0.0001329735532635823 2023-01-24 04:20:19.659103: step: 476/463, loss: 0.005705771967768669 2023-01-24 04:20:20.239431: step: 478/463, loss: 0.012633300386369228 2023-01-24 04:20:20.841265: step: 480/463, loss: 0.011397972702980042 2023-01-24 04:20:21.518596: step: 482/463, loss: 0.031048130244016647 2023-01-24 04:20:22.163247: step: 484/463, loss: 0.01875011995434761 2023-01-24 04:20:22.790149: step: 486/463, loss: 0.020077552646398544 2023-01-24 04:20:23.398759: step: 488/463, loss: 0.014815251342952251 2023-01-24 04:20:24.007682: step: 490/463, loss: 0.025888385251164436 2023-01-24 04:20:24.631203: step: 492/463, loss: 0.0296202190220356 2023-01-24 04:20:25.187938: step: 494/463, loss: 0.003721346380189061 2023-01-24 04:20:25.791708: step: 496/463, loss: 7.28084851289168e-05 2023-01-24 04:20:26.428826: step: 498/463, loss: 0.005277659278362989 2023-01-24 04:20:27.080074: step: 500/463, loss: 0.01000724732875824 2023-01-24 04:20:27.671753: step: 502/463, loss: 0.012316937558352947 2023-01-24 04:20:28.271864: step: 504/463, loss: 0.00889474805444479 2023-01-24 04:20:28.902539: step: 506/463, loss: 0.021152684465050697 2023-01-24 04:20:29.480746: step: 508/463, loss: 0.04041510075330734 2023-01-24 04:20:30.135564: step: 510/463, loss: 0.02835632488131523 2023-01-24 04:20:30.680395: step: 512/463, loss: 0.0006655848119407892 2023-01-24 04:20:31.326904: step: 514/463, loss: 0.03245805203914642 2023-01-24 04:20:31.936827: step: 516/463, loss: 0.008442052640020847 2023-01-24 04:20:32.599624: step: 518/463, loss: 0.1609584540128708 2023-01-24 04:20:33.210350: step: 520/463, loss: 0.00019230511679779738 2023-01-24 04:20:33.811711: step: 522/463, loss: 0.006102635990828276 2023-01-24 04:20:34.446559: step: 524/463, loss: 0.0005319634801708162 2023-01-24 04:20:35.055644: step: 526/463, loss: 0.004204627126455307 2023-01-24 04:20:35.663682: step: 528/463, loss: 0.03489713370800018 2023-01-24 04:20:36.207747: step: 530/463, loss: 9.540874452795833e-05 2023-01-24 04:20:36.846827: step: 532/463, loss: 0.013699028640985489 2023-01-24 04:20:37.504895: step: 534/463, loss: 0.029252339154481888 2023-01-24 04:20:38.225090: step: 536/463, loss: 1.0969972610473633 2023-01-24 04:20:38.935353: step: 538/463, loss: 0.009437579661607742 2023-01-24 04:20:39.602323: step: 540/463, loss: 0.021492689847946167 2023-01-24 04:20:40.165450: step: 542/463, loss: 0.027512123808264732 2023-01-24 04:20:40.724543: step: 544/463, loss: 0.001970319775864482 2023-01-24 04:20:41.418914: step: 546/463, loss: 0.02360204979777336 2023-01-24 04:20:42.028138: step: 548/463, loss: 0.0014719793107360601 2023-01-24 04:20:42.681275: step: 550/463, loss: 0.0048825484700500965 2023-01-24 04:20:43.279402: step: 552/463, loss: 0.0253410954028368 2023-01-24 04:20:43.935498: step: 554/463, loss: 0.10798918455839157 2023-01-24 04:20:44.517583: step: 556/463, loss: 0.0022672193590551615 2023-01-24 04:20:45.094271: step: 558/463, loss: 0.0017448312137275934 2023-01-24 04:20:45.681367: step: 560/463, loss: 0.009754628874361515 2023-01-24 04:20:46.267138: step: 562/463, loss: 0.0013372161192819476 2023-01-24 04:20:46.904926: step: 564/463, loss: 0.003857825417071581 2023-01-24 04:20:47.548364: step: 566/463, loss: 0.004110759124159813 2023-01-24 04:20:48.251328: step: 568/463, loss: 0.008388431742787361 2023-01-24 04:20:48.834828: step: 570/463, loss: 0.0011858759680762887 2023-01-24 04:20:49.500477: step: 572/463, loss: 0.001762226689606905 2023-01-24 04:20:50.130463: step: 574/463, loss: 0.000316695892252028 2023-01-24 04:20:50.761671: step: 576/463, loss: 0.0530208945274353 2023-01-24 04:20:51.412829: step: 578/463, loss: 0.1530473232269287 2023-01-24 04:20:52.043999: step: 580/463, loss: 0.005145453382283449 2023-01-24 04:20:52.688789: step: 582/463, loss: 0.007890643551945686 2023-01-24 04:20:53.265348: step: 584/463, loss: 0.007970706559717655 2023-01-24 04:20:53.823335: step: 586/463, loss: 0.05107571929693222 2023-01-24 04:20:54.500275: step: 588/463, loss: 0.0023640599101781845 2023-01-24 04:20:55.108028: step: 590/463, loss: 0.004151259083300829 2023-01-24 04:20:55.735408: step: 592/463, loss: 0.0077116540633141994 2023-01-24 04:20:56.321926: step: 594/463, loss: 0.0021214198786765337 2023-01-24 04:20:56.932530: step: 596/463, loss: 0.0044120000675320625 2023-01-24 04:20:57.523402: step: 598/463, loss: 0.004525544587522745 2023-01-24 04:20:58.133945: step: 600/463, loss: 0.19436657428741455 2023-01-24 04:20:58.709027: step: 602/463, loss: 0.0015599527396261692 2023-01-24 04:20:59.395517: step: 604/463, loss: 0.008009737357497215 2023-01-24 04:20:59.995694: step: 606/463, loss: 0.0013858468737453222 2023-01-24 04:21:00.624080: step: 608/463, loss: 0.3488721251487732 2023-01-24 04:21:01.289664: step: 610/463, loss: 0.02543606422841549 2023-01-24 04:21:01.891441: step: 612/463, loss: 0.005030633416026831 2023-01-24 04:21:02.486818: step: 614/463, loss: 0.030824998393654823 2023-01-24 04:21:03.108061: step: 616/463, loss: 0.004081486724317074 2023-01-24 04:21:03.620879: step: 618/463, loss: 0.0006215223693288863 2023-01-24 04:21:04.194604: step: 620/463, loss: 0.061252009123563766 2023-01-24 04:21:04.843248: step: 622/463, loss: 0.01989113725721836 2023-01-24 04:21:05.561112: step: 624/463, loss: 0.0015687687555328012 2023-01-24 04:21:06.142991: step: 626/463, loss: 0.0009675811161287129 2023-01-24 04:21:06.764091: step: 628/463, loss: 0.00019023318600375205 2023-01-24 04:21:07.383235: step: 630/463, loss: 0.06610078364610672 2023-01-24 04:21:07.990184: step: 632/463, loss: 0.014715391211211681 2023-01-24 04:21:08.632136: step: 634/463, loss: 0.009596790187060833 2023-01-24 04:21:09.175950: step: 636/463, loss: 0.006795715540647507 2023-01-24 04:21:09.780418: step: 638/463, loss: 0.0027659893967211246 2023-01-24 04:21:10.370742: step: 640/463, loss: 0.022830478847026825 2023-01-24 04:21:11.066247: step: 642/463, loss: 0.02685198374092579 2023-01-24 04:21:11.657292: step: 644/463, loss: 0.001776121906004846 2023-01-24 04:21:12.265117: step: 646/463, loss: 0.03276059031486511 2023-01-24 04:21:12.942802: step: 648/463, loss: 0.013307644985616207 2023-01-24 04:21:13.544423: step: 650/463, loss: 0.013781944289803505 2023-01-24 04:21:14.134322: step: 652/463, loss: 0.3509994149208069 2023-01-24 04:21:14.721831: step: 654/463, loss: 0.0005310252890922129 2023-01-24 04:21:15.363458: step: 656/463, loss: 0.006202084012329578 2023-01-24 04:21:15.941452: step: 658/463, loss: 0.0015261704102158546 2023-01-24 04:21:16.534649: step: 660/463, loss: 0.007692432031035423 2023-01-24 04:21:17.168163: step: 662/463, loss: 0.011984733864665031 2023-01-24 04:21:17.835333: step: 664/463, loss: 0.08859200030565262 2023-01-24 04:21:18.537684: step: 666/463, loss: 0.001600740011781454 2023-01-24 04:21:19.179704: step: 668/463, loss: 3.259199365857057e-05 2023-01-24 04:21:19.819327: step: 670/463, loss: 0.043061695992946625 2023-01-24 04:21:20.393554: step: 672/463, loss: 0.0011611173395067453 2023-01-24 04:21:21.004320: step: 674/463, loss: 0.0028423587791621685 2023-01-24 04:21:21.673868: step: 676/463, loss: 0.003807240864261985 2023-01-24 04:21:22.299737: step: 678/463, loss: 0.00036372512113302946 2023-01-24 04:21:23.010646: step: 680/463, loss: 0.0069422065280377865 2023-01-24 04:21:23.624587: step: 682/463, loss: 0.8122259378433228 2023-01-24 04:21:24.241243: step: 684/463, loss: 0.007210102863609791 2023-01-24 04:21:24.871324: step: 686/463, loss: 0.0004664790176320821 2023-01-24 04:21:25.505086: step: 688/463, loss: 0.013120567426085472 2023-01-24 04:21:26.094770: step: 690/463, loss: 0.0008050451870076358 2023-01-24 04:21:26.733845: step: 692/463, loss: 0.010334783233702183 2023-01-24 04:21:27.306496: step: 694/463, loss: 0.001780427061021328 2023-01-24 04:21:27.951715: step: 696/463, loss: 0.002615198725834489 2023-01-24 04:21:28.607052: step: 698/463, loss: 0.02158784121274948 2023-01-24 04:21:29.227332: step: 700/463, loss: 0.025469280779361725 2023-01-24 04:21:29.757148: step: 702/463, loss: 0.007899214513599873 2023-01-24 04:21:30.374848: step: 704/463, loss: 0.06707986444234848 2023-01-24 04:21:30.958379: step: 706/463, loss: 0.0001986706192838028 2023-01-24 04:21:31.532835: step: 708/463, loss: 0.011409505270421505 2023-01-24 04:21:32.119098: step: 710/463, loss: 0.0065339095890522 2023-01-24 04:21:32.710481: step: 712/463, loss: 0.0018706261180341244 2023-01-24 04:21:33.353975: step: 714/463, loss: 0.00028417882276698947 2023-01-24 04:21:33.957982: step: 716/463, loss: 0.0025505241937935352 2023-01-24 04:21:34.539919: step: 718/463, loss: 0.006199778523296118 2023-01-24 04:21:35.050558: step: 720/463, loss: 0.00019804327166639268 2023-01-24 04:21:35.660413: step: 722/463, loss: 0.04341735318303108 2023-01-24 04:21:36.262474: step: 724/463, loss: 0.032529257237911224 2023-01-24 04:21:36.919063: step: 726/463, loss: 0.04912516102194786 2023-01-24 04:21:37.549701: step: 728/463, loss: 0.09744817018508911 2023-01-24 04:21:38.213957: step: 730/463, loss: 0.0012814155779778957 2023-01-24 04:21:38.815913: step: 732/463, loss: 0.009129542857408524 2023-01-24 04:21:39.428860: step: 734/463, loss: 0.038396455347537994 2023-01-24 04:21:40.089059: step: 736/463, loss: 0.010526176542043686 2023-01-24 04:21:40.706939: step: 738/463, loss: 0.003573357593268156 2023-01-24 04:21:41.387534: step: 740/463, loss: 0.032019611448049545 2023-01-24 04:21:42.050910: step: 742/463, loss: 0.003927348181605339 2023-01-24 04:21:42.655753: step: 744/463, loss: 0.0004113336035516113 2023-01-24 04:21:43.317366: step: 746/463, loss: 0.019907468929886818 2023-01-24 04:21:43.908327: step: 748/463, loss: 0.02164171077311039 2023-01-24 04:21:44.571024: step: 750/463, loss: 0.03540413826704025 2023-01-24 04:21:45.245081: step: 752/463, loss: 0.005953527521342039 2023-01-24 04:21:45.858600: step: 754/463, loss: 0.0008240683237090707 2023-01-24 04:21:46.431340: step: 756/463, loss: 0.0007584497798234224 2023-01-24 04:21:47.091850: step: 758/463, loss: 0.0404651053249836 2023-01-24 04:21:47.712783: step: 760/463, loss: 0.0017691616667434573 2023-01-24 04:21:48.314261: step: 762/463, loss: 0.01616172306239605 2023-01-24 04:21:48.874680: step: 764/463, loss: 0.0012893083039671183 2023-01-24 04:21:49.483992: step: 766/463, loss: 9.886065527098253e-05 2023-01-24 04:21:50.060274: step: 768/463, loss: 0.0026200832799077034 2023-01-24 04:21:50.631370: step: 770/463, loss: 0.007900582626461983 2023-01-24 04:21:51.260160: step: 772/463, loss: 0.0001425204100087285 2023-01-24 04:21:51.833564: step: 774/463, loss: 0.0838509052991867 2023-01-24 04:21:52.389986: step: 776/463, loss: 0.0026854455936700106 2023-01-24 04:21:52.995960: step: 778/463, loss: 0.005901542026549578 2023-01-24 04:21:53.617435: step: 780/463, loss: 0.015036815777420998 2023-01-24 04:21:54.171446: step: 782/463, loss: 0.005546136759221554 2023-01-24 04:21:54.784987: step: 784/463, loss: 0.0008756810566410422 2023-01-24 04:21:55.370226: step: 786/463, loss: 0.015073849819600582 2023-01-24 04:21:55.979945: step: 788/463, loss: 0.019593510776758194 2023-01-24 04:21:56.592685: step: 790/463, loss: 0.0057046180590987206 2023-01-24 04:21:57.109935: step: 792/463, loss: 0.001117024919949472 2023-01-24 04:21:57.699434: step: 794/463, loss: 0.05120617896318436 2023-01-24 04:21:58.280318: step: 796/463, loss: 0.0006611481658183038 2023-01-24 04:21:58.956489: step: 798/463, loss: 0.01597796007990837 2023-01-24 04:21:59.528948: step: 800/463, loss: 0.009253596886992455 2023-01-24 04:22:00.093839: step: 802/463, loss: 0.04280107095837593 2023-01-24 04:22:00.649545: step: 804/463, loss: 0.0026175014209002256 2023-01-24 04:22:01.293238: step: 806/463, loss: 0.0369688905775547 2023-01-24 04:22:01.884896: step: 808/463, loss: 0.05045194551348686 2023-01-24 04:22:02.516796: step: 810/463, loss: 0.1270289272069931 2023-01-24 04:22:03.147598: step: 812/463, loss: 0.022772789001464844 2023-01-24 04:22:03.724901: step: 814/463, loss: 0.07281602919101715 2023-01-24 04:22:04.330431: step: 816/463, loss: 0.1199316680431366 2023-01-24 04:22:04.959472: step: 818/463, loss: 0.020382236689329147 2023-01-24 04:22:05.624278: step: 820/463, loss: 0.03233255445957184 2023-01-24 04:22:06.253718: step: 822/463, loss: 0.003871083492413163 2023-01-24 04:22:06.854064: step: 824/463, loss: 0.006677394267171621 2023-01-24 04:22:07.502958: step: 826/463, loss: 0.09403805434703827 2023-01-24 04:22:08.056130: step: 828/463, loss: 0.003594765905290842 2023-01-24 04:22:08.667224: step: 830/463, loss: 0.010870123282074928 2023-01-24 04:22:09.259701: step: 832/463, loss: 0.004279666114598513 2023-01-24 04:22:09.892211: step: 834/463, loss: 0.033330705016851425 2023-01-24 04:22:10.435857: step: 836/463, loss: 0.03654314950108528 2023-01-24 04:22:11.127191: step: 838/463, loss: 0.0034194951876997948 2023-01-24 04:22:11.722617: step: 840/463, loss: 0.10320030897855759 2023-01-24 04:22:12.312158: step: 842/463, loss: 0.006930488161742687 2023-01-24 04:22:12.919697: step: 844/463, loss: 0.007995801977813244 2023-01-24 04:22:13.570063: step: 846/463, loss: 0.08015644550323486 2023-01-24 04:22:14.149758: step: 848/463, loss: 0.09332186728715897 2023-01-24 04:22:14.728670: step: 850/463, loss: 0.007438365835696459 2023-01-24 04:22:15.402746: step: 852/463, loss: 0.010123154148459435 2023-01-24 04:22:15.984500: step: 854/463, loss: 0.08119158446788788 2023-01-24 04:22:16.567036: step: 856/463, loss: 0.0019944319501519203 2023-01-24 04:22:17.186196: step: 858/463, loss: 0.01843361184000969 2023-01-24 04:22:17.846753: step: 860/463, loss: 0.012350602075457573 2023-01-24 04:22:18.399379: step: 862/463, loss: 0.05584613233804703 2023-01-24 04:22:19.017930: step: 864/463, loss: 0.014386721886694431 2023-01-24 04:22:19.691824: step: 866/463, loss: 0.0353621244430542 2023-01-24 04:22:20.246641: step: 868/463, loss: 0.020597638562321663 2023-01-24 04:22:20.833377: step: 870/463, loss: 0.0006872104131616652 2023-01-24 04:22:21.481938: step: 872/463, loss: 0.00979156419634819 2023-01-24 04:22:22.142522: step: 874/463, loss: 0.023115357384085655 2023-01-24 04:22:22.800321: step: 876/463, loss: 0.003294885391369462 2023-01-24 04:22:23.391180: step: 878/463, loss: 0.027601156383752823 2023-01-24 04:22:24.042742: step: 880/463, loss: 0.0022946984972804785 2023-01-24 04:22:24.702995: step: 882/463, loss: 0.01858082413673401 2023-01-24 04:22:25.350793: step: 884/463, loss: 0.28362688422203064 2023-01-24 04:22:25.956521: step: 886/463, loss: 0.001347129000350833 2023-01-24 04:22:26.593439: step: 888/463, loss: 0.004604569170624018 2023-01-24 04:22:27.237512: step: 890/463, loss: 0.0011813611490651965 2023-01-24 04:22:27.853109: step: 892/463, loss: 0.024046581238508224 2023-01-24 04:22:28.546938: step: 894/463, loss: 0.0010008730459958315 2023-01-24 04:22:29.130896: step: 896/463, loss: 0.029678529128432274 2023-01-24 04:22:29.683079: step: 898/463, loss: 0.0014649323420599103 2023-01-24 04:22:30.263540: step: 900/463, loss: 0.009135115891695023 2023-01-24 04:22:30.932388: step: 902/463, loss: 0.004966255277395248 2023-01-24 04:22:31.516880: step: 904/463, loss: 0.013460664078593254 2023-01-24 04:22:32.186068: step: 906/463, loss: 0.0005449182935990393 2023-01-24 04:22:32.771715: step: 908/463, loss: 0.00665718549862504 2023-01-24 04:22:33.365343: step: 910/463, loss: 0.0077726589515805244 2023-01-24 04:22:34.011254: step: 912/463, loss: 0.021509969606995583 2023-01-24 04:22:34.675064: step: 914/463, loss: 0.0011775298044085503 2023-01-24 04:22:35.261823: step: 916/463, loss: 0.030353013426065445 2023-01-24 04:22:35.831567: step: 918/463, loss: 0.0007901111384853721 2023-01-24 04:22:36.452604: step: 920/463, loss: 0.0056722210720181465 2023-01-24 04:22:37.073460: step: 922/463, loss: 0.04050293564796448 2023-01-24 04:22:37.737233: step: 924/463, loss: 0.09436770528554916 2023-01-24 04:22:38.357764: step: 926/463, loss: 0.006017202511429787 ================================================== Loss: 0.056 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35584995517705065, 'r': 0.3274899966999422, 'f1': 0.34108147877642203}, 'combined': 0.2513231948878899, 'epoch': 35} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3548567196115305, 'r': 0.3884506921482838, 'f1': 0.37089456170016044}, 'combined': 0.28748765069581816, 'epoch': 35} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3310356050586874, 'r': 0.33040745400544513, 'f1': 0.33072123126470954}, 'combined': 0.24368932830031229, 'epoch': 35} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3423890975527805, 'r': 0.3927404354281894, 'f1': 0.36584040560434083}, 'combined': 0.28357007515743166, 'epoch': 35} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3258301699668274, 'r': 0.3171743400246346, 'f1': 0.32144399460188927}, 'combined': 0.23685346970665525, 'epoch': 35} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3430235244074814, 'r': 0.3855861859837773, 'f1': 0.3630616792300733}, 'combined': 0.28141622983383674, 'epoch': 35} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3020833333333333, 'r': 0.25892857142857145, 'f1': 0.27884615384615385}, 'combined': 0.1858974358974359, 'epoch': 35} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.33035714285714285, 'r': 0.40217391304347827, 'f1': 0.36274509803921573}, 'combined': 0.18137254901960786, 'epoch': 35} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 35} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35531381856540084, 'r': 0.3195801707779886, 'f1': 0.336500999000999}, 'combined': 0.24794810452705188, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35077406976066583, 'r': 0.38269193088778525, 'f1': 0.3660385237854157}, 'combined': 0.2837236404461117, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.3142857142857143, 'f1': 0.360655737704918}, 'combined': 0.24043715846994532, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3556843389529725, 'r': 0.32598773380319873, 'f1': 0.340189179632249}, 'combined': 0.25066571130797294, 'epoch': 19} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35235259195938173, 'r': 0.3698406801632481, 'f1': 0.3608848968767838}, 'combined': 0.2797289631293731, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.41304347826086957, 'r': 0.41304347826086957, 'f1': 0.41304347826086957}, 'combined': 0.20652173913043478, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34001213859732793, 'r': 0.32968918941790243, 'f1': 0.33477110370565427}, 'combined': 0.24667344483574524, 'epoch': 29} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34157155234650616, 'r': 0.36386160769264764, 'f1': 0.3523644229368942}, 'combined': 0.2731245766305113, 'epoch': 29} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6666666666666666, 'r': 0.20689655172413793, 'f1': 0.31578947368421056}, 'combined': 0.2105263157894737, 'epoch': 29} ****************************** Epoch: 36 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:25:13.187963: step: 2/463, loss: 0.0004926755791530013 2023-01-24 04:25:13.852665: step: 4/463, loss: 0.02238519862294197 2023-01-24 04:25:14.492618: step: 6/463, loss: 0.0616052970290184 2023-01-24 04:25:15.042659: step: 8/463, loss: 0.004799255169928074 2023-01-24 04:25:15.660026: step: 10/463, loss: 0.012655009515583515 2023-01-24 04:25:16.263690: step: 12/463, loss: 0.00014118333638180047 2023-01-24 04:25:16.855204: step: 14/463, loss: 0.15786932408809662 2023-01-24 04:25:17.443283: step: 16/463, loss: 0.001410311902873218 2023-01-24 04:25:18.114286: step: 18/463, loss: 0.018634648993611336 2023-01-24 04:25:18.747723: step: 20/463, loss: 0.000459634029539302 2023-01-24 04:25:19.363012: step: 22/463, loss: 0.002159084426239133 2023-01-24 04:25:20.654127: step: 24/463, loss: 0.002403414808213711 2023-01-24 04:25:21.304195: step: 26/463, loss: 0.014240997843444347 2023-01-24 04:25:21.954192: step: 28/463, loss: 0.0010254951193928719 2023-01-24 04:25:22.600482: step: 30/463, loss: 0.017695574089884758 2023-01-24 04:25:23.188403: step: 32/463, loss: 0.027020089328289032 2023-01-24 04:25:23.817477: step: 34/463, loss: 0.0011526475427672267 2023-01-24 04:25:24.476745: step: 36/463, loss: 0.04943987354636192 2023-01-24 04:25:25.016650: step: 38/463, loss: 5.3604599088430405e-06 2023-01-24 04:25:25.782136: step: 40/463, loss: 0.0004678784462157637 2023-01-24 04:25:26.401956: step: 42/463, loss: 0.14743058383464813 2023-01-24 04:25:27.053048: step: 44/463, loss: 0.0006350097246468067 2023-01-24 04:25:27.674228: step: 46/463, loss: 0.004167316481471062 2023-01-24 04:25:28.329353: step: 48/463, loss: 0.024480260908603668 2023-01-24 04:25:28.956157: step: 50/463, loss: 0.07773642987012863 2023-01-24 04:25:29.614772: step: 52/463, loss: 0.031239869073033333 2023-01-24 04:25:30.271520: step: 54/463, loss: 0.0041245208121836185 2023-01-24 04:25:30.866758: step: 56/463, loss: 0.005031470209360123 2023-01-24 04:25:31.496967: step: 58/463, loss: 0.0033055332023650408 2023-01-24 04:25:32.080195: step: 60/463, loss: 0.11986285448074341 2023-01-24 04:25:32.666982: step: 62/463, loss: 0.0007648723549209535 2023-01-24 04:25:33.322587: step: 64/463, loss: 0.12618069350719452 2023-01-24 04:25:33.896121: step: 66/463, loss: 0.004225063603371382 2023-01-24 04:25:34.498817: step: 68/463, loss: 0.0026896544732153416 2023-01-24 04:25:35.090622: step: 70/463, loss: 0.0016927430406212807 2023-01-24 04:25:35.711764: step: 72/463, loss: 0.004847423173487186 2023-01-24 04:25:36.319158: step: 74/463, loss: 0.001417087041772902 2023-01-24 04:25:36.835563: step: 76/463, loss: 0.0014156046090647578 2023-01-24 04:25:37.429829: step: 78/463, loss: 0.005870248191058636 2023-01-24 04:25:38.000311: step: 80/463, loss: 0.028519045561552048 2023-01-24 04:25:38.581523: step: 82/463, loss: 0.05495013669133186 2023-01-24 04:25:39.314120: step: 84/463, loss: 0.01506493054330349 2023-01-24 04:25:39.940093: step: 86/463, loss: 0.0004517593188211322 2023-01-24 04:25:40.543275: step: 88/463, loss: 0.002268270356580615 2023-01-24 04:25:41.126537: step: 90/463, loss: 0.013584124855697155 2023-01-24 04:25:41.728912: step: 92/463, loss: 0.012236799113452435 2023-01-24 04:25:42.336557: step: 94/463, loss: 0.004698202945291996 2023-01-24 04:25:42.921683: step: 96/463, loss: 0.027181118726730347 2023-01-24 04:25:43.580853: step: 98/463, loss: 0.025009119883179665 2023-01-24 04:25:44.174618: step: 100/463, loss: 0.010331111028790474 2023-01-24 04:25:44.846242: step: 102/463, loss: 0.0032453066669404507 2023-01-24 04:25:45.439589: step: 104/463, loss: 0.006861776579171419 2023-01-24 04:25:46.079959: step: 106/463, loss: 0.0005807341076433659 2023-01-24 04:25:46.700708: step: 108/463, loss: 0.013131448067724705 2023-01-24 04:25:47.229013: step: 110/463, loss: 0.0013315534451976418 2023-01-24 04:25:47.831376: step: 112/463, loss: 0.0020186949986964464 2023-01-24 04:25:48.366609: step: 114/463, loss: 0.015919573605060577 2023-01-24 04:25:48.950156: step: 116/463, loss: 0.003444872098043561 2023-01-24 04:25:49.555275: step: 118/463, loss: 0.03311912715435028 2023-01-24 04:25:50.139012: step: 120/463, loss: 0.02634321339428425 2023-01-24 04:25:50.701461: step: 122/463, loss: 0.030533963814377785 2023-01-24 04:25:51.273480: step: 124/463, loss: 0.0035745142959058285 2023-01-24 04:25:51.820842: step: 126/463, loss: 0.03102034144103527 2023-01-24 04:25:52.413557: step: 128/463, loss: 0.055629272013902664 2023-01-24 04:25:53.036443: step: 130/463, loss: 0.02219184674322605 2023-01-24 04:25:53.682424: step: 132/463, loss: 0.009436080232262611 2023-01-24 04:25:54.297763: step: 134/463, loss: 0.004411205183714628 2023-01-24 04:25:54.947832: step: 136/463, loss: 1.7568196199135855e-05 2023-01-24 04:25:55.544377: step: 138/463, loss: 0.00033781048841774464 2023-01-24 04:25:56.177546: step: 140/463, loss: 0.021601134911179543 2023-01-24 04:25:56.787415: step: 142/463, loss: 0.0011771252611652017 2023-01-24 04:25:57.397167: step: 144/463, loss: 0.008932785131037235 2023-01-24 04:25:58.039669: step: 146/463, loss: 0.0005776201141998172 2023-01-24 04:25:58.621767: step: 148/463, loss: 0.005300901364535093 2023-01-24 04:25:59.220388: step: 150/463, loss: 0.01612846367061138 2023-01-24 04:25:59.806668: step: 152/463, loss: 0.00011750024714274332 2023-01-24 04:26:00.451268: step: 154/463, loss: 0.007638935465365648 2023-01-24 04:26:00.999742: step: 156/463, loss: 0.021703720092773438 2023-01-24 04:26:01.579842: step: 158/463, loss: 0.003742585424333811 2023-01-24 04:26:02.216193: step: 160/463, loss: 8.992474613478407e-05 2023-01-24 04:26:02.827009: step: 162/463, loss: 0.006666215136647224 2023-01-24 04:26:03.398914: step: 164/463, loss: 0.0013682059943675995 2023-01-24 04:26:03.928918: step: 166/463, loss: 0.0017774467123672366 2023-01-24 04:26:04.560639: step: 168/463, loss: 0.0019421938341110945 2023-01-24 04:26:05.107390: step: 170/463, loss: 0.009636595845222473 2023-01-24 04:26:05.653807: step: 172/463, loss: 0.006382985971868038 2023-01-24 04:26:06.192066: step: 174/463, loss: 0.00425747549161315 2023-01-24 04:26:06.801040: step: 176/463, loss: 0.006000712979584932 2023-01-24 04:26:07.468900: step: 178/463, loss: 0.008318754844367504 2023-01-24 04:26:08.062036: step: 180/463, loss: 0.023962531238794327 2023-01-24 04:26:08.731627: step: 182/463, loss: 0.14740021526813507 2023-01-24 04:26:09.373354: step: 184/463, loss: 0.9892958402633667 2023-01-24 04:26:09.976893: step: 186/463, loss: 0.11170303076505661 2023-01-24 04:26:10.538035: step: 188/463, loss: 0.0009004765306599438 2023-01-24 04:26:11.110323: step: 190/463, loss: 0.0004100656951777637 2023-01-24 04:26:11.714613: step: 192/463, loss: 0.0006549927638843656 2023-01-24 04:26:12.334584: step: 194/463, loss: 0.00014262759941630065 2023-01-24 04:26:12.928497: step: 196/463, loss: 0.0029615741223096848 2023-01-24 04:26:13.501429: step: 198/463, loss: 0.001007150742225349 2023-01-24 04:26:14.141978: step: 200/463, loss: 0.1437002718448639 2023-01-24 04:26:14.819826: step: 202/463, loss: 0.014947419986128807 2023-01-24 04:26:15.434667: step: 204/463, loss: 0.018611252307891846 2023-01-24 04:26:15.986491: step: 206/463, loss: 0.0018871185602620244 2023-01-24 04:26:16.579365: step: 208/463, loss: 0.008279492147266865 2023-01-24 04:26:17.134001: step: 210/463, loss: 0.0003480594896245748 2023-01-24 04:26:17.774602: step: 212/463, loss: 0.032954078167676926 2023-01-24 04:26:18.385888: step: 214/463, loss: 0.001809043693356216 2023-01-24 04:26:18.994474: step: 216/463, loss: 0.0032473087776452303 2023-01-24 04:26:19.574414: step: 218/463, loss: 0.003574102884158492 2023-01-24 04:26:20.179623: step: 220/463, loss: 0.009183285757899284 2023-01-24 04:26:20.763657: step: 222/463, loss: 0.006684563122689724 2023-01-24 04:26:21.423807: step: 224/463, loss: 0.016727399080991745 2023-01-24 04:26:22.042123: step: 226/463, loss: 0.0002858467632904649 2023-01-24 04:26:22.607589: step: 228/463, loss: 0.005393872503191233 2023-01-24 04:26:23.191439: step: 230/463, loss: 0.0008613124373368919 2023-01-24 04:26:23.787336: step: 232/463, loss: 0.05925869196653366 2023-01-24 04:26:24.334500: step: 234/463, loss: 0.0002998412528540939 2023-01-24 04:26:24.955912: step: 236/463, loss: 0.03975815698504448 2023-01-24 04:26:25.580462: step: 238/463, loss: 0.002198026515543461 2023-01-24 04:26:26.228001: step: 240/463, loss: 0.03136700391769409 2023-01-24 04:26:26.843574: step: 242/463, loss: 0.00011733981955330819 2023-01-24 04:26:27.476150: step: 244/463, loss: 0.00014876711065880954 2023-01-24 04:26:28.187249: step: 246/463, loss: 0.0011152435326948762 2023-01-24 04:26:28.769589: step: 248/463, loss: 0.002581898355856538 2023-01-24 04:26:29.393177: step: 250/463, loss: 0.0030039451085031033 2023-01-24 04:26:30.050332: step: 252/463, loss: 0.013062836602330208 2023-01-24 04:26:30.711967: step: 254/463, loss: 0.0015480725560337305 2023-01-24 04:26:31.412122: step: 256/463, loss: 0.009470746852457523 2023-01-24 04:26:32.142839: step: 258/463, loss: 0.03372591361403465 2023-01-24 04:26:32.734242: step: 260/463, loss: 0.005516430363059044 2023-01-24 04:26:33.343644: step: 262/463, loss: 0.030937649309635162 2023-01-24 04:26:33.923879: step: 264/463, loss: 0.016965948045253754 2023-01-24 04:26:34.532966: step: 266/463, loss: 0.0012964120833203197 2023-01-24 04:26:35.090985: step: 268/463, loss: 0.014559167437255383 2023-01-24 04:26:35.698998: step: 270/463, loss: 0.0017193866660818458 2023-01-24 04:26:36.316020: step: 272/463, loss: 0.0016711689531803131 2023-01-24 04:26:36.845428: step: 274/463, loss: 0.004963552579283714 2023-01-24 04:26:37.494693: step: 276/463, loss: 1.2774089574813843 2023-01-24 04:26:38.070127: step: 278/463, loss: 0.0025270506739616394 2023-01-24 04:26:38.795870: step: 280/463, loss: 0.02895297482609749 2023-01-24 04:26:39.474565: step: 282/463, loss: 0.06715033948421478 2023-01-24 04:26:40.044520: step: 284/463, loss: 0.0023797901812940836 2023-01-24 04:26:40.673384: step: 286/463, loss: 0.006618114188313484 2023-01-24 04:26:41.255353: step: 288/463, loss: 0.013377774506807327 2023-01-24 04:26:41.943411: step: 290/463, loss: 0.0013190142344683409 2023-01-24 04:26:42.543732: step: 292/463, loss: 0.021279960870742798 2023-01-24 04:26:43.216205: step: 294/463, loss: 0.009338678792119026 2023-01-24 04:26:43.797201: step: 296/463, loss: 0.005434241611510515 2023-01-24 04:26:44.378727: step: 298/463, loss: 0.08452336490154266 2023-01-24 04:26:45.065452: step: 300/463, loss: 0.019597329199314117 2023-01-24 04:26:45.659954: step: 302/463, loss: 0.007165477145463228 2023-01-24 04:26:46.252867: step: 304/463, loss: 0.014686892740428448 2023-01-24 04:26:46.860645: step: 306/463, loss: 0.0041551715694367886 2023-01-24 04:26:47.468645: step: 308/463, loss: 0.08586680889129639 2023-01-24 04:26:48.074057: step: 310/463, loss: 0.10356751829385757 2023-01-24 04:26:48.684846: step: 312/463, loss: 0.00019130560394842178 2023-01-24 04:26:49.394383: step: 314/463, loss: 0.003673784201964736 2023-01-24 04:26:49.977153: step: 316/463, loss: 0.028642889112234116 2023-01-24 04:26:50.596873: step: 318/463, loss: 0.3337525725364685 2023-01-24 04:26:51.159721: step: 320/463, loss: 0.009891267865896225 2023-01-24 04:26:51.872882: step: 322/463, loss: 0.00046149801346473396 2023-01-24 04:26:52.461927: step: 324/463, loss: 0.009151432663202286 2023-01-24 04:26:53.108976: step: 326/463, loss: 0.015597046352922916 2023-01-24 04:26:53.721878: step: 328/463, loss: 0.0012257576454430819 2023-01-24 04:26:54.354403: step: 330/463, loss: 0.0031902380287647247 2023-01-24 04:26:54.997790: step: 332/463, loss: 0.01941579394042492 2023-01-24 04:26:55.573224: step: 334/463, loss: 0.004373494535684586 2023-01-24 04:26:56.151017: step: 336/463, loss: 0.010517958551645279 2023-01-24 04:26:56.773416: step: 338/463, loss: 0.08891010284423828 2023-01-24 04:26:57.402305: step: 340/463, loss: 0.11529798805713654 2023-01-24 04:26:57.979730: step: 342/463, loss: 0.0021454598754644394 2023-01-24 04:26:58.605373: step: 344/463, loss: 0.008525926619768143 2023-01-24 04:26:59.166059: step: 346/463, loss: 0.0010154180927202106 2023-01-24 04:26:59.815402: step: 348/463, loss: 0.003982314839959145 2023-01-24 04:27:00.418369: step: 350/463, loss: 0.0034363928716629744 2023-01-24 04:27:01.069510: step: 352/463, loss: 0.03635721653699875 2023-01-24 04:27:01.762891: step: 354/463, loss: 0.003859040793031454 2023-01-24 04:27:02.355466: step: 356/463, loss: 0.010492019355297089 2023-01-24 04:27:02.986687: step: 358/463, loss: 0.04434703662991524 2023-01-24 04:27:03.688638: step: 360/463, loss: 0.020779302343726158 2023-01-24 04:27:04.344584: step: 362/463, loss: 0.051597096025943756 2023-01-24 04:27:04.938426: step: 364/463, loss: 0.0030227352399379015 2023-01-24 04:27:05.586575: step: 366/463, loss: 0.021089598536491394 2023-01-24 04:27:06.230529: step: 368/463, loss: 0.005464594345539808 2023-01-24 04:27:06.827760: step: 370/463, loss: 0.00038491084706038237 2023-01-24 04:27:07.444248: step: 372/463, loss: 0.010451785288751125 2023-01-24 04:27:08.032920: step: 374/463, loss: 0.003971833735704422 2023-01-24 04:27:08.686488: step: 376/463, loss: 0.4399808347225189 2023-01-24 04:27:09.347728: step: 378/463, loss: 0.050659067928791046 2023-01-24 04:27:09.985073: step: 380/463, loss: 0.002487249905243516 2023-01-24 04:27:10.685899: step: 382/463, loss: 0.009695771150290966 2023-01-24 04:27:11.318121: step: 384/463, loss: 0.0027448353357613087 2023-01-24 04:27:11.938668: step: 386/463, loss: 0.00022534250456374139 2023-01-24 04:27:12.548012: step: 388/463, loss: 0.012742413207888603 2023-01-24 04:27:13.173282: step: 390/463, loss: 0.008741947822272778 2023-01-24 04:27:13.786504: step: 392/463, loss: 0.015133303590118885 2023-01-24 04:27:14.452145: step: 394/463, loss: 0.008081257343292236 2023-01-24 04:27:15.016068: step: 396/463, loss: 0.003655626205727458 2023-01-24 04:27:15.629511: step: 398/463, loss: 0.026013191789388657 2023-01-24 04:27:16.253077: step: 400/463, loss: 0.004687536507844925 2023-01-24 04:27:16.789478: step: 402/463, loss: 5.243177656666376e-05 2023-01-24 04:27:17.423053: step: 404/463, loss: 0.02930634096264839 2023-01-24 04:27:17.995382: step: 406/463, loss: 0.0014166105538606644 2023-01-24 04:27:18.574330: step: 408/463, loss: 0.00021431539789773524 2023-01-24 04:27:19.218005: step: 410/463, loss: 0.020718924701213837 2023-01-24 04:27:19.816963: step: 412/463, loss: 0.008741088211536407 2023-01-24 04:27:20.389256: step: 414/463, loss: 0.006040649488568306 2023-01-24 04:27:21.014907: step: 416/463, loss: 0.019894791767001152 2023-01-24 04:27:21.627426: step: 418/463, loss: 0.02136288210749626 2023-01-24 04:27:22.210736: step: 420/463, loss: 0.0019151787273585796 2023-01-24 04:27:22.801839: step: 422/463, loss: 0.8710062503814697 2023-01-24 04:27:23.428282: step: 424/463, loss: 0.001510797068476677 2023-01-24 04:27:24.033940: step: 426/463, loss: 0.01244441233575344 2023-01-24 04:27:24.588638: step: 428/463, loss: 0.014736899174749851 2023-01-24 04:27:25.204353: step: 430/463, loss: 0.4316057860851288 2023-01-24 04:27:25.871781: step: 432/463, loss: 0.017364177852869034 2023-01-24 04:27:26.532881: step: 434/463, loss: 0.01485108956694603 2023-01-24 04:27:27.172324: step: 436/463, loss: 0.024831417948007584 2023-01-24 04:27:27.811742: step: 438/463, loss: 0.016303060576319695 2023-01-24 04:27:28.471375: step: 440/463, loss: 0.008644654415547848 2023-01-24 04:27:29.041319: step: 442/463, loss: 0.0037164457608014345 2023-01-24 04:27:29.727664: step: 444/463, loss: 0.09094803780317307 2023-01-24 04:27:30.335173: step: 446/463, loss: 0.00975746102631092 2023-01-24 04:27:31.006883: step: 448/463, loss: 0.4025951027870178 2023-01-24 04:27:31.677711: step: 450/463, loss: 0.030930671840906143 2023-01-24 04:27:32.201812: step: 452/463, loss: 0.01561846025288105 2023-01-24 04:27:32.778516: step: 454/463, loss: 0.0025900397449731827 2023-01-24 04:27:33.445605: step: 456/463, loss: 0.0030286521650850773 2023-01-24 04:27:34.070133: step: 458/463, loss: 0.00490938127040863 2023-01-24 04:27:34.679866: step: 460/463, loss: 0.01058991625905037 2023-01-24 04:27:35.311874: step: 462/463, loss: 0.01330417487770319 2023-01-24 04:27:35.915077: step: 464/463, loss: 0.0003906160709448159 2023-01-24 04:27:36.525132: step: 466/463, loss: 0.009563478641211987 2023-01-24 04:27:37.148776: step: 468/463, loss: 0.0009717753855511546 2023-01-24 04:27:37.735841: step: 470/463, loss: 0.03675663471221924 2023-01-24 04:27:38.299034: step: 472/463, loss: 0.00033287954283878207 2023-01-24 04:27:38.948114: step: 474/463, loss: 0.05320652946829796 2023-01-24 04:27:39.593772: step: 476/463, loss: 0.0005783308879472315 2023-01-24 04:27:40.341233: step: 478/463, loss: 0.04280523583292961 2023-01-24 04:27:40.912473: step: 480/463, loss: 0.03138202428817749 2023-01-24 04:27:41.507773: step: 482/463, loss: 0.016117680817842484 2023-01-24 04:27:42.118021: step: 484/463, loss: 0.006584015674889088 2023-01-24 04:27:42.688025: step: 486/463, loss: 0.010424034669995308 2023-01-24 04:27:43.312109: step: 488/463, loss: 0.025882141664624214 2023-01-24 04:27:44.000645: step: 490/463, loss: 0.0014137310208752751 2023-01-24 04:27:44.569294: step: 492/463, loss: 0.0005551911890506744 2023-01-24 04:27:45.213370: step: 494/463, loss: 0.2818014323711395 2023-01-24 04:27:45.828750: step: 496/463, loss: 0.017470164224505424 2023-01-24 04:27:46.398114: step: 498/463, loss: 0.00011574823292903602 2023-01-24 04:27:47.074558: step: 500/463, loss: 0.02045602723956108 2023-01-24 04:27:47.712678: step: 502/463, loss: 0.10712159425020218 2023-01-24 04:27:48.321370: step: 504/463, loss: 0.0013147150166332722 2023-01-24 04:27:48.915479: step: 506/463, loss: 0.00317944772541523 2023-01-24 04:27:49.561057: step: 508/463, loss: 0.5646162033081055 2023-01-24 04:27:50.193937: step: 510/463, loss: 0.0005679419846273959 2023-01-24 04:27:50.788890: step: 512/463, loss: 0.030714282765984535 2023-01-24 04:27:51.380571: step: 514/463, loss: 0.0008262402261607349 2023-01-24 04:27:51.993909: step: 516/463, loss: 0.014902494847774506 2023-01-24 04:27:52.644333: step: 518/463, loss: 0.02746141515672207 2023-01-24 04:27:53.333079: step: 520/463, loss: 0.016667431220412254 2023-01-24 04:27:53.887478: step: 522/463, loss: 0.022922292351722717 2023-01-24 04:27:54.508522: step: 524/463, loss: 0.00801930297166109 2023-01-24 04:27:55.099071: step: 526/463, loss: 0.01766378991305828 2023-01-24 04:27:55.682756: step: 528/463, loss: 0.0006419854471459985 2023-01-24 04:27:56.308773: step: 530/463, loss: 0.01546458899974823 2023-01-24 04:27:56.891992: step: 532/463, loss: 0.055628638714551926 2023-01-24 04:27:57.545623: step: 534/463, loss: 0.005957188084721565 2023-01-24 04:27:58.266110: step: 536/463, loss: 0.016784073784947395 2023-01-24 04:27:58.849656: step: 538/463, loss: 0.01718474179506302 2023-01-24 04:27:59.405473: step: 540/463, loss: 0.017915578559041023 2023-01-24 04:27:59.964899: step: 542/463, loss: 0.01347360759973526 2023-01-24 04:28:00.536535: step: 544/463, loss: 0.0026090466417372227 2023-01-24 04:28:01.112161: step: 546/463, loss: 0.0026845773681998253 2023-01-24 04:28:01.708152: step: 548/463, loss: 0.0263515617698431 2023-01-24 04:28:02.368735: step: 550/463, loss: 0.01133830938488245 2023-01-24 04:28:02.986820: step: 552/463, loss: 0.1092524453997612 2023-01-24 04:28:03.641731: step: 554/463, loss: 0.008198747411370277 2023-01-24 04:28:04.315732: step: 556/463, loss: 0.021788250654935837 2023-01-24 04:28:04.999266: step: 558/463, loss: 0.005576969124376774 2023-01-24 04:28:05.703205: step: 560/463, loss: 0.059285759925842285 2023-01-24 04:28:06.275721: step: 562/463, loss: 0.0007793564000166953 2023-01-24 04:28:06.841726: step: 564/463, loss: 0.0005645605851896107 2023-01-24 04:28:07.443344: step: 566/463, loss: 0.000992911751382053 2023-01-24 04:28:08.073265: step: 568/463, loss: 0.029286060482263565 2023-01-24 04:28:08.705123: step: 570/463, loss: 0.7687966823577881 2023-01-24 04:28:09.276316: step: 572/463, loss: 0.18781490623950958 2023-01-24 04:28:09.846894: step: 574/463, loss: 0.015316360630095005 2023-01-24 04:28:10.436893: step: 576/463, loss: 0.011777011677622795 2023-01-24 04:28:11.050557: step: 578/463, loss: 0.0014651017263531685 2023-01-24 04:28:11.669194: step: 580/463, loss: 0.013675212860107422 2023-01-24 04:28:12.376481: step: 582/463, loss: 0.007511957548558712 2023-01-24 04:28:12.988626: step: 584/463, loss: 0.001818681601434946 2023-01-24 04:28:13.608112: step: 586/463, loss: 0.008130278438329697 2023-01-24 04:28:14.252962: step: 588/463, loss: 0.0009175124578177929 2023-01-24 04:28:14.849521: step: 590/463, loss: 0.0013268294278532267 2023-01-24 04:28:15.539614: step: 592/463, loss: 0.005795582197606564 2023-01-24 04:28:16.157143: step: 594/463, loss: 0.2278042882680893 2023-01-24 04:28:16.756278: step: 596/463, loss: 0.3387654423713684 2023-01-24 04:28:17.294261: step: 598/463, loss: 0.012030359357595444 2023-01-24 04:28:17.882621: step: 600/463, loss: 0.04630205035209656 2023-01-24 04:28:18.556654: step: 602/463, loss: 0.004996767267584801 2023-01-24 04:28:19.140509: step: 604/463, loss: 0.023959588259458542 2023-01-24 04:28:19.730423: step: 606/463, loss: 0.020490165799856186 2023-01-24 04:28:20.330495: step: 608/463, loss: 0.05195914953947067 2023-01-24 04:28:20.964115: step: 610/463, loss: 0.0014299642061814666 2023-01-24 04:28:21.497083: step: 612/463, loss: 0.05177592486143112 2023-01-24 04:28:22.174350: step: 614/463, loss: 0.03818044066429138 2023-01-24 04:28:22.773383: step: 616/463, loss: 0.006384481210261583 2023-01-24 04:28:23.414173: step: 618/463, loss: 0.0002505085722077638 2023-01-24 04:28:23.980745: step: 620/463, loss: 0.0005267433007247746 2023-01-24 04:28:24.573355: step: 622/463, loss: 0.0007392748375423253 2023-01-24 04:28:25.188463: step: 624/463, loss: 0.01500691007822752 2023-01-24 04:28:25.789864: step: 626/463, loss: 0.0047962237149477005 2023-01-24 04:28:26.380832: step: 628/463, loss: 0.3398101031780243 2023-01-24 04:28:27.007686: step: 630/463, loss: 0.3707369863986969 2023-01-24 04:28:27.728351: step: 632/463, loss: 0.019323181360960007 2023-01-24 04:28:28.356261: step: 634/463, loss: 0.007944508455693722 2023-01-24 04:28:28.973140: step: 636/463, loss: 0.04026941582560539 2023-01-24 04:28:29.550475: step: 638/463, loss: 0.03118385188281536 2023-01-24 04:28:30.217524: step: 640/463, loss: 6.613253935938701e-05 2023-01-24 04:28:30.909694: step: 642/463, loss: 0.057899050414562225 2023-01-24 04:28:31.505749: step: 644/463, loss: 0.0043227397836744785 2023-01-24 04:28:32.042877: step: 646/463, loss: 0.023480139672756195 2023-01-24 04:28:32.613495: step: 648/463, loss: 0.00424191216006875 2023-01-24 04:28:33.239077: step: 650/463, loss: 0.005802365019917488 2023-01-24 04:28:34.020263: step: 652/463, loss: 0.0011234697885811329 2023-01-24 04:28:34.637399: step: 654/463, loss: 0.007755516562610865 2023-01-24 04:28:35.206151: step: 656/463, loss: 0.001991518074646592 2023-01-24 04:28:35.775326: step: 658/463, loss: 0.002021184889599681 2023-01-24 04:28:36.334029: step: 660/463, loss: 0.04739458113908768 2023-01-24 04:28:36.934194: step: 662/463, loss: 0.050317615270614624 2023-01-24 04:28:37.549954: step: 664/463, loss: 0.006748190149664879 2023-01-24 04:28:38.208440: step: 666/463, loss: 0.059826381504535675 2023-01-24 04:28:38.803825: step: 668/463, loss: 0.010084355250000954 2023-01-24 04:28:39.422803: step: 670/463, loss: 0.09050492942333221 2023-01-24 04:28:40.012316: step: 672/463, loss: 0.009593677707016468 2023-01-24 04:28:40.614774: step: 674/463, loss: 0.08869484066963196 2023-01-24 04:28:41.183718: step: 676/463, loss: 0.0049742525443434715 2023-01-24 04:28:41.804272: step: 678/463, loss: 0.0006836153916083276 2023-01-24 04:28:42.451045: step: 680/463, loss: 0.012122646905481815 2023-01-24 04:28:43.210723: step: 682/463, loss: 0.1786395162343979 2023-01-24 04:28:43.805022: step: 684/463, loss: 0.0017495764186605811 2023-01-24 04:28:44.397659: step: 686/463, loss: 0.004210320767015219 2023-01-24 04:28:44.973578: step: 688/463, loss: 0.00992894358932972 2023-01-24 04:28:45.525813: step: 690/463, loss: 0.004616623744368553 2023-01-24 04:28:46.173721: step: 692/463, loss: 0.005472216289490461 2023-01-24 04:28:46.783246: step: 694/463, loss: 0.004750858526676893 2023-01-24 04:28:47.331561: step: 696/463, loss: 0.0007360982708632946 2023-01-24 04:28:47.935727: step: 698/463, loss: 0.03710366040468216 2023-01-24 04:28:48.566297: step: 700/463, loss: 0.0401921384036541 2023-01-24 04:28:49.187101: step: 702/463, loss: 0.02024674043059349 2023-01-24 04:28:49.813249: step: 704/463, loss: 0.192637637257576 2023-01-24 04:28:50.390945: step: 706/463, loss: 0.0004425140214152634 2023-01-24 04:28:51.022993: step: 708/463, loss: 0.028471769765019417 2023-01-24 04:28:51.622282: step: 710/463, loss: 0.00462668901309371 2023-01-24 04:28:52.289606: step: 712/463, loss: 0.00042205440695397556 2023-01-24 04:28:52.904691: step: 714/463, loss: 0.01462631393224001 2023-01-24 04:28:53.531535: step: 716/463, loss: 0.09215617924928665 2023-01-24 04:28:54.103705: step: 718/463, loss: 0.1404639035463333 2023-01-24 04:28:54.699056: step: 720/463, loss: 0.0009139894973486662 2023-01-24 04:28:55.314715: step: 722/463, loss: 0.004680534824728966 2023-01-24 04:28:55.888370: step: 724/463, loss: 8.959847764344886e-05 2023-01-24 04:28:56.485089: step: 726/463, loss: 0.0074323187582194805 2023-01-24 04:28:57.058587: step: 728/463, loss: 0.005084065720438957 2023-01-24 04:28:57.621365: step: 730/463, loss: 0.00336533784866333 2023-01-24 04:28:58.266283: step: 732/463, loss: 0.018698927015066147 2023-01-24 04:28:58.864686: step: 734/463, loss: 0.007453972939401865 2023-01-24 04:28:59.495678: step: 736/463, loss: 8.966495443019085e-06 2023-01-24 04:29:00.129835: step: 738/463, loss: 0.1251787543296814 2023-01-24 04:29:00.798507: step: 740/463, loss: 0.0010850889375433326 2023-01-24 04:29:01.530909: step: 742/463, loss: 0.007543194107711315 2023-01-24 04:29:02.123747: step: 744/463, loss: 0.014797679148614407 2023-01-24 04:29:02.709991: step: 746/463, loss: 0.004885831847786903 2023-01-24 04:29:03.376056: step: 748/463, loss: 0.00171879050321877 2023-01-24 04:29:04.051184: step: 750/463, loss: 0.003688403172418475 2023-01-24 04:29:04.660971: step: 752/463, loss: 0.0009126869263127446 2023-01-24 04:29:05.267677: step: 754/463, loss: 0.0011168696219101548 2023-01-24 04:29:05.940851: step: 756/463, loss: 0.0016336004482582211 2023-01-24 04:29:06.576429: step: 758/463, loss: 0.014039484784007072 2023-01-24 04:29:07.195581: step: 760/463, loss: 0.012102670036256313 2023-01-24 04:29:07.832966: step: 762/463, loss: 0.0779075026512146 2023-01-24 04:29:08.479332: step: 764/463, loss: 0.014404848217964172 2023-01-24 04:29:09.077691: step: 766/463, loss: 0.013028395362198353 2023-01-24 04:29:09.652748: step: 768/463, loss: 0.003530989633873105 2023-01-24 04:29:10.331663: step: 770/463, loss: 0.02890629507601261 2023-01-24 04:29:10.927210: step: 772/463, loss: 0.010100237093865871 2023-01-24 04:29:11.591478: step: 774/463, loss: 0.04835496470332146 2023-01-24 04:29:12.233476: step: 776/463, loss: 0.1938502937555313 2023-01-24 04:29:12.881113: step: 778/463, loss: 0.017065370455384254 2023-01-24 04:29:13.565724: step: 780/463, loss: 0.052604153752326965 2023-01-24 04:29:14.179634: step: 782/463, loss: 0.0004957179771736264 2023-01-24 04:29:14.767556: step: 784/463, loss: 0.004018211271613836 2023-01-24 04:29:15.498421: step: 786/463, loss: 0.020835421979427338 2023-01-24 04:29:16.078299: step: 788/463, loss: 0.6701751947402954 2023-01-24 04:29:16.717517: step: 790/463, loss: 0.033327676355838776 2023-01-24 04:29:17.366453: step: 792/463, loss: 0.12329889088869095 2023-01-24 04:29:17.934444: step: 794/463, loss: 0.00904530193656683 2023-01-24 04:29:18.521004: step: 796/463, loss: 0.0008595373365096748 2023-01-24 04:29:19.209672: step: 798/463, loss: 0.049598634243011475 2023-01-24 04:29:19.884580: step: 800/463, loss: 0.012237580493092537 2023-01-24 04:29:20.522606: step: 802/463, loss: 0.04790181666612625 2023-01-24 04:29:21.224061: step: 804/463, loss: 0.013909575529396534 2023-01-24 04:29:21.803234: step: 806/463, loss: 0.03652876615524292 2023-01-24 04:29:22.398504: step: 808/463, loss: 0.0009854212403297424 2023-01-24 04:29:23.096590: step: 810/463, loss: 0.4963323175907135 2023-01-24 04:29:23.749180: step: 812/463, loss: 0.01650214195251465 2023-01-24 04:29:24.455155: step: 814/463, loss: 0.00038486087578348815 2023-01-24 04:29:25.002644: step: 816/463, loss: 0.023917051032185555 2023-01-24 04:29:25.601517: step: 818/463, loss: 0.0021937338169664145 2023-01-24 04:29:26.206268: step: 820/463, loss: 0.000810742552857846 2023-01-24 04:29:26.856186: step: 822/463, loss: 0.00015931669622659683 2023-01-24 04:29:27.479148: step: 824/463, loss: 0.019404729828238487 2023-01-24 04:29:28.067717: step: 826/463, loss: 0.009138391353189945 2023-01-24 04:29:28.757745: step: 828/463, loss: 6.217642658157274e-05 2023-01-24 04:29:29.347650: step: 830/463, loss: 13.489502906799316 2023-01-24 04:29:29.971175: step: 832/463, loss: 0.003037678077816963 2023-01-24 04:29:30.579903: step: 834/463, loss: 0.0023070438764989376 2023-01-24 04:29:31.182624: step: 836/463, loss: 0.02351263165473938 2023-01-24 04:29:31.805260: step: 838/463, loss: 0.00047117617214098573 2023-01-24 04:29:32.441454: step: 840/463, loss: 0.0038112776819616556 2023-01-24 04:29:33.059019: step: 842/463, loss: 0.008953064680099487 2023-01-24 04:29:33.728395: step: 844/463, loss: 0.006670886650681496 2023-01-24 04:29:34.305965: step: 846/463, loss: 0.00036838703090325 2023-01-24 04:29:34.913074: step: 848/463, loss: 0.007583236321806908 2023-01-24 04:29:35.583965: step: 850/463, loss: 0.00285124103538692 2023-01-24 04:29:36.218355: step: 852/463, loss: 0.08847746253013611 2023-01-24 04:29:36.872687: step: 854/463, loss: 0.00040643694228492677 2023-01-24 04:29:37.533082: step: 856/463, loss: 0.004814634099602699 2023-01-24 04:29:38.146230: step: 858/463, loss: 0.006215792149305344 2023-01-24 04:29:38.807457: step: 860/463, loss: 0.0004852747078984976 2023-01-24 04:29:39.393027: step: 862/463, loss: 0.007043166551738977 2023-01-24 04:29:40.028156: step: 864/463, loss: 0.009274091571569443 2023-01-24 04:29:40.654639: step: 866/463, loss: 0.0008911711047403514 2023-01-24 04:29:41.278228: step: 868/463, loss: 0.013141664676368237 2023-01-24 04:29:41.937861: step: 870/463, loss: 0.0004966504639014602 2023-01-24 04:29:42.590846: step: 872/463, loss: 0.0002911067276727408 2023-01-24 04:29:43.280141: step: 874/463, loss: 0.00825702678412199 2023-01-24 04:29:43.882745: step: 876/463, loss: 0.010459580458700657 2023-01-24 04:29:44.511113: step: 878/463, loss: 0.00437451247125864 2023-01-24 04:29:45.100850: step: 880/463, loss: 0.016428804025053978 2023-01-24 04:29:45.697948: step: 882/463, loss: 0.04766729101538658 2023-01-24 04:29:46.277590: step: 884/463, loss: 0.00655767135322094 2023-01-24 04:29:46.915533: step: 886/463, loss: 0.0003531308611854911 2023-01-24 04:29:47.485409: step: 888/463, loss: 0.008266502059996128 2023-01-24 04:29:48.122963: step: 890/463, loss: 0.015137778595089912 2023-01-24 04:29:48.708574: step: 892/463, loss: 0.005476124584674835 2023-01-24 04:29:49.264946: step: 894/463, loss: 0.00510898744687438 2023-01-24 04:29:49.844215: step: 896/463, loss: 0.049214065074920654 2023-01-24 04:29:50.418080: step: 898/463, loss: 0.0038208586629480124 2023-01-24 04:29:50.987197: step: 900/463, loss: 0.0021358139347285032 2023-01-24 04:29:51.537115: step: 902/463, loss: 0.005033013876527548 2023-01-24 04:29:52.159521: step: 904/463, loss: 0.016478123143315315 2023-01-24 04:29:52.790441: step: 906/463, loss: 0.00983423925936222 2023-01-24 04:29:53.348912: step: 908/463, loss: 0.0019199280068278313 2023-01-24 04:29:53.936470: step: 910/463, loss: 0.0015439112903550267 2023-01-24 04:29:54.488092: step: 912/463, loss: 0.0016192031325772405 2023-01-24 04:29:55.058380: step: 914/463, loss: 0.005190224852412939 2023-01-24 04:29:55.676254: step: 916/463, loss: 0.005277537740767002 2023-01-24 04:29:56.246608: step: 918/463, loss: 0.0022299408446997404 2023-01-24 04:29:56.854924: step: 920/463, loss: 0.0019721633289009333 2023-01-24 04:29:57.451707: step: 922/463, loss: 0.012343078851699829 2023-01-24 04:29:58.112761: step: 924/463, loss: 0.002723332028836012 2023-01-24 04:29:58.693692: step: 926/463, loss: 0.0010699160629883409 ================================================== Loss: 0.066 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.37211939617744333, 'r': 0.32622231695252146, 'f1': 0.34766261078661037}, 'combined': 0.2561724500532918, 'epoch': 36} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.36890685531440043, 'r': 0.39026818976734823, 'f1': 0.3792869946108753}, 'combined': 0.2939927900811569, 'epoch': 36} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3459280279503106, 'r': 0.328204960104659, 'f1': 0.336833522833798}, 'combined': 0.24819312208806169, 'epoch': 36} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3507039926045664, 'r': 0.38809522711020034, 'f1': 0.3684534093332443}, 'combined': 0.2855954656075865, 'epoch': 36} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3367781512342849, 'r': 0.31441148084870624, 'f1': 0.32521069756087967}, 'combined': 0.2396289350448587, 'epoch': 36} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35271312358412693, 'r': 0.3776753575142536, 'f1': 0.36476767774124086}, 'combined': 0.28273858274679914, 'epoch': 36} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2899305555555556, 'r': 0.2982142857142857, 'f1': 0.29401408450704225}, 'combined': 0.1960093896713615, 'epoch': 36} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3148148148148148, 'r': 0.3695652173913043, 'f1': 0.34}, 'combined': 0.17, 'epoch': 36} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 36} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35531381856540084, 'r': 0.3195801707779886, 'f1': 0.336500999000999}, 'combined': 0.24794810452705188, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35077406976066583, 'r': 0.38269193088778525, 'f1': 0.3660385237854157}, 'combined': 0.2837236404461117, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.3142857142857143, 'f1': 0.360655737704918}, 'combined': 0.24043715846994532, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3556843389529725, 'r': 0.32598773380319873, 'f1': 0.340189179632249}, 'combined': 0.25066571130797294, 'epoch': 19} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35235259195938173, 'r': 0.3698406801632481, 'f1': 0.3608848968767838}, 'combined': 0.2797289631293731, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.41304347826086957, 'r': 0.41304347826086957, 'f1': 0.41304347826086957}, 'combined': 0.20652173913043478, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34001213859732793, 'r': 0.32968918941790243, 'f1': 0.33477110370565427}, 'combined': 0.24667344483574524, 'epoch': 29} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34157155234650616, 'r': 0.36386160769264764, 'f1': 0.3523644229368942}, 'combined': 0.2731245766305113, 'epoch': 29} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6666666666666666, 'r': 0.20689655172413793, 'f1': 0.31578947368421056}, 'combined': 0.2105263157894737, 'epoch': 29} ****************************** Epoch: 37 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:32:32.495871: step: 2/463, loss: 0.6587218046188354 2023-01-24 04:32:33.206475: step: 4/463, loss: 0.010776903480291367 2023-01-24 04:32:33.838074: step: 6/463, loss: 0.0005929653998464346 2023-01-24 04:32:34.383585: step: 8/463, loss: 0.0003558184253051877 2023-01-24 04:32:34.972561: step: 10/463, loss: 0.0017356309108436108 2023-01-24 04:32:35.589826: step: 12/463, loss: 0.0006220794748514891 2023-01-24 04:32:36.228953: step: 14/463, loss: 0.28144556283950806 2023-01-24 04:32:36.832547: step: 16/463, loss: 0.0003804799052886665 2023-01-24 04:32:37.461970: step: 18/463, loss: 0.00018494875985197723 2023-01-24 04:32:38.125475: step: 20/463, loss: 0.00023998554388526827 2023-01-24 04:32:38.744710: step: 22/463, loss: 0.01660904474556446 2023-01-24 04:32:39.412207: step: 24/463, loss: 0.05208080634474754 2023-01-24 04:32:40.013780: step: 26/463, loss: 0.008441172540187836 2023-01-24 04:32:40.644104: step: 28/463, loss: 0.012856683693826199 2023-01-24 04:32:41.267157: step: 30/463, loss: 0.06990411132574081 2023-01-24 04:32:41.889730: step: 32/463, loss: 0.0006049636285752058 2023-01-24 04:32:42.446974: step: 34/463, loss: 0.001130929565988481 2023-01-24 04:32:43.119562: step: 36/463, loss: 0.03293420374393463 2023-01-24 04:32:43.703411: step: 38/463, loss: 0.00443666847422719 2023-01-24 04:32:44.299987: step: 40/463, loss: 0.07706819474697113 2023-01-24 04:32:44.837643: step: 42/463, loss: 0.0007607361185364425 2023-01-24 04:32:45.417044: step: 44/463, loss: 0.01823762059211731 2023-01-24 04:32:46.055175: step: 46/463, loss: 0.01940738968551159 2023-01-24 04:32:46.638827: step: 48/463, loss: 0.00553401792421937 2023-01-24 04:32:47.228078: step: 50/463, loss: 0.2633526623249054 2023-01-24 04:32:47.851656: step: 52/463, loss: 0.003037841757759452 2023-01-24 04:32:48.430038: step: 54/463, loss: 0.0010397820733487606 2023-01-24 04:32:49.107144: step: 56/463, loss: 0.018858512863516808 2023-01-24 04:32:49.903935: step: 58/463, loss: 0.001477002282626927 2023-01-24 04:32:50.484521: step: 60/463, loss: 0.030347801744937897 2023-01-24 04:32:51.090924: step: 62/463, loss: 0.7860462665557861 2023-01-24 04:32:51.717402: step: 64/463, loss: 0.0010909750126302242 2023-01-24 04:32:52.266510: step: 66/463, loss: 6.107752415118739e-05 2023-01-24 04:32:52.934023: step: 68/463, loss: 0.2685215175151825 2023-01-24 04:32:53.574682: step: 70/463, loss: 0.009650515392422676 2023-01-24 04:32:54.182788: step: 72/463, loss: 0.002894675126299262 2023-01-24 04:32:54.761016: step: 74/463, loss: 0.017059307545423508 2023-01-24 04:32:55.375693: step: 76/463, loss: 0.0068626548163592815 2023-01-24 04:32:55.975615: step: 78/463, loss: 0.0013277180260047317 2023-01-24 04:32:56.601005: step: 80/463, loss: 0.0013721180148422718 2023-01-24 04:32:57.196410: step: 82/463, loss: 0.008821958675980568 2023-01-24 04:32:57.787129: step: 84/463, loss: 0.020003188401460648 2023-01-24 04:32:58.352505: step: 86/463, loss: 0.0007712449296377599 2023-01-24 04:32:58.964439: step: 88/463, loss: 5.655456334352493e-06 2023-01-24 04:32:59.592632: step: 90/463, loss: 0.018775301054120064 2023-01-24 04:33:00.225072: step: 92/463, loss: 0.0014276603469625115 2023-01-24 04:33:00.789725: step: 94/463, loss: 0.00956269446760416 2023-01-24 04:33:01.465823: step: 96/463, loss: 0.002346278168261051 2023-01-24 04:33:02.118261: step: 98/463, loss: 0.0004997443757019937 2023-01-24 04:33:02.679305: step: 100/463, loss: 2.352398951188661e-05 2023-01-24 04:33:03.280556: step: 102/463, loss: 0.026509350165724754 2023-01-24 04:33:03.914293: step: 104/463, loss: 0.00048460852121934295 2023-01-24 04:33:04.501928: step: 106/463, loss: 0.0020447096321731806 2023-01-24 04:33:05.105456: step: 108/463, loss: 0.042406003922224045 2023-01-24 04:33:05.651231: step: 110/463, loss: 0.0007937455666251481 2023-01-24 04:33:06.272252: step: 112/463, loss: 0.00023135694209486246 2023-01-24 04:33:06.860636: step: 114/463, loss: 0.0032308471854776144 2023-01-24 04:33:07.431345: step: 116/463, loss: 0.013062181882560253 2023-01-24 04:33:08.003677: step: 118/463, loss: 0.003929843660444021 2023-01-24 04:33:08.558467: step: 120/463, loss: 8.747879473958164e-05 2023-01-24 04:33:09.189140: step: 122/463, loss: 0.0021208536345511675 2023-01-24 04:33:09.769632: step: 124/463, loss: 0.004104707855731249 2023-01-24 04:33:10.325166: step: 126/463, loss: 0.000277163548162207 2023-01-24 04:33:10.922549: step: 128/463, loss: 0.05952223017811775 2023-01-24 04:33:11.525505: step: 130/463, loss: 0.008136567659676075 2023-01-24 04:33:12.143536: step: 132/463, loss: 2.4742646928643808e-05 2023-01-24 04:33:12.796658: step: 134/463, loss: 0.02547604590654373 2023-01-24 04:33:13.468868: step: 136/463, loss: 0.018019111827015877 2023-01-24 04:33:14.144484: step: 138/463, loss: 0.0668676421046257 2023-01-24 04:33:14.731516: step: 140/463, loss: 0.00028247645241208375 2023-01-24 04:33:15.329361: step: 142/463, loss: 0.0031307446770370007 2023-01-24 04:33:15.889151: step: 144/463, loss: 0.011265382170677185 2023-01-24 04:33:16.483029: step: 146/463, loss: 0.00040918265585787594 2023-01-24 04:33:17.163620: step: 148/463, loss: 0.09168965369462967 2023-01-24 04:33:17.786140: step: 150/463, loss: 0.015680095180869102 2023-01-24 04:33:18.551702: step: 152/463, loss: 0.00029443143284879625 2023-01-24 04:33:19.216547: step: 154/463, loss: 0.0056647262535989285 2023-01-24 04:33:19.891270: step: 156/463, loss: 0.0007648394675925374 2023-01-24 04:33:20.495242: step: 158/463, loss: 0.011920605786144733 2023-01-24 04:33:21.140275: step: 160/463, loss: 0.173386812210083 2023-01-24 04:33:21.729719: step: 162/463, loss: 3.1526673410553485e-05 2023-01-24 04:33:22.325504: step: 164/463, loss: 0.014528918080031872 2023-01-24 04:33:22.920883: step: 166/463, loss: 0.011221883818507195 2023-01-24 04:33:23.520366: step: 168/463, loss: 0.00778588792309165 2023-01-24 04:33:24.111502: step: 170/463, loss: 0.002198287984356284 2023-01-24 04:33:24.669587: step: 172/463, loss: 0.024640100076794624 2023-01-24 04:33:25.358006: step: 174/463, loss: 0.002938736928626895 2023-01-24 04:33:25.956697: step: 176/463, loss: 0.0010286591714248061 2023-01-24 04:33:26.634077: step: 178/463, loss: 0.03232716768980026 2023-01-24 04:33:27.200384: step: 180/463, loss: 0.0015578961465507746 2023-01-24 04:33:27.863277: step: 182/463, loss: 0.0005568155902437866 2023-01-24 04:33:28.455764: step: 184/463, loss: 0.00010768469655886292 2023-01-24 04:33:29.071255: step: 186/463, loss: 0.014435186982154846 2023-01-24 04:33:29.642573: step: 188/463, loss: 0.013997500762343407 2023-01-24 04:33:30.314532: step: 190/463, loss: 0.01962263137102127 2023-01-24 04:33:30.943233: step: 192/463, loss: 0.0076688127592206 2023-01-24 04:33:31.522787: step: 194/463, loss: 0.00011455887579359114 2023-01-24 04:33:32.117076: step: 196/463, loss: 0.00025081579224206507 2023-01-24 04:33:32.720446: step: 198/463, loss: 0.0312761627137661 2023-01-24 04:33:33.297248: step: 200/463, loss: 0.0003015534020960331 2023-01-24 04:33:33.914801: step: 202/463, loss: 0.0012189560802653432 2023-01-24 04:33:34.490837: step: 204/463, loss: 0.00765312509611249 2023-01-24 04:33:35.132680: step: 206/463, loss: 0.015591097064316273 2023-01-24 04:33:35.740076: step: 208/463, loss: 0.0079575777053833 2023-01-24 04:33:36.318156: step: 210/463, loss: 0.0045778690837323666 2023-01-24 04:33:36.852200: step: 212/463, loss: 0.01078322809189558 2023-01-24 04:33:37.461469: step: 214/463, loss: 0.04701203107833862 2023-01-24 04:33:38.095620: step: 216/463, loss: 0.007716403342783451 2023-01-24 04:33:38.734966: step: 218/463, loss: 0.0035424574743956327 2023-01-24 04:33:39.354433: step: 220/463, loss: 3.8686380321451e-06 2023-01-24 04:33:39.984237: step: 222/463, loss: 0.001566283404827118 2023-01-24 04:33:40.662515: step: 224/463, loss: 0.030678650364279747 2023-01-24 04:33:41.273911: step: 226/463, loss: 0.08632715046405792 2023-01-24 04:33:41.926317: step: 228/463, loss: 0.050980955362319946 2023-01-24 04:33:42.550040: step: 230/463, loss: 0.009675303474068642 2023-01-24 04:33:43.150209: step: 232/463, loss: 0.0043572308495640755 2023-01-24 04:33:43.751556: step: 234/463, loss: 0.0005882259574718773 2023-01-24 04:33:44.426121: step: 236/463, loss: 0.002075779251754284 2023-01-24 04:33:45.058497: step: 238/463, loss: 0.0683252215385437 2023-01-24 04:33:45.668043: step: 240/463, loss: 0.0024023137520998716 2023-01-24 04:33:46.242241: step: 242/463, loss: 0.09539209306240082 2023-01-24 04:33:46.798799: step: 244/463, loss: 0.000319745420711115 2023-01-24 04:33:47.491659: step: 246/463, loss: 0.0054846820421516895 2023-01-24 04:33:48.098970: step: 248/463, loss: 0.0013603162951767445 2023-01-24 04:33:48.773391: step: 250/463, loss: 0.027929656207561493 2023-01-24 04:33:49.346263: step: 252/463, loss: 0.000322263891575858 2023-01-24 04:33:49.960674: step: 254/463, loss: 0.014581589959561825 2023-01-24 04:33:50.551250: step: 256/463, loss: 0.03552702069282532 2023-01-24 04:33:51.147002: step: 258/463, loss: 0.040786027908325195 2023-01-24 04:33:51.737836: step: 260/463, loss: 0.0446326918900013 2023-01-24 04:33:52.271188: step: 262/463, loss: 0.0399804525077343 2023-01-24 04:33:52.901229: step: 264/463, loss: 0.004975477699190378 2023-01-24 04:33:53.524647: step: 266/463, loss: 0.012718992307782173 2023-01-24 04:33:54.083738: step: 268/463, loss: 0.004722801968455315 2023-01-24 04:33:54.682459: step: 270/463, loss: 0.0017666302155703306 2023-01-24 04:33:55.220738: step: 272/463, loss: 0.013465424999594688 2023-01-24 04:33:55.809486: step: 274/463, loss: 0.0062596979551017284 2023-01-24 04:33:56.508153: step: 276/463, loss: 0.002826994052156806 2023-01-24 04:33:57.203359: step: 278/463, loss: 0.01050316821783781 2023-01-24 04:33:57.831524: step: 280/463, loss: 0.013287696056067944 2023-01-24 04:33:58.453977: step: 282/463, loss: 6.352769560180604e-05 2023-01-24 04:33:59.025529: step: 284/463, loss: 0.0016838241135701537 2023-01-24 04:33:59.588116: step: 286/463, loss: 0.05246245115995407 2023-01-24 04:34:00.204219: step: 288/463, loss: 0.0007788481889292598 2023-01-24 04:34:00.835599: step: 290/463, loss: 0.0002616256824694574 2023-01-24 04:34:01.355374: step: 292/463, loss: 0.004513297230005264 2023-01-24 04:34:01.983955: step: 294/463, loss: 0.0008384017855860293 2023-01-24 04:34:02.620189: step: 296/463, loss: 0.00442774873226881 2023-01-24 04:34:03.294693: step: 298/463, loss: 0.22174061834812164 2023-01-24 04:34:03.961998: step: 300/463, loss: 0.014089682139456272 2023-01-24 04:34:04.497927: step: 302/463, loss: 0.011865006759762764 2023-01-24 04:34:05.103307: step: 304/463, loss: 0.007160308305174112 2023-01-24 04:34:05.705718: step: 306/463, loss: 2.191199779510498 2023-01-24 04:34:06.316945: step: 308/463, loss: 0.019760850816965103 2023-01-24 04:34:06.929815: step: 310/463, loss: 0.05446013808250427 2023-01-24 04:34:07.546895: step: 312/463, loss: 0.0038349914830178022 2023-01-24 04:34:08.197626: step: 314/463, loss: 0.015616307035088539 2023-01-24 04:34:08.784959: step: 316/463, loss: 0.06799617409706116 2023-01-24 04:34:09.387625: step: 318/463, loss: 0.005231272894889116 2023-01-24 04:34:09.994092: step: 320/463, loss: 0.0014700135216116905 2023-01-24 04:34:10.588289: step: 322/463, loss: 0.004051823168992996 2023-01-24 04:34:11.198094: step: 324/463, loss: 0.0331818126142025 2023-01-24 04:34:11.791019: step: 326/463, loss: 0.0007295148097909987 2023-01-24 04:34:12.290338: step: 328/463, loss: 0.00043950843974016607 2023-01-24 04:34:12.900532: step: 330/463, loss: 0.007079733535647392 2023-01-24 04:34:13.486032: step: 332/463, loss: 0.006048364564776421 2023-01-24 04:34:14.098399: step: 334/463, loss: 0.04785123094916344 2023-01-24 04:34:14.753733: step: 336/463, loss: 0.04843372851610184 2023-01-24 04:34:15.551953: step: 338/463, loss: 0.00014510856999550015 2023-01-24 04:34:16.217023: step: 340/463, loss: 0.0001122959511121735 2023-01-24 04:34:16.799981: step: 342/463, loss: 0.031640488654375076 2023-01-24 04:34:17.414498: step: 344/463, loss: 0.025138873606920242 2023-01-24 04:34:17.953760: step: 346/463, loss: 0.0037102666683495045 2023-01-24 04:34:18.560581: step: 348/463, loss: 0.03473350405693054 2023-01-24 04:34:19.173386: step: 350/463, loss: 0.0023750332184135914 2023-01-24 04:34:19.760543: step: 352/463, loss: 0.0017465592827647924 2023-01-24 04:34:20.360667: step: 354/463, loss: 0.00996798649430275 2023-01-24 04:34:21.054623: step: 356/463, loss: 0.7772992849349976 2023-01-24 04:34:21.627719: step: 358/463, loss: 0.001652126433327794 2023-01-24 04:34:22.357676: step: 360/463, loss: 0.004635266028344631 2023-01-24 04:34:22.898307: step: 362/463, loss: 0.0014862800016999245 2023-01-24 04:34:23.487981: step: 364/463, loss: 0.00030541254091076553 2023-01-24 04:34:24.094110: step: 366/463, loss: 0.001901472918689251 2023-01-24 04:34:24.652158: step: 368/463, loss: 0.004664566367864609 2023-01-24 04:34:25.265616: step: 370/463, loss: 0.0002563406014814973 2023-01-24 04:34:25.810540: step: 372/463, loss: 3.374091102159582e-05 2023-01-24 04:34:26.439500: step: 374/463, loss: 0.007033093832433224 2023-01-24 04:34:27.007000: step: 376/463, loss: 0.0037258719094097614 2023-01-24 04:34:27.649362: step: 378/463, loss: 0.002449818653985858 2023-01-24 04:34:28.270102: step: 380/463, loss: 0.010926024988293648 2023-01-24 04:34:28.911019: step: 382/463, loss: 0.0014149992493912578 2023-01-24 04:34:29.512249: step: 384/463, loss: 0.06429838389158249 2023-01-24 04:34:30.129375: step: 386/463, loss: 0.00046093412674963474 2023-01-24 04:34:30.765295: step: 388/463, loss: 0.0019565694965422153 2023-01-24 04:34:31.258375: step: 390/463, loss: 0.0019051478011533618 2023-01-24 04:34:31.853959: step: 392/463, loss: 0.008902883157134056 2023-01-24 04:34:32.470479: step: 394/463, loss: 0.006219537928700447 2023-01-24 04:34:33.113067: step: 396/463, loss: 0.10888869315385818 2023-01-24 04:34:33.715557: step: 398/463, loss: 0.016297919675707817 2023-01-24 04:34:34.334166: step: 400/463, loss: 0.008376400917768478 2023-01-24 04:34:35.000197: step: 402/463, loss: 0.007794746197760105 2023-01-24 04:34:35.643709: step: 404/463, loss: 0.015515094622969627 2023-01-24 04:34:36.226022: step: 406/463, loss: 0.0009189951815642416 2023-01-24 04:34:36.888008: step: 408/463, loss: 0.021026665344834328 2023-01-24 04:34:37.431770: step: 410/463, loss: 9.61375844781287e-05 2023-01-24 04:34:38.097488: step: 412/463, loss: 0.012185735628008842 2023-01-24 04:34:38.681292: step: 414/463, loss: 0.06111254170536995 2023-01-24 04:34:39.315572: step: 416/463, loss: 0.0015753593761473894 2023-01-24 04:34:39.900295: step: 418/463, loss: 0.00046376415411941707 2023-01-24 04:34:40.494406: step: 420/463, loss: 0.004102764185518026 2023-01-24 04:34:41.091907: step: 422/463, loss: 0.0336264967918396 2023-01-24 04:34:41.751116: step: 424/463, loss: 0.006779925897717476 2023-01-24 04:34:42.376338: step: 426/463, loss: 0.0041001299396157265 2023-01-24 04:34:43.074662: step: 428/463, loss: 0.0030108659993857145 2023-01-24 04:34:43.717506: step: 430/463, loss: 0.08740600198507309 2023-01-24 04:34:44.335910: step: 432/463, loss: 0.014774695038795471 2023-01-24 04:34:44.963977: step: 434/463, loss: 0.006550933234393597 2023-01-24 04:34:45.635187: step: 436/463, loss: 0.05794130265712738 2023-01-24 04:34:46.232735: step: 438/463, loss: 6.887897325213999e-05 2023-01-24 04:34:46.810125: step: 440/463, loss: 0.01628354750573635 2023-01-24 04:34:47.446421: step: 442/463, loss: 0.07845579087734222 2023-01-24 04:34:48.151192: step: 444/463, loss: 1.5991721738828346e-05 2023-01-24 04:34:48.755563: step: 446/463, loss: 1.4149316484690644e-05 2023-01-24 04:34:49.332689: step: 448/463, loss: 0.009916380047798157 2023-01-24 04:34:49.895325: step: 450/463, loss: 0.005335419438779354 2023-01-24 04:34:50.491945: step: 452/463, loss: 0.00026734109269455075 2023-01-24 04:34:51.035915: step: 454/463, loss: 0.016516294330358505 2023-01-24 04:34:51.608007: step: 456/463, loss: 0.0015434387605637312 2023-01-24 04:34:52.241118: step: 458/463, loss: 0.01522710919380188 2023-01-24 04:34:52.861248: step: 460/463, loss: 0.00010683065193006769 2023-01-24 04:34:53.448625: step: 462/463, loss: 0.0008388441638089716 2023-01-24 04:34:54.070447: step: 464/463, loss: 7.75800144765526e-05 2023-01-24 04:34:54.637897: step: 466/463, loss: 0.146086648106575 2023-01-24 04:34:55.204634: step: 468/463, loss: 0.0059030367992818356 2023-01-24 04:34:55.856279: step: 470/463, loss: 0.00972730852663517 2023-01-24 04:34:56.446590: step: 472/463, loss: 0.031643953174352646 2023-01-24 04:34:57.094227: step: 474/463, loss: 0.0016331018414348364 2023-01-24 04:34:57.715702: step: 476/463, loss: 0.0006400145939551294 2023-01-24 04:34:58.331936: step: 478/463, loss: 0.00296087097376585 2023-01-24 04:34:58.931684: step: 480/463, loss: 0.004904331639409065 2023-01-24 04:34:59.556048: step: 482/463, loss: 0.12728621065616608 2023-01-24 04:35:00.128261: step: 484/463, loss: 1.8096621715812944e-05 2023-01-24 04:35:00.829853: step: 486/463, loss: 0.0012359794927760959 2023-01-24 04:35:01.434633: step: 488/463, loss: 0.0007239349070005119 2023-01-24 04:35:02.047387: step: 490/463, loss: 0.006451805587857962 2023-01-24 04:35:02.614749: step: 492/463, loss: 0.00978158786892891 2023-01-24 04:35:03.231445: step: 494/463, loss: 0.049171071499586105 2023-01-24 04:35:03.776413: step: 496/463, loss: 0.01482637319713831 2023-01-24 04:35:04.373003: step: 498/463, loss: 0.014235462993383408 2023-01-24 04:35:05.000084: step: 500/463, loss: 0.03949636220932007 2023-01-24 04:35:05.589677: step: 502/463, loss: 0.00029810681007802486 2023-01-24 04:35:06.234336: step: 504/463, loss: 0.08927298337221146 2023-01-24 04:35:06.830861: step: 506/463, loss: 0.0010077450424432755 2023-01-24 04:35:07.433988: step: 508/463, loss: 0.0011717199813574553 2023-01-24 04:35:08.074444: step: 510/463, loss: 0.002724254271015525 2023-01-24 04:35:08.727954: step: 512/463, loss: 0.00045990533544681966 2023-01-24 04:35:09.416458: step: 514/463, loss: 0.007768549956381321 2023-01-24 04:35:10.055194: step: 516/463, loss: 0.0007777441060170531 2023-01-24 04:35:10.685230: step: 518/463, loss: 0.025105273351073265 2023-01-24 04:35:11.328863: step: 520/463, loss: 2.189960468967911e-05 2023-01-24 04:35:11.922410: step: 522/463, loss: 0.0006834982195869088 2023-01-24 04:35:12.517661: step: 524/463, loss: 0.08271333575248718 2023-01-24 04:35:13.181270: step: 526/463, loss: 0.021871773526072502 2023-01-24 04:35:13.777532: step: 528/463, loss: 0.0001770402886904776 2023-01-24 04:35:14.379315: step: 530/463, loss: 1.3111066436977126e-05 2023-01-24 04:35:15.054586: step: 532/463, loss: 0.3096505105495453 2023-01-24 04:35:15.689722: step: 534/463, loss: 0.0037230215966701508 2023-01-24 04:35:16.350845: step: 536/463, loss: 0.0009449947974644601 2023-01-24 04:35:16.990880: step: 538/463, loss: 0.0023832169827073812 2023-01-24 04:35:17.622685: step: 540/463, loss: 0.008003924041986465 2023-01-24 04:35:18.208122: step: 542/463, loss: 0.00026716661523096263 2023-01-24 04:35:18.868932: step: 544/463, loss: 0.025845911353826523 2023-01-24 04:35:19.478593: step: 546/463, loss: 0.005311736837029457 2023-01-24 04:35:20.080123: step: 548/463, loss: 0.006617727689445019 2023-01-24 04:35:20.682863: step: 550/463, loss: 0.015924856066703796 2023-01-24 04:35:21.303923: step: 552/463, loss: 0.00019956158939749002 2023-01-24 04:35:21.922138: step: 554/463, loss: 0.007889360189437866 2023-01-24 04:35:22.551643: step: 556/463, loss: 0.0014797173207625747 2023-01-24 04:35:23.248496: step: 558/463, loss: 0.0015090981032699347 2023-01-24 04:35:23.903670: step: 560/463, loss: 0.00020871504966635257 2023-01-24 04:35:24.529157: step: 562/463, loss: 0.007421551272273064 2023-01-24 04:35:25.110197: step: 564/463, loss: 0.01180185191333294 2023-01-24 04:35:25.774935: step: 566/463, loss: 0.0035408809781074524 2023-01-24 04:35:26.405520: step: 568/463, loss: 0.00014776161697227508 2023-01-24 04:35:26.975064: step: 570/463, loss: 0.013659101910889149 2023-01-24 04:35:27.540322: step: 572/463, loss: 0.024044468998908997 2023-01-24 04:35:28.197158: step: 574/463, loss: 0.0314486064016819 2023-01-24 04:35:28.762789: step: 576/463, loss: 0.0028053412679582834 2023-01-24 04:35:29.363597: step: 578/463, loss: 0.0020871474407613277 2023-01-24 04:35:30.000681: step: 580/463, loss: 0.005823214538395405 2023-01-24 04:35:30.578086: step: 582/463, loss: 0.011518976651132107 2023-01-24 04:35:31.192843: step: 584/463, loss: 0.016073133796453476 2023-01-24 04:35:31.743714: step: 586/463, loss: 0.0047295973636209965 2023-01-24 04:35:32.349987: step: 588/463, loss: 0.005530184600502253 2023-01-24 04:35:32.947996: step: 590/463, loss: 0.004026345908641815 2023-01-24 04:35:33.566432: step: 592/463, loss: 0.013685299083590508 2023-01-24 04:35:34.184188: step: 594/463, loss: 0.015032644383609295 2023-01-24 04:35:34.792817: step: 596/463, loss: 0.0006871359655633569 2023-01-24 04:35:35.491326: step: 598/463, loss: 0.007380248513072729 2023-01-24 04:35:36.080334: step: 600/463, loss: 0.0011174535611644387 2023-01-24 04:35:36.667632: step: 602/463, loss: 0.001170499948784709 2023-01-24 04:35:37.245263: step: 604/463, loss: 0.014355262741446495 2023-01-24 04:35:37.887373: step: 606/463, loss: 0.02323991246521473 2023-01-24 04:35:38.476095: step: 608/463, loss: 7.946568075567484e-06 2023-01-24 04:35:39.097505: step: 610/463, loss: 0.07203865796327591 2023-01-24 04:35:39.706142: step: 612/463, loss: 0.007512159179896116 2023-01-24 04:35:40.349430: step: 614/463, loss: 0.002797973807901144 2023-01-24 04:35:40.973013: step: 616/463, loss: 0.0006113756680861115 2023-01-24 04:35:41.641700: step: 618/463, loss: 0.02156316675245762 2023-01-24 04:35:42.254122: step: 620/463, loss: 0.02305697463452816 2023-01-24 04:35:42.874458: step: 622/463, loss: 0.015090367756783962 2023-01-24 04:35:43.511509: step: 624/463, loss: 2.848814256140031e-05 2023-01-24 04:35:44.113839: step: 626/463, loss: 0.009909285232424736 2023-01-24 04:35:44.760635: step: 628/463, loss: 0.0017538886750116944 2023-01-24 04:35:45.372639: step: 630/463, loss: 0.04216102510690689 2023-01-24 04:35:46.045821: step: 632/463, loss: 0.008701596409082413 2023-01-24 04:35:46.644959: step: 634/463, loss: 0.023315465077757835 2023-01-24 04:35:47.240126: step: 636/463, loss: 0.0005711687263101339 2023-01-24 04:35:47.830349: step: 638/463, loss: 0.0016999803483486176 2023-01-24 04:35:48.471919: step: 640/463, loss: 0.008287470787763596 2023-01-24 04:35:49.124909: step: 642/463, loss: 0.0002698024036362767 2023-01-24 04:35:49.735011: step: 644/463, loss: 0.010481531731784344 2023-01-24 04:35:50.304026: step: 646/463, loss: 0.03454779461026192 2023-01-24 04:35:50.888740: step: 648/463, loss: 0.016674399375915527 2023-01-24 04:35:51.497484: step: 650/463, loss: 0.000723723613191396 2023-01-24 04:35:52.112132: step: 652/463, loss: 0.008157163858413696 2023-01-24 04:35:52.723708: step: 654/463, loss: 0.014090480282902718 2023-01-24 04:35:53.310306: step: 656/463, loss: 0.019488925114274025 2023-01-24 04:35:53.970118: step: 658/463, loss: 0.019174255430698395 2023-01-24 04:35:54.617953: step: 660/463, loss: 0.010555608198046684 2023-01-24 04:35:55.243421: step: 662/463, loss: 0.0009650069405324757 2023-01-24 04:35:55.836348: step: 664/463, loss: 0.002380374586209655 2023-01-24 04:35:56.428070: step: 666/463, loss: 0.03403887525200844 2023-01-24 04:35:57.064456: step: 668/463, loss: 0.011219196021556854 2023-01-24 04:35:57.691313: step: 670/463, loss: 0.017417466267943382 2023-01-24 04:35:58.355266: step: 672/463, loss: 2.057253550447058e-05 2023-01-24 04:35:59.010161: step: 674/463, loss: 0.06781242787837982 2023-01-24 04:35:59.631065: step: 676/463, loss: 0.028085824102163315 2023-01-24 04:36:00.280343: step: 678/463, loss: 0.002456174697726965 2023-01-24 04:36:00.822281: step: 680/463, loss: 0.0015936418203637004 2023-01-24 04:36:01.434186: step: 682/463, loss: 0.0032691669184714556 2023-01-24 04:36:02.137589: step: 684/463, loss: 0.23696757853031158 2023-01-24 04:36:02.802542: step: 686/463, loss: 0.019505681470036507 2023-01-24 04:36:03.430793: step: 688/463, loss: 0.0034016871359199286 2023-01-24 04:36:04.026846: step: 690/463, loss: 0.024251684546470642 2023-01-24 04:36:04.647322: step: 692/463, loss: 0.08296268433332443 2023-01-24 04:36:05.235692: step: 694/463, loss: 0.0063774497248232365 2023-01-24 04:36:05.880863: step: 696/463, loss: 0.0012224129168316722 2023-01-24 04:36:06.453924: step: 698/463, loss: 0.016882825642824173 2023-01-24 04:36:07.154380: step: 700/463, loss: 0.03692098334431648 2023-01-24 04:36:07.765828: step: 702/463, loss: 0.0530284084379673 2023-01-24 04:36:08.357170: step: 704/463, loss: 0.0035500871017575264 2023-01-24 04:36:08.964166: step: 706/463, loss: 0.0025282411370426416 2023-01-24 04:36:09.518339: step: 708/463, loss: 0.0020579856354743242 2023-01-24 04:36:10.111867: step: 710/463, loss: 0.015920987352728844 2023-01-24 04:36:10.720206: step: 712/463, loss: 0.014441859908401966 2023-01-24 04:36:11.503870: step: 714/463, loss: 0.006769892759621143 2023-01-24 04:36:12.094834: step: 716/463, loss: 0.043161630630493164 2023-01-24 04:36:12.677059: step: 718/463, loss: 0.001978771761059761 2023-01-24 04:36:13.320887: step: 720/463, loss: 0.02008512243628502 2023-01-24 04:36:13.969643: step: 722/463, loss: 0.009537681937217712 2023-01-24 04:36:14.586712: step: 724/463, loss: 0.0003638060879893601 2023-01-24 04:36:15.239846: step: 726/463, loss: 0.005339528433978558 2023-01-24 04:36:15.796070: step: 728/463, loss: 0.003388767596334219 2023-01-24 04:36:16.370937: step: 730/463, loss: 0.0038376867305487394 2023-01-24 04:36:16.972870: step: 732/463, loss: 0.06787408888339996 2023-01-24 04:36:17.571721: step: 734/463, loss: 0.010417459532618523 2023-01-24 04:36:18.189815: step: 736/463, loss: 0.01652175560593605 2023-01-24 04:36:18.699728: step: 738/463, loss: 0.005788207985460758 2023-01-24 04:36:19.274855: step: 740/463, loss: 0.0028158684726804495 2023-01-24 04:36:19.893716: step: 742/463, loss: 0.059458520263433456 2023-01-24 04:36:20.490212: step: 744/463, loss: 5.446530394692672e-06 2023-01-24 04:36:21.103389: step: 746/463, loss: 0.03516753390431404 2023-01-24 04:36:21.728678: step: 748/463, loss: 0.013525101356208324 2023-01-24 04:36:22.392350: step: 750/463, loss: 0.06270499527454376 2023-01-24 04:36:23.051818: step: 752/463, loss: 0.00106957764364779 2023-01-24 04:36:23.614677: step: 754/463, loss: 0.04263201355934143 2023-01-24 04:36:24.239405: step: 756/463, loss: 0.03356302157044411 2023-01-24 04:36:24.877963: step: 758/463, loss: 0.012440657243132591 2023-01-24 04:36:25.526359: step: 760/463, loss: 0.029967335984110832 2023-01-24 04:36:26.208853: step: 762/463, loss: 0.01575998030602932 2023-01-24 04:36:26.847178: step: 764/463, loss: 0.006210016552358866 2023-01-24 04:36:27.480850: step: 766/463, loss: 0.010283583775162697 2023-01-24 04:36:28.123001: step: 768/463, loss: 0.018541868776082993 2023-01-24 04:36:28.762816: step: 770/463, loss: 0.01182506326586008 2023-01-24 04:36:29.405930: step: 772/463, loss: 0.002215802436694503 2023-01-24 04:36:30.032795: step: 774/463, loss: 0.0406578928232193 2023-01-24 04:36:30.645600: step: 776/463, loss: 0.07028999924659729 2023-01-24 04:36:31.237928: step: 778/463, loss: 0.0005066748126409948 2023-01-24 04:36:31.858178: step: 780/463, loss: 0.0019525624811649323 2023-01-24 04:36:32.482608: step: 782/463, loss: 0.01686103083193302 2023-01-24 04:36:33.159837: step: 784/463, loss: 0.17171694338321686 2023-01-24 04:36:33.788624: step: 786/463, loss: 0.004957855213433504 2023-01-24 04:36:34.375003: step: 788/463, loss: 0.004850171972066164 2023-01-24 04:36:34.954903: step: 790/463, loss: 0.0010972366435453296 2023-01-24 04:36:35.525174: step: 792/463, loss: 0.0025384726468473673 2023-01-24 04:36:36.164734: step: 794/463, loss: 0.011010677553713322 2023-01-24 04:36:36.814214: step: 796/463, loss: 0.07036984711885452 2023-01-24 04:36:37.462666: step: 798/463, loss: 0.0030289096757769585 2023-01-24 04:36:38.098602: step: 800/463, loss: 0.003858304815366864 2023-01-24 04:36:38.700663: step: 802/463, loss: 0.0008055354119278491 2023-01-24 04:36:39.303360: step: 804/463, loss: 0.0007819096790626645 2023-01-24 04:36:39.903770: step: 806/463, loss: 0.01665329560637474 2023-01-24 04:36:40.540671: step: 808/463, loss: 0.028631258755922318 2023-01-24 04:36:41.275998: step: 810/463, loss: 0.24964243173599243 2023-01-24 04:36:41.909519: step: 812/463, loss: 0.13243399560451508 2023-01-24 04:36:42.604590: step: 814/463, loss: 0.006908778566867113 2023-01-24 04:36:43.171110: step: 816/463, loss: 0.2623399794101715 2023-01-24 04:36:43.717349: step: 818/463, loss: 0.007665853947401047 2023-01-24 04:36:44.327746: step: 820/463, loss: 0.001188281225040555 2023-01-24 04:36:44.947665: step: 822/463, loss: 0.04388278350234032 2023-01-24 04:36:45.598510: step: 824/463, loss: 0.010815965943038464 2023-01-24 04:36:46.235227: step: 826/463, loss: 0.0022514716256409883 2023-01-24 04:36:46.820025: step: 828/463, loss: 0.010159856639802456 2023-01-24 04:36:47.411549: step: 830/463, loss: 0.000980306533165276 2023-01-24 04:36:48.001939: step: 832/463, loss: 0.025764310732483864 2023-01-24 04:36:48.608765: step: 834/463, loss: 0.0004008069809060544 2023-01-24 04:36:49.202061: step: 836/463, loss: 0.007156014908105135 2023-01-24 04:36:49.819809: step: 838/463, loss: 0.002076355740427971 2023-01-24 04:36:50.381116: step: 840/463, loss: 0.0010596619686111808 2023-01-24 04:36:50.942967: step: 842/463, loss: 0.007150654215365648 2023-01-24 04:36:51.602312: step: 844/463, loss: 0.0032344022765755653 2023-01-24 04:36:52.185003: step: 846/463, loss: 0.00041566186700947583 2023-01-24 04:36:52.763883: step: 848/463, loss: 0.0049224658869206905 2023-01-24 04:36:53.409335: step: 850/463, loss: 0.0395309180021286 2023-01-24 04:36:53.991548: step: 852/463, loss: 0.03558933734893799 2023-01-24 04:36:54.595959: step: 854/463, loss: 0.005729232914745808 2023-01-24 04:36:55.233074: step: 856/463, loss: 0.00038364113424904644 2023-01-24 04:36:55.817753: step: 858/463, loss: 0.0055260444059967995 2023-01-24 04:36:56.458037: step: 860/463, loss: 0.014664572663605213 2023-01-24 04:36:57.055556: step: 862/463, loss: 0.0016391221433877945 2023-01-24 04:36:57.665730: step: 864/463, loss: 0.011106476187705994 2023-01-24 04:36:58.307685: step: 866/463, loss: 0.04988894239068031 2023-01-24 04:36:58.948283: step: 868/463, loss: 0.0012667113915085793 2023-01-24 04:36:59.546624: step: 870/463, loss: 0.014364737085998058 2023-01-24 04:37:00.136090: step: 872/463, loss: 0.0052583059296011925 2023-01-24 04:37:00.734566: step: 874/463, loss: 0.0027395517099648714 2023-01-24 04:37:01.358124: step: 876/463, loss: 1.0095703601837158 2023-01-24 04:37:01.950476: step: 878/463, loss: 0.017347924411296844 2023-01-24 04:37:02.576923: step: 880/463, loss: 0.003989678807556629 2023-01-24 04:37:03.305071: step: 882/463, loss: 0.1823144406080246 2023-01-24 04:37:03.900131: step: 884/463, loss: 0.04542887583374977 2023-01-24 04:37:04.588093: step: 886/463, loss: 0.006405098829418421 2023-01-24 04:37:05.301961: step: 888/463, loss: 0.18624812364578247 2023-01-24 04:37:05.889217: step: 890/463, loss: 0.012357478961348534 2023-01-24 04:37:06.499833: step: 892/463, loss: 0.009346227161586285 2023-01-24 04:37:07.152849: step: 894/463, loss: 0.043834686279296875 2023-01-24 04:37:07.727355: step: 896/463, loss: 0.00037754818913526833 2023-01-24 04:37:08.315575: step: 898/463, loss: 0.0030709875281900167 2023-01-24 04:37:08.910230: step: 900/463, loss: 0.001151008065789938 2023-01-24 04:37:09.490575: step: 902/463, loss: 0.0017272848635911942 2023-01-24 04:37:10.057728: step: 904/463, loss: 0.004733656998723745 2023-01-24 04:37:10.680687: step: 906/463, loss: 0.03464411944150925 2023-01-24 04:37:11.256330: step: 908/463, loss: 0.0004434540751390159 2023-01-24 04:37:11.897692: step: 910/463, loss: 0.02998579852283001 2023-01-24 04:37:12.448937: step: 912/463, loss: 0.0012972571421414614 2023-01-24 04:37:13.046038: step: 914/463, loss: 0.0006090668612159789 2023-01-24 04:37:13.721796: step: 916/463, loss: 0.01941867731511593 2023-01-24 04:37:14.289466: step: 918/463, loss: 0.18417759239673615 2023-01-24 04:37:14.862414: step: 920/463, loss: 0.0005059108952991664 2023-01-24 04:37:15.530289: step: 922/463, loss: 2.426903009414673 2023-01-24 04:37:16.156525: step: 924/463, loss: 0.010100876912474632 2023-01-24 04:37:16.689966: step: 926/463, loss: 0.0012141040060669184 ================================================== Loss: 0.037 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.374519237405107, 'r': 0.3197982103079662, 'f1': 0.3450023681316237}, 'combined': 0.2542122712548806, 'epoch': 37} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3697973620904067, 'r': 0.38135352965573194, 'f1': 0.375486552276413}, 'combined': 0.2910469926735833, 'epoch': 37} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3487177701112002, 'r': 0.328204960104659, 'f1': 0.33815056495631535}, 'combined': 0.24916357417833762, 'epoch': 37} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34911743145323726, 'r': 0.3776757507540995, 'f1': 0.3628355115412452}, 'combined': 0.2812409228214437, 'epoch': 37} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3439326393151246, 'r': 0.31978556596662433, 'f1': 0.3314198490942203}, 'combined': 0.24420409933258336, 'epoch': 37} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3536490483545862, 'r': 0.37250166306466526, 'f1': 0.3628306261543024}, 'combined': 0.2812371360621866, 'epoch': 37} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.33705357142857145, 'r': 0.26964285714285713, 'f1': 0.29960317460317465}, 'combined': 0.19973544973544977, 'epoch': 37} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2777777777777778, 'r': 0.32608695652173914, 'f1': 0.3}, 'combined': 0.15, 'epoch': 37} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.20689655172413793, 'f1': 0.3076923076923077}, 'combined': 0.20512820512820512, 'epoch': 37} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35531381856540084, 'r': 0.3195801707779886, 'f1': 0.336500999000999}, 'combined': 0.24794810452705188, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35077406976066583, 'r': 0.38269193088778525, 'f1': 0.3660385237854157}, 'combined': 0.2837236404461117, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.3142857142857143, 'f1': 0.360655737704918}, 'combined': 0.24043715846994532, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3556843389529725, 'r': 0.32598773380319873, 'f1': 0.340189179632249}, 'combined': 0.25066571130797294, 'epoch': 19} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35235259195938173, 'r': 0.3698406801632481, 'f1': 0.3608848968767838}, 'combined': 0.2797289631293731, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.41304347826086957, 'r': 0.41304347826086957, 'f1': 0.41304347826086957}, 'combined': 0.20652173913043478, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34001213859732793, 'r': 0.32968918941790243, 'f1': 0.33477110370565427}, 'combined': 0.24667344483574524, 'epoch': 29} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34157155234650616, 'r': 0.36386160769264764, 'f1': 0.3523644229368942}, 'combined': 0.2731245766305113, 'epoch': 29} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6666666666666666, 'r': 0.20689655172413793, 'f1': 0.31578947368421056}, 'combined': 0.2105263157894737, 'epoch': 29} ****************************** Epoch: 38 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:39:50.175440: step: 2/463, loss: 0.0003576675371732563 2023-01-24 04:39:50.832869: step: 4/463, loss: 0.6663461923599243 2023-01-24 04:39:51.512880: step: 6/463, loss: 0.005348617676645517 2023-01-24 04:39:52.153925: step: 8/463, loss: 0.0052065420895814896 2023-01-24 04:39:52.739018: step: 10/463, loss: 0.002165149664506316 2023-01-24 04:39:53.408110: step: 12/463, loss: 0.02910236269235611 2023-01-24 04:39:53.944932: step: 14/463, loss: 0.0011701653711497784 2023-01-24 04:39:54.585794: step: 16/463, loss: 0.007246619090437889 2023-01-24 04:39:55.200790: step: 18/463, loss: 0.0319894403219223 2023-01-24 04:39:55.834027: step: 20/463, loss: 0.00046730050235055387 2023-01-24 04:39:56.479317: step: 22/463, loss: 0.0020132004283368587 2023-01-24 04:39:57.116555: step: 24/463, loss: 0.003372333711013198 2023-01-24 04:39:57.678550: step: 26/463, loss: 0.009564385749399662 2023-01-24 04:39:58.264487: step: 28/463, loss: 0.013140233233571053 2023-01-24 04:39:58.811563: step: 30/463, loss: 0.001348411082290113 2023-01-24 04:39:59.409534: step: 32/463, loss: 0.0014122524298727512 2023-01-24 04:39:59.968554: step: 34/463, loss: 0.0008026689174585044 2023-01-24 04:40:00.608700: step: 36/463, loss: 0.0028248915914446115 2023-01-24 04:40:01.210848: step: 38/463, loss: 0.0075436849147081375 2023-01-24 04:40:01.857958: step: 40/463, loss: 0.005117383785545826 2023-01-24 04:40:02.435655: step: 42/463, loss: 0.006541636306792498 2023-01-24 04:40:03.110880: step: 44/463, loss: 0.011812773533165455 2023-01-24 04:40:03.676596: step: 46/463, loss: 0.03619756922125816 2023-01-24 04:40:04.275720: step: 48/463, loss: 0.0041055078618228436 2023-01-24 04:40:04.911868: step: 50/463, loss: 8.513357897754759e-05 2023-01-24 04:40:05.509341: step: 52/463, loss: 0.04997166246175766 2023-01-24 04:40:06.167461: step: 54/463, loss: 0.03975410386919975 2023-01-24 04:40:06.748124: step: 56/463, loss: 0.0003791792260017246 2023-01-24 04:40:07.429904: step: 58/463, loss: 0.03447412699460983 2023-01-24 04:40:08.095077: step: 60/463, loss: 0.010324408300220966 2023-01-24 04:40:08.629359: step: 62/463, loss: 0.013240799307823181 2023-01-24 04:40:09.267554: step: 64/463, loss: 0.006249695084989071 2023-01-24 04:40:09.941982: step: 66/463, loss: 0.025303052738308907 2023-01-24 04:40:10.494637: step: 68/463, loss: 0.0006516952998936176 2023-01-24 04:40:11.078544: step: 70/463, loss: 0.06753762066364288 2023-01-24 04:40:11.743824: step: 72/463, loss: 0.01517146173864603 2023-01-24 04:40:12.374509: step: 74/463, loss: 0.0010519068455323577 2023-01-24 04:40:12.971122: step: 76/463, loss: 0.007192879915237427 2023-01-24 04:40:13.610227: step: 78/463, loss: 0.05849984660744667 2023-01-24 04:40:14.261102: step: 80/463, loss: 0.012203852646052837 2023-01-24 04:40:14.838086: step: 82/463, loss: 0.0004563910770229995 2023-01-24 04:40:15.461719: step: 84/463, loss: 0.006087826564908028 2023-01-24 04:40:16.010199: step: 86/463, loss: 0.01261174213141203 2023-01-24 04:40:16.586600: step: 88/463, loss: 0.0009724133997224271 2023-01-24 04:40:17.190365: step: 90/463, loss: 0.0004132771282456815 2023-01-24 04:40:17.772996: step: 92/463, loss: 0.01645635813474655 2023-01-24 04:40:18.403815: step: 94/463, loss: 0.028350112959742546 2023-01-24 04:40:19.022797: step: 96/463, loss: 0.0012911633821204305 2023-01-24 04:40:19.674061: step: 98/463, loss: 0.0131125058978796 2023-01-24 04:40:20.284024: step: 100/463, loss: 0.00519681116566062 2023-01-24 04:40:20.915681: step: 102/463, loss: 0.03608179837465286 2023-01-24 04:40:21.493612: step: 104/463, loss: 0.011656703427433968 2023-01-24 04:40:22.079415: step: 106/463, loss: 0.0002910309995058924 2023-01-24 04:40:22.726241: step: 108/463, loss: 0.0004622248816303909 2023-01-24 04:40:23.433775: step: 110/463, loss: 0.009065485559403896 2023-01-24 04:40:24.078614: step: 112/463, loss: 0.002514266176149249 2023-01-24 04:40:24.728796: step: 114/463, loss: 0.00034480978501960635 2023-01-24 04:40:25.370415: step: 116/463, loss: 0.003990465309470892 2023-01-24 04:40:25.971708: step: 118/463, loss: 0.0036008465103805065 2023-01-24 04:40:26.564762: step: 120/463, loss: 0.006166631355881691 2023-01-24 04:40:27.179748: step: 122/463, loss: 4.63170072180219e-05 2023-01-24 04:40:27.801163: step: 124/463, loss: 0.007947270758450031 2023-01-24 04:40:28.439857: step: 126/463, loss: 0.010826355777680874 2023-01-24 04:40:29.014315: step: 128/463, loss: 0.016946762800216675 2023-01-24 04:40:29.606688: step: 130/463, loss: 0.004641765728592873 2023-01-24 04:40:30.250461: step: 132/463, loss: 0.005628272425383329 2023-01-24 04:40:30.886834: step: 134/463, loss: 0.0003984539653174579 2023-01-24 04:40:31.554837: step: 136/463, loss: 0.005691876634955406 2023-01-24 04:40:32.154444: step: 138/463, loss: 0.001149071496911347 2023-01-24 04:40:32.794267: step: 140/463, loss: 0.019951999187469482 2023-01-24 04:40:33.409384: step: 142/463, loss: 0.00044942894601263106 2023-01-24 04:40:34.041225: step: 144/463, loss: 0.00596737302839756 2023-01-24 04:40:34.603650: step: 146/463, loss: 0.01373070478439331 2023-01-24 04:40:35.258154: step: 148/463, loss: 0.006822957657277584 2023-01-24 04:40:35.925694: step: 150/463, loss: 6.739966192981228e-05 2023-01-24 04:40:36.538123: step: 152/463, loss: 0.005839567165821791 2023-01-24 04:40:37.056963: step: 154/463, loss: 5.105665331939235e-05 2023-01-24 04:40:37.732124: step: 156/463, loss: 0.017809750512242317 2023-01-24 04:40:38.301881: step: 158/463, loss: 0.013083020225167274 2023-01-24 04:40:38.854109: step: 160/463, loss: 0.0032974849455058575 2023-01-24 04:40:39.458873: step: 162/463, loss: 0.01070482563227415 2023-01-24 04:40:39.992336: step: 164/463, loss: 0.00032536074286326766 2023-01-24 04:40:40.562347: step: 166/463, loss: 0.04124321788549423 2023-01-24 04:40:41.128274: step: 168/463, loss: 0.0017832282464951277 2023-01-24 04:40:41.727870: step: 170/463, loss: 0.0022127775009721518 2023-01-24 04:40:42.337505: step: 172/463, loss: 0.001772384624928236 2023-01-24 04:40:42.855758: step: 174/463, loss: 0.0027756420895457268 2023-01-24 04:40:43.469223: step: 176/463, loss: 0.0034369942732155323 2023-01-24 04:40:44.105987: step: 178/463, loss: 0.01690688170492649 2023-01-24 04:40:44.706258: step: 180/463, loss: 0.027102582156658173 2023-01-24 04:40:45.376964: step: 182/463, loss: 0.013270555064082146 2023-01-24 04:40:45.947698: step: 184/463, loss: 0.002121496247127652 2023-01-24 04:40:46.583934: step: 186/463, loss: 0.00016423201304860413 2023-01-24 04:40:47.334859: step: 188/463, loss: 0.0037703884299844503 2023-01-24 04:40:47.938930: step: 190/463, loss: 0.008827411569654942 2023-01-24 04:40:48.607535: step: 192/463, loss: 0.027613524347543716 2023-01-24 04:40:49.278386: step: 194/463, loss: 0.008662181906402111 2023-01-24 04:40:49.901929: step: 196/463, loss: 0.09055357426404953 2023-01-24 04:40:50.517364: step: 198/463, loss: 0.017377590760588646 2023-01-24 04:40:51.150436: step: 200/463, loss: 7.32247790438123e-05 2023-01-24 04:40:51.759585: step: 202/463, loss: 0.0007087164558470249 2023-01-24 04:40:52.418877: step: 204/463, loss: 0.317324161529541 2023-01-24 04:40:53.009864: step: 206/463, loss: 0.0005053650238551199 2023-01-24 04:40:53.577129: step: 208/463, loss: 0.0010837082518264651 2023-01-24 04:40:54.177690: step: 210/463, loss: 0.0062217446975409985 2023-01-24 04:40:54.822031: step: 212/463, loss: 0.00924688670784235 2023-01-24 04:40:55.430936: step: 214/463, loss: 0.002303719986230135 2023-01-24 04:40:56.014147: step: 216/463, loss: 0.012535372748970985 2023-01-24 04:40:56.583373: step: 218/463, loss: 0.02805422618985176 2023-01-24 04:40:57.152403: step: 220/463, loss: 0.007259115111082792 2023-01-24 04:40:57.754249: step: 222/463, loss: 0.024462362751364708 2023-01-24 04:40:58.278195: step: 224/463, loss: 0.0010564392432570457 2023-01-24 04:40:58.848987: step: 226/463, loss: 0.02273215539753437 2023-01-24 04:40:59.424662: step: 228/463, loss: 0.10344050824642181 2023-01-24 04:41:00.018920: step: 230/463, loss: 0.11630513519048691 2023-01-24 04:41:00.640127: step: 232/463, loss: 0.00038182278512977064 2023-01-24 04:41:01.240805: step: 234/463, loss: 0.0013521601213142276 2023-01-24 04:41:01.894320: step: 236/463, loss: 0.06642608344554901 2023-01-24 04:41:02.550544: step: 238/463, loss: 0.02880418486893177 2023-01-24 04:41:03.231580: step: 240/463, loss: 0.015984902158379555 2023-01-24 04:41:03.858121: step: 242/463, loss: 0.047216299921274185 2023-01-24 04:41:04.570296: step: 244/463, loss: 0.0004027021932415664 2023-01-24 04:41:05.299779: step: 246/463, loss: 0.011578534729778767 2023-01-24 04:41:05.923988: step: 248/463, loss: 0.013928024098277092 2023-01-24 04:41:06.541026: step: 250/463, loss: 0.005497732665389776 2023-01-24 04:41:07.128616: step: 252/463, loss: 0.0004778372240252793 2023-01-24 04:41:07.744909: step: 254/463, loss: 0.0038341470062732697 2023-01-24 04:41:08.347838: step: 256/463, loss: 0.01212337240576744 2023-01-24 04:41:09.020646: step: 258/463, loss: 0.0110453050583601 2023-01-24 04:41:09.622528: step: 260/463, loss: 0.006752063054591417 2023-01-24 04:41:10.178478: step: 262/463, loss: 0.0001130100354203023 2023-01-24 04:41:10.768285: step: 264/463, loss: 0.0005019967793487012 2023-01-24 04:41:11.337648: step: 266/463, loss: 0.01084559503942728 2023-01-24 04:41:11.927076: step: 268/463, loss: 0.00031250284519046545 2023-01-24 04:41:12.559716: step: 270/463, loss: 0.023255877196788788 2023-01-24 04:41:13.182291: step: 272/463, loss: 0.008008042350411415 2023-01-24 04:41:13.703701: step: 274/463, loss: 0.026028001680970192 2023-01-24 04:41:14.335978: step: 276/463, loss: 0.034775178879499435 2023-01-24 04:41:14.934690: step: 278/463, loss: 0.06590401381254196 2023-01-24 04:41:15.583564: step: 280/463, loss: 0.009529476054012775 2023-01-24 04:41:16.204714: step: 282/463, loss: 0.0030940070282667875 2023-01-24 04:41:16.845440: step: 284/463, loss: 0.0076924762688577175 2023-01-24 04:41:17.498967: step: 286/463, loss: 0.0007767279748804867 2023-01-24 04:41:18.065458: step: 288/463, loss: 0.31206226348876953 2023-01-24 04:41:18.660859: step: 290/463, loss: 0.004208110738545656 2023-01-24 04:41:19.246523: step: 292/463, loss: 0.03561747819185257 2023-01-24 04:41:19.931106: step: 294/463, loss: 0.0025247172452509403 2023-01-24 04:41:20.486747: step: 296/463, loss: 0.02131984755396843 2023-01-24 04:41:21.094162: step: 298/463, loss: 0.0734555572271347 2023-01-24 04:41:21.736361: step: 300/463, loss: 0.02882952056825161 2023-01-24 04:41:22.381910: step: 302/463, loss: 0.0009613363072276115 2023-01-24 04:41:22.985283: step: 304/463, loss: 0.0318412110209465 2023-01-24 04:41:23.542991: step: 306/463, loss: 0.0057764495722949505 2023-01-24 04:41:24.107163: step: 308/463, loss: 0.003803574712947011 2023-01-24 04:41:24.672494: step: 310/463, loss: 0.02247965894639492 2023-01-24 04:41:25.270665: step: 312/463, loss: 0.00042657507583498955 2023-01-24 04:41:25.882439: step: 314/463, loss: 0.008545217104256153 2023-01-24 04:41:26.549878: step: 316/463, loss: 0.0020248896908015013 2023-01-24 04:41:27.180191: step: 318/463, loss: 0.1126699298620224 2023-01-24 04:41:27.783706: step: 320/463, loss: 0.0002879079256672412 2023-01-24 04:41:28.361522: step: 322/463, loss: 0.007370691746473312 2023-01-24 04:41:28.979561: step: 324/463, loss: 0.0018412236822769046 2023-01-24 04:41:29.578826: step: 326/463, loss: 0.0022184234112501144 2023-01-24 04:41:30.157992: step: 328/463, loss: 0.0019151547458022833 2023-01-24 04:41:30.766552: step: 330/463, loss: 0.00280355685390532 2023-01-24 04:41:31.333034: step: 332/463, loss: 0.003879902418702841 2023-01-24 04:41:31.995478: step: 334/463, loss: 0.0003928143414668739 2023-01-24 04:41:32.591895: step: 336/463, loss: 0.12195266783237457 2023-01-24 04:41:33.219437: step: 338/463, loss: 0.003956921864300966 2023-01-24 04:41:33.740081: step: 340/463, loss: 0.004294354468584061 2023-01-24 04:41:34.299978: step: 342/463, loss: 2.8229922463651747e-05 2023-01-24 04:41:34.908726: step: 344/463, loss: 0.011857496574521065 2023-01-24 04:41:35.487742: step: 346/463, loss: 0.002544622402638197 2023-01-24 04:41:36.100738: step: 348/463, loss: 0.005111938342452049 2023-01-24 04:41:36.711835: step: 350/463, loss: 0.003586391219869256 2023-01-24 04:41:37.386973: step: 352/463, loss: 0.002086812164634466 2023-01-24 04:41:37.961688: step: 354/463, loss: 0.00022493835422210395 2023-01-24 04:41:38.572265: step: 356/463, loss: 0.0029014514293521643 2023-01-24 04:41:39.170715: step: 358/463, loss: 0.005020488053560257 2023-01-24 04:41:39.785489: step: 360/463, loss: 0.04885225370526314 2023-01-24 04:41:40.360630: step: 362/463, loss: 0.017862966284155846 2023-01-24 04:41:40.961485: step: 364/463, loss: 0.05107182264328003 2023-01-24 04:41:41.560333: step: 366/463, loss: 0.0042709712870419025 2023-01-24 04:41:42.229230: step: 368/463, loss: 0.009424310177564621 2023-01-24 04:41:42.784271: step: 370/463, loss: 0.015868568792939186 2023-01-24 04:41:43.445266: step: 372/463, loss: 0.0004685927997343242 2023-01-24 04:41:44.113019: step: 374/463, loss: 0.01611120067536831 2023-01-24 04:41:44.665550: step: 376/463, loss: 0.016102414578199387 2023-01-24 04:41:45.337651: step: 378/463, loss: 0.013001061975955963 2023-01-24 04:41:45.967671: step: 380/463, loss: 0.006803620140999556 2023-01-24 04:41:46.604595: step: 382/463, loss: 0.0014990817289799452 2023-01-24 04:41:47.161134: step: 384/463, loss: 0.00013608716835733503 2023-01-24 04:41:47.744825: step: 386/463, loss: 0.09866796433925629 2023-01-24 04:41:48.380921: step: 388/463, loss: 0.0032702710013836622 2023-01-24 04:41:49.003327: step: 390/463, loss: 0.083692267537117 2023-01-24 04:41:49.566375: step: 392/463, loss: 0.02510911412537098 2023-01-24 04:41:50.189881: step: 394/463, loss: 0.0005975825479254127 2023-01-24 04:41:50.775936: step: 396/463, loss: 0.005218931473791599 2023-01-24 04:41:51.452177: step: 398/463, loss: 0.025239232927560806 2023-01-24 04:41:52.112958: step: 400/463, loss: 0.03985372185707092 2023-01-24 04:41:52.706872: step: 402/463, loss: 0.0002242095797555521 2023-01-24 04:41:53.394895: step: 404/463, loss: 0.02234315499663353 2023-01-24 04:41:54.083141: step: 406/463, loss: 0.013031018897891045 2023-01-24 04:41:54.733178: step: 408/463, loss: 0.0003241975500714034 2023-01-24 04:41:55.471938: step: 410/463, loss: 0.003737811464816332 2023-01-24 04:41:56.040994: step: 412/463, loss: 0.013983436860144138 2023-01-24 04:41:56.702689: step: 414/463, loss: 0.0006442320300266147 2023-01-24 04:41:57.259422: step: 416/463, loss: 0.0005439840024337173 2023-01-24 04:41:57.933781: step: 418/463, loss: 0.02544771507382393 2023-01-24 04:41:58.559706: step: 420/463, loss: 0.0023602836299687624 2023-01-24 04:41:59.163077: step: 422/463, loss: 0.0005632571410387754 2023-01-24 04:41:59.739257: step: 424/463, loss: 0.000652813003398478 2023-01-24 04:42:00.363070: step: 426/463, loss: 0.053215187042951584 2023-01-24 04:42:00.979141: step: 428/463, loss: 0.0015672557055950165 2023-01-24 04:42:01.617584: step: 430/463, loss: 0.002932344563305378 2023-01-24 04:42:02.205166: step: 432/463, loss: 0.03482050448656082 2023-01-24 04:42:02.767542: step: 434/463, loss: 0.0007440971676260233 2023-01-24 04:42:03.411252: step: 436/463, loss: 0.0005701840855181217 2023-01-24 04:42:04.061893: step: 438/463, loss: 0.02425306662917137 2023-01-24 04:42:04.679613: step: 440/463, loss: 0.015324989333748817 2023-01-24 04:42:05.244753: step: 442/463, loss: 0.0002631238312460482 2023-01-24 04:42:05.791907: step: 444/463, loss: 0.0005782668013125658 2023-01-24 04:42:06.387000: step: 446/463, loss: 0.0018936014967039227 2023-01-24 04:42:07.046188: step: 448/463, loss: 8.828086720313877e-05 2023-01-24 04:42:07.658077: step: 450/463, loss: 0.00518467603251338 2023-01-24 04:42:08.249807: step: 452/463, loss: 0.0032181974966078997 2023-01-24 04:42:08.884180: step: 454/463, loss: 1.5466004610061646 2023-01-24 04:42:09.497152: step: 456/463, loss: 0.03499947115778923 2023-01-24 04:42:10.177613: step: 458/463, loss: 0.012161768972873688 2023-01-24 04:42:10.792868: step: 460/463, loss: 0.011972170323133469 2023-01-24 04:42:11.439439: step: 462/463, loss: 0.0010148603469133377 2023-01-24 04:42:12.004397: step: 464/463, loss: 0.0006081080064177513 2023-01-24 04:42:12.642974: step: 466/463, loss: 0.04654070362448692 2023-01-24 04:42:13.264781: step: 468/463, loss: 0.016150107607245445 2023-01-24 04:42:13.874891: step: 470/463, loss: 0.010591501370072365 2023-01-24 04:42:14.444028: step: 472/463, loss: 0.016223080456256866 2023-01-24 04:42:15.053677: step: 474/463, loss: 0.0034826139453798532 2023-01-24 04:42:15.619628: step: 476/463, loss: 0.0015128606464713812 2023-01-24 04:42:16.237366: step: 478/463, loss: 0.017107337713241577 2023-01-24 04:42:16.928703: step: 480/463, loss: 0.0011645930353552103 2023-01-24 04:42:17.538349: step: 482/463, loss: 2.951589340227656e-05 2023-01-24 04:42:18.160275: step: 484/463, loss: 0.0028347501065582037 2023-01-24 04:42:18.796739: step: 486/463, loss: 0.0008458561496809125 2023-01-24 04:42:19.461091: step: 488/463, loss: 0.0025803851895034313 2023-01-24 04:42:20.070197: step: 490/463, loss: 0.001344438991509378 2023-01-24 04:42:20.688186: step: 492/463, loss: 0.0001398771710228175 2023-01-24 04:42:21.266447: step: 494/463, loss: 0.004892126657068729 2023-01-24 04:42:21.846923: step: 496/463, loss: 0.00012866096221841872 2023-01-24 04:42:22.417621: step: 498/463, loss: 0.011026565916836262 2023-01-24 04:42:23.026809: step: 500/463, loss: 4.267854819772765e-05 2023-01-24 04:42:23.609327: step: 502/463, loss: 0.00037181138759478927 2023-01-24 04:42:24.207741: step: 504/463, loss: 0.0010123489191755652 2023-01-24 04:42:24.824307: step: 506/463, loss: 0.6113887429237366 2023-01-24 04:42:25.437376: step: 508/463, loss: 0.031441714614629745 2023-01-24 04:42:26.077357: step: 510/463, loss: 0.027309393510222435 2023-01-24 04:42:26.667016: step: 512/463, loss: 0.0027584049385041 2023-01-24 04:42:27.231159: step: 514/463, loss: 0.0006248729769140482 2023-01-24 04:42:27.960806: step: 516/463, loss: 0.000973543559666723 2023-01-24 04:42:28.567385: step: 518/463, loss: 0.24825112521648407 2023-01-24 04:42:29.178181: step: 520/463, loss: 0.012699211947619915 2023-01-24 04:42:29.757424: step: 522/463, loss: 0.0012883590534329414 2023-01-24 04:42:30.347494: step: 524/463, loss: 0.0017056922661140561 2023-01-24 04:42:30.890944: step: 526/463, loss: 0.00032777892192825675 2023-01-24 04:42:31.498924: step: 528/463, loss: 0.015193992294371128 2023-01-24 04:42:32.054056: step: 530/463, loss: 0.0009753488702699542 2023-01-24 04:42:32.741238: step: 532/463, loss: 0.0037017534486949444 2023-01-24 04:42:33.313657: step: 534/463, loss: 0.00311911478638649 2023-01-24 04:42:33.917830: step: 536/463, loss: 2.0947974917362444e-05 2023-01-24 04:42:34.527729: step: 538/463, loss: 0.01927190274000168 2023-01-24 04:42:35.126863: step: 540/463, loss: 0.013577724806964397 2023-01-24 04:42:35.770517: step: 542/463, loss: 0.008778207935392857 2023-01-24 04:42:36.408682: step: 544/463, loss: 0.11551780998706818 2023-01-24 04:42:36.980206: step: 546/463, loss: 0.0034004158806055784 2023-01-24 04:42:37.618768: step: 548/463, loss: 0.007283760700374842 2023-01-24 04:42:38.251425: step: 550/463, loss: 0.005372778512537479 2023-01-24 04:42:38.837307: step: 552/463, loss: 0.008491241373121738 2023-01-24 04:42:39.383249: step: 554/463, loss: 0.00023776550369802862 2023-01-24 04:42:40.012466: step: 556/463, loss: 0.0029520662501454353 2023-01-24 04:42:40.575934: step: 558/463, loss: 2.8681654930114746 2023-01-24 04:42:41.199069: step: 560/463, loss: 0.051297131925821304 2023-01-24 04:42:41.845279: step: 562/463, loss: 0.0030340056400746107 2023-01-24 04:42:42.508451: step: 564/463, loss: 0.008575016632676125 2023-01-24 04:42:43.167503: step: 566/463, loss: 0.0009005602332763374 2023-01-24 04:42:43.810934: step: 568/463, loss: 0.0002541911671869457 2023-01-24 04:42:44.395435: step: 570/463, loss: 0.00010980103979818523 2023-01-24 04:42:45.034778: step: 572/463, loss: 0.03710321709513664 2023-01-24 04:42:45.634205: step: 574/463, loss: 0.02576286718249321 2023-01-24 04:42:46.247970: step: 576/463, loss: 0.00144080794416368 2023-01-24 04:42:46.816693: step: 578/463, loss: 0.000108132982859388 2023-01-24 04:42:47.412240: step: 580/463, loss: 0.00042414720519445837 2023-01-24 04:42:48.014253: step: 582/463, loss: 0.0016385602066293359 2023-01-24 04:42:48.609913: step: 584/463, loss: 0.011878685094416142 2023-01-24 04:42:49.204976: step: 586/463, loss: 8.690333925187588e-05 2023-01-24 04:42:49.879765: step: 588/463, loss: 0.02630392089486122 2023-01-24 04:42:50.424840: step: 590/463, loss: 0.0029558863025158644 2023-01-24 04:42:51.021540: step: 592/463, loss: 0.014071577228605747 2023-01-24 04:42:51.690783: step: 594/463, loss: 0.24573202431201935 2023-01-24 04:42:52.357447: step: 596/463, loss: 0.014122777618467808 2023-01-24 04:42:53.015776: step: 598/463, loss: 0.002087947679683566 2023-01-24 04:42:53.681852: step: 600/463, loss: 0.002814420498907566 2023-01-24 04:42:54.227483: step: 602/463, loss: 0.07893769443035126 2023-01-24 04:42:54.807493: step: 604/463, loss: 0.020510639995336533 2023-01-24 04:42:55.393253: step: 606/463, loss: 0.0025151835288852453 2023-01-24 04:42:56.001772: step: 608/463, loss: 0.0011185641633346677 2023-01-24 04:42:56.689793: step: 610/463, loss: 0.0019370777299627662 2023-01-24 04:42:57.294683: step: 612/463, loss: 0.038371533155441284 2023-01-24 04:42:57.882058: step: 614/463, loss: 0.002873349003493786 2023-01-24 04:42:58.505692: step: 616/463, loss: 0.7107179760932922 2023-01-24 04:42:59.068617: step: 618/463, loss: 0.0262912567704916 2023-01-24 04:42:59.638346: step: 620/463, loss: 0.01624593883752823 2023-01-24 04:43:00.256474: step: 622/463, loss: 0.0006126550724729896 2023-01-24 04:43:00.805535: step: 624/463, loss: 0.002259156433865428 2023-01-24 04:43:01.380116: step: 626/463, loss: 0.001167376758530736 2023-01-24 04:43:01.966244: step: 628/463, loss: 0.009499099105596542 2023-01-24 04:43:02.512327: step: 630/463, loss: 0.10232248902320862 2023-01-24 04:43:03.173827: step: 632/463, loss: 0.0006010098732076585 2023-01-24 04:43:03.722570: step: 634/463, loss: 0.0680655837059021 2023-01-24 04:43:04.388243: step: 636/463, loss: 0.011620243079960346 2023-01-24 04:43:05.005005: step: 638/463, loss: 0.018363557755947113 2023-01-24 04:43:05.597865: step: 640/463, loss: 0.016795363277196884 2023-01-24 04:43:06.215722: step: 642/463, loss: 0.005221130792051554 2023-01-24 04:43:06.879461: step: 644/463, loss: 2.1192850908846594e-05 2023-01-24 04:43:07.530398: step: 646/463, loss: 0.0009030998917296529 2023-01-24 04:43:08.139358: step: 648/463, loss: 0.7196435928344727 2023-01-24 04:43:08.736706: step: 650/463, loss: 0.02872166410088539 2023-01-24 04:43:09.301166: step: 652/463, loss: 0.006281400099396706 2023-01-24 04:43:09.879300: step: 654/463, loss: 0.00614789966493845 2023-01-24 04:43:10.498851: step: 656/463, loss: 0.005407969933003187 2023-01-24 04:43:11.090606: step: 658/463, loss: 0.00039257208118215203 2023-01-24 04:43:11.770903: step: 660/463, loss: 0.0011086283484473825 2023-01-24 04:43:12.419046: step: 662/463, loss: 0.0097615085542202 2023-01-24 04:43:13.063961: step: 664/463, loss: 0.2791215777397156 2023-01-24 04:43:13.608299: step: 666/463, loss: 0.011203419417142868 2023-01-24 04:43:14.328414: step: 668/463, loss: 0.00036510106292553246 2023-01-24 04:43:14.936616: step: 670/463, loss: 0.00014853785978630185 2023-01-24 04:43:15.516064: step: 672/463, loss: 0.00434811320155859 2023-01-24 04:43:16.202460: step: 674/463, loss: 0.014435181394219398 2023-01-24 04:43:16.872224: step: 676/463, loss: 0.0337931290268898 2023-01-24 04:43:17.503535: step: 678/463, loss: 0.001187111483886838 2023-01-24 04:43:18.110712: step: 680/463, loss: 0.0030091821681708097 2023-01-24 04:43:18.735138: step: 682/463, loss: 0.003528317203745246 2023-01-24 04:43:19.483016: step: 684/463, loss: 0.00029646451002918184 2023-01-24 04:43:20.019133: step: 686/463, loss: 0.020188894122838974 2023-01-24 04:43:20.626261: step: 688/463, loss: 0.038598254323005676 2023-01-24 04:43:21.181005: step: 690/463, loss: 0.04736355319619179 2023-01-24 04:43:21.799303: step: 692/463, loss: 0.034489214420318604 2023-01-24 04:43:22.382870: step: 694/463, loss: 0.35866814851760864 2023-01-24 04:43:23.034398: step: 696/463, loss: 0.02614535205066204 2023-01-24 04:43:23.633532: step: 698/463, loss: 0.0015560268657281995 2023-01-24 04:43:24.185022: step: 700/463, loss: 0.004201863892376423 2023-01-24 04:43:24.775724: step: 702/463, loss: 0.0010461887577548623 2023-01-24 04:43:25.381401: step: 704/463, loss: 0.004900551866739988 2023-01-24 04:43:25.936040: step: 706/463, loss: 0.008886730298399925 2023-01-24 04:43:26.500650: step: 708/463, loss: 0.0005063335411250591 2023-01-24 04:43:27.104059: step: 710/463, loss: 0.000594239856582135 2023-01-24 04:43:27.664948: step: 712/463, loss: 0.00010440604819450527 2023-01-24 04:43:28.275460: step: 714/463, loss: 0.0028269460890442133 2023-01-24 04:43:28.859283: step: 716/463, loss: 0.14797835052013397 2023-01-24 04:43:29.480159: step: 718/463, loss: 0.008436452597379684 2023-01-24 04:43:29.998287: step: 720/463, loss: 0.0006557427695952356 2023-01-24 04:43:30.597245: step: 722/463, loss: 0.10913539677858353 2023-01-24 04:43:31.177966: step: 724/463, loss: 0.5314435362815857 2023-01-24 04:43:31.827682: step: 726/463, loss: 0.0010888563701882958 2023-01-24 04:43:32.513511: step: 728/463, loss: 0.012055275030434132 2023-01-24 04:43:33.201348: step: 730/463, loss: 0.08905165642499924 2023-01-24 04:43:33.822363: step: 732/463, loss: 0.0008891245233826339 2023-01-24 04:43:34.524405: step: 734/463, loss: 0.2571966350078583 2023-01-24 04:43:35.216367: step: 736/463, loss: 0.00947421696037054 2023-01-24 04:43:35.884640: step: 738/463, loss: 0.006358742248266935 2023-01-24 04:43:36.537749: step: 740/463, loss: 0.002794235711917281 2023-01-24 04:43:37.187769: step: 742/463, loss: 0.017871318385004997 2023-01-24 04:43:37.868593: step: 744/463, loss: 0.010377487167716026 2023-01-24 04:43:38.430173: step: 746/463, loss: 0.0003550343681126833 2023-01-24 04:43:39.016239: step: 748/463, loss: 0.007270701229572296 2023-01-24 04:43:39.619671: step: 750/463, loss: 0.0003694660263136029 2023-01-24 04:43:40.200702: step: 752/463, loss: 0.00420248182490468 2023-01-24 04:43:40.882012: step: 754/463, loss: 0.0010248766047880054 2023-01-24 04:43:41.445576: step: 756/463, loss: 0.0033011864870786667 2023-01-24 04:43:42.063317: step: 758/463, loss: 0.0031189294531941414 2023-01-24 04:43:42.653432: step: 760/463, loss: 0.00022750585048925132 2023-01-24 04:43:43.336372: step: 762/463, loss: 0.004734429996460676 2023-01-24 04:43:44.030895: step: 764/463, loss: 0.0003108963428530842 2023-01-24 04:43:44.677439: step: 766/463, loss: 0.005663448013365269 2023-01-24 04:43:45.281281: step: 768/463, loss: 0.012136506848037243 2023-01-24 04:43:45.914069: step: 770/463, loss: 0.00012647359108086675 2023-01-24 04:43:46.489226: step: 772/463, loss: 0.043156947940588 2023-01-24 04:43:47.143334: step: 774/463, loss: 0.0008583275484852493 2023-01-24 04:43:47.780097: step: 776/463, loss: 0.0014312269631773233 2023-01-24 04:43:48.416223: step: 778/463, loss: 0.0029045837000012398 2023-01-24 04:43:49.010010: step: 780/463, loss: 0.043769583106040955 2023-01-24 04:43:49.654636: step: 782/463, loss: 0.012938517145812511 2023-01-24 04:43:50.246793: step: 784/463, loss: 0.005907172802835703 2023-01-24 04:43:50.892073: step: 786/463, loss: 0.09016815572977066 2023-01-24 04:43:51.476007: step: 788/463, loss: 0.00027480642893351614 2023-01-24 04:43:52.195434: step: 790/463, loss: 0.0014474766794592142 2023-01-24 04:43:52.808867: step: 792/463, loss: 0.0002280456101289019 2023-01-24 04:43:53.409414: step: 794/463, loss: 2.5823312171269208e-05 2023-01-24 04:43:54.110620: step: 796/463, loss: 0.0030285813845694065 2023-01-24 04:43:54.693143: step: 798/463, loss: 0.4696293771266937 2023-01-24 04:43:55.325236: step: 800/463, loss: 2.0237541320966557e-05 2023-01-24 04:43:55.903746: step: 802/463, loss: 0.0018452114891260862 2023-01-24 04:43:56.457876: step: 804/463, loss: 0.002650710754096508 2023-01-24 04:43:57.170883: step: 806/463, loss: 0.016896134242415428 2023-01-24 04:43:57.802292: step: 808/463, loss: 0.012639055028557777 2023-01-24 04:43:58.379682: step: 810/463, loss: 0.0026958251837641 2023-01-24 04:43:59.046178: step: 812/463, loss: 0.06391260772943497 2023-01-24 04:43:59.618114: step: 814/463, loss: 0.10942236334085464 2023-01-24 04:44:00.292164: step: 816/463, loss: 0.023639511317014694 2023-01-24 04:44:00.854067: step: 818/463, loss: 0.018793359398841858 2023-01-24 04:44:01.509141: step: 820/463, loss: 0.0004436885938048363 2023-01-24 04:44:02.127096: step: 822/463, loss: 0.0007158114458434284 2023-01-24 04:44:02.755196: step: 824/463, loss: 0.030479643493890762 2023-01-24 04:44:03.415624: step: 826/463, loss: 0.18333019316196442 2023-01-24 04:44:04.060505: step: 828/463, loss: 0.045274633914232254 2023-01-24 04:44:04.732634: step: 830/463, loss: 0.0015979836462065578 2023-01-24 04:44:05.370225: step: 832/463, loss: 0.048418302088975906 2023-01-24 04:44:05.991531: step: 834/463, loss: 0.0003083001065533608 2023-01-24 04:44:06.657873: step: 836/463, loss: 0.03679577261209488 2023-01-24 04:44:07.277467: step: 838/463, loss: 0.02976054698228836 2023-01-24 04:44:07.907653: step: 840/463, loss: 0.0027237161993980408 2023-01-24 04:44:08.575999: step: 842/463, loss: 0.014188104309141636 2023-01-24 04:44:09.163216: step: 844/463, loss: 0.000988544663414359 2023-01-24 04:44:09.759355: step: 846/463, loss: 0.0013473142171278596 2023-01-24 04:44:10.362405: step: 848/463, loss: 0.0008627126226201653 2023-01-24 04:44:11.004895: step: 850/463, loss: 0.02962706796824932 2023-01-24 04:44:11.616155: step: 852/463, loss: 0.0004974248586222529 2023-01-24 04:44:12.243889: step: 854/463, loss: 0.01137123815715313 2023-01-24 04:44:12.876181: step: 856/463, loss: 0.014730734750628471 2023-01-24 04:44:13.464610: step: 858/463, loss: 3.7103702652530046e-06 2023-01-24 04:44:14.051003: step: 860/463, loss: 0.0060698846355080605 2023-01-24 04:44:14.713730: step: 862/463, loss: 0.0019864977803081274 2023-01-24 04:44:15.307377: step: 864/463, loss: 0.0007902037468738854 2023-01-24 04:44:15.942114: step: 866/463, loss: 0.012988809496164322 2023-01-24 04:44:16.559708: step: 868/463, loss: 0.035771533846855164 2023-01-24 04:44:17.183472: step: 870/463, loss: 0.06955956667661667 2023-01-24 04:44:17.768576: step: 872/463, loss: 0.0034183140378445387 2023-01-24 04:44:18.382038: step: 874/463, loss: 0.015360042452812195 2023-01-24 04:44:19.054892: step: 876/463, loss: 0.00014103636203799397 2023-01-24 04:44:19.700877: step: 878/463, loss: 0.16709063947200775 2023-01-24 04:44:20.237536: step: 880/463, loss: 0.19758981466293335 2023-01-24 04:44:20.995610: step: 882/463, loss: 0.01201779767870903 2023-01-24 04:44:21.575157: step: 884/463, loss: 0.018532108515501022 2023-01-24 04:44:22.199681: step: 886/463, loss: 0.0005302152712829411 2023-01-24 04:44:22.768997: step: 888/463, loss: 7.821289727871772e-06 2023-01-24 04:44:23.379904: step: 890/463, loss: 0.06921089440584183 2023-01-24 04:44:24.030322: step: 892/463, loss: 0.0748114362359047 2023-01-24 04:44:24.617484: step: 894/463, loss: 0.037598468363285065 2023-01-24 04:44:25.204991: step: 896/463, loss: 0.09422103315591812 2023-01-24 04:44:25.819124: step: 898/463, loss: 0.012228304520249367 2023-01-24 04:44:26.420005: step: 900/463, loss: 0.01457449421286583 2023-01-24 04:44:27.054150: step: 902/463, loss: 0.0030833331402391195 2023-01-24 04:44:27.619720: step: 904/463, loss: 1.3750407695770264 2023-01-24 04:44:28.243136: step: 906/463, loss: 0.001494941534474492 2023-01-24 04:44:29.066323: step: 908/463, loss: 6.817867415520595e-06 2023-01-24 04:44:29.680897: step: 910/463, loss: 0.0016072022262960672 2023-01-24 04:44:30.281703: step: 912/463, loss: 0.002142509911209345 2023-01-24 04:44:30.885387: step: 914/463, loss: 0.0002531880745664239 2023-01-24 04:44:31.530700: step: 916/463, loss: 0.008350801654160023 2023-01-24 04:44:32.159937: step: 918/463, loss: 0.0008177039562724531 2023-01-24 04:44:32.710617: step: 920/463, loss: 0.006838065572082996 2023-01-24 04:44:33.274554: step: 922/463, loss: 0.0008794894674792886 2023-01-24 04:44:33.931063: step: 924/463, loss: 0.0006061706808395684 2023-01-24 04:44:34.581762: step: 926/463, loss: 0.04009054973721504 ================================================== Loss: 0.040 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3720741758241758, 'r': 0.32124051233396583, 'f1': 0.3447937881873727}, 'combined': 0.25405858076964305, 'epoch': 38} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35085022770415564, 'r': 0.3692311679423329, 'f1': 0.35980610006382285}, 'combined': 0.2788927665566474, 'epoch': 38} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3532118346715023, 'r': 0.32707281844344044, 'f1': 0.33964014841318846}, 'combined': 0.25026116198866516, 'epoch': 38} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3327156085526451, 'r': 0.37032959738718124, 'f1': 0.3505164001368014}, 'combined': 0.2716921379050805, 'epoch': 38} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.350815113857877, 'r': 0.329513247361763, 'f1': 0.33983068759226837}, 'combined': 0.25040155927851354, 'epoch': 38} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3323554133442105, 'r': 0.35587689020772534, 'f1': 0.34371420909543293}, 'combined': 0.2664196261888045, 'epoch': 38} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.34791666666666665, 'r': 0.2982142857142857, 'f1': 0.3211538461538461}, 'combined': 0.21410256410256406, 'epoch': 38} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2708333333333333, 'r': 0.2826086956521739, 'f1': 0.2765957446808511}, 'combined': 0.13829787234042554, 'epoch': 38} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6486842105263158, 'r': 0.22368421052631576, 'f1': 0.3326585695006748}, 'combined': 0.22177237966711652, 'epoch': 38} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35531381856540084, 'r': 0.3195801707779886, 'f1': 0.336500999000999}, 'combined': 0.24794810452705188, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35077406976066583, 'r': 0.38269193088778525, 'f1': 0.3660385237854157}, 'combined': 0.2837236404461117, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.3142857142857143, 'f1': 0.360655737704918}, 'combined': 0.24043715846994532, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3556843389529725, 'r': 0.32598773380319873, 'f1': 0.340189179632249}, 'combined': 0.25066571130797294, 'epoch': 19} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35235259195938173, 'r': 0.3698406801632481, 'f1': 0.3608848968767838}, 'combined': 0.2797289631293731, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.41304347826086957, 'r': 0.41304347826086957, 'f1': 0.41304347826086957}, 'combined': 0.20652173913043478, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.350815113857877, 'r': 0.329513247361763, 'f1': 0.33983068759226837}, 'combined': 0.25040155927851354, 'epoch': 38} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3323554133442105, 'r': 0.35587689020772534, 'f1': 0.34371420909543293}, 'combined': 0.2664196261888045, 'epoch': 38} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6486842105263158, 'r': 0.22368421052631576, 'f1': 0.3326585695006748}, 'combined': 0.22177237966711652, 'epoch': 38} ****************************** Epoch: 39 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:47:13.878247: step: 2/463, loss: 0.005101046059280634 2023-01-24 04:47:14.485270: step: 4/463, loss: 0.0063080801628530025 2023-01-24 04:47:15.135680: step: 6/463, loss: 0.019881948828697205 2023-01-24 04:47:15.791429: step: 8/463, loss: 7.387231016764417e-05 2023-01-24 04:47:16.436426: step: 10/463, loss: 0.045004162937402725 2023-01-24 04:47:17.052933: step: 12/463, loss: 0.004906332585960627 2023-01-24 04:47:17.621380: step: 14/463, loss: 0.0028790051583200693 2023-01-24 04:47:18.193012: step: 16/463, loss: 0.0012290325248613954 2023-01-24 04:47:18.787830: step: 18/463, loss: 0.0006157810566946864 2023-01-24 04:47:19.356443: step: 20/463, loss: 0.003034910187125206 2023-01-24 04:47:19.952918: step: 22/463, loss: 2.634338306961581e-05 2023-01-24 04:47:20.571575: step: 24/463, loss: 0.025913435965776443 2023-01-24 04:47:21.159715: step: 26/463, loss: 0.0034398380666971207 2023-01-24 04:47:21.691907: step: 28/463, loss: 0.0011896591167896986 2023-01-24 04:47:22.266387: step: 30/463, loss: 0.0163203626871109 2023-01-24 04:47:22.883658: step: 32/463, loss: 0.015592370182275772 2023-01-24 04:47:23.580052: step: 34/463, loss: 0.25456076860427856 2023-01-24 04:47:24.176886: step: 36/463, loss: 2.3247070203069597e-05 2023-01-24 04:47:24.783945: step: 38/463, loss: 0.00016227616288233548 2023-01-24 04:47:25.423654: step: 40/463, loss: 0.00020118938118685037 2023-01-24 04:47:26.004900: step: 42/463, loss: 0.0014288818929344416 2023-01-24 04:47:26.678538: step: 44/463, loss: 2.4023011064855382e-05 2023-01-24 04:47:27.371715: step: 46/463, loss: 0.002262156456708908 2023-01-24 04:47:27.972982: step: 48/463, loss: 0.0002539228298701346 2023-01-24 04:47:28.614827: step: 50/463, loss: 0.060603659600019455 2023-01-24 04:47:29.217645: step: 52/463, loss: 0.06319565325975418 2023-01-24 04:47:29.837068: step: 54/463, loss: 0.06426774710416794 2023-01-24 04:47:30.379548: step: 56/463, loss: 0.008915627375245094 2023-01-24 04:47:30.991963: step: 58/463, loss: 0.006581749767065048 2023-01-24 04:47:31.675403: step: 60/463, loss: 0.0013842361513525248 2023-01-24 04:47:32.294344: step: 62/463, loss: 0.011555763892829418 2023-01-24 04:47:32.931718: step: 64/463, loss: 0.00033713303855620325 2023-01-24 04:47:33.566291: step: 66/463, loss: 0.6245585083961487 2023-01-24 04:47:34.159091: step: 68/463, loss: 0.00883561559021473 2023-01-24 04:47:34.732485: step: 70/463, loss: 0.0012230186257511377 2023-01-24 04:47:35.376651: step: 72/463, loss: 0.0014932213816791773 2023-01-24 04:47:35.996376: step: 74/463, loss: 0.004410460125654936 2023-01-24 04:47:36.579512: step: 76/463, loss: 6.290165401878767e-06 2023-01-24 04:47:37.266665: step: 78/463, loss: 0.0002952778886537999 2023-01-24 04:47:37.917980: step: 80/463, loss: 0.02845289371907711 2023-01-24 04:47:38.527428: step: 82/463, loss: 0.002594040473923087 2023-01-24 04:47:39.089593: step: 84/463, loss: 0.0006576331797987223 2023-01-24 04:47:39.699459: step: 86/463, loss: 0.0026975558139383793 2023-01-24 04:47:40.259875: step: 88/463, loss: 0.002611092058941722 2023-01-24 04:47:40.867647: step: 90/463, loss: 0.0002484899014234543 2023-01-24 04:47:41.535148: step: 92/463, loss: 0.0277217086404562 2023-01-24 04:47:42.072105: step: 94/463, loss: 0.00152283301576972 2023-01-24 04:47:42.694167: step: 96/463, loss: 0.03214195370674133 2023-01-24 04:47:43.259145: step: 98/463, loss: 0.006914425175637007 2023-01-24 04:47:43.861480: step: 100/463, loss: 0.012915435247123241 2023-01-24 04:47:44.522407: step: 102/463, loss: 0.0020883933175355196 2023-01-24 04:47:45.132949: step: 104/463, loss: 0.00017096732335630804 2023-01-24 04:47:45.769321: step: 106/463, loss: 0.0005624577170237899 2023-01-24 04:47:46.410069: step: 108/463, loss: 0.003950077574700117 2023-01-24 04:47:47.008514: step: 110/463, loss: 0.0038696820847690105 2023-01-24 04:47:47.572528: step: 112/463, loss: 0.09254588931798935 2023-01-24 04:47:48.223396: step: 114/463, loss: 0.003738517640158534 2023-01-24 04:47:48.770122: step: 116/463, loss: 0.018618999049067497 2023-01-24 04:47:49.371018: step: 118/463, loss: 0.00016887504898477346 2023-01-24 04:47:50.042104: step: 120/463, loss: 0.011418359354138374 2023-01-24 04:47:50.637296: step: 122/463, loss: 0.0008552664658054709 2023-01-24 04:47:51.272705: step: 124/463, loss: 0.004260038956999779 2023-01-24 04:47:51.845984: step: 126/463, loss: 0.0004883751971647143 2023-01-24 04:47:52.461511: step: 128/463, loss: 0.006286273244768381 2023-01-24 04:47:53.010114: step: 130/463, loss: 0.43046244978904724 2023-01-24 04:47:53.642388: step: 132/463, loss: 0.001455778838135302 2023-01-24 04:47:54.332443: step: 134/463, loss: 0.00635158084332943 2023-01-24 04:47:54.988774: step: 136/463, loss: 0.08300653845071793 2023-01-24 04:47:55.706044: step: 138/463, loss: 0.0044763521291315556 2023-01-24 04:47:56.307766: step: 140/463, loss: 0.002413678914308548 2023-01-24 04:47:56.884914: step: 142/463, loss: 0.12263081222772598 2023-01-24 04:47:57.485014: step: 144/463, loss: 0.007996129803359509 2023-01-24 04:47:58.140861: step: 146/463, loss: 0.3734673857688904 2023-01-24 04:47:58.673035: step: 148/463, loss: 0.1497054547071457 2023-01-24 04:47:59.225232: step: 150/463, loss: 0.016379481181502342 2023-01-24 04:47:59.800640: step: 152/463, loss: 0.011143434792757034 2023-01-24 04:48:00.375935: step: 154/463, loss: 0.0004206180165056139 2023-01-24 04:48:01.071219: step: 156/463, loss: 0.031053245067596436 2023-01-24 04:48:01.640962: step: 158/463, loss: 0.004198350012302399 2023-01-24 04:48:02.225692: step: 160/463, loss: 0.030927203595638275 2023-01-24 04:48:02.839488: step: 162/463, loss: 0.0027811983600258827 2023-01-24 04:48:03.466087: step: 164/463, loss: 0.0003592850116547197 2023-01-24 04:48:04.101031: step: 166/463, loss: 0.00947804469615221 2023-01-24 04:48:04.720993: step: 168/463, loss: 0.00030571690876968205 2023-01-24 04:48:05.380383: step: 170/463, loss: 0.11764440685510635 2023-01-24 04:48:05.903024: step: 172/463, loss: 0.009552194736897945 2023-01-24 04:48:06.552296: step: 174/463, loss: 0.008524104952812195 2023-01-24 04:48:07.110536: step: 176/463, loss: 0.0008647734648548067 2023-01-24 04:48:07.692221: step: 178/463, loss: 0.0031043209601193666 2023-01-24 04:48:08.276653: step: 180/463, loss: 0.006270288489758968 2023-01-24 04:48:08.959926: step: 182/463, loss: 0.002518759109079838 2023-01-24 04:48:09.553677: step: 184/463, loss: 0.0005668033845722675 2023-01-24 04:48:10.192193: step: 186/463, loss: 0.02828381024301052 2023-01-24 04:48:10.963969: step: 188/463, loss: 0.0041167642921209335 2023-01-24 04:48:11.587121: step: 190/463, loss: 0.00010263576405122876 2023-01-24 04:48:12.190731: step: 192/463, loss: 0.000519321474712342 2023-01-24 04:48:12.801523: step: 194/463, loss: 0.46234777569770813 2023-01-24 04:48:13.415743: step: 196/463, loss: 0.008984023705124855 2023-01-24 04:48:13.980209: step: 198/463, loss: 0.03553246334195137 2023-01-24 04:48:14.602573: step: 200/463, loss: 0.004233300685882568 2023-01-24 04:48:15.184089: step: 202/463, loss: 0.0008449263987131417 2023-01-24 04:48:15.824237: step: 204/463, loss: 0.06882200390100479 2023-01-24 04:48:16.375583: step: 206/463, loss: 0.00012937060091644526 2023-01-24 04:48:16.972580: step: 208/463, loss: 0.04999740794301033 2023-01-24 04:48:17.551421: step: 210/463, loss: 0.00308515434153378 2023-01-24 04:48:18.172832: step: 212/463, loss: 0.04466313123703003 2023-01-24 04:48:18.790002: step: 214/463, loss: 0.25284236669540405 2023-01-24 04:48:19.367184: step: 216/463, loss: 0.0021615626756101847 2023-01-24 04:48:20.074640: step: 218/463, loss: 0.005363750737160444 2023-01-24 04:48:20.769503: step: 220/463, loss: 0.1568789780139923 2023-01-24 04:48:21.440029: step: 222/463, loss: 0.006635377649217844 2023-01-24 04:48:21.993400: step: 224/463, loss: 0.0016466293018311262 2023-01-24 04:48:22.611441: step: 226/463, loss: 0.0015907459892332554 2023-01-24 04:48:23.268037: step: 228/463, loss: 0.002491228748112917 2023-01-24 04:48:23.904592: step: 230/463, loss: 0.0019399513257667422 2023-01-24 04:48:24.526217: step: 232/463, loss: 0.0002647745131980628 2023-01-24 04:48:25.125748: step: 234/463, loss: 0.0329493023455143 2023-01-24 04:48:25.811561: step: 236/463, loss: 0.04008388891816139 2023-01-24 04:48:26.392376: step: 238/463, loss: 0.00517432764172554 2023-01-24 04:48:26.954784: step: 240/463, loss: 0.0018173099961131811 2023-01-24 04:48:27.551414: step: 242/463, loss: 0.0060622477903962135 2023-01-24 04:48:28.180854: step: 244/463, loss: 0.000108723746961914 2023-01-24 04:48:28.771960: step: 246/463, loss: 0.014138683676719666 2023-01-24 04:48:29.390239: step: 248/463, loss: 0.03220289945602417 2023-01-24 04:48:29.990300: step: 250/463, loss: 0.0005985701573081315 2023-01-24 04:48:30.563495: step: 252/463, loss: 0.0008431488531641662 2023-01-24 04:48:31.260157: step: 254/463, loss: 0.0031692287884652615 2023-01-24 04:48:31.839036: step: 256/463, loss: 0.0014526075683534145 2023-01-24 04:48:32.532885: step: 258/463, loss: 0.00011805987014668062 2023-01-24 04:48:33.060581: step: 260/463, loss: 0.0007070398423820734 2023-01-24 04:48:33.624384: step: 262/463, loss: 5.11353173351381e-05 2023-01-24 04:48:34.253592: step: 264/463, loss: 0.002086434280499816 2023-01-24 04:48:34.909444: step: 266/463, loss: 0.018929775804281235 2023-01-24 04:48:35.528953: step: 268/463, loss: 0.002038001548498869 2023-01-24 04:48:36.128821: step: 270/463, loss: 0.003594369860365987 2023-01-24 04:48:36.784782: step: 272/463, loss: 0.0012561352923512459 2023-01-24 04:48:37.370572: step: 274/463, loss: 0.026628682389855385 2023-01-24 04:48:38.008657: step: 276/463, loss: 0.021509597077965736 2023-01-24 04:48:38.665895: step: 278/463, loss: 0.001450798474252224 2023-01-24 04:48:39.289308: step: 280/463, loss: 0.005348288919776678 2023-01-24 04:48:39.906001: step: 282/463, loss: 0.005450593773275614 2023-01-24 04:48:40.604405: step: 284/463, loss: 0.035292964428663254 2023-01-24 04:48:41.209049: step: 286/463, loss: 2.2482222448161338e-06 2023-01-24 04:48:41.794266: step: 288/463, loss: 0.012215464375913143 2023-01-24 04:48:42.394903: step: 290/463, loss: 8.917354716686532e-05 2023-01-24 04:48:42.963631: step: 292/463, loss: 0.003010797780007124 2023-01-24 04:48:43.541707: step: 294/463, loss: 0.011076126247644424 2023-01-24 04:48:44.147768: step: 296/463, loss: 0.0001839690812630579 2023-01-24 04:48:44.738111: step: 298/463, loss: 0.0001617338857613504 2023-01-24 04:48:45.318104: step: 300/463, loss: 0.0016445706132799387 2023-01-24 04:48:45.876149: step: 302/463, loss: 0.0030276840552687645 2023-01-24 04:48:46.491349: step: 304/463, loss: 0.005086181219667196 2023-01-24 04:48:47.143122: step: 306/463, loss: 0.02982032112777233 2023-01-24 04:48:47.786337: step: 308/463, loss: 0.10039351880550385 2023-01-24 04:48:48.461495: step: 310/463, loss: 0.0018271517474204302 2023-01-24 04:48:49.077223: step: 312/463, loss: 0.00021448054758366197 2023-01-24 04:48:49.780826: step: 314/463, loss: 0.01972249150276184 2023-01-24 04:48:50.349449: step: 316/463, loss: 0.00026626032195053995 2023-01-24 04:48:51.031267: step: 318/463, loss: 0.040258318185806274 2023-01-24 04:48:51.653356: step: 320/463, loss: 0.0057281991466879845 2023-01-24 04:48:52.287739: step: 322/463, loss: 0.014666832983493805 2023-01-24 04:48:52.852505: step: 324/463, loss: 0.013578989543020725 2023-01-24 04:48:53.442488: step: 326/463, loss: 0.000523874128703028 2023-01-24 04:48:54.131579: step: 328/463, loss: 0.0010622312547639012 2023-01-24 04:48:54.721940: step: 330/463, loss: 1.2540181160147768e-06 2023-01-24 04:48:55.330775: step: 332/463, loss: 0.0012956437421962619 2023-01-24 04:48:55.915239: step: 334/463, loss: 0.0032489588484168053 2023-01-24 04:48:56.596637: step: 336/463, loss: 0.003476150333881378 2023-01-24 04:48:57.160076: step: 338/463, loss: 0.0002736754540819675 2023-01-24 04:48:57.737764: step: 340/463, loss: 0.002102866303175688 2023-01-24 04:48:58.365315: step: 342/463, loss: 0.5588254928588867 2023-01-24 04:48:58.933629: step: 344/463, loss: 0.004840799607336521 2023-01-24 04:48:59.658914: step: 346/463, loss: 0.00010948073031613603 2023-01-24 04:49:00.294919: step: 348/463, loss: 0.0037592104636132717 2023-01-24 04:49:00.863108: step: 350/463, loss: 0.0006970184622332454 2023-01-24 04:49:01.444447: step: 352/463, loss: 0.0017699286108836532 2023-01-24 04:49:02.048900: step: 354/463, loss: 0.04139326140284538 2023-01-24 04:49:02.660808: step: 356/463, loss: 0.0009388115722686052 2023-01-24 04:49:03.269857: step: 358/463, loss: 0.001461970154196024 2023-01-24 04:49:03.930863: step: 360/463, loss: 0.007859587669372559 2023-01-24 04:49:04.559363: step: 362/463, loss: 0.00931469164788723 2023-01-24 04:49:05.161367: step: 364/463, loss: 0.000484753109049052 2023-01-24 04:49:05.797653: step: 366/463, loss: 0.011816229671239853 2023-01-24 04:49:06.363177: step: 368/463, loss: 0.0037920218892395496 2023-01-24 04:49:07.035572: step: 370/463, loss: 0.0001582533586770296 2023-01-24 04:49:07.666934: step: 372/463, loss: 0.0309405829757452 2023-01-24 04:49:08.267105: step: 374/463, loss: 0.15409953892230988 2023-01-24 04:49:08.908579: step: 376/463, loss: 0.022477364167571068 2023-01-24 04:49:09.511650: step: 378/463, loss: 3.470348383416422e-05 2023-01-24 04:49:10.095453: step: 380/463, loss: 1.8464037566445768e-05 2023-01-24 04:49:10.813660: step: 382/463, loss: 0.0025057110469788313 2023-01-24 04:49:11.386008: step: 384/463, loss: 0.0003260721277911216 2023-01-24 04:49:11.920911: step: 386/463, loss: 0.0013037144672125578 2023-01-24 04:49:12.507913: step: 388/463, loss: 0.006715648341923952 2023-01-24 04:49:13.110105: step: 390/463, loss: 0.015780387446284294 2023-01-24 04:49:13.693988: step: 392/463, loss: 0.00019978114869445562 2023-01-24 04:49:14.334475: step: 394/463, loss: 0.6672088503837585 2023-01-24 04:49:14.927762: step: 396/463, loss: 0.002492237603291869 2023-01-24 04:49:15.525048: step: 398/463, loss: 0.03764458745718002 2023-01-24 04:49:16.228910: step: 400/463, loss: 0.00914923008531332 2023-01-24 04:49:16.839159: step: 402/463, loss: 0.006509678903967142 2023-01-24 04:49:17.413249: step: 404/463, loss: 0.0030041737481951714 2023-01-24 04:49:18.023927: step: 406/463, loss: 0.00275383610278368 2023-01-24 04:49:18.643681: step: 408/463, loss: 0.0032367929816246033 2023-01-24 04:49:19.197278: step: 410/463, loss: 4.163006451562978e-05 2023-01-24 04:49:19.829133: step: 412/463, loss: 0.0028238550294190645 2023-01-24 04:49:20.444686: step: 414/463, loss: 0.016968419775366783 2023-01-24 04:49:21.049060: step: 416/463, loss: 0.07657045871019363 2023-01-24 04:49:21.593252: step: 418/463, loss: 7.713102968409657e-05 2023-01-24 04:49:22.195673: step: 420/463, loss: 0.00978376716375351 2023-01-24 04:49:22.765340: step: 422/463, loss: 0.0027286261320114136 2023-01-24 04:49:23.364477: step: 424/463, loss: 0.0009378529503010213 2023-01-24 04:49:23.973594: step: 426/463, loss: 0.00018820025434251875 2023-01-24 04:49:24.638163: step: 428/463, loss: 0.005454068537801504 2023-01-24 04:49:25.293257: step: 430/463, loss: 0.017453545704483986 2023-01-24 04:49:25.867048: step: 432/463, loss: 0.0009796028025448322 2023-01-24 04:49:26.440731: step: 434/463, loss: 0.008941635489463806 2023-01-24 04:49:27.091120: step: 436/463, loss: 0.03115403838455677 2023-01-24 04:49:27.744830: step: 438/463, loss: 0.013458590023219585 2023-01-24 04:49:28.368531: step: 440/463, loss: 0.0001702435256447643 2023-01-24 04:49:29.005869: step: 442/463, loss: 0.0003959675959777087 2023-01-24 04:49:29.618692: step: 444/463, loss: 0.006209314800798893 2023-01-24 04:49:30.152062: step: 446/463, loss: 0.0018842765130102634 2023-01-24 04:49:30.767020: step: 448/463, loss: 0.0028514459263533354 2023-01-24 04:49:31.376703: step: 450/463, loss: 0.00018538547737989575 2023-01-24 04:49:32.004455: step: 452/463, loss: 0.09835506975650787 2023-01-24 04:49:32.627493: step: 454/463, loss: 0.040322139859199524 2023-01-24 04:49:33.252394: step: 456/463, loss: 0.0021380505058914423 2023-01-24 04:49:33.875197: step: 458/463, loss: 0.0032994237262755632 2023-01-24 04:49:34.506581: step: 460/463, loss: 0.012302545830607414 2023-01-24 04:49:35.142358: step: 462/463, loss: 0.0030976543202996254 2023-01-24 04:49:35.677103: step: 464/463, loss: 0.0023362203501164913 2023-01-24 04:49:36.273145: step: 466/463, loss: 0.0015525113558396697 2023-01-24 04:49:36.913812: step: 468/463, loss: 0.001852512708865106 2023-01-24 04:49:37.590638: step: 470/463, loss: 0.013955218717455864 2023-01-24 04:49:38.279930: step: 472/463, loss: 0.017245154827833176 2023-01-24 04:49:38.952617: step: 474/463, loss: 2.5008670490933582e-05 2023-01-24 04:49:39.639380: step: 476/463, loss: 0.007278566248714924 2023-01-24 04:49:40.258547: step: 478/463, loss: 0.000951079826336354 2023-01-24 04:49:40.954614: step: 480/463, loss: 0.00031163645326159894 2023-01-24 04:49:41.544203: step: 482/463, loss: 0.010875837877392769 2023-01-24 04:49:42.112565: step: 484/463, loss: 0.0008127672481350601 2023-01-24 04:49:42.698380: step: 486/463, loss: 0.0005339454510249197 2023-01-24 04:49:43.298659: step: 488/463, loss: 0.0013327541528269649 2023-01-24 04:49:43.900087: step: 490/463, loss: 0.013201793655753136 2023-01-24 04:49:44.506483: step: 492/463, loss: 0.0029789437539875507 2023-01-24 04:49:45.074786: step: 494/463, loss: 0.011383281089365482 2023-01-24 04:49:45.652610: step: 496/463, loss: 0.0029643219895660877 2023-01-24 04:49:46.241580: step: 498/463, loss: 0.023574139922857285 2023-01-24 04:49:46.819612: step: 500/463, loss: 0.00040819059358909726 2023-01-24 04:49:47.442558: step: 502/463, loss: 0.00013156521890778095 2023-01-24 04:49:48.038517: step: 504/463, loss: 0.551557183265686 2023-01-24 04:49:48.647629: step: 506/463, loss: 0.0020468926522880793 2023-01-24 04:49:49.246711: step: 508/463, loss: 0.3316692113876343 2023-01-24 04:49:49.852413: step: 510/463, loss: 0.008236047811806202 2023-01-24 04:49:50.457181: step: 512/463, loss: 0.00558823999017477 2023-01-24 04:49:51.087111: step: 514/463, loss: 0.004384888801723719 2023-01-24 04:49:51.678514: step: 516/463, loss: 0.01471670065075159 2023-01-24 04:49:52.363278: step: 518/463, loss: 0.010182141326367855 2023-01-24 04:49:52.978766: step: 520/463, loss: 0.0035986772272735834 2023-01-24 04:49:53.576156: step: 522/463, loss: 0.005598139949142933 2023-01-24 04:49:54.156272: step: 524/463, loss: 0.000648642482701689 2023-01-24 04:49:54.747156: step: 526/463, loss: 0.01973499171435833 2023-01-24 04:49:55.357076: step: 528/463, loss: 0.005143820773810148 2023-01-24 04:49:55.948929: step: 530/463, loss: 0.00038234624662436545 2023-01-24 04:49:56.527608: step: 532/463, loss: 0.030666884034872055 2023-01-24 04:49:57.169725: step: 534/463, loss: 0.005132536869496107 2023-01-24 04:49:57.773259: step: 536/463, loss: 0.006605064030736685 2023-01-24 04:49:58.378115: step: 538/463, loss: 0.0005883269477635622 2023-01-24 04:49:58.942006: step: 540/463, loss: 0.0002428966254228726 2023-01-24 04:49:59.528772: step: 542/463, loss: 0.002347770147025585 2023-01-24 04:50:00.129476: step: 544/463, loss: 0.000972317939158529 2023-01-24 04:50:00.762434: step: 546/463, loss: 0.0012101618340238929 2023-01-24 04:50:01.349409: step: 548/463, loss: 0.00036239458131603897 2023-01-24 04:50:02.072661: step: 550/463, loss: 0.0024849427863955498 2023-01-24 04:50:02.717207: step: 552/463, loss: 0.08889742940664291 2023-01-24 04:50:03.345341: step: 554/463, loss: 0.012291314080357552 2023-01-24 04:50:03.955929: step: 556/463, loss: 0.003750688163563609 2023-01-24 04:50:04.535606: step: 558/463, loss: 0.08504805713891983 2023-01-24 04:50:05.156893: step: 560/463, loss: 0.01901933364570141 2023-01-24 04:50:05.804074: step: 562/463, loss: 0.0319950170814991 2023-01-24 04:50:06.416560: step: 564/463, loss: 0.0008004764677025378 2023-01-24 04:50:06.969250: step: 566/463, loss: 0.000420475349528715 2023-01-24 04:50:07.584464: step: 568/463, loss: 0.030587129294872284 2023-01-24 04:50:08.252157: step: 570/463, loss: 0.0009074404370039701 2023-01-24 04:50:08.829997: step: 572/463, loss: 0.00037804763996973634 2023-01-24 04:50:09.408437: step: 574/463, loss: 0.005028039216995239 2023-01-24 04:50:10.055333: step: 576/463, loss: 0.002985528204590082 2023-01-24 04:50:10.599890: step: 578/463, loss: 0.00027284491807222366 2023-01-24 04:50:11.248474: step: 580/463, loss: 0.02392571046948433 2023-01-24 04:50:11.861841: step: 582/463, loss: 0.0006588668911717832 2023-01-24 04:50:12.452547: step: 584/463, loss: 0.0015033355448395014 2023-01-24 04:50:13.032002: step: 586/463, loss: 0.001714364974759519 2023-01-24 04:50:13.594603: step: 588/463, loss: 0.001540832919999957 2023-01-24 04:50:14.157398: step: 590/463, loss: 0.0008473931229673326 2023-01-24 04:50:14.766259: step: 592/463, loss: 0.02150999940931797 2023-01-24 04:50:15.372775: step: 594/463, loss: 0.0005528069450519979 2023-01-24 04:50:15.971219: step: 596/463, loss: 1.9969973436673172e-05 2023-01-24 04:50:16.614947: step: 598/463, loss: 0.01979796588420868 2023-01-24 04:50:17.229091: step: 600/463, loss: 0.007987127639353275 2023-01-24 04:50:17.858756: step: 602/463, loss: 0.00027877395041286945 2023-01-24 04:50:18.469033: step: 604/463, loss: 0.01493868324905634 2023-01-24 04:50:19.095644: step: 606/463, loss: 0.0010983727406710386 2023-01-24 04:50:19.678098: step: 608/463, loss: 0.08684752136468887 2023-01-24 04:50:20.279432: step: 610/463, loss: 0.0024158363230526447 2023-01-24 04:50:20.934978: step: 612/463, loss: 0.022967644035816193 2023-01-24 04:50:21.532218: step: 614/463, loss: 0.026730766519904137 2023-01-24 04:50:22.129422: step: 616/463, loss: 0.0008778470801189542 2023-01-24 04:50:22.756771: step: 618/463, loss: 0.006617836654186249 2023-01-24 04:50:23.342681: step: 620/463, loss: 0.0012552656698971987 2023-01-24 04:50:23.959690: step: 622/463, loss: 0.0016567009733989835 2023-01-24 04:50:24.502406: step: 624/463, loss: 0.00783392135053873 2023-01-24 04:50:25.123585: step: 626/463, loss: 0.10046045482158661 2023-01-24 04:50:25.745772: step: 628/463, loss: 0.015135063789784908 2023-01-24 04:50:26.286573: step: 630/463, loss: 0.008125129155814648 2023-01-24 04:50:26.831882: step: 632/463, loss: 5.0117076170863584e-05 2023-01-24 04:50:27.502841: step: 634/463, loss: 0.003074069507420063 2023-01-24 04:50:28.123710: step: 636/463, loss: 0.0026245855260640383 2023-01-24 04:50:28.762604: step: 638/463, loss: 0.04754061996936798 2023-01-24 04:50:29.329252: step: 640/463, loss: 0.002656852826476097 2023-01-24 04:50:29.906452: step: 642/463, loss: 0.0008477208903059363 2023-01-24 04:50:30.560288: step: 644/463, loss: 0.0167354978621006 2023-01-24 04:50:31.168258: step: 646/463, loss: 0.003730935510247946 2023-01-24 04:50:31.733965: step: 648/463, loss: 0.011582210659980774 2023-01-24 04:50:32.396631: step: 650/463, loss: 0.0007358565926551819 2023-01-24 04:50:33.037278: step: 652/463, loss: 0.0030736280605196953 2023-01-24 04:50:33.598255: step: 654/463, loss: 0.01061483845114708 2023-01-24 04:50:34.183906: step: 656/463, loss: 0.09793661534786224 2023-01-24 04:50:34.773728: step: 658/463, loss: 0.0012629505945369601 2023-01-24 04:50:35.400617: step: 660/463, loss: 0.0011275908909738064 2023-01-24 04:50:36.015522: step: 662/463, loss: 0.004502336960285902 2023-01-24 04:50:36.635813: step: 664/463, loss: 0.0013200596440583467 2023-01-24 04:50:37.217122: step: 666/463, loss: 0.0008304209331981838 2023-01-24 04:50:37.820517: step: 668/463, loss: 0.009631169028580189 2023-01-24 04:50:38.454213: step: 670/463, loss: 0.028128191828727722 2023-01-24 04:50:39.060832: step: 672/463, loss: 0.0015040450962260365 2023-01-24 04:50:39.630336: step: 674/463, loss: 0.0417172834277153 2023-01-24 04:50:40.219056: step: 676/463, loss: 0.005432909354567528 2023-01-24 04:50:40.840804: step: 678/463, loss: 0.003239082172513008 2023-01-24 04:50:41.462875: step: 680/463, loss: 0.0013843229971826077 2023-01-24 04:50:42.071766: step: 682/463, loss: 0.028053050860762596 2023-01-24 04:50:42.606550: step: 684/463, loss: 3.207140980521217e-05 2023-01-24 04:50:43.354008: step: 686/463, loss: 0.006003293674439192 2023-01-24 04:50:43.943904: step: 688/463, loss: 0.0002574764075689018 2023-01-24 04:50:44.553450: step: 690/463, loss: 0.5024890899658203 2023-01-24 04:50:45.203271: step: 692/463, loss: 0.031492963433265686 2023-01-24 04:50:45.704746: step: 694/463, loss: 0.0004643872089218348 2023-01-24 04:50:46.259520: step: 696/463, loss: 0.009366431273519993 2023-01-24 04:50:46.815714: step: 698/463, loss: 0.0012687998823821545 2023-01-24 04:50:47.403340: step: 700/463, loss: 0.004887820687144995 2023-01-24 04:50:48.007464: step: 702/463, loss: 5.0249684136360884e-05 2023-01-24 04:50:48.566343: step: 704/463, loss: 0.07279365509748459 2023-01-24 04:50:49.179597: step: 706/463, loss: 0.00300635676831007 2023-01-24 04:50:49.739955: step: 708/463, loss: 0.0017109294421970844 2023-01-24 04:50:50.429213: step: 710/463, loss: 0.020548371598124504 2023-01-24 04:50:51.029782: step: 712/463, loss: 0.01779833994805813 2023-01-24 04:50:51.649472: step: 714/463, loss: 0.002069491660222411 2023-01-24 04:50:52.333741: step: 716/463, loss: 0.0058089084923267365 2023-01-24 04:50:52.937395: step: 718/463, loss: 0.0032654430251568556 2023-01-24 04:50:53.516122: step: 720/463, loss: 0.001063687726855278 2023-01-24 04:50:54.134404: step: 722/463, loss: 0.0035961433313786983 2023-01-24 04:50:54.694548: step: 724/463, loss: 0.000984791200608015 2023-01-24 04:50:55.343994: step: 726/463, loss: 0.0021394516807049513 2023-01-24 04:50:55.936471: step: 728/463, loss: 0.0663040280342102 2023-01-24 04:50:56.496153: step: 730/463, loss: 8.17628315417096e-05 2023-01-24 04:50:57.126715: step: 732/463, loss: 0.009121349081397057 2023-01-24 04:50:57.761643: step: 734/463, loss: 0.10119572281837463 2023-01-24 04:50:58.338840: step: 736/463, loss: 0.0002363017265452072 2023-01-24 04:50:58.912464: step: 738/463, loss: 0.05009730905294418 2023-01-24 04:50:59.476983: step: 740/463, loss: 0.01026653777807951 2023-01-24 04:51:00.077679: step: 742/463, loss: 0.005966320168226957 2023-01-24 04:51:00.690804: step: 744/463, loss: 0.0044987816363573074 2023-01-24 04:51:01.212996: step: 746/463, loss: 0.008317560888826847 2023-01-24 04:51:01.817160: step: 748/463, loss: 0.000586635316722095 2023-01-24 04:51:02.518511: step: 750/463, loss: 0.08782698214054108 2023-01-24 04:51:03.115810: step: 752/463, loss: 0.07010345160961151 2023-01-24 04:51:03.734996: step: 754/463, loss: 0.058717429637908936 2023-01-24 04:51:04.358234: step: 756/463, loss: 0.010663501918315887 2023-01-24 04:51:04.977037: step: 758/463, loss: 0.05304395407438278 2023-01-24 04:51:05.631951: step: 760/463, loss: 0.09776268899440765 2023-01-24 04:51:06.182717: step: 762/463, loss: 0.005428258329629898 2023-01-24 04:51:06.804512: step: 764/463, loss: 0.033863089978694916 2023-01-24 04:51:07.408254: step: 766/463, loss: 0.008410153910517693 2023-01-24 04:51:08.114590: step: 768/463, loss: 0.09233640134334564 2023-01-24 04:51:08.725708: step: 770/463, loss: 0.00014279631432145834 2023-01-24 04:51:09.315585: step: 772/463, loss: 0.013316868804395199 2023-01-24 04:51:09.927346: step: 774/463, loss: 0.004622491076588631 2023-01-24 04:51:10.524619: step: 776/463, loss: 0.00931533332914114 2023-01-24 04:51:11.228416: step: 778/463, loss: 0.0290085282176733 2023-01-24 04:51:11.891188: step: 780/463, loss: 0.19074031710624695 2023-01-24 04:51:12.490295: step: 782/463, loss: 0.002779945731163025 2023-01-24 04:51:13.080042: step: 784/463, loss: 0.002603244734928012 2023-01-24 04:51:13.671695: step: 786/463, loss: 0.00027012574719265103 2023-01-24 04:51:14.249333: step: 788/463, loss: 0.0022005753125995398 2023-01-24 04:51:14.854034: step: 790/463, loss: 0.0023240766022354364 2023-01-24 04:51:15.406330: step: 792/463, loss: 0.01554103009402752 2023-01-24 04:51:15.999358: step: 794/463, loss: 0.07578250020742416 2023-01-24 04:51:16.574466: step: 796/463, loss: 0.000614823482464999 2023-01-24 04:51:17.164042: step: 798/463, loss: 0.014563743025064468 2023-01-24 04:51:17.830080: step: 800/463, loss: 0.001362152281217277 2023-01-24 04:51:18.467573: step: 802/463, loss: 0.03800148516893387 2023-01-24 04:51:19.058347: step: 804/463, loss: 0.00412390660494566 2023-01-24 04:51:19.713726: step: 806/463, loss: 0.03198016807436943 2023-01-24 04:51:20.345757: step: 808/463, loss: 0.03758576884865761 2023-01-24 04:51:20.990711: step: 810/463, loss: 0.02593049965798855 2023-01-24 04:51:21.773880: step: 812/463, loss: 0.0037283925339579582 2023-01-24 04:51:22.364419: step: 814/463, loss: 0.06398437172174454 2023-01-24 04:51:23.025196: step: 816/463, loss: 0.042970314621925354 2023-01-24 04:51:23.632886: step: 818/463, loss: 0.004306245129555464 2023-01-24 04:51:24.194004: step: 820/463, loss: 0.02571534737944603 2023-01-24 04:51:24.765922: step: 822/463, loss: 0.00019989970314782113 2023-01-24 04:51:25.348421: step: 824/463, loss: 0.06832410395145416 2023-01-24 04:51:25.929411: step: 826/463, loss: 0.004086012486368418 2023-01-24 04:51:26.504267: step: 828/463, loss: 0.014487771317362785 2023-01-24 04:51:27.301506: step: 830/463, loss: 0.005764051340520382 2023-01-24 04:51:27.926617: step: 832/463, loss: 5.8278452343074605e-05 2023-01-24 04:51:28.499098: step: 834/463, loss: 0.029955696314573288 2023-01-24 04:51:29.127960: step: 836/463, loss: 0.009231744334101677 2023-01-24 04:51:29.760031: step: 838/463, loss: 0.013798731379210949 2023-01-24 04:51:30.336438: step: 840/463, loss: 0.16709890961647034 2023-01-24 04:51:30.946279: step: 842/463, loss: 0.005619935691356659 2023-01-24 04:51:31.616318: step: 844/463, loss: 0.0003501700994092971 2023-01-24 04:51:32.322915: step: 846/463, loss: 0.0010360708693042397 2023-01-24 04:51:32.991200: step: 848/463, loss: 0.010365192778408527 2023-01-24 04:51:33.609681: step: 850/463, loss: 0.017780913040041924 2023-01-24 04:51:34.253533: step: 852/463, loss: 0.0027114583645015955 2023-01-24 04:51:34.883085: step: 854/463, loss: 0.02568534016609192 2023-01-24 04:51:35.458007: step: 856/463, loss: 0.0015489222714677453 2023-01-24 04:51:36.045115: step: 858/463, loss: 0.012909810058772564 2023-01-24 04:51:36.593492: step: 860/463, loss: 0.005378380883485079 2023-01-24 04:51:37.199690: step: 862/463, loss: 0.000740766292437911 2023-01-24 04:51:37.849352: step: 864/463, loss: 0.0013974225148558617 2023-01-24 04:51:38.462825: step: 866/463, loss: 0.04189683869481087 2023-01-24 04:51:39.028689: step: 868/463, loss: 0.10087142884731293 2023-01-24 04:51:39.651579: step: 870/463, loss: 0.048038266599178314 2023-01-24 04:51:40.228098: step: 872/463, loss: 0.00904986634850502 2023-01-24 04:51:40.850614: step: 874/463, loss: 0.0081681739538908 2023-01-24 04:51:41.390642: step: 876/463, loss: 0.001242789556272328 2023-01-24 04:51:41.963202: step: 878/463, loss: 0.002331588650122285 2023-01-24 04:51:42.692197: step: 880/463, loss: 0.028196750208735466 2023-01-24 04:51:43.334726: step: 882/463, loss: 0.030575983226299286 2023-01-24 04:51:43.922917: step: 884/463, loss: 0.0054059275425970554 2023-01-24 04:51:44.489515: step: 886/463, loss: 0.0030291141010820866 2023-01-24 04:51:45.124941: step: 888/463, loss: 0.16524618864059448 2023-01-24 04:51:45.748507: step: 890/463, loss: 0.005934928078204393 2023-01-24 04:51:46.343965: step: 892/463, loss: 0.026059653609991074 2023-01-24 04:51:46.961258: step: 894/463, loss: 0.23381909728050232 2023-01-24 04:51:47.562747: step: 896/463, loss: 0.0011872815666720271 2023-01-24 04:51:48.142576: step: 898/463, loss: 0.00025052239652723074 2023-01-24 04:51:48.739426: step: 900/463, loss: 0.00011480140528874472 2023-01-24 04:51:49.290707: step: 902/463, loss: 0.015487512573599815 2023-01-24 04:51:49.860884: step: 904/463, loss: 0.01088606845587492 2023-01-24 04:51:50.435457: step: 906/463, loss: 0.0017687305808067322 2023-01-24 04:51:51.065712: step: 908/463, loss: 0.0036358400247991085 2023-01-24 04:51:51.619315: step: 910/463, loss: 0.009502926841378212 2023-01-24 04:51:52.225601: step: 912/463, loss: 0.0005604664911516011 2023-01-24 04:51:52.914225: step: 914/463, loss: 0.03314416855573654 2023-01-24 04:51:53.513238: step: 916/463, loss: 0.005248896777629852 2023-01-24 04:51:54.164490: step: 918/463, loss: 0.01236130204051733 2023-01-24 04:51:54.855370: step: 920/463, loss: 0.0001980936503969133 2023-01-24 04:51:55.509970: step: 922/463, loss: 0.008808234706521034 2023-01-24 04:51:56.105249: step: 924/463, loss: 0.0009770625038072467 2023-01-24 04:51:56.760823: step: 926/463, loss: 0.006946452893316746 ================================================== Loss: 0.026 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36738507953194616, 'r': 0.3408943147839121, 'f1': 0.35364429899827093}, 'combined': 0.2605800097881996, 'epoch': 39} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.36129186827299176, 'r': 0.3855329586993965, 'f1': 0.3730189942774063}, 'combined': 0.289134340061913, 'epoch': 39} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3376835054051491, 'r': 0.3357612084104329, 'f1': 0.33671961338210876}, 'combined': 0.2481091888078696, 'epoch': 39} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34494178635303097, 'r': 0.38742542548106973, 'f1': 0.3649513964704796}, 'combined': 0.2828809867378837, 'epoch': 39} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3280272337751645, 'r': 0.3255374634998312, 'f1': 0.32677760621792573}, 'combined': 0.24078349931847157, 'epoch': 39} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34600404127901513, 'r': 0.3784419201489228, 'f1': 0.3614967595452397}, 'combined': 0.28020322988674085, 'epoch': 39} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.30492424242424243, 'r': 0.2875, 'f1': 0.2959558823529412}, 'combined': 0.19730392156862744, 'epoch': 39} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.32, 'r': 0.34782608695652173, 'f1': 0.3333333333333333}, 'combined': 0.16666666666666666, 'epoch': 39} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5486842105263158, 'r': 0.18920145190562612, 'f1': 0.2813765182186235}, 'combined': 0.18758434547908232, 'epoch': 39} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35531381856540084, 'r': 0.3195801707779886, 'f1': 0.336500999000999}, 'combined': 0.24794810452705188, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35077406976066583, 'r': 0.38269193088778525, 'f1': 0.3660385237854157}, 'combined': 0.2837236404461117, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4230769230769231, 'r': 0.3142857142857143, 'f1': 0.360655737704918}, 'combined': 0.24043715846994532, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3556843389529725, 'r': 0.32598773380319873, 'f1': 0.340189179632249}, 'combined': 0.25066571130797294, 'epoch': 19} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35235259195938173, 'r': 0.3698406801632481, 'f1': 0.3608848968767838}, 'combined': 0.2797289631293731, 'epoch': 19} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.41304347826086957, 'r': 0.41304347826086957, 'f1': 0.41304347826086957}, 'combined': 0.20652173913043478, 'epoch': 19} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.350815113857877, 'r': 0.329513247361763, 'f1': 0.33983068759226837}, 'combined': 0.25040155927851354, 'epoch': 38} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3323554133442105, 'r': 0.35587689020772534, 'f1': 0.34371420909543293}, 'combined': 0.2664196261888045, 'epoch': 38} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6486842105263158, 'r': 0.22368421052631576, 'f1': 0.3326585695006748}, 'combined': 0.22177237966711652, 'epoch': 38}