Command that produces this log: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 ---------------------------------------------------------------------------------------------------- > trainable params: >>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024]) >>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024]) >>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024]) >>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024]) >>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.1.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.1.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.1.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.2.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.2.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.2.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.3.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.3.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.3.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.4.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.4.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.4.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.5.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.5.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.5.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.6.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.6.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.6.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.7.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.7.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.7.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.8.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.8.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.8.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.9.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.9.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.9.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.10.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.10.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.10.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.11.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.11.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.11.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.12.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.12.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.12.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.13.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.13.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.13.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.14.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.14.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.14.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.15.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.15.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.15.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.16.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.16.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.16.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.17.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.17.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.17.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.18.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.18.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.18.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.19.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.19.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.19.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.20.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.20.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.20.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.21.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.21.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.21.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.22.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.22.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.22.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.23.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.23.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.23.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.pooler.dense.weight: torch.Size([1024, 1024]) >>> xlmr.pooler.dense.bias: torch.Size([1024]) >>> basic_gcn.T_T.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.0.bias: torch.Size([1024]) >>> basic_gcn.T_T.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.1.bias: torch.Size([1024]) >>> basic_gcn.T_T.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.2.bias: torch.Size([1024]) >>> basic_gcn.T_E.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.0.bias: torch.Size([1024]) >>> basic_gcn.T_E.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.1.bias: torch.Size([1024]) >>> basic_gcn.T_E.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.2.bias: torch.Size([1024]) >>> basic_gcn.E_T.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.0.bias: torch.Size([1024]) >>> basic_gcn.E_T.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.1.bias: torch.Size([1024]) >>> basic_gcn.E_T.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.2.bias: torch.Size([1024]) >>> basic_gcn.E_E.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.0.bias: torch.Size([1024]) >>> basic_gcn.E_E.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.1.bias: torch.Size([1024]) >>> basic_gcn.E_E.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.2.bias: torch.Size([1024]) >>> basic_gcn.f_t.0.weight: torch.Size([1024, 2048]) >>> basic_gcn.f_t.0.bias: torch.Size([1024]) >>> basic_gcn.f_e.0.weight: torch.Size([1024, 2048]) >>> basic_gcn.f_e.0.bias: torch.Size([1024]) >>> name2classifier.occupy-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.occupy-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.occupy-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.occupy-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.outcome-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.outcome-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.outcome-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.outcome-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.when-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.when-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.when-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.when-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.where-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.where-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.where-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.where-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.who-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.who-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.who-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.who-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-against-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-against-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-against-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-against-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-for-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-for-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-for-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-for-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.organizer-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.organizer-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.organizer-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.organizer-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.wounded-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.wounded-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.wounded-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.wounded-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.arrested-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.arrested-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.arrested-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.arrested-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.imprisoned-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.imprisoned-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.imprisoned-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.imprisoned-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.corrupt-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.corrupt-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.corrupt-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.corrupt-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.judicial-actions-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.judicial-actions-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.judicial-actions-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.judicial-actions-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.charged-with-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.charged-with-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.charged-with-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.charged-with-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.prison-term-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.prison-term-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.prison-term-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.prison-term-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.fine-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.fine-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.fine-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.fine-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.npi-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.npi-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.npi-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.npi-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.disease-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.disease-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.disease-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.disease-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.outbreak-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.outbreak-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.outbreak-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.outbreak-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.blamed-by-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.blamed-by-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.blamed-by-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.blamed-by-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.claimed-by-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.claimed-by-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.claimed-by-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.claimed-by-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.terror-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.terror-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.terror-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.terror-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.kidnapped-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.kidnapped-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.kidnapped-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.kidnapped-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-perp-org-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-perp-org-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-perp-org-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-perp-org-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-perp-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-perp-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-perp-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-perp-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.target-physical-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.target-physical-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.target-physical-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.target-physical-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-killed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-killed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-killed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-killed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.target-human-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.target-human-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.target-human-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.target-human-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-captured-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-captured-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-captured-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-captured-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-objective-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-objective-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-objective-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-objective-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.weapon-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.weapon-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.weapon-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.weapon-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-organizer-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-organizer-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-organizer-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-organizer-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.affected-cumulative-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.affected-cumulative-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.affected-cumulative-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.affected-cumulative-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.damage-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.damage-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.damage-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.damage-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.human-displacement-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.human-displacement-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.human-displacement-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.human-displacement-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.major-disaster-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.major-disaster-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.major-disaster-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.major-disaster-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.related-natural-phenomena-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.related-natural-phenomena-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.related-natural-phenomena-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.related-natural-phenomena-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.responders-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.responders-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.responders-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.responders-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.assistance-provided-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.assistance-provided-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.assistance-provided-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.assistance-provided-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.individuals-affected-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.individuals-affected-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.individuals-affected-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.individuals-affected-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.missing-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.missing-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.missing-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.missing-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.injured-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.injured-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.injured-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.injured-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.assistance-needed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.assistance-needed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.assistance-needed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.assistance-needed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.rescue-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.rescue-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.rescue-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.rescue-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.rescued-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.rescued-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.rescued-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.rescued-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.repair-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.repair-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.repair-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.repair-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.declare-emergency-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.declare-emergency-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.declare-emergency-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.declare-emergency-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.announce-disaster-warnings-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.announce-disaster-warnings-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.announce-disaster-warnings-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.announce-disaster-warnings-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.disease-outbreak-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.disease-outbreak-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.disease-outbreak-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.disease-outbreak-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.current-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.current-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.current-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.current-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.group-identity-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.group-identity-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.group-identity-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.group-identity-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.human-displacement-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.human-displacement-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.human-displacement-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.human-displacement-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.origin-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.origin-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.origin-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.origin-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.total-displaced-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.total-displaced-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.total-displaced-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.total-displaced-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.transitory-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.transitory-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.transitory-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.transitory-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.destination-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.destination-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.destination-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.destination-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.transiting-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.transiting-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.transiting-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.transiting-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.detained-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.detained-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.detained-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.detained-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.blocked-migration-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.blocked-migration-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.blocked-migration-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.blocked-migration-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.cybercrime-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.cybercrime-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.cybercrime-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.cybercrime-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perpetrator-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perpetrator-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perpetrator-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perpetrator-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.victim-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.victim-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.victim-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.victim-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.response-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.response-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.response-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.response-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.information-stolen-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.information-stolen-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.information-stolen-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.information-stolen-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.related-crimes-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.related-crimes-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.related-crimes-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.related-crimes-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.victim-impact-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.victim-impact-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.victim-impact-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.victim-impact-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-amount-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-amount-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-amount-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-amount-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.etip-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.etip-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.etip-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.etip-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.project-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.project-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.project-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.project-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.project-name-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.project-name-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.project-name-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.project-name-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.signatories-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.signatories-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.signatories-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.signatories-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-awardee-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-awardee-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-awardee-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-awardee-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.overall-project-value-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.overall-project-value-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.overall-project-value-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.overall-project-value-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-amount-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-amount-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-amount-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-amount-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-recipient-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-recipient-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-recipient-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-recipient-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-source-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-source-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-source-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-source-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-awarder-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-awarder-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-awarder-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-awarder-ffn.layers.1.bias: torch.Size([2]) >>> irrealis_classifier.layers.0.weight: torch.Size([350, 1127]) >>> irrealis_classifier.layers.0.bias: torch.Size([350]) >>> irrealis_classifier.layers.1.weight: torch.Size([7, 350]) >>> irrealis_classifier.layers.1.bias: torch.Size([7]) n_trainable_params: 613743345, n_nontrainable_params: 0 ---------------------------------------------------------------------------------------------------- ****************************** Epoch: 0 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 09:02:47.469375: step: 2/463, loss: 5.602298259735107 2023-01-22 09:02:48.030025: step: 4/463, loss: 8.179482460021973 2023-01-22 09:02:48.711569: step: 6/463, loss: 22.52622413635254 2023-01-22 09:02:49.346538: step: 8/463, loss: 17.946720123291016 2023-01-22 09:02:49.957619: step: 10/463, loss: 13.71105670928955 2023-01-22 09:02:50.690667: step: 12/463, loss: 14.199092864990234 2023-01-22 09:02:51.336301: step: 14/463, loss: 13.36213493347168 2023-01-22 09:02:51.978756: step: 16/463, loss: 7.744307041168213 2023-01-22 09:02:52.652412: step: 18/463, loss: 6.063961982727051 2023-01-22 09:02:53.245064: step: 20/463, loss: 13.85445785522461 2023-01-22 09:02:53.829439: step: 22/463, loss: 5.830394268035889 2023-01-22 09:02:54.458325: step: 24/463, loss: 10.697946548461914 2023-01-22 09:02:55.037436: step: 26/463, loss: 17.892301559448242 2023-01-22 09:02:55.658989: step: 28/463, loss: 21.350595474243164 2023-01-22 09:02:56.176333: step: 30/463, loss: 7.015264987945557 2023-01-22 09:02:56.832336: step: 32/463, loss: 11.877845764160156 2023-01-22 09:02:57.493972: step: 34/463, loss: 11.74675464630127 2023-01-22 09:02:58.095967: step: 36/463, loss: 12.148698806762695 2023-01-22 09:02:58.738089: step: 38/463, loss: 18.386337280273438 2023-01-22 09:02:59.319037: step: 40/463, loss: 9.049979209899902 2023-01-22 09:02:59.939595: step: 42/463, loss: 10.876665115356445 2023-01-22 09:03:00.523540: step: 44/463, loss: 18.010337829589844 2023-01-22 09:03:01.108609: step: 46/463, loss: 15.44178295135498 2023-01-22 09:03:01.759074: step: 48/463, loss: 12.177515983581543 2023-01-22 09:03:02.412821: step: 50/463, loss: 15.834332466125488 2023-01-22 09:03:03.048046: step: 52/463, loss: 26.31072998046875 2023-01-22 09:03:03.717438: step: 54/463, loss: 18.291120529174805 2023-01-22 09:03:04.334807: step: 56/463, loss: 6.603659629821777 2023-01-22 09:03:04.897959: step: 58/463, loss: 20.18153190612793 2023-01-22 09:03:05.503124: step: 60/463, loss: 17.090713500976562 2023-01-22 09:03:06.100487: step: 62/463, loss: 11.084619522094727 2023-01-22 09:03:06.808296: step: 64/463, loss: 12.460599899291992 2023-01-22 09:03:07.415586: step: 66/463, loss: 14.595062255859375 2023-01-22 09:03:08.052468: step: 68/463, loss: 8.258983612060547 2023-01-22 09:03:08.651919: step: 70/463, loss: 24.9277286529541 2023-01-22 09:03:09.257075: step: 72/463, loss: 13.338602066040039 2023-01-22 09:03:09.863059: step: 74/463, loss: 18.1767635345459 2023-01-22 09:03:10.462555: step: 76/463, loss: 14.119132995605469 2023-01-22 09:03:11.020982: step: 78/463, loss: 21.10388946533203 2023-01-22 09:03:11.611639: step: 80/463, loss: 10.985973358154297 2023-01-22 09:03:12.269641: step: 82/463, loss: 16.187604904174805 2023-01-22 09:03:12.908222: step: 84/463, loss: 14.409175872802734 2023-01-22 09:03:13.533998: step: 86/463, loss: 14.989502906799316 2023-01-22 09:03:14.169695: step: 88/463, loss: 16.827714920043945 2023-01-22 09:03:14.807853: step: 90/463, loss: 6.5476298332214355 2023-01-22 09:03:15.389478: step: 92/463, loss: 19.470623016357422 2023-01-22 09:03:16.003861: step: 94/463, loss: 7.303631782531738 2023-01-22 09:03:16.634049: step: 96/463, loss: 12.505188941955566 2023-01-22 09:03:17.211966: step: 98/463, loss: 13.444581031799316 2023-01-22 09:03:17.842117: step: 100/463, loss: 7.426078796386719 2023-01-22 09:03:18.440803: step: 102/463, loss: 9.059783935546875 2023-01-22 09:03:19.041686: step: 104/463, loss: 14.105218887329102 2023-01-22 09:03:19.672800: step: 106/463, loss: 13.249429702758789 2023-01-22 09:03:20.329055: step: 108/463, loss: 15.53437614440918 2023-01-22 09:03:20.946246: step: 110/463, loss: 7.691650390625 2023-01-22 09:03:21.486200: step: 112/463, loss: 23.485111236572266 2023-01-22 09:03:22.067265: step: 114/463, loss: 4.0248260498046875 2023-01-22 09:03:22.670295: step: 116/463, loss: 6.678998947143555 2023-01-22 09:03:23.292940: step: 118/463, loss: 9.573973655700684 2023-01-22 09:03:23.935772: step: 120/463, loss: 16.726200103759766 2023-01-22 09:03:24.556886: step: 122/463, loss: 10.041594505310059 2023-01-22 09:03:25.189933: step: 124/463, loss: 12.505385398864746 2023-01-22 09:03:25.812558: step: 126/463, loss: 11.793069839477539 2023-01-22 09:03:26.398118: step: 128/463, loss: 6.807032585144043 2023-01-22 09:03:26.988472: step: 130/463, loss: 17.378379821777344 2023-01-22 09:03:27.620789: step: 132/463, loss: 15.738119125366211 2023-01-22 09:03:28.202017: step: 134/463, loss: 13.53643798828125 2023-01-22 09:03:28.800370: step: 136/463, loss: 6.586299896240234 2023-01-22 09:03:29.407549: step: 138/463, loss: 5.258662223815918 2023-01-22 09:03:30.208083: step: 140/463, loss: 4.619546413421631 2023-01-22 09:03:30.822581: step: 142/463, loss: 15.715588569641113 2023-01-22 09:03:31.459942: step: 144/463, loss: 4.4310431480407715 2023-01-22 09:03:32.154842: step: 146/463, loss: 23.154541015625 2023-01-22 09:03:32.842893: step: 148/463, loss: 9.951217651367188 2023-01-22 09:03:33.522507: step: 150/463, loss: 8.590143203735352 2023-01-22 09:03:34.127875: step: 152/463, loss: 9.71445369720459 2023-01-22 09:03:34.749936: step: 154/463, loss: 8.005531311035156 2023-01-22 09:03:35.370509: step: 156/463, loss: 13.424943923950195 2023-01-22 09:03:35.950908: step: 158/463, loss: 7.106303691864014 2023-01-22 09:03:36.584135: step: 160/463, loss: 8.364706993103027 2023-01-22 09:03:37.211253: step: 162/463, loss: 8.953907012939453 2023-01-22 09:03:37.787208: step: 164/463, loss: 3.3302693367004395 2023-01-22 09:03:38.441113: step: 166/463, loss: 16.106121063232422 2023-01-22 09:03:39.078453: step: 168/463, loss: 5.725656986236572 2023-01-22 09:03:39.732152: step: 170/463, loss: 6.649268627166748 2023-01-22 09:03:40.319391: step: 172/463, loss: 9.520486831665039 2023-01-22 09:03:40.887617: step: 174/463, loss: 8.203156471252441 2023-01-22 09:03:41.507877: step: 176/463, loss: 11.545543670654297 2023-01-22 09:03:42.184982: step: 178/463, loss: 4.302837371826172 2023-01-22 09:03:42.797866: step: 180/463, loss: 4.450910568237305 2023-01-22 09:03:43.324247: step: 182/463, loss: 6.10361909866333 2023-01-22 09:03:43.890569: step: 184/463, loss: 8.916027069091797 2023-01-22 09:03:44.516985: step: 186/463, loss: 8.522967338562012 2023-01-22 09:03:45.174066: step: 188/463, loss: 8.79146671295166 2023-01-22 09:03:45.778785: step: 190/463, loss: 8.949901580810547 2023-01-22 09:03:46.347946: step: 192/463, loss: 8.222185134887695 2023-01-22 09:03:46.922420: step: 194/463, loss: 4.042442321777344 2023-01-22 09:03:47.517737: step: 196/463, loss: 17.776769638061523 2023-01-22 09:03:48.140940: step: 198/463, loss: 5.129105091094971 2023-01-22 09:03:48.757336: step: 200/463, loss: 8.442646980285645 2023-01-22 09:03:49.331323: step: 202/463, loss: 7.674983501434326 2023-01-22 09:03:49.927919: step: 204/463, loss: 15.125864028930664 2023-01-22 09:03:50.574531: step: 206/463, loss: 8.169317245483398 2023-01-22 09:03:51.225344: step: 208/463, loss: 10.881017684936523 2023-01-22 09:03:51.841053: step: 210/463, loss: 8.816551208496094 2023-01-22 09:03:52.417348: step: 212/463, loss: 10.203678131103516 2023-01-22 09:03:52.918330: step: 214/463, loss: 7.142978191375732 2023-01-22 09:03:53.500137: step: 216/463, loss: 5.470901966094971 2023-01-22 09:03:54.109689: step: 218/463, loss: 5.4315619468688965 2023-01-22 09:03:54.706162: step: 220/463, loss: 12.305489540100098 2023-01-22 09:03:55.288514: step: 222/463, loss: 9.573036193847656 2023-01-22 09:03:55.841123: step: 224/463, loss: 4.671309947967529 2023-01-22 09:03:56.488732: step: 226/463, loss: 14.168991088867188 2023-01-22 09:03:57.052129: step: 228/463, loss: 3.745619535446167 2023-01-22 09:03:57.658075: step: 230/463, loss: 12.521639823913574 2023-01-22 09:03:58.256714: step: 232/463, loss: 5.382946968078613 2023-01-22 09:03:58.841044: step: 234/463, loss: 2.7595434188842773 2023-01-22 09:03:59.437852: step: 236/463, loss: 11.03259563446045 2023-01-22 09:03:59.979762: step: 238/463, loss: 5.2375407218933105 2023-01-22 09:04:00.594279: step: 240/463, loss: 6.666964530944824 2023-01-22 09:04:01.192626: step: 242/463, loss: 8.853500366210938 2023-01-22 09:04:01.822565: step: 244/463, loss: 11.538705825805664 2023-01-22 09:04:02.514341: step: 246/463, loss: 4.874959945678711 2023-01-22 09:04:03.137755: step: 248/463, loss: 2.589587688446045 2023-01-22 09:04:03.742369: step: 250/463, loss: 5.342429161071777 2023-01-22 09:04:04.440964: step: 252/463, loss: 14.176994323730469 2023-01-22 09:04:05.077097: step: 254/463, loss: 6.511935234069824 2023-01-22 09:04:05.723034: step: 256/463, loss: 7.1047844886779785 2023-01-22 09:04:06.347297: step: 258/463, loss: 6.963000774383545 2023-01-22 09:04:06.958352: step: 260/463, loss: 6.797969341278076 2023-01-22 09:04:07.519887: step: 262/463, loss: 9.047029495239258 2023-01-22 09:04:08.042484: step: 264/463, loss: 4.434392929077148 2023-01-22 09:04:08.605692: step: 266/463, loss: 13.79139232635498 2023-01-22 09:04:09.240583: step: 268/463, loss: 8.185296058654785 2023-01-22 09:04:09.888207: step: 270/463, loss: 6.778559684753418 2023-01-22 09:04:10.496238: step: 272/463, loss: 11.998456954956055 2023-01-22 09:04:11.085189: step: 274/463, loss: 8.637171745300293 2023-01-22 09:04:11.660752: step: 276/463, loss: 2.9375 2023-01-22 09:04:12.306279: step: 278/463, loss: 8.361226081848145 2023-01-22 09:04:12.919129: step: 280/463, loss: 2.1594314575195312 2023-01-22 09:04:13.469167: step: 282/463, loss: 4.514021396636963 2023-01-22 09:04:14.030575: step: 284/463, loss: 18.290882110595703 2023-01-22 09:04:14.598430: step: 286/463, loss: 6.228346824645996 2023-01-22 09:04:15.233688: step: 288/463, loss: 6.313811302185059 2023-01-22 09:04:15.873136: step: 290/463, loss: 9.055527687072754 2023-01-22 09:04:16.489327: step: 292/463, loss: 21.097455978393555 2023-01-22 09:04:17.095833: step: 294/463, loss: 9.765172004699707 2023-01-22 09:04:17.660101: step: 296/463, loss: 9.060818672180176 2023-01-22 09:04:18.298467: step: 298/463, loss: 6.205001354217529 2023-01-22 09:04:18.932741: step: 300/463, loss: 14.02237319946289 2023-01-22 09:04:19.471469: step: 302/463, loss: 6.1169891357421875 2023-01-22 09:04:20.043615: step: 304/463, loss: 5.814948081970215 2023-01-22 09:04:20.733179: step: 306/463, loss: 4.788305282592773 2023-01-22 09:04:21.341869: step: 308/463, loss: 7.753364562988281 2023-01-22 09:04:21.899037: step: 310/463, loss: 3.05171275138855 2023-01-22 09:04:22.514046: step: 312/463, loss: 6.9203267097473145 2023-01-22 09:04:23.121929: step: 314/463, loss: 6.376954078674316 2023-01-22 09:04:23.748716: step: 316/463, loss: 16.378690719604492 2023-01-22 09:04:24.301588: step: 318/463, loss: 12.88469123840332 2023-01-22 09:04:24.902007: step: 320/463, loss: 2.2646515369415283 2023-01-22 09:04:25.544979: step: 322/463, loss: 9.245916366577148 2023-01-22 09:04:26.140202: step: 324/463, loss: 3.8333253860473633 2023-01-22 09:04:26.761821: step: 326/463, loss: 14.657800674438477 2023-01-22 09:04:27.384393: step: 328/463, loss: 5.927628040313721 2023-01-22 09:04:27.979124: step: 330/463, loss: 4.4432477951049805 2023-01-22 09:04:28.511904: step: 332/463, loss: 6.40554141998291 2023-01-22 09:04:29.139292: step: 334/463, loss: 4.330059051513672 2023-01-22 09:04:29.727703: step: 336/463, loss: 10.429497718811035 2023-01-22 09:04:30.361109: step: 338/463, loss: 10.983132362365723 2023-01-22 09:04:30.989579: step: 340/463, loss: 7.257326602935791 2023-01-22 09:04:31.629557: step: 342/463, loss: 7.895818710327148 2023-01-22 09:04:32.234476: step: 344/463, loss: 10.706269264221191 2023-01-22 09:04:32.867895: step: 346/463, loss: 5.919206619262695 2023-01-22 09:04:33.471401: step: 348/463, loss: 5.22928524017334 2023-01-22 09:04:34.049308: step: 350/463, loss: 2.5848395824432373 2023-01-22 09:04:34.749619: step: 352/463, loss: 10.033931732177734 2023-01-22 09:04:35.451593: step: 354/463, loss: 4.048956394195557 2023-01-22 09:04:36.016165: step: 356/463, loss: 4.320642471313477 2023-01-22 09:04:36.658213: step: 358/463, loss: 8.659322738647461 2023-01-22 09:04:37.254444: step: 360/463, loss: 2.1166248321533203 2023-01-22 09:04:37.873134: step: 362/463, loss: 5.4784088134765625 2023-01-22 09:04:38.517796: step: 364/463, loss: 7.528636455535889 2023-01-22 09:04:39.104698: step: 366/463, loss: 2.7859270572662354 2023-01-22 09:04:39.745696: step: 368/463, loss: 4.643281936645508 2023-01-22 09:04:40.376058: step: 370/463, loss: 3.4890964031219482 2023-01-22 09:04:41.256177: step: 372/463, loss: 7.304950714111328 2023-01-22 09:04:41.955631: step: 374/463, loss: 5.0774688720703125 2023-01-22 09:04:42.577666: step: 376/463, loss: 1.6356875896453857 2023-01-22 09:04:43.259609: step: 378/463, loss: 4.603309154510498 2023-01-22 09:04:43.912869: step: 380/463, loss: 1.0375280380249023 2023-01-22 09:04:44.516640: step: 382/463, loss: 9.787210464477539 2023-01-22 09:04:45.168324: step: 384/463, loss: 2.499837636947632 2023-01-22 09:04:45.861414: step: 386/463, loss: 4.796648979187012 2023-01-22 09:04:46.528195: step: 388/463, loss: 1.0846575498580933 2023-01-22 09:04:47.200935: step: 390/463, loss: 9.04771900177002 2023-01-22 09:04:47.820564: step: 392/463, loss: 5.604578018188477 2023-01-22 09:04:48.411293: step: 394/463, loss: 5.01338005065918 2023-01-22 09:04:49.017208: step: 396/463, loss: 4.743184566497803 2023-01-22 09:04:49.656870: step: 398/463, loss: 3.104649543762207 2023-01-22 09:04:50.251008: step: 400/463, loss: 3.990280866622925 2023-01-22 09:04:51.020161: step: 402/463, loss: 1.2699460983276367 2023-01-22 09:04:51.580777: step: 404/463, loss: 1.7220298051834106 2023-01-22 09:04:52.155906: step: 406/463, loss: 2.5336601734161377 2023-01-22 09:04:52.770717: step: 408/463, loss: 2.748413324356079 2023-01-22 09:04:53.301673: step: 410/463, loss: 0.7016493082046509 2023-01-22 09:04:53.866822: step: 412/463, loss: 1.9688310623168945 2023-01-22 09:04:54.496229: step: 414/463, loss: 2.25248122215271 2023-01-22 09:04:55.110857: step: 416/463, loss: 1.93528151512146 2023-01-22 09:04:55.720931: step: 418/463, loss: 8.215465545654297 2023-01-22 09:04:56.315466: step: 420/463, loss: 1.888968586921692 2023-01-22 09:04:57.029021: step: 422/463, loss: 2.4234135150909424 2023-01-22 09:04:57.658918: step: 424/463, loss: 6.287452697753906 2023-01-22 09:04:58.308609: step: 426/463, loss: 2.3424036502838135 2023-01-22 09:04:58.858400: step: 428/463, loss: 1.1790132522583008 2023-01-22 09:04:59.442772: step: 430/463, loss: 1.5993249416351318 2023-01-22 09:05:00.017987: step: 432/463, loss: 3.9855756759643555 2023-01-22 09:05:00.651408: step: 434/463, loss: 2.207996129989624 2023-01-22 09:05:01.334045: step: 436/463, loss: 2.2234787940979004 2023-01-22 09:05:02.084700: step: 438/463, loss: 5.171801567077637 2023-01-22 09:05:02.709131: step: 440/463, loss: 2.545515537261963 2023-01-22 09:05:03.421206: step: 442/463, loss: 1.1477330923080444 2023-01-22 09:05:04.031652: step: 444/463, loss: 1.6340665817260742 2023-01-22 09:05:04.632123: step: 446/463, loss: 9.209417343139648 2023-01-22 09:05:05.287083: step: 448/463, loss: 3.4987401962280273 2023-01-22 09:05:05.917135: step: 450/463, loss: 1.4805747270584106 2023-01-22 09:05:06.539886: step: 452/463, loss: 3.5288729667663574 2023-01-22 09:05:07.178609: step: 454/463, loss: 1.6774455308914185 2023-01-22 09:05:07.722646: step: 456/463, loss: 2.0962767601013184 2023-01-22 09:05:08.368831: step: 458/463, loss: 2.5396194458007812 2023-01-22 09:05:09.012857: step: 460/463, loss: 2.4079654216766357 2023-01-22 09:05:09.706531: step: 462/463, loss: 6.29324197769165 2023-01-22 09:05:10.352839: step: 464/463, loss: 0.45623210072517395 2023-01-22 09:05:10.981343: step: 466/463, loss: 1.614783763885498 2023-01-22 09:05:11.610316: step: 468/463, loss: 5.5748395919799805 2023-01-22 09:05:12.150934: step: 470/463, loss: 1.3125004768371582 2023-01-22 09:05:12.807205: step: 472/463, loss: 5.226147651672363 2023-01-22 09:05:13.486974: step: 474/463, loss: 0.5349715948104858 2023-01-22 09:05:14.096712: step: 476/463, loss: 1.50754976272583 2023-01-22 09:05:14.665797: step: 478/463, loss: 0.5347693562507629 2023-01-22 09:05:15.236861: step: 480/463, loss: 0.5113075971603394 2023-01-22 09:05:15.899363: step: 482/463, loss: 9.503129959106445 2023-01-22 09:05:16.572183: step: 484/463, loss: 2.62331485748291 2023-01-22 09:05:17.176715: step: 486/463, loss: 3.452773094177246 2023-01-22 09:05:17.770186: step: 488/463, loss: 2.027191162109375 2023-01-22 09:05:18.352036: step: 490/463, loss: 1.7128016948699951 2023-01-22 09:05:18.937588: step: 492/463, loss: 4.352555274963379 2023-01-22 09:05:19.516750: step: 494/463, loss: 1.2832002639770508 2023-01-22 09:05:20.142910: step: 496/463, loss: 9.903707504272461 2023-01-22 09:05:20.795268: step: 498/463, loss: 5.700617790222168 2023-01-22 09:05:21.438841: step: 500/463, loss: 1.1571046113967896 2023-01-22 09:05:22.095345: step: 502/463, loss: 0.5186383128166199 2023-01-22 09:05:22.712149: step: 504/463, loss: 0.4617786407470703 2023-01-22 09:05:23.346698: step: 506/463, loss: 1.0404834747314453 2023-01-22 09:05:24.024287: step: 508/463, loss: 2.2752439975738525 2023-01-22 09:05:24.692289: step: 510/463, loss: 1.2502347230911255 2023-01-22 09:05:25.384166: step: 512/463, loss: 2.931732177734375 2023-01-22 09:05:26.041272: step: 514/463, loss: 4.332589626312256 2023-01-22 09:05:26.665638: step: 516/463, loss: 9.145713806152344 2023-01-22 09:05:27.260199: step: 518/463, loss: 1.687113642692566 2023-01-22 09:05:27.901444: step: 520/463, loss: 4.266299247741699 2023-01-22 09:05:28.540897: step: 522/463, loss: 1.3389843702316284 2023-01-22 09:05:29.206779: step: 524/463, loss: 4.764490604400635 2023-01-22 09:05:29.816453: step: 526/463, loss: 4.347450256347656 2023-01-22 09:05:30.452012: step: 528/463, loss: 5.252623081207275 2023-01-22 09:05:31.070303: step: 530/463, loss: 0.6236923933029175 2023-01-22 09:05:31.696161: step: 532/463, loss: 2.587319850921631 2023-01-22 09:05:32.345500: step: 534/463, loss: 3.3823764324188232 2023-01-22 09:05:32.965366: step: 536/463, loss: 1.5561316013336182 2023-01-22 09:05:33.576937: step: 538/463, loss: 2.1903076171875 2023-01-22 09:05:34.195192: step: 540/463, loss: 1.647963523864746 2023-01-22 09:05:34.751243: step: 542/463, loss: 1.153571367263794 2023-01-22 09:05:35.396819: step: 544/463, loss: 1.7215423583984375 2023-01-22 09:05:35.985055: step: 546/463, loss: 2.0538575649261475 2023-01-22 09:05:36.599170: step: 548/463, loss: 1.6106829643249512 2023-01-22 09:05:37.234075: step: 550/463, loss: 1.2971807718276978 2023-01-22 09:05:37.932460: step: 552/463, loss: 5.6439619064331055 2023-01-22 09:05:38.502956: step: 554/463, loss: 2.8362927436828613 2023-01-22 09:05:39.163817: step: 556/463, loss: 2.1298794746398926 2023-01-22 09:05:39.786016: step: 558/463, loss: 1.85184907913208 2023-01-22 09:05:40.419731: step: 560/463, loss: 4.955857276916504 2023-01-22 09:05:41.061834: step: 562/463, loss: 2.6816630363464355 2023-01-22 09:05:41.602104: step: 564/463, loss: 1.0311378240585327 2023-01-22 09:05:42.206365: step: 566/463, loss: 1.1304336786270142 2023-01-22 09:05:42.796668: step: 568/463, loss: 1.2872354984283447 2023-01-22 09:05:43.448319: step: 570/463, loss: 0.9479402899742126 2023-01-22 09:05:44.123763: step: 572/463, loss: 3.5955305099487305 2023-01-22 09:05:44.780495: step: 574/463, loss: 17.737064361572266 2023-01-22 09:05:45.448189: step: 576/463, loss: 0.387450248003006 2023-01-22 09:05:46.111140: step: 578/463, loss: 2.014277696609497 2023-01-22 09:05:46.712149: step: 580/463, loss: 0.8113519549369812 2023-01-22 09:05:47.312758: step: 582/463, loss: 1.2146477699279785 2023-01-22 09:05:47.890353: step: 584/463, loss: 1.0005701780319214 2023-01-22 09:05:48.522567: step: 586/463, loss: 3.106804132461548 2023-01-22 09:05:49.219673: step: 588/463, loss: 1.2648484706878662 2023-01-22 09:05:49.888403: step: 590/463, loss: 1.6307803392410278 2023-01-22 09:05:50.467474: step: 592/463, loss: 0.7805302739143372 2023-01-22 09:05:51.115781: step: 594/463, loss: 2.2618353366851807 2023-01-22 09:05:51.780426: step: 596/463, loss: 1.5117416381835938 2023-01-22 09:05:52.473182: step: 598/463, loss: 0.5041603446006775 2023-01-22 09:05:53.138908: step: 600/463, loss: 1.2836923599243164 2023-01-22 09:05:53.716492: step: 602/463, loss: 1.596165657043457 2023-01-22 09:05:54.437807: step: 604/463, loss: 2.5285661220550537 2023-01-22 09:05:55.126401: step: 606/463, loss: 0.4201674461364746 2023-01-22 09:05:55.742947: step: 608/463, loss: 1.2373557090759277 2023-01-22 09:05:56.420354: step: 610/463, loss: 7.557168960571289 2023-01-22 09:05:57.028042: step: 612/463, loss: 1.9671400785446167 2023-01-22 09:05:57.711877: step: 614/463, loss: 1.1954658031463623 2023-01-22 09:05:58.324262: step: 616/463, loss: 0.8409519791603088 2023-01-22 09:05:59.039623: step: 618/463, loss: 1.5823054313659668 2023-01-22 09:05:59.707481: step: 620/463, loss: 0.37346699833869934 2023-01-22 09:06:00.464857: step: 622/463, loss: 1.5253220796585083 2023-01-22 09:06:01.080310: step: 624/463, loss: 2.8737540245056152 2023-01-22 09:06:01.758316: step: 626/463, loss: 1.2973134517669678 2023-01-22 09:06:02.352233: step: 628/463, loss: 1.4284980297088623 2023-01-22 09:06:03.026691: step: 630/463, loss: 2.9359917640686035 2023-01-22 09:06:03.713442: step: 632/463, loss: 4.4256815910339355 2023-01-22 09:06:04.288583: step: 634/463, loss: 1.11931574344635 2023-01-22 09:06:04.934002: step: 636/463, loss: 7.506770133972168 2023-01-22 09:06:05.551318: step: 638/463, loss: 0.7412253618240356 2023-01-22 09:06:06.223279: step: 640/463, loss: 4.581730842590332 2023-01-22 09:06:06.855324: step: 642/463, loss: 1.089477777481079 2023-01-22 09:06:07.450296: step: 644/463, loss: 1.1849415302276611 2023-01-22 09:06:08.094513: step: 646/463, loss: 1.2748390436172485 2023-01-22 09:06:08.735433: step: 648/463, loss: 0.7718403339385986 2023-01-22 09:06:09.404324: step: 650/463, loss: 0.7421875596046448 2023-01-22 09:06:10.032780: step: 652/463, loss: 1.2299742698669434 2023-01-22 09:06:10.741449: step: 654/463, loss: 0.8530952334403992 2023-01-22 09:06:11.383165: step: 656/463, loss: 1.1307525634765625 2023-01-22 09:06:11.995804: step: 658/463, loss: 5.466414451599121 2023-01-22 09:06:12.634265: step: 660/463, loss: 8.384843826293945 2023-01-22 09:06:13.207899: step: 662/463, loss: 0.6394960880279541 2023-01-22 09:06:13.864343: step: 664/463, loss: 6.0730204582214355 2023-01-22 09:06:14.532734: step: 666/463, loss: 2.0828628540039062 2023-01-22 09:06:15.150832: step: 668/463, loss: 0.44130241870880127 2023-01-22 09:06:15.844438: step: 670/463, loss: 2.534024238586426 2023-01-22 09:06:16.385858: step: 672/463, loss: 2.019125461578369 2023-01-22 09:06:17.003689: step: 674/463, loss: 2.689220905303955 2023-01-22 09:06:17.669322: step: 676/463, loss: 3.6442699432373047 2023-01-22 09:06:18.348370: step: 678/463, loss: 4.048669815063477 2023-01-22 09:06:18.939891: step: 680/463, loss: 0.5916732549667358 2023-01-22 09:06:19.534332: step: 682/463, loss: 1.1659475564956665 2023-01-22 09:06:20.155691: step: 684/463, loss: 0.9714545607566833 2023-01-22 09:06:20.854529: step: 686/463, loss: 7.451725006103516 2023-01-22 09:06:21.435308: step: 688/463, loss: 3.6644105911254883 2023-01-22 09:06:22.014587: step: 690/463, loss: 0.7021387815475464 2023-01-22 09:06:22.610239: step: 692/463, loss: 4.626512050628662 2023-01-22 09:06:23.255784: step: 694/463, loss: 2.7556650638580322 2023-01-22 09:06:23.833241: step: 696/463, loss: 1.2383744716644287 2023-01-22 09:06:24.462207: step: 698/463, loss: 1.3536591529846191 2023-01-22 09:06:25.040018: step: 700/463, loss: 0.46849948167800903 2023-01-22 09:06:25.711056: step: 702/463, loss: 3.882406711578369 2023-01-22 09:06:26.335669: step: 704/463, loss: 1.7366465330123901 2023-01-22 09:06:27.021161: step: 706/463, loss: 0.6374361515045166 2023-01-22 09:06:27.588891: step: 708/463, loss: 1.248744010925293 2023-01-22 09:06:28.122143: step: 710/463, loss: 1.792894959449768 2023-01-22 09:06:28.718740: step: 712/463, loss: 2.7405524253845215 2023-01-22 09:06:29.343258: step: 714/463, loss: 6.932621955871582 2023-01-22 09:06:29.945465: step: 716/463, loss: 9.49526596069336 2023-01-22 09:06:30.549459: step: 718/463, loss: 1.006314992904663 2023-01-22 09:06:31.204948: step: 720/463, loss: 2.450564384460449 2023-01-22 09:06:31.817051: step: 722/463, loss: 3.009561061859131 2023-01-22 09:06:32.411569: step: 724/463, loss: 1.802248239517212 2023-01-22 09:06:33.020204: step: 726/463, loss: 0.5707215666770935 2023-01-22 09:06:33.590716: step: 728/463, loss: 1.8768552541732788 2023-01-22 09:06:34.184616: step: 730/463, loss: 2.089775323867798 2023-01-22 09:06:34.805264: step: 732/463, loss: 2.413895606994629 2023-01-22 09:06:35.380717: step: 734/463, loss: 1.5247026681900024 2023-01-22 09:06:36.001942: step: 736/463, loss: 1.6237387657165527 2023-01-22 09:06:36.635161: step: 738/463, loss: 1.133504867553711 2023-01-22 09:06:37.315140: step: 740/463, loss: 3.4109933376312256 2023-01-22 09:06:37.914158: step: 742/463, loss: 0.524638295173645 2023-01-22 09:06:38.538786: step: 744/463, loss: 1.4392170906066895 2023-01-22 09:06:39.163493: step: 746/463, loss: 0.466078519821167 2023-01-22 09:06:39.738594: step: 748/463, loss: 1.4921762943267822 2023-01-22 09:06:40.356016: step: 750/463, loss: 0.9424622058868408 2023-01-22 09:06:40.938239: step: 752/463, loss: 1.864139199256897 2023-01-22 09:06:41.550380: step: 754/463, loss: 1.7786788940429688 2023-01-22 09:06:42.183998: step: 756/463, loss: 0.4550171494483948 2023-01-22 09:06:42.767652: step: 758/463, loss: 0.8722974061965942 2023-01-22 09:06:43.332188: step: 760/463, loss: 1.4529714584350586 2023-01-22 09:06:43.894645: step: 762/463, loss: 1.4644681215286255 2023-01-22 09:06:44.516668: step: 764/463, loss: 0.9574808478355408 2023-01-22 09:06:45.149223: step: 766/463, loss: 1.4590986967086792 2023-01-22 09:06:45.757610: step: 768/463, loss: 1.6554760932922363 2023-01-22 09:06:46.366362: step: 770/463, loss: 0.768781304359436 2023-01-22 09:06:47.014441: step: 772/463, loss: 3.460744619369507 2023-01-22 09:06:47.652481: step: 774/463, loss: 5.019446849822998 2023-01-22 09:06:48.282384: step: 776/463, loss: 1.7518839836120605 2023-01-22 09:06:48.903061: step: 778/463, loss: 0.7793686985969543 2023-01-22 09:06:49.495796: step: 780/463, loss: 5.875916004180908 2023-01-22 09:06:50.031837: step: 782/463, loss: 3.337308168411255 2023-01-22 09:06:50.766125: step: 784/463, loss: 2.53031849861145 2023-01-22 09:06:51.435609: step: 786/463, loss: 0.6848602890968323 2023-01-22 09:06:51.989194: step: 788/463, loss: 1.8918633460998535 2023-01-22 09:06:52.621467: step: 790/463, loss: 2.227695941925049 2023-01-22 09:06:53.290803: step: 792/463, loss: 0.4642110764980316 2023-01-22 09:06:53.971112: step: 794/463, loss: 9.37708854675293 2023-01-22 09:06:54.614370: step: 796/463, loss: 6.848600387573242 2023-01-22 09:06:55.261003: step: 798/463, loss: 4.573422908782959 2023-01-22 09:06:55.856669: step: 800/463, loss: 1.1996718645095825 2023-01-22 09:06:56.518043: step: 802/463, loss: 1.856066346168518 2023-01-22 09:06:57.162715: step: 804/463, loss: 1.0040066242218018 2023-01-22 09:06:57.814870: step: 806/463, loss: 3.0978808403015137 2023-01-22 09:06:58.470430: step: 808/463, loss: 1.158308744430542 2023-01-22 09:06:59.094213: step: 810/463, loss: 2.584512710571289 2023-01-22 09:06:59.702739: step: 812/463, loss: 2.0479159355163574 2023-01-22 09:07:00.294365: step: 814/463, loss: 0.7239602208137512 2023-01-22 09:07:00.928759: step: 816/463, loss: 2.8600265979766846 2023-01-22 09:07:01.505522: step: 818/463, loss: 3.3635849952697754 2023-01-22 09:07:02.124369: step: 820/463, loss: 3.387791156768799 2023-01-22 09:07:02.782114: step: 822/463, loss: 2.9429941177368164 2023-01-22 09:07:03.450036: step: 824/463, loss: 0.7158240079879761 2023-01-22 09:07:04.052118: step: 826/463, loss: 1.6996464729309082 2023-01-22 09:07:04.666826: step: 828/463, loss: 1.3286644220352173 2023-01-22 09:07:05.307884: step: 830/463, loss: 1.5376002788543701 2023-01-22 09:07:05.893108: step: 832/463, loss: 15.429784774780273 2023-01-22 09:07:06.491115: step: 834/463, loss: 6.225384712219238 2023-01-22 09:07:07.138092: step: 836/463, loss: 0.42232662439346313 2023-01-22 09:07:07.778571: step: 838/463, loss: 0.8103923797607422 2023-01-22 09:07:08.377343: step: 840/463, loss: 5.468482971191406 2023-01-22 09:07:09.004393: step: 842/463, loss: 0.8557579517364502 2023-01-22 09:07:09.581860: step: 844/463, loss: 0.682935893535614 2023-01-22 09:07:10.142410: step: 846/463, loss: 1.533571720123291 2023-01-22 09:07:10.762669: step: 848/463, loss: 1.630355954170227 2023-01-22 09:07:11.304038: step: 850/463, loss: 1.689854621887207 2023-01-22 09:07:11.923707: step: 852/463, loss: 1.1244410276412964 2023-01-22 09:07:12.526713: step: 854/463, loss: 5.726593971252441 2023-01-22 09:07:13.208080: step: 856/463, loss: 2.025653839111328 2023-01-22 09:07:13.865959: step: 858/463, loss: 4.137726783752441 2023-01-22 09:07:14.501290: step: 860/463, loss: 8.090264320373535 2023-01-22 09:07:15.139384: step: 862/463, loss: 1.6618350744247437 2023-01-22 09:07:15.774821: step: 864/463, loss: 1.0811971426010132 2023-01-22 09:07:16.539609: step: 866/463, loss: 2.1779613494873047 2023-01-22 09:07:17.198692: step: 868/463, loss: 3.202235221862793 2023-01-22 09:07:17.901340: step: 870/463, loss: 2.495041608810425 2023-01-22 09:07:18.489718: step: 872/463, loss: 4.0412445068359375 2023-01-22 09:07:19.050156: step: 874/463, loss: 0.5530418753623962 2023-01-22 09:07:19.699996: step: 876/463, loss: 1.8525749444961548 2023-01-22 09:07:20.307886: step: 878/463, loss: 6.692317962646484 2023-01-22 09:07:20.922394: step: 880/463, loss: 1.459074854850769 2023-01-22 09:07:21.631842: step: 882/463, loss: 0.6565572023391724 2023-01-22 09:07:22.238742: step: 884/463, loss: 2.0492184162139893 2023-01-22 09:07:22.828396: step: 886/463, loss: 6.110342979431152 2023-01-22 09:07:23.442270: step: 888/463, loss: 0.8213084936141968 2023-01-22 09:07:24.108715: step: 890/463, loss: 1.5049846172332764 2023-01-22 09:07:24.753350: step: 892/463, loss: 2.3778645992279053 2023-01-22 09:07:25.366887: step: 894/463, loss: 2.0398316383361816 2023-01-22 09:07:25.980557: step: 896/463, loss: 3.0210647583007812 2023-01-22 09:07:26.597603: step: 898/463, loss: 1.910044550895691 2023-01-22 09:07:27.242469: step: 900/463, loss: 2.312530994415283 2023-01-22 09:07:27.795740: step: 902/463, loss: 2.947416305541992 2023-01-22 09:07:28.418681: step: 904/463, loss: 2.189818859100342 2023-01-22 09:07:29.014015: step: 906/463, loss: 1.1345771551132202 2023-01-22 09:07:29.602309: step: 908/463, loss: 1.2360376119613647 2023-01-22 09:07:30.200256: step: 910/463, loss: 1.3576138019561768 2023-01-22 09:07:30.809340: step: 912/463, loss: 2.997972011566162 2023-01-22 09:07:31.398046: step: 914/463, loss: 0.937995195388794 2023-01-22 09:07:32.031767: step: 916/463, loss: 4.045863628387451 2023-01-22 09:07:32.670940: step: 918/463, loss: 2.332854747772217 2023-01-22 09:07:33.273495: step: 920/463, loss: 1.0578027963638306 2023-01-22 09:07:33.940220: step: 922/463, loss: 3.0752735137939453 2023-01-22 09:07:34.586706: step: 924/463, loss: 8.361388206481934 2023-01-22 09:07:35.263949: step: 926/463, loss: 2.755699634552002 ================================================== Loss: 5.605 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.332, 'r': 0.07844990548204159, 'f1': 0.12691131498470948}, 'combined': 0.09351360051504909, 'epoch': 0} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3451492537313433, 'r': 0.042122040072859745, 'f1': 0.07508116883116883}, 'combined': 0.05819688684521221, 'epoch': 0} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29924242424242425, 'r': 0.08711405166981727, 'f1': 0.13494387506100536}, 'combined': 0.09943232899231973, 'epoch': 0} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3113207547169811, 'r': 0.045081967213114756, 'f1': 0.07875894988066826}, 'combined': 0.06104760708453712, 'epoch': 0} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3101785714285714, 'r': 0.08208884688090737, 'f1': 0.12982062780269057}, 'combined': 0.09565730469671936, 'epoch': 0} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33214285714285713, 'r': 0.04234972677595628, 'f1': 0.07512116316639741}, 'combined': 0.05822788723902575, 'epoch': 0} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2222222222222222, 'r': 0.05714285714285714, 'f1': 0.0909090909090909}, 'combined': 0.060606060606060594, 'epoch': 0} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4642857142857143, 'r': 0.11206896551724138, 'f1': 0.18055555555555555}, 'combined': 0.12037037037037036, 'epoch': 0} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.332, 'r': 0.07844990548204159, 'f1': 0.12691131498470948}, 'combined': 0.09351360051504909, 'epoch': 0} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3451492537313433, 'r': 0.042122040072859745, 'f1': 0.07508116883116883}, 'combined': 0.05819688684521221, 'epoch': 0} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2222222222222222, 'r': 0.05714285714285714, 'f1': 0.0909090909090909}, 'combined': 0.060606060606060594, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29924242424242425, 'r': 0.08711405166981727, 'f1': 0.13494387506100536}, 'combined': 0.09943232899231973, 'epoch': 0} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3113207547169811, 'r': 0.045081967213114756, 'f1': 0.07875894988066826}, 'combined': 0.06104760708453712, 'epoch': 0} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3101785714285714, 'r': 0.08208884688090737, 'f1': 0.12982062780269057}, 'combined': 0.09565730469671936, 'epoch': 0} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33214285714285713, 'r': 0.04234972677595628, 'f1': 0.07512116316639741}, 'combined': 0.05822788723902575, 'epoch': 0} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4642857142857143, 'r': 0.11206896551724138, 'f1': 0.18055555555555555}, 'combined': 0.12037037037037036, 'epoch': 0} ****************************** Epoch: 1 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 09:10:23.816214: step: 2/463, loss: 0.6685802340507507 2023-01-22 09:10:24.436233: step: 4/463, loss: 3.815256357192993 2023-01-22 09:10:25.058185: step: 6/463, loss: 5.122040271759033 2023-01-22 09:10:25.732141: step: 8/463, loss: 0.5858937501907349 2023-01-22 09:10:26.400523: step: 10/463, loss: 3.097500801086426 2023-01-22 09:10:26.995777: step: 12/463, loss: 2.622636556625366 2023-01-22 09:10:27.628100: step: 14/463, loss: 0.7561248540878296 2023-01-22 09:10:28.234127: step: 16/463, loss: 4.578721523284912 2023-01-22 09:10:28.928321: step: 18/463, loss: 1.684999704360962 2023-01-22 09:10:29.608863: step: 20/463, loss: 1.2447631359100342 2023-01-22 09:10:30.130372: step: 22/463, loss: 2.5334525108337402 2023-01-22 09:10:30.770648: step: 24/463, loss: 5.395010948181152 2023-01-22 09:10:31.344875: step: 26/463, loss: 0.6044031381607056 2023-01-22 09:10:31.915424: step: 28/463, loss: 2.5535888671875 2023-01-22 09:10:32.526006: step: 30/463, loss: 2.1872472763061523 2023-01-22 09:10:33.137475: step: 32/463, loss: 1.477719783782959 2023-01-22 09:10:33.752927: step: 34/463, loss: 1.7267385721206665 2023-01-22 09:10:34.426460: step: 36/463, loss: 1.0844117403030396 2023-01-22 09:10:35.026785: step: 38/463, loss: 0.5043696165084839 2023-01-22 09:10:35.650895: step: 40/463, loss: 0.7670609951019287 2023-01-22 09:10:36.306493: step: 42/463, loss: 1.944401741027832 2023-01-22 09:10:36.909479: step: 44/463, loss: 1.4977705478668213 2023-01-22 09:10:37.538183: step: 46/463, loss: 5.696686267852783 2023-01-22 09:10:38.122648: step: 48/463, loss: 1.3773748874664307 2023-01-22 09:10:38.747011: step: 50/463, loss: 1.2749525308609009 2023-01-22 09:10:39.351993: step: 52/463, loss: 1.9650002717971802 2023-01-22 09:10:39.991198: step: 54/463, loss: 9.730448722839355 2023-01-22 09:10:40.683615: step: 56/463, loss: 2.6587979793548584 2023-01-22 09:10:41.253921: step: 58/463, loss: 1.1504578590393066 2023-01-22 09:10:41.900774: step: 60/463, loss: 2.79347562789917 2023-01-22 09:10:42.524844: step: 62/463, loss: 4.746162414550781 2023-01-22 09:10:43.131438: step: 64/463, loss: 6.020909786224365 2023-01-22 09:10:43.739660: step: 66/463, loss: 4.420544624328613 2023-01-22 09:10:44.285696: step: 68/463, loss: 0.9461444616317749 2023-01-22 09:10:44.869146: step: 70/463, loss: 4.956018447875977 2023-01-22 09:10:45.504374: step: 72/463, loss: 6.183978080749512 2023-01-22 09:10:46.107311: step: 74/463, loss: 3.3069844245910645 2023-01-22 09:10:46.728629: step: 76/463, loss: 0.9955776929855347 2023-01-22 09:10:47.343915: step: 78/463, loss: 4.352029800415039 2023-01-22 09:10:47.907608: step: 80/463, loss: 0.9474515914916992 2023-01-22 09:10:48.479294: step: 82/463, loss: 1.5701227188110352 2023-01-22 09:10:49.064681: step: 84/463, loss: 2.8134660720825195 2023-01-22 09:10:49.655084: step: 86/463, loss: 7.871100425720215 2023-01-22 09:10:50.260841: step: 88/463, loss: 2.4787893295288086 2023-01-22 09:10:50.844676: step: 90/463, loss: 2.772219181060791 2023-01-22 09:10:51.509062: step: 92/463, loss: 3.449888229370117 2023-01-22 09:10:52.056356: step: 94/463, loss: 3.2907216548919678 2023-01-22 09:10:52.697486: step: 96/463, loss: 1.6990973949432373 2023-01-22 09:10:53.341964: step: 98/463, loss: 2.050144910812378 2023-01-22 09:10:53.976934: step: 100/463, loss: 1.248523235321045 2023-01-22 09:10:54.542600: step: 102/463, loss: 1.1855612993240356 2023-01-22 09:10:55.116440: step: 104/463, loss: 1.0870994329452515 2023-01-22 09:10:55.686195: step: 106/463, loss: 2.6335551738739014 2023-01-22 09:10:56.344632: step: 108/463, loss: 1.172735571861267 2023-01-22 09:10:56.954797: step: 110/463, loss: 1.4004729986190796 2023-01-22 09:10:57.593807: step: 112/463, loss: 0.9719650149345398 2023-01-22 09:10:58.300268: step: 114/463, loss: 1.6797430515289307 2023-01-22 09:10:58.893844: step: 116/463, loss: 3.8506879806518555 2023-01-22 09:10:59.535013: step: 118/463, loss: 0.6834520697593689 2023-01-22 09:11:00.168501: step: 120/463, loss: 0.8853448629379272 2023-01-22 09:11:00.808708: step: 122/463, loss: 5.188709735870361 2023-01-22 09:11:01.549033: step: 124/463, loss: 1.7113059759140015 2023-01-22 09:11:02.169929: step: 126/463, loss: 1.7622789144515991 2023-01-22 09:11:02.723824: step: 128/463, loss: 0.5963531732559204 2023-01-22 09:11:03.350979: step: 130/463, loss: 0.6164882183074951 2023-01-22 09:11:03.871929: step: 132/463, loss: 0.5139610767364502 2023-01-22 09:11:04.495397: step: 134/463, loss: 6.962419509887695 2023-01-22 09:11:05.132111: step: 136/463, loss: 6.858845233917236 2023-01-22 09:11:05.752863: step: 138/463, loss: 0.9637402296066284 2023-01-22 09:11:06.330308: step: 140/463, loss: 1.5021545886993408 2023-01-22 09:11:06.877014: step: 142/463, loss: 11.442215919494629 2023-01-22 09:11:07.444025: step: 144/463, loss: 1.0192979574203491 2023-01-22 09:11:08.048182: step: 146/463, loss: 0.937065601348877 2023-01-22 09:11:08.666964: step: 148/463, loss: 0.5950444936752319 2023-01-22 09:11:09.302885: step: 150/463, loss: 2.0465996265411377 2023-01-22 09:11:09.953654: step: 152/463, loss: 1.736748456954956 2023-01-22 09:11:10.609390: step: 154/463, loss: 1.2617604732513428 2023-01-22 09:11:11.225867: step: 156/463, loss: 0.35340410470962524 2023-01-22 09:11:11.782065: step: 158/463, loss: 6.960509300231934 2023-01-22 09:11:12.407987: step: 160/463, loss: 1.908158302307129 2023-01-22 09:11:13.048211: step: 162/463, loss: 0.7639504671096802 2023-01-22 09:11:13.732500: step: 164/463, loss: 0.793492317199707 2023-01-22 09:11:14.356723: step: 166/463, loss: 0.5547686815261841 2023-01-22 09:11:14.914705: step: 168/463, loss: 1.7488555908203125 2023-01-22 09:11:15.555822: step: 170/463, loss: 1.4251786470413208 2023-01-22 09:11:16.240367: step: 172/463, loss: 1.2899792194366455 2023-01-22 09:11:16.829149: step: 174/463, loss: 1.2221816778182983 2023-01-22 09:11:17.477734: step: 176/463, loss: 1.1296358108520508 2023-01-22 09:11:18.090721: step: 178/463, loss: 0.9121953248977661 2023-01-22 09:11:18.670019: step: 180/463, loss: 4.790550708770752 2023-01-22 09:11:19.302996: step: 182/463, loss: 0.37892529368400574 2023-01-22 09:11:19.976365: step: 184/463, loss: 1.215133786201477 2023-01-22 09:11:20.580000: step: 186/463, loss: 2.994995355606079 2023-01-22 09:11:21.181830: step: 188/463, loss: 1.41506826877594 2023-01-22 09:11:21.772067: step: 190/463, loss: 1.048855185508728 2023-01-22 09:11:22.390924: step: 192/463, loss: 0.6058880090713501 2023-01-22 09:11:23.019784: step: 194/463, loss: 3.981769561767578 2023-01-22 09:11:23.539261: step: 196/463, loss: 0.8736894726753235 2023-01-22 09:11:24.141743: step: 198/463, loss: 0.5024668574333191 2023-01-22 09:11:24.766802: step: 200/463, loss: 4.32876443862915 2023-01-22 09:11:25.378560: step: 202/463, loss: 0.6034828424453735 2023-01-22 09:11:25.990976: step: 204/463, loss: 0.7853680849075317 2023-01-22 09:11:26.574040: step: 206/463, loss: 2.123030185699463 2023-01-22 09:11:27.194380: step: 208/463, loss: 0.46757593750953674 2023-01-22 09:11:27.804599: step: 210/463, loss: 13.134532928466797 2023-01-22 09:11:28.463239: step: 212/463, loss: 0.7102795839309692 2023-01-22 09:11:29.034609: step: 214/463, loss: 1.3386493921279907 2023-01-22 09:11:29.667849: step: 216/463, loss: 0.4966233968734741 2023-01-22 09:11:30.298672: step: 218/463, loss: 0.7630448341369629 2023-01-22 09:11:30.967618: step: 220/463, loss: 2.044830322265625 2023-01-22 09:11:31.605410: step: 222/463, loss: 5.183900833129883 2023-01-22 09:11:32.197344: step: 224/463, loss: 4.236065864562988 2023-01-22 09:11:32.803635: step: 226/463, loss: 0.9535512924194336 2023-01-22 09:11:33.447850: step: 228/463, loss: 8.569318771362305 2023-01-22 09:11:34.050689: step: 230/463, loss: 0.725615382194519 2023-01-22 09:11:34.628808: step: 232/463, loss: 1.22381591796875 2023-01-22 09:11:35.242983: step: 234/463, loss: 3.0906450748443604 2023-01-22 09:11:35.903301: step: 236/463, loss: 1.2112234830856323 2023-01-22 09:11:36.453000: step: 238/463, loss: 0.6695554852485657 2023-01-22 09:11:37.083300: step: 240/463, loss: 1.61887526512146 2023-01-22 09:11:37.650876: step: 242/463, loss: 2.4645869731903076 2023-01-22 09:11:38.220667: step: 244/463, loss: 0.8892167806625366 2023-01-22 09:11:38.803986: step: 246/463, loss: 2.0332322120666504 2023-01-22 09:11:39.448757: step: 248/463, loss: 2.7730774879455566 2023-01-22 09:11:40.055826: step: 250/463, loss: 2.852320909500122 2023-01-22 09:11:40.694707: step: 252/463, loss: 2.1586720943450928 2023-01-22 09:11:41.321337: step: 254/463, loss: 2.050269842147827 2023-01-22 09:11:41.953060: step: 256/463, loss: 1.6949431896209717 2023-01-22 09:11:42.530828: step: 258/463, loss: 1.2738231420516968 2023-01-22 09:11:43.137719: step: 260/463, loss: 4.912646293640137 2023-01-22 09:11:43.693535: step: 262/463, loss: 1.6960387229919434 2023-01-22 09:11:44.329191: step: 264/463, loss: 1.8729029893875122 2023-01-22 09:11:44.916953: step: 266/463, loss: 0.477815717458725 2023-01-22 09:11:45.484391: step: 268/463, loss: 0.39634814858436584 2023-01-22 09:11:46.104680: step: 270/463, loss: 0.8920003175735474 2023-01-22 09:11:46.749544: step: 272/463, loss: 1.7977694272994995 2023-01-22 09:11:47.394908: step: 274/463, loss: 0.8314753770828247 2023-01-22 09:11:47.983385: step: 276/463, loss: 5.017110347747803 2023-01-22 09:11:48.583136: step: 278/463, loss: 1.5411887168884277 2023-01-22 09:11:49.156898: step: 280/463, loss: 2.2009634971618652 2023-01-22 09:11:49.693390: step: 282/463, loss: 0.9728631377220154 2023-01-22 09:11:50.292439: step: 284/463, loss: 5.722582817077637 2023-01-22 09:11:50.914986: step: 286/463, loss: 6.1283159255981445 2023-01-22 09:11:51.563113: step: 288/463, loss: 1.30561363697052 2023-01-22 09:11:52.119996: step: 290/463, loss: 4.328171253204346 2023-01-22 09:11:52.714484: step: 292/463, loss: 0.4708421230316162 2023-01-22 09:11:53.310542: step: 294/463, loss: 4.787020683288574 2023-01-22 09:11:53.944961: step: 296/463, loss: 3.3478779792785645 2023-01-22 09:11:54.568382: step: 298/463, loss: 3.621676445007324 2023-01-22 09:11:55.158071: step: 300/463, loss: 4.760943412780762 2023-01-22 09:11:55.728152: step: 302/463, loss: 7.1631598472595215 2023-01-22 09:11:56.286401: step: 304/463, loss: 1.246717929840088 2023-01-22 09:11:56.878822: step: 306/463, loss: 0.9490251541137695 2023-01-22 09:11:57.532888: step: 308/463, loss: 1.1441110372543335 2023-01-22 09:11:58.234111: step: 310/463, loss: 2.801241397857666 2023-01-22 09:11:58.801093: step: 312/463, loss: 6.124081611633301 2023-01-22 09:11:59.415801: step: 314/463, loss: 3.1647603511810303 2023-01-22 09:12:00.043568: step: 316/463, loss: 2.703418731689453 2023-01-22 09:12:00.637288: step: 318/463, loss: 5.265385150909424 2023-01-22 09:12:01.283534: step: 320/463, loss: 1.3631547689437866 2023-01-22 09:12:01.881601: step: 322/463, loss: 0.37927454710006714 2023-01-22 09:12:02.452432: step: 324/463, loss: 0.5200359225273132 2023-01-22 09:12:03.100377: step: 326/463, loss: 1.185221791267395 2023-01-22 09:12:03.673009: step: 328/463, loss: 3.7239699363708496 2023-01-22 09:12:04.286129: step: 330/463, loss: 1.7734441757202148 2023-01-22 09:12:04.968357: step: 332/463, loss: 1.6176515817642212 2023-01-22 09:12:05.623992: step: 334/463, loss: 0.6846375465393066 2023-01-22 09:12:06.316946: step: 336/463, loss: 4.7630205154418945 2023-01-22 09:12:06.920422: step: 338/463, loss: 4.955362319946289 2023-01-22 09:12:07.554209: step: 340/463, loss: 1.6540472507476807 2023-01-22 09:12:08.162397: step: 342/463, loss: 1.7042542695999146 2023-01-22 09:12:08.762092: step: 344/463, loss: 4.431768417358398 2023-01-22 09:12:09.365848: step: 346/463, loss: 0.724765419960022 2023-01-22 09:12:10.064976: step: 348/463, loss: 0.8880772590637207 2023-01-22 09:12:10.704260: step: 350/463, loss: 2.6019840240478516 2023-01-22 09:12:11.413379: step: 352/463, loss: 2.2807857990264893 2023-01-22 09:12:12.062568: step: 354/463, loss: 1.6729512214660645 2023-01-22 09:12:12.712075: step: 356/463, loss: 0.29523590207099915 2023-01-22 09:12:13.307842: step: 358/463, loss: 1.171804428100586 2023-01-22 09:12:13.866810: step: 360/463, loss: 1.4620466232299805 2023-01-22 09:12:14.470936: step: 362/463, loss: 1.2754545211791992 2023-01-22 09:12:15.096655: step: 364/463, loss: 4.919126033782959 2023-01-22 09:12:15.732263: step: 366/463, loss: 1.9732675552368164 2023-01-22 09:12:16.292499: step: 368/463, loss: 2.2051146030426025 2023-01-22 09:12:16.839027: step: 370/463, loss: 1.236556887626648 2023-01-22 09:12:17.462619: step: 372/463, loss: 0.9393231868743896 2023-01-22 09:12:18.135179: step: 374/463, loss: 3.647397994995117 2023-01-22 09:12:18.725735: step: 376/463, loss: 1.0917147397994995 2023-01-22 09:12:19.372588: step: 378/463, loss: 1.8892124891281128 2023-01-22 09:12:19.991689: step: 380/463, loss: 0.615264892578125 2023-01-22 09:12:20.667995: step: 382/463, loss: 0.5577064752578735 2023-01-22 09:12:21.252345: step: 384/463, loss: 0.3654223680496216 2023-01-22 09:12:21.915769: step: 386/463, loss: 2.1288323402404785 2023-01-22 09:12:22.527249: step: 388/463, loss: 0.46888911724090576 2023-01-22 09:12:23.113984: step: 390/463, loss: 6.196017265319824 2023-01-22 09:12:23.783135: step: 392/463, loss: 0.3807101249694824 2023-01-22 09:12:24.429496: step: 394/463, loss: 1.2807503938674927 2023-01-22 09:12:25.041734: step: 396/463, loss: 0.8407572507858276 2023-01-22 09:12:25.641754: step: 398/463, loss: 3.6059751510620117 2023-01-22 09:12:26.271610: step: 400/463, loss: 0.7967534065246582 2023-01-22 09:12:26.823201: step: 402/463, loss: 1.070436954498291 2023-01-22 09:12:27.461501: step: 404/463, loss: 0.9442594051361084 2023-01-22 09:12:28.112447: step: 406/463, loss: 1.3431742191314697 2023-01-22 09:12:28.736727: step: 408/463, loss: 1.3841991424560547 2023-01-22 09:12:29.359497: step: 410/463, loss: 1.2273149490356445 2023-01-22 09:12:29.989922: step: 412/463, loss: 1.0216115713119507 2023-01-22 09:12:30.616052: step: 414/463, loss: 2.7380475997924805 2023-01-22 09:12:31.224082: step: 416/463, loss: 0.5786815285682678 2023-01-22 09:12:31.845126: step: 418/463, loss: 0.3677418529987335 2023-01-22 09:12:32.458030: step: 420/463, loss: 4.857378005981445 2023-01-22 09:12:33.066267: step: 422/463, loss: 0.6248304843902588 2023-01-22 09:12:33.649308: step: 424/463, loss: 1.3008418083190918 2023-01-22 09:12:34.229618: step: 426/463, loss: 4.184779167175293 2023-01-22 09:12:34.880976: step: 428/463, loss: 4.474721908569336 2023-01-22 09:12:35.505036: step: 430/463, loss: 0.7341389060020447 2023-01-22 09:12:36.128270: step: 432/463, loss: 8.08967113494873 2023-01-22 09:12:36.757984: step: 434/463, loss: 10.318756103515625 2023-01-22 09:12:37.319403: step: 436/463, loss: 0.5290687084197998 2023-01-22 09:12:37.868412: step: 438/463, loss: 0.9555424451828003 2023-01-22 09:12:38.443225: step: 440/463, loss: 4.081544876098633 2023-01-22 09:12:39.048937: step: 442/463, loss: 0.6440564393997192 2023-01-22 09:12:39.644024: step: 444/463, loss: 3.0376334190368652 2023-01-22 09:12:40.276540: step: 446/463, loss: 0.6823337078094482 2023-01-22 09:12:40.854577: step: 448/463, loss: 0.9174196720123291 2023-01-22 09:12:41.514435: step: 450/463, loss: 2.0081372261047363 2023-01-22 09:12:42.121018: step: 452/463, loss: 0.7903839945793152 2023-01-22 09:12:42.728749: step: 454/463, loss: 0.4242723882198334 2023-01-22 09:12:43.309515: step: 456/463, loss: 0.7555660009384155 2023-01-22 09:12:43.921091: step: 458/463, loss: 5.849801063537598 2023-01-22 09:12:44.605513: step: 460/463, loss: 0.9508663415908813 2023-01-22 09:12:45.268310: step: 462/463, loss: 0.8560835719108582 2023-01-22 09:12:45.957379: step: 464/463, loss: 1.0714929103851318 2023-01-22 09:12:46.629676: step: 466/463, loss: 0.48916831612586975 2023-01-22 09:12:47.325796: step: 468/463, loss: 1.1483577489852905 2023-01-22 09:12:47.958030: step: 470/463, loss: 1.8613275289535522 2023-01-22 09:12:48.583389: step: 472/463, loss: 0.6832380294799805 2023-01-22 09:12:49.282053: step: 474/463, loss: 2.1384215354919434 2023-01-22 09:12:49.903110: step: 476/463, loss: 1.636368989944458 2023-01-22 09:12:50.505586: step: 478/463, loss: 1.5405359268188477 2023-01-22 09:12:51.122329: step: 480/463, loss: 2.874466896057129 2023-01-22 09:12:51.794469: step: 482/463, loss: 1.4746910333633423 2023-01-22 09:12:52.416107: step: 484/463, loss: 2.5639333724975586 2023-01-22 09:12:53.028266: step: 486/463, loss: 1.324280858039856 2023-01-22 09:12:53.723103: step: 488/463, loss: 1.0978533029556274 2023-01-22 09:12:54.420354: step: 490/463, loss: 23.97795295715332 2023-01-22 09:12:55.034167: step: 492/463, loss: 2.39566707611084 2023-01-22 09:12:55.637947: step: 494/463, loss: 2.2220065593719482 2023-01-22 09:12:56.213586: step: 496/463, loss: 7.3298020362854 2023-01-22 09:12:56.908654: step: 498/463, loss: 1.9549223184585571 2023-01-22 09:12:57.492978: step: 500/463, loss: 0.5419197082519531 2023-01-22 09:12:58.320417: step: 502/463, loss: 0.749390721321106 2023-01-22 09:12:58.909641: step: 504/463, loss: 1.9556846618652344 2023-01-22 09:12:59.480500: step: 506/463, loss: 2.056826114654541 2023-01-22 09:13:00.103398: step: 508/463, loss: 1.2158986330032349 2023-01-22 09:13:00.738571: step: 510/463, loss: 0.4667484760284424 2023-01-22 09:13:01.473543: step: 512/463, loss: 5.003033638000488 2023-01-22 09:13:02.092265: step: 514/463, loss: 0.5443268418312073 2023-01-22 09:13:02.769466: step: 516/463, loss: 2.5313668251037598 2023-01-22 09:13:03.399045: step: 518/463, loss: 2.405019760131836 2023-01-22 09:13:04.077687: step: 520/463, loss: 8.626433372497559 2023-01-22 09:13:04.677503: step: 522/463, loss: 1.2849178314208984 2023-01-22 09:13:05.290875: step: 524/463, loss: 7.990692615509033 2023-01-22 09:13:05.876982: step: 526/463, loss: 1.7502684593200684 2023-01-22 09:13:06.510883: step: 528/463, loss: 1.3626116514205933 2023-01-22 09:13:07.258525: step: 530/463, loss: 2.245612859725952 2023-01-22 09:13:07.888192: step: 532/463, loss: 3.4241137504577637 2023-01-22 09:13:08.522899: step: 534/463, loss: 0.7420649528503418 2023-01-22 09:13:09.141847: step: 536/463, loss: 0.5043743848800659 2023-01-22 09:13:09.729686: step: 538/463, loss: 2.1114377975463867 2023-01-22 09:13:10.341466: step: 540/463, loss: 0.8497910499572754 2023-01-22 09:13:10.945408: step: 542/463, loss: 1.3439457416534424 2023-01-22 09:13:11.554661: step: 544/463, loss: 3.4550399780273438 2023-01-22 09:13:12.227644: step: 546/463, loss: 0.5475401878356934 2023-01-22 09:13:12.830819: step: 548/463, loss: 0.849360466003418 2023-01-22 09:13:13.388544: step: 550/463, loss: 17.204517364501953 2023-01-22 09:13:14.034229: step: 552/463, loss: 2.0592710971832275 2023-01-22 09:13:14.677945: step: 554/463, loss: 0.8619705438613892 2023-01-22 09:13:15.262388: step: 556/463, loss: 6.703515529632568 2023-01-22 09:13:16.014391: step: 558/463, loss: 0.493984192609787 2023-01-22 09:13:16.632242: step: 560/463, loss: 1.1164534091949463 2023-01-22 09:13:17.291159: step: 562/463, loss: 0.35726404190063477 2023-01-22 09:13:17.911006: step: 564/463, loss: 0.6496716737747192 2023-01-22 09:13:18.615957: step: 566/463, loss: 0.9316614866256714 2023-01-22 09:13:19.273843: step: 568/463, loss: 0.791114091873169 2023-01-22 09:13:19.916828: step: 570/463, loss: 1.144370675086975 2023-01-22 09:13:20.593588: step: 572/463, loss: 0.7868590950965881 2023-01-22 09:13:21.244733: step: 574/463, loss: 1.517804741859436 2023-01-22 09:13:21.966455: step: 576/463, loss: 1.369206190109253 2023-01-22 09:13:22.612030: step: 578/463, loss: 0.7141205072402954 2023-01-22 09:13:23.216419: step: 580/463, loss: 0.3427852988243103 2023-01-22 09:13:23.796839: step: 582/463, loss: 0.9353724718093872 2023-01-22 09:13:24.436383: step: 584/463, loss: 2.5886993408203125 2023-01-22 09:13:25.054995: step: 586/463, loss: 2.3710124492645264 2023-01-22 09:13:25.729703: step: 588/463, loss: 0.9547492265701294 2023-01-22 09:13:26.314557: step: 590/463, loss: 0.7733676433563232 2023-01-22 09:13:26.933630: step: 592/463, loss: 1.1091457605361938 2023-01-22 09:13:27.566020: step: 594/463, loss: 1.0487369298934937 2023-01-22 09:13:28.281724: step: 596/463, loss: 0.9992524981498718 2023-01-22 09:13:28.863300: step: 598/463, loss: 0.40794116258621216 2023-01-22 09:13:29.448145: step: 600/463, loss: 3.844391345977783 2023-01-22 09:13:30.032368: step: 602/463, loss: 1.3142657279968262 2023-01-22 09:13:30.654805: step: 604/463, loss: 1.470659613609314 2023-01-22 09:13:31.295475: step: 606/463, loss: 0.4712180495262146 2023-01-22 09:13:31.930073: step: 608/463, loss: 1.1896867752075195 2023-01-22 09:13:32.518618: step: 610/463, loss: 1.4044132232666016 2023-01-22 09:13:33.156671: step: 612/463, loss: 12.41322135925293 2023-01-22 09:13:33.801148: step: 614/463, loss: 1.7171419858932495 2023-01-22 09:13:34.414408: step: 616/463, loss: 0.3041672110557556 2023-01-22 09:13:34.974344: step: 618/463, loss: 2.610074281692505 2023-01-22 09:13:35.632575: step: 620/463, loss: 0.8648314476013184 2023-01-22 09:13:36.229015: step: 622/463, loss: 1.521160364151001 2023-01-22 09:13:36.802619: step: 624/463, loss: 1.4403678178787231 2023-01-22 09:13:37.438962: step: 626/463, loss: 0.3804192543029785 2023-01-22 09:13:38.047758: step: 628/463, loss: 0.2660582363605499 2023-01-22 09:13:38.774157: step: 630/463, loss: 1.0609209537506104 2023-01-22 09:13:39.475073: step: 632/463, loss: 1.5146912336349487 2023-01-22 09:13:40.101003: step: 634/463, loss: 1.0184211730957031 2023-01-22 09:13:40.668137: step: 636/463, loss: 3.6134836673736572 2023-01-22 09:13:41.325283: step: 638/463, loss: 5.694892883300781 2023-01-22 09:13:41.992085: step: 640/463, loss: 1.4821346998214722 2023-01-22 09:13:42.590976: step: 642/463, loss: 1.0457806587219238 2023-01-22 09:13:43.223279: step: 644/463, loss: 1.15732741355896 2023-01-22 09:13:43.845985: step: 646/463, loss: 0.8805398344993591 2023-01-22 09:13:44.489487: step: 648/463, loss: 1.3210361003875732 2023-01-22 09:13:45.116667: step: 650/463, loss: 2.356326103210449 2023-01-22 09:13:45.708017: step: 652/463, loss: 0.37767118215560913 2023-01-22 09:13:46.295488: step: 654/463, loss: 1.0555232763290405 2023-01-22 09:13:46.924608: step: 656/463, loss: 1.132493257522583 2023-01-22 09:13:47.534656: step: 658/463, loss: 0.3220830261707306 2023-01-22 09:13:48.126275: step: 660/463, loss: 1.618369698524475 2023-01-22 09:13:48.740882: step: 662/463, loss: 2.3216099739074707 2023-01-22 09:13:49.334640: step: 664/463, loss: 0.3483749032020569 2023-01-22 09:13:49.933911: step: 666/463, loss: 0.4026816189289093 2023-01-22 09:13:50.475725: step: 668/463, loss: 0.9321635961532593 2023-01-22 09:13:51.031442: step: 670/463, loss: 3.0314548015594482 2023-01-22 09:13:51.641131: step: 672/463, loss: 1.1623146533966064 2023-01-22 09:13:52.209809: step: 674/463, loss: 1.567813754081726 2023-01-22 09:13:52.775678: step: 676/463, loss: 1.1469882726669312 2023-01-22 09:13:53.339894: step: 678/463, loss: 1.4488227367401123 2023-01-22 09:13:53.998039: step: 680/463, loss: 5.3399763107299805 2023-01-22 09:13:54.716329: step: 682/463, loss: 2.2437868118286133 2023-01-22 09:13:55.308297: step: 684/463, loss: 1.210664987564087 2023-01-22 09:13:55.938737: step: 686/463, loss: 0.20591044425964355 2023-01-22 09:13:56.586377: step: 688/463, loss: 1.8625613451004028 2023-01-22 09:13:57.218444: step: 690/463, loss: 3.4302992820739746 2023-01-22 09:13:57.836201: step: 692/463, loss: 1.37776780128479 2023-01-22 09:13:58.522925: step: 694/463, loss: 2.2194693088531494 2023-01-22 09:13:59.130744: step: 696/463, loss: 2.1454403400421143 2023-01-22 09:13:59.749522: step: 698/463, loss: 3.7571959495544434 2023-01-22 09:14:00.350921: step: 700/463, loss: 1.7427443265914917 2023-01-22 09:14:01.032414: step: 702/463, loss: 0.44627776741981506 2023-01-22 09:14:01.719954: step: 704/463, loss: 2.0296101570129395 2023-01-22 09:14:02.367019: step: 706/463, loss: 3.0709877014160156 2023-01-22 09:14:02.975199: step: 708/463, loss: 1.7025601863861084 2023-01-22 09:14:03.540880: step: 710/463, loss: 0.8691942691802979 2023-01-22 09:14:04.147940: step: 712/463, loss: 0.6644496917724609 2023-01-22 09:14:04.780065: step: 714/463, loss: 1.6537455320358276 2023-01-22 09:14:05.374764: step: 716/463, loss: 0.4561287760734558 2023-01-22 09:14:06.004011: step: 718/463, loss: 4.355337142944336 2023-01-22 09:14:06.608057: step: 720/463, loss: 0.2932380437850952 2023-01-22 09:14:07.238949: step: 722/463, loss: 1.7480332851409912 2023-01-22 09:14:07.810543: step: 724/463, loss: 0.6260095238685608 2023-01-22 09:14:08.373419: step: 726/463, loss: 2.5749106407165527 2023-01-22 09:14:08.972714: step: 728/463, loss: 1.2347019910812378 2023-01-22 09:14:09.606028: step: 730/463, loss: 2.2668538093566895 2023-01-22 09:14:10.239310: step: 732/463, loss: 0.4873841106891632 2023-01-22 09:14:10.849620: step: 734/463, loss: 0.5760297775268555 2023-01-22 09:14:11.464889: step: 736/463, loss: 0.29523178935050964 2023-01-22 09:14:12.104334: step: 738/463, loss: 1.0267291069030762 2023-01-22 09:14:12.692678: step: 740/463, loss: 1.0055054426193237 2023-01-22 09:14:13.298034: step: 742/463, loss: 1.4158940315246582 2023-01-22 09:14:13.893471: step: 744/463, loss: 2.1626105308532715 2023-01-22 09:14:14.507077: step: 746/463, loss: 0.4254618287086487 2023-01-22 09:14:15.127162: step: 748/463, loss: 4.174129009246826 2023-01-22 09:14:15.828657: step: 750/463, loss: 0.6104744076728821 2023-01-22 09:14:16.486895: step: 752/463, loss: 0.6177344918251038 2023-01-22 09:14:17.117657: step: 754/463, loss: 4.04056978225708 2023-01-22 09:14:17.759315: step: 756/463, loss: 2.3158512115478516 2023-01-22 09:14:18.380370: step: 758/463, loss: 0.59056556224823 2023-01-22 09:14:18.985832: step: 760/463, loss: 1.5480821132659912 2023-01-22 09:14:19.590827: step: 762/463, loss: 1.7627665996551514 2023-01-22 09:14:20.238061: step: 764/463, loss: 0.8368968963623047 2023-01-22 09:14:20.810428: step: 766/463, loss: 0.8852662444114685 2023-01-22 09:14:21.444441: step: 768/463, loss: 1.296881914138794 2023-01-22 09:14:22.016810: step: 770/463, loss: 0.405099481344223 2023-01-22 09:14:22.633664: step: 772/463, loss: 0.8391633629798889 2023-01-22 09:14:23.242089: step: 774/463, loss: 1.044474482536316 2023-01-22 09:14:23.939272: step: 776/463, loss: 1.3586586713790894 2023-01-22 09:14:24.567258: step: 778/463, loss: 0.4751380681991577 2023-01-22 09:14:25.177049: step: 780/463, loss: 1.0881835222244263 2023-01-22 09:14:25.796307: step: 782/463, loss: 1.1149868965148926 2023-01-22 09:14:26.403041: step: 784/463, loss: 0.8063578605651855 2023-01-22 09:14:27.023155: step: 786/463, loss: 1.1965080499649048 2023-01-22 09:14:27.688791: step: 788/463, loss: 0.7118450403213501 2023-01-22 09:14:28.277630: step: 790/463, loss: 2.6800503730773926 2023-01-22 09:14:28.869082: step: 792/463, loss: 1.428928017616272 2023-01-22 09:14:29.478206: step: 794/463, loss: 1.1040412187576294 2023-01-22 09:14:30.106737: step: 796/463, loss: 0.8095263242721558 2023-01-22 09:14:30.719789: step: 798/463, loss: 1.8907171487808228 2023-01-22 09:14:31.364917: step: 800/463, loss: 1.46341872215271 2023-01-22 09:14:32.016622: step: 802/463, loss: 3.4522597789764404 2023-01-22 09:14:32.611796: step: 804/463, loss: 1.3675616979599 2023-01-22 09:14:33.272761: step: 806/463, loss: 1.7054327726364136 2023-01-22 09:14:33.871476: step: 808/463, loss: 1.456458568572998 2023-01-22 09:14:34.451926: step: 810/463, loss: 0.9994184374809265 2023-01-22 09:14:35.137333: step: 812/463, loss: 1.1029129028320312 2023-01-22 09:14:35.781924: step: 814/463, loss: 0.8378959894180298 2023-01-22 09:14:36.394958: step: 816/463, loss: 1.1774468421936035 2023-01-22 09:14:37.074072: step: 818/463, loss: 2.8366777896881104 2023-01-22 09:14:37.640791: step: 820/463, loss: 2.7778737545013428 2023-01-22 09:14:38.197412: step: 822/463, loss: 1.7021194696426392 2023-01-22 09:14:38.791911: step: 824/463, loss: 1.1035935878753662 2023-01-22 09:14:39.349539: step: 826/463, loss: 0.9226508140563965 2023-01-22 09:14:39.955226: step: 828/463, loss: 1.2424044609069824 2023-01-22 09:14:40.601018: step: 830/463, loss: 5.341761589050293 2023-01-22 09:14:41.370106: step: 832/463, loss: 1.7395113706588745 2023-01-22 09:14:41.954838: step: 834/463, loss: 1.1090619564056396 2023-01-22 09:14:42.539702: step: 836/463, loss: 1.822282075881958 2023-01-22 09:14:43.203755: step: 838/463, loss: 8.355430603027344 2023-01-22 09:14:43.824793: step: 840/463, loss: 1.2190101146697998 2023-01-22 09:14:44.447708: step: 842/463, loss: 1.0992544889450073 2023-01-22 09:14:45.092181: step: 844/463, loss: 8.197500228881836 2023-01-22 09:14:45.760381: step: 846/463, loss: 0.6674755215644836 2023-01-22 09:14:46.368674: step: 848/463, loss: 0.3324805200099945 2023-01-22 09:14:47.026392: step: 850/463, loss: 1.9683656692504883 2023-01-22 09:14:47.648973: step: 852/463, loss: 0.7022861242294312 2023-01-22 09:14:48.266615: step: 854/463, loss: 0.5963472127914429 2023-01-22 09:14:48.949447: step: 856/463, loss: 8.475939750671387 2023-01-22 09:14:49.643908: step: 858/463, loss: 1.18717360496521 2023-01-22 09:14:50.201263: step: 860/463, loss: 1.9248133897781372 2023-01-22 09:14:50.844860: step: 862/463, loss: 0.484659880399704 2023-01-22 09:14:51.386112: step: 864/463, loss: 0.9339818358421326 2023-01-22 09:14:52.682587: step: 866/463, loss: 0.6975277662277222 2023-01-22 09:14:53.324173: step: 868/463, loss: 1.8819152116775513 2023-01-22 09:14:53.934701: step: 870/463, loss: 0.7943848967552185 2023-01-22 09:14:54.546902: step: 872/463, loss: 1.0655615329742432 2023-01-22 09:14:55.156635: step: 874/463, loss: 10.067506790161133 2023-01-22 09:14:55.765193: step: 876/463, loss: 2.9179444313049316 2023-01-22 09:14:56.350030: step: 878/463, loss: 2.4652976989746094 2023-01-22 09:14:56.973042: step: 880/463, loss: 1.2682594060897827 2023-01-22 09:14:57.653413: step: 882/463, loss: 2.0408806800842285 2023-01-22 09:14:58.327617: step: 884/463, loss: 1.9349489212036133 2023-01-22 09:14:58.905237: step: 886/463, loss: 1.3845210075378418 2023-01-22 09:14:59.572864: step: 888/463, loss: 1.749855637550354 2023-01-22 09:15:00.158822: step: 890/463, loss: 0.8009510040283203 2023-01-22 09:15:00.770447: step: 892/463, loss: 1.494858980178833 2023-01-22 09:15:01.400187: step: 894/463, loss: 1.754842758178711 2023-01-22 09:15:02.007988: step: 896/463, loss: 2.425070285797119 2023-01-22 09:15:02.630210: step: 898/463, loss: 1.732689380645752 2023-01-22 09:15:03.273090: step: 900/463, loss: 1.6223952770233154 2023-01-22 09:15:03.845009: step: 902/463, loss: 3.6462016105651855 2023-01-22 09:15:04.434266: step: 904/463, loss: 2.098703384399414 2023-01-22 09:15:05.098848: step: 906/463, loss: 1.2983542680740356 2023-01-22 09:15:05.699100: step: 908/463, loss: 2.644914150238037 2023-01-22 09:15:06.292214: step: 910/463, loss: 1.0027775764465332 2023-01-22 09:15:06.866857: step: 912/463, loss: 0.8366899490356445 2023-01-22 09:15:07.520760: step: 914/463, loss: 1.4652920961380005 2023-01-22 09:15:08.232187: step: 916/463, loss: 4.050869941711426 2023-01-22 09:15:08.866661: step: 918/463, loss: 0.1418902575969696 2023-01-22 09:15:09.537834: step: 920/463, loss: 1.356013536453247 2023-01-22 09:15:10.170138: step: 922/463, loss: 1.6795458793640137 2023-01-22 09:15:10.776290: step: 924/463, loss: 1.043339729309082 2023-01-22 09:15:11.486900: step: 926/463, loss: 4.3604583740234375 ================================================== Loss: 2.155 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2966829498896943, 'r': 0.1876744417862839, 'f1': 0.22991207717670045}, 'combined': 0.1694088989723056, 'epoch': 1} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34772294179902874, 'r': 0.27123026899264296, 'f1': 0.3047499726786544}, 'combined': 0.23621768217197137, 'epoch': 1} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2601594931177627, 'r': 0.1961050724637681, 'f1': 0.2236360221617053}, 'combined': 0.16478443738230916, 'epoch': 1} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3221864127697005, 'r': 0.26991602316361707, 'f1': 0.2937440212184601}, 'combined': 0.22768675328894994, 'epoch': 1} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2630524729243385, 'r': 0.1903144785172297, 'f1': 0.22084844979581822}, 'combined': 0.16273043669165552, 'epoch': 1} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3311246384638462, 'r': 0.2702316354160575, 'f1': 0.29759514106775903}, 'combined': 0.23067183183242568, 'epoch': 1} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23333333333333334, 'r': 0.2, 'f1': 0.2153846153846154}, 'combined': 0.14358974358974358, 'epoch': 1} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3088235294117647, 'r': 0.22826086956521738, 'f1': 0.26249999999999996}, 'combined': 0.13124999999999998, 'epoch': 1} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3530701754385965, 'r': 0.14609800362976405, 'f1': 0.2066752246469833}, 'combined': 0.13778348309798885, 'epoch': 1} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2966829498896943, 'r': 0.1876744417862839, 'f1': 0.22991207717670045}, 'combined': 0.1694088989723056, 'epoch': 1} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34772294179902874, 'r': 0.27123026899264296, 'f1': 0.3047499726786544}, 'combined': 0.23621768217197137, 'epoch': 1} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23333333333333334, 'r': 0.2, 'f1': 0.2153846153846154}, 'combined': 0.14358974358974358, 'epoch': 1} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2601594931177627, 'r': 0.1961050724637681, 'f1': 0.2236360221617053}, 'combined': 0.16478443738230916, 'epoch': 1} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3221864127697005, 'r': 0.26991602316361707, 'f1': 0.2937440212184601}, 'combined': 0.22768675328894994, 'epoch': 1} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3088235294117647, 'r': 0.22826086956521738, 'f1': 0.26249999999999996}, 'combined': 0.13124999999999998, 'epoch': 1} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2630524729243385, 'r': 0.1903144785172297, 'f1': 0.22084844979581822}, 'combined': 0.16273043669165552, 'epoch': 1} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3311246384638462, 'r': 0.2702316354160575, 'f1': 0.29759514106775903}, 'combined': 0.23067183183242568, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3530701754385965, 'r': 0.14609800362976405, 'f1': 0.2066752246469833}, 'combined': 0.13778348309798885, 'epoch': 1} ****************************** Epoch: 2 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 09:18:02.420295: step: 2/463, loss: 1.4660286903381348 2023-01-22 09:18:03.119152: step: 4/463, loss: 1.409912347793579 2023-01-22 09:18:03.750637: step: 6/463, loss: 1.0477442741394043 2023-01-22 09:18:04.330323: step: 8/463, loss: 0.9781979918479919 2023-01-22 09:18:04.941124: step: 10/463, loss: 0.44752755761146545 2023-01-22 09:18:05.549939: step: 12/463, loss: 3.8065993785858154 2023-01-22 09:18:06.178649: step: 14/463, loss: 1.494258999824524 2023-01-22 09:18:06.775847: step: 16/463, loss: 0.5401797294616699 2023-01-22 09:18:07.397114: step: 18/463, loss: 11.624839782714844 2023-01-22 09:18:08.021693: step: 20/463, loss: 0.5669568777084351 2023-01-22 09:18:08.612873: step: 22/463, loss: 0.73268723487854 2023-01-22 09:18:09.212331: step: 24/463, loss: 2.1691019535064697 2023-01-22 09:18:09.835351: step: 26/463, loss: 0.29225462675094604 2023-01-22 09:18:10.449081: step: 28/463, loss: 1.8725613355636597 2023-01-22 09:18:11.095060: step: 30/463, loss: 0.5147923827171326 2023-01-22 09:18:11.715115: step: 32/463, loss: 0.8150551319122314 2023-01-22 09:18:12.389554: step: 34/463, loss: 1.74850332736969 2023-01-22 09:18:12.951143: step: 36/463, loss: 2.0846107006073 2023-01-22 09:18:13.534298: step: 38/463, loss: 1.8782908916473389 2023-01-22 09:18:14.162143: step: 40/463, loss: 0.5618594884872437 2023-01-22 09:18:14.898231: step: 42/463, loss: 2.6510252952575684 2023-01-22 09:18:15.476529: step: 44/463, loss: 0.811319887638092 2023-01-22 09:18:16.099111: step: 46/463, loss: 1.146957516670227 2023-01-22 09:18:16.729122: step: 48/463, loss: 0.8560822606086731 2023-01-22 09:18:17.262631: step: 50/463, loss: 0.3750093877315521 2023-01-22 09:18:17.845291: step: 52/463, loss: 0.6742995977401733 2023-01-22 09:18:18.540613: step: 54/463, loss: 1.573695421218872 2023-01-22 09:18:19.113811: step: 56/463, loss: 0.9209487438201904 2023-01-22 09:18:19.685818: step: 58/463, loss: 1.2521803379058838 2023-01-22 09:18:20.285786: step: 60/463, loss: 0.7721288800239563 2023-01-22 09:18:20.829094: step: 62/463, loss: 0.7939708828926086 2023-01-22 09:18:21.445737: step: 64/463, loss: 0.5871481895446777 2023-01-22 09:18:22.123355: step: 66/463, loss: 1.1367230415344238 2023-01-22 09:18:22.816248: step: 68/463, loss: 0.720838189125061 2023-01-22 09:18:23.437869: step: 70/463, loss: 6.537387847900391 2023-01-22 09:18:24.068548: step: 72/463, loss: 1.9464659690856934 2023-01-22 09:18:24.626301: step: 74/463, loss: 2.104203224182129 2023-01-22 09:18:25.172934: step: 76/463, loss: 0.4451018273830414 2023-01-22 09:18:25.944290: step: 78/463, loss: 0.6001902222633362 2023-01-22 09:18:26.446711: step: 80/463, loss: 0.8536806702613831 2023-01-22 09:18:27.072745: step: 82/463, loss: 0.7285168766975403 2023-01-22 09:18:27.680841: step: 84/463, loss: 0.276102215051651 2023-01-22 09:18:28.342119: step: 86/463, loss: 3.1876182556152344 2023-01-22 09:18:28.931079: step: 88/463, loss: 0.5421339273452759 2023-01-22 09:18:29.584484: step: 90/463, loss: 1.9555740356445312 2023-01-22 09:18:30.141486: step: 92/463, loss: 1.3989418745040894 2023-01-22 09:18:30.783632: step: 94/463, loss: 0.33679524064064026 2023-01-22 09:18:31.370795: step: 96/463, loss: 0.48682576417922974 2023-01-22 09:18:32.043993: step: 98/463, loss: 0.6764743328094482 2023-01-22 09:18:32.706453: step: 100/463, loss: 2.4615845680236816 2023-01-22 09:18:33.256682: step: 102/463, loss: 4.785783290863037 2023-01-22 09:18:33.828523: step: 104/463, loss: 4.911044120788574 2023-01-22 09:18:34.515949: step: 106/463, loss: 0.643082857131958 2023-01-22 09:18:35.172607: step: 108/463, loss: 1.482361078262329 2023-01-22 09:18:35.798190: step: 110/463, loss: 1.034616470336914 2023-01-22 09:18:36.387786: step: 112/463, loss: 2.216588020324707 2023-01-22 09:18:37.029120: step: 114/463, loss: 3.748507261276245 2023-01-22 09:18:37.663263: step: 116/463, loss: 3.693830966949463 2023-01-22 09:18:38.271305: step: 118/463, loss: 1.0829129219055176 2023-01-22 09:18:38.962929: step: 120/463, loss: 1.3640830516815186 2023-01-22 09:18:39.538399: step: 122/463, loss: 2.6467080116271973 2023-01-22 09:18:40.191013: step: 124/463, loss: 1.7558529376983643 2023-01-22 09:18:40.846811: step: 126/463, loss: 0.5338179469108582 2023-01-22 09:18:41.468170: step: 128/463, loss: 4.626224994659424 2023-01-22 09:18:42.135434: step: 130/463, loss: 0.9506338834762573 2023-01-22 09:18:42.685301: step: 132/463, loss: 1.1604193449020386 2023-01-22 09:18:43.256699: step: 134/463, loss: 0.9605328440666199 2023-01-22 09:18:43.878174: step: 136/463, loss: 2.9243288040161133 2023-01-22 09:18:44.513911: step: 138/463, loss: 0.8890388011932373 2023-01-22 09:18:45.115812: step: 140/463, loss: 1.8085663318634033 2023-01-22 09:18:45.690563: step: 142/463, loss: 0.8109515905380249 2023-01-22 09:18:46.244674: step: 144/463, loss: 0.6374633312225342 2023-01-22 09:18:46.847763: step: 146/463, loss: 0.6498245596885681 2023-01-22 09:18:47.466725: step: 148/463, loss: 1.0928947925567627 2023-01-22 09:18:48.057527: step: 150/463, loss: 1.227744460105896 2023-01-22 09:18:48.639615: step: 152/463, loss: 0.3492332994937897 2023-01-22 09:18:49.309860: step: 154/463, loss: 0.16385142505168915 2023-01-22 09:18:49.964385: step: 156/463, loss: 2.3085997104644775 2023-01-22 09:18:50.527927: step: 158/463, loss: 1.3386341333389282 2023-01-22 09:18:51.299786: step: 160/463, loss: 1.241170048713684 2023-01-22 09:18:51.897867: step: 162/463, loss: 0.8155643939971924 2023-01-22 09:18:52.553556: step: 164/463, loss: 0.8639945387840271 2023-01-22 09:18:53.205855: step: 166/463, loss: 0.8003929853439331 2023-01-22 09:18:53.775261: step: 168/463, loss: 1.34852135181427 2023-01-22 09:18:54.390640: step: 170/463, loss: 2.3515124320983887 2023-01-22 09:18:55.033350: step: 172/463, loss: 0.6648683547973633 2023-01-22 09:18:55.633337: step: 174/463, loss: 0.8789557218551636 2023-01-22 09:18:56.288103: step: 176/463, loss: 2.127133846282959 2023-01-22 09:18:56.923868: step: 178/463, loss: 0.28835994005203247 2023-01-22 09:18:57.546683: step: 180/463, loss: 0.6357977390289307 2023-01-22 09:18:58.114324: step: 182/463, loss: 0.6560345888137817 2023-01-22 09:18:58.641383: step: 184/463, loss: 0.23894557356834412 2023-01-22 09:18:59.213150: step: 186/463, loss: 1.8762205839157104 2023-01-22 09:18:59.786082: step: 188/463, loss: 3.8380684852600098 2023-01-22 09:19:00.410933: step: 190/463, loss: 1.7604936361312866 2023-01-22 09:19:01.014484: step: 192/463, loss: 0.8130249977111816 2023-01-22 09:19:01.640386: step: 194/463, loss: 0.999138593673706 2023-01-22 09:19:02.306703: step: 196/463, loss: 0.5387791991233826 2023-01-22 09:19:02.868018: step: 198/463, loss: 1.9275074005126953 2023-01-22 09:19:03.437654: step: 200/463, loss: 0.762654185295105 2023-01-22 09:19:04.017402: step: 202/463, loss: 1.0605900287628174 2023-01-22 09:19:04.594500: step: 204/463, loss: 5.775745391845703 2023-01-22 09:19:05.211937: step: 206/463, loss: 0.5210607051849365 2023-01-22 09:19:05.783998: step: 208/463, loss: 5.282645225524902 2023-01-22 09:19:06.392199: step: 210/463, loss: 0.870699405670166 2023-01-22 09:19:06.957113: step: 212/463, loss: 1.8843967914581299 2023-01-22 09:19:07.521559: step: 214/463, loss: 1.6005609035491943 2023-01-22 09:19:08.149368: step: 216/463, loss: 5.171538829803467 2023-01-22 09:19:08.708957: step: 218/463, loss: 3.3907129764556885 2023-01-22 09:19:09.380966: step: 220/463, loss: 7.025726318359375 2023-01-22 09:19:10.012406: step: 222/463, loss: 1.6763451099395752 2023-01-22 09:19:10.619254: step: 224/463, loss: 3.6829617023468018 2023-01-22 09:19:11.304388: step: 226/463, loss: 3.822625160217285 2023-01-22 09:19:11.898914: step: 228/463, loss: 0.3872496485710144 2023-01-22 09:19:12.520297: step: 230/463, loss: 1.786887288093567 2023-01-22 09:19:13.102687: step: 232/463, loss: 2.5063986778259277 2023-01-22 09:19:13.699515: step: 234/463, loss: 0.3635252118110657 2023-01-22 09:19:14.340267: step: 236/463, loss: 1.9342350959777832 2023-01-22 09:19:14.945176: step: 238/463, loss: 4.329127311706543 2023-01-22 09:19:15.596351: step: 240/463, loss: 0.24979916214942932 2023-01-22 09:19:16.200368: step: 242/463, loss: 0.7364491820335388 2023-01-22 09:19:16.803529: step: 244/463, loss: 2.0005571842193604 2023-01-22 09:19:17.316106: step: 246/463, loss: 1.3922744989395142 2023-01-22 09:19:17.951556: step: 248/463, loss: 1.6599267721176147 2023-01-22 09:19:18.661825: step: 250/463, loss: 0.9804898500442505 2023-01-22 09:19:19.286695: step: 252/463, loss: 1.5135247707366943 2023-01-22 09:19:19.863587: step: 254/463, loss: 1.1995038986206055 2023-01-22 09:19:20.461510: step: 256/463, loss: 1.7913821935653687 2023-01-22 09:19:21.110892: step: 258/463, loss: 1.5432732105255127 2023-01-22 09:19:21.716724: step: 260/463, loss: 1.5108667612075806 2023-01-22 09:19:22.399102: step: 262/463, loss: 0.36469635367393494 2023-01-22 09:19:23.018915: step: 264/463, loss: 0.7474936842918396 2023-01-22 09:19:23.611366: step: 266/463, loss: 0.7007871866226196 2023-01-22 09:19:24.223567: step: 268/463, loss: 0.2520897090435028 2023-01-22 09:19:24.771885: step: 270/463, loss: 6.105922222137451 2023-01-22 09:19:25.409070: step: 272/463, loss: 2.4899673461914062 2023-01-22 09:19:26.018686: step: 274/463, loss: 3.262800455093384 2023-01-22 09:19:26.640901: step: 276/463, loss: 1.1527456045150757 2023-01-22 09:19:27.396911: step: 278/463, loss: 2.031769037246704 2023-01-22 09:19:28.015977: step: 280/463, loss: 2.4171719551086426 2023-01-22 09:19:28.622707: step: 282/463, loss: 2.7042527198791504 2023-01-22 09:19:29.223786: step: 284/463, loss: 3.3712878227233887 2023-01-22 09:19:29.816767: step: 286/463, loss: 5.174746036529541 2023-01-22 09:19:30.457934: step: 288/463, loss: 1.609847068786621 2023-01-22 09:19:31.060783: step: 290/463, loss: 0.902625560760498 2023-01-22 09:19:31.691297: step: 292/463, loss: 0.6399639844894409 2023-01-22 09:19:32.336242: step: 294/463, loss: 1.2580806016921997 2023-01-22 09:19:32.935396: step: 296/463, loss: 0.6816375255584717 2023-01-22 09:19:33.595025: step: 298/463, loss: 1.2385717630386353 2023-01-22 09:19:34.225806: step: 300/463, loss: 0.536469578742981 2023-01-22 09:19:34.808144: step: 302/463, loss: 0.9843665361404419 2023-01-22 09:19:35.425259: step: 304/463, loss: 0.8425735235214233 2023-01-22 09:19:35.957716: step: 306/463, loss: 0.27193188667297363 2023-01-22 09:19:36.559706: step: 308/463, loss: 0.4825771450996399 2023-01-22 09:19:37.167206: step: 310/463, loss: 0.8274874091148376 2023-01-22 09:19:37.817213: step: 312/463, loss: 4.543065071105957 2023-01-22 09:19:38.436602: step: 314/463, loss: 0.3094620406627655 2023-01-22 09:19:39.147011: step: 316/463, loss: 0.7046681046485901 2023-01-22 09:19:39.758173: step: 318/463, loss: 0.38656431436538696 2023-01-22 09:19:40.443237: step: 320/463, loss: 0.5257557034492493 2023-01-22 09:19:41.173519: step: 322/463, loss: 0.4737904965877533 2023-01-22 09:19:41.781943: step: 324/463, loss: 0.5960273742675781 2023-01-22 09:19:42.398096: step: 326/463, loss: 1.6312655210494995 2023-01-22 09:19:42.987665: step: 328/463, loss: 1.3117055892944336 2023-01-22 09:19:43.608356: step: 330/463, loss: 1.133667230606079 2023-01-22 09:19:44.302798: step: 332/463, loss: 5.118167877197266 2023-01-22 09:19:44.906226: step: 334/463, loss: 0.7834743857383728 2023-01-22 09:19:45.527007: step: 336/463, loss: 0.40042173862457275 2023-01-22 09:19:46.192244: step: 338/463, loss: 1.3825232982635498 2023-01-22 09:19:46.739156: step: 340/463, loss: 1.819142460823059 2023-01-22 09:19:47.377507: step: 342/463, loss: 0.5662192106246948 2023-01-22 09:19:47.956542: step: 344/463, loss: 1.0023061037063599 2023-01-22 09:19:48.577115: step: 346/463, loss: 2.2159485816955566 2023-01-22 09:19:49.215174: step: 348/463, loss: 1.493571162223816 2023-01-22 09:19:49.880941: step: 350/463, loss: 1.2307442426681519 2023-01-22 09:19:50.515245: step: 352/463, loss: 1.850580096244812 2023-01-22 09:19:51.161037: step: 354/463, loss: 1.633779764175415 2023-01-22 09:19:51.885691: step: 356/463, loss: 2.8367621898651123 2023-01-22 09:19:52.547849: step: 358/463, loss: 0.959022045135498 2023-01-22 09:19:53.137866: step: 360/463, loss: 0.41384461522102356 2023-01-22 09:19:53.742443: step: 362/463, loss: 6.035156726837158 2023-01-22 09:19:54.337181: step: 364/463, loss: 0.5185157656669617 2023-01-22 09:19:54.970711: step: 366/463, loss: 1.361049771308899 2023-01-22 09:19:55.618725: step: 368/463, loss: 1.148449182510376 2023-01-22 09:19:56.277602: step: 370/463, loss: 0.7699196338653564 2023-01-22 09:19:56.959534: step: 372/463, loss: 0.23366540670394897 2023-01-22 09:19:57.586262: step: 374/463, loss: 0.40603286027908325 2023-01-22 09:19:58.170888: step: 376/463, loss: 1.3715323209762573 2023-01-22 09:19:58.821370: step: 378/463, loss: 0.9165188074111938 2023-01-22 09:19:59.474147: step: 380/463, loss: 0.6428179740905762 2023-01-22 09:20:00.118662: step: 382/463, loss: 0.9613966345787048 2023-01-22 09:20:00.667788: step: 384/463, loss: 1.0092254877090454 2023-01-22 09:20:01.260649: step: 386/463, loss: 1.6940070390701294 2023-01-22 09:20:01.902171: step: 388/463, loss: 1.739461064338684 2023-01-22 09:20:02.483701: step: 390/463, loss: 0.2564719021320343 2023-01-22 09:20:03.125658: step: 392/463, loss: 1.3298134803771973 2023-01-22 09:20:03.699821: step: 394/463, loss: 0.3073103427886963 2023-01-22 09:20:04.306527: step: 396/463, loss: 2.5522541999816895 2023-01-22 09:20:04.890721: step: 398/463, loss: 0.31658124923706055 2023-01-22 09:20:05.559665: step: 400/463, loss: 2.982454299926758 2023-01-22 09:20:06.165640: step: 402/463, loss: 1.2065281867980957 2023-01-22 09:20:06.800133: step: 404/463, loss: 8.816350936889648 2023-01-22 09:20:07.432613: step: 406/463, loss: 1.474839448928833 2023-01-22 09:20:08.051249: step: 408/463, loss: 1.553722858428955 2023-01-22 09:20:08.653248: step: 410/463, loss: 0.47708964347839355 2023-01-22 09:20:09.359146: step: 412/463, loss: 0.6190148591995239 2023-01-22 09:20:10.013630: step: 414/463, loss: 0.6632809042930603 2023-01-22 09:20:10.594894: step: 416/463, loss: 1.2494126558303833 2023-01-22 09:20:11.231381: step: 418/463, loss: 0.9812670946121216 2023-01-22 09:20:11.873216: step: 420/463, loss: 1.4529144763946533 2023-01-22 09:20:12.466678: step: 422/463, loss: 1.0692442655563354 2023-01-22 09:20:13.093540: step: 424/463, loss: 3.1296417713165283 2023-01-22 09:20:13.666799: step: 426/463, loss: 7.347497940063477 2023-01-22 09:20:14.303727: step: 428/463, loss: 0.987598180770874 2023-01-22 09:20:14.919150: step: 430/463, loss: 1.848487377166748 2023-01-22 09:20:15.554401: step: 432/463, loss: 0.6004656553268433 2023-01-22 09:20:16.178053: step: 434/463, loss: 0.9679194092750549 2023-01-22 09:20:16.861070: step: 436/463, loss: 1.8503260612487793 2023-01-22 09:20:17.488785: step: 438/463, loss: 9.326584815979004 2023-01-22 09:20:18.068215: step: 440/463, loss: 1.3815076351165771 2023-01-22 09:20:18.664747: step: 442/463, loss: 2.8499250411987305 2023-01-22 09:20:19.269214: step: 444/463, loss: 1.5699008703231812 2023-01-22 09:20:19.887204: step: 446/463, loss: 1.273353099822998 2023-01-22 09:20:20.509963: step: 448/463, loss: 0.603901207447052 2023-01-22 09:20:21.132899: step: 450/463, loss: 0.6193287372589111 2023-01-22 09:20:21.726621: step: 452/463, loss: 12.034431457519531 2023-01-22 09:20:22.286135: step: 454/463, loss: 0.9807054400444031 2023-01-22 09:20:22.908124: step: 456/463, loss: 3.2840614318847656 2023-01-22 09:20:23.547089: step: 458/463, loss: 0.6853996515274048 2023-01-22 09:20:24.140248: step: 460/463, loss: 0.7610770463943481 2023-01-22 09:20:24.720202: step: 462/463, loss: 2.459209442138672 2023-01-22 09:20:25.385683: step: 464/463, loss: 0.7250696420669556 2023-01-22 09:20:25.960179: step: 466/463, loss: 1.5966969728469849 2023-01-22 09:20:26.626858: step: 468/463, loss: 2.038206100463867 2023-01-22 09:20:27.269740: step: 470/463, loss: 0.3883358836174011 2023-01-22 09:20:27.901121: step: 472/463, loss: 3.1393630504608154 2023-01-22 09:20:28.562316: step: 474/463, loss: 0.7819725871086121 2023-01-22 09:20:29.185379: step: 476/463, loss: 3.0739307403564453 2023-01-22 09:20:29.822282: step: 478/463, loss: 0.979496419429779 2023-01-22 09:20:30.475164: step: 480/463, loss: 3.8374500274658203 2023-01-22 09:20:31.118512: step: 482/463, loss: 1.2882366180419922 2023-01-22 09:20:31.759643: step: 484/463, loss: 0.7963497638702393 2023-01-22 09:20:32.371195: step: 486/463, loss: 0.8104899525642395 2023-01-22 09:20:33.025037: step: 488/463, loss: 1.0237727165222168 2023-01-22 09:20:33.643421: step: 490/463, loss: 3.098798990249634 2023-01-22 09:20:34.234915: step: 492/463, loss: 1.7690072059631348 2023-01-22 09:20:34.880952: step: 494/463, loss: 0.6741423010826111 2023-01-22 09:20:35.443983: step: 496/463, loss: 0.22404100000858307 2023-01-22 09:20:36.055805: step: 498/463, loss: 0.48252809047698975 2023-01-22 09:20:36.604789: step: 500/463, loss: 0.22238671779632568 2023-01-22 09:20:37.198346: step: 502/463, loss: 1.140441656112671 2023-01-22 09:20:37.808729: step: 504/463, loss: 0.32342782616615295 2023-01-22 09:20:38.462544: step: 506/463, loss: 0.9599140882492065 2023-01-22 09:20:39.024270: step: 508/463, loss: 0.38328179717063904 2023-01-22 09:20:39.684568: step: 510/463, loss: 1.8107041120529175 2023-01-22 09:20:40.294119: step: 512/463, loss: 0.5601431727409363 2023-01-22 09:20:40.964108: step: 514/463, loss: 0.8199431896209717 2023-01-22 09:20:41.549869: step: 516/463, loss: 0.28174111247062683 2023-01-22 09:20:42.152884: step: 518/463, loss: 0.3254542648792267 2023-01-22 09:20:42.740080: step: 520/463, loss: 1.1558681726455688 2023-01-22 09:20:43.359687: step: 522/463, loss: 0.5232790112495422 2023-01-22 09:20:43.949893: step: 524/463, loss: 2.3477325439453125 2023-01-22 09:20:44.532698: step: 526/463, loss: 0.6032165288925171 2023-01-22 09:20:45.144656: step: 528/463, loss: 0.8377740979194641 2023-01-22 09:20:45.704760: step: 530/463, loss: 0.5559775233268738 2023-01-22 09:20:46.429929: step: 532/463, loss: 0.685110330581665 2023-01-22 09:20:47.047918: step: 534/463, loss: 1.523877739906311 2023-01-22 09:20:47.727905: step: 536/463, loss: 2.7532782554626465 2023-01-22 09:20:48.391763: step: 538/463, loss: 0.42484498023986816 2023-01-22 09:20:48.988868: step: 540/463, loss: 1.3740956783294678 2023-01-22 09:20:49.518675: step: 542/463, loss: 0.6577314138412476 2023-01-22 09:20:50.124073: step: 544/463, loss: 0.2149064689874649 2023-01-22 09:20:50.755209: step: 546/463, loss: 0.8327336311340332 2023-01-22 09:20:51.451861: step: 548/463, loss: 0.5447402000427246 2023-01-22 09:20:52.068151: step: 550/463, loss: 0.37505486607551575 2023-01-22 09:20:52.755688: step: 552/463, loss: 0.6039700508117676 2023-01-22 09:20:53.469865: step: 554/463, loss: 0.36998796463012695 2023-01-22 09:20:54.076178: step: 556/463, loss: 5.404050827026367 2023-01-22 09:20:54.726966: step: 558/463, loss: 1.346888542175293 2023-01-22 09:20:55.324363: step: 560/463, loss: 0.7103744745254517 2023-01-22 09:20:55.975282: step: 562/463, loss: 0.5426487922668457 2023-01-22 09:20:56.581602: step: 564/463, loss: 1.5296276807785034 2023-01-22 09:20:57.181764: step: 566/463, loss: 3.758345603942871 2023-01-22 09:20:57.766316: step: 568/463, loss: 3.3112542629241943 2023-01-22 09:20:58.394039: step: 570/463, loss: 1.065105676651001 2023-01-22 09:20:59.095462: step: 572/463, loss: 1.2148536443710327 2023-01-22 09:20:59.765058: step: 574/463, loss: 2.2530767917633057 2023-01-22 09:21:00.382375: step: 576/463, loss: 0.49736371636390686 2023-01-22 09:21:00.961591: step: 578/463, loss: 1.3433315753936768 2023-01-22 09:21:01.578337: step: 580/463, loss: 1.414001703262329 2023-01-22 09:21:02.196011: step: 582/463, loss: 1.4997971057891846 2023-01-22 09:21:02.846568: step: 584/463, loss: 0.8019573092460632 2023-01-22 09:21:03.504360: step: 586/463, loss: 0.794381856918335 2023-01-22 09:21:04.172206: step: 588/463, loss: 0.8728071451187134 2023-01-22 09:21:04.730192: step: 590/463, loss: 1.1764867305755615 2023-01-22 09:21:05.319339: step: 592/463, loss: 0.33734798431396484 2023-01-22 09:21:05.930076: step: 594/463, loss: 0.4521723985671997 2023-01-22 09:21:06.612945: step: 596/463, loss: 0.7711382508277893 2023-01-22 09:21:07.199969: step: 598/463, loss: 2.0961408615112305 2023-01-22 09:21:07.825795: step: 600/463, loss: 0.8732158541679382 2023-01-22 09:21:08.422339: step: 602/463, loss: 1.8879420757293701 2023-01-22 09:21:09.049536: step: 604/463, loss: 2.4897968769073486 2023-01-22 09:21:09.691955: step: 606/463, loss: 1.7163336277008057 2023-01-22 09:21:10.322232: step: 608/463, loss: 1.9362083673477173 2023-01-22 09:21:10.877860: step: 610/463, loss: 0.7525981068611145 2023-01-22 09:21:11.459738: step: 612/463, loss: 0.758465051651001 2023-01-22 09:21:12.041315: step: 614/463, loss: 10.410308837890625 2023-01-22 09:21:12.689069: step: 616/463, loss: 0.6769464015960693 2023-01-22 09:21:13.304232: step: 618/463, loss: 1.1008172035217285 2023-01-22 09:21:13.986133: step: 620/463, loss: 0.9038336277008057 2023-01-22 09:21:14.582796: step: 622/463, loss: 1.5241053104400635 2023-01-22 09:21:15.297428: step: 624/463, loss: 2.4104151725769043 2023-01-22 09:21:15.925929: step: 626/463, loss: 0.24718955159187317 2023-01-22 09:21:16.574127: step: 628/463, loss: 0.9794787764549255 2023-01-22 09:21:17.170215: step: 630/463, loss: 0.6829535961151123 2023-01-22 09:21:17.786168: step: 632/463, loss: 0.29277709126472473 2023-01-22 09:21:18.394627: step: 634/463, loss: 1.5057624578475952 2023-01-22 09:21:18.972421: step: 636/463, loss: 0.3484926223754883 2023-01-22 09:21:19.554858: step: 638/463, loss: 1.3105700016021729 2023-01-22 09:21:20.131283: step: 640/463, loss: 0.8319970369338989 2023-01-22 09:21:20.760265: step: 642/463, loss: 0.731103777885437 2023-01-22 09:21:21.437429: step: 644/463, loss: 0.32466021180152893 2023-01-22 09:21:22.104540: step: 646/463, loss: 1.8179347515106201 2023-01-22 09:21:22.699925: step: 648/463, loss: 0.982518196105957 2023-01-22 09:21:23.258801: step: 650/463, loss: 1.2489746809005737 2023-01-22 09:21:23.866231: step: 652/463, loss: 1.3136425018310547 2023-01-22 09:21:24.487280: step: 654/463, loss: 0.7801439762115479 2023-01-22 09:21:25.052023: step: 656/463, loss: 1.4304418563842773 2023-01-22 09:21:25.641196: step: 658/463, loss: 3.3175930976867676 2023-01-22 09:21:26.222640: step: 660/463, loss: 0.4897221624851227 2023-01-22 09:21:26.865850: step: 662/463, loss: 0.6910770535469055 2023-01-22 09:21:27.467776: step: 664/463, loss: 0.6170697212219238 2023-01-22 09:21:28.140441: step: 666/463, loss: 0.35087454319000244 2023-01-22 09:21:28.826902: step: 668/463, loss: 0.8153607845306396 2023-01-22 09:21:29.433984: step: 670/463, loss: 0.5577743649482727 2023-01-22 09:21:30.006807: step: 672/463, loss: 1.2256635427474976 2023-01-22 09:21:30.590004: step: 674/463, loss: 0.37994086742401123 2023-01-22 09:21:31.247541: step: 676/463, loss: 2.9787983894348145 2023-01-22 09:21:31.898237: step: 678/463, loss: 1.5564532279968262 2023-01-22 09:21:32.568884: step: 680/463, loss: 0.9659140706062317 2023-01-22 09:21:33.194311: step: 682/463, loss: 0.8655729293823242 2023-01-22 09:21:33.761472: step: 684/463, loss: 1.3255674839019775 2023-01-22 09:21:34.352157: step: 686/463, loss: 0.5485014915466309 2023-01-22 09:21:34.948547: step: 688/463, loss: 1.1680799722671509 2023-01-22 09:21:35.537982: step: 690/463, loss: 6.5204033851623535 2023-01-22 09:21:36.122796: step: 692/463, loss: 2.358337879180908 2023-01-22 09:21:36.755480: step: 694/463, loss: 0.3776841163635254 2023-01-22 09:21:37.317116: step: 696/463, loss: 0.5681212544441223 2023-01-22 09:21:37.985493: step: 698/463, loss: 2.4553847312927246 2023-01-22 09:21:38.557385: step: 700/463, loss: 0.9091672301292419 2023-01-22 09:21:39.124660: step: 702/463, loss: 0.3592880666255951 2023-01-22 09:21:39.776565: step: 704/463, loss: 2.7278480529785156 2023-01-22 09:21:40.388160: step: 706/463, loss: 0.6220060586929321 2023-01-22 09:21:41.009466: step: 708/463, loss: 1.7619963884353638 2023-01-22 09:21:41.804599: step: 710/463, loss: 0.7121729254722595 2023-01-22 09:21:42.387119: step: 712/463, loss: 0.4605644643306732 2023-01-22 09:21:43.040265: step: 714/463, loss: 0.533030092716217 2023-01-22 09:21:43.664753: step: 716/463, loss: 0.9994133710861206 2023-01-22 09:21:44.316772: step: 718/463, loss: 0.5203347206115723 2023-01-22 09:21:44.957215: step: 720/463, loss: 0.34223470091819763 2023-01-22 09:21:45.498256: step: 722/463, loss: 1.003799319267273 2023-01-22 09:21:46.135326: step: 724/463, loss: 2.554177761077881 2023-01-22 09:21:46.708311: step: 726/463, loss: 1.3827364444732666 2023-01-22 09:21:47.434228: step: 728/463, loss: 2.2792391777038574 2023-01-22 09:21:48.035681: step: 730/463, loss: 0.8657217025756836 2023-01-22 09:21:48.617081: step: 732/463, loss: 1.5779836177825928 2023-01-22 09:21:49.240324: step: 734/463, loss: 1.3845607042312622 2023-01-22 09:21:49.882324: step: 736/463, loss: 0.5036277770996094 2023-01-22 09:21:50.432600: step: 738/463, loss: 0.8902627825737 2023-01-22 09:21:51.085933: step: 740/463, loss: 0.9201827049255371 2023-01-22 09:21:51.657223: step: 742/463, loss: 0.6846176385879517 2023-01-22 09:21:52.267879: step: 744/463, loss: 0.20370186865329742 2023-01-22 09:21:52.917890: step: 746/463, loss: 2.649069309234619 2023-01-22 09:21:53.540294: step: 748/463, loss: 2.1348648071289062 2023-01-22 09:21:54.203834: step: 750/463, loss: 1.833654522895813 2023-01-22 09:21:54.847954: step: 752/463, loss: 0.4487384855747223 2023-01-22 09:21:55.457059: step: 754/463, loss: 1.139836311340332 2023-01-22 09:21:56.096543: step: 756/463, loss: 3.737612724304199 2023-01-22 09:21:56.706850: step: 758/463, loss: 4.06385612487793 2023-01-22 09:21:57.324524: step: 760/463, loss: 0.8881456255912781 2023-01-22 09:21:57.922959: step: 762/463, loss: 1.1446888446807861 2023-01-22 09:21:58.485737: step: 764/463, loss: 3.837965488433838 2023-01-22 09:21:59.089294: step: 766/463, loss: 0.6248462796211243 2023-01-22 09:21:59.742017: step: 768/463, loss: 0.45582717657089233 2023-01-22 09:22:00.307485: step: 770/463, loss: 1.6969670057296753 2023-01-22 09:22:00.961830: step: 772/463, loss: 0.45889759063720703 2023-01-22 09:22:01.499540: step: 774/463, loss: 1.189276099205017 2023-01-22 09:22:02.130443: step: 776/463, loss: 3.6378519535064697 2023-01-22 09:22:02.753753: step: 778/463, loss: 1.0155627727508545 2023-01-22 09:22:03.333442: step: 780/463, loss: 0.20280370116233826 2023-01-22 09:22:03.941964: step: 782/463, loss: 2.4504642486572266 2023-01-22 09:22:04.574056: step: 784/463, loss: 0.46651384234428406 2023-01-22 09:22:05.218902: step: 786/463, loss: 1.5811887979507446 2023-01-22 09:22:05.887790: step: 788/463, loss: 0.5913249254226685 2023-01-22 09:22:06.547859: step: 790/463, loss: 0.39848899841308594 2023-01-22 09:22:07.278690: step: 792/463, loss: 0.4430505633354187 2023-01-22 09:22:07.927570: step: 794/463, loss: 1.360766887664795 2023-01-22 09:22:08.519398: step: 796/463, loss: 1.5937418937683105 2023-01-22 09:22:09.244293: step: 798/463, loss: 3.325157880783081 2023-01-22 09:22:09.946737: step: 800/463, loss: 2.8510522842407227 2023-01-22 09:22:10.641560: step: 802/463, loss: 7.090426921844482 2023-01-22 09:22:11.266651: step: 804/463, loss: 2.0580217838287354 2023-01-22 09:22:11.837729: step: 806/463, loss: 0.3951593339443207 2023-01-22 09:22:12.479381: step: 808/463, loss: 1.911102533340454 2023-01-22 09:22:13.149492: step: 810/463, loss: 0.8056557178497314 2023-01-22 09:22:13.882402: step: 812/463, loss: 0.8891547322273254 2023-01-22 09:22:14.535862: step: 814/463, loss: 1.185707449913025 2023-01-22 09:22:15.149960: step: 816/463, loss: 0.8997979164123535 2023-01-22 09:22:15.823434: step: 818/463, loss: 1.4040427207946777 2023-01-22 09:22:16.417781: step: 820/463, loss: 0.7298281192779541 2023-01-22 09:22:17.087999: step: 822/463, loss: 2.939669132232666 2023-01-22 09:22:17.731228: step: 824/463, loss: 0.9704604744911194 2023-01-22 09:22:18.401902: step: 826/463, loss: 2.1452479362487793 2023-01-22 09:22:19.053067: step: 828/463, loss: 0.4087268114089966 2023-01-22 09:22:19.673863: step: 830/463, loss: 0.9527699947357178 2023-01-22 09:22:20.276710: step: 832/463, loss: 0.4639630913734436 2023-01-22 09:22:20.913228: step: 834/463, loss: 1.3756364583969116 2023-01-22 09:22:21.498030: step: 836/463, loss: 1.413311243057251 2023-01-22 09:22:22.095884: step: 838/463, loss: 1.6089898347854614 2023-01-22 09:22:22.757118: step: 840/463, loss: 1.9243468046188354 2023-01-22 09:22:23.360756: step: 842/463, loss: 1.7314045429229736 2023-01-22 09:22:24.007515: step: 844/463, loss: 0.7033754587173462 2023-01-22 09:22:24.606782: step: 846/463, loss: 1.3000514507293701 2023-01-22 09:22:25.226262: step: 848/463, loss: 3.187561511993408 2023-01-22 09:22:25.887890: step: 850/463, loss: 1.1734458208084106 2023-01-22 09:22:26.487162: step: 852/463, loss: 0.9049826860427856 2023-01-22 09:22:27.065827: step: 854/463, loss: 0.322780042886734 2023-01-22 09:22:27.698202: step: 856/463, loss: 0.5923429727554321 2023-01-22 09:22:28.358908: step: 858/463, loss: 3.676504611968994 2023-01-22 09:22:28.953456: step: 860/463, loss: 1.0498003959655762 2023-01-22 09:22:29.606816: step: 862/463, loss: 0.45997345447540283 2023-01-22 09:22:30.255760: step: 864/463, loss: 1.7753878831863403 2023-01-22 09:22:30.862428: step: 866/463, loss: 0.7851157188415527 2023-01-22 09:22:31.513966: step: 868/463, loss: 3.227285861968994 2023-01-22 09:22:32.140510: step: 870/463, loss: 0.6186690926551819 2023-01-22 09:22:32.829391: step: 872/463, loss: 0.7951236963272095 2023-01-22 09:22:33.404148: step: 874/463, loss: 0.7681107521057129 2023-01-22 09:22:34.007106: step: 876/463, loss: 0.6349336504936218 2023-01-22 09:22:34.529699: step: 878/463, loss: 1.5183823108673096 2023-01-22 09:22:35.176384: step: 880/463, loss: 0.8476854562759399 2023-01-22 09:22:35.813411: step: 882/463, loss: 1.6248974800109863 2023-01-22 09:22:36.428241: step: 884/463, loss: 1.7656960487365723 2023-01-22 09:22:36.967587: step: 886/463, loss: 1.8569083213806152 2023-01-22 09:22:37.620157: step: 888/463, loss: 3.104783535003662 2023-01-22 09:22:38.255161: step: 890/463, loss: 0.704376220703125 2023-01-22 09:22:38.842303: step: 892/463, loss: 1.4094443321228027 2023-01-22 09:22:39.478202: step: 894/463, loss: 0.9609196186065674 2023-01-22 09:22:40.099014: step: 896/463, loss: 0.8082519769668579 2023-01-22 09:22:40.747901: step: 898/463, loss: 2.183816432952881 2023-01-22 09:22:41.380286: step: 900/463, loss: 4.201904296875 2023-01-22 09:22:41.964863: step: 902/463, loss: 1.4621878862380981 2023-01-22 09:22:42.653067: step: 904/463, loss: 0.7892708778381348 2023-01-22 09:22:43.263154: step: 906/463, loss: 1.0239051580429077 2023-01-22 09:22:43.897710: step: 908/463, loss: 1.0663679838180542 2023-01-22 09:22:44.497737: step: 910/463, loss: 0.3086651563644409 2023-01-22 09:22:45.122197: step: 912/463, loss: 0.7683236002922058 2023-01-22 09:22:45.799616: step: 914/463, loss: 1.2961827516555786 2023-01-22 09:22:46.363879: step: 916/463, loss: 0.8860579133033752 2023-01-22 09:22:46.987555: step: 918/463, loss: 2.0733752250671387 2023-01-22 09:22:47.650285: step: 920/463, loss: 1.5364549160003662 2023-01-22 09:22:48.241692: step: 922/463, loss: 0.8691327571868896 2023-01-22 09:22:48.915576: step: 924/463, loss: 0.23627275228500366 2023-01-22 09:22:49.544845: step: 926/463, loss: 0.609950065612793 ================================================== Loss: 1.538 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2954727564102564, 'r': 0.2618963068181818, 'f1': 0.2776731927710843}, 'combined': 0.20460129993658843, 'epoch': 2} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3508325011047194, 'r': 0.31056247113402163, 'f1': 0.32947153537744717}, 'combined': 0.2553798503882605, 'epoch': 2} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.26367726518651735, 'r': 0.2706687078240387, 'f1': 0.26712724809549987}, 'combined': 0.196830603859842, 'epoch': 2} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33461647347297296, 'r': 0.3155657651577073, 'f1': 0.3248120210366193}, 'combined': 0.25176816941594415, 'epoch': 2} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.25784122500224194, 'r': 0.259306231962482, 'f1': 0.25857165340168176}, 'combined': 0.19052648145387074, 'epoch': 2} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33718038614347295, 'r': 0.304050632867668, 'f1': 0.31975967087676527}, 'combined': 0.24785199369395203, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2857142857142857, 'r': 0.2857142857142857, 'f1': 0.2857142857142857}, 'combined': 0.19047619047619047, 'epoch': 2} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.23684210526315788, 'r': 0.1956521739130435, 'f1': 0.21428571428571427}, 'combined': 0.10714285714285714, 'epoch': 2} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4583333333333333, 'r': 0.1896551724137931, 'f1': 0.2682926829268293}, 'combined': 0.17886178861788618, 'epoch': 2} New best chinese model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2954727564102564, 'r': 0.2618963068181818, 'f1': 0.2776731927710843}, 'combined': 0.20460129993658843, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3508325011047194, 'r': 0.31056247113402163, 'f1': 0.32947153537744717}, 'combined': 0.2553798503882605, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2857142857142857, 'r': 0.2857142857142857, 'f1': 0.2857142857142857}, 'combined': 0.19047619047619047, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2601594931177627, 'r': 0.1961050724637681, 'f1': 0.2236360221617053}, 'combined': 0.16478443738230916, 'epoch': 1} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3221864127697005, 'r': 0.26991602316361707, 'f1': 0.2937440212184601}, 'combined': 0.22768675328894994, 'epoch': 1} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3088235294117647, 'r': 0.22826086956521738, 'f1': 0.26249999999999996}, 'combined': 0.13124999999999998, 'epoch': 1} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.25784122500224194, 'r': 0.259306231962482, 'f1': 0.25857165340168176}, 'combined': 0.19052648145387074, 'epoch': 2} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33718038614347295, 'r': 0.304050632867668, 'f1': 0.31975967087676527}, 'combined': 0.24785199369395203, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4583333333333333, 'r': 0.1896551724137931, 'f1': 0.2682926829268293}, 'combined': 0.17886178861788618, 'epoch': 2} ****************************** Epoch: 3 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 09:25:35.782156: step: 2/463, loss: 0.5861802101135254 2023-01-22 09:25:36.453304: step: 4/463, loss: 1.3735284805297852 2023-01-22 09:25:37.088931: step: 6/463, loss: 0.8099331259727478 2023-01-22 09:25:37.711336: step: 8/463, loss: 1.2316306829452515 2023-01-22 09:25:38.376013: step: 10/463, loss: 1.097306251525879 2023-01-22 09:25:39.008592: step: 12/463, loss: 1.6339104175567627 2023-01-22 09:25:39.637800: step: 14/463, loss: 1.1404949426651 2023-01-22 09:25:40.284698: step: 16/463, loss: 1.3325152397155762 2023-01-22 09:25:40.920592: step: 18/463, loss: 7.127131462097168 2023-01-22 09:25:41.563638: step: 20/463, loss: 1.808300495147705 2023-01-22 09:25:42.187374: step: 22/463, loss: 0.4308868944644928 2023-01-22 09:25:42.865533: step: 24/463, loss: 6.343321323394775 2023-01-22 09:25:43.509431: step: 26/463, loss: 4.054040431976318 2023-01-22 09:25:44.222741: step: 28/463, loss: 2.443748950958252 2023-01-22 09:25:44.876097: step: 30/463, loss: 0.453969806432724 2023-01-22 09:25:45.557097: step: 32/463, loss: 1.8452019691467285 2023-01-22 09:25:46.161455: step: 34/463, loss: 0.8011531829833984 2023-01-22 09:25:46.750997: step: 36/463, loss: 0.26531073451042175 2023-01-22 09:25:47.421669: step: 38/463, loss: 6.34611177444458 2023-01-22 09:25:48.078354: step: 40/463, loss: 2.094949722290039 2023-01-22 09:25:48.649830: step: 42/463, loss: 1.7757618427276611 2023-01-22 09:25:49.212479: step: 44/463, loss: 0.9916521906852722 2023-01-22 09:25:49.814847: step: 46/463, loss: 0.39908546209335327 2023-01-22 09:25:50.431488: step: 48/463, loss: 0.6339250206947327 2023-01-22 09:25:51.011766: step: 50/463, loss: 0.25349336862564087 2023-01-22 09:25:51.635612: step: 52/463, loss: 0.5262441635131836 2023-01-22 09:25:52.287128: step: 54/463, loss: 0.5490635633468628 2023-01-22 09:25:52.877114: step: 56/463, loss: 3.83716082572937 2023-01-22 09:25:53.466678: step: 58/463, loss: 1.4231542348861694 2023-01-22 09:25:54.036524: step: 60/463, loss: 0.2889988422393799 2023-01-22 09:25:54.690119: step: 62/463, loss: 0.4947792589664459 2023-01-22 09:25:55.241596: step: 64/463, loss: 2.2839412689208984 2023-01-22 09:25:55.835808: step: 66/463, loss: 0.31450018286705017 2023-01-22 09:25:56.462197: step: 68/463, loss: 2.052475929260254 2023-01-22 09:25:57.033395: step: 70/463, loss: 0.9109322428703308 2023-01-22 09:25:57.652471: step: 72/463, loss: 3.709829092025757 2023-01-22 09:25:58.230295: step: 74/463, loss: 5.1272501945495605 2023-01-22 09:25:58.826161: step: 76/463, loss: 0.5184556841850281 2023-01-22 09:25:59.425455: step: 78/463, loss: 0.8050916790962219 2023-01-22 09:26:00.037322: step: 80/463, loss: 0.8729863166809082 2023-01-22 09:26:00.648840: step: 82/463, loss: 1.5307533740997314 2023-01-22 09:26:01.217406: step: 84/463, loss: 1.6778154373168945 2023-01-22 09:26:01.843320: step: 86/463, loss: 0.26905369758605957 2023-01-22 09:26:02.440647: step: 88/463, loss: 1.1601436138153076 2023-01-22 09:26:03.099581: step: 90/463, loss: 1.824967861175537 2023-01-22 09:26:03.755346: step: 92/463, loss: 1.315454363822937 2023-01-22 09:26:04.456473: step: 94/463, loss: 0.28194063901901245 2023-01-22 09:26:05.057801: step: 96/463, loss: 3.870901346206665 2023-01-22 09:26:05.659385: step: 98/463, loss: 0.9389933347702026 2023-01-22 09:26:06.333436: step: 100/463, loss: 5.372572898864746 2023-01-22 09:26:07.022540: step: 102/463, loss: 0.6139791011810303 2023-01-22 09:26:07.625643: step: 104/463, loss: 1.6130611896514893 2023-01-22 09:26:08.219160: step: 106/463, loss: 0.558000922203064 2023-01-22 09:26:08.829118: step: 108/463, loss: 0.8126239776611328 2023-01-22 09:26:09.467188: step: 110/463, loss: 1.028892159461975 2023-01-22 09:26:10.087644: step: 112/463, loss: 1.3918007612228394 2023-01-22 09:26:10.685388: step: 114/463, loss: 0.6031695604324341 2023-01-22 09:26:11.242352: step: 116/463, loss: 1.7772356271743774 2023-01-22 09:26:11.849322: step: 118/463, loss: 0.6793345212936401 2023-01-22 09:26:12.451235: step: 120/463, loss: 0.5006611943244934 2023-01-22 09:26:13.058329: step: 122/463, loss: 0.4140205979347229 2023-01-22 09:26:13.673181: step: 124/463, loss: 1.1413962841033936 2023-01-22 09:26:14.265362: step: 126/463, loss: 0.2686176896095276 2023-01-22 09:26:14.882750: step: 128/463, loss: 0.8819482326507568 2023-01-22 09:26:15.526318: step: 130/463, loss: 1.0352551937103271 2023-01-22 09:26:16.133815: step: 132/463, loss: 2.252594232559204 2023-01-22 09:26:16.741725: step: 134/463, loss: 0.8180625438690186 2023-01-22 09:26:17.326815: step: 136/463, loss: 0.4154853820800781 2023-01-22 09:26:18.025137: step: 138/463, loss: 4.321404457092285 2023-01-22 09:26:18.677165: step: 140/463, loss: 0.5059050917625427 2023-01-22 09:26:19.290343: step: 142/463, loss: 5.630484580993652 2023-01-22 09:26:19.910390: step: 144/463, loss: 0.8276830315589905 2023-01-22 09:26:20.486886: step: 146/463, loss: 0.6797397136688232 2023-01-22 09:26:21.088743: step: 148/463, loss: 0.3368200361728668 2023-01-22 09:26:21.640126: step: 150/463, loss: 1.7757160663604736 2023-01-22 09:26:22.257954: step: 152/463, loss: 0.5957731604576111 2023-01-22 09:26:22.962170: step: 154/463, loss: 0.9258847236633301 2023-01-22 09:26:23.557380: step: 156/463, loss: 0.26371699571609497 2023-01-22 09:26:24.122097: step: 158/463, loss: 1.1446921825408936 2023-01-22 09:26:24.740138: step: 160/463, loss: 0.6360496878623962 2023-01-22 09:26:25.388199: step: 162/463, loss: 1.1370587348937988 2023-01-22 09:26:25.932029: step: 164/463, loss: 0.8999028205871582 2023-01-22 09:26:26.481937: step: 166/463, loss: 2.5184946060180664 2023-01-22 09:26:27.122889: step: 168/463, loss: 0.1822376251220703 2023-01-22 09:26:27.725645: step: 170/463, loss: 1.5731807947158813 2023-01-22 09:26:28.320285: step: 172/463, loss: 0.47844138741493225 2023-01-22 09:26:28.920141: step: 174/463, loss: 0.6122033596038818 2023-01-22 09:26:29.497908: step: 176/463, loss: 1.179314136505127 2023-01-22 09:26:30.090799: step: 178/463, loss: 1.4083936214447021 2023-01-22 09:26:30.703038: step: 180/463, loss: 1.178756833076477 2023-01-22 09:26:31.278314: step: 182/463, loss: 1.239646077156067 2023-01-22 09:26:31.875764: step: 184/463, loss: 0.27436578273773193 2023-01-22 09:26:32.537516: step: 186/463, loss: 0.5282583832740784 2023-01-22 09:26:33.096459: step: 188/463, loss: 1.8934845924377441 2023-01-22 09:26:33.752961: step: 190/463, loss: 0.856751561164856 2023-01-22 09:26:34.403455: step: 192/463, loss: 0.8625920414924622 2023-01-22 09:26:34.958560: step: 194/463, loss: 0.35751649737358093 2023-01-22 09:26:35.582706: step: 196/463, loss: 0.6424140334129333 2023-01-22 09:26:36.215243: step: 198/463, loss: 0.2851732671260834 2023-01-22 09:26:36.878030: step: 200/463, loss: 1.6641547679901123 2023-01-22 09:26:37.533322: step: 202/463, loss: 1.0294564962387085 2023-01-22 09:26:38.160896: step: 204/463, loss: 0.5370976328849792 2023-01-22 09:26:38.844289: step: 206/463, loss: 0.8250738382339478 2023-01-22 09:26:39.590440: step: 208/463, loss: 0.9348017573356628 2023-01-22 09:26:40.229152: step: 210/463, loss: 0.42686107754707336 2023-01-22 09:26:40.967943: step: 212/463, loss: 0.43081042170524597 2023-01-22 09:26:41.610929: step: 214/463, loss: 1.2076212167739868 2023-01-22 09:26:42.185905: step: 216/463, loss: 0.5701276063919067 2023-01-22 09:26:42.785974: step: 218/463, loss: 0.30391037464141846 2023-01-22 09:26:43.438976: step: 220/463, loss: 0.748412013053894 2023-01-22 09:26:43.999901: step: 222/463, loss: 0.6345020532608032 2023-01-22 09:26:44.611207: step: 224/463, loss: 1.4756361246109009 2023-01-22 09:26:45.202957: step: 226/463, loss: 1.0517140626907349 2023-01-22 09:26:45.793543: step: 228/463, loss: 0.2785537540912628 2023-01-22 09:26:46.443700: step: 230/463, loss: 1.8610318899154663 2023-01-22 09:26:47.105873: step: 232/463, loss: 2.6059792041778564 2023-01-22 09:26:47.756227: step: 234/463, loss: 1.5763198137283325 2023-01-22 09:26:48.379596: step: 236/463, loss: 0.27505967020988464 2023-01-22 09:26:48.950626: step: 238/463, loss: 6.978393077850342 2023-01-22 09:26:49.587660: step: 240/463, loss: 0.27931398153305054 2023-01-22 09:26:50.182827: step: 242/463, loss: 3.0710248947143555 2023-01-22 09:26:50.781725: step: 244/463, loss: 3.735290050506592 2023-01-22 09:26:51.435171: step: 246/463, loss: 0.9977658987045288 2023-01-22 09:26:52.016409: step: 248/463, loss: 7.581729888916016 2023-01-22 09:26:52.619575: step: 250/463, loss: 1.1610678434371948 2023-01-22 09:26:53.222447: step: 252/463, loss: 0.419791042804718 2023-01-22 09:26:53.841504: step: 254/463, loss: 0.5280798673629761 2023-01-22 09:26:54.415318: step: 256/463, loss: 1.2745078802108765 2023-01-22 09:26:55.020806: step: 258/463, loss: 0.6631940603256226 2023-01-22 09:26:55.593781: step: 260/463, loss: 0.5666185617446899 2023-01-22 09:26:56.168258: step: 262/463, loss: 0.5521462559700012 2023-01-22 09:26:56.817296: step: 264/463, loss: 0.7782831192016602 2023-01-22 09:26:57.379497: step: 266/463, loss: 2.3119421005249023 2023-01-22 09:26:57.981167: step: 268/463, loss: 0.17674469947814941 2023-01-22 09:26:58.653183: step: 270/463, loss: 0.24772882461547852 2023-01-22 09:26:59.233606: step: 272/463, loss: 0.35136878490448 2023-01-22 09:26:59.872143: step: 274/463, loss: 1.448638916015625 2023-01-22 09:27:00.533049: step: 276/463, loss: 0.6813992261886597 2023-01-22 09:27:01.087707: step: 278/463, loss: 0.5054275393486023 2023-01-22 09:27:01.802101: step: 280/463, loss: 0.6831744909286499 2023-01-22 09:27:02.359761: step: 282/463, loss: 2.321988344192505 2023-01-22 09:27:02.984599: step: 284/463, loss: 0.4068462550640106 2023-01-22 09:27:03.610517: step: 286/463, loss: 1.177586317062378 2023-01-22 09:27:04.229258: step: 288/463, loss: 0.40971463918685913 2023-01-22 09:27:04.838799: step: 290/463, loss: 0.6653015613555908 2023-01-22 09:27:05.410249: step: 292/463, loss: 2.758209228515625 2023-01-22 09:27:06.060278: step: 294/463, loss: 1.2204540967941284 2023-01-22 09:27:06.677048: step: 296/463, loss: 0.8606878519058228 2023-01-22 09:27:07.313303: step: 298/463, loss: 0.4252905249595642 2023-01-22 09:27:08.024792: step: 300/463, loss: 0.7016019821166992 2023-01-22 09:27:08.660348: step: 302/463, loss: 1.239556074142456 2023-01-22 09:27:09.279116: step: 304/463, loss: 0.8451669216156006 2023-01-22 09:27:09.890943: step: 306/463, loss: 0.2898506224155426 2023-01-22 09:27:10.511988: step: 308/463, loss: 1.11007821559906 2023-01-22 09:27:11.286243: step: 310/463, loss: 1.357858419418335 2023-01-22 09:27:11.926888: step: 312/463, loss: 0.8738505840301514 2023-01-22 09:27:12.508018: step: 314/463, loss: 0.8062949776649475 2023-01-22 09:27:13.137678: step: 316/463, loss: 0.28662511706352234 2023-01-22 09:27:13.774647: step: 318/463, loss: 1.0566612482070923 2023-01-22 09:27:14.424145: step: 320/463, loss: 4.029036045074463 2023-01-22 09:27:15.104398: step: 322/463, loss: 0.33994489908218384 2023-01-22 09:27:15.722567: step: 324/463, loss: 0.6363861560821533 2023-01-22 09:27:16.305062: step: 326/463, loss: 0.17141923308372498 2023-01-22 09:27:16.933479: step: 328/463, loss: 1.4284721612930298 2023-01-22 09:27:17.529310: step: 330/463, loss: 0.9983595609664917 2023-01-22 09:27:18.174833: step: 332/463, loss: 0.5424853563308716 2023-01-22 09:27:18.772947: step: 334/463, loss: 0.4410066306591034 2023-01-22 09:27:19.398936: step: 336/463, loss: 0.5908340811729431 2023-01-22 09:27:20.132058: step: 338/463, loss: 1.1698366403579712 2023-01-22 09:27:20.702845: step: 340/463, loss: 1.2951130867004395 2023-01-22 09:27:21.304656: step: 342/463, loss: 5.039706230163574 2023-01-22 09:27:21.889386: step: 344/463, loss: 0.8472703099250793 2023-01-22 09:27:22.450494: step: 346/463, loss: 0.4648662507534027 2023-01-22 09:27:23.081861: step: 348/463, loss: 1.3336620330810547 2023-01-22 09:27:23.699332: step: 350/463, loss: 0.7034398913383484 2023-01-22 09:27:24.363483: step: 352/463, loss: 2.906385660171509 2023-01-22 09:27:24.944481: step: 354/463, loss: 0.6561774015426636 2023-01-22 09:27:25.503331: step: 356/463, loss: 1.2728078365325928 2023-01-22 09:27:26.044945: step: 358/463, loss: 0.570324718952179 2023-01-22 09:27:26.668924: step: 360/463, loss: 2.1639058589935303 2023-01-22 09:27:27.306871: step: 362/463, loss: 0.8726208806037903 2023-01-22 09:27:27.952925: step: 364/463, loss: 0.8003362417221069 2023-01-22 09:27:28.586475: step: 366/463, loss: 0.3665609359741211 2023-01-22 09:27:29.217738: step: 368/463, loss: 2.069092035293579 2023-01-22 09:27:29.843086: step: 370/463, loss: 0.6988101005554199 2023-01-22 09:27:30.526404: step: 372/463, loss: 0.3569216728210449 2023-01-22 09:27:31.184270: step: 374/463, loss: 0.4643917977809906 2023-01-22 09:27:31.823372: step: 376/463, loss: 0.5362979769706726 2023-01-22 09:27:32.395249: step: 378/463, loss: 0.45398181676864624 2023-01-22 09:27:33.080994: step: 380/463, loss: 1.6672056913375854 2023-01-22 09:27:33.667422: step: 382/463, loss: 12.548444747924805 2023-01-22 09:27:34.235370: step: 384/463, loss: 0.3443489670753479 2023-01-22 09:27:34.817389: step: 386/463, loss: 0.6675260663032532 2023-01-22 09:27:35.384172: step: 388/463, loss: 0.5176720023155212 2023-01-22 09:27:35.994586: step: 390/463, loss: 2.1954400539398193 2023-01-22 09:27:36.550703: step: 392/463, loss: 2.113750457763672 2023-01-22 09:27:37.164134: step: 394/463, loss: 0.849291205406189 2023-01-22 09:27:37.816521: step: 396/463, loss: 0.6472211480140686 2023-01-22 09:27:38.477545: step: 398/463, loss: 0.7684171795845032 2023-01-22 09:27:39.100486: step: 400/463, loss: 0.21302194893360138 2023-01-22 09:27:39.729646: step: 402/463, loss: 0.29459697008132935 2023-01-22 09:27:40.352323: step: 404/463, loss: 0.871033251285553 2023-01-22 09:27:40.974295: step: 406/463, loss: 3.115330934524536 2023-01-22 09:27:41.621463: step: 408/463, loss: 10.548649787902832 2023-01-22 09:27:42.191994: step: 410/463, loss: 2.4393489360809326 2023-01-22 09:27:42.812543: step: 412/463, loss: 0.8470199108123779 2023-01-22 09:27:43.490045: step: 414/463, loss: 0.917614758014679 2023-01-22 09:27:44.069778: step: 416/463, loss: 1.7795963287353516 2023-01-22 09:27:44.683717: step: 418/463, loss: 0.9657366275787354 2023-01-22 09:27:45.309174: step: 420/463, loss: 0.5376374125480652 2023-01-22 09:27:45.909857: step: 422/463, loss: 1.7917696237564087 2023-01-22 09:27:46.600411: step: 424/463, loss: 0.6141297817230225 2023-01-22 09:27:47.228979: step: 426/463, loss: 1.829071044921875 2023-01-22 09:27:47.807557: step: 428/463, loss: 1.31123685836792 2023-01-22 09:27:48.392855: step: 430/463, loss: 1.9714741706848145 2023-01-22 09:27:49.012805: step: 432/463, loss: 1.7060784101486206 2023-01-22 09:27:49.694556: step: 434/463, loss: 1.519418478012085 2023-01-22 09:27:50.264104: step: 436/463, loss: 1.744737982749939 2023-01-22 09:27:50.837892: step: 438/463, loss: 0.24459630250930786 2023-01-22 09:27:51.398150: step: 440/463, loss: 0.5234218239784241 2023-01-22 09:27:52.034356: step: 442/463, loss: 0.6314299702644348 2023-01-22 09:27:52.630965: step: 444/463, loss: 0.5994163155555725 2023-01-22 09:27:53.271471: step: 446/463, loss: 0.8587840795516968 2023-01-22 09:27:53.867421: step: 448/463, loss: 1.207343339920044 2023-01-22 09:27:54.490315: step: 450/463, loss: 0.9677929878234863 2023-01-22 09:27:55.056936: step: 452/463, loss: 0.2548527717590332 2023-01-22 09:27:55.673094: step: 454/463, loss: 0.7236099243164062 2023-01-22 09:27:56.197228: step: 456/463, loss: 0.15329702198505402 2023-01-22 09:27:56.830296: step: 458/463, loss: 0.9661197662353516 2023-01-22 09:27:57.444800: step: 460/463, loss: 0.6926252841949463 2023-01-22 09:27:58.094963: step: 462/463, loss: 1.2916291952133179 2023-01-22 09:27:58.701069: step: 464/463, loss: 1.450941562652588 2023-01-22 09:27:59.320534: step: 466/463, loss: 0.4431131184101105 2023-01-22 09:27:59.957896: step: 468/463, loss: 1.0406421422958374 2023-01-22 09:28:00.573441: step: 470/463, loss: 0.3492239713668823 2023-01-22 09:28:01.177351: step: 472/463, loss: 0.7932349443435669 2023-01-22 09:28:01.776920: step: 474/463, loss: 0.2141757756471634 2023-01-22 09:28:02.360872: step: 476/463, loss: 1.2393091917037964 2023-01-22 09:28:02.971561: step: 478/463, loss: 0.7231630086898804 2023-01-22 09:28:03.517130: step: 480/463, loss: 1.9460698366165161 2023-01-22 09:28:04.104557: step: 482/463, loss: 0.7675976753234863 2023-01-22 09:28:04.662186: step: 484/463, loss: 0.8713288307189941 2023-01-22 09:28:05.227957: step: 486/463, loss: 4.226635932922363 2023-01-22 09:28:05.791033: step: 488/463, loss: 0.9001918435096741 2023-01-22 09:28:06.363884: step: 490/463, loss: 1.3170396089553833 2023-01-22 09:28:06.969645: step: 492/463, loss: 0.19038604199886322 2023-01-22 09:28:07.543495: step: 494/463, loss: 0.71153724193573 2023-01-22 09:28:08.184532: step: 496/463, loss: 1.6241918802261353 2023-01-22 09:28:08.763087: step: 498/463, loss: 2.139375925064087 2023-01-22 09:28:09.326790: step: 500/463, loss: 0.3230654299259186 2023-01-22 09:28:09.956945: step: 502/463, loss: 1.6597295999526978 2023-01-22 09:28:10.533261: step: 504/463, loss: 1.501914143562317 2023-01-22 09:28:11.176152: step: 506/463, loss: 0.7005184292793274 2023-01-22 09:28:11.845563: step: 508/463, loss: 1.976676344871521 2023-01-22 09:28:12.486916: step: 510/463, loss: 2.682526111602783 2023-01-22 09:28:13.104388: step: 512/463, loss: 0.5501776933670044 2023-01-22 09:28:13.671760: step: 514/463, loss: 0.16431453824043274 2023-01-22 09:28:14.323075: step: 516/463, loss: 0.6123006939888 2023-01-22 09:28:14.889427: step: 518/463, loss: 1.1639758348464966 2023-01-22 09:28:15.557719: step: 520/463, loss: 1.2645277976989746 2023-01-22 09:28:16.175236: step: 522/463, loss: 0.31428611278533936 2023-01-22 09:28:16.790504: step: 524/463, loss: 1.8434460163116455 2023-01-22 09:28:17.381903: step: 526/463, loss: 0.513737678527832 2023-01-22 09:28:18.029108: step: 528/463, loss: 1.9712008237838745 2023-01-22 09:28:18.647026: step: 530/463, loss: 1.06448495388031 2023-01-22 09:28:19.230282: step: 532/463, loss: 2.177704095840454 2023-01-22 09:28:19.849703: step: 534/463, loss: 0.7558973431587219 2023-01-22 09:28:20.461111: step: 536/463, loss: 1.3033359050750732 2023-01-22 09:28:21.047413: step: 538/463, loss: 0.44104665517807007 2023-01-22 09:28:21.679083: step: 540/463, loss: 1.5810799598693848 2023-01-22 09:28:22.263990: step: 542/463, loss: 1.0467878580093384 2023-01-22 09:28:22.825539: step: 544/463, loss: 2.053138256072998 2023-01-22 09:28:23.378847: step: 546/463, loss: 0.5331852436065674 2023-01-22 09:28:24.009878: step: 548/463, loss: 0.8410186767578125 2023-01-22 09:28:24.609709: step: 550/463, loss: 1.1138092279434204 2023-01-22 09:28:25.254850: step: 552/463, loss: 1.8896617889404297 2023-01-22 09:28:25.870936: step: 554/463, loss: 0.5497190952301025 2023-01-22 09:28:26.407114: step: 556/463, loss: 0.8403732180595398 2023-01-22 09:28:26.992989: step: 558/463, loss: 0.7325672507286072 2023-01-22 09:28:27.599103: step: 560/463, loss: 0.8002176880836487 2023-01-22 09:28:28.206970: step: 562/463, loss: 0.8524148464202881 2023-01-22 09:28:28.837963: step: 564/463, loss: 0.6542765498161316 2023-01-22 09:28:29.402749: step: 566/463, loss: 0.15191848576068878 2023-01-22 09:28:29.983885: step: 568/463, loss: 1.5579509735107422 2023-01-22 09:28:30.602661: step: 570/463, loss: 0.545935332775116 2023-01-22 09:28:31.213986: step: 572/463, loss: 0.6627711057662964 2023-01-22 09:28:31.835236: step: 574/463, loss: 0.7539028525352478 2023-01-22 09:28:32.441331: step: 576/463, loss: 0.9779089689254761 2023-01-22 09:28:32.983468: step: 578/463, loss: 0.3440927565097809 2023-01-22 09:28:33.566025: step: 580/463, loss: 0.7873618006706238 2023-01-22 09:28:34.166988: step: 582/463, loss: 0.5235024690628052 2023-01-22 09:28:34.764804: step: 584/463, loss: 0.4406951665878296 2023-01-22 09:28:35.358866: step: 586/463, loss: 1.1145025491714478 2023-01-22 09:28:35.994613: step: 588/463, loss: 0.8192187547683716 2023-01-22 09:28:36.645613: step: 590/463, loss: 0.5468909740447998 2023-01-22 09:28:37.202738: step: 592/463, loss: 0.8540785312652588 2023-01-22 09:28:37.751539: step: 594/463, loss: 0.48876598477363586 2023-01-22 09:28:38.355693: step: 596/463, loss: 7.115334987640381 2023-01-22 09:28:39.018488: step: 598/463, loss: 1.5285708904266357 2023-01-22 09:28:39.632631: step: 600/463, loss: 0.5567865371704102 2023-01-22 09:28:40.283654: step: 602/463, loss: 0.6331756114959717 2023-01-22 09:28:40.913673: step: 604/463, loss: 0.25854185223579407 2023-01-22 09:28:41.495409: step: 606/463, loss: 0.7981890439987183 2023-01-22 09:28:42.111948: step: 608/463, loss: 1.4998795986175537 2023-01-22 09:28:42.759695: step: 610/463, loss: 0.7032451033592224 2023-01-22 09:28:43.361073: step: 612/463, loss: 1.8100223541259766 2023-01-22 09:28:43.999975: step: 614/463, loss: 0.6211801767349243 2023-01-22 09:28:44.599698: step: 616/463, loss: 0.9448941349983215 2023-01-22 09:28:45.190368: step: 618/463, loss: 1.4542386531829834 2023-01-22 09:28:45.842206: step: 620/463, loss: 1.0262504816055298 2023-01-22 09:28:46.413423: step: 622/463, loss: 0.3168994188308716 2023-01-22 09:28:47.036916: step: 624/463, loss: 0.4160541892051697 2023-01-22 09:28:47.601047: step: 626/463, loss: 0.16113285720348358 2023-01-22 09:28:48.224788: step: 628/463, loss: 0.5850247144699097 2023-01-22 09:28:48.827657: step: 630/463, loss: 1.9325248003005981 2023-01-22 09:28:49.410926: step: 632/463, loss: 0.4104180634021759 2023-01-22 09:28:50.028290: step: 634/463, loss: 3.36464262008667 2023-01-22 09:28:50.597998: step: 636/463, loss: 2.89518404006958 2023-01-22 09:28:51.222903: step: 638/463, loss: 1.1435872316360474 2023-01-22 09:28:51.867282: step: 640/463, loss: 0.5668440461158752 2023-01-22 09:28:52.502908: step: 642/463, loss: 2.265939474105835 2023-01-22 09:28:53.139841: step: 644/463, loss: 0.8448171615600586 2023-01-22 09:28:53.740990: step: 646/463, loss: 0.904466986656189 2023-01-22 09:28:54.436181: step: 648/463, loss: 0.6500080823898315 2023-01-22 09:28:55.088580: step: 650/463, loss: 1.0509213209152222 2023-01-22 09:28:55.690699: step: 652/463, loss: 2.3355085849761963 2023-01-22 09:28:56.339751: step: 654/463, loss: 2.0012452602386475 2023-01-22 09:28:56.997368: step: 656/463, loss: 0.36765116453170776 2023-01-22 09:28:57.604709: step: 658/463, loss: 0.7038108110427856 2023-01-22 09:28:58.230988: step: 660/463, loss: 4.1042160987854 2023-01-22 09:28:58.781428: step: 662/463, loss: 0.7068663835525513 2023-01-22 09:28:59.372481: step: 664/463, loss: 0.8961589336395264 2023-01-22 09:28:59.950759: step: 666/463, loss: 0.7031440734863281 2023-01-22 09:29:00.535124: step: 668/463, loss: 0.6003390550613403 2023-01-22 09:29:01.171519: step: 670/463, loss: 2.03519868850708 2023-01-22 09:29:01.772681: step: 672/463, loss: 0.9938336610794067 2023-01-22 09:29:02.363123: step: 674/463, loss: 0.3683180809020996 2023-01-22 09:29:02.938841: step: 676/463, loss: 0.5071661472320557 2023-01-22 09:29:03.636942: step: 678/463, loss: 1.2671972513198853 2023-01-22 09:29:04.295134: step: 680/463, loss: 7.186624526977539 2023-01-22 09:29:04.912422: step: 682/463, loss: 0.9342090487480164 2023-01-22 09:29:05.525531: step: 684/463, loss: 0.7077946662902832 2023-01-22 09:29:06.063380: step: 686/463, loss: 0.5273284912109375 2023-01-22 09:29:06.633732: step: 688/463, loss: 0.4433707594871521 2023-01-22 09:29:07.238140: step: 690/463, loss: 2.368081569671631 2023-01-22 09:29:07.897486: step: 692/463, loss: 1.9197748899459839 2023-01-22 09:29:08.488241: step: 694/463, loss: 9.760427474975586 2023-01-22 09:29:09.141255: step: 696/463, loss: 0.564527690410614 2023-01-22 09:29:09.695013: step: 698/463, loss: 0.6014097929000854 2023-01-22 09:29:10.280765: step: 700/463, loss: 0.82591313123703 2023-01-22 09:29:11.044841: step: 702/463, loss: 1.2693265676498413 2023-01-22 09:29:11.670367: step: 704/463, loss: 0.3230922520160675 2023-01-22 09:29:12.250827: step: 706/463, loss: 1.8927083015441895 2023-01-22 09:29:12.988818: step: 708/463, loss: 0.9642900228500366 2023-01-22 09:29:13.629050: step: 710/463, loss: 0.3218848705291748 2023-01-22 09:29:14.213365: step: 712/463, loss: 0.43335258960723877 2023-01-22 09:29:14.830582: step: 714/463, loss: 1.2807881832122803 2023-01-22 09:29:15.442565: step: 716/463, loss: 0.17460721731185913 2023-01-22 09:29:16.113119: step: 718/463, loss: 1.3837840557098389 2023-01-22 09:29:16.740885: step: 720/463, loss: 0.69569993019104 2023-01-22 09:29:17.270754: step: 722/463, loss: 0.9557390213012695 2023-01-22 09:29:17.869035: step: 724/463, loss: 1.474457859992981 2023-01-22 09:29:18.464470: step: 726/463, loss: 6.387290000915527 2023-01-22 09:29:19.170489: step: 728/463, loss: 0.5838481783866882 2023-01-22 09:29:19.787024: step: 730/463, loss: 1.8129897117614746 2023-01-22 09:29:20.338698: step: 732/463, loss: 1.4038245677947998 2023-01-22 09:29:20.974696: step: 734/463, loss: 2.108640432357788 2023-01-22 09:29:21.579619: step: 736/463, loss: 0.3806455433368683 2023-01-22 09:29:22.159817: step: 738/463, loss: 0.6286329030990601 2023-01-22 09:29:22.811358: step: 740/463, loss: 1.0798470973968506 2023-01-22 09:29:23.406956: step: 742/463, loss: 1.1314997673034668 2023-01-22 09:29:24.011949: step: 744/463, loss: 0.7847237586975098 2023-01-22 09:29:24.668043: step: 746/463, loss: 1.1423081159591675 2023-01-22 09:29:25.269000: step: 748/463, loss: 1.2383679151535034 2023-01-22 09:29:25.856702: step: 750/463, loss: 3.0755460262298584 2023-01-22 09:29:26.418726: step: 752/463, loss: 0.8417287468910217 2023-01-22 09:29:27.086173: step: 754/463, loss: 1.211743712425232 2023-01-22 09:29:27.676904: step: 756/463, loss: 0.2925609052181244 2023-01-22 09:29:28.265731: step: 758/463, loss: 0.8931692838668823 2023-01-22 09:29:28.858085: step: 760/463, loss: 1.3160549402236938 2023-01-22 09:29:29.507353: step: 762/463, loss: 5.234009742736816 2023-01-22 09:29:30.073567: step: 764/463, loss: 0.8863605260848999 2023-01-22 09:29:30.726883: step: 766/463, loss: 0.5623803734779358 2023-01-22 09:29:31.299590: step: 768/463, loss: 0.270097017288208 2023-01-22 09:29:31.919698: step: 770/463, loss: 1.0369184017181396 2023-01-22 09:29:32.517680: step: 772/463, loss: 0.25732794404029846 2023-01-22 09:29:33.074384: step: 774/463, loss: 0.9469619989395142 2023-01-22 09:29:33.649969: step: 776/463, loss: 1.1866776943206787 2023-01-22 09:29:34.229116: step: 778/463, loss: 1.0973572731018066 2023-01-22 09:29:34.891217: step: 780/463, loss: 0.8199627995491028 2023-01-22 09:29:35.487540: step: 782/463, loss: 2.367387533187866 2023-01-22 09:29:36.148168: step: 784/463, loss: 0.8228763341903687 2023-01-22 09:29:36.751033: step: 786/463, loss: 1.0637669563293457 2023-01-22 09:29:37.372059: step: 788/463, loss: 0.5030679106712341 2023-01-22 09:29:38.037556: step: 790/463, loss: 1.4799129962921143 2023-01-22 09:29:38.656993: step: 792/463, loss: 0.46158695220947266 2023-01-22 09:29:39.271323: step: 794/463, loss: 1.2611229419708252 2023-01-22 09:29:39.900758: step: 796/463, loss: 0.6380144357681274 2023-01-22 09:29:40.515416: step: 798/463, loss: 1.0006062984466553 2023-01-22 09:29:41.164569: step: 800/463, loss: 0.5657363533973694 2023-01-22 09:29:41.728136: step: 802/463, loss: 0.577856719493866 2023-01-22 09:29:42.335685: step: 804/463, loss: 0.7096571326255798 2023-01-22 09:29:42.944247: step: 806/463, loss: 0.44972920417785645 2023-01-22 09:29:43.534702: step: 808/463, loss: 1.2680013179779053 2023-01-22 09:29:44.116908: step: 810/463, loss: 0.42306584119796753 2023-01-22 09:29:44.703370: step: 812/463, loss: 1.772107481956482 2023-01-22 09:29:45.302812: step: 814/463, loss: 0.429757297039032 2023-01-22 09:29:45.979662: step: 816/463, loss: 0.3739444315433502 2023-01-22 09:29:46.575302: step: 818/463, loss: 2.2081494331359863 2023-01-22 09:29:47.195246: step: 820/463, loss: 1.4641931056976318 2023-01-22 09:29:47.788549: step: 822/463, loss: 5.53977108001709 2023-01-22 09:29:48.365506: step: 824/463, loss: 0.5255829095840454 2023-01-22 09:29:49.035572: step: 826/463, loss: 0.7704782485961914 2023-01-22 09:29:49.643741: step: 828/463, loss: 0.2725366950035095 2023-01-22 09:29:50.233415: step: 830/463, loss: 1.444212555885315 2023-01-22 09:29:50.859887: step: 832/463, loss: 0.8818296194076538 2023-01-22 09:29:51.565020: step: 834/463, loss: 0.9003888368606567 2023-01-22 09:29:52.137481: step: 836/463, loss: 0.7898910045623779 2023-01-22 09:29:52.715836: step: 838/463, loss: 0.697838544845581 2023-01-22 09:29:53.351866: step: 840/463, loss: 0.5520128011703491 2023-01-22 09:29:54.035462: step: 842/463, loss: 0.26900333166122437 2023-01-22 09:29:54.612971: step: 844/463, loss: 0.30813395977020264 2023-01-22 09:29:55.169078: step: 846/463, loss: 0.35832253098487854 2023-01-22 09:29:55.750067: step: 848/463, loss: 0.689294159412384 2023-01-22 09:29:56.376789: step: 850/463, loss: 0.30226805806159973 2023-01-22 09:29:57.015509: step: 852/463, loss: 2.3166003227233887 2023-01-22 09:29:57.638083: step: 854/463, loss: 0.8949844837188721 2023-01-22 09:29:58.232383: step: 856/463, loss: 0.3378556966781616 2023-01-22 09:29:58.869802: step: 858/463, loss: 0.9642161726951599 2023-01-22 09:29:59.490822: step: 860/463, loss: 0.2953707277774811 2023-01-22 09:30:00.087416: step: 862/463, loss: 1.545053243637085 2023-01-22 09:30:00.673512: step: 864/463, loss: 0.3494056165218353 2023-01-22 09:30:01.285469: step: 866/463, loss: 0.14667749404907227 2023-01-22 09:30:01.898016: step: 868/463, loss: 0.5536177754402161 2023-01-22 09:30:02.503597: step: 870/463, loss: 0.861301600933075 2023-01-22 09:30:03.259824: step: 872/463, loss: 0.8773106932640076 2023-01-22 09:30:03.820080: step: 874/463, loss: 1.549638032913208 2023-01-22 09:30:04.396539: step: 876/463, loss: 0.4861299991607666 2023-01-22 09:30:04.984257: step: 878/463, loss: 0.571794331073761 2023-01-22 09:30:05.559801: step: 880/463, loss: 0.2687010169029236 2023-01-22 09:30:06.181330: step: 882/463, loss: 0.27729666233062744 2023-01-22 09:30:06.770968: step: 884/463, loss: 0.5071350932121277 2023-01-22 09:30:07.358643: step: 886/463, loss: 0.9567668437957764 2023-01-22 09:30:07.929162: step: 888/463, loss: 0.5104363560676575 2023-01-22 09:30:08.547978: step: 890/463, loss: 0.3868088722229004 2023-01-22 09:30:09.287152: step: 892/463, loss: 0.889945924282074 2023-01-22 09:30:09.775682: step: 894/463, loss: 0.7016943693161011 2023-01-22 09:30:10.380670: step: 896/463, loss: 0.6431083083152771 2023-01-22 09:30:10.994202: step: 898/463, loss: 1.282279372215271 2023-01-22 09:30:11.678992: step: 900/463, loss: 1.4700337648391724 2023-01-22 09:30:12.270425: step: 902/463, loss: 0.7349509000778198 2023-01-22 09:30:12.899298: step: 904/463, loss: 1.867394208908081 2023-01-22 09:30:13.553840: step: 906/463, loss: 1.5084949731826782 2023-01-22 09:30:14.199038: step: 908/463, loss: 0.7614808082580566 2023-01-22 09:30:14.821198: step: 910/463, loss: 0.258074015378952 2023-01-22 09:30:15.429121: step: 912/463, loss: 1.1549773216247559 2023-01-22 09:30:16.077614: step: 914/463, loss: 1.5394691228866577 2023-01-22 09:30:16.628460: step: 916/463, loss: 0.7869983315467834 2023-01-22 09:30:17.255792: step: 918/463, loss: 4.469120979309082 2023-01-22 09:30:17.882379: step: 920/463, loss: 0.9039833545684814 2023-01-22 09:30:18.511022: step: 922/463, loss: 1.4155858755111694 2023-01-22 09:30:19.201854: step: 924/463, loss: 1.495905876159668 2023-01-22 09:30:19.858687: step: 926/463, loss: 0.18362940847873688 ================================================== Loss: 1.260 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31490492170022366, 'r': 0.26659564393939394, 'f1': 0.2887435897435897}, 'combined': 0.2127584345479082, 'epoch': 3} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32583254485752133, 'r': 0.33121326578177396, 'f1': 0.32850087325034905}, 'combined': 0.2546274711318495, 'epoch': 3} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.26854368932038836, 'r': 0.2619318181818182, 'f1': 0.265196548418025}, 'combined': 0.1954079830448605, 'epoch': 3} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3110620060294373, 'r': 0.33788753682463646, 'f1': 0.32392032993742637}, 'combined': 0.2510770021524549, 'epoch': 3} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2753738095238095, 'r': 0.260770652958153, 'f1': 0.26787335556790814}, 'combined': 0.19738036726056388, 'epoch': 3} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.31125412588579554, 'r': 0.3238185126188001, 'f1': 0.31741203125403966}, 'combined': 0.246032292168203, 'epoch': 3} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2692307692307692, 'r': 0.3, 'f1': 0.28378378378378377}, 'combined': 0.18918918918918917, 'epoch': 3} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.26136363636363635, 'r': 0.25, 'f1': 0.25555555555555554}, 'combined': 0.12777777777777777, 'epoch': 3} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25, 'r': 0.06896551724137931, 'f1': 0.1081081081081081}, 'combined': 0.07207207207207206, 'epoch': 3} New best chinese model... New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31490492170022366, 'r': 0.26659564393939394, 'f1': 0.2887435897435897}, 'combined': 0.2127584345479082, 'epoch': 3} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32583254485752133, 'r': 0.33121326578177396, 'f1': 0.32850087325034905}, 'combined': 0.2546274711318495, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2692307692307692, 'r': 0.3, 'f1': 0.28378378378378377}, 'combined': 0.18918918918918917, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.26854368932038836, 'r': 0.2619318181818182, 'f1': 0.265196548418025}, 'combined': 0.1954079830448605, 'epoch': 3} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3110620060294373, 'r': 0.33788753682463646, 'f1': 0.32392032993742637}, 'combined': 0.2510770021524549, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.26136363636363635, 'r': 0.25, 'f1': 0.25555555555555554}, 'combined': 0.12777777777777777, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.25784122500224194, 'r': 0.259306231962482, 'f1': 0.25857165340168176}, 'combined': 0.19052648145387074, 'epoch': 2} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33718038614347295, 'r': 0.304050632867668, 'f1': 0.31975967087676527}, 'combined': 0.24785199369395203, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4583333333333333, 'r': 0.1896551724137931, 'f1': 0.2682926829268293}, 'combined': 0.17886178861788618, 'epoch': 2} ****************************** Epoch: 4 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 09:33:06.494285: step: 2/463, loss: 1.5499814748764038 2023-01-22 09:33:07.055549: step: 4/463, loss: 1.3976256847381592 2023-01-22 09:33:07.671140: step: 6/463, loss: 0.7366541624069214 2023-01-22 09:33:08.297440: step: 8/463, loss: 1.052539587020874 2023-01-22 09:33:08.941330: step: 10/463, loss: 0.12517094612121582 2023-01-22 09:33:09.537949: step: 12/463, loss: 0.7128562927246094 2023-01-22 09:33:10.196155: step: 14/463, loss: 0.6162844896316528 2023-01-22 09:33:10.774350: step: 16/463, loss: 0.2992062270641327 2023-01-22 09:33:11.346957: step: 18/463, loss: 0.6701135635375977 2023-01-22 09:33:12.029598: step: 20/463, loss: 3.2410378456115723 2023-01-22 09:33:12.658500: step: 22/463, loss: 2.759457588195801 2023-01-22 09:33:13.232229: step: 24/463, loss: 0.7164468765258789 2023-01-22 09:33:13.841922: step: 26/463, loss: 0.4062047302722931 2023-01-22 09:33:14.455838: step: 28/463, loss: 0.6168721318244934 2023-01-22 09:33:15.070894: step: 30/463, loss: 0.9625312685966492 2023-01-22 09:33:15.756812: step: 32/463, loss: 0.5392823219299316 2023-01-22 09:33:16.334462: step: 34/463, loss: 0.3740987479686737 2023-01-22 09:33:16.983954: step: 36/463, loss: 2.635406255722046 2023-01-22 09:33:17.680219: step: 38/463, loss: 0.32080456614494324 2023-01-22 09:33:18.356728: step: 40/463, loss: 0.4820089340209961 2023-01-22 09:33:18.922732: step: 42/463, loss: 0.5408957004547119 2023-01-22 09:33:19.519179: step: 44/463, loss: 0.3159761428833008 2023-01-22 09:33:20.103661: step: 46/463, loss: 1.0234254598617554 2023-01-22 09:33:20.752992: step: 48/463, loss: 1.0249654054641724 2023-01-22 09:33:21.395024: step: 50/463, loss: 0.5312938690185547 2023-01-22 09:33:22.031311: step: 52/463, loss: 0.697744607925415 2023-01-22 09:33:22.624705: step: 54/463, loss: 0.4011181592941284 2023-01-22 09:33:23.182507: step: 56/463, loss: 1.4504947662353516 2023-01-22 09:33:23.795899: step: 58/463, loss: 0.7250833511352539 2023-01-22 09:33:24.388057: step: 60/463, loss: 0.9447992444038391 2023-01-22 09:33:25.003220: step: 62/463, loss: 1.2293415069580078 2023-01-22 09:33:25.559441: step: 64/463, loss: 1.1988518238067627 2023-01-22 09:33:26.139492: step: 66/463, loss: 0.6876519918441772 2023-01-22 09:33:26.851922: step: 68/463, loss: 1.082821249961853 2023-01-22 09:33:27.395109: step: 70/463, loss: 1.3291658163070679 2023-01-22 09:33:27.981318: step: 72/463, loss: 5.81356954574585 2023-01-22 09:33:28.508010: step: 74/463, loss: 0.2818446159362793 2023-01-22 09:33:29.120753: step: 76/463, loss: 0.5646530985832214 2023-01-22 09:33:29.729113: step: 78/463, loss: 0.5189517736434937 2023-01-22 09:33:30.321510: step: 80/463, loss: 1.4787445068359375 2023-01-22 09:33:30.916214: step: 82/463, loss: 0.20730076730251312 2023-01-22 09:33:31.579234: step: 84/463, loss: 0.991459310054779 2023-01-22 09:33:32.150474: step: 86/463, loss: 1.3527065515518188 2023-01-22 09:33:32.742959: step: 88/463, loss: 0.48701098561286926 2023-01-22 09:33:33.354729: step: 90/463, loss: 0.5792151093482971 2023-01-22 09:33:33.912047: step: 92/463, loss: 0.5322930812835693 2023-01-22 09:33:34.494735: step: 94/463, loss: 0.6045755743980408 2023-01-22 09:33:35.088231: step: 96/463, loss: 0.5694054961204529 2023-01-22 09:33:35.672906: step: 98/463, loss: 0.36010074615478516 2023-01-22 09:33:36.291184: step: 100/463, loss: 0.4744027256965637 2023-01-22 09:33:36.863304: step: 102/463, loss: 0.2686455547809601 2023-01-22 09:33:37.461772: step: 104/463, loss: 0.22884207963943481 2023-01-22 09:33:38.013800: step: 106/463, loss: 8.636054039001465 2023-01-22 09:33:38.593634: step: 108/463, loss: 0.4087449908256531 2023-01-22 09:33:39.180728: step: 110/463, loss: 0.696462869644165 2023-01-22 09:33:39.761212: step: 112/463, loss: 0.7888972759246826 2023-01-22 09:33:40.386138: step: 114/463, loss: 1.7524969577789307 2023-01-22 09:33:41.016981: step: 116/463, loss: 1.1989630460739136 2023-01-22 09:33:41.608848: step: 118/463, loss: 0.8862960338592529 2023-01-22 09:33:42.247110: step: 120/463, loss: 1.5011028051376343 2023-01-22 09:33:42.824357: step: 122/463, loss: 0.2632182538509369 2023-01-22 09:33:43.378092: step: 124/463, loss: 1.1665992736816406 2023-01-22 09:33:43.978496: step: 126/463, loss: 0.5007005929946899 2023-01-22 09:33:44.619368: step: 128/463, loss: 1.1219534873962402 2023-01-22 09:33:45.227741: step: 130/463, loss: 0.24945278465747833 2023-01-22 09:33:45.849820: step: 132/463, loss: 0.572050929069519 2023-01-22 09:33:46.422842: step: 134/463, loss: 0.5119114518165588 2023-01-22 09:33:47.176370: step: 136/463, loss: 1.209529161453247 2023-01-22 09:33:47.793305: step: 138/463, loss: 0.8966051340103149 2023-01-22 09:33:48.420171: step: 140/463, loss: 0.4535786509513855 2023-01-22 09:33:49.024856: step: 142/463, loss: 0.4864313006401062 2023-01-22 09:33:49.624467: step: 144/463, loss: 0.3802744150161743 2023-01-22 09:33:50.257811: step: 146/463, loss: 1.4790608882904053 2023-01-22 09:33:50.873177: step: 148/463, loss: 1.546055793762207 2023-01-22 09:33:51.481143: step: 150/463, loss: 1.0345149040222168 2023-01-22 09:33:52.089512: step: 152/463, loss: 1.5517895221710205 2023-01-22 09:33:52.725184: step: 154/463, loss: 1.5662128925323486 2023-01-22 09:33:53.298484: step: 156/463, loss: 0.9450207948684692 2023-01-22 09:33:53.904789: step: 158/463, loss: 1.6443309783935547 2023-01-22 09:33:54.554752: step: 160/463, loss: 1.9773542881011963 2023-01-22 09:33:55.175587: step: 162/463, loss: 1.3677631616592407 2023-01-22 09:33:55.831354: step: 164/463, loss: 0.1987934708595276 2023-01-22 09:33:56.435273: step: 166/463, loss: 1.2400168180465698 2023-01-22 09:33:57.008332: step: 168/463, loss: 0.9201279282569885 2023-01-22 09:33:57.627969: step: 170/463, loss: 5.982337951660156 2023-01-22 09:33:58.237076: step: 172/463, loss: 4.339910507202148 2023-01-22 09:33:58.814354: step: 174/463, loss: 0.9256488680839539 2023-01-22 09:33:59.444882: step: 176/463, loss: 1.2025197744369507 2023-01-22 09:34:00.152589: step: 178/463, loss: 1.4537386894226074 2023-01-22 09:34:00.698201: step: 180/463, loss: 1.1383411884307861 2023-01-22 09:34:01.275794: step: 182/463, loss: 0.48516348004341125 2023-01-22 09:34:01.897154: step: 184/463, loss: 0.6389771699905396 2023-01-22 09:34:02.615850: step: 186/463, loss: 1.2300013303756714 2023-01-22 09:34:03.292522: step: 188/463, loss: 1.5572624206542969 2023-01-22 09:34:03.909464: step: 190/463, loss: 0.558221161365509 2023-01-22 09:34:04.480064: step: 192/463, loss: 0.3494292199611664 2023-01-22 09:34:05.087455: step: 194/463, loss: 0.3570707440376282 2023-01-22 09:34:05.638237: step: 196/463, loss: 0.6325271129608154 2023-01-22 09:34:06.176634: step: 198/463, loss: 1.267531394958496 2023-01-22 09:34:06.809028: step: 200/463, loss: 0.9931960701942444 2023-01-22 09:34:07.397634: step: 202/463, loss: 1.6724361181259155 2023-01-22 09:34:07.924816: step: 204/463, loss: 0.859893798828125 2023-01-22 09:34:08.502684: step: 206/463, loss: 1.2097604274749756 2023-01-22 09:34:09.116063: step: 208/463, loss: 0.9325987100601196 2023-01-22 09:34:09.734908: step: 210/463, loss: 0.31368377804756165 2023-01-22 09:34:10.318871: step: 212/463, loss: 0.36260324716567993 2023-01-22 09:34:10.879613: step: 214/463, loss: 1.1381499767303467 2023-01-22 09:34:11.506888: step: 216/463, loss: 0.21593250334262848 2023-01-22 09:34:12.190103: step: 218/463, loss: 1.3254320621490479 2023-01-22 09:34:12.833083: step: 220/463, loss: 1.705629587173462 2023-01-22 09:34:13.448870: step: 222/463, loss: 0.4967641234397888 2023-01-22 09:34:14.081689: step: 224/463, loss: 0.9133996367454529 2023-01-22 09:34:14.798246: step: 226/463, loss: 3.405883550643921 2023-01-22 09:34:15.433713: step: 228/463, loss: 1.4448118209838867 2023-01-22 09:34:16.061343: step: 230/463, loss: 0.7092037200927734 2023-01-22 09:34:16.749846: step: 232/463, loss: 0.5238355398178101 2023-01-22 09:34:17.352059: step: 234/463, loss: 1.1063101291656494 2023-01-22 09:34:17.955194: step: 236/463, loss: 0.6502557992935181 2023-01-22 09:34:18.770423: step: 238/463, loss: 1.274713397026062 2023-01-22 09:34:19.338512: step: 240/463, loss: 0.704504668712616 2023-01-22 09:34:20.061567: step: 242/463, loss: 0.24314984679222107 2023-01-22 09:34:20.609938: step: 244/463, loss: 0.5813273191452026 2023-01-22 09:34:21.271523: step: 246/463, loss: 0.464899480342865 2023-01-22 09:34:21.847749: step: 248/463, loss: 0.5019469261169434 2023-01-22 09:34:22.402405: step: 250/463, loss: 0.8156411051750183 2023-01-22 09:34:23.000071: step: 252/463, loss: 1.0405614376068115 2023-01-22 09:34:23.642422: step: 254/463, loss: 0.28098031878471375 2023-01-22 09:34:24.249180: step: 256/463, loss: 0.8996996879577637 2023-01-22 09:34:24.893827: step: 258/463, loss: 0.4171857237815857 2023-01-22 09:34:25.484795: step: 260/463, loss: 0.21163006126880646 2023-01-22 09:34:26.097011: step: 262/463, loss: 1.0563526153564453 2023-01-22 09:34:26.702987: step: 264/463, loss: 0.5746558308601379 2023-01-22 09:34:27.288427: step: 266/463, loss: 0.7748067378997803 2023-01-22 09:34:27.992471: step: 268/463, loss: 0.24001193046569824 2023-01-22 09:34:28.601394: step: 270/463, loss: 1.6013071537017822 2023-01-22 09:34:29.186723: step: 272/463, loss: 1.338907241821289 2023-01-22 09:34:29.821047: step: 274/463, loss: 1.3389027118682861 2023-01-22 09:34:30.416876: step: 276/463, loss: 0.4273982048034668 2023-01-22 09:34:31.048333: step: 278/463, loss: 0.8513306379318237 2023-01-22 09:34:31.640181: step: 280/463, loss: 3.8566060066223145 2023-01-22 09:34:32.311080: step: 282/463, loss: 0.27493754029273987 2023-01-22 09:34:32.961399: step: 284/463, loss: 0.2856474220752716 2023-01-22 09:34:33.565139: step: 286/463, loss: 6.383513450622559 2023-01-22 09:34:34.223961: step: 288/463, loss: 0.9333340525627136 2023-01-22 09:34:34.857275: step: 290/463, loss: 0.31084156036376953 2023-01-22 09:34:35.544791: step: 292/463, loss: 0.7775826454162598 2023-01-22 09:34:36.146817: step: 294/463, loss: 0.5315329432487488 2023-01-22 09:34:36.770071: step: 296/463, loss: 0.6977090239524841 2023-01-22 09:34:37.403619: step: 298/463, loss: 1.2035317420959473 2023-01-22 09:34:38.055426: step: 300/463, loss: 2.377845525741577 2023-01-22 09:34:38.580991: step: 302/463, loss: 1.1665148735046387 2023-01-22 09:34:39.271372: step: 304/463, loss: 0.6681552529335022 2023-01-22 09:34:39.900364: step: 306/463, loss: 0.7620559334754944 2023-01-22 09:34:40.496881: step: 308/463, loss: 0.34468916058540344 2023-01-22 09:34:41.174315: step: 310/463, loss: 0.47265100479125977 2023-01-22 09:34:41.780467: step: 312/463, loss: 0.4769750237464905 2023-01-22 09:34:42.398262: step: 314/463, loss: 0.17289303243160248 2023-01-22 09:34:42.972373: step: 316/463, loss: 0.4151703715324402 2023-01-22 09:34:43.590179: step: 318/463, loss: 0.4990484118461609 2023-01-22 09:34:44.207320: step: 320/463, loss: 2.021172285079956 2023-01-22 09:34:44.792534: step: 322/463, loss: 0.4618651866912842 2023-01-22 09:34:45.432194: step: 324/463, loss: 0.7754377126693726 2023-01-22 09:34:46.094819: step: 326/463, loss: 0.9057049751281738 2023-01-22 09:34:46.725142: step: 328/463, loss: 0.5969914793968201 2023-01-22 09:34:47.421265: step: 330/463, loss: 0.22016790509223938 2023-01-22 09:34:48.065581: step: 332/463, loss: 0.4143878221511841 2023-01-22 09:34:48.710226: step: 334/463, loss: 1.5573976039886475 2023-01-22 09:34:49.302223: step: 336/463, loss: 2.2091221809387207 2023-01-22 09:34:49.912727: step: 338/463, loss: 0.7839574217796326 2023-01-22 09:34:50.576136: step: 340/463, loss: 1.1517176628112793 2023-01-22 09:34:51.228433: step: 342/463, loss: 2.2496585845947266 2023-01-22 09:34:51.822196: step: 344/463, loss: 2.2116799354553223 2023-01-22 09:34:52.400456: step: 346/463, loss: 0.6737585067749023 2023-01-22 09:34:53.007489: step: 348/463, loss: 2.59515380859375 2023-01-22 09:34:53.557935: step: 350/463, loss: 0.5876641869544983 2023-01-22 09:34:54.133203: step: 352/463, loss: 0.6915403008460999 2023-01-22 09:34:54.677607: step: 354/463, loss: 1.2536826133728027 2023-01-22 09:34:55.337791: step: 356/463, loss: 0.845802366733551 2023-01-22 09:34:55.864440: step: 358/463, loss: 1.0244643688201904 2023-01-22 09:34:56.485556: step: 360/463, loss: 0.9937472343444824 2023-01-22 09:34:57.114758: step: 362/463, loss: 0.4416143596172333 2023-01-22 09:34:57.710082: step: 364/463, loss: 0.5093398094177246 2023-01-22 09:34:58.315878: step: 366/463, loss: 1.3189377784729004 2023-01-22 09:34:58.899978: step: 368/463, loss: 1.2794768810272217 2023-01-22 09:34:59.475128: step: 370/463, loss: 0.49750301241874695 2023-01-22 09:35:00.136508: step: 372/463, loss: 2.072526693344116 2023-01-22 09:35:00.776163: step: 374/463, loss: 1.5029501914978027 2023-01-22 09:35:01.363851: step: 376/463, loss: 1.5449005365371704 2023-01-22 09:35:02.007086: step: 378/463, loss: 0.7353377342224121 2023-01-22 09:35:02.597866: step: 380/463, loss: 0.22864212095737457 2023-01-22 09:35:03.236003: step: 382/463, loss: 0.9468680024147034 2023-01-22 09:35:03.799506: step: 384/463, loss: 1.0538992881774902 2023-01-22 09:35:04.411774: step: 386/463, loss: 0.7449288964271545 2023-01-22 09:35:05.045434: step: 388/463, loss: 0.6340309977531433 2023-01-22 09:35:05.638334: step: 390/463, loss: 0.2261842042207718 2023-01-22 09:35:06.239206: step: 392/463, loss: 1.7919375896453857 2023-01-22 09:35:06.882754: step: 394/463, loss: 0.657434344291687 2023-01-22 09:35:07.506893: step: 396/463, loss: 0.5850203633308411 2023-01-22 09:35:08.131581: step: 398/463, loss: 1.2136361598968506 2023-01-22 09:35:08.699055: step: 400/463, loss: 1.141325831413269 2023-01-22 09:35:09.350904: step: 402/463, loss: 0.3117745816707611 2023-01-22 09:35:09.982876: step: 404/463, loss: 0.4130016267299652 2023-01-22 09:35:10.588406: step: 406/463, loss: 0.5025079846382141 2023-01-22 09:35:11.201646: step: 408/463, loss: 0.5135420560836792 2023-01-22 09:35:11.821915: step: 410/463, loss: 0.2760311961174011 2023-01-22 09:35:12.383789: step: 412/463, loss: 1.9271175861358643 2023-01-22 09:35:13.008557: step: 414/463, loss: 0.6319013237953186 2023-01-22 09:35:13.648321: step: 416/463, loss: 0.6430721282958984 2023-01-22 09:35:14.188332: step: 418/463, loss: 0.6520329117774963 2023-01-22 09:35:14.908440: step: 420/463, loss: 0.5931282639503479 2023-01-22 09:35:15.456575: step: 422/463, loss: 3.5090017318725586 2023-01-22 09:35:15.982695: step: 424/463, loss: 0.18159732222557068 2023-01-22 09:35:16.596760: step: 426/463, loss: 0.7756088376045227 2023-01-22 09:35:17.143952: step: 428/463, loss: 0.40602627396583557 2023-01-22 09:35:17.781860: step: 430/463, loss: 1.0019469261169434 2023-01-22 09:35:18.462296: step: 432/463, loss: 1.3378796577453613 2023-01-22 09:35:19.021217: step: 434/463, loss: 1.8086732625961304 2023-01-22 09:35:19.651328: step: 436/463, loss: 1.603554368019104 2023-01-22 09:35:20.244099: step: 438/463, loss: 0.6995077729225159 2023-01-22 09:35:20.846110: step: 440/463, loss: 0.6076539754867554 2023-01-22 09:35:21.467695: step: 442/463, loss: 0.44419634342193604 2023-01-22 09:35:22.093549: step: 444/463, loss: 0.7216389775276184 2023-01-22 09:35:22.666225: step: 446/463, loss: 0.8208056688308716 2023-01-22 09:35:23.281179: step: 448/463, loss: 0.6675727367401123 2023-01-22 09:35:23.981940: step: 450/463, loss: 1.1969261169433594 2023-01-22 09:35:24.609719: step: 452/463, loss: 1.633812427520752 2023-01-22 09:35:25.198842: step: 454/463, loss: 0.6107681393623352 2023-01-22 09:35:25.916354: step: 456/463, loss: 2.18015193939209 2023-01-22 09:35:26.529783: step: 458/463, loss: 0.8157476782798767 2023-01-22 09:35:27.222003: step: 460/463, loss: 0.4467031955718994 2023-01-22 09:35:27.814134: step: 462/463, loss: 3.7532663345336914 2023-01-22 09:35:28.351080: step: 464/463, loss: 0.682722806930542 2023-01-22 09:35:28.972175: step: 466/463, loss: 0.5589893460273743 2023-01-22 09:35:29.579328: step: 468/463, loss: 1.3220820426940918 2023-01-22 09:35:30.177459: step: 470/463, loss: 0.19605286419391632 2023-01-22 09:35:30.848704: step: 472/463, loss: 0.2717975676059723 2023-01-22 09:35:31.530025: step: 474/463, loss: 0.7258267402648926 2023-01-22 09:35:32.109034: step: 476/463, loss: 0.8305843472480774 2023-01-22 09:35:32.736439: step: 478/463, loss: 1.0759207010269165 2023-01-22 09:35:33.308704: step: 480/463, loss: 0.9920896291732788 2023-01-22 09:35:34.006114: step: 482/463, loss: 1.0002574920654297 2023-01-22 09:35:34.606789: step: 484/463, loss: 0.7887349724769592 2023-01-22 09:35:35.204063: step: 486/463, loss: 0.38126900792121887 2023-01-22 09:35:35.796572: step: 488/463, loss: 1.2281166315078735 2023-01-22 09:35:36.385153: step: 490/463, loss: 0.612450897693634 2023-01-22 09:35:36.985575: step: 492/463, loss: 0.6850741505622864 2023-01-22 09:35:37.654678: step: 494/463, loss: 0.6846902966499329 2023-01-22 09:35:38.303530: step: 496/463, loss: 0.8670388460159302 2023-01-22 09:35:38.894306: step: 498/463, loss: 0.5522689819335938 2023-01-22 09:35:39.500267: step: 500/463, loss: 1.7460061311721802 2023-01-22 09:35:40.110977: step: 502/463, loss: 0.6664052605628967 2023-01-22 09:35:40.759209: step: 504/463, loss: 1.4807360172271729 2023-01-22 09:35:41.378315: step: 506/463, loss: 0.8345937132835388 2023-01-22 09:35:41.998706: step: 508/463, loss: 1.1722451448440552 2023-01-22 09:35:42.589292: step: 510/463, loss: 0.8332258462905884 2023-01-22 09:35:43.181064: step: 512/463, loss: 0.6260147094726562 2023-01-22 09:35:43.836545: step: 514/463, loss: 1.7738807201385498 2023-01-22 09:35:44.377778: step: 516/463, loss: 0.671615481376648 2023-01-22 09:35:45.015089: step: 518/463, loss: 0.5809204578399658 2023-01-22 09:35:45.550789: step: 520/463, loss: 0.9149419665336609 2023-01-22 09:35:46.240465: step: 522/463, loss: 0.5901783108711243 2023-01-22 09:35:46.875459: step: 524/463, loss: 3.1206703186035156 2023-01-22 09:35:47.510975: step: 526/463, loss: 0.30269375443458557 2023-01-22 09:35:48.113531: step: 528/463, loss: 1.3251066207885742 2023-01-22 09:35:48.643828: step: 530/463, loss: 0.6037574410438538 2023-01-22 09:35:49.324698: step: 532/463, loss: 0.6829760670661926 2023-01-22 09:35:49.893833: step: 534/463, loss: 1.2309551239013672 2023-01-22 09:35:50.553010: step: 536/463, loss: 0.8177639842033386 2023-01-22 09:35:51.122710: step: 538/463, loss: 2.6674256324768066 2023-01-22 09:35:51.741143: step: 540/463, loss: 2.1480700969696045 2023-01-22 09:35:52.312636: step: 542/463, loss: 0.5788074731826782 2023-01-22 09:35:52.896212: step: 544/463, loss: 2.4021010398864746 2023-01-22 09:35:53.551515: step: 546/463, loss: 0.89857017993927 2023-01-22 09:35:54.106852: step: 548/463, loss: 0.1400298923254013 2023-01-22 09:35:54.739841: step: 550/463, loss: 0.9120315313339233 2023-01-22 09:35:55.349904: step: 552/463, loss: 2.2394559383392334 2023-01-22 09:35:55.941095: step: 554/463, loss: 0.33983784914016724 2023-01-22 09:35:56.546836: step: 556/463, loss: 1.1214693784713745 2023-01-22 09:35:57.153390: step: 558/463, loss: 3.144329309463501 2023-01-22 09:35:57.775054: step: 560/463, loss: 0.5036450028419495 2023-01-22 09:35:58.366064: step: 562/463, loss: 1.2057098150253296 2023-01-22 09:35:58.925809: step: 564/463, loss: 0.4738738536834717 2023-01-22 09:35:59.584764: step: 566/463, loss: 0.49166056513786316 2023-01-22 09:36:00.257381: step: 568/463, loss: 0.8041418790817261 2023-01-22 09:36:00.878980: step: 570/463, loss: 0.7282711267471313 2023-01-22 09:36:01.496235: step: 572/463, loss: 0.44081413745880127 2023-01-22 09:36:02.157887: step: 574/463, loss: 0.08008335530757904 2023-01-22 09:36:02.773615: step: 576/463, loss: 0.5423160195350647 2023-01-22 09:36:03.396057: step: 578/463, loss: 6.515285015106201 2023-01-22 09:36:04.106629: step: 580/463, loss: 0.3133462071418762 2023-01-22 09:36:04.682732: step: 582/463, loss: 0.6519750356674194 2023-01-22 09:36:05.303786: step: 584/463, loss: 3.7580223083496094 2023-01-22 09:36:05.906210: step: 586/463, loss: 6.031226634979248 2023-01-22 09:36:06.439335: step: 588/463, loss: 0.6599688529968262 2023-01-22 09:36:07.017678: step: 590/463, loss: 1.0934603214263916 2023-01-22 09:36:07.600507: step: 592/463, loss: 1.2284626960754395 2023-01-22 09:36:08.210931: step: 594/463, loss: 0.18504633009433746 2023-01-22 09:36:08.789303: step: 596/463, loss: 0.6401344537734985 2023-01-22 09:36:09.405222: step: 598/463, loss: 0.230966717004776 2023-01-22 09:36:09.989748: step: 600/463, loss: 1.2084563970565796 2023-01-22 09:36:10.647873: step: 602/463, loss: 5.660030364990234 2023-01-22 09:36:11.242172: step: 604/463, loss: 0.5374194979667664 2023-01-22 09:36:11.885450: step: 606/463, loss: 1.4481379985809326 2023-01-22 09:36:12.515820: step: 608/463, loss: 1.5611393451690674 2023-01-22 09:36:13.128733: step: 610/463, loss: 2.258495807647705 2023-01-22 09:36:13.804866: step: 612/463, loss: 0.323032945394516 2023-01-22 09:36:14.325849: step: 614/463, loss: 0.61004638671875 2023-01-22 09:36:14.937478: step: 616/463, loss: 0.593949556350708 2023-01-22 09:36:15.468917: step: 618/463, loss: 1.1773996353149414 2023-01-22 09:36:16.039680: step: 620/463, loss: 1.2285062074661255 2023-01-22 09:36:16.583993: step: 622/463, loss: 1.7627588510513306 2023-01-22 09:36:17.223688: step: 624/463, loss: 1.952219843864441 2023-01-22 09:36:17.881062: step: 626/463, loss: 0.28251194953918457 2023-01-22 09:36:18.427891: step: 628/463, loss: 0.5243813395500183 2023-01-22 09:36:19.035569: step: 630/463, loss: 0.8729084730148315 2023-01-22 09:36:19.580563: step: 632/463, loss: 0.9973430633544922 2023-01-22 09:36:20.178417: step: 634/463, loss: 0.21113714575767517 2023-01-22 09:36:20.731501: step: 636/463, loss: 0.7165486812591553 2023-01-22 09:36:21.398007: step: 638/463, loss: 1.3764750957489014 2023-01-22 09:36:22.014955: step: 640/463, loss: 0.9395245313644409 2023-01-22 09:36:22.680234: step: 642/463, loss: 1.3470277786254883 2023-01-22 09:36:23.401828: step: 644/463, loss: 0.5813266038894653 2023-01-22 09:36:24.007313: step: 646/463, loss: 2.697793483734131 2023-01-22 09:36:24.607762: step: 648/463, loss: 1.7574219703674316 2023-01-22 09:36:25.181160: step: 650/463, loss: 2.361961841583252 2023-01-22 09:36:25.790820: step: 652/463, loss: 0.8672612905502319 2023-01-22 09:36:26.388358: step: 654/463, loss: 1.0100539922714233 2023-01-22 09:36:27.024480: step: 656/463, loss: 1.2172175645828247 2023-01-22 09:36:27.671316: step: 658/463, loss: 0.4758746027946472 2023-01-22 09:36:28.291041: step: 660/463, loss: 1.8471059799194336 2023-01-22 09:36:28.929000: step: 662/463, loss: 0.6793623566627502 2023-01-22 09:36:29.502058: step: 664/463, loss: 0.8186171650886536 2023-01-22 09:36:30.094191: step: 666/463, loss: 0.8648422360420227 2023-01-22 09:36:30.745302: step: 668/463, loss: 2.600332498550415 2023-01-22 09:36:31.348119: step: 670/463, loss: 0.4720839858055115 2023-01-22 09:36:31.925629: step: 672/463, loss: 0.2750377058982849 2023-01-22 09:36:32.453015: step: 674/463, loss: 0.5651376247406006 2023-01-22 09:36:33.069054: step: 676/463, loss: 1.260711908340454 2023-01-22 09:36:33.659177: step: 678/463, loss: 1.6037933826446533 2023-01-22 09:36:34.331048: step: 680/463, loss: 0.13473549485206604 2023-01-22 09:36:35.002740: step: 682/463, loss: 0.758983314037323 2023-01-22 09:36:35.694250: step: 684/463, loss: 0.4858092963695526 2023-01-22 09:36:36.290106: step: 686/463, loss: 2.1029160022735596 2023-01-22 09:36:36.908191: step: 688/463, loss: 0.9216176867485046 2023-01-22 09:36:37.501954: step: 690/463, loss: 0.5142749547958374 2023-01-22 09:36:38.114848: step: 692/463, loss: 0.31698527932167053 2023-01-22 09:36:38.687603: step: 694/463, loss: 0.31004756689071655 2023-01-22 09:36:39.357734: step: 696/463, loss: 0.9755090475082397 2023-01-22 09:36:39.932866: step: 698/463, loss: 0.3814958930015564 2023-01-22 09:36:40.455886: step: 700/463, loss: 0.4584546685218811 2023-01-22 09:36:41.098037: step: 702/463, loss: 1.25494384765625 2023-01-22 09:36:41.771307: step: 704/463, loss: 0.5057708024978638 2023-01-22 09:36:42.337086: step: 706/463, loss: 2.5170087814331055 2023-01-22 09:36:42.924193: step: 708/463, loss: 2.6502881050109863 2023-01-22 09:36:43.515034: step: 710/463, loss: 0.9171215295791626 2023-01-22 09:36:44.109383: step: 712/463, loss: 0.4922195374965668 2023-01-22 09:36:44.697880: step: 714/463, loss: 1.2491676807403564 2023-01-22 09:36:45.287306: step: 716/463, loss: 1.4253944158554077 2023-01-22 09:36:45.899914: step: 718/463, loss: 0.787155270576477 2023-01-22 09:36:46.515267: step: 720/463, loss: 1.5657223463058472 2023-01-22 09:36:47.075692: step: 722/463, loss: 1.7306480407714844 2023-01-22 09:36:47.743950: step: 724/463, loss: 0.4160867929458618 2023-01-22 09:36:48.397402: step: 726/463, loss: 0.6272135972976685 2023-01-22 09:36:49.096910: step: 728/463, loss: 0.5074421763420105 2023-01-22 09:36:49.685455: step: 730/463, loss: 1.9525392055511475 2023-01-22 09:36:50.323286: step: 732/463, loss: 1.1704788208007812 2023-01-22 09:36:50.953904: step: 734/463, loss: 1.7162446975708008 2023-01-22 09:36:51.586904: step: 736/463, loss: 0.9051508903503418 2023-01-22 09:36:52.239945: step: 738/463, loss: 0.7166047096252441 2023-01-22 09:36:52.945863: step: 740/463, loss: 0.504905641078949 2023-01-22 09:36:53.633825: step: 742/463, loss: 1.1095871925354004 2023-01-22 09:36:54.315367: step: 744/463, loss: 0.45236122608184814 2023-01-22 09:36:54.895657: step: 746/463, loss: 0.78248530626297 2023-01-22 09:36:55.587490: step: 748/463, loss: 1.0007086992263794 2023-01-22 09:36:56.175514: step: 750/463, loss: 0.3215717673301697 2023-01-22 09:36:56.776985: step: 752/463, loss: 1.448829174041748 2023-01-22 09:36:57.362216: step: 754/463, loss: 0.6961119771003723 2023-01-22 09:36:57.946958: step: 756/463, loss: 0.5924288034439087 2023-01-22 09:36:58.599349: step: 758/463, loss: 0.5901463031768799 2023-01-22 09:36:59.213739: step: 760/463, loss: 1.212836503982544 2023-01-22 09:36:59.816175: step: 762/463, loss: 1.0207247734069824 2023-01-22 09:37:00.448008: step: 764/463, loss: 0.5022910833358765 2023-01-22 09:37:01.028917: step: 766/463, loss: 0.4612976312637329 2023-01-22 09:37:01.625233: step: 768/463, loss: 0.495321661233902 2023-01-22 09:37:02.276501: step: 770/463, loss: 0.32203686237335205 2023-01-22 09:37:02.893725: step: 772/463, loss: 0.21196919679641724 2023-01-22 09:37:03.578904: step: 774/463, loss: 0.3725093901157379 2023-01-22 09:37:04.196891: step: 776/463, loss: 0.6345022916793823 2023-01-22 09:37:04.782645: step: 778/463, loss: 1.3286399841308594 2023-01-22 09:37:05.372843: step: 780/463, loss: 0.415843665599823 2023-01-22 09:37:05.945726: step: 782/463, loss: 0.5772085785865784 2023-01-22 09:37:06.586610: step: 784/463, loss: 0.5375340580940247 2023-01-22 09:37:07.197881: step: 786/463, loss: 0.2410525381565094 2023-01-22 09:37:07.796854: step: 788/463, loss: 3.295276641845703 2023-01-22 09:37:08.390307: step: 790/463, loss: 0.18998917937278748 2023-01-22 09:37:09.055153: step: 792/463, loss: 0.8517022132873535 2023-01-22 09:37:09.597662: step: 794/463, loss: 1.0473159551620483 2023-01-22 09:37:10.191543: step: 796/463, loss: 2.286813259124756 2023-01-22 09:37:10.806408: step: 798/463, loss: 0.2646630108356476 2023-01-22 09:37:11.420158: step: 800/463, loss: 0.5460038185119629 2023-01-22 09:37:12.012873: step: 802/463, loss: 1.433488368988037 2023-01-22 09:37:12.626627: step: 804/463, loss: 0.8649519085884094 2023-01-22 09:37:13.194999: step: 806/463, loss: 0.08898138999938965 2023-01-22 09:37:13.754305: step: 808/463, loss: 1.1761524677276611 2023-01-22 09:37:14.378074: step: 810/463, loss: 1.080528736114502 2023-01-22 09:37:14.984375: step: 812/463, loss: 1.019982933998108 2023-01-22 09:37:15.547158: step: 814/463, loss: 1.567225694656372 2023-01-22 09:37:16.137587: step: 816/463, loss: 0.7515830993652344 2023-01-22 09:37:16.808362: step: 818/463, loss: 0.5104256272315979 2023-01-22 09:37:17.372999: step: 820/463, loss: 1.502622365951538 2023-01-22 09:37:17.972319: step: 822/463, loss: 0.8216833472251892 2023-01-22 09:37:18.593523: step: 824/463, loss: 0.6181341409683228 2023-01-22 09:37:19.263030: step: 826/463, loss: 1.1912914514541626 2023-01-22 09:37:19.876155: step: 828/463, loss: 0.3649296462535858 2023-01-22 09:37:20.462698: step: 830/463, loss: 0.42700421810150146 2023-01-22 09:37:21.039709: step: 832/463, loss: 0.4368527829647064 2023-01-22 09:37:21.612602: step: 834/463, loss: 2.055616617202759 2023-01-22 09:37:22.242094: step: 836/463, loss: 0.5650148987770081 2023-01-22 09:37:22.818194: step: 838/463, loss: 0.3242678940296173 2023-01-22 09:37:23.402865: step: 840/463, loss: 0.1505160629749298 2023-01-22 09:37:23.956920: step: 842/463, loss: 2.7335596084594727 2023-01-22 09:37:24.592136: step: 844/463, loss: 0.9642425775527954 2023-01-22 09:37:25.262812: step: 846/463, loss: 0.5027278661727905 2023-01-22 09:37:25.908050: step: 848/463, loss: 4.763665199279785 2023-01-22 09:37:26.479528: step: 850/463, loss: 0.8516854047775269 2023-01-22 09:37:27.090925: step: 852/463, loss: 1.2815616130828857 2023-01-22 09:37:27.710185: step: 854/463, loss: 0.6073788404464722 2023-01-22 09:37:28.309874: step: 856/463, loss: 0.8202017545700073 2023-01-22 09:37:28.865423: step: 858/463, loss: 0.46286290884017944 2023-01-22 09:37:29.504582: step: 860/463, loss: 0.9589809775352478 2023-01-22 09:37:30.152658: step: 862/463, loss: 0.636374294757843 2023-01-22 09:37:30.688938: step: 864/463, loss: 0.35780298709869385 2023-01-22 09:37:31.291638: step: 866/463, loss: 0.8220357298851013 2023-01-22 09:37:32.001000: step: 868/463, loss: 0.5103381872177124 2023-01-22 09:37:32.592706: step: 870/463, loss: 0.11688251793384552 2023-01-22 09:37:33.260235: step: 872/463, loss: 3.6261203289031982 2023-01-22 09:37:33.882617: step: 874/463, loss: 1.783101201057434 2023-01-22 09:37:34.474330: step: 876/463, loss: 0.3184046149253845 2023-01-22 09:37:35.055308: step: 878/463, loss: 0.8564475178718567 2023-01-22 09:37:35.627716: step: 880/463, loss: 1.0247830152511597 2023-01-22 09:37:36.258299: step: 882/463, loss: 0.13300535082817078 2023-01-22 09:37:36.832827: step: 884/463, loss: 1.0001450777053833 2023-01-22 09:37:37.450290: step: 886/463, loss: 1.7600239515304565 2023-01-22 09:37:38.013211: step: 888/463, loss: 2.075435161590576 2023-01-22 09:37:38.619939: step: 890/463, loss: 1.5437798500061035 2023-01-22 09:37:39.164858: step: 892/463, loss: 0.7232778072357178 2023-01-22 09:37:39.830430: step: 894/463, loss: 0.30222436785697937 2023-01-22 09:37:40.578377: step: 896/463, loss: 1.6298611164093018 2023-01-22 09:37:41.179371: step: 898/463, loss: 0.6090134382247925 2023-01-22 09:37:41.823257: step: 900/463, loss: 0.9103258848190308 2023-01-22 09:37:42.424156: step: 902/463, loss: 0.9974690675735474 2023-01-22 09:37:43.021422: step: 904/463, loss: 2.5768048763275146 2023-01-22 09:37:43.737378: step: 906/463, loss: 0.4563380479812622 2023-01-22 09:37:44.381290: step: 908/463, loss: 1.9918770790100098 2023-01-22 09:37:45.055802: step: 910/463, loss: 0.5253063440322876 2023-01-22 09:37:45.618186: step: 912/463, loss: 0.7533784508705139 2023-01-22 09:37:46.238353: step: 914/463, loss: 0.44251173734664917 2023-01-22 09:37:46.824818: step: 916/463, loss: 0.7267775535583496 2023-01-22 09:37:47.444414: step: 918/463, loss: 7.284496307373047 2023-01-22 09:37:48.038924: step: 920/463, loss: 0.13394027948379517 2023-01-22 09:37:48.646383: step: 922/463, loss: 0.5903412103652954 2023-01-22 09:37:49.198957: step: 924/463, loss: 0.48630595207214355 2023-01-22 09:37:49.819039: step: 926/463, loss: 0.1694043129682541 ================================================== Loss: 1.065 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36037458430841235, 'r': 0.2413894274399802, 'f1': 0.28911870059288536}, 'combined': 0.21303483201581025, 'epoch': 4} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34769685687236057, 'r': 0.3008056293859046, 'f1': 0.3225559626469612}, 'combined': 0.25001945430051536, 'epoch': 4} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3137062272435211, 'r': 0.2416788012540219, 'f1': 0.2730219255324106}, 'combined': 0.20117405039230254, 'epoch': 4} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3330316267916425, 'r': 0.3070612705739456, 'f1': 0.31951960374759014}, 'combined': 0.24766591295267754, 'epoch': 4} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32068910740099443, 'r': 0.23793062807170554, 'f1': 0.2731796100082545}, 'combined': 0.2012902389534507, 'epoch': 4} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33195207997018616, 'r': 0.29479781046893594, 'f1': 0.31227367678439283}, 'combined': 0.2420494528185246, 'epoch': 4} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31, 'r': 0.22142857142857142, 'f1': 0.25833333333333336}, 'combined': 0.17222222222222222, 'epoch': 4} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.26136363636363635, 'r': 0.25, 'f1': 0.25555555555555554}, 'combined': 0.12777777777777777, 'epoch': 4} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2222222222222222, 'r': 0.06896551724137931, 'f1': 0.10526315789473684}, 'combined': 0.07017543859649122, 'epoch': 4} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31490492170022366, 'r': 0.26659564393939394, 'f1': 0.2887435897435897}, 'combined': 0.2127584345479082, 'epoch': 3} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32583254485752133, 'r': 0.33121326578177396, 'f1': 0.32850087325034905}, 'combined': 0.2546274711318495, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2692307692307692, 'r': 0.3, 'f1': 0.28378378378378377}, 'combined': 0.18918918918918917, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3137062272435211, 'r': 0.2416788012540219, 'f1': 0.2730219255324106}, 'combined': 0.20117405039230254, 'epoch': 4} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3330316267916425, 'r': 0.3070612705739456, 'f1': 0.31951960374759014}, 'combined': 0.24766591295267754, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.26136363636363635, 'r': 0.25, 'f1': 0.25555555555555554}, 'combined': 0.12777777777777777, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.25784122500224194, 'r': 0.259306231962482, 'f1': 0.25857165340168176}, 'combined': 0.19052648145387074, 'epoch': 2} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33718038614347295, 'r': 0.304050632867668, 'f1': 0.31975967087676527}, 'combined': 0.24785199369395203, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4583333333333333, 'r': 0.1896551724137931, 'f1': 0.2682926829268293}, 'combined': 0.17886178861788618, 'epoch': 2} ****************************** Epoch: 5 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 09:40:29.297018: step: 2/463, loss: 0.6280529499053955 2023-01-22 09:40:29.913525: step: 4/463, loss: 1.1968387365341187 2023-01-22 09:40:30.544221: step: 6/463, loss: 0.1501970887184143 2023-01-22 09:40:31.141862: step: 8/463, loss: 1.8222169876098633 2023-01-22 09:40:31.743909: step: 10/463, loss: 0.389104962348938 2023-01-22 09:40:32.418580: step: 12/463, loss: 1.9284547567367554 2023-01-22 09:40:33.095980: step: 14/463, loss: 0.21964147686958313 2023-01-22 09:40:33.700639: step: 16/463, loss: 1.3979219198226929 2023-01-22 09:40:34.377667: step: 18/463, loss: 0.25654852390289307 2023-01-22 09:40:34.960651: step: 20/463, loss: 0.2708037793636322 2023-01-22 09:40:35.575168: step: 22/463, loss: 0.8239782452583313 2023-01-22 09:40:36.166909: step: 24/463, loss: 0.5828837156295776 2023-01-22 09:40:36.849232: step: 26/463, loss: 1.0815584659576416 2023-01-22 09:40:37.480133: step: 28/463, loss: 0.33342719078063965 2023-01-22 09:40:38.126138: step: 30/463, loss: 0.7502411603927612 2023-01-22 09:40:38.691439: step: 32/463, loss: 0.3727218210697174 2023-01-22 09:40:39.356295: step: 34/463, loss: 0.5319357514381409 2023-01-22 09:40:39.956622: step: 36/463, loss: 0.24648316204547882 2023-01-22 09:40:40.570333: step: 38/463, loss: 0.7171517610549927 2023-01-22 09:40:41.176069: step: 40/463, loss: 0.7860630750656128 2023-01-22 09:40:41.762198: step: 42/463, loss: 0.19975592195987701 2023-01-22 09:40:42.440848: step: 44/463, loss: 0.23239928483963013 2023-01-22 09:40:43.053012: step: 46/463, loss: 0.30570685863494873 2023-01-22 09:40:43.729478: step: 48/463, loss: 1.0062568187713623 2023-01-22 09:40:44.327983: step: 50/463, loss: 0.4155263304710388 2023-01-22 09:40:45.039039: step: 52/463, loss: 0.9353864789009094 2023-01-22 09:40:45.609264: step: 54/463, loss: 0.41787198185920715 2023-01-22 09:40:46.204938: step: 56/463, loss: 0.31391215324401855 2023-01-22 09:40:46.881175: step: 58/463, loss: 0.7312435507774353 2023-01-22 09:40:47.466423: step: 60/463, loss: 0.7266860008239746 2023-01-22 09:40:48.088332: step: 62/463, loss: 0.48195719718933105 2023-01-22 09:40:48.703365: step: 64/463, loss: 1.275835394859314 2023-01-22 09:40:49.257330: step: 66/463, loss: 1.4393664598464966 2023-01-22 09:40:49.884571: step: 68/463, loss: 0.920201301574707 2023-01-22 09:40:50.500802: step: 70/463, loss: 0.6970359683036804 2023-01-22 09:40:51.089960: step: 72/463, loss: 0.7751462459564209 2023-01-22 09:40:51.802934: step: 74/463, loss: 0.593722403049469 2023-01-22 09:40:52.449466: step: 76/463, loss: 0.45327481627464294 2023-01-22 09:40:53.140787: step: 78/463, loss: 0.6711190342903137 2023-01-22 09:40:53.769681: step: 80/463, loss: 0.9234674572944641 2023-01-22 09:40:54.368361: step: 82/463, loss: 0.8867120146751404 2023-01-22 09:40:55.008206: step: 84/463, loss: 0.5921074151992798 2023-01-22 09:40:55.620099: step: 86/463, loss: 0.26304155588150024 2023-01-22 09:40:56.229026: step: 88/463, loss: 0.3195022940635681 2023-01-22 09:40:56.789487: step: 90/463, loss: 0.22182859480381012 2023-01-22 09:40:57.434911: step: 92/463, loss: 1.2784497737884521 2023-01-22 09:40:58.061007: step: 94/463, loss: 0.7152591347694397 2023-01-22 09:40:58.691133: step: 96/463, loss: 0.9055781364440918 2023-01-22 09:40:59.250871: step: 98/463, loss: 2.5438709259033203 2023-01-22 09:40:59.814613: step: 100/463, loss: 0.17646795511245728 2023-01-22 09:41:00.434140: step: 102/463, loss: 1.8708586692810059 2023-01-22 09:41:01.076718: step: 104/463, loss: 0.5604712963104248 2023-01-22 09:41:01.671744: step: 106/463, loss: 0.6748901009559631 2023-01-22 09:41:02.275944: step: 108/463, loss: 0.22819286584854126 2023-01-22 09:41:02.837564: step: 110/463, loss: 0.9221715331077576 2023-01-22 09:41:03.426686: step: 112/463, loss: 0.2510334551334381 2023-01-22 09:41:04.030036: step: 114/463, loss: 0.5729089379310608 2023-01-22 09:41:04.657763: step: 116/463, loss: 0.4011878967285156 2023-01-22 09:41:05.225275: step: 118/463, loss: 0.664446234703064 2023-01-22 09:41:05.854475: step: 120/463, loss: 0.42498496174812317 2023-01-22 09:41:06.478462: step: 122/463, loss: 0.3810936510562897 2023-01-22 09:41:07.025914: step: 124/463, loss: 0.16681428253650665 2023-01-22 09:41:07.612177: step: 126/463, loss: 0.29393795132637024 2023-01-22 09:41:08.212723: step: 128/463, loss: 3.136566638946533 2023-01-22 09:41:08.782956: step: 130/463, loss: 1.044725775718689 2023-01-22 09:41:09.391174: step: 132/463, loss: 0.5069814324378967 2023-01-22 09:41:10.058392: step: 134/463, loss: 1.2128422260284424 2023-01-22 09:41:10.628061: step: 136/463, loss: 0.7545170783996582 2023-01-22 09:41:11.189097: step: 138/463, loss: 0.6737390756607056 2023-01-22 09:41:11.770099: step: 140/463, loss: 0.6108490824699402 2023-01-22 09:41:12.394643: step: 142/463, loss: 1.2781355381011963 2023-01-22 09:41:12.992708: step: 144/463, loss: 0.5160717368125916 2023-01-22 09:41:13.634356: step: 146/463, loss: 0.4122994542121887 2023-01-22 09:41:14.266571: step: 148/463, loss: 1.1514430046081543 2023-01-22 09:41:14.866577: step: 150/463, loss: 0.3080209195613861 2023-01-22 09:41:15.469386: step: 152/463, loss: 0.19173398613929749 2023-01-22 09:41:16.076982: step: 154/463, loss: 1.4000709056854248 2023-01-22 09:41:16.710158: step: 156/463, loss: 0.42382609844207764 2023-01-22 09:41:17.318929: step: 158/463, loss: 0.659846305847168 2023-01-22 09:41:17.901917: step: 160/463, loss: 0.9543033838272095 2023-01-22 09:41:18.515811: step: 162/463, loss: 0.22856488823890686 2023-01-22 09:41:19.165876: step: 164/463, loss: 0.3018302619457245 2023-01-22 09:41:19.848420: step: 166/463, loss: 1.1354354619979858 2023-01-22 09:41:20.436966: step: 168/463, loss: 0.6017375588417053 2023-01-22 09:41:21.039500: step: 170/463, loss: 0.6742135286331177 2023-01-22 09:41:21.620025: step: 172/463, loss: 0.4315274953842163 2023-01-22 09:41:22.236509: step: 174/463, loss: 0.9956346750259399 2023-01-22 09:41:22.870916: step: 176/463, loss: 0.8927717208862305 2023-01-22 09:41:23.456840: step: 178/463, loss: 0.3627506494522095 2023-01-22 09:41:24.071676: step: 180/463, loss: 2.179762840270996 2023-01-22 09:41:24.699256: step: 182/463, loss: 0.3978305160999298 2023-01-22 09:41:25.368926: step: 184/463, loss: 0.6732479929924011 2023-01-22 09:41:25.970451: step: 186/463, loss: 1.0474364757537842 2023-01-22 09:41:26.574727: step: 188/463, loss: 0.08027077466249466 2023-01-22 09:41:27.213974: step: 190/463, loss: 1.8433949947357178 2023-01-22 09:41:27.831005: step: 192/463, loss: 0.5679114460945129 2023-01-22 09:41:28.424620: step: 194/463, loss: 0.1965644657611847 2023-01-22 09:41:29.018118: step: 196/463, loss: 0.3184294104576111 2023-01-22 09:41:29.614041: step: 198/463, loss: 11.678325653076172 2023-01-22 09:41:30.209577: step: 200/463, loss: 0.7518364191055298 2023-01-22 09:41:30.788918: step: 202/463, loss: 1.3319413661956787 2023-01-22 09:41:31.373052: step: 204/463, loss: 0.9582511186599731 2023-01-22 09:41:31.963686: step: 206/463, loss: 2.8840670585632324 2023-01-22 09:41:32.558566: step: 208/463, loss: 0.5567591190338135 2023-01-22 09:41:33.157085: step: 210/463, loss: 1.0928796529769897 2023-01-22 09:41:33.680078: step: 212/463, loss: 0.2947782874107361 2023-01-22 09:41:34.356029: step: 214/463, loss: 0.5035770535469055 2023-01-22 09:41:34.964230: step: 216/463, loss: 0.8390800952911377 2023-01-22 09:41:35.605953: step: 218/463, loss: 1.0294955968856812 2023-01-22 09:41:36.180892: step: 220/463, loss: 1.071850299835205 2023-01-22 09:41:36.845926: step: 222/463, loss: 0.6560050845146179 2023-01-22 09:41:37.415288: step: 224/463, loss: 0.23306003212928772 2023-01-22 09:41:38.003956: step: 226/463, loss: 1.802243709564209 2023-01-22 09:41:38.706511: step: 228/463, loss: 0.8357193470001221 2023-01-22 09:41:39.281934: step: 230/463, loss: 0.8067852258682251 2023-01-22 09:41:39.861247: step: 232/463, loss: 0.2559684216976166 2023-01-22 09:41:40.464730: step: 234/463, loss: 0.8133226633071899 2023-01-22 09:41:41.086530: step: 236/463, loss: 1.7267839908599854 2023-01-22 09:41:41.753175: step: 238/463, loss: 0.9141260385513306 2023-01-22 09:41:42.363768: step: 240/463, loss: 0.602196216583252 2023-01-22 09:41:42.957462: step: 242/463, loss: 2.07576584815979 2023-01-22 09:41:43.558708: step: 244/463, loss: 0.26883256435394287 2023-01-22 09:41:44.189741: step: 246/463, loss: 0.2601020932197571 2023-01-22 09:41:44.766441: step: 248/463, loss: 0.5728490352630615 2023-01-22 09:41:45.346628: step: 250/463, loss: 1.3841387033462524 2023-01-22 09:41:46.023243: step: 252/463, loss: 1.2997407913208008 2023-01-22 09:41:46.646708: step: 254/463, loss: 0.1857355237007141 2023-01-22 09:41:47.302753: step: 256/463, loss: 0.2503657341003418 2023-01-22 09:41:47.908861: step: 258/463, loss: 0.266375869512558 2023-01-22 09:41:48.561809: step: 260/463, loss: 0.636852502822876 2023-01-22 09:41:49.128079: step: 262/463, loss: 1.0548986196517944 2023-01-22 09:41:49.714334: step: 264/463, loss: 0.3508453369140625 2023-01-22 09:41:50.310883: step: 266/463, loss: 1.0740405321121216 2023-01-22 09:41:50.943746: step: 268/463, loss: 0.7337073087692261 2023-01-22 09:41:51.614084: step: 270/463, loss: 1.2571613788604736 2023-01-22 09:41:52.161409: step: 272/463, loss: 0.3398612141609192 2023-01-22 09:41:52.881294: step: 274/463, loss: 1.3762160539627075 2023-01-22 09:41:53.455693: step: 276/463, loss: 0.37680551409721375 2023-01-22 09:41:54.024130: step: 278/463, loss: 1.4244345426559448 2023-01-22 09:41:54.678633: step: 280/463, loss: 0.2548726201057434 2023-01-22 09:41:55.298501: step: 282/463, loss: 0.25318050384521484 2023-01-22 09:41:55.965797: step: 284/463, loss: 2.1174185276031494 2023-01-22 09:41:56.593900: step: 286/463, loss: 0.25583475828170776 2023-01-22 09:41:57.197402: step: 288/463, loss: 0.8417084813117981 2023-01-22 09:41:57.811580: step: 290/463, loss: 0.22802284359931946 2023-01-22 09:41:58.452798: step: 292/463, loss: 1.4744998216629028 2023-01-22 09:41:59.135931: step: 294/463, loss: 3.5566883087158203 2023-01-22 09:41:59.760897: step: 296/463, loss: 0.6578130722045898 2023-01-22 09:42:00.551033: step: 298/463, loss: 0.8024518489837646 2023-01-22 09:42:01.107461: step: 300/463, loss: 2.375673294067383 2023-01-22 09:42:01.749120: step: 302/463, loss: 3.3944952487945557 2023-01-22 09:42:02.371064: step: 304/463, loss: 0.6203200817108154 2023-01-22 09:42:03.032479: step: 306/463, loss: 1.3409645557403564 2023-01-22 09:42:03.586626: step: 308/463, loss: 0.6700774431228638 2023-01-22 09:42:04.210495: step: 310/463, loss: 0.9447431564331055 2023-01-22 09:42:04.863171: step: 312/463, loss: 0.7548693418502808 2023-01-22 09:42:05.411152: step: 314/463, loss: 1.5705002546310425 2023-01-22 09:42:06.003195: step: 316/463, loss: 0.3633936941623688 2023-01-22 09:42:06.598649: step: 318/463, loss: 0.24533593654632568 2023-01-22 09:42:07.251801: step: 320/463, loss: 0.4184809923171997 2023-01-22 09:42:07.904246: step: 322/463, loss: 0.7727669477462769 2023-01-22 09:42:08.532171: step: 324/463, loss: 0.9991454482078552 2023-01-22 09:42:09.199745: step: 326/463, loss: 0.1757662296295166 2023-01-22 09:42:09.755899: step: 328/463, loss: 0.6562673449516296 2023-01-22 09:42:10.330964: step: 330/463, loss: 0.6632398366928101 2023-01-22 09:42:10.928426: step: 332/463, loss: 1.1940343379974365 2023-01-22 09:42:11.487708: step: 334/463, loss: 1.6092807054519653 2023-01-22 09:42:12.086217: step: 336/463, loss: 0.438454270362854 2023-01-22 09:42:12.684281: step: 338/463, loss: 0.5626126527786255 2023-01-22 09:42:13.286735: step: 340/463, loss: 2.9122085571289062 2023-01-22 09:42:13.940804: step: 342/463, loss: 1.530444622039795 2023-01-22 09:42:14.588035: step: 344/463, loss: 0.8764356970787048 2023-01-22 09:42:15.204359: step: 346/463, loss: 0.8195348978042603 2023-01-22 09:42:15.819543: step: 348/463, loss: 0.29962530732154846 2023-01-22 09:42:16.465859: step: 350/463, loss: 1.9524885416030884 2023-01-22 09:42:17.079361: step: 352/463, loss: 0.6456241607666016 2023-01-22 09:42:17.713058: step: 354/463, loss: 0.656253457069397 2023-01-22 09:42:18.365523: step: 356/463, loss: 0.5095264911651611 2023-01-22 09:42:18.950451: step: 358/463, loss: 3.0658915042877197 2023-01-22 09:42:19.529967: step: 360/463, loss: 1.7187938690185547 2023-01-22 09:42:20.106835: step: 362/463, loss: 0.7578294277191162 2023-01-22 09:42:20.843109: step: 364/463, loss: 0.29678162932395935 2023-01-22 09:42:21.393021: step: 366/463, loss: 0.9792714715003967 2023-01-22 09:42:21.931603: step: 368/463, loss: 0.3181105852127075 2023-01-22 09:42:22.470122: step: 370/463, loss: 1.940513253211975 2023-01-22 09:42:23.082459: step: 372/463, loss: 0.2905747592449188 2023-01-22 09:42:23.661588: step: 374/463, loss: 0.9227666854858398 2023-01-22 09:42:24.240787: step: 376/463, loss: 0.8191882967948914 2023-01-22 09:42:24.892548: step: 378/463, loss: 0.6728701591491699 2023-01-22 09:42:25.449619: step: 380/463, loss: 0.8327441811561584 2023-01-22 09:42:26.127998: step: 382/463, loss: 0.6571238040924072 2023-01-22 09:42:26.732326: step: 384/463, loss: 1.015677571296692 2023-01-22 09:42:27.353464: step: 386/463, loss: 0.5491654872894287 2023-01-22 09:42:27.989445: step: 388/463, loss: 0.6769430637359619 2023-01-22 09:42:28.573100: step: 390/463, loss: 0.9880294799804688 2023-01-22 09:42:29.171029: step: 392/463, loss: 2.2204930782318115 2023-01-22 09:42:29.862923: step: 394/463, loss: 0.524406909942627 2023-01-22 09:42:30.440290: step: 396/463, loss: 0.38486480712890625 2023-01-22 09:42:30.979971: step: 398/463, loss: 0.9057947397232056 2023-01-22 09:42:31.603614: step: 400/463, loss: 0.5521529912948608 2023-01-22 09:42:32.252515: step: 402/463, loss: 0.40139076113700867 2023-01-22 09:42:32.850275: step: 404/463, loss: 2.3127365112304688 2023-01-22 09:42:33.477491: step: 406/463, loss: 0.1813669502735138 2023-01-22 09:42:34.105557: step: 408/463, loss: 0.41743168234825134 2023-01-22 09:42:34.745966: step: 410/463, loss: 1.1886844635009766 2023-01-22 09:42:35.314526: step: 412/463, loss: 0.5159401893615723 2023-01-22 09:42:35.908101: step: 414/463, loss: 0.2798379361629486 2023-01-22 09:42:36.476805: step: 416/463, loss: 1.701764702796936 2023-01-22 09:42:37.033403: step: 418/463, loss: 0.22832989692687988 2023-01-22 09:42:37.729984: step: 420/463, loss: 1.0826969146728516 2023-01-22 09:42:38.301808: step: 422/463, loss: 1.515113115310669 2023-01-22 09:42:38.959637: step: 424/463, loss: 0.5910017490386963 2023-01-22 09:42:39.567236: step: 426/463, loss: 0.570904016494751 2023-01-22 09:42:40.224270: step: 428/463, loss: 3.435241937637329 2023-01-22 09:42:40.808336: step: 430/463, loss: 0.38170838356018066 2023-01-22 09:42:41.526856: step: 432/463, loss: 0.8956758975982666 2023-01-22 09:42:42.204086: step: 434/463, loss: 1.1432406902313232 2023-01-22 09:42:42.783008: step: 436/463, loss: 0.6462689638137817 2023-01-22 09:42:43.454859: step: 438/463, loss: 0.33242911100387573 2023-01-22 09:42:44.074745: step: 440/463, loss: 0.4339667558670044 2023-01-22 09:42:44.724305: step: 442/463, loss: 0.5501629710197449 2023-01-22 09:42:45.335238: step: 444/463, loss: 0.9211000800132751 2023-01-22 09:42:46.076655: step: 446/463, loss: 0.8362309336662292 2023-01-22 09:42:46.691506: step: 448/463, loss: 0.2754156291484833 2023-01-22 09:42:47.362409: step: 450/463, loss: 1.3154528141021729 2023-01-22 09:42:48.017324: step: 452/463, loss: 1.008116364479065 2023-01-22 09:42:48.643442: step: 454/463, loss: 1.3285428285598755 2023-01-22 09:42:49.247239: step: 456/463, loss: 1.4497106075286865 2023-01-22 09:42:49.893831: step: 458/463, loss: 0.48949024081230164 2023-01-22 09:42:50.473865: step: 460/463, loss: 0.4763083755970001 2023-01-22 09:42:51.044133: step: 462/463, loss: 1.9459871053695679 2023-01-22 09:42:51.706447: step: 464/463, loss: 0.3341725170612335 2023-01-22 09:42:52.324592: step: 466/463, loss: 0.22736842930316925 2023-01-22 09:42:52.940411: step: 468/463, loss: 0.57209312915802 2023-01-22 09:42:53.567261: step: 470/463, loss: 0.2902805209159851 2023-01-22 09:42:54.218751: step: 472/463, loss: 0.5204944610595703 2023-01-22 09:42:54.843150: step: 474/463, loss: 1.1368167400360107 2023-01-22 09:42:55.486730: step: 476/463, loss: 0.8238862752914429 2023-01-22 09:42:56.117546: step: 478/463, loss: 2.135991096496582 2023-01-22 09:42:56.667576: step: 480/463, loss: 1.2142736911773682 2023-01-22 09:42:57.280602: step: 482/463, loss: 0.9453557729721069 2023-01-22 09:42:57.911475: step: 484/463, loss: 1.0474512577056885 2023-01-22 09:42:58.530238: step: 486/463, loss: 0.2816633880138397 2023-01-22 09:42:59.187809: step: 488/463, loss: 0.6661911606788635 2023-01-22 09:42:59.826299: step: 490/463, loss: 0.3506323993206024 2023-01-22 09:43:00.424287: step: 492/463, loss: 0.6111321449279785 2023-01-22 09:43:01.026455: step: 494/463, loss: 0.2844662368297577 2023-01-22 09:43:01.671762: step: 496/463, loss: 0.8982583284378052 2023-01-22 09:43:02.289684: step: 498/463, loss: 0.12321044504642487 2023-01-22 09:43:02.984229: step: 500/463, loss: 0.2831031382083893 2023-01-22 09:43:03.633698: step: 502/463, loss: 0.44888120889663696 2023-01-22 09:43:04.237533: step: 504/463, loss: 0.4643210768699646 2023-01-22 09:43:04.833029: step: 506/463, loss: 0.7459716200828552 2023-01-22 09:43:05.451644: step: 508/463, loss: 1.4252722263336182 2023-01-22 09:43:06.037515: step: 510/463, loss: 0.723647952079773 2023-01-22 09:43:06.587372: step: 512/463, loss: 0.33296412229537964 2023-01-22 09:43:07.264704: step: 514/463, loss: 5.2945170402526855 2023-01-22 09:43:08.008209: step: 516/463, loss: 0.49359017610549927 2023-01-22 09:43:08.631271: step: 518/463, loss: 0.6195040941238403 2023-01-22 09:43:09.200555: step: 520/463, loss: 0.34351181983947754 2023-01-22 09:43:09.735869: step: 522/463, loss: 0.32653310894966125 2023-01-22 09:43:10.333279: step: 524/463, loss: 0.5711261034011841 2023-01-22 09:43:10.893023: step: 526/463, loss: 3.975227117538452 2023-01-22 09:43:11.420416: step: 528/463, loss: 0.2498871386051178 2023-01-22 09:43:12.071562: step: 530/463, loss: 0.8269243240356445 2023-01-22 09:43:12.662422: step: 532/463, loss: 0.9738156199455261 2023-01-22 09:43:13.264508: step: 534/463, loss: 0.4229276478290558 2023-01-22 09:43:13.857128: step: 536/463, loss: 1.4646267890930176 2023-01-22 09:43:14.483892: step: 538/463, loss: 1.0056730508804321 2023-01-22 09:43:15.158400: step: 540/463, loss: 0.16793085634708405 2023-01-22 09:43:15.750567: step: 542/463, loss: 0.19599169492721558 2023-01-22 09:43:16.387064: step: 544/463, loss: 0.5333278179168701 2023-01-22 09:43:16.995791: step: 546/463, loss: 0.5272350907325745 2023-01-22 09:43:17.611274: step: 548/463, loss: 0.17491260170936584 2023-01-22 09:43:18.206785: step: 550/463, loss: 0.2488846480846405 2023-01-22 09:43:18.787709: step: 552/463, loss: 3.257854461669922 2023-01-22 09:43:19.479134: step: 554/463, loss: 0.38660240173339844 2023-01-22 09:43:20.052504: step: 556/463, loss: 0.17643235623836517 2023-01-22 09:43:20.656118: step: 558/463, loss: 0.3294633626937866 2023-01-22 09:43:21.283904: step: 560/463, loss: 0.8908080458641052 2023-01-22 09:43:21.924939: step: 562/463, loss: 0.5673060417175293 2023-01-22 09:43:22.507298: step: 564/463, loss: 2.551823854446411 2023-01-22 09:43:23.141095: step: 566/463, loss: 0.20586590468883514 2023-01-22 09:43:23.738099: step: 568/463, loss: 0.8672521710395813 2023-01-22 09:43:24.374423: step: 570/463, loss: 0.5010704398155212 2023-01-22 09:43:24.990280: step: 572/463, loss: 0.48847177624702454 2023-01-22 09:43:25.743701: step: 574/463, loss: 0.9841750264167786 2023-01-22 09:43:26.347479: step: 576/463, loss: 0.10429425537586212 2023-01-22 09:43:26.890543: step: 578/463, loss: 0.6549193859100342 2023-01-22 09:43:27.549751: step: 580/463, loss: 0.3207032084465027 2023-01-22 09:43:28.143033: step: 582/463, loss: 1.3616400957107544 2023-01-22 09:43:28.783698: step: 584/463, loss: 0.2568977177143097 2023-01-22 09:43:29.456604: step: 586/463, loss: 1.2974032163619995 2023-01-22 09:43:30.060319: step: 588/463, loss: 1.5133144855499268 2023-01-22 09:43:30.717483: step: 590/463, loss: 0.876836895942688 2023-01-22 09:43:31.318208: step: 592/463, loss: 5.248604774475098 2023-01-22 09:43:31.902545: step: 594/463, loss: 0.7509450912475586 2023-01-22 09:43:32.535765: step: 596/463, loss: 1.0288889408111572 2023-01-22 09:43:33.251926: step: 598/463, loss: 0.23910602927207947 2023-01-22 09:43:33.879471: step: 600/463, loss: 1.0580503940582275 2023-01-22 09:43:34.588582: step: 602/463, loss: 0.4943075478076935 2023-01-22 09:43:35.181912: step: 604/463, loss: 1.9560822248458862 2023-01-22 09:43:35.799150: step: 606/463, loss: 0.35893234610557556 2023-01-22 09:43:36.364904: step: 608/463, loss: 0.8699542880058289 2023-01-22 09:43:36.955345: step: 610/463, loss: 0.3269708454608917 2023-01-22 09:43:37.589800: step: 612/463, loss: 0.17714658379554749 2023-01-22 09:43:38.208630: step: 614/463, loss: 1.839385747909546 2023-01-22 09:43:38.746887: step: 616/463, loss: 0.6757277846336365 2023-01-22 09:43:39.349869: step: 618/463, loss: 0.20764169096946716 2023-01-22 09:43:39.972883: step: 620/463, loss: 0.8209818601608276 2023-01-22 09:43:40.581645: step: 622/463, loss: 2.5471646785736084 2023-01-22 09:43:41.192464: step: 624/463, loss: 5.142049312591553 2023-01-22 09:43:41.805337: step: 626/463, loss: 1.1702842712402344 2023-01-22 09:43:42.353056: step: 628/463, loss: 0.18336109817028046 2023-01-22 09:43:42.978816: step: 630/463, loss: 1.1800868511199951 2023-01-22 09:43:43.552831: step: 632/463, loss: 0.2515396177768707 2023-01-22 09:43:44.226210: step: 634/463, loss: 0.32376086711883545 2023-01-22 09:43:44.841284: step: 636/463, loss: 0.5929350852966309 2023-01-22 09:43:45.516044: step: 638/463, loss: 0.7307126522064209 2023-01-22 09:43:46.284614: step: 640/463, loss: 1.8886864185333252 2023-01-22 09:43:46.800964: step: 642/463, loss: 1.7407853603363037 2023-01-22 09:43:47.417967: step: 644/463, loss: 1.293758511543274 2023-01-22 09:43:48.023076: step: 646/463, loss: 0.3747621774673462 2023-01-22 09:43:48.660730: step: 648/463, loss: 0.81679767370224 2023-01-22 09:43:49.289546: step: 650/463, loss: 0.8737527132034302 2023-01-22 09:43:49.891507: step: 652/463, loss: 1.2232550382614136 2023-01-22 09:43:50.544112: step: 654/463, loss: 4.435805320739746 2023-01-22 09:43:51.124145: step: 656/463, loss: 0.246099054813385 2023-01-22 09:43:51.746804: step: 658/463, loss: 0.35442981123924255 2023-01-22 09:43:52.313805: step: 660/463, loss: 0.6668121814727783 2023-01-22 09:43:52.963033: step: 662/463, loss: 2.919111728668213 2023-01-22 09:43:53.635425: step: 664/463, loss: 1.1155575513839722 2023-01-22 09:43:54.220536: step: 666/463, loss: 0.32291388511657715 2023-01-22 09:43:54.875573: step: 668/463, loss: 0.8881826996803284 2023-01-22 09:43:55.514304: step: 670/463, loss: 2.108267307281494 2023-01-22 09:43:56.197407: step: 672/463, loss: 2.5875234603881836 2023-01-22 09:43:56.774705: step: 674/463, loss: 0.4154404401779175 2023-01-22 09:43:57.362026: step: 676/463, loss: 0.5452799797058105 2023-01-22 09:43:57.966201: step: 678/463, loss: 0.33030974864959717 2023-01-22 09:43:58.569332: step: 680/463, loss: 0.7643932104110718 2023-01-22 09:43:59.125154: step: 682/463, loss: 1.0512075424194336 2023-01-22 09:43:59.753221: step: 684/463, loss: 0.29171621799468994 2023-01-22 09:44:00.328063: step: 686/463, loss: 0.17222242057323456 2023-01-22 09:44:00.889935: step: 688/463, loss: 0.7244187593460083 2023-01-22 09:44:01.505734: step: 690/463, loss: 0.9839054346084595 2023-01-22 09:44:02.137258: step: 692/463, loss: 0.3827417194843292 2023-01-22 09:44:02.774206: step: 694/463, loss: 0.8360433578491211 2023-01-22 09:44:03.440684: step: 696/463, loss: 1.2717273235321045 2023-01-22 09:44:04.070646: step: 698/463, loss: 0.11977909505367279 2023-01-22 09:44:04.729159: step: 700/463, loss: 1.086526870727539 2023-01-22 09:44:05.335161: step: 702/463, loss: 0.8046153783798218 2023-01-22 09:44:05.935463: step: 704/463, loss: 0.2607859969139099 2023-01-22 09:44:06.550925: step: 706/463, loss: 0.2078489065170288 2023-01-22 09:44:07.147176: step: 708/463, loss: 0.5821657776832581 2023-01-22 09:44:07.731004: step: 710/463, loss: 0.7629448771476746 2023-01-22 09:44:08.331604: step: 712/463, loss: 0.5218830108642578 2023-01-22 09:44:08.876492: step: 714/463, loss: 0.13013869524002075 2023-01-22 09:44:09.499069: step: 716/463, loss: 0.41951417922973633 2023-01-22 09:44:10.218471: step: 718/463, loss: 0.945431113243103 2023-01-22 09:44:10.775083: step: 720/463, loss: 0.8925947546958923 2023-01-22 09:44:11.420261: step: 722/463, loss: 1.3860774040222168 2023-01-22 09:44:12.003646: step: 724/463, loss: 0.6957374811172485 2023-01-22 09:44:12.694637: step: 726/463, loss: 0.3541806638240814 2023-01-22 09:44:13.292518: step: 728/463, loss: 0.7336874008178711 2023-01-22 09:44:13.927699: step: 730/463, loss: 3.7046642303466797 2023-01-22 09:44:14.544622: step: 732/463, loss: 0.5892671346664429 2023-01-22 09:44:15.231445: step: 734/463, loss: 3.310222625732422 2023-01-22 09:44:15.898023: step: 736/463, loss: 0.40570223331451416 2023-01-22 09:44:16.571577: step: 738/463, loss: 1.0854921340942383 2023-01-22 09:44:17.181732: step: 740/463, loss: 1.2302539348602295 2023-01-22 09:44:17.725794: step: 742/463, loss: 0.29824066162109375 2023-01-22 09:44:18.327212: step: 744/463, loss: 0.21834328770637512 2023-01-22 09:44:18.859766: step: 746/463, loss: 0.9164169430732727 2023-01-22 09:44:19.460165: step: 748/463, loss: 1.6107051372528076 2023-01-22 09:44:20.068403: step: 750/463, loss: 0.39505648612976074 2023-01-22 09:44:20.741392: step: 752/463, loss: 0.7051186561584473 2023-01-22 09:44:21.407120: step: 754/463, loss: 0.5606783628463745 2023-01-22 09:44:22.017424: step: 756/463, loss: 0.6574631929397583 2023-01-22 09:44:22.590950: step: 758/463, loss: 0.6522522568702698 2023-01-22 09:44:23.210996: step: 760/463, loss: 1.08747398853302 2023-01-22 09:44:23.790837: step: 762/463, loss: 0.8959791660308838 2023-01-22 09:44:24.525217: step: 764/463, loss: 12.886826515197754 2023-01-22 09:44:25.063671: step: 766/463, loss: 0.49474939703941345 2023-01-22 09:44:25.707077: step: 768/463, loss: 0.22615544497966766 2023-01-22 09:44:26.317356: step: 770/463, loss: 0.8572375178337097 2023-01-22 09:44:26.927407: step: 772/463, loss: 1.1405339241027832 2023-01-22 09:44:27.526299: step: 774/463, loss: 0.5088615417480469 2023-01-22 09:44:28.220026: step: 776/463, loss: 0.8074105381965637 2023-01-22 09:44:28.852667: step: 778/463, loss: 1.1362767219543457 2023-01-22 09:44:29.482979: step: 780/463, loss: 0.24066327512264252 2023-01-22 09:44:30.179815: step: 782/463, loss: 0.6467727422714233 2023-01-22 09:44:30.740378: step: 784/463, loss: 0.3192857503890991 2023-01-22 09:44:31.332192: step: 786/463, loss: 2.1987528800964355 2023-01-22 09:44:31.950042: step: 788/463, loss: 1.2378207445144653 2023-01-22 09:44:32.584188: step: 790/463, loss: 0.57787024974823 2023-01-22 09:44:33.203629: step: 792/463, loss: 0.7829220294952393 2023-01-22 09:44:33.787224: step: 794/463, loss: 1.5045771598815918 2023-01-22 09:44:34.385186: step: 796/463, loss: 0.39104029536247253 2023-01-22 09:44:35.064478: step: 798/463, loss: 0.804844856262207 2023-01-22 09:44:35.654285: step: 800/463, loss: 4.929046630859375 2023-01-22 09:44:36.236656: step: 802/463, loss: 0.38950616121292114 2023-01-22 09:44:36.897540: step: 804/463, loss: 0.6650965809822083 2023-01-22 09:44:37.489159: step: 806/463, loss: 0.6405189037322998 2023-01-22 09:44:38.133047: step: 808/463, loss: 0.864395022392273 2023-01-22 09:44:38.710589: step: 810/463, loss: 0.7275404930114746 2023-01-22 09:44:39.314927: step: 812/463, loss: 1.927489161491394 2023-01-22 09:44:39.948676: step: 814/463, loss: 0.6312630772590637 2023-01-22 09:44:40.571028: step: 816/463, loss: 1.1469464302062988 2023-01-22 09:44:41.185777: step: 818/463, loss: 1.0295991897583008 2023-01-22 09:44:41.759956: step: 820/463, loss: 0.5181655287742615 2023-01-22 09:44:42.365001: step: 822/463, loss: 0.2782285511493683 2023-01-22 09:44:42.976310: step: 824/463, loss: 0.7657139897346497 2023-01-22 09:44:43.586155: step: 826/463, loss: 0.3979091942310333 2023-01-22 09:44:44.174912: step: 828/463, loss: 1.0972727537155151 2023-01-22 09:44:44.797376: step: 830/463, loss: 0.4853355586528778 2023-01-22 09:44:45.460014: step: 832/463, loss: 0.58555668592453 2023-01-22 09:44:46.036765: step: 834/463, loss: 0.7757879495620728 2023-01-22 09:44:46.739613: step: 836/463, loss: 0.9302806854248047 2023-01-22 09:44:47.358743: step: 838/463, loss: 0.287883460521698 2023-01-22 09:44:47.923562: step: 840/463, loss: 0.9032585620880127 2023-01-22 09:44:48.534713: step: 842/463, loss: 0.5623165369033813 2023-01-22 09:44:49.153319: step: 844/463, loss: 0.3701925277709961 2023-01-22 09:44:49.821950: step: 846/463, loss: 3.4421286582946777 2023-01-22 09:44:50.468454: step: 848/463, loss: 0.2465246617794037 2023-01-22 09:44:51.059708: step: 850/463, loss: 1.4481582641601562 2023-01-22 09:44:51.661263: step: 852/463, loss: 2.952502727508545 2023-01-22 09:44:52.311048: step: 854/463, loss: 0.8330993056297302 2023-01-22 09:44:52.927709: step: 856/463, loss: 0.980499804019928 2023-01-22 09:44:53.567875: step: 858/463, loss: 0.5031222701072693 2023-01-22 09:44:54.217343: step: 860/463, loss: 0.5115187168121338 2023-01-22 09:44:54.844481: step: 862/463, loss: 0.30255091190338135 2023-01-22 09:44:55.387166: step: 864/463, loss: 0.6826913356781006 2023-01-22 09:44:56.070019: step: 866/463, loss: 2.0543980598449707 2023-01-22 09:44:56.609202: step: 868/463, loss: 1.2051109075546265 2023-01-22 09:44:57.229282: step: 870/463, loss: 0.3185446858406067 2023-01-22 09:44:57.803683: step: 872/463, loss: 0.26583802700042725 2023-01-22 09:44:58.436166: step: 874/463, loss: 0.5186485648155212 2023-01-22 09:44:59.049455: step: 876/463, loss: 0.17223262786865234 2023-01-22 09:44:59.673450: step: 878/463, loss: 0.23885127902030945 2023-01-22 09:45:00.337726: step: 880/463, loss: 0.5206378102302551 2023-01-22 09:45:00.937226: step: 882/463, loss: 0.25288882851600647 2023-01-22 09:45:01.596261: step: 884/463, loss: 1.3440959453582764 2023-01-22 09:45:02.266447: step: 886/463, loss: 1.2510030269622803 2023-01-22 09:45:02.861698: step: 888/463, loss: 0.9494075775146484 2023-01-22 09:45:03.592652: step: 890/463, loss: 2.1396734714508057 2023-01-22 09:45:04.165607: step: 892/463, loss: 0.894913375377655 2023-01-22 09:45:04.757905: step: 894/463, loss: 0.48696890473365784 2023-01-22 09:45:05.342896: step: 896/463, loss: 0.31168943643569946 2023-01-22 09:45:05.928637: step: 898/463, loss: 0.4771164059638977 2023-01-22 09:45:06.654929: step: 900/463, loss: 0.41636332869529724 2023-01-22 09:45:07.332904: step: 902/463, loss: 0.5606483221054077 2023-01-22 09:45:07.963428: step: 904/463, loss: 0.4186766743659973 2023-01-22 09:45:08.590364: step: 906/463, loss: 0.8164801001548767 2023-01-22 09:45:09.205514: step: 908/463, loss: 0.7248661518096924 2023-01-22 09:45:09.854717: step: 910/463, loss: 1.102360725402832 2023-01-22 09:45:10.464458: step: 912/463, loss: 0.5251678228378296 2023-01-22 09:45:11.135872: step: 914/463, loss: 0.6367273330688477 2023-01-22 09:45:11.805074: step: 916/463, loss: 0.1624290943145752 2023-01-22 09:45:12.408358: step: 918/463, loss: 1.067218542098999 2023-01-22 09:45:13.018388: step: 920/463, loss: 0.3017536699771881 2023-01-22 09:45:13.642911: step: 922/463, loss: 0.7523595690727234 2023-01-22 09:45:14.239951: step: 924/463, loss: 0.6766957640647888 2023-01-22 09:45:14.838817: step: 926/463, loss: 0.9524654150009155 ================================================== Loss: 0.932 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.312208299512987, 'r': 0.3074778707324872, 'f1': 0.3098250300510558}, 'combined': 0.22829212740604113, 'epoch': 5} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32315563723618757, 'r': 0.3688964167714568, 'f1': 0.34451442184321457}, 'combined': 0.26703988678756346, 'epoch': 5} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2767740756144682, 'r': 0.3171369616415781, 'f1': 0.2955839642484612}, 'combined': 0.21779871049886612, 'epoch': 5} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.30611852880336826, 'r': 0.37673962322399823, 'f1': 0.3377772641678699}, 'combined': 0.26181778370906666, 'epoch': 5} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28012957534441907, 'r': 0.3140846753861668, 'f1': 0.29613697964981445}, 'combined': 0.21820619553144222, 'epoch': 5} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3064842101133353, 'r': 0.3585977936344447, 'f1': 0.3304992795207758}, 'combined': 0.2561764750352425, 'epoch': 5} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25943396226415094, 'r': 0.39285714285714285, 'f1': 0.31249999999999994}, 'combined': 0.2083333333333333, 'epoch': 5} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.28448275862068967, 'r': 0.358695652173913, 'f1': 0.3173076923076923}, 'combined': 0.15865384615384615, 'epoch': 5} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.421875, 'r': 0.23275862068965517, 'f1': 0.3}, 'combined': 0.19999999999999998, 'epoch': 5} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.312208299512987, 'r': 0.3074778707324872, 'f1': 0.3098250300510558}, 'combined': 0.22829212740604113, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32315563723618757, 'r': 0.3688964167714568, 'f1': 0.34451442184321457}, 'combined': 0.26703988678756346, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25943396226415094, 'r': 0.39285714285714285, 'f1': 0.31249999999999994}, 'combined': 0.2083333333333333, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2767740756144682, 'r': 0.3171369616415781, 'f1': 0.2955839642484612}, 'combined': 0.21779871049886612, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.30611852880336826, 'r': 0.37673962322399823, 'f1': 0.3377772641678699}, 'combined': 0.26181778370906666, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.28448275862068967, 'r': 0.358695652173913, 'f1': 0.3173076923076923}, 'combined': 0.15865384615384615, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28012957534441907, 'r': 0.3140846753861668, 'f1': 0.29613697964981445}, 'combined': 0.21820619553144222, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3064842101133353, 'r': 0.3585977936344447, 'f1': 0.3304992795207758}, 'combined': 0.2561764750352425, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.421875, 'r': 0.23275862068965517, 'f1': 0.3}, 'combined': 0.19999999999999998, 'epoch': 5} ****************************** Epoch: 6 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 09:48:11.787962: step: 2/463, loss: 0.7707034349441528 2023-01-22 09:48:12.467794: step: 4/463, loss: 2.139986038208008 2023-01-22 09:48:13.121801: step: 6/463, loss: 1.0813167095184326 2023-01-22 09:48:13.715463: step: 8/463, loss: 0.9199732542037964 2023-01-22 09:48:14.313571: step: 10/463, loss: 0.2954943776130676 2023-01-22 09:48:14.972126: step: 12/463, loss: 0.7491180300712585 2023-01-22 09:48:15.575177: step: 14/463, loss: 2.387415885925293 2023-01-22 09:48:16.190969: step: 16/463, loss: 0.7784934043884277 2023-01-22 09:48:16.818384: step: 18/463, loss: 1.0621168613433838 2023-01-22 09:48:17.470460: step: 20/463, loss: 0.43312177062034607 2023-01-22 09:48:18.113057: step: 22/463, loss: 0.3385125994682312 2023-01-22 09:48:18.721577: step: 24/463, loss: 0.26764538884162903 2023-01-22 09:48:19.332479: step: 26/463, loss: 0.5014766454696655 2023-01-22 09:48:19.882584: step: 28/463, loss: 0.7479044795036316 2023-01-22 09:48:20.527597: step: 30/463, loss: 1.2933988571166992 2023-01-22 09:48:21.149693: step: 32/463, loss: 0.826672375202179 2023-01-22 09:48:21.888776: step: 34/463, loss: 0.3321710526943207 2023-01-22 09:48:22.524542: step: 36/463, loss: 0.8432543873786926 2023-01-22 09:48:23.230319: step: 38/463, loss: 4.003955841064453 2023-01-22 09:48:23.871114: step: 40/463, loss: 0.5457615852355957 2023-01-22 09:48:24.493638: step: 42/463, loss: 0.48903197050094604 2023-01-22 09:48:25.113272: step: 44/463, loss: 0.411626935005188 2023-01-22 09:48:25.734857: step: 46/463, loss: 0.5120250582695007 2023-01-22 09:48:26.331957: step: 48/463, loss: 0.8634063601493835 2023-01-22 09:48:26.927473: step: 50/463, loss: 0.5466212034225464 2023-01-22 09:48:27.537284: step: 52/463, loss: 0.3239036202430725 2023-01-22 09:48:28.135460: step: 54/463, loss: 0.13446322083473206 2023-01-22 09:48:28.756330: step: 56/463, loss: 1.3016963005065918 2023-01-22 09:48:29.373624: step: 58/463, loss: 0.45938900113105774 2023-01-22 09:48:29.956372: step: 60/463, loss: 0.2309417873620987 2023-01-22 09:48:30.539583: step: 62/463, loss: 0.5879989862442017 2023-01-22 09:48:31.167750: step: 64/463, loss: 0.3607172667980194 2023-01-22 09:48:31.763562: step: 66/463, loss: 0.15693148970603943 2023-01-22 09:48:32.372952: step: 68/463, loss: 0.43909192085266113 2023-01-22 09:48:33.030851: step: 70/463, loss: 0.7162376642227173 2023-01-22 09:48:33.617218: step: 72/463, loss: 0.7164316773414612 2023-01-22 09:48:34.130845: step: 74/463, loss: 0.6055495738983154 2023-01-22 09:48:34.719956: step: 76/463, loss: 0.40779149532318115 2023-01-22 09:48:35.363173: step: 78/463, loss: 0.3160075843334198 2023-01-22 09:48:36.026212: step: 80/463, loss: 0.3557608127593994 2023-01-22 09:48:36.656911: step: 82/463, loss: 0.417134165763855 2023-01-22 09:48:37.225492: step: 84/463, loss: 0.7544646859169006 2023-01-22 09:48:37.836171: step: 86/463, loss: 0.14090657234191895 2023-01-22 09:48:38.506085: step: 88/463, loss: 0.38752931356430054 2023-01-22 09:48:39.131638: step: 90/463, loss: 0.31950053572654724 2023-01-22 09:48:39.775597: step: 92/463, loss: 0.3570743501186371 2023-01-22 09:48:40.320646: step: 94/463, loss: 0.43369346857070923 2023-01-22 09:48:40.966872: step: 96/463, loss: 0.31948956847190857 2023-01-22 09:48:41.622256: step: 98/463, loss: 0.9211097359657288 2023-01-22 09:48:42.238614: step: 100/463, loss: 0.4100872874259949 2023-01-22 09:48:42.831886: step: 102/463, loss: 0.507490336894989 2023-01-22 09:48:43.509731: step: 104/463, loss: 0.6800287961959839 2023-01-22 09:48:44.141485: step: 106/463, loss: 0.3257635533809662 2023-01-22 09:48:44.706964: step: 108/463, loss: 0.24174264073371887 2023-01-22 09:48:45.386836: step: 110/463, loss: 0.5335452556610107 2023-01-22 09:48:46.015784: step: 112/463, loss: 0.7120741605758667 2023-01-22 09:48:46.621281: step: 114/463, loss: 0.2345055639743805 2023-01-22 09:48:47.211538: step: 116/463, loss: 0.7252466678619385 2023-01-22 09:48:47.853950: step: 118/463, loss: 2.573593854904175 2023-01-22 09:48:48.500903: step: 120/463, loss: 0.5916131734848022 2023-01-22 09:48:49.094244: step: 122/463, loss: 0.7222192883491516 2023-01-22 09:48:49.689678: step: 124/463, loss: 0.743853747844696 2023-01-22 09:48:50.269336: step: 126/463, loss: 0.4970948100090027 2023-01-22 09:48:50.903471: step: 128/463, loss: 0.9570875763893127 2023-01-22 09:48:51.510238: step: 130/463, loss: 1.0289407968521118 2023-01-22 09:48:52.078790: step: 132/463, loss: 0.39893364906311035 2023-01-22 09:48:52.717768: step: 134/463, loss: 0.10169912129640579 2023-01-22 09:48:53.324198: step: 136/463, loss: 0.3106191158294678 2023-01-22 09:48:53.946420: step: 138/463, loss: 1.3883326053619385 2023-01-22 09:48:54.595683: step: 140/463, loss: 0.2029099315404892 2023-01-22 09:48:55.178428: step: 142/463, loss: 0.23208893835544586 2023-01-22 09:48:55.791570: step: 144/463, loss: 0.48671191930770874 2023-01-22 09:48:56.353239: step: 146/463, loss: 1.0086190700531006 2023-01-22 09:48:56.939966: step: 148/463, loss: 0.186837300658226 2023-01-22 09:48:57.504066: step: 150/463, loss: 0.40221652388572693 2023-01-22 09:48:58.187473: step: 152/463, loss: 0.41822606325149536 2023-01-22 09:48:58.845340: step: 154/463, loss: 0.2611430883407593 2023-01-22 09:48:59.425222: step: 156/463, loss: 0.208994060754776 2023-01-22 09:49:00.044685: step: 158/463, loss: 0.600904107093811 2023-01-22 09:49:00.650761: step: 160/463, loss: 0.3257748782634735 2023-01-22 09:49:01.250853: step: 162/463, loss: 0.6409143805503845 2023-01-22 09:49:01.810537: step: 164/463, loss: 0.2803063690662384 2023-01-22 09:49:02.423846: step: 166/463, loss: 2.9948930740356445 2023-01-22 09:49:03.038314: step: 168/463, loss: 0.20377136766910553 2023-01-22 09:49:03.676811: step: 170/463, loss: 0.47860196232795715 2023-01-22 09:49:04.279318: step: 172/463, loss: 0.3640652596950531 2023-01-22 09:49:04.872160: step: 174/463, loss: 0.7649862766265869 2023-01-22 09:49:05.476607: step: 176/463, loss: 0.5883657932281494 2023-01-22 09:49:06.111029: step: 178/463, loss: 0.42379188537597656 2023-01-22 09:49:06.763406: step: 180/463, loss: 0.45746615529060364 2023-01-22 09:49:07.380359: step: 182/463, loss: 0.2927231192588806 2023-01-22 09:49:08.048726: step: 184/463, loss: 1.417479157447815 2023-01-22 09:49:08.660745: step: 186/463, loss: 1.0829448699951172 2023-01-22 09:49:09.234018: step: 188/463, loss: 0.19566978514194489 2023-01-22 09:49:09.946015: step: 190/463, loss: 0.2037958949804306 2023-01-22 09:49:10.536537: step: 192/463, loss: 0.6271510124206543 2023-01-22 09:49:11.156853: step: 194/463, loss: 0.6505361795425415 2023-01-22 09:49:11.760278: step: 196/463, loss: 1.0390840768814087 2023-01-22 09:49:12.391078: step: 198/463, loss: 0.6060810685157776 2023-01-22 09:49:12.980892: step: 200/463, loss: 0.3452938199043274 2023-01-22 09:49:13.569669: step: 202/463, loss: 0.9558874368667603 2023-01-22 09:49:14.303169: step: 204/463, loss: 1.3529548645019531 2023-01-22 09:49:14.908755: step: 206/463, loss: 0.8873156905174255 2023-01-22 09:49:15.558656: step: 208/463, loss: 0.41194331645965576 2023-01-22 09:49:16.225377: step: 210/463, loss: 1.1888885498046875 2023-01-22 09:49:16.873650: step: 212/463, loss: 1.880638837814331 2023-01-22 09:49:17.502726: step: 214/463, loss: 1.268115758895874 2023-01-22 09:49:18.133187: step: 216/463, loss: 0.25972822308540344 2023-01-22 09:49:18.712514: step: 218/463, loss: 0.7622128129005432 2023-01-22 09:49:19.280595: step: 220/463, loss: 0.31395792961120605 2023-01-22 09:49:19.913112: step: 222/463, loss: 0.41082993149757385 2023-01-22 09:49:20.564468: step: 224/463, loss: 0.9554826021194458 2023-01-22 09:49:21.314353: step: 226/463, loss: 0.6175981163978577 2023-01-22 09:49:21.894468: step: 228/463, loss: 0.8517409563064575 2023-01-22 09:49:22.548614: step: 230/463, loss: 0.2539173662662506 2023-01-22 09:49:23.192259: step: 232/463, loss: 0.6553291082382202 2023-01-22 09:49:23.754998: step: 234/463, loss: 0.2384515255689621 2023-01-22 09:49:24.376529: step: 236/463, loss: 0.43330925703048706 2023-01-22 09:49:25.050185: step: 238/463, loss: 0.2994055151939392 2023-01-22 09:49:25.651362: step: 240/463, loss: 0.2976920008659363 2023-01-22 09:49:26.330987: step: 242/463, loss: 0.8106310367584229 2023-01-22 09:49:27.006039: step: 244/463, loss: 0.3887229263782501 2023-01-22 09:49:27.658410: step: 246/463, loss: 0.5251857042312622 2023-01-22 09:49:28.300792: step: 248/463, loss: 1.9007600545883179 2023-01-22 09:49:28.935478: step: 250/463, loss: 0.23919978737831116 2023-01-22 09:49:29.524587: step: 252/463, loss: 0.6328753232955933 2023-01-22 09:49:30.186687: step: 254/463, loss: 0.7381503582000732 2023-01-22 09:49:30.778677: step: 256/463, loss: 0.20988327264785767 2023-01-22 09:49:31.388472: step: 258/463, loss: 0.9936560392379761 2023-01-22 09:49:32.033526: step: 260/463, loss: 1.5107687711715698 2023-01-22 09:49:32.698415: step: 262/463, loss: 0.4509623646736145 2023-01-22 09:49:33.257839: step: 264/463, loss: 0.272694855928421 2023-01-22 09:49:33.872013: step: 266/463, loss: 0.2572956085205078 2023-01-22 09:49:34.469299: step: 268/463, loss: 0.7112085819244385 2023-01-22 09:49:35.044571: step: 270/463, loss: 0.3555031418800354 2023-01-22 09:49:35.675083: step: 272/463, loss: 0.17245355248451233 2023-01-22 09:49:36.346438: step: 274/463, loss: 0.35611552000045776 2023-01-22 09:49:37.003753: step: 276/463, loss: 0.20791593194007874 2023-01-22 09:49:37.589778: step: 278/463, loss: 0.21697591245174408 2023-01-22 09:49:38.308816: step: 280/463, loss: 0.5917024612426758 2023-01-22 09:49:38.867687: step: 282/463, loss: 0.24465574324131012 2023-01-22 09:49:39.591119: step: 284/463, loss: 0.31289833784103394 2023-01-22 09:49:40.203322: step: 286/463, loss: 3.5686933994293213 2023-01-22 09:49:40.821975: step: 288/463, loss: 0.5982808470726013 2023-01-22 09:49:41.472606: step: 290/463, loss: 0.10122385621070862 2023-01-22 09:49:42.058121: step: 292/463, loss: 0.30538395047187805 2023-01-22 09:49:42.690934: step: 294/463, loss: 1.1313884258270264 2023-01-22 09:49:43.309990: step: 296/463, loss: 0.9204930067062378 2023-01-22 09:49:43.996940: step: 298/463, loss: 0.2620668113231659 2023-01-22 09:49:44.563664: step: 300/463, loss: 0.19864974915981293 2023-01-22 09:49:45.201936: step: 302/463, loss: 0.4536626935005188 2023-01-22 09:49:45.759248: step: 304/463, loss: 0.29439884424209595 2023-01-22 09:49:46.325281: step: 306/463, loss: 0.45344313979148865 2023-01-22 09:49:46.947905: step: 308/463, loss: 0.9328432679176331 2023-01-22 09:49:47.553780: step: 310/463, loss: 0.3301640450954437 2023-01-22 09:49:48.209572: step: 312/463, loss: 0.6652065515518188 2023-01-22 09:49:48.871958: step: 314/463, loss: 1.7454242706298828 2023-01-22 09:49:49.527710: step: 316/463, loss: 1.013277530670166 2023-01-22 09:49:50.152637: step: 318/463, loss: 0.9130158424377441 2023-01-22 09:49:50.818514: step: 320/463, loss: 0.2867955267429352 2023-01-22 09:49:51.397868: step: 322/463, loss: 0.800923764705658 2023-01-22 09:49:52.019084: step: 324/463, loss: 0.8890997171401978 2023-01-22 09:49:52.686847: step: 326/463, loss: 0.09248176217079163 2023-01-22 09:49:53.266293: step: 328/463, loss: 0.7782219052314758 2023-01-22 09:49:53.883695: step: 330/463, loss: 1.5487060546875 2023-01-22 09:49:54.514948: step: 332/463, loss: 0.23208898305892944 2023-01-22 09:49:55.127772: step: 334/463, loss: 1.7774015665054321 2023-01-22 09:49:55.780527: step: 336/463, loss: 0.23104318976402283 2023-01-22 09:49:56.330089: step: 338/463, loss: 1.6652344465255737 2023-01-22 09:49:56.941195: step: 340/463, loss: 0.942787766456604 2023-01-22 09:49:57.577945: step: 342/463, loss: 0.30264759063720703 2023-01-22 09:49:58.211519: step: 344/463, loss: 0.3393298089504242 2023-01-22 09:49:58.860965: step: 346/463, loss: 0.26077795028686523 2023-01-22 09:49:59.468257: step: 348/463, loss: 0.3874621093273163 2023-01-22 09:50:00.056395: step: 350/463, loss: 0.41772162914276123 2023-01-22 09:50:00.735705: step: 352/463, loss: 0.5013745427131653 2023-01-22 09:50:01.368856: step: 354/463, loss: 0.23540829122066498 2023-01-22 09:50:01.918060: step: 356/463, loss: 0.4870157837867737 2023-01-22 09:50:02.601023: step: 358/463, loss: 0.5283874869346619 2023-01-22 09:50:03.264979: step: 360/463, loss: 2.800855875015259 2023-01-22 09:50:03.860976: step: 362/463, loss: 0.3269934356212616 2023-01-22 09:50:04.416884: step: 364/463, loss: 0.31342813372612 2023-01-22 09:50:05.107957: step: 366/463, loss: 0.6667982339859009 2023-01-22 09:50:05.716833: step: 368/463, loss: 0.44992002844810486 2023-01-22 09:50:06.362099: step: 370/463, loss: 0.5229851007461548 2023-01-22 09:50:07.064033: step: 372/463, loss: 0.959665060043335 2023-01-22 09:50:07.668256: step: 374/463, loss: 0.7876143455505371 2023-01-22 09:50:08.242336: step: 376/463, loss: 1.4744727611541748 2023-01-22 09:50:08.902137: step: 378/463, loss: 0.34497082233428955 2023-01-22 09:50:09.434419: step: 380/463, loss: 0.6525307297706604 2023-01-22 09:50:10.066169: step: 382/463, loss: 0.6078543663024902 2023-01-22 09:50:10.686987: step: 384/463, loss: 0.5248273611068726 2023-01-22 09:50:11.268010: step: 386/463, loss: 1.6794519424438477 2023-01-22 09:50:11.806775: step: 388/463, loss: 0.40272361040115356 2023-01-22 09:50:12.411483: step: 390/463, loss: 1.127328872680664 2023-01-22 09:50:13.001199: step: 392/463, loss: 0.4430447518825531 2023-01-22 09:50:13.631469: step: 394/463, loss: 0.15610727667808533 2023-01-22 09:50:14.229266: step: 396/463, loss: 0.16499409079551697 2023-01-22 09:50:14.834563: step: 398/463, loss: 1.1390331983566284 2023-01-22 09:50:15.523143: step: 400/463, loss: 0.15032213926315308 2023-01-22 09:50:16.141471: step: 402/463, loss: 0.5404168963432312 2023-01-22 09:50:16.737816: step: 404/463, loss: 0.3954692780971527 2023-01-22 09:50:17.356327: step: 406/463, loss: 0.9207911491394043 2023-01-22 09:50:18.050188: step: 408/463, loss: 0.5242671370506287 2023-01-22 09:50:18.584518: step: 410/463, loss: 0.09933799505233765 2023-01-22 09:50:19.216899: step: 412/463, loss: 1.0824713706970215 2023-01-22 09:50:19.889225: step: 414/463, loss: 0.5512629747390747 2023-01-22 09:50:20.527157: step: 416/463, loss: 0.2910410761833191 2023-01-22 09:50:21.213666: step: 418/463, loss: 0.2227073609828949 2023-01-22 09:50:21.780146: step: 420/463, loss: 1.188856840133667 2023-01-22 09:50:22.509625: step: 422/463, loss: 0.5407494902610779 2023-01-22 09:50:23.105893: step: 424/463, loss: 0.5058335065841675 2023-01-22 09:50:23.746722: step: 426/463, loss: 0.35298728942871094 2023-01-22 09:50:24.392279: step: 428/463, loss: 1.0835871696472168 2023-01-22 09:50:25.017096: step: 430/463, loss: 0.40585756301879883 2023-01-22 09:50:25.601138: step: 432/463, loss: 0.6000495553016663 2023-01-22 09:50:26.179078: step: 434/463, loss: 0.10432958602905273 2023-01-22 09:50:26.805851: step: 436/463, loss: 0.2688646614551544 2023-01-22 09:50:27.392400: step: 438/463, loss: 0.16700828075408936 2023-01-22 09:50:28.096323: step: 440/463, loss: 1.4053618907928467 2023-01-22 09:50:28.682657: step: 442/463, loss: 0.2594049870967865 2023-01-22 09:50:29.290025: step: 444/463, loss: 0.19910700619220734 2023-01-22 09:50:29.912963: step: 446/463, loss: 0.1585317701101303 2023-01-22 09:50:30.523210: step: 448/463, loss: 1.686780333518982 2023-01-22 09:50:31.127812: step: 450/463, loss: 0.17139622569084167 2023-01-22 09:50:31.742754: step: 452/463, loss: 0.1759694665670395 2023-01-22 09:50:32.312644: step: 454/463, loss: 0.3747706115245819 2023-01-22 09:50:32.892935: step: 456/463, loss: 1.5020641088485718 2023-01-22 09:50:33.478685: step: 458/463, loss: 0.9281125068664551 2023-01-22 09:50:34.097160: step: 460/463, loss: 0.17045475542545319 2023-01-22 09:50:34.624392: step: 462/463, loss: 0.8570803999900818 2023-01-22 09:50:35.193189: step: 464/463, loss: 0.40932726860046387 2023-01-22 09:50:35.833994: step: 466/463, loss: 0.44993898272514343 2023-01-22 09:50:36.392590: step: 468/463, loss: 0.2706313729286194 2023-01-22 09:50:36.987310: step: 470/463, loss: 0.27971887588500977 2023-01-22 09:50:37.556578: step: 472/463, loss: 0.6699079275131226 2023-01-22 09:50:38.171004: step: 474/463, loss: 0.3720257580280304 2023-01-22 09:50:38.726733: step: 476/463, loss: 0.4284832775592804 2023-01-22 09:50:39.323842: step: 478/463, loss: 2.215088129043579 2023-01-22 09:50:39.921110: step: 480/463, loss: 0.49277177453041077 2023-01-22 09:50:40.587546: step: 482/463, loss: 1.403556227684021 2023-01-22 09:50:41.224966: step: 484/463, loss: 1.7338634729385376 2023-01-22 09:50:41.782893: step: 486/463, loss: 0.23187251389026642 2023-01-22 09:50:42.495526: step: 488/463, loss: 0.13469001650810242 2023-01-22 09:50:43.167009: step: 490/463, loss: 0.2629240155220032 2023-01-22 09:50:43.732433: step: 492/463, loss: 0.1605781465768814 2023-01-22 09:50:44.365257: step: 494/463, loss: 0.8620311617851257 2023-01-22 09:50:45.060421: step: 496/463, loss: 1.7924976348876953 2023-01-22 09:50:45.762461: step: 498/463, loss: 0.500994086265564 2023-01-22 09:50:46.332418: step: 500/463, loss: 0.6751488447189331 2023-01-22 09:50:46.889954: step: 502/463, loss: 0.39528971910476685 2023-01-22 09:50:47.466113: step: 504/463, loss: 1.156477689743042 2023-01-22 09:50:48.109304: step: 506/463, loss: 0.6608617901802063 2023-01-22 09:50:48.727320: step: 508/463, loss: 2.1466660499572754 2023-01-22 09:50:49.300785: step: 510/463, loss: 0.16079235076904297 2023-01-22 09:50:50.054354: step: 512/463, loss: 0.6797022819519043 2023-01-22 09:50:50.621078: step: 514/463, loss: 0.42306578159332275 2023-01-22 09:50:51.247067: step: 516/463, loss: 0.7788892984390259 2023-01-22 09:50:51.884722: step: 518/463, loss: 0.31527185440063477 2023-01-22 09:50:52.501494: step: 520/463, loss: 0.7262465953826904 2023-01-22 09:50:53.043299: step: 522/463, loss: 0.24979224801063538 2023-01-22 09:50:53.693526: step: 524/463, loss: 0.9982425570487976 2023-01-22 09:50:54.375294: step: 526/463, loss: 2.9610493183135986 2023-01-22 09:50:55.000869: step: 528/463, loss: 0.5105767250061035 2023-01-22 09:50:55.585811: step: 530/463, loss: 0.47443604469299316 2023-01-22 09:50:56.212378: step: 532/463, loss: 0.4157335162162781 2023-01-22 09:50:56.756962: step: 534/463, loss: 1.113906979560852 2023-01-22 09:50:57.319343: step: 536/463, loss: 0.43656229972839355 2023-01-22 09:50:57.887415: step: 538/463, loss: 0.14742086827754974 2023-01-22 09:50:58.487376: step: 540/463, loss: 0.2634921669960022 2023-01-22 09:50:59.109644: step: 542/463, loss: 2.609593391418457 2023-01-22 09:50:59.699441: step: 544/463, loss: 0.6216706037521362 2023-01-22 09:51:00.286562: step: 546/463, loss: 0.5992388129234314 2023-01-22 09:51:00.937886: step: 548/463, loss: 0.5043485760688782 2023-01-22 09:51:01.504221: step: 550/463, loss: 0.8861814737319946 2023-01-22 09:51:02.102037: step: 552/463, loss: 1.3062583208084106 2023-01-22 09:51:02.726934: step: 554/463, loss: 0.1536601334810257 2023-01-22 09:51:03.338244: step: 556/463, loss: 0.5184997916221619 2023-01-22 09:51:03.925811: step: 558/463, loss: 0.37322762608528137 2023-01-22 09:51:04.499370: step: 560/463, loss: 0.3827729821205139 2023-01-22 09:51:05.163074: step: 562/463, loss: 1.0360947847366333 2023-01-22 09:51:05.832348: step: 564/463, loss: 0.05056465044617653 2023-01-22 09:51:06.449614: step: 566/463, loss: 0.753536581993103 2023-01-22 09:51:07.063024: step: 568/463, loss: 0.4754701256752014 2023-01-22 09:51:07.667781: step: 570/463, loss: 1.1415660381317139 2023-01-22 09:51:08.287770: step: 572/463, loss: 0.8563820123672485 2023-01-22 09:51:08.871399: step: 574/463, loss: 1.3379309177398682 2023-01-22 09:51:09.458534: step: 576/463, loss: 0.9537292718887329 2023-01-22 09:51:10.102287: step: 578/463, loss: 0.6534048914909363 2023-01-22 09:51:10.713263: step: 580/463, loss: 0.18520990014076233 2023-01-22 09:51:11.304582: step: 582/463, loss: 0.35937270522117615 2023-01-22 09:51:11.918258: step: 584/463, loss: 0.7841150760650635 2023-01-22 09:51:12.518530: step: 586/463, loss: 0.32687780261039734 2023-01-22 09:51:13.110454: step: 588/463, loss: 0.2522583305835724 2023-01-22 09:51:13.689521: step: 590/463, loss: 0.6390393972396851 2023-01-22 09:51:14.265203: step: 592/463, loss: 0.8120431303977966 2023-01-22 09:51:14.850180: step: 594/463, loss: 0.6621069312095642 2023-01-22 09:51:15.487465: step: 596/463, loss: 0.7320810556411743 2023-01-22 09:51:16.024806: step: 598/463, loss: 1.664620041847229 2023-01-22 09:51:16.588913: step: 600/463, loss: 0.7753128409385681 2023-01-22 09:51:17.214895: step: 602/463, loss: 0.5026118159294128 2023-01-22 09:51:17.826603: step: 604/463, loss: 0.7103804349899292 2023-01-22 09:51:18.544303: step: 606/463, loss: 0.5003908276557922 2023-01-22 09:51:19.192218: step: 608/463, loss: 0.30910584330558777 2023-01-22 09:51:19.811669: step: 610/463, loss: 0.3031342029571533 2023-01-22 09:51:20.452576: step: 612/463, loss: 3.683682918548584 2023-01-22 09:51:21.073239: step: 614/463, loss: 0.441860169172287 2023-01-22 09:51:21.637877: step: 616/463, loss: 1.2985293865203857 2023-01-22 09:51:22.210577: step: 618/463, loss: 1.1251379251480103 2023-01-22 09:51:22.866516: step: 620/463, loss: 0.7022017240524292 2023-01-22 09:51:23.518069: step: 622/463, loss: 0.3726992607116699 2023-01-22 09:51:24.090434: step: 624/463, loss: 0.5467562675476074 2023-01-22 09:51:24.630735: step: 626/463, loss: 0.7616590857505798 2023-01-22 09:51:25.212675: step: 628/463, loss: 0.7752111554145813 2023-01-22 09:51:25.863424: step: 630/463, loss: 2.210228204727173 2023-01-22 09:51:26.457635: step: 632/463, loss: 0.6542195677757263 2023-01-22 09:51:27.023485: step: 634/463, loss: 0.10479707270860672 2023-01-22 09:51:27.645613: step: 636/463, loss: 1.4585634469985962 2023-01-22 09:51:28.255482: step: 638/463, loss: 0.7846353054046631 2023-01-22 09:51:28.838305: step: 640/463, loss: 0.5195389986038208 2023-01-22 09:51:29.457677: step: 642/463, loss: 0.261513888835907 2023-01-22 09:51:30.053279: step: 644/463, loss: 1.8318625688552856 2023-01-22 09:51:30.620517: step: 646/463, loss: 1.7291748523712158 2023-01-22 09:51:31.323379: step: 648/463, loss: 0.4745703637599945 2023-01-22 09:51:31.966582: step: 650/463, loss: 1.722800612449646 2023-01-22 09:51:32.534784: step: 652/463, loss: 0.8241041898727417 2023-01-22 09:51:33.166030: step: 654/463, loss: 0.4597836136817932 2023-01-22 09:51:33.796882: step: 656/463, loss: 0.42067283391952515 2023-01-22 09:51:34.472066: step: 658/463, loss: 0.6434979438781738 2023-01-22 09:51:35.035256: step: 660/463, loss: 0.20520149171352386 2023-01-22 09:51:35.668491: step: 662/463, loss: 0.9808153510093689 2023-01-22 09:51:36.349407: step: 664/463, loss: 0.7001787424087524 2023-01-22 09:51:36.941648: step: 666/463, loss: 0.20100714266300201 2023-01-22 09:51:37.514123: step: 668/463, loss: 1.1117522716522217 2023-01-22 09:51:38.141215: step: 670/463, loss: 0.30436694622039795 2023-01-22 09:51:38.889281: step: 672/463, loss: 1.8440709114074707 2023-01-22 09:51:39.515874: step: 674/463, loss: 0.4838281273841858 2023-01-22 09:51:40.183047: step: 676/463, loss: 0.31730198860168457 2023-01-22 09:51:40.771417: step: 678/463, loss: 0.6645902395248413 2023-01-22 09:51:41.358370: step: 680/463, loss: 0.24026280641555786 2023-01-22 09:51:41.997844: step: 682/463, loss: 0.31971657276153564 2023-01-22 09:51:42.628777: step: 684/463, loss: 0.852814257144928 2023-01-22 09:51:43.327676: step: 686/463, loss: 0.40470990538597107 2023-01-22 09:51:43.969206: step: 688/463, loss: 0.5372722148895264 2023-01-22 09:51:44.548052: step: 690/463, loss: 0.9322777390480042 2023-01-22 09:51:45.171449: step: 692/463, loss: 0.767435610294342 2023-01-22 09:51:45.841680: step: 694/463, loss: 0.36985763907432556 2023-01-22 09:51:46.492342: step: 696/463, loss: 0.2971535623073578 2023-01-22 09:51:47.174743: step: 698/463, loss: 0.7105740308761597 2023-01-22 09:51:47.751653: step: 700/463, loss: 0.30317947268486023 2023-01-22 09:51:48.338083: step: 702/463, loss: 2.6077194213867188 2023-01-22 09:51:49.004567: step: 704/463, loss: 0.43240219354629517 2023-01-22 09:51:49.643088: step: 706/463, loss: 0.8379062414169312 2023-01-22 09:51:50.237770: step: 708/463, loss: 0.13727572560310364 2023-01-22 09:51:50.841521: step: 710/463, loss: 0.4508086144924164 2023-01-22 09:51:51.423430: step: 712/463, loss: 0.3328574597835541 2023-01-22 09:51:52.022174: step: 714/463, loss: 0.16498662531375885 2023-01-22 09:51:52.651826: step: 716/463, loss: 0.7082381844520569 2023-01-22 09:51:53.213677: step: 718/463, loss: 0.22216297686100006 2023-01-22 09:51:53.784531: step: 720/463, loss: 1.5357190370559692 2023-01-22 09:51:54.448977: step: 722/463, loss: 2.8503880500793457 2023-01-22 09:51:55.067314: step: 724/463, loss: 0.887367844581604 2023-01-22 09:51:55.754645: step: 726/463, loss: 2.088667631149292 2023-01-22 09:51:56.364098: step: 728/463, loss: 1.1347683668136597 2023-01-22 09:51:57.054950: step: 730/463, loss: 0.3880079388618469 2023-01-22 09:51:57.648491: step: 732/463, loss: 0.4522395730018616 2023-01-22 09:51:58.282466: step: 734/463, loss: 0.801544189453125 2023-01-22 09:51:58.937493: step: 736/463, loss: 0.4031171500682831 2023-01-22 09:51:59.617037: step: 738/463, loss: 0.7093856334686279 2023-01-22 09:52:00.262870: step: 740/463, loss: 0.45906001329421997 2023-01-22 09:52:00.878377: step: 742/463, loss: 0.2921605706214905 2023-01-22 09:52:01.455116: step: 744/463, loss: 0.9815138578414917 2023-01-22 09:52:02.076570: step: 746/463, loss: 0.27694931626319885 2023-01-22 09:52:02.713864: step: 748/463, loss: 0.4865105152130127 2023-01-22 09:52:03.331361: step: 750/463, loss: 1.2407090663909912 2023-01-22 09:52:03.913893: step: 752/463, loss: 0.7826077342033386 2023-01-22 09:52:04.546392: step: 754/463, loss: 0.383323073387146 2023-01-22 09:52:05.211331: step: 756/463, loss: 0.4975360929965973 2023-01-22 09:52:05.876416: step: 758/463, loss: 0.8760184049606323 2023-01-22 09:52:06.546054: step: 760/463, loss: 0.5592105984687805 2023-01-22 09:52:07.141069: step: 762/463, loss: 0.43544185161590576 2023-01-22 09:52:07.740010: step: 764/463, loss: 0.25800713896751404 2023-01-22 09:52:08.390645: step: 766/463, loss: 1.2327625751495361 2023-01-22 09:52:08.963008: step: 768/463, loss: 0.1724807471036911 2023-01-22 09:52:09.605967: step: 770/463, loss: 0.5716548562049866 2023-01-22 09:52:10.169883: step: 772/463, loss: 1.4487444162368774 2023-01-22 09:52:10.744256: step: 774/463, loss: 0.4769440293312073 2023-01-22 09:52:11.359529: step: 776/463, loss: 0.18004576861858368 2023-01-22 09:52:11.981025: step: 778/463, loss: 0.7428921461105347 2023-01-22 09:52:12.644856: step: 780/463, loss: 0.95490562915802 2023-01-22 09:52:13.260192: step: 782/463, loss: 1.2781492471694946 2023-01-22 09:52:13.891325: step: 784/463, loss: 1.148795485496521 2023-01-22 09:52:14.548673: step: 786/463, loss: 0.733167290687561 2023-01-22 09:52:15.091948: step: 788/463, loss: 0.3271699845790863 2023-01-22 09:52:15.709756: step: 790/463, loss: 0.7401608228683472 2023-01-22 09:52:16.381989: step: 792/463, loss: 0.9319025278091431 2023-01-22 09:52:17.001319: step: 794/463, loss: 0.8767288327217102 2023-01-22 09:52:17.614791: step: 796/463, loss: 0.38745731115341187 2023-01-22 09:52:18.205417: step: 798/463, loss: 0.36867478489875793 2023-01-22 09:52:18.845014: step: 800/463, loss: 1.06089186668396 2023-01-22 09:52:19.465536: step: 802/463, loss: 0.4051103889942169 2023-01-22 09:52:20.114417: step: 804/463, loss: 0.6608892679214478 2023-01-22 09:52:20.748627: step: 806/463, loss: 0.18371205031871796 2023-01-22 09:52:21.369696: step: 808/463, loss: 1.0662380456924438 2023-01-22 09:52:22.022960: step: 810/463, loss: 0.8443009257316589 2023-01-22 09:52:22.564891: step: 812/463, loss: 0.7897899150848389 2023-01-22 09:52:23.399099: step: 814/463, loss: 3.3653674125671387 2023-01-22 09:52:24.022231: step: 816/463, loss: 0.3008726239204407 2023-01-22 09:52:24.638045: step: 818/463, loss: 0.780657172203064 2023-01-22 09:52:25.319725: step: 820/463, loss: 0.430696576833725 2023-01-22 09:52:25.902345: step: 822/463, loss: 0.32731932401657104 2023-01-22 09:52:26.537242: step: 824/463, loss: 0.7866721749305725 2023-01-22 09:52:27.143673: step: 826/463, loss: 10.629744529724121 2023-01-22 09:52:27.753215: step: 828/463, loss: 0.17941005527973175 2023-01-22 09:52:28.358660: step: 830/463, loss: 2.94114351272583 2023-01-22 09:52:28.927706: step: 832/463, loss: 0.8321788311004639 2023-01-22 09:52:29.547247: step: 834/463, loss: 0.303272008895874 2023-01-22 09:52:30.169512: step: 836/463, loss: 0.4845165014266968 2023-01-22 09:52:30.766187: step: 838/463, loss: 0.9765169024467468 2023-01-22 09:52:31.357915: step: 840/463, loss: 0.36979925632476807 2023-01-22 09:52:32.054315: step: 842/463, loss: 0.9372493624687195 2023-01-22 09:52:32.632939: step: 844/463, loss: 0.383465975522995 2023-01-22 09:52:33.283344: step: 846/463, loss: 0.6714259386062622 2023-01-22 09:52:33.889306: step: 848/463, loss: 0.5280047059059143 2023-01-22 09:52:34.645334: step: 850/463, loss: 0.24716663360595703 2023-01-22 09:52:35.222891: step: 852/463, loss: 1.1619768142700195 2023-01-22 09:52:35.830383: step: 854/463, loss: 0.3374081552028656 2023-01-22 09:52:36.417809: step: 856/463, loss: 0.49963921308517456 2023-01-22 09:52:37.013239: step: 858/463, loss: 0.6429591774940491 2023-01-22 09:52:37.625256: step: 860/463, loss: 0.27377620339393616 2023-01-22 09:52:38.240292: step: 862/463, loss: 0.29810675978660583 2023-01-22 09:52:38.843207: step: 864/463, loss: 0.48808982968330383 2023-01-22 09:52:39.503956: step: 866/463, loss: 0.2809275984764099 2023-01-22 09:52:40.119399: step: 868/463, loss: 0.7357933521270752 2023-01-22 09:52:40.720093: step: 870/463, loss: 0.8168999552726746 2023-01-22 09:52:41.350377: step: 872/463, loss: 0.3281324505805969 2023-01-22 09:52:41.970038: step: 874/463, loss: 0.6887940764427185 2023-01-22 09:52:42.561810: step: 876/463, loss: 0.5973749756813049 2023-01-22 09:52:43.173899: step: 878/463, loss: 0.1966850757598877 2023-01-22 09:52:43.843601: step: 880/463, loss: 0.4791107475757599 2023-01-22 09:52:44.514162: step: 882/463, loss: 0.2369583696126938 2023-01-22 09:52:45.092510: step: 884/463, loss: 0.24381394684314728 2023-01-22 09:52:45.704059: step: 886/463, loss: 0.4390019178390503 2023-01-22 09:52:46.285817: step: 888/463, loss: 4.36630916595459 2023-01-22 09:52:46.858993: step: 890/463, loss: 0.31937089562416077 2023-01-22 09:52:47.519587: step: 892/463, loss: 0.23234668374061584 2023-01-22 09:52:48.109337: step: 894/463, loss: 0.2113475203514099 2023-01-22 09:52:48.734579: step: 896/463, loss: 1.6517930030822754 2023-01-22 09:52:49.298111: step: 898/463, loss: 0.4913887083530426 2023-01-22 09:52:49.937061: step: 900/463, loss: 0.35672491788864136 2023-01-22 09:52:50.562946: step: 902/463, loss: 1.694883108139038 2023-01-22 09:52:51.189181: step: 904/463, loss: 0.3495710492134094 2023-01-22 09:52:51.817737: step: 906/463, loss: 2.152754545211792 2023-01-22 09:52:52.381963: step: 908/463, loss: 0.3849526643753052 2023-01-22 09:52:52.972123: step: 910/463, loss: 0.15306980907917023 2023-01-22 09:52:53.642622: step: 912/463, loss: 0.3186946213245392 2023-01-22 09:52:54.245880: step: 914/463, loss: 0.1676206886768341 2023-01-22 09:52:54.845912: step: 916/463, loss: 1.0460083484649658 2023-01-22 09:52:55.475307: step: 918/463, loss: 0.6239566802978516 2023-01-22 09:52:56.237753: step: 920/463, loss: 1.5490068197250366 2023-01-22 09:52:56.820511: step: 922/463, loss: 0.6644265055656433 2023-01-22 09:52:57.380542: step: 924/463, loss: 0.6493674516677856 2023-01-22 09:52:58.025542: step: 926/463, loss: 1.972741961479187 ================================================== Loss: 0.729 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3335795293176009, 'r': 0.31838805170161905, 'f1': 0.32580680242088006}, 'combined': 0.240068170204859, 'epoch': 6} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35086511957313077, 'r': 0.3306229011362194, 'f1': 0.34044338334818636}, 'combined': 0.26388434498758945, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.294349755002927, 'r': 0.3345645222898544, 'f1': 0.3131714089640378}, 'combined': 0.23075788028929098, 'epoch': 6} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3358950473035951, 'r': 0.33466466251493726, 'f1': 0.3352787261158821}, 'combined': 0.25988111785058804, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2943745297296732, 'r': 0.32230380200004066, 'f1': 0.30770670951815476}, 'combined': 0.22673125964495613, 'epoch': 6} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3406437995843611, 'r': 0.33158910646983636, 'f1': 0.3360554715825281}, 'combined': 0.26048318849937585, 'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3142857142857143, 'r': 0.3142857142857143, 'f1': 0.3142857142857143}, 'combined': 0.2095238095238095, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35714285714285715, 'r': 0.1724137931034483, 'f1': 0.23255813953488377}, 'combined': 0.1550387596899225, 'epoch': 6} New best chinese model... New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3335795293176009, 'r': 0.31838805170161905, 'f1': 0.32580680242088006}, 'combined': 0.240068170204859, 'epoch': 6} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35086511957313077, 'r': 0.3306229011362194, 'f1': 0.34044338334818636}, 'combined': 0.26388434498758945, 'epoch': 6} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3142857142857143, 'r': 0.3142857142857143, 'f1': 0.3142857142857143}, 'combined': 0.2095238095238095, 'epoch': 6} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.294349755002927, 'r': 0.3345645222898544, 'f1': 0.3131714089640378}, 'combined': 0.23075788028929098, 'epoch': 6} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3358950473035951, 'r': 0.33466466251493726, 'f1': 0.3352787261158821}, 'combined': 0.25988111785058804, 'epoch': 6} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 6} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28012957534441907, 'r': 0.3140846753861668, 'f1': 0.29613697964981445}, 'combined': 0.21820619553144222, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3064842101133353, 'r': 0.3585977936344447, 'f1': 0.3304992795207758}, 'combined': 0.2561764750352425, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.421875, 'r': 0.23275862068965517, 'f1': 0.3}, 'combined': 0.19999999999999998, 'epoch': 5} ****************************** Epoch: 7 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 09:55:56.756788: step: 2/463, loss: 0.807654857635498 2023-01-22 09:55:57.402298: step: 4/463, loss: 0.7418429851531982 2023-01-22 09:55:58.036179: step: 6/463, loss: 0.30306100845336914 2023-01-22 09:55:58.544688: step: 8/463, loss: 0.3872629404067993 2023-01-22 09:55:59.105720: step: 10/463, loss: 0.17483903467655182 2023-01-22 09:55:59.647478: step: 12/463, loss: 0.6835933923721313 2023-01-22 09:56:00.237612: step: 14/463, loss: 0.4460596442222595 2023-01-22 09:56:00.849907: step: 16/463, loss: 0.29260244965553284 2023-01-22 09:56:01.545172: step: 18/463, loss: 0.36839205026626587 2023-01-22 09:56:02.124013: step: 20/463, loss: 0.2833377420902252 2023-01-22 09:56:02.749140: step: 22/463, loss: 0.1488151103258133 2023-01-22 09:56:03.302583: step: 24/463, loss: 0.19251784682273865 2023-01-22 09:56:03.958850: step: 26/463, loss: 0.4787790775299072 2023-01-22 09:56:04.538533: step: 28/463, loss: 0.20639607310295105 2023-01-22 09:56:05.080306: step: 30/463, loss: 0.3026690185070038 2023-01-22 09:56:05.672756: step: 32/463, loss: 0.38833969831466675 2023-01-22 09:56:06.304953: step: 34/463, loss: 0.47689932584762573 2023-01-22 09:56:06.985865: step: 36/463, loss: 0.38047540187835693 2023-01-22 09:56:07.624394: step: 38/463, loss: 0.5756352543830872 2023-01-22 09:56:08.160586: step: 40/463, loss: 0.459454208612442 2023-01-22 09:56:08.713927: step: 42/463, loss: 0.5612775683403015 2023-01-22 09:56:09.322420: step: 44/463, loss: 0.22516365349292755 2023-01-22 09:56:09.929191: step: 46/463, loss: 0.3391384184360504 2023-01-22 09:56:10.517027: step: 48/463, loss: 0.19588753581047058 2023-01-22 09:56:11.168944: step: 50/463, loss: 2.4960684776306152 2023-01-22 09:56:11.811119: step: 52/463, loss: 0.9179222583770752 2023-01-22 09:56:12.466358: step: 54/463, loss: 0.2930730879306793 2023-01-22 09:56:13.094724: step: 56/463, loss: 0.40667015314102173 2023-01-22 09:56:13.705863: step: 58/463, loss: 0.38417690992355347 2023-01-22 09:56:14.325123: step: 60/463, loss: 0.1884060800075531 2023-01-22 09:56:14.902939: step: 62/463, loss: 0.4711410403251648 2023-01-22 09:56:15.491531: step: 64/463, loss: 0.4842836856842041 2023-01-22 09:56:16.119059: step: 66/463, loss: 0.6604782342910767 2023-01-22 09:56:16.686753: step: 68/463, loss: 0.1569122076034546 2023-01-22 09:56:17.314093: step: 70/463, loss: 0.6698318123817444 2023-01-22 09:56:17.951642: step: 72/463, loss: 0.06883817166090012 2023-01-22 09:56:18.544834: step: 74/463, loss: 0.802859365940094 2023-01-22 09:56:19.174804: step: 76/463, loss: 0.21703235805034637 2023-01-22 09:56:19.759228: step: 78/463, loss: 0.4542236626148224 2023-01-22 09:56:20.376363: step: 80/463, loss: 0.45655548572540283 2023-01-22 09:56:21.004749: step: 82/463, loss: 0.17808252573013306 2023-01-22 09:56:21.702240: step: 84/463, loss: 0.4637203812599182 2023-01-22 09:56:22.271446: step: 86/463, loss: 0.5246639251708984 2023-01-22 09:56:22.888596: step: 88/463, loss: 0.8671875 2023-01-22 09:56:23.545118: step: 90/463, loss: 0.28850170969963074 2023-01-22 09:56:24.175280: step: 92/463, loss: 0.8279662728309631 2023-01-22 09:56:24.785612: step: 94/463, loss: 0.1822223663330078 2023-01-22 09:56:25.391292: step: 96/463, loss: 0.780676543712616 2023-01-22 09:56:25.998657: step: 98/463, loss: 0.29151782393455505 2023-01-22 09:56:26.655490: step: 100/463, loss: 0.15302254259586334 2023-01-22 09:56:27.225217: step: 102/463, loss: 0.16352394223213196 2023-01-22 09:56:27.844847: step: 104/463, loss: 0.23694144189357758 2023-01-22 09:56:28.421788: step: 106/463, loss: 0.19907023012638092 2023-01-22 09:56:29.066986: step: 108/463, loss: 0.27573174238204956 2023-01-22 09:56:29.716320: step: 110/463, loss: 0.6128889918327332 2023-01-22 09:56:30.347691: step: 112/463, loss: 1.5029224157333374 2023-01-22 09:56:30.973725: step: 114/463, loss: 0.6199305057525635 2023-01-22 09:56:31.551663: step: 116/463, loss: 0.31033530831336975 2023-01-22 09:56:32.146309: step: 118/463, loss: 0.6868170499801636 2023-01-22 09:56:32.723578: step: 120/463, loss: 0.8857533931732178 2023-01-22 09:56:33.353155: step: 122/463, loss: 0.20317086577415466 2023-01-22 09:56:33.964969: step: 124/463, loss: 1.5232303142547607 2023-01-22 09:56:34.536609: step: 126/463, loss: 1.0587047338485718 2023-01-22 09:56:35.114847: step: 128/463, loss: 0.16158871352672577 2023-01-22 09:56:35.812280: step: 130/463, loss: 0.8642796874046326 2023-01-22 09:56:36.376751: step: 132/463, loss: 0.34781551361083984 2023-01-22 09:56:36.989418: step: 134/463, loss: 0.5579984188079834 2023-01-22 09:56:37.648255: step: 136/463, loss: 0.6466804146766663 2023-01-22 09:56:38.264486: step: 138/463, loss: 0.666228175163269 2023-01-22 09:56:38.851355: step: 140/463, loss: 0.48522070050239563 2023-01-22 09:56:39.446409: step: 142/463, loss: 0.23623982071876526 2023-01-22 09:56:40.030042: step: 144/463, loss: 1.0772197246551514 2023-01-22 09:56:40.638261: step: 146/463, loss: 0.27934178709983826 2023-01-22 09:56:41.205949: step: 148/463, loss: 0.48079627752304077 2023-01-22 09:56:41.884463: step: 150/463, loss: 0.3658713102340698 2023-01-22 09:56:42.470230: step: 152/463, loss: 0.5583712458610535 2023-01-22 09:56:43.123179: step: 154/463, loss: 0.4811897873878479 2023-01-22 09:56:43.729848: step: 156/463, loss: 0.48738229274749756 2023-01-22 09:56:44.418015: step: 158/463, loss: 0.2898690700531006 2023-01-22 09:56:44.993046: step: 160/463, loss: 1.4352582693099976 2023-01-22 09:56:45.632239: step: 162/463, loss: 0.2560354471206665 2023-01-22 09:56:46.219760: step: 164/463, loss: 0.36349260807037354 2023-01-22 09:56:46.853383: step: 166/463, loss: 1.0397658348083496 2023-01-22 09:56:47.489871: step: 168/463, loss: 0.9558521509170532 2023-01-22 09:56:48.101251: step: 170/463, loss: 0.29159045219421387 2023-01-22 09:56:48.695636: step: 172/463, loss: 0.2603519856929779 2023-01-22 09:56:49.295746: step: 174/463, loss: 0.42037704586982727 2023-01-22 09:56:49.914845: step: 176/463, loss: 0.15870143473148346 2023-01-22 09:56:50.530643: step: 178/463, loss: 0.779308021068573 2023-01-22 09:56:51.156426: step: 180/463, loss: 0.26173537969589233 2023-01-22 09:56:51.813772: step: 182/463, loss: 1.1715259552001953 2023-01-22 09:56:52.424379: step: 184/463, loss: 0.5751940011978149 2023-01-22 09:56:53.196398: step: 186/463, loss: 0.331051230430603 2023-01-22 09:56:53.830899: step: 188/463, loss: 0.18856927752494812 2023-01-22 09:56:54.433494: step: 190/463, loss: 0.6854605674743652 2023-01-22 09:56:55.041503: step: 192/463, loss: 0.6216540932655334 2023-01-22 09:56:55.638179: step: 194/463, loss: 0.18062391877174377 2023-01-22 09:56:56.243027: step: 196/463, loss: 0.3826708197593689 2023-01-22 09:56:56.901227: step: 198/463, loss: 0.3214462995529175 2023-01-22 09:56:57.554959: step: 200/463, loss: 0.33739641308784485 2023-01-22 09:56:58.171665: step: 202/463, loss: 0.8185354471206665 2023-01-22 09:56:58.761007: step: 204/463, loss: 0.2642740309238434 2023-01-22 09:56:59.391845: step: 206/463, loss: 0.7646939158439636 2023-01-22 09:57:00.022593: step: 208/463, loss: 16.320388793945312 2023-01-22 09:57:00.697181: step: 210/463, loss: 0.2212313711643219 2023-01-22 09:57:01.414167: step: 212/463, loss: 0.6817798018455505 2023-01-22 09:57:02.093956: step: 214/463, loss: 0.4071405827999115 2023-01-22 09:57:02.686573: step: 216/463, loss: 0.8440535068511963 2023-01-22 09:57:03.332294: step: 218/463, loss: 0.134111687541008 2023-01-22 09:57:03.925615: step: 220/463, loss: 0.2947194278240204 2023-01-22 09:57:04.588418: step: 222/463, loss: 0.22341568768024445 2023-01-22 09:57:05.203917: step: 224/463, loss: 0.2976885139942169 2023-01-22 09:57:05.844711: step: 226/463, loss: 0.8032858967781067 2023-01-22 09:57:06.409100: step: 228/463, loss: 0.8179838061332703 2023-01-22 09:57:07.020587: step: 230/463, loss: 0.33685067296028137 2023-01-22 09:57:07.647115: step: 232/463, loss: 0.5753642320632935 2023-01-22 09:57:08.248378: step: 234/463, loss: 0.06891431659460068 2023-01-22 09:57:08.869984: step: 236/463, loss: 0.25993892550468445 2023-01-22 09:57:09.559407: step: 238/463, loss: 1.5857888460159302 2023-01-22 09:57:10.144435: step: 240/463, loss: 0.23574510216712952 2023-01-22 09:57:10.795814: step: 242/463, loss: 0.9469902515411377 2023-01-22 09:57:11.480028: step: 244/463, loss: 0.8277049660682678 2023-01-22 09:57:12.138706: step: 246/463, loss: 0.7651932835578918 2023-01-22 09:57:12.762827: step: 248/463, loss: 0.40362977981567383 2023-01-22 09:57:13.393398: step: 250/463, loss: 0.37863391637802124 2023-01-22 09:57:13.999110: step: 252/463, loss: 0.49910610914230347 2023-01-22 09:57:14.634077: step: 254/463, loss: 0.6247705817222595 2023-01-22 09:57:15.241978: step: 256/463, loss: 0.39149364829063416 2023-01-22 09:57:15.991504: step: 258/463, loss: 0.6698437333106995 2023-01-22 09:57:16.552415: step: 260/463, loss: 3.34090256690979 2023-01-22 09:57:17.267423: step: 262/463, loss: 0.33949434757232666 2023-01-22 09:57:17.907988: step: 264/463, loss: 0.39752963185310364 2023-01-22 09:57:18.541353: step: 266/463, loss: 0.42182302474975586 2023-01-22 09:57:19.127895: step: 268/463, loss: 0.29192841053009033 2023-01-22 09:57:19.804802: step: 270/463, loss: 0.3941940665245056 2023-01-22 09:57:20.353006: step: 272/463, loss: 0.4327924847602844 2023-01-22 09:57:20.967241: step: 274/463, loss: 1.0333911180496216 2023-01-22 09:57:21.550217: step: 276/463, loss: 0.7380674481391907 2023-01-22 09:57:22.198407: step: 278/463, loss: 0.4221072196960449 2023-01-22 09:57:22.798651: step: 280/463, loss: 0.3454017639160156 2023-01-22 09:57:23.413859: step: 282/463, loss: 0.16518624126911163 2023-01-22 09:57:24.079628: step: 284/463, loss: 0.1964893639087677 2023-01-22 09:57:24.708024: step: 286/463, loss: 1.697434663772583 2023-01-22 09:57:25.335209: step: 288/463, loss: 0.4196591377258301 2023-01-22 09:57:25.988635: step: 290/463, loss: 0.542227566242218 2023-01-22 09:57:26.520740: step: 292/463, loss: 0.1716984063386917 2023-01-22 09:57:27.147705: step: 294/463, loss: 0.19872500002384186 2023-01-22 09:57:27.745668: step: 296/463, loss: 0.3258502185344696 2023-01-22 09:57:28.371392: step: 298/463, loss: 0.1960049271583557 2023-01-22 09:57:29.028706: step: 300/463, loss: 0.4125145673751831 2023-01-22 09:57:29.641604: step: 302/463, loss: 1.8116427659988403 2023-01-22 09:57:30.249650: step: 304/463, loss: 0.7189337611198425 2023-01-22 09:57:30.902570: step: 306/463, loss: 0.5128545165061951 2023-01-22 09:57:31.468254: step: 308/463, loss: 0.4789336323738098 2023-01-22 09:57:32.102400: step: 310/463, loss: 0.1118573248386383 2023-01-22 09:57:32.694877: step: 312/463, loss: 0.13190127909183502 2023-01-22 09:57:33.381894: step: 314/463, loss: 0.6765699982643127 2023-01-22 09:57:33.940771: step: 316/463, loss: 0.4358658492565155 2023-01-22 09:57:34.495447: step: 318/463, loss: 1.0757784843444824 2023-01-22 09:57:35.098990: step: 320/463, loss: 0.6946585178375244 2023-01-22 09:57:35.820403: step: 322/463, loss: 0.3356768786907196 2023-01-22 09:57:36.557036: step: 324/463, loss: 0.3354523181915283 2023-01-22 09:57:37.178332: step: 326/463, loss: 0.6069251894950867 2023-01-22 09:57:37.833983: step: 328/463, loss: 1.1654002666473389 2023-01-22 09:57:38.482527: step: 330/463, loss: 0.38132423162460327 2023-01-22 09:57:39.203387: step: 332/463, loss: 1.2565807104110718 2023-01-22 09:57:39.871389: step: 334/463, loss: 0.4544839859008789 2023-01-22 09:57:40.490671: step: 336/463, loss: 0.21627789735794067 2023-01-22 09:57:41.062251: step: 338/463, loss: 0.44023042917251587 2023-01-22 09:57:41.659010: step: 340/463, loss: 0.8491129875183105 2023-01-22 09:57:42.302153: step: 342/463, loss: 0.6784866452217102 2023-01-22 09:57:42.968931: step: 344/463, loss: 0.8466278910636902 2023-01-22 09:57:43.649314: step: 346/463, loss: 0.4635503888130188 2023-01-22 09:57:44.216094: step: 348/463, loss: 0.3777149021625519 2023-01-22 09:57:44.872651: step: 350/463, loss: 1.0819035768508911 2023-01-22 09:57:45.472080: step: 352/463, loss: 0.8905228972434998 2023-01-22 09:57:46.047925: step: 354/463, loss: 0.17360574007034302 2023-01-22 09:57:46.632323: step: 356/463, loss: 0.7715097069740295 2023-01-22 09:57:47.214953: step: 358/463, loss: 0.24512378871440887 2023-01-22 09:57:47.757381: step: 360/463, loss: 0.25460466742515564 2023-01-22 09:57:48.449102: step: 362/463, loss: 0.30078959465026855 2023-01-22 09:57:49.088499: step: 364/463, loss: 0.34295371174812317 2023-01-22 09:57:49.672641: step: 366/463, loss: 0.14471924304962158 2023-01-22 09:57:50.331709: step: 368/463, loss: 0.39906489849090576 2023-01-22 09:57:51.001031: step: 370/463, loss: 1.0646107196807861 2023-01-22 09:57:51.644565: step: 372/463, loss: 0.25546663999557495 2023-01-22 09:57:52.308612: step: 374/463, loss: 0.45163822174072266 2023-01-22 09:57:52.935594: step: 376/463, loss: 0.2709403336048126 2023-01-22 09:57:53.537085: step: 378/463, loss: 0.18220201134681702 2023-01-22 09:57:54.194430: step: 380/463, loss: 0.2909991443157196 2023-01-22 09:57:54.780155: step: 382/463, loss: 0.353007048368454 2023-01-22 09:57:55.424060: step: 384/463, loss: 0.5811549425125122 2023-01-22 09:57:56.036182: step: 386/463, loss: 0.8028070330619812 2023-01-22 09:57:56.593624: step: 388/463, loss: 0.5155059099197388 2023-01-22 09:57:57.208883: step: 390/463, loss: 0.42461997270584106 2023-01-22 09:57:57.817819: step: 392/463, loss: 3.6070966720581055 2023-01-22 09:57:58.410179: step: 394/463, loss: 0.21848256886005402 2023-01-22 09:57:59.017833: step: 396/463, loss: 0.23974929749965668 2023-01-22 09:57:59.614624: step: 398/463, loss: 0.42405638098716736 2023-01-22 09:58:00.201956: step: 400/463, loss: 0.6365113854408264 2023-01-22 09:58:00.842241: step: 402/463, loss: 0.3297807276248932 2023-01-22 09:58:01.449716: step: 404/463, loss: 0.4948780834674835 2023-01-22 09:58:02.120527: step: 406/463, loss: 0.315847247838974 2023-01-22 09:58:02.731003: step: 408/463, loss: 0.14956313371658325 2023-01-22 09:58:03.428158: step: 410/463, loss: 0.6917096376419067 2023-01-22 09:58:04.046886: step: 412/463, loss: 0.20221228897571564 2023-01-22 09:58:04.714998: step: 414/463, loss: 0.36974620819091797 2023-01-22 09:58:05.349702: step: 416/463, loss: 1.0079822540283203 2023-01-22 09:58:05.959577: step: 418/463, loss: 0.3096442222595215 2023-01-22 09:58:06.654165: step: 420/463, loss: 0.38070785999298096 2023-01-22 09:58:07.265043: step: 422/463, loss: 0.5125380158424377 2023-01-22 09:58:07.929532: step: 424/463, loss: 0.16851523518562317 2023-01-22 09:58:08.514118: step: 426/463, loss: 0.17775863409042358 2023-01-22 09:58:09.142918: step: 428/463, loss: 1.3436264991760254 2023-01-22 09:58:09.785289: step: 430/463, loss: 0.46194779872894287 2023-01-22 09:58:10.400846: step: 432/463, loss: 0.7208782434463501 2023-01-22 09:58:11.000512: step: 434/463, loss: 0.7194353938102722 2023-01-22 09:58:11.590697: step: 436/463, loss: 0.20689809322357178 2023-01-22 09:58:12.299987: step: 438/463, loss: 0.40774598717689514 2023-01-22 09:58:12.926831: step: 440/463, loss: 0.34186121821403503 2023-01-22 09:58:13.558200: step: 442/463, loss: 0.454932302236557 2023-01-22 09:58:14.204280: step: 444/463, loss: 1.22999906539917 2023-01-22 09:58:14.819025: step: 446/463, loss: 0.21150189638137817 2023-01-22 09:58:15.502079: step: 448/463, loss: 0.43343502283096313 2023-01-22 09:58:16.081007: step: 450/463, loss: 3.3858485221862793 2023-01-22 09:58:16.680162: step: 452/463, loss: 0.4066624641418457 2023-01-22 09:58:17.292701: step: 454/463, loss: 0.8977017998695374 2023-01-22 09:58:17.920265: step: 456/463, loss: 0.2965715825557709 2023-01-22 09:58:18.568811: step: 458/463, loss: 0.247700035572052 2023-01-22 09:58:19.239673: step: 460/463, loss: 0.35015323758125305 2023-01-22 09:58:19.880810: step: 462/463, loss: 0.2099350392818451 2023-01-22 09:58:20.419640: step: 464/463, loss: 2.146547794342041 2023-01-22 09:58:21.009375: step: 466/463, loss: 0.8461926579475403 2023-01-22 09:58:21.653727: step: 468/463, loss: 0.28629907965660095 2023-01-22 09:58:22.243771: step: 470/463, loss: 0.2353653907775879 2023-01-22 09:58:22.875465: step: 472/463, loss: 0.28022244572639465 2023-01-22 09:58:23.466006: step: 474/463, loss: 0.8489611148834229 2023-01-22 09:58:24.063341: step: 476/463, loss: 0.8608915209770203 2023-01-22 09:58:24.663446: step: 478/463, loss: 0.3049798607826233 2023-01-22 09:58:25.318771: step: 480/463, loss: 0.23026049137115479 2023-01-22 09:58:26.038437: step: 482/463, loss: 0.7937565445899963 2023-01-22 09:58:26.660663: step: 484/463, loss: 0.6780728101730347 2023-01-22 09:58:27.252617: step: 486/463, loss: 0.15573105216026306 2023-01-22 09:58:27.917588: step: 488/463, loss: 0.6416550278663635 2023-01-22 09:58:28.530957: step: 490/463, loss: 0.34756866097450256 2023-01-22 09:58:29.210854: step: 492/463, loss: 0.46077483892440796 2023-01-22 09:58:29.781888: step: 494/463, loss: 0.5212734937667847 2023-01-22 09:58:30.485953: step: 496/463, loss: 0.7460431456565857 2023-01-22 09:58:31.166293: step: 498/463, loss: 0.1731736809015274 2023-01-22 09:58:31.808600: step: 500/463, loss: 1.0468392372131348 2023-01-22 09:58:32.417216: step: 502/463, loss: 0.2363441437482834 2023-01-22 09:58:33.031296: step: 504/463, loss: 0.27943772077560425 2023-01-22 09:58:33.755340: step: 506/463, loss: 0.25516173243522644 2023-01-22 09:58:34.382331: step: 508/463, loss: 0.21006213128566742 2023-01-22 09:58:34.992716: step: 510/463, loss: 0.17224839329719543 2023-01-22 09:58:35.592293: step: 512/463, loss: 0.6667054891586304 2023-01-22 09:58:36.251244: step: 514/463, loss: 0.5145998597145081 2023-01-22 09:58:36.876047: step: 516/463, loss: 0.2936624586582184 2023-01-22 09:58:37.467396: step: 518/463, loss: 1.0246702432632446 2023-01-22 09:58:38.182091: step: 520/463, loss: 0.3395323157310486 2023-01-22 09:58:38.762332: step: 522/463, loss: 0.1633838415145874 2023-01-22 09:58:39.363408: step: 524/463, loss: 0.7587984204292297 2023-01-22 09:58:39.952082: step: 526/463, loss: 0.15019561350345612 2023-01-22 09:58:40.512327: step: 528/463, loss: 0.8764554262161255 2023-01-22 09:58:41.166738: step: 530/463, loss: 0.48441052436828613 2023-01-22 09:58:41.742874: step: 532/463, loss: 0.3228605389595032 2023-01-22 09:58:42.371924: step: 534/463, loss: 0.2345363199710846 2023-01-22 09:58:43.017934: step: 536/463, loss: 0.47030070424079895 2023-01-22 09:58:43.647135: step: 538/463, loss: 1.6260746717453003 2023-01-22 09:58:44.278356: step: 540/463, loss: 0.3487606644630432 2023-01-22 09:58:44.901633: step: 542/463, loss: 0.5683432817459106 2023-01-22 09:58:45.545946: step: 544/463, loss: 1.0445327758789062 2023-01-22 09:58:46.145516: step: 546/463, loss: 0.52126544713974 2023-01-22 09:58:46.775383: step: 548/463, loss: 0.14662712812423706 2023-01-22 09:58:47.385312: step: 550/463, loss: 0.3969848155975342 2023-01-22 09:58:47.931717: step: 552/463, loss: 0.28085920214653015 2023-01-22 09:58:48.575726: step: 554/463, loss: 0.5541949272155762 2023-01-22 09:58:49.185372: step: 556/463, loss: 0.8852306008338928 2023-01-22 09:58:49.869912: step: 558/463, loss: 0.2770520746707916 2023-01-22 09:58:50.567636: step: 560/463, loss: 0.5369435548782349 2023-01-22 09:58:51.158907: step: 562/463, loss: 0.20169247686862946 2023-01-22 09:58:51.733979: step: 564/463, loss: 0.3182758688926697 2023-01-22 09:58:52.319329: step: 566/463, loss: 0.6117270588874817 2023-01-22 09:58:52.948137: step: 568/463, loss: 0.3761119842529297 2023-01-22 09:58:53.590303: step: 570/463, loss: 0.5531476140022278 2023-01-22 09:58:54.182294: step: 572/463, loss: 0.26792165637016296 2023-01-22 09:58:54.817514: step: 574/463, loss: 0.3079359233379364 2023-01-22 09:58:55.504903: step: 576/463, loss: 0.09021048992872238 2023-01-22 09:58:56.123636: step: 578/463, loss: 0.4560026526451111 2023-01-22 09:58:56.717027: step: 580/463, loss: 0.9412817358970642 2023-01-22 09:58:57.378682: step: 582/463, loss: 0.32238975167274475 2023-01-22 09:58:57.933559: step: 584/463, loss: 0.22451704740524292 2023-01-22 09:58:58.506288: step: 586/463, loss: 0.7485355734825134 2023-01-22 09:58:59.120265: step: 588/463, loss: 0.3757765591144562 2023-01-22 09:58:59.719989: step: 590/463, loss: 0.30013325810432434 2023-01-22 09:59:00.325740: step: 592/463, loss: 0.28425711393356323 2023-01-22 09:59:00.939047: step: 594/463, loss: 0.5399459600448608 2023-01-22 09:59:01.591657: step: 596/463, loss: 0.22150474786758423 2023-01-22 09:59:02.211180: step: 598/463, loss: 0.2701462507247925 2023-01-22 09:59:02.810345: step: 600/463, loss: 0.19972483813762665 2023-01-22 09:59:03.446573: step: 602/463, loss: 0.4586547911167145 2023-01-22 09:59:04.070497: step: 604/463, loss: 0.3901784420013428 2023-01-22 09:59:04.662413: step: 606/463, loss: 0.2278127670288086 2023-01-22 09:59:05.352135: step: 608/463, loss: 0.3906201124191284 2023-01-22 09:59:05.963463: step: 610/463, loss: 0.16159886121749878 2023-01-22 09:59:06.616803: step: 612/463, loss: 0.39694541692733765 2023-01-22 09:59:07.267472: step: 614/463, loss: 0.1280856430530548 2023-01-22 09:59:07.940768: step: 616/463, loss: 0.2246810346841812 2023-01-22 09:59:08.552321: step: 618/463, loss: 2.6703665256500244 2023-01-22 09:59:09.113281: step: 620/463, loss: 0.27411097288131714 2023-01-22 09:59:09.769225: step: 622/463, loss: 0.42829567193984985 2023-01-22 09:59:10.423673: step: 624/463, loss: 0.7449740171432495 2023-01-22 09:59:10.989735: step: 626/463, loss: 0.6394545435905457 2023-01-22 09:59:11.602284: step: 628/463, loss: 1.4328681230545044 2023-01-22 09:59:12.208079: step: 630/463, loss: 0.21688972413539886 2023-01-22 09:59:12.831107: step: 632/463, loss: 0.23573768138885498 2023-01-22 09:59:13.497076: step: 634/463, loss: 0.3699069023132324 2023-01-22 09:59:14.075335: step: 636/463, loss: 0.20608720183372498 2023-01-22 09:59:14.659546: step: 638/463, loss: 0.751802384853363 2023-01-22 09:59:15.250796: step: 640/463, loss: 0.17930588126182556 2023-01-22 09:59:15.863468: step: 642/463, loss: 1.1260582208633423 2023-01-22 09:59:16.469903: step: 644/463, loss: 0.47740891575813293 2023-01-22 09:59:17.049153: step: 646/463, loss: 0.32726845145225525 2023-01-22 09:59:17.623896: step: 648/463, loss: 0.4457055330276489 2023-01-22 09:59:18.186594: step: 650/463, loss: 0.4692555367946625 2023-01-22 09:59:18.858935: step: 652/463, loss: 0.38204291462898254 2023-01-22 09:59:19.418126: step: 654/463, loss: 0.10649622976779938 2023-01-22 09:59:19.966256: step: 656/463, loss: 0.6467968821525574 2023-01-22 09:59:20.610847: step: 658/463, loss: 3.5594067573547363 2023-01-22 09:59:21.227081: step: 660/463, loss: 0.2943882644176483 2023-01-22 09:59:21.854366: step: 662/463, loss: 0.45514634251594543 2023-01-22 09:59:22.458400: step: 664/463, loss: 0.6457360982894897 2023-01-22 09:59:23.172747: step: 666/463, loss: 0.38968369364738464 2023-01-22 09:59:23.779457: step: 668/463, loss: 0.7273297905921936 2023-01-22 09:59:24.447560: step: 670/463, loss: 0.9432954788208008 2023-01-22 09:59:25.018155: step: 672/463, loss: 0.511285126209259 2023-01-22 09:59:25.663963: step: 674/463, loss: 0.5174610018730164 2023-01-22 09:59:26.271392: step: 676/463, loss: 0.8758470416069031 2023-01-22 09:59:26.837486: step: 678/463, loss: 0.6177841424942017 2023-01-22 09:59:27.470753: step: 680/463, loss: 0.16760799288749695 2023-01-22 09:59:28.093380: step: 682/463, loss: 0.5053936839103699 2023-01-22 09:59:28.674410: step: 684/463, loss: 0.6580698490142822 2023-01-22 09:59:29.284492: step: 686/463, loss: 1.0233275890350342 2023-01-22 09:59:29.849247: step: 688/463, loss: 0.37378281354904175 2023-01-22 09:59:30.457450: step: 690/463, loss: 2.5256831645965576 2023-01-22 09:59:31.125715: step: 692/463, loss: 0.2835516631603241 2023-01-22 09:59:31.738714: step: 694/463, loss: 0.39075908064842224 2023-01-22 09:59:32.367731: step: 696/463, loss: 1.6244397163391113 2023-01-22 09:59:32.972129: step: 698/463, loss: 0.4946838319301605 2023-01-22 09:59:33.607872: step: 700/463, loss: 0.6236141324043274 2023-01-22 09:59:34.250631: step: 702/463, loss: 1.4703083038330078 2023-01-22 09:59:34.847531: step: 704/463, loss: 0.27198830246925354 2023-01-22 09:59:35.473031: step: 706/463, loss: 0.23540060222148895 2023-01-22 09:59:36.113351: step: 708/463, loss: 0.37853050231933594 2023-01-22 09:59:36.802401: step: 710/463, loss: 0.39998170733451843 2023-01-22 09:59:37.431152: step: 712/463, loss: 0.3821353018283844 2023-01-22 09:59:38.092634: step: 714/463, loss: 2.231764793395996 2023-01-22 09:59:38.724996: step: 716/463, loss: 1.0240801572799683 2023-01-22 09:59:39.373549: step: 718/463, loss: 0.32155483961105347 2023-01-22 09:59:40.030130: step: 720/463, loss: 0.45779508352279663 2023-01-22 09:59:40.626198: step: 722/463, loss: 0.5051276087760925 2023-01-22 09:59:41.310785: step: 724/463, loss: 0.20838335156440735 2023-01-22 09:59:41.900778: step: 726/463, loss: 0.2737368047237396 2023-01-22 09:59:42.548262: step: 728/463, loss: 1.008242130279541 2023-01-22 09:59:43.144387: step: 730/463, loss: 0.17207854986190796 2023-01-22 09:59:43.841704: step: 732/463, loss: 0.35649871826171875 2023-01-22 09:59:44.472707: step: 734/463, loss: 0.8441442847251892 2023-01-22 09:59:45.076060: step: 736/463, loss: 0.1971072256565094 2023-01-22 09:59:45.782617: step: 738/463, loss: 0.3960895538330078 2023-01-22 09:59:46.354121: step: 740/463, loss: 0.3293589949607849 2023-01-22 09:59:47.000216: step: 742/463, loss: 0.5418511033058167 2023-01-22 09:59:47.654709: step: 744/463, loss: 0.1348060518503189 2023-01-22 09:59:48.246421: step: 746/463, loss: 0.26601338386535645 2023-01-22 09:59:48.840763: step: 748/463, loss: 0.7992173433303833 2023-01-22 09:59:49.467971: step: 750/463, loss: 0.31622886657714844 2023-01-22 09:59:50.086143: step: 752/463, loss: 0.25814250111579895 2023-01-22 09:59:50.695453: step: 754/463, loss: 0.566559910774231 2023-01-22 09:59:51.322980: step: 756/463, loss: 0.13443303108215332 2023-01-22 09:59:51.937481: step: 758/463, loss: 0.2778433859348297 2023-01-22 09:59:52.535069: step: 760/463, loss: 0.17934758961200714 2023-01-22 09:59:53.166640: step: 762/463, loss: 0.5554709434509277 2023-01-22 09:59:53.846798: step: 764/463, loss: 0.3979393541812897 2023-01-22 09:59:54.392153: step: 766/463, loss: 0.5631006956100464 2023-01-22 09:59:55.042347: step: 768/463, loss: 1.1216202974319458 2023-01-22 09:59:55.644522: step: 770/463, loss: 0.9841573238372803 2023-01-22 09:59:56.279004: step: 772/463, loss: 0.44668784737586975 2023-01-22 09:59:56.905281: step: 774/463, loss: 0.7791368365287781 2023-01-22 09:59:57.507435: step: 776/463, loss: 0.1461644023656845 2023-01-22 09:59:58.118234: step: 778/463, loss: 0.9589164853096008 2023-01-22 09:59:58.821962: step: 780/463, loss: 0.8096201419830322 2023-01-22 09:59:59.419144: step: 782/463, loss: 0.4256063997745514 2023-01-22 09:59:59.993647: step: 784/463, loss: 0.7588926553726196 2023-01-22 10:00:00.617039: step: 786/463, loss: 1.454399824142456 2023-01-22 10:00:01.279845: step: 788/463, loss: 0.6575345993041992 2023-01-22 10:00:01.939978: step: 790/463, loss: 0.2303142249584198 2023-01-22 10:00:02.658476: step: 792/463, loss: 1.300675630569458 2023-01-22 10:00:03.335948: step: 794/463, loss: 0.3828582167625427 2023-01-22 10:00:03.981292: step: 796/463, loss: 0.4375804662704468 2023-01-22 10:00:04.632265: step: 798/463, loss: 0.1595740020275116 2023-01-22 10:00:05.247169: step: 800/463, loss: 0.2536550760269165 2023-01-22 10:00:05.840634: step: 802/463, loss: 0.2015850841999054 2023-01-22 10:00:06.524654: step: 804/463, loss: 0.2345610409975052 2023-01-22 10:00:07.177142: step: 806/463, loss: 0.16826623678207397 2023-01-22 10:00:07.790823: step: 808/463, loss: 0.2407093197107315 2023-01-22 10:00:08.394820: step: 810/463, loss: 8.192036628723145 2023-01-22 10:00:09.013415: step: 812/463, loss: 0.34899067878723145 2023-01-22 10:00:09.604594: step: 814/463, loss: 1.387427806854248 2023-01-22 10:00:10.268295: step: 816/463, loss: 0.9117158651351929 2023-01-22 10:00:10.916597: step: 818/463, loss: 1.0657923221588135 2023-01-22 10:00:11.487425: step: 820/463, loss: 0.421714186668396 2023-01-22 10:00:12.103469: step: 822/463, loss: 0.6935148239135742 2023-01-22 10:00:12.697072: step: 824/463, loss: 0.2468547224998474 2023-01-22 10:00:13.375995: step: 826/463, loss: 0.3636510968208313 2023-01-22 10:00:14.008431: step: 828/463, loss: 3.340468406677246 2023-01-22 10:00:14.688325: step: 830/463, loss: 0.29835495352745056 2023-01-22 10:00:15.325694: step: 832/463, loss: 0.9196963310241699 2023-01-22 10:00:15.934340: step: 834/463, loss: 0.27401408553123474 2023-01-22 10:00:16.512624: step: 836/463, loss: 0.21896766126155853 2023-01-22 10:00:17.084579: step: 838/463, loss: 0.44227316975593567 2023-01-22 10:00:17.678923: step: 840/463, loss: 0.15010488033294678 2023-01-22 10:00:18.336372: step: 842/463, loss: 0.6847964525222778 2023-01-22 10:00:18.904138: step: 844/463, loss: 0.6540997624397278 2023-01-22 10:00:19.520383: step: 846/463, loss: 0.28418341279029846 2023-01-22 10:00:20.183278: step: 848/463, loss: 0.677936851978302 2023-01-22 10:00:20.870026: step: 850/463, loss: 0.15395225584506989 2023-01-22 10:00:21.555085: step: 852/463, loss: 0.5777564644813538 2023-01-22 10:00:22.221860: step: 854/463, loss: 0.5975735783576965 2023-01-22 10:00:22.811016: step: 856/463, loss: 1.5409783124923706 2023-01-22 10:00:23.408045: step: 858/463, loss: 0.7472043633460999 2023-01-22 10:00:24.000832: step: 860/463, loss: 0.19810639321804047 2023-01-22 10:00:24.642811: step: 862/463, loss: 0.3365834355354309 2023-01-22 10:00:25.280166: step: 864/463, loss: 0.4925359785556793 2023-01-22 10:00:25.857093: step: 866/463, loss: 0.6772522926330566 2023-01-22 10:00:26.390592: step: 868/463, loss: 0.5169462561607361 2023-01-22 10:00:26.995503: step: 870/463, loss: 0.7759062051773071 2023-01-22 10:00:27.600507: step: 872/463, loss: 0.2759900391101837 2023-01-22 10:00:28.187178: step: 874/463, loss: 0.8977956771850586 2023-01-22 10:00:28.802181: step: 876/463, loss: 0.5913708806037903 2023-01-22 10:00:29.409263: step: 878/463, loss: 0.8133429884910583 2023-01-22 10:00:30.017212: step: 880/463, loss: 0.09968496114015579 2023-01-22 10:00:30.649832: step: 882/463, loss: 0.18211953341960907 2023-01-22 10:00:31.245395: step: 884/463, loss: 0.2955639958381653 2023-01-22 10:00:31.840613: step: 886/463, loss: 0.423570454120636 2023-01-22 10:00:32.454493: step: 888/463, loss: 0.605933427810669 2023-01-22 10:00:33.103631: step: 890/463, loss: 0.6295936107635498 2023-01-22 10:00:33.732326: step: 892/463, loss: 0.33904340863227844 2023-01-22 10:00:34.372038: step: 894/463, loss: 0.09033072739839554 2023-01-22 10:00:34.953819: step: 896/463, loss: 0.3019601106643677 2023-01-22 10:00:35.591201: step: 898/463, loss: 0.28671038150787354 2023-01-22 10:00:36.138506: step: 900/463, loss: 0.21940843760967255 2023-01-22 10:00:36.826505: step: 902/463, loss: 0.136872336268425 2023-01-22 10:00:37.418134: step: 904/463, loss: 0.3400750160217285 2023-01-22 10:00:38.012209: step: 906/463, loss: 1.4074058532714844 2023-01-22 10:00:38.617723: step: 908/463, loss: 0.5688105821609497 2023-01-22 10:00:39.234865: step: 910/463, loss: 0.9753573536872864 2023-01-22 10:00:39.833462: step: 912/463, loss: 0.19331412017345428 2023-01-22 10:00:40.464464: step: 914/463, loss: 0.40865567326545715 2023-01-22 10:00:41.073919: step: 916/463, loss: 0.12868818640708923 2023-01-22 10:00:41.682323: step: 918/463, loss: 0.519225001335144 2023-01-22 10:00:42.277808: step: 920/463, loss: 0.20459315180778503 2023-01-22 10:00:42.906149: step: 922/463, loss: 0.29174530506134033 2023-01-22 10:00:43.530076: step: 924/463, loss: 1.4248042106628418 2023-01-22 10:00:44.154506: step: 926/463, loss: 0.5768977403640747 ================================================== Loss: 0.601 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29855383301254707, 'r': 0.34444161379815674, 'f1': 0.3198603180116804}, 'combined': 0.23568655011386977, 'epoch': 7} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.30988948788864984, 'r': 0.3867922100668993, 'f1': 0.34409642236532006}, 'combined': 0.2667158871922577, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.26001530737750156, 'r': 0.3458647637032801, 'f1': 0.2968578672176362}, 'combined': 0.218737375844574, 'epoch': 7} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.2978896621081954, 'r': 0.39481332055148693, 'f1': 0.3395706662134528}, 'combined': 0.26320788481616914, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2611310905659904, 'r': 0.33595233283442405, 'f1': 0.29385374174894846}, 'combined': 0.2165238097097515, 'epoch': 7} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.2959613416560244, 'r': 0.38572902800389947, 'f1': 0.3349347026881425}, 'combined': 0.25961445854296217, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2636363636363636, 'r': 0.4142857142857143, 'f1': 0.3222222222222222}, 'combined': 0.2148148148148148, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.20945945945945946, 'r': 0.33695652173913043, 'f1': 0.25833333333333336}, 'combined': 0.12916666666666668, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3, 'r': 0.20689655172413793, 'f1': 0.24489795918367346}, 'combined': 0.16326530612244897, 'epoch': 7} New best chinese model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29855383301254707, 'r': 0.34444161379815674, 'f1': 0.3198603180116804}, 'combined': 0.23568655011386977, 'epoch': 7} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.30988948788864984, 'r': 0.3867922100668993, 'f1': 0.34409642236532006}, 'combined': 0.2667158871922577, 'epoch': 7} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2636363636363636, 'r': 0.4142857142857143, 'f1': 0.3222222222222222}, 'combined': 0.2148148148148148, 'epoch': 7} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.294349755002927, 'r': 0.3345645222898544, 'f1': 0.3131714089640378}, 'combined': 0.23075788028929098, 'epoch': 6} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3358950473035951, 'r': 0.33466466251493726, 'f1': 0.3352787261158821}, 'combined': 0.25988111785058804, 'epoch': 6} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 6} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28012957534441907, 'r': 0.3140846753861668, 'f1': 0.29613697964981445}, 'combined': 0.21820619553144222, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3064842101133353, 'r': 0.3585977936344447, 'f1': 0.3304992795207758}, 'combined': 0.2561764750352425, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.421875, 'r': 0.23275862068965517, 'f1': 0.3}, 'combined': 0.19999999999999998, 'epoch': 5} ****************************** Epoch: 8 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 10:03:36.537670: step: 2/463, loss: 0.27454549074172974 2023-01-22 10:03:37.138755: step: 4/463, loss: 0.1212196797132492 2023-01-22 10:03:37.695457: step: 6/463, loss: 0.1886449158191681 2023-01-22 10:03:38.329420: step: 8/463, loss: 0.9557496309280396 2023-01-22 10:03:38.950463: step: 10/463, loss: 0.3028426468372345 2023-01-22 10:03:39.577846: step: 12/463, loss: 0.17726431787014008 2023-01-22 10:03:40.200121: step: 14/463, loss: 0.7418721318244934 2023-01-22 10:03:40.740352: step: 16/463, loss: 0.23382455110549927 2023-01-22 10:03:41.358788: step: 18/463, loss: 0.34230974316596985 2023-01-22 10:03:42.013964: step: 20/463, loss: 0.31475967168807983 2023-01-22 10:03:42.734038: step: 22/463, loss: 0.5941904187202454 2023-01-22 10:03:43.489069: step: 24/463, loss: 0.079100102186203 2023-01-22 10:03:44.084700: step: 26/463, loss: 0.5590833425521851 2023-01-22 10:03:44.748058: step: 28/463, loss: 0.613706111907959 2023-01-22 10:03:45.327073: step: 30/463, loss: 0.08005934208631516 2023-01-22 10:03:45.851825: step: 32/463, loss: 0.31207549571990967 2023-01-22 10:03:46.466599: step: 34/463, loss: 0.18248826265335083 2023-01-22 10:03:47.029143: step: 36/463, loss: 0.43410515785217285 2023-01-22 10:03:47.663184: step: 38/463, loss: 0.16809117794036865 2023-01-22 10:03:48.272405: step: 40/463, loss: 0.14377035200595856 2023-01-22 10:03:48.877555: step: 42/463, loss: 0.21317505836486816 2023-01-22 10:03:49.628995: step: 44/463, loss: 0.29171112179756165 2023-01-22 10:03:50.286626: step: 46/463, loss: 0.2774196267127991 2023-01-22 10:03:50.926286: step: 48/463, loss: 0.32932984828948975 2023-01-22 10:03:51.557832: step: 50/463, loss: 0.14370252192020416 2023-01-22 10:03:52.168594: step: 52/463, loss: 0.10232265293598175 2023-01-22 10:03:52.784345: step: 54/463, loss: 0.19507911801338196 2023-01-22 10:03:53.581180: step: 56/463, loss: 0.35276392102241516 2023-01-22 10:03:54.239752: step: 58/463, loss: 0.6074838042259216 2023-01-22 10:03:54.826406: step: 60/463, loss: 0.28890860080718994 2023-01-22 10:03:55.411532: step: 62/463, loss: 0.16480675339698792 2023-01-22 10:03:56.043694: step: 64/463, loss: 0.30417776107788086 2023-01-22 10:03:56.767724: step: 66/463, loss: 0.4949904978275299 2023-01-22 10:03:57.405938: step: 68/463, loss: 0.23410335183143616 2023-01-22 10:03:58.117505: step: 70/463, loss: 0.3565497100353241 2023-01-22 10:03:58.734557: step: 72/463, loss: 0.7579011917114258 2023-01-22 10:03:59.382576: step: 74/463, loss: 0.19558286666870117 2023-01-22 10:04:00.083046: step: 76/463, loss: 0.38547712564468384 2023-01-22 10:04:00.730141: step: 78/463, loss: 0.6572482585906982 2023-01-22 10:04:01.316849: step: 80/463, loss: 0.24175013601779938 2023-01-22 10:04:01.934311: step: 82/463, loss: 0.11045575141906738 2023-01-22 10:04:02.526052: step: 84/463, loss: 0.5427460074424744 2023-01-22 10:04:03.169917: step: 86/463, loss: 0.488781213760376 2023-01-22 10:04:03.788249: step: 88/463, loss: 0.14809972047805786 2023-01-22 10:04:04.416096: step: 90/463, loss: 0.48586544394493103 2023-01-22 10:04:05.078646: step: 92/463, loss: 1.466514229774475 2023-01-22 10:04:05.672845: step: 94/463, loss: 0.10293036699295044 2023-01-22 10:04:06.282540: step: 96/463, loss: 0.5894548892974854 2023-01-22 10:04:06.840409: step: 98/463, loss: 0.3123920261859894 2023-01-22 10:04:07.462490: step: 100/463, loss: 0.5899331569671631 2023-01-22 10:04:08.110106: step: 102/463, loss: 0.7128351330757141 2023-01-22 10:04:08.694267: step: 104/463, loss: 0.28514599800109863 2023-01-22 10:04:09.307149: step: 106/463, loss: 0.122963547706604 2023-01-22 10:04:09.908040: step: 108/463, loss: 0.165252685546875 2023-01-22 10:04:10.535414: step: 110/463, loss: 0.997168779373169 2023-01-22 10:04:11.200160: step: 112/463, loss: 0.15040598809719086 2023-01-22 10:04:11.818984: step: 114/463, loss: 0.1996837854385376 2023-01-22 10:04:12.450615: step: 116/463, loss: 0.38388633728027344 2023-01-22 10:04:13.032613: step: 118/463, loss: 0.2027028352022171 2023-01-22 10:04:13.661787: step: 120/463, loss: 1.100383996963501 2023-01-22 10:04:14.312153: step: 122/463, loss: 0.656183123588562 2023-01-22 10:04:14.919774: step: 124/463, loss: 0.331216037273407 2023-01-22 10:04:15.569620: step: 126/463, loss: 0.1319654881954193 2023-01-22 10:04:16.129020: step: 128/463, loss: 0.5940644145011902 2023-01-22 10:04:16.723853: step: 130/463, loss: 0.27870550751686096 2023-01-22 10:04:17.314197: step: 132/463, loss: 0.38014325499534607 2023-01-22 10:04:17.920788: step: 134/463, loss: 0.3247123956680298 2023-01-22 10:04:18.519579: step: 136/463, loss: 0.5922228693962097 2023-01-22 10:04:19.077590: step: 138/463, loss: 0.36510688066482544 2023-01-22 10:04:19.758618: step: 140/463, loss: 0.632095456123352 2023-01-22 10:04:20.324296: step: 142/463, loss: 0.49989303946495056 2023-01-22 10:04:20.865252: step: 144/463, loss: 0.16780757904052734 2023-01-22 10:04:21.559605: step: 146/463, loss: 0.5040149092674255 2023-01-22 10:04:22.223990: step: 148/463, loss: 1.149385929107666 2023-01-22 10:04:22.750731: step: 150/463, loss: 0.3881452977657318 2023-01-22 10:04:23.362671: step: 152/463, loss: 0.6306591033935547 2023-01-22 10:04:24.111827: step: 154/463, loss: 0.914454460144043 2023-01-22 10:04:24.746836: step: 156/463, loss: 0.6722761988639832 2023-01-22 10:04:25.403168: step: 158/463, loss: 0.4319745600223541 2023-01-22 10:04:26.035659: step: 160/463, loss: 0.3853481411933899 2023-01-22 10:04:26.674865: step: 162/463, loss: 0.2538663446903229 2023-01-22 10:04:27.245983: step: 164/463, loss: 0.30353406071662903 2023-01-22 10:04:27.830270: step: 166/463, loss: 0.6068345308303833 2023-01-22 10:04:28.429192: step: 168/463, loss: 0.2134595662355423 2023-01-22 10:04:29.058468: step: 170/463, loss: 0.38325822353363037 2023-01-22 10:04:29.716233: step: 172/463, loss: 0.281634122133255 2023-01-22 10:04:30.368361: step: 174/463, loss: 0.5745058655738831 2023-01-22 10:04:30.971867: step: 176/463, loss: 0.1763850301504135 2023-01-22 10:04:31.609592: step: 178/463, loss: 0.21464592218399048 2023-01-22 10:04:32.224520: step: 180/463, loss: 0.37111854553222656 2023-01-22 10:04:32.793183: step: 182/463, loss: 0.275806725025177 2023-01-22 10:04:33.434151: step: 184/463, loss: 0.764906108379364 2023-01-22 10:04:34.126257: step: 186/463, loss: 1.3192195892333984 2023-01-22 10:04:34.753209: step: 188/463, loss: 0.1547672301530838 2023-01-22 10:04:35.358359: step: 190/463, loss: 0.2144608497619629 2023-01-22 10:04:35.960894: step: 192/463, loss: 0.21921630203723907 2023-01-22 10:04:36.547534: step: 194/463, loss: 0.23773063719272614 2023-01-22 10:04:37.097522: step: 196/463, loss: 0.24143753945827484 2023-01-22 10:04:37.727152: step: 198/463, loss: 0.215574249625206 2023-01-22 10:04:38.345920: step: 200/463, loss: 0.19480891525745392 2023-01-22 10:04:38.962693: step: 202/463, loss: 0.20009520649909973 2023-01-22 10:04:39.565013: step: 204/463, loss: 0.21734221279621124 2023-01-22 10:04:40.201830: step: 206/463, loss: 0.1972508430480957 2023-01-22 10:04:40.797025: step: 208/463, loss: 0.4554538428783417 2023-01-22 10:04:41.400833: step: 210/463, loss: 0.19008655846118927 2023-01-22 10:04:41.971989: step: 212/463, loss: 0.23899120092391968 2023-01-22 10:04:42.562811: step: 214/463, loss: 0.1570320874452591 2023-01-22 10:04:43.265722: step: 216/463, loss: 1.5704302787780762 2023-01-22 10:04:43.982446: step: 218/463, loss: 0.22331584990024567 2023-01-22 10:04:44.638943: step: 220/463, loss: 0.23334158957004547 2023-01-22 10:04:45.219563: step: 222/463, loss: 0.2397872358560562 2023-01-22 10:04:45.848906: step: 224/463, loss: 0.3666159510612488 2023-01-22 10:04:46.460356: step: 226/463, loss: 0.39328733086586 2023-01-22 10:04:47.123996: step: 228/463, loss: 0.27437376976013184 2023-01-22 10:04:47.806189: step: 230/463, loss: 0.3906311094760895 2023-01-22 10:04:48.565981: step: 232/463, loss: 0.51536625623703 2023-01-22 10:04:49.171316: step: 234/463, loss: 0.1613476425409317 2023-01-22 10:04:49.863174: step: 236/463, loss: 0.4213184416294098 2023-01-22 10:04:50.432100: step: 238/463, loss: 1.1637166738510132 2023-01-22 10:04:51.069034: step: 240/463, loss: 0.8391563296318054 2023-01-22 10:04:51.723796: step: 242/463, loss: 0.6976143717765808 2023-01-22 10:04:52.362339: step: 244/463, loss: 0.24530336260795593 2023-01-22 10:04:53.038648: step: 246/463, loss: 0.7133917808532715 2023-01-22 10:04:53.634463: step: 248/463, loss: 0.42089614272117615 2023-01-22 10:04:54.299295: step: 250/463, loss: 0.19285614788532257 2023-01-22 10:04:54.934275: step: 252/463, loss: 0.1550258994102478 2023-01-22 10:04:55.526092: step: 254/463, loss: 0.5059755444526672 2023-01-22 10:04:56.211010: step: 256/463, loss: 0.16709062457084656 2023-01-22 10:04:56.805288: step: 258/463, loss: 0.6122788786888123 2023-01-22 10:04:57.377025: step: 260/463, loss: 0.23059485852718353 2023-01-22 10:04:57.993691: step: 262/463, loss: 0.13720940053462982 2023-01-22 10:04:58.659217: step: 264/463, loss: 0.4587840437889099 2023-01-22 10:04:59.284526: step: 266/463, loss: 0.13675425946712494 2023-01-22 10:04:59.917806: step: 268/463, loss: 0.24087047576904297 2023-01-22 10:05:00.541286: step: 270/463, loss: 0.243925541639328 2023-01-22 10:05:01.185168: step: 272/463, loss: 0.8294358253479004 2023-01-22 10:05:01.758779: step: 274/463, loss: 0.20199105143547058 2023-01-22 10:05:02.385024: step: 276/463, loss: 1.4911930561065674 2023-01-22 10:05:03.062884: step: 278/463, loss: 0.3227616548538208 2023-01-22 10:05:03.702453: step: 280/463, loss: 0.47269970178604126 2023-01-22 10:05:04.366751: step: 282/463, loss: 0.19193769991397858 2023-01-22 10:05:05.047741: step: 284/463, loss: 1.3312082290649414 2023-01-22 10:05:05.620838: step: 286/463, loss: 1.020970344543457 2023-01-22 10:05:06.198895: step: 288/463, loss: 0.3713074326515198 2023-01-22 10:05:06.789596: step: 290/463, loss: 0.46999895572662354 2023-01-22 10:05:07.420409: step: 292/463, loss: 0.09279989451169968 2023-01-22 10:05:08.004781: step: 294/463, loss: 0.28854042291641235 2023-01-22 10:05:08.613294: step: 296/463, loss: 0.32105016708374023 2023-01-22 10:05:09.225908: step: 298/463, loss: 0.5700692534446716 2023-01-22 10:05:09.793407: step: 300/463, loss: 2.2373642921447754 2023-01-22 10:05:10.448865: step: 302/463, loss: 0.48219946026802063 2023-01-22 10:05:11.092085: step: 304/463, loss: 0.19304844737052917 2023-01-22 10:05:11.707754: step: 306/463, loss: 0.32953280210494995 2023-01-22 10:05:12.290699: step: 308/463, loss: 0.3513036370277405 2023-01-22 10:05:12.836131: step: 310/463, loss: 0.1558876484632492 2023-01-22 10:05:13.442131: step: 312/463, loss: 0.3405417501926422 2023-01-22 10:05:14.023209: step: 314/463, loss: 0.5260372757911682 2023-01-22 10:05:14.635592: step: 316/463, loss: 0.18853680789470673 2023-01-22 10:05:15.219637: step: 318/463, loss: 0.18867424130439758 2023-01-22 10:05:15.891883: step: 320/463, loss: 0.13986973464488983 2023-01-22 10:05:16.546745: step: 322/463, loss: 0.16702282428741455 2023-01-22 10:05:17.199336: step: 324/463, loss: 0.24092090129852295 2023-01-22 10:05:17.810073: step: 326/463, loss: 0.16718487441539764 2023-01-22 10:05:18.393000: step: 328/463, loss: 1.4338083267211914 2023-01-22 10:05:19.018071: step: 330/463, loss: 0.5060054063796997 2023-01-22 10:05:19.651920: step: 332/463, loss: 0.2249811738729477 2023-01-22 10:05:20.230519: step: 334/463, loss: 0.12821589410305023 2023-01-22 10:05:20.805641: step: 336/463, loss: 0.25745388865470886 2023-01-22 10:05:21.397936: step: 338/463, loss: 0.20372295379638672 2023-01-22 10:05:22.008011: step: 340/463, loss: 0.35935333371162415 2023-01-22 10:05:22.628866: step: 342/463, loss: 0.1503312736749649 2023-01-22 10:05:23.252087: step: 344/463, loss: 0.39330172538757324 2023-01-22 10:05:23.851355: step: 346/463, loss: 0.31189626455307007 2023-01-22 10:05:24.416726: step: 348/463, loss: 0.25170210003852844 2023-01-22 10:05:25.042841: step: 350/463, loss: 0.1842184215784073 2023-01-22 10:05:25.725852: step: 352/463, loss: 0.30135318636894226 2023-01-22 10:05:26.312679: step: 354/463, loss: 0.15483951568603516 2023-01-22 10:05:26.969903: step: 356/463, loss: 0.3748832643032074 2023-01-22 10:05:28.304817: step: 358/463, loss: 0.15046001970767975 2023-01-22 10:05:28.970121: step: 360/463, loss: 0.6785732507705688 2023-01-22 10:05:29.617105: step: 362/463, loss: 0.46878835558891296 2023-01-22 10:05:30.221893: step: 364/463, loss: 0.14611150324344635 2023-01-22 10:05:30.819977: step: 366/463, loss: 0.20062783360481262 2023-01-22 10:05:31.583315: step: 368/463, loss: 1.4893033504486084 2023-01-22 10:05:32.190647: step: 370/463, loss: 0.3624162971973419 2023-01-22 10:05:32.772821: step: 372/463, loss: 0.22823302447795868 2023-01-22 10:05:33.403308: step: 374/463, loss: 0.47888925671577454 2023-01-22 10:05:34.070137: step: 376/463, loss: 0.2142326831817627 2023-01-22 10:05:34.677443: step: 378/463, loss: 0.08842132240533829 2023-01-22 10:05:35.230244: step: 380/463, loss: 0.20132502913475037 2023-01-22 10:05:35.850482: step: 382/463, loss: 0.5702792406082153 2023-01-22 10:05:36.449166: step: 384/463, loss: 0.2645527124404907 2023-01-22 10:05:37.047283: step: 386/463, loss: 0.2643909156322479 2023-01-22 10:05:37.627758: step: 388/463, loss: 0.11464519798755646 2023-01-22 10:05:38.186179: step: 390/463, loss: 1.2405376434326172 2023-01-22 10:05:38.846118: step: 392/463, loss: 1.8353369235992432 2023-01-22 10:05:39.483980: step: 394/463, loss: 0.19126446545124054 2023-01-22 10:05:40.079148: step: 396/463, loss: 0.18591001629829407 2023-01-22 10:05:40.672450: step: 398/463, loss: 0.6155804991722107 2023-01-22 10:05:41.303692: step: 400/463, loss: 0.6110813021659851 2023-01-22 10:05:41.877944: step: 402/463, loss: 0.3434958755970001 2023-01-22 10:05:42.423213: step: 404/463, loss: 0.2551930546760559 2023-01-22 10:05:43.079749: step: 406/463, loss: 0.4357593357563019 2023-01-22 10:05:43.644134: step: 408/463, loss: 0.34898892045021057 2023-01-22 10:05:44.226686: step: 410/463, loss: 0.17367421090602875 2023-01-22 10:05:44.844698: step: 412/463, loss: 0.26134827733039856 2023-01-22 10:05:45.453220: step: 414/463, loss: 0.3092823326587677 2023-01-22 10:05:46.038928: step: 416/463, loss: 7.929866790771484 2023-01-22 10:05:46.644165: step: 418/463, loss: 0.24986934661865234 2023-01-22 10:05:47.233188: step: 420/463, loss: 0.1763705015182495 2023-01-22 10:05:47.847625: step: 422/463, loss: 0.29315638542175293 2023-01-22 10:05:48.511088: step: 424/463, loss: 1.7149840593338013 2023-01-22 10:05:49.138449: step: 426/463, loss: 0.6248399019241333 2023-01-22 10:05:49.767295: step: 428/463, loss: 0.32113632559776306 2023-01-22 10:05:50.528729: step: 430/463, loss: 0.435170441865921 2023-01-22 10:05:51.171156: step: 432/463, loss: 0.17764750123023987 2023-01-22 10:05:51.782950: step: 434/463, loss: 0.7798184156417847 2023-01-22 10:05:52.376823: step: 436/463, loss: 0.49483349919319153 2023-01-22 10:05:52.997744: step: 438/463, loss: 0.1810191422700882 2023-01-22 10:05:53.542304: step: 440/463, loss: 0.20566925406455994 2023-01-22 10:05:54.141803: step: 442/463, loss: 0.19017474353313446 2023-01-22 10:05:54.704748: step: 444/463, loss: 3.432640552520752 2023-01-22 10:05:55.326718: step: 446/463, loss: 0.23029492795467377 2023-01-22 10:05:55.900573: step: 448/463, loss: 0.2634607255458832 2023-01-22 10:05:56.536506: step: 450/463, loss: 1.443914771080017 2023-01-22 10:05:57.203936: step: 452/463, loss: 0.16821452975273132 2023-01-22 10:05:57.817856: step: 454/463, loss: 0.4631841480731964 2023-01-22 10:05:58.447860: step: 456/463, loss: 0.423465758562088 2023-01-22 10:05:59.085912: step: 458/463, loss: 0.6958268880844116 2023-01-22 10:05:59.715028: step: 460/463, loss: 0.15607315301895142 2023-01-22 10:06:00.282111: step: 462/463, loss: 0.39441296458244324 2023-01-22 10:06:00.901579: step: 464/463, loss: 0.47144651412963867 2023-01-22 10:06:01.501886: step: 466/463, loss: 0.2517615258693695 2023-01-22 10:06:02.112915: step: 468/463, loss: 0.3042495846748352 2023-01-22 10:06:02.730706: step: 470/463, loss: 0.7725745439529419 2023-01-22 10:06:03.364043: step: 472/463, loss: 0.1470322161912918 2023-01-22 10:06:04.069728: step: 474/463, loss: 0.044164594262838364 2023-01-22 10:06:04.694289: step: 476/463, loss: 0.06672929972410202 2023-01-22 10:06:05.311023: step: 478/463, loss: 0.5192210674285889 2023-01-22 10:06:05.920728: step: 480/463, loss: 0.2355055958032608 2023-01-22 10:06:06.592556: step: 482/463, loss: 0.33371806144714355 2023-01-22 10:06:07.199315: step: 484/463, loss: 1.575055480003357 2023-01-22 10:06:07.824159: step: 486/463, loss: 0.30354687571525574 2023-01-22 10:06:08.441783: step: 488/463, loss: 0.3030671179294586 2023-01-22 10:06:09.121849: step: 490/463, loss: 0.4810742735862732 2023-01-22 10:06:09.786457: step: 492/463, loss: 0.34197038412094116 2023-01-22 10:06:10.410505: step: 494/463, loss: 0.3498307764530182 2023-01-22 10:06:11.139367: step: 496/463, loss: 0.468900203704834 2023-01-22 10:06:11.745919: step: 498/463, loss: 0.4007844626903534 2023-01-22 10:06:12.368722: step: 500/463, loss: 0.20504020154476166 2023-01-22 10:06:12.975064: step: 502/463, loss: 0.3164767026901245 2023-01-22 10:06:13.574276: step: 504/463, loss: 0.7533845901489258 2023-01-22 10:06:14.167022: step: 506/463, loss: 0.23010562360286713 2023-01-22 10:06:14.826849: step: 508/463, loss: 0.4565170407295227 2023-01-22 10:06:15.412996: step: 510/463, loss: 0.28601327538490295 2023-01-22 10:06:16.087272: step: 512/463, loss: 0.12773257493972778 2023-01-22 10:06:16.761362: step: 514/463, loss: 0.4118571877479553 2023-01-22 10:06:17.304929: step: 516/463, loss: 0.20072387158870697 2023-01-22 10:06:17.880636: step: 518/463, loss: 0.16222453117370605 2023-01-22 10:06:18.495375: step: 520/463, loss: 0.33557742834091187 2023-01-22 10:06:19.139872: step: 522/463, loss: 0.8582131266593933 2023-01-22 10:06:19.766436: step: 524/463, loss: 0.19500266015529633 2023-01-22 10:06:20.406675: step: 526/463, loss: 0.48093193769454956 2023-01-22 10:06:21.030024: step: 528/463, loss: 0.22190040349960327 2023-01-22 10:06:21.647362: step: 530/463, loss: 1.0980900526046753 2023-01-22 10:06:22.267215: step: 532/463, loss: 0.8851344585418701 2023-01-22 10:06:22.876327: step: 534/463, loss: 0.30034351348876953 2023-01-22 10:06:23.439851: step: 536/463, loss: 4.212319374084473 2023-01-22 10:06:24.078442: step: 538/463, loss: 0.3674173355102539 2023-01-22 10:06:24.742626: step: 540/463, loss: 0.3275724947452545 2023-01-22 10:06:25.342900: step: 542/463, loss: 0.10442414879798889 2023-01-22 10:06:25.930634: step: 544/463, loss: 0.13089825212955475 2023-01-22 10:06:26.551374: step: 546/463, loss: 0.13757087290287018 2023-01-22 10:06:27.173402: step: 548/463, loss: 0.15106546878814697 2023-01-22 10:06:27.779975: step: 550/463, loss: 0.25607678294181824 2023-01-22 10:06:28.353877: step: 552/463, loss: 0.616750955581665 2023-01-22 10:06:29.047312: step: 554/463, loss: 1.3699512481689453 2023-01-22 10:06:29.617718: step: 556/463, loss: 0.05203315615653992 2023-01-22 10:06:30.272875: step: 558/463, loss: 0.26425009965896606 2023-01-22 10:06:30.916439: step: 560/463, loss: 0.4489101469516754 2023-01-22 10:06:31.525326: step: 562/463, loss: 3.074275493621826 2023-01-22 10:06:32.082691: step: 564/463, loss: 0.17674091458320618 2023-01-22 10:06:32.668275: step: 566/463, loss: 0.2708001136779785 2023-01-22 10:06:33.350031: step: 568/463, loss: 0.48294466733932495 2023-01-22 10:06:33.968086: step: 570/463, loss: 0.14365455508232117 2023-01-22 10:06:34.599309: step: 572/463, loss: 0.1714448779821396 2023-01-22 10:06:35.199824: step: 574/463, loss: 0.471382200717926 2023-01-22 10:06:35.954078: step: 576/463, loss: 0.5601073503494263 2023-01-22 10:06:36.601385: step: 578/463, loss: 1.8641855716705322 2023-01-22 10:06:37.221839: step: 580/463, loss: 0.35316845774650574 2023-01-22 10:06:37.867519: step: 582/463, loss: 0.25862613320350647 2023-01-22 10:06:38.482679: step: 584/463, loss: 0.22035980224609375 2023-01-22 10:06:39.102608: step: 586/463, loss: 0.299435555934906 2023-01-22 10:06:39.709954: step: 588/463, loss: 0.31494027376174927 2023-01-22 10:06:40.358949: step: 590/463, loss: 0.2907198965549469 2023-01-22 10:06:40.989358: step: 592/463, loss: 0.41593366861343384 2023-01-22 10:06:41.611531: step: 594/463, loss: 0.17134374380111694 2023-01-22 10:06:42.275058: step: 596/463, loss: 0.9272217750549316 2023-01-22 10:06:42.840517: step: 598/463, loss: 1.0068892240524292 2023-01-22 10:06:43.594131: step: 600/463, loss: 0.6083629131317139 2023-01-22 10:06:44.230219: step: 602/463, loss: 0.06725073605775833 2023-01-22 10:06:44.868096: step: 604/463, loss: 0.6282367706298828 2023-01-22 10:06:45.448763: step: 606/463, loss: 0.483088880777359 2023-01-22 10:06:46.045616: step: 608/463, loss: 0.2649013102054596 2023-01-22 10:06:46.617566: step: 610/463, loss: 0.851344883441925 2023-01-22 10:06:47.229159: step: 612/463, loss: 0.14235958456993103 2023-01-22 10:06:47.825813: step: 614/463, loss: 0.27428579330444336 2023-01-22 10:06:48.522454: step: 616/463, loss: 0.6611388325691223 2023-01-22 10:06:49.221387: step: 618/463, loss: 0.4034227132797241 2023-01-22 10:06:49.830634: step: 620/463, loss: 0.17095880210399628 2023-01-22 10:06:50.423734: step: 622/463, loss: 0.21051353216171265 2023-01-22 10:06:51.147371: step: 624/463, loss: 0.2529064118862152 2023-01-22 10:06:51.793575: step: 626/463, loss: 0.31142446398735046 2023-01-22 10:06:52.419194: step: 628/463, loss: 0.20614519715309143 2023-01-22 10:06:53.037910: step: 630/463, loss: 0.229752779006958 2023-01-22 10:06:53.619821: step: 632/463, loss: 0.1111738383769989 2023-01-22 10:06:54.202647: step: 634/463, loss: 0.8112533688545227 2023-01-22 10:06:54.833376: step: 636/463, loss: 0.32893380522727966 2023-01-22 10:06:55.435667: step: 638/463, loss: 0.4370189905166626 2023-01-22 10:06:56.001115: step: 640/463, loss: 0.32176172733306885 2023-01-22 10:06:56.660520: step: 642/463, loss: 0.4488545358181 2023-01-22 10:06:57.262183: step: 644/463, loss: 0.17297649383544922 2023-01-22 10:06:57.840788: step: 646/463, loss: 0.17448876798152924 2023-01-22 10:06:58.459062: step: 648/463, loss: 0.49763840436935425 2023-01-22 10:06:59.076556: step: 650/463, loss: 0.2580465078353882 2023-01-22 10:06:59.673642: step: 652/463, loss: 0.7945095300674438 2023-01-22 10:07:00.274898: step: 654/463, loss: 0.19404321908950806 2023-01-22 10:07:00.889993: step: 656/463, loss: 0.5144699811935425 2023-01-22 10:07:01.473277: step: 658/463, loss: 0.4411325454711914 2023-01-22 10:07:02.068626: step: 660/463, loss: 0.1756366640329361 2023-01-22 10:07:02.694219: step: 662/463, loss: 0.28219160437583923 2023-01-22 10:07:03.299154: step: 664/463, loss: 0.12736627459526062 2023-01-22 10:07:03.931400: step: 666/463, loss: 0.6618295311927795 2023-01-22 10:07:04.528463: step: 668/463, loss: 0.812481701374054 2023-01-22 10:07:05.148807: step: 670/463, loss: 0.13250994682312012 2023-01-22 10:07:05.794349: step: 672/463, loss: 0.2282523810863495 2023-01-22 10:07:06.342545: step: 674/463, loss: 0.34716796875 2023-01-22 10:07:06.941533: step: 676/463, loss: 0.2386336326599121 2023-01-22 10:07:07.549559: step: 678/463, loss: 0.15245743095874786 2023-01-22 10:07:08.174988: step: 680/463, loss: 0.28126800060272217 2023-01-22 10:07:08.789086: step: 682/463, loss: 0.5431774854660034 2023-01-22 10:07:09.354837: step: 684/463, loss: 0.5609435439109802 2023-01-22 10:07:09.925258: step: 686/463, loss: 0.13595929741859436 2023-01-22 10:07:10.542361: step: 688/463, loss: 0.4991567134857178 2023-01-22 10:07:11.123181: step: 690/463, loss: 0.38147616386413574 2023-01-22 10:07:11.760528: step: 692/463, loss: 0.28176456689834595 2023-01-22 10:07:12.392455: step: 694/463, loss: 0.1652994304895401 2023-01-22 10:07:13.075295: step: 696/463, loss: 0.8659372329711914 2023-01-22 10:07:13.728031: step: 698/463, loss: 0.24530211091041565 2023-01-22 10:07:14.344858: step: 700/463, loss: 0.14377903938293457 2023-01-22 10:07:15.039863: step: 702/463, loss: 0.4163385331630707 2023-01-22 10:07:15.684670: step: 704/463, loss: 0.22683656215667725 2023-01-22 10:07:16.307728: step: 706/463, loss: 0.122826486825943 2023-01-22 10:07:16.932872: step: 708/463, loss: 0.7241946458816528 2023-01-22 10:07:17.562174: step: 710/463, loss: 0.4256995618343353 2023-01-22 10:07:18.157032: step: 712/463, loss: 0.2642606794834137 2023-01-22 10:07:18.763506: step: 714/463, loss: 0.21085220575332642 2023-01-22 10:07:19.407972: step: 716/463, loss: 0.38459616899490356 2023-01-22 10:07:20.030365: step: 718/463, loss: 0.24798916280269623 2023-01-22 10:07:20.596548: step: 720/463, loss: 0.05103160813450813 2023-01-22 10:07:21.218338: step: 722/463, loss: 0.15094715356826782 2023-01-22 10:07:21.786156: step: 724/463, loss: 1.1056698560714722 2023-01-22 10:07:22.431988: step: 726/463, loss: 0.5388104319572449 2023-01-22 10:07:23.057426: step: 728/463, loss: 0.1357012391090393 2023-01-22 10:07:23.696654: step: 730/463, loss: 0.5290142893791199 2023-01-22 10:07:24.348835: step: 732/463, loss: 0.23319855332374573 2023-01-22 10:07:24.958267: step: 734/463, loss: 0.8086882829666138 2023-01-22 10:07:25.553941: step: 736/463, loss: 1.1085156202316284 2023-01-22 10:07:26.157667: step: 738/463, loss: 0.16713033616542816 2023-01-22 10:07:26.809117: step: 740/463, loss: 0.3009839653968811 2023-01-22 10:07:27.394455: step: 742/463, loss: 0.2579971253871918 2023-01-22 10:07:28.122979: step: 744/463, loss: 0.3805778920650482 2023-01-22 10:07:28.794968: step: 746/463, loss: 0.08140489459037781 2023-01-22 10:07:29.454201: step: 748/463, loss: 0.4339982867240906 2023-01-22 10:07:30.047873: step: 750/463, loss: 0.10837500542402267 2023-01-22 10:07:30.652177: step: 752/463, loss: 4.3671064376831055 2023-01-22 10:07:31.273819: step: 754/463, loss: 1.4679443836212158 2023-01-22 10:07:31.830137: step: 756/463, loss: 0.1418849527835846 2023-01-22 10:07:32.505131: step: 758/463, loss: 0.3023754060268402 2023-01-22 10:07:33.039450: step: 760/463, loss: 0.24818727374076843 2023-01-22 10:07:33.674560: step: 762/463, loss: 0.26257655024528503 2023-01-22 10:07:34.333086: step: 764/463, loss: 0.46585485339164734 2023-01-22 10:07:35.036792: step: 766/463, loss: 0.4479946792125702 2023-01-22 10:07:35.733251: step: 768/463, loss: 1.1319674253463745 2023-01-22 10:07:36.395895: step: 770/463, loss: 0.2650575637817383 2023-01-22 10:07:36.996947: step: 772/463, loss: 0.1902674436569214 2023-01-22 10:07:37.614129: step: 774/463, loss: 0.32330140471458435 2023-01-22 10:07:38.255057: step: 776/463, loss: 0.5427892208099365 2023-01-22 10:07:38.961781: step: 778/463, loss: 0.4061028063297272 2023-01-22 10:07:39.550181: step: 780/463, loss: 0.5603631734848022 2023-01-22 10:07:40.165269: step: 782/463, loss: 0.17620758712291718 2023-01-22 10:07:40.779481: step: 784/463, loss: 0.5316905379295349 2023-01-22 10:07:41.353690: step: 786/463, loss: 0.7653321623802185 2023-01-22 10:07:41.930629: step: 788/463, loss: 0.3327375054359436 2023-01-22 10:07:42.577962: step: 790/463, loss: 0.2217465490102768 2023-01-22 10:07:43.239438: step: 792/463, loss: 0.2062823325395584 2023-01-22 10:07:43.845275: step: 794/463, loss: 2.1143057346343994 2023-01-22 10:07:44.482076: step: 796/463, loss: 1.028924584388733 2023-01-22 10:07:45.109783: step: 798/463, loss: 0.24175474047660828 2023-01-22 10:07:45.741479: step: 800/463, loss: 0.3310534954071045 2023-01-22 10:07:46.396630: step: 802/463, loss: 0.17518888413906097 2023-01-22 10:07:47.004247: step: 804/463, loss: 0.5500239729881287 2023-01-22 10:07:47.599901: step: 806/463, loss: 0.43291524052619934 2023-01-22 10:07:48.209993: step: 808/463, loss: 0.32473257184028625 2023-01-22 10:07:48.797972: step: 810/463, loss: 0.18057170510292053 2023-01-22 10:07:49.454570: step: 812/463, loss: 0.5960824489593506 2023-01-22 10:07:50.102907: step: 814/463, loss: 0.6424495577812195 2023-01-22 10:07:50.718593: step: 816/463, loss: 0.10353157669305801 2023-01-22 10:07:51.381050: step: 818/463, loss: 1.0949581861495972 2023-01-22 10:07:51.950268: step: 820/463, loss: 0.21636156737804413 2023-01-22 10:07:52.572959: step: 822/463, loss: 0.5443524718284607 2023-01-22 10:07:53.261765: step: 824/463, loss: 0.14077365398406982 2023-01-22 10:07:53.815351: step: 826/463, loss: 0.32151108980178833 2023-01-22 10:07:54.437395: step: 828/463, loss: 0.23523424565792084 2023-01-22 10:07:55.069825: step: 830/463, loss: 0.5430747866630554 2023-01-22 10:07:55.700308: step: 832/463, loss: 0.25517770648002625 2023-01-22 10:07:56.270440: step: 834/463, loss: 1.2619940042495728 2023-01-22 10:07:56.796265: step: 836/463, loss: 0.27874496579170227 2023-01-22 10:07:57.426370: step: 838/463, loss: 0.16189457476139069 2023-01-22 10:07:57.992015: step: 840/463, loss: 0.20547501742839813 2023-01-22 10:07:58.562565: step: 842/463, loss: 0.23212389647960663 2023-01-22 10:07:59.116285: step: 844/463, loss: 0.22806406021118164 2023-01-22 10:07:59.693412: step: 846/463, loss: 1.722379207611084 2023-01-22 10:08:00.342240: step: 848/463, loss: 0.5067812204360962 2023-01-22 10:08:00.955377: step: 850/463, loss: 0.9827410578727722 2023-01-22 10:08:01.638809: step: 852/463, loss: 0.2934538722038269 2023-01-22 10:08:02.257082: step: 854/463, loss: 0.4790877401828766 2023-01-22 10:08:02.921070: step: 856/463, loss: 0.19760249555110931 2023-01-22 10:08:03.569334: step: 858/463, loss: 0.5390596985816956 2023-01-22 10:08:04.194997: step: 860/463, loss: 0.25786659121513367 2023-01-22 10:08:04.795354: step: 862/463, loss: 0.31573036313056946 2023-01-22 10:08:05.403625: step: 864/463, loss: 0.19822876155376434 2023-01-22 10:08:06.036612: step: 866/463, loss: 0.7719884514808655 2023-01-22 10:08:06.673026: step: 868/463, loss: 0.21586428582668304 2023-01-22 10:08:07.335768: step: 870/463, loss: 0.19002723693847656 2023-01-22 10:08:07.951519: step: 872/463, loss: 0.2642134130001068 2023-01-22 10:08:08.534460: step: 874/463, loss: 0.3830374479293823 2023-01-22 10:08:09.214658: step: 876/463, loss: 1.1521027088165283 2023-01-22 10:08:09.851527: step: 878/463, loss: 0.0997028797864914 2023-01-22 10:08:10.485643: step: 880/463, loss: 0.15445277094841003 2023-01-22 10:08:11.102450: step: 882/463, loss: 1.9089767932891846 2023-01-22 10:08:11.745176: step: 884/463, loss: 0.11428172141313553 2023-01-22 10:08:12.342929: step: 886/463, loss: 0.5019733309745789 2023-01-22 10:08:12.961771: step: 888/463, loss: 0.3502446413040161 2023-01-22 10:08:13.571043: step: 890/463, loss: 0.28718674182891846 2023-01-22 10:08:14.181295: step: 892/463, loss: 0.10633283108472824 2023-01-22 10:08:14.810246: step: 894/463, loss: 0.26933473348617554 2023-01-22 10:08:15.467354: step: 896/463, loss: 0.1510273665189743 2023-01-22 10:08:16.075103: step: 898/463, loss: 0.2200581133365631 2023-01-22 10:08:16.728643: step: 900/463, loss: 0.6008175611495972 2023-01-22 10:08:17.304492: step: 902/463, loss: 0.23193147778511047 2023-01-22 10:08:17.936468: step: 904/463, loss: 0.21933698654174805 2023-01-22 10:08:18.557674: step: 906/463, loss: 0.795813262462616 2023-01-22 10:08:19.159567: step: 908/463, loss: 0.5112689137458801 2023-01-22 10:08:19.859537: step: 910/463, loss: 0.3328273594379425 2023-01-22 10:08:20.426878: step: 912/463, loss: 0.4828084111213684 2023-01-22 10:08:21.009580: step: 914/463, loss: 0.140917107462883 2023-01-22 10:08:21.609824: step: 916/463, loss: 0.8214280605316162 2023-01-22 10:08:22.324762: step: 918/463, loss: 0.4063258767127991 2023-01-22 10:08:22.953599: step: 920/463, loss: 0.1763933002948761 2023-01-22 10:08:23.577958: step: 922/463, loss: 0.6062476634979248 2023-01-22 10:08:24.245008: step: 924/463, loss: 0.3227226436138153 2023-01-22 10:08:24.861948: step: 926/463, loss: 0.3610214293003082 ================================================== Loss: 0.462 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3105357435021661, 'r': 0.3264455444026566, 'f1': 0.318291955412026}, 'combined': 0.2345309145141244, 'epoch': 8} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.327225896009555, 'r': 0.4027164290044064, 'f1': 0.36106755233357574}, 'combined': 0.2798705429571257, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.280285083726824, 'r': 0.3478300659532123, 'f1': 0.310425816693214}, 'combined': 0.22873481230026294, 'epoch': 8} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.31131974333667495, 'r': 0.40603190789957877, 'f1': 0.35242338715176846}, 'combined': 0.2731702809501746, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2848234553046372, 'r': 0.3426528855088045, 'f1': 0.31107333447569335}, 'combined': 0.22921193066630036, 'epoch': 8} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.31787501828469555, 'r': 0.3970516083170048, 'f1': 0.3530789945638752}, 'combined': 0.2736784551164966, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25, 'r': 0.34285714285714286, 'f1': 0.2891566265060241}, 'combined': 0.19277108433734938, 'epoch': 8} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2916666666666667, 'r': 0.45652173913043476, 'f1': 0.35593220338983056}, 'combined': 0.17796610169491528, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2840909090909091, 'r': 0.21551724137931033, 'f1': 0.2450980392156863}, 'combined': 0.16339869281045752, 'epoch': 8} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29855383301254707, 'r': 0.34444161379815674, 'f1': 0.3198603180116804}, 'combined': 0.23568655011386977, 'epoch': 7} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.30988948788864984, 'r': 0.3867922100668993, 'f1': 0.34409642236532006}, 'combined': 0.2667158871922577, 'epoch': 7} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2636363636363636, 'r': 0.4142857142857143, 'f1': 0.3222222222222222}, 'combined': 0.2148148148148148, 'epoch': 7} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.294349755002927, 'r': 0.3345645222898544, 'f1': 0.3131714089640378}, 'combined': 0.23075788028929098, 'epoch': 6} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3358950473035951, 'r': 0.33466466251493726, 'f1': 0.3352787261158821}, 'combined': 0.25988111785058804, 'epoch': 6} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 6} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28012957534441907, 'r': 0.3140846753861668, 'f1': 0.29613697964981445}, 'combined': 0.21820619553144222, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3064842101133353, 'r': 0.3585977936344447, 'f1': 0.3304992795207758}, 'combined': 0.2561764750352425, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.421875, 'r': 0.23275862068965517, 'f1': 0.3}, 'combined': 0.19999999999999998, 'epoch': 5} ****************************** Epoch: 9 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 10:11:04.128286: step: 2/463, loss: 0.17077802121639252 2023-01-22 10:11:04.832611: step: 4/463, loss: 0.2563360929489136 2023-01-22 10:11:05.469902: step: 6/463, loss: 0.19956116378307343 2023-01-22 10:11:06.144201: step: 8/463, loss: 0.17916664481163025 2023-01-22 10:11:06.754266: step: 10/463, loss: 0.4735904932022095 2023-01-22 10:11:07.412740: step: 12/463, loss: 0.234725221991539 2023-01-22 10:11:08.013007: step: 14/463, loss: 1.0785027742385864 2023-01-22 10:11:08.612330: step: 16/463, loss: 0.05499253422021866 2023-01-22 10:11:09.250812: step: 18/463, loss: 0.10897567868232727 2023-01-22 10:11:09.895316: step: 20/463, loss: 1.1164591312408447 2023-01-22 10:11:10.480976: step: 22/463, loss: 0.21747346222400665 2023-01-22 10:11:11.058319: step: 24/463, loss: 0.18439310789108276 2023-01-22 10:11:11.641436: step: 26/463, loss: 0.2679608166217804 2023-01-22 10:11:12.324096: step: 28/463, loss: 0.28326791524887085 2023-01-22 10:11:12.931684: step: 30/463, loss: 0.11695236712694168 2023-01-22 10:11:13.584380: step: 32/463, loss: 0.1910436898469925 2023-01-22 10:11:14.221242: step: 34/463, loss: 0.13600853085517883 2023-01-22 10:11:14.870742: step: 36/463, loss: 0.43715304136276245 2023-01-22 10:11:15.372848: step: 38/463, loss: 0.05087998881936073 2023-01-22 10:11:15.987694: step: 40/463, loss: 0.6077514886856079 2023-01-22 10:11:16.598093: step: 42/463, loss: 0.20853185653686523 2023-01-22 10:11:17.212373: step: 44/463, loss: 0.09062162786722183 2023-01-22 10:11:17.796571: step: 46/463, loss: 0.08322703838348389 2023-01-22 10:11:18.438390: step: 48/463, loss: 0.38514578342437744 2023-01-22 10:11:19.104793: step: 50/463, loss: 0.23162110149860382 2023-01-22 10:11:19.672368: step: 52/463, loss: 0.23460164666175842 2023-01-22 10:11:20.309802: step: 54/463, loss: 0.33674460649490356 2023-01-22 10:11:20.917104: step: 56/463, loss: 0.5197405815124512 2023-01-22 10:11:21.502489: step: 58/463, loss: 0.2500210106372833 2023-01-22 10:11:22.102801: step: 60/463, loss: 1.7277462482452393 2023-01-22 10:11:22.694427: step: 62/463, loss: 0.3194347023963928 2023-01-22 10:11:23.323911: step: 64/463, loss: 0.11162091046571732 2023-01-22 10:11:23.977000: step: 66/463, loss: 0.060975331813097 2023-01-22 10:11:24.536418: step: 68/463, loss: 0.2567768394947052 2023-01-22 10:11:25.156685: step: 70/463, loss: 0.4143083095550537 2023-01-22 10:11:25.770976: step: 72/463, loss: 0.32424283027648926 2023-01-22 10:11:26.378629: step: 74/463, loss: 0.26578614115715027 2023-01-22 10:11:26.967176: step: 76/463, loss: 0.14340151846408844 2023-01-22 10:11:27.613226: step: 78/463, loss: 0.4129473567008972 2023-01-22 10:11:28.189520: step: 80/463, loss: 0.20089292526245117 2023-01-22 10:11:28.766269: step: 82/463, loss: 0.2243395745754242 2023-01-22 10:11:29.371798: step: 84/463, loss: 0.10392922163009644 2023-01-22 10:11:30.037033: step: 86/463, loss: 0.5979198217391968 2023-01-22 10:11:30.711571: step: 88/463, loss: 0.3399348258972168 2023-01-22 10:11:31.327939: step: 90/463, loss: 0.536894679069519 2023-01-22 10:11:32.003546: step: 92/463, loss: 0.08235007524490356 2023-01-22 10:11:32.653354: step: 94/463, loss: 0.21865396201610565 2023-01-22 10:11:33.265647: step: 96/463, loss: 0.3912118077278137 2023-01-22 10:11:33.837992: step: 98/463, loss: 0.13150645792484283 2023-01-22 10:11:34.438319: step: 100/463, loss: 0.25504016876220703 2023-01-22 10:11:35.087087: step: 102/463, loss: 0.365493506193161 2023-01-22 10:11:35.724685: step: 104/463, loss: 0.5118498802185059 2023-01-22 10:11:36.306512: step: 106/463, loss: 0.18823763728141785 2023-01-22 10:11:36.926054: step: 108/463, loss: 0.19413326680660248 2023-01-22 10:11:37.556047: step: 110/463, loss: 0.24084702134132385 2023-01-22 10:11:38.156375: step: 112/463, loss: 0.6102190017700195 2023-01-22 10:11:38.779162: step: 114/463, loss: 1.2868348360061646 2023-01-22 10:11:39.391787: step: 116/463, loss: 0.17565158009529114 2023-01-22 10:11:40.044019: step: 118/463, loss: 0.9952040314674377 2023-01-22 10:11:40.634715: step: 120/463, loss: 0.07152537256479263 2023-01-22 10:11:41.282391: step: 122/463, loss: 0.3385022282600403 2023-01-22 10:11:41.875220: step: 124/463, loss: 0.23959952592849731 2023-01-22 10:11:42.499214: step: 126/463, loss: 0.16376827657222748 2023-01-22 10:11:43.097159: step: 128/463, loss: 0.17524555325508118 2023-01-22 10:11:43.750680: step: 130/463, loss: 0.21081186830997467 2023-01-22 10:11:44.362528: step: 132/463, loss: 0.15743355453014374 2023-01-22 10:11:44.984086: step: 134/463, loss: 0.2795874774456024 2023-01-22 10:11:45.637618: step: 136/463, loss: 0.7767999768257141 2023-01-22 10:11:46.271420: step: 138/463, loss: 0.7737017273902893 2023-01-22 10:11:46.997474: step: 140/463, loss: 0.34982675313949585 2023-01-22 10:11:47.629641: step: 142/463, loss: 0.2001238316297531 2023-01-22 10:11:48.219998: step: 144/463, loss: 0.2352839708328247 2023-01-22 10:11:48.880691: step: 146/463, loss: 0.22720541059970856 2023-01-22 10:11:49.582914: step: 148/463, loss: 0.35505396127700806 2023-01-22 10:11:50.163955: step: 150/463, loss: 0.35628750920295715 2023-01-22 10:11:50.708160: step: 152/463, loss: 0.9582790732383728 2023-01-22 10:11:51.332194: step: 154/463, loss: 0.10442464053630829 2023-01-22 10:11:51.958764: step: 156/463, loss: 0.23777472972869873 2023-01-22 10:11:52.587434: step: 158/463, loss: 0.497800350189209 2023-01-22 10:11:53.141866: step: 160/463, loss: 0.32195955514907837 2023-01-22 10:11:53.770811: step: 162/463, loss: 1.4129806756973267 2023-01-22 10:11:54.337425: step: 164/463, loss: 0.36605462431907654 2023-01-22 10:11:55.063258: step: 166/463, loss: 0.3789929449558258 2023-01-22 10:11:55.634390: step: 168/463, loss: 0.40258800983428955 2023-01-22 10:11:56.223094: step: 170/463, loss: 0.9423494935035706 2023-01-22 10:11:56.866945: step: 172/463, loss: 0.34803786873817444 2023-01-22 10:11:57.501813: step: 174/463, loss: 0.3194010555744171 2023-01-22 10:11:58.157452: step: 176/463, loss: 1.1413010358810425 2023-01-22 10:11:58.776388: step: 178/463, loss: 0.16474317014217377 2023-01-22 10:11:59.380375: step: 180/463, loss: 0.2473813146352768 2023-01-22 10:12:00.056598: step: 182/463, loss: 0.2337036281824112 2023-01-22 10:12:00.761090: step: 184/463, loss: 0.2663898169994354 2023-01-22 10:12:01.331727: step: 186/463, loss: 0.3031774163246155 2023-01-22 10:12:02.083752: step: 188/463, loss: 0.6257765293121338 2023-01-22 10:12:02.684188: step: 190/463, loss: 0.10745250433683395 2023-01-22 10:12:03.271449: step: 192/463, loss: 0.8570696711540222 2023-01-22 10:12:03.895888: step: 194/463, loss: 0.7822129130363464 2023-01-22 10:12:04.528345: step: 196/463, loss: 0.6533249616622925 2023-01-22 10:12:05.235602: step: 198/463, loss: 0.24035483598709106 2023-01-22 10:12:05.940173: step: 200/463, loss: 1.2772352695465088 2023-01-22 10:12:06.570127: step: 202/463, loss: 0.1282467544078827 2023-01-22 10:12:07.156064: step: 204/463, loss: 0.32942572236061096 2023-01-22 10:12:07.851738: step: 206/463, loss: 0.2963917553424835 2023-01-22 10:12:08.493184: step: 208/463, loss: 0.24848397076129913 2023-01-22 10:12:09.089961: step: 210/463, loss: 0.08796899765729904 2023-01-22 10:12:09.679984: step: 212/463, loss: 0.18135881423950195 2023-01-22 10:12:10.296047: step: 214/463, loss: 0.16503803431987762 2023-01-22 10:12:10.957113: step: 216/463, loss: 0.564280092716217 2023-01-22 10:12:11.550973: step: 218/463, loss: 0.6624785661697388 2023-01-22 10:12:12.159731: step: 220/463, loss: 0.12074249237775803 2023-01-22 10:12:12.823678: step: 222/463, loss: 0.23013147711753845 2023-01-22 10:12:13.468313: step: 224/463, loss: 0.7275635004043579 2023-01-22 10:12:14.083452: step: 226/463, loss: 0.3013765811920166 2023-01-22 10:12:14.729312: step: 228/463, loss: 0.1642383337020874 2023-01-22 10:12:15.403856: step: 230/463, loss: 0.2057809680700302 2023-01-22 10:12:16.014943: step: 232/463, loss: 0.5123519897460938 2023-01-22 10:12:16.644681: step: 234/463, loss: 0.14983588457107544 2023-01-22 10:12:17.274022: step: 236/463, loss: 0.7289428114891052 2023-01-22 10:12:17.857389: step: 238/463, loss: 0.30941131711006165 2023-01-22 10:12:18.498423: step: 240/463, loss: 0.6629010438919067 2023-01-22 10:12:19.136240: step: 242/463, loss: 0.6024523973464966 2023-01-22 10:12:19.785861: step: 244/463, loss: 0.2660287320613861 2023-01-22 10:12:20.367825: step: 246/463, loss: 0.4353945255279541 2023-01-22 10:12:21.022879: step: 248/463, loss: 0.3672528564929962 2023-01-22 10:12:21.658377: step: 250/463, loss: 0.15033632516860962 2023-01-22 10:12:22.211029: step: 252/463, loss: 0.33095505833625793 2023-01-22 10:12:22.794634: step: 254/463, loss: 0.2627013623714447 2023-01-22 10:12:23.405197: step: 256/463, loss: 0.22009465098381042 2023-01-22 10:12:24.036804: step: 258/463, loss: 0.17957089841365814 2023-01-22 10:12:24.638388: step: 260/463, loss: 0.19365763664245605 2023-01-22 10:12:25.258312: step: 262/463, loss: 0.4233319163322449 2023-01-22 10:12:25.866948: step: 264/463, loss: 0.3395460247993469 2023-01-22 10:12:26.474444: step: 266/463, loss: 0.18502365052700043 2023-01-22 10:12:27.097867: step: 268/463, loss: 0.16486413776874542 2023-01-22 10:12:27.702038: step: 270/463, loss: 0.12008094042539597 2023-01-22 10:12:28.389436: step: 272/463, loss: 0.33829760551452637 2023-01-22 10:12:29.074060: step: 274/463, loss: 0.25912705063819885 2023-01-22 10:12:29.712759: step: 276/463, loss: 0.16830359399318695 2023-01-22 10:12:30.320800: step: 278/463, loss: 0.5848151445388794 2023-01-22 10:12:30.987930: step: 280/463, loss: 0.6665197014808655 2023-01-22 10:12:31.614943: step: 282/463, loss: 0.31737908720970154 2023-01-22 10:12:32.258422: step: 284/463, loss: 0.3111627399921417 2023-01-22 10:12:32.818763: step: 286/463, loss: 0.11487477272748947 2023-01-22 10:12:33.424277: step: 288/463, loss: 0.26367437839508057 2023-01-22 10:12:34.000465: step: 290/463, loss: 0.21586939692497253 2023-01-22 10:12:34.593154: step: 292/463, loss: 0.3515869081020355 2023-01-22 10:12:35.202362: step: 294/463, loss: 0.1616506278514862 2023-01-22 10:12:35.758813: step: 296/463, loss: 0.24998968839645386 2023-01-22 10:12:36.328243: step: 298/463, loss: 0.23901832103729248 2023-01-22 10:12:36.928632: step: 300/463, loss: 0.25040459632873535 2023-01-22 10:12:37.572906: step: 302/463, loss: 0.6697432398796082 2023-01-22 10:12:38.262947: step: 304/463, loss: 0.14914096891880035 2023-01-22 10:12:38.864056: step: 306/463, loss: 0.9890682697296143 2023-01-22 10:12:39.457805: step: 308/463, loss: 0.543837308883667 2023-01-22 10:12:40.033157: step: 310/463, loss: 0.20534953474998474 2023-01-22 10:12:40.728250: step: 312/463, loss: 0.25874242186546326 2023-01-22 10:12:41.310863: step: 314/463, loss: 0.7873653769493103 2023-01-22 10:12:41.916082: step: 316/463, loss: 0.627815842628479 2023-01-22 10:12:42.471538: step: 318/463, loss: 0.15859894454479218 2023-01-22 10:12:43.102542: step: 320/463, loss: 1.783982515335083 2023-01-22 10:12:43.734611: step: 322/463, loss: 0.3193933069705963 2023-01-22 10:12:44.284911: step: 324/463, loss: 0.12599636614322662 2023-01-22 10:12:44.974863: step: 326/463, loss: 0.40168797969818115 2023-01-22 10:12:45.588912: step: 328/463, loss: 0.27117809653282166 2023-01-22 10:12:46.218765: step: 330/463, loss: 0.676965594291687 2023-01-22 10:12:46.876281: step: 332/463, loss: 1.0799967050552368 2023-01-22 10:12:47.516279: step: 334/463, loss: 0.32789263129234314 2023-01-22 10:12:48.137926: step: 336/463, loss: 0.38837581872940063 2023-01-22 10:12:48.739080: step: 338/463, loss: 0.12659011781215668 2023-01-22 10:12:49.480881: step: 340/463, loss: 0.3912752568721771 2023-01-22 10:12:50.087416: step: 342/463, loss: 0.14781580865383148 2023-01-22 10:12:50.682937: step: 344/463, loss: 0.2638659179210663 2023-01-22 10:12:51.282370: step: 346/463, loss: 0.09076520055532455 2023-01-22 10:12:51.869191: step: 348/463, loss: 0.26640796661376953 2023-01-22 10:12:52.479934: step: 350/463, loss: 0.22564326226711273 2023-01-22 10:12:53.067136: step: 352/463, loss: 0.13490281999111176 2023-01-22 10:12:53.685637: step: 354/463, loss: 0.4779565632343292 2023-01-22 10:12:54.283394: step: 356/463, loss: 0.13475587964057922 2023-01-22 10:12:54.864057: step: 358/463, loss: 0.26774728298187256 2023-01-22 10:12:55.467662: step: 360/463, loss: 0.7304152250289917 2023-01-22 10:12:56.080100: step: 362/463, loss: 0.28829339146614075 2023-01-22 10:12:56.686937: step: 364/463, loss: 0.21928000450134277 2023-01-22 10:12:57.284654: step: 366/463, loss: 0.4275215268135071 2023-01-22 10:12:57.903590: step: 368/463, loss: 0.6500132083892822 2023-01-22 10:12:58.445689: step: 370/463, loss: 0.1395910084247589 2023-01-22 10:12:59.053106: step: 372/463, loss: 0.32921189069747925 2023-01-22 10:12:59.676415: step: 374/463, loss: 0.1203937977552414 2023-01-22 10:13:00.277679: step: 376/463, loss: 0.5926775336265564 2023-01-22 10:13:00.996950: step: 378/463, loss: 0.4914388060569763 2023-01-22 10:13:01.627067: step: 380/463, loss: 0.33103182911872864 2023-01-22 10:13:02.243666: step: 382/463, loss: 0.1833011507987976 2023-01-22 10:13:02.816575: step: 384/463, loss: 0.17184047400951385 2023-01-22 10:13:03.400212: step: 386/463, loss: 0.08801168203353882 2023-01-22 10:13:04.020356: step: 388/463, loss: 0.12636591494083405 2023-01-22 10:13:04.600942: step: 390/463, loss: 0.15098901093006134 2023-01-22 10:13:05.226387: step: 392/463, loss: 0.15603704750537872 2023-01-22 10:13:05.786252: step: 394/463, loss: 0.1481577754020691 2023-01-22 10:13:06.398545: step: 396/463, loss: 0.37629637122154236 2023-01-22 10:13:07.026641: step: 398/463, loss: 0.29679200053215027 2023-01-22 10:13:07.623062: step: 400/463, loss: 0.05918826162815094 2023-01-22 10:13:08.357264: step: 402/463, loss: 0.7189347147941589 2023-01-22 10:13:09.024893: step: 404/463, loss: 0.149513840675354 2023-01-22 10:13:09.712414: step: 406/463, loss: 0.1751466691493988 2023-01-22 10:13:10.361768: step: 408/463, loss: 0.12888486683368683 2023-01-22 10:13:10.955790: step: 410/463, loss: 0.06725501269102097 2023-01-22 10:13:11.556376: step: 412/463, loss: 0.46183106303215027 2023-01-22 10:13:12.147565: step: 414/463, loss: 0.5635194778442383 2023-01-22 10:13:12.714095: step: 416/463, loss: 0.12534135580062866 2023-01-22 10:13:13.390101: step: 418/463, loss: 0.10064512491226196 2023-01-22 10:13:14.024342: step: 420/463, loss: 0.2672666609287262 2023-01-22 10:13:14.664661: step: 422/463, loss: 0.1969425082206726 2023-01-22 10:13:15.242393: step: 424/463, loss: 0.493498831987381 2023-01-22 10:13:15.820480: step: 426/463, loss: 0.31862872838974 2023-01-22 10:13:16.400324: step: 428/463, loss: 0.14827391505241394 2023-01-22 10:13:17.019154: step: 430/463, loss: 0.11653280258178711 2023-01-22 10:13:17.625784: step: 432/463, loss: 0.6190656423568726 2023-01-22 10:13:18.238902: step: 434/463, loss: 0.5870165824890137 2023-01-22 10:13:18.808437: step: 436/463, loss: 0.12024692445993423 2023-01-22 10:13:19.397916: step: 438/463, loss: 0.12173856794834137 2023-01-22 10:13:20.013276: step: 440/463, loss: 0.42035773396492004 2023-01-22 10:13:20.640886: step: 442/463, loss: 0.23534147441387177 2023-01-22 10:13:21.283978: step: 444/463, loss: 0.18608425557613373 2023-01-22 10:13:21.934382: step: 446/463, loss: 0.09951744973659515 2023-01-22 10:13:22.557748: step: 448/463, loss: 0.3251801133155823 2023-01-22 10:13:23.172711: step: 450/463, loss: 0.1532084196805954 2023-01-22 10:13:23.782748: step: 452/463, loss: 0.18904155492782593 2023-01-22 10:13:24.388502: step: 454/463, loss: 0.3416127860546112 2023-01-22 10:13:24.983697: step: 456/463, loss: 0.13285750150680542 2023-01-22 10:13:25.585852: step: 458/463, loss: 0.38581329584121704 2023-01-22 10:13:26.233501: step: 460/463, loss: 0.07741407305002213 2023-01-22 10:13:26.824140: step: 462/463, loss: 0.1496533453464508 2023-01-22 10:13:27.427120: step: 464/463, loss: 0.4598150849342346 2023-01-22 10:13:28.068609: step: 466/463, loss: 0.2529575228691101 2023-01-22 10:13:28.751623: step: 468/463, loss: 0.15284910798072815 2023-01-22 10:13:29.368670: step: 470/463, loss: 0.7500278949737549 2023-01-22 10:13:30.039721: step: 472/463, loss: 0.4187954366207123 2023-01-22 10:13:30.614871: step: 474/463, loss: 0.1004641056060791 2023-01-22 10:13:31.258419: step: 476/463, loss: 0.2837781012058258 2023-01-22 10:13:31.845640: step: 478/463, loss: 0.234932079911232 2023-01-22 10:13:32.458580: step: 480/463, loss: 0.14485178887844086 2023-01-22 10:13:33.078538: step: 482/463, loss: 0.8474454283714294 2023-01-22 10:13:33.697333: step: 484/463, loss: 0.04368195682764053 2023-01-22 10:13:34.307530: step: 486/463, loss: 0.45092302560806274 2023-01-22 10:13:34.879701: step: 488/463, loss: 0.403274267911911 2023-01-22 10:13:35.497593: step: 490/463, loss: 0.07356066256761551 2023-01-22 10:13:36.183459: step: 492/463, loss: 1.185387134552002 2023-01-22 10:13:36.779119: step: 494/463, loss: 0.8510439395904541 2023-01-22 10:13:37.451274: step: 496/463, loss: 0.34438633918762207 2023-01-22 10:13:38.055481: step: 498/463, loss: 0.5409513115882874 2023-01-22 10:13:38.707902: step: 500/463, loss: 0.5065198540687561 2023-01-22 10:13:39.259505: step: 502/463, loss: 0.1975422203540802 2023-01-22 10:13:39.904582: step: 504/463, loss: 0.5251680612564087 2023-01-22 10:13:40.538829: step: 506/463, loss: 0.2176342010498047 2023-01-22 10:13:41.139361: step: 508/463, loss: 0.28560739755630493 2023-01-22 10:13:41.743017: step: 510/463, loss: 0.9183048009872437 2023-01-22 10:13:42.368327: step: 512/463, loss: 0.24042105674743652 2023-01-22 10:13:43.007122: step: 514/463, loss: 0.488264262676239 2023-01-22 10:13:43.596126: step: 516/463, loss: 0.4155459403991699 2023-01-22 10:13:44.168586: step: 518/463, loss: 0.5750282406806946 2023-01-22 10:13:44.774119: step: 520/463, loss: 0.09353403747081757 2023-01-22 10:13:45.431453: step: 522/463, loss: 0.5741830468177795 2023-01-22 10:13:46.176932: step: 524/463, loss: 0.5304901599884033 2023-01-22 10:13:46.768188: step: 526/463, loss: 0.11579668521881104 2023-01-22 10:13:47.406485: step: 528/463, loss: 0.806922435760498 2023-01-22 10:13:48.022985: step: 530/463, loss: 0.13459539413452148 2023-01-22 10:13:48.648908: step: 532/463, loss: 0.9403744339942932 2023-01-22 10:13:49.256609: step: 534/463, loss: 0.32428431510925293 2023-01-22 10:13:49.991261: step: 536/463, loss: 1.3836514949798584 2023-01-22 10:13:50.706712: step: 538/463, loss: 0.1800803244113922 2023-01-22 10:13:51.358781: step: 540/463, loss: 0.11934054642915726 2023-01-22 10:13:51.997394: step: 542/463, loss: 0.458288311958313 2023-01-22 10:13:52.548793: step: 544/463, loss: 0.3857291340827942 2023-01-22 10:13:53.095280: step: 546/463, loss: 0.06559847295284271 2023-01-22 10:13:53.753671: step: 548/463, loss: 0.4696250855922699 2023-01-22 10:13:54.459776: step: 550/463, loss: 0.24462640285491943 2023-01-22 10:13:55.047632: step: 552/463, loss: 0.39239171147346497 2023-01-22 10:13:55.757673: step: 554/463, loss: 0.8460155129432678 2023-01-22 10:13:56.385499: step: 556/463, loss: 0.1752811223268509 2023-01-22 10:13:57.007567: step: 558/463, loss: 0.3194790184497833 2023-01-22 10:13:57.746257: step: 560/463, loss: 0.21365021169185638 2023-01-22 10:13:58.377314: step: 562/463, loss: 0.22283637523651123 2023-01-22 10:13:58.989500: step: 564/463, loss: 0.8638373613357544 2023-01-22 10:13:59.591336: step: 566/463, loss: 0.17956224083900452 2023-01-22 10:14:00.239892: step: 568/463, loss: 0.2995205819606781 2023-01-22 10:14:00.908060: step: 570/463, loss: 0.4693411588668823 2023-01-22 10:14:01.526803: step: 572/463, loss: 0.0845862329006195 2023-01-22 10:14:02.187169: step: 574/463, loss: 0.06597862392663956 2023-01-22 10:14:02.759480: step: 576/463, loss: 0.19842566549777985 2023-01-22 10:14:03.391890: step: 578/463, loss: 0.17455452680587769 2023-01-22 10:14:03.990663: step: 580/463, loss: 0.15945930778980255 2023-01-22 10:14:04.629235: step: 582/463, loss: 0.3631148040294647 2023-01-22 10:14:05.146694: step: 584/463, loss: 0.3113175928592682 2023-01-22 10:14:05.822311: step: 586/463, loss: 0.13008259236812592 2023-01-22 10:14:06.489143: step: 588/463, loss: 0.18816684186458588 2023-01-22 10:14:07.091461: step: 590/463, loss: 0.6339176297187805 2023-01-22 10:14:07.722165: step: 592/463, loss: 0.12655837833881378 2023-01-22 10:14:08.405013: step: 594/463, loss: 0.12447237968444824 2023-01-22 10:14:09.051933: step: 596/463, loss: 0.23655134439468384 2023-01-22 10:14:09.670606: step: 598/463, loss: 0.270586758852005 2023-01-22 10:14:10.238852: step: 600/463, loss: 0.7481467127799988 2023-01-22 10:14:10.845172: step: 602/463, loss: 1.033585548400879 2023-01-22 10:14:11.439515: step: 604/463, loss: 0.17560365796089172 2023-01-22 10:14:12.040450: step: 606/463, loss: 0.2297007441520691 2023-01-22 10:14:12.732817: step: 608/463, loss: 0.34454545378685 2023-01-22 10:14:13.340799: step: 610/463, loss: 0.1451507806777954 2023-01-22 10:14:14.042102: step: 612/463, loss: 0.18311670422554016 2023-01-22 10:14:14.667971: step: 614/463, loss: 0.22084030508995056 2023-01-22 10:14:15.306958: step: 616/463, loss: 0.23423704504966736 2023-01-22 10:14:15.979406: step: 618/463, loss: 0.5065959095954895 2023-01-22 10:14:16.636011: step: 620/463, loss: 0.2406112104654312 2023-01-22 10:14:17.240211: step: 622/463, loss: 0.21883827447891235 2023-01-22 10:14:17.861976: step: 624/463, loss: 0.5106756687164307 2023-01-22 10:14:18.412057: step: 626/463, loss: 0.13174429535865784 2023-01-22 10:14:19.068639: step: 628/463, loss: 0.08035876601934433 2023-01-22 10:14:19.683028: step: 630/463, loss: 0.19846908748149872 2023-01-22 10:14:20.307608: step: 632/463, loss: 0.1697191447019577 2023-01-22 10:14:20.978692: step: 634/463, loss: 0.7762958407402039 2023-01-22 10:14:21.661448: step: 636/463, loss: 0.2021734118461609 2023-01-22 10:14:22.285546: step: 638/463, loss: 0.6043620109558105 2023-01-22 10:14:22.921294: step: 640/463, loss: 0.2545052468776703 2023-01-22 10:14:23.569950: step: 642/463, loss: 0.852972686290741 2023-01-22 10:14:24.196871: step: 644/463, loss: 0.11844108998775482 2023-01-22 10:14:24.794841: step: 646/463, loss: 3.2565789222717285 2023-01-22 10:14:25.394678: step: 648/463, loss: 0.3924574851989746 2023-01-22 10:14:26.091327: step: 650/463, loss: 0.7701157331466675 2023-01-22 10:14:26.663397: step: 652/463, loss: 0.15826819837093353 2023-01-22 10:14:27.389987: step: 654/463, loss: 0.6228159666061401 2023-01-22 10:14:28.030706: step: 656/463, loss: 0.10164700448513031 2023-01-22 10:14:28.646105: step: 658/463, loss: 0.941778838634491 2023-01-22 10:14:29.305975: step: 660/463, loss: 0.40705472230911255 2023-01-22 10:14:29.945346: step: 662/463, loss: 0.19260768592357635 2023-01-22 10:14:30.555745: step: 664/463, loss: 0.5180705189704895 2023-01-22 10:14:31.199070: step: 666/463, loss: 0.3370892405509949 2023-01-22 10:14:31.801242: step: 668/463, loss: 0.4702625274658203 2023-01-22 10:14:32.397864: step: 670/463, loss: 0.2752023935317993 2023-01-22 10:14:33.072273: step: 672/463, loss: 1.0023704767227173 2023-01-22 10:14:33.669547: step: 674/463, loss: 0.26584145426750183 2023-01-22 10:14:34.295898: step: 676/463, loss: 0.2884277105331421 2023-01-22 10:14:34.909181: step: 678/463, loss: 0.17747734487056732 2023-01-22 10:14:35.536859: step: 680/463, loss: 0.5460485816001892 2023-01-22 10:14:36.129050: step: 682/463, loss: 0.20242799818515778 2023-01-22 10:14:36.728581: step: 684/463, loss: 0.11478574573993683 2023-01-22 10:14:37.490290: step: 686/463, loss: 0.5837790369987488 2023-01-22 10:14:38.122805: step: 688/463, loss: 0.24769118428230286 2023-01-22 10:14:38.759360: step: 690/463, loss: 0.08140033483505249 2023-01-22 10:14:39.411207: step: 692/463, loss: 0.20934681594371796 2023-01-22 10:14:40.022627: step: 694/463, loss: 0.12450077384710312 2023-01-22 10:14:40.674668: step: 696/463, loss: 0.9106389284133911 2023-01-22 10:14:41.377759: step: 698/463, loss: 1.0714139938354492 2023-01-22 10:14:41.985819: step: 700/463, loss: 0.18466581404209137 2023-01-22 10:14:42.616139: step: 702/463, loss: 0.45947933197021484 2023-01-22 10:14:43.247862: step: 704/463, loss: 0.3965025544166565 2023-01-22 10:14:43.855499: step: 706/463, loss: 0.22625593841075897 2023-01-22 10:14:44.488016: step: 708/463, loss: 0.11809322237968445 2023-01-22 10:14:45.159549: step: 710/463, loss: 0.30682238936424255 2023-01-22 10:14:45.791647: step: 712/463, loss: 0.11762760579586029 2023-01-22 10:14:46.397767: step: 714/463, loss: 0.16235385835170746 2023-01-22 10:14:47.025560: step: 716/463, loss: 0.3187079131603241 2023-01-22 10:14:47.593977: step: 718/463, loss: 0.4351699650287628 2023-01-22 10:14:48.235432: step: 720/463, loss: 0.3702765703201294 2023-01-22 10:14:48.852300: step: 722/463, loss: 0.337517648935318 2023-01-22 10:14:49.463160: step: 724/463, loss: 0.17862853407859802 2023-01-22 10:14:50.059236: step: 726/463, loss: 0.1480695605278015 2023-01-22 10:14:50.644924: step: 728/463, loss: 0.09627832472324371 2023-01-22 10:14:51.262738: step: 730/463, loss: 0.10118921846151352 2023-01-22 10:14:51.877944: step: 732/463, loss: 0.2089187353849411 2023-01-22 10:14:52.472452: step: 734/463, loss: 0.2085215449333191 2023-01-22 10:14:53.131257: step: 736/463, loss: 0.3316103518009186 2023-01-22 10:14:53.760066: step: 738/463, loss: 0.17901891469955444 2023-01-22 10:14:54.360984: step: 740/463, loss: 0.32212668657302856 2023-01-22 10:14:54.978473: step: 742/463, loss: 0.119500070810318 2023-01-22 10:14:55.602038: step: 744/463, loss: 0.10016391426324844 2023-01-22 10:14:56.230066: step: 746/463, loss: 0.4700267016887665 2023-01-22 10:14:56.909487: step: 748/463, loss: 0.10390250384807587 2023-01-22 10:14:57.658801: step: 750/463, loss: 0.2568435072898865 2023-01-22 10:14:58.310030: step: 752/463, loss: 0.2085363119840622 2023-01-22 10:14:59.045557: step: 754/463, loss: 0.03869916498661041 2023-01-22 10:14:59.649524: step: 756/463, loss: 0.4686168134212494 2023-01-22 10:15:00.158765: step: 758/463, loss: 0.4501277506351471 2023-01-22 10:15:00.759210: step: 760/463, loss: 0.08514165878295898 2023-01-22 10:15:01.397724: step: 762/463, loss: 0.16509270668029785 2023-01-22 10:15:01.974135: step: 764/463, loss: 0.5358824729919434 2023-01-22 10:15:02.632848: step: 766/463, loss: 0.34480440616607666 2023-01-22 10:15:03.288702: step: 768/463, loss: 0.211360901594162 2023-01-22 10:15:03.879948: step: 770/463, loss: 0.2957981824874878 2023-01-22 10:15:04.506430: step: 772/463, loss: 0.09436078369617462 2023-01-22 10:15:05.102533: step: 774/463, loss: 0.06603935360908508 2023-01-22 10:15:05.688036: step: 776/463, loss: 0.14114435017108917 2023-01-22 10:15:06.293495: step: 778/463, loss: 0.1795409768819809 2023-01-22 10:15:06.902262: step: 780/463, loss: 0.3366824984550476 2023-01-22 10:15:07.568908: step: 782/463, loss: 0.3289976418018341 2023-01-22 10:15:08.132902: step: 784/463, loss: 0.1650543510913849 2023-01-22 10:15:08.731491: step: 786/463, loss: 0.13097169995307922 2023-01-22 10:15:09.283076: step: 788/463, loss: 0.24702036380767822 2023-01-22 10:15:09.865548: step: 790/463, loss: 0.18632261455059052 2023-01-22 10:15:10.519328: step: 792/463, loss: 0.18176907300949097 2023-01-22 10:15:11.123212: step: 794/463, loss: 0.5092656016349792 2023-01-22 10:15:11.730798: step: 796/463, loss: 0.07761086523532867 2023-01-22 10:15:12.344910: step: 798/463, loss: 0.4100775122642517 2023-01-22 10:15:12.952438: step: 800/463, loss: 0.20849396288394928 2023-01-22 10:15:13.610549: step: 802/463, loss: 1.4022369384765625 2023-01-22 10:15:14.265046: step: 804/463, loss: 0.1023639440536499 2023-01-22 10:15:14.927120: step: 806/463, loss: 0.16859178245067596 2023-01-22 10:15:15.554842: step: 808/463, loss: 0.2601999342441559 2023-01-22 10:15:16.149138: step: 810/463, loss: 0.38377615809440613 2023-01-22 10:15:16.716622: step: 812/463, loss: 0.3133040964603424 2023-01-22 10:15:17.296140: step: 814/463, loss: 0.47720515727996826 2023-01-22 10:15:17.892962: step: 816/463, loss: 2.147118330001831 2023-01-22 10:15:18.503479: step: 818/463, loss: 0.04397233948111534 2023-01-22 10:15:19.149252: step: 820/463, loss: 0.1503096967935562 2023-01-22 10:15:19.815191: step: 822/463, loss: 0.4594634473323822 2023-01-22 10:15:20.411932: step: 824/463, loss: 0.12943784892559052 2023-01-22 10:15:21.003660: step: 826/463, loss: 0.46011871099472046 2023-01-22 10:15:21.623817: step: 828/463, loss: 0.8428401350975037 2023-01-22 10:15:22.206146: step: 830/463, loss: 0.23051264882087708 2023-01-22 10:15:22.824341: step: 832/463, loss: 0.10706888884305954 2023-01-22 10:15:23.436504: step: 834/463, loss: 0.5708986520767212 2023-01-22 10:15:24.003244: step: 836/463, loss: 0.1993168443441391 2023-01-22 10:15:24.665671: step: 838/463, loss: 0.7545018792152405 2023-01-22 10:15:25.305253: step: 840/463, loss: 0.5041952133178711 2023-01-22 10:15:25.924152: step: 842/463, loss: 0.1180022805929184 2023-01-22 10:15:26.489059: step: 844/463, loss: 0.34608232975006104 2023-01-22 10:15:27.114536: step: 846/463, loss: 0.060121431946754456 2023-01-22 10:15:27.713217: step: 848/463, loss: 0.10963843762874603 2023-01-22 10:15:28.299148: step: 850/463, loss: 0.2471901923418045 2023-01-22 10:15:28.919182: step: 852/463, loss: 0.5380828976631165 2023-01-22 10:15:29.506190: step: 854/463, loss: 0.467693030834198 2023-01-22 10:15:30.116790: step: 856/463, loss: 3.8073570728302 2023-01-22 10:15:30.719831: step: 858/463, loss: 2.411452293395996 2023-01-22 10:15:31.355724: step: 860/463, loss: 0.27982455492019653 2023-01-22 10:15:31.947736: step: 862/463, loss: 2.2248191833496094 2023-01-22 10:15:32.569949: step: 864/463, loss: 0.22622354328632355 2023-01-22 10:15:33.159088: step: 866/463, loss: 0.14150923490524292 2023-01-22 10:15:33.880997: step: 868/463, loss: 0.22644901275634766 2023-01-22 10:15:34.487153: step: 870/463, loss: 0.16060954332351685 2023-01-22 10:15:35.086736: step: 872/463, loss: 0.22159768640995026 2023-01-22 10:15:35.704677: step: 874/463, loss: 0.2330881804227829 2023-01-22 10:15:36.351264: step: 876/463, loss: 0.16592679917812347 2023-01-22 10:15:36.970033: step: 878/463, loss: 0.7527386546134949 2023-01-22 10:15:37.613077: step: 880/463, loss: 0.17801988124847412 2023-01-22 10:15:38.221163: step: 882/463, loss: 0.25850164890289307 2023-01-22 10:15:38.860149: step: 884/463, loss: 0.8933775424957275 2023-01-22 10:15:39.509238: step: 886/463, loss: 0.28092578053474426 2023-01-22 10:15:40.166775: step: 888/463, loss: 0.8770626187324524 2023-01-22 10:15:40.781959: step: 890/463, loss: 0.2462228685617447 2023-01-22 10:15:41.438377: step: 892/463, loss: 0.4454179108142853 2023-01-22 10:15:42.056595: step: 894/463, loss: 0.22192177176475525 2023-01-22 10:15:42.656366: step: 896/463, loss: 0.19274753332138062 2023-01-22 10:15:43.272989: step: 898/463, loss: 0.3427952527999878 2023-01-22 10:15:43.924884: step: 900/463, loss: 0.3240050673484802 2023-01-22 10:15:44.482687: step: 902/463, loss: 0.08743232488632202 2023-01-22 10:15:45.091224: step: 904/463, loss: 0.3861238956451416 2023-01-22 10:15:45.673259: step: 906/463, loss: 0.3245295584201813 2023-01-22 10:15:46.285507: step: 908/463, loss: 0.11214976757764816 2023-01-22 10:15:46.965247: step: 910/463, loss: 0.8596938252449036 2023-01-22 10:15:47.554417: step: 912/463, loss: 0.5284637212753296 2023-01-22 10:15:48.155919: step: 914/463, loss: 0.30676794052124023 2023-01-22 10:15:48.821385: step: 916/463, loss: 0.15184001624584198 2023-01-22 10:15:49.422821: step: 918/463, loss: 0.591627299785614 2023-01-22 10:15:50.162117: step: 920/463, loss: 0.1877332627773285 2023-01-22 10:15:50.801344: step: 922/463, loss: 0.13615302741527557 2023-01-22 10:15:51.361027: step: 924/463, loss: 0.3489464521408081 2023-01-22 10:15:51.915546: step: 926/463, loss: 0.24088625609874725 ================================================== Loss: 0.372 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3065845650971899, 'r': 0.3409080742826438, 'f1': 0.322836577083474}, 'combined': 0.23787958311413873, 'epoch': 9} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3289102502889268, 'r': 0.3590855026090118, 'f1': 0.34333613845949373}, 'combined': 0.2661265762221913, 'epoch': 9} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2688971045942649, 'r': 0.34798448829846046, 'f1': 0.30337109236276044}, 'combined': 0.22353659437256032, 'epoch': 9} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.31299154514393834, 'r': 0.36262265175246383, 'f1': 0.33598412998890703}, 'combined': 0.26042789023063606, 'epoch': 9} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2753613735398345, 'r': 0.3438098364121653, 'f1': 0.3058021667328457}, 'combined': 0.22532791232946525, 'epoch': 9} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3208621169988284, 'r': 0.35920042141964426, 'f1': 0.338950614487921}, 'combined': 0.2627272705600154, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23039215686274508, 'r': 0.3357142857142857, 'f1': 0.27325581395348836}, 'combined': 0.1821705426356589, 'epoch': 9} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.28846153846153844, 'r': 0.4891304347826087, 'f1': 0.3629032258064516}, 'combined': 0.1814516129032258, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29545454545454547, 'r': 0.22413793103448276, 'f1': 0.2549019607843137}, 'combined': 0.1699346405228758, 'epoch': 9} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29855383301254707, 'r': 0.34444161379815674, 'f1': 0.3198603180116804}, 'combined': 0.23568655011386977, 'epoch': 7} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.30988948788864984, 'r': 0.3867922100668993, 'f1': 0.34409642236532006}, 'combined': 0.2667158871922577, 'epoch': 7} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2636363636363636, 'r': 0.4142857142857143, 'f1': 0.3222222222222222}, 'combined': 0.2148148148148148, 'epoch': 7} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.294349755002927, 'r': 0.3345645222898544, 'f1': 0.3131714089640378}, 'combined': 0.23075788028929098, 'epoch': 6} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3358950473035951, 'r': 0.33466466251493726, 'f1': 0.3352787261158821}, 'combined': 0.25988111785058804, 'epoch': 6} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 6} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28012957534441907, 'r': 0.3140846753861668, 'f1': 0.29613697964981445}, 'combined': 0.21820619553144222, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3064842101133353, 'r': 0.3585977936344447, 'f1': 0.3304992795207758}, 'combined': 0.2561764750352425, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.421875, 'r': 0.23275862068965517, 'f1': 0.3}, 'combined': 0.19999999999999998, 'epoch': 5} ****************************** Epoch: 10 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 10:18:30.725217: step: 2/463, loss: 0.16869837045669556 2023-01-22 10:18:31.335227: step: 4/463, loss: 0.3342270851135254 2023-01-22 10:18:31.968558: step: 6/463, loss: 0.5534235239028931 2023-01-22 10:18:32.531949: step: 8/463, loss: 0.8908038139343262 2023-01-22 10:18:33.166762: step: 10/463, loss: 0.20792478322982788 2023-01-22 10:18:33.777252: step: 12/463, loss: 0.11852412670850754 2023-01-22 10:18:34.568963: step: 14/463, loss: 0.26360851526260376 2023-01-22 10:18:35.188556: step: 16/463, loss: 0.5815234184265137 2023-01-22 10:18:35.781722: step: 18/463, loss: 0.4189456105232239 2023-01-22 10:18:36.447058: step: 20/463, loss: 0.11026723682880402 2023-01-22 10:18:37.058212: step: 22/463, loss: 0.19458189606666565 2023-01-22 10:18:37.666636: step: 24/463, loss: 0.15351428091526031 2023-01-22 10:18:38.270412: step: 26/463, loss: 0.1480802446603775 2023-01-22 10:18:38.946806: step: 28/463, loss: 0.12472604960203171 2023-01-22 10:18:39.656090: step: 30/463, loss: 0.03652859851717949 2023-01-22 10:18:40.237231: step: 32/463, loss: 0.6001203656196594 2023-01-22 10:18:40.887458: step: 34/463, loss: 0.6026164293289185 2023-01-22 10:18:41.453654: step: 36/463, loss: 0.07915201783180237 2023-01-22 10:18:42.086092: step: 38/463, loss: 0.18657277524471283 2023-01-22 10:18:42.662113: step: 40/463, loss: 0.1918850839138031 2023-01-22 10:18:43.182208: step: 42/463, loss: 0.11573538929224014 2023-01-22 10:18:43.818474: step: 44/463, loss: 1.5855425596237183 2023-01-22 10:18:44.439836: step: 46/463, loss: 0.026589874178171158 2023-01-22 10:18:45.066420: step: 48/463, loss: 0.4069761037826538 2023-01-22 10:18:45.652818: step: 50/463, loss: 0.319105863571167 2023-01-22 10:18:46.300228: step: 52/463, loss: 0.20788046717643738 2023-01-22 10:18:46.835224: step: 54/463, loss: 0.8110256195068359 2023-01-22 10:18:47.364380: step: 56/463, loss: 0.14753176271915436 2023-01-22 10:18:47.932073: step: 58/463, loss: 0.6654013395309448 2023-01-22 10:18:48.545565: step: 60/463, loss: 0.14762844145298004 2023-01-22 10:18:49.162433: step: 62/463, loss: 0.6875556111335754 2023-01-22 10:18:49.766421: step: 64/463, loss: 0.05807267874479294 2023-01-22 10:18:50.319673: step: 66/463, loss: 0.5644345283508301 2023-01-22 10:18:50.982714: step: 68/463, loss: 0.117078997194767 2023-01-22 10:18:51.636073: step: 70/463, loss: 0.31340864300727844 2023-01-22 10:18:52.277528: step: 72/463, loss: 0.3335522413253784 2023-01-22 10:18:52.878528: step: 74/463, loss: 0.5517826080322266 2023-01-22 10:18:53.399886: step: 76/463, loss: 1.1152862310409546 2023-01-22 10:18:53.951019: step: 78/463, loss: 0.11360624432563782 2023-01-22 10:18:54.578953: step: 80/463, loss: 0.38960960507392883 2023-01-22 10:18:55.193771: step: 82/463, loss: 0.3981383442878723 2023-01-22 10:18:55.805963: step: 84/463, loss: 0.21463234722614288 2023-01-22 10:18:56.460208: step: 86/463, loss: 0.18379442393779755 2023-01-22 10:18:57.096310: step: 88/463, loss: 0.14944233000278473 2023-01-22 10:18:57.691080: step: 90/463, loss: 0.5530070662498474 2023-01-22 10:18:58.251658: step: 92/463, loss: 2.2206058502197266 2023-01-22 10:18:58.853801: step: 94/463, loss: 0.21944405138492584 2023-01-22 10:18:59.456342: step: 96/463, loss: 0.3404669761657715 2023-01-22 10:19:00.067435: step: 98/463, loss: 0.88285893201828 2023-01-22 10:19:00.717046: step: 100/463, loss: 0.26726895570755005 2023-01-22 10:19:01.496752: step: 102/463, loss: 0.14909866452217102 2023-01-22 10:19:02.110885: step: 104/463, loss: 0.1025569811463356 2023-01-22 10:19:02.793861: step: 106/463, loss: 0.17497482895851135 2023-01-22 10:19:03.411572: step: 108/463, loss: 0.12367457151412964 2023-01-22 10:19:03.994887: step: 110/463, loss: 0.12136532366275787 2023-01-22 10:19:04.729696: step: 112/463, loss: 0.46946007013320923 2023-01-22 10:19:05.238397: step: 114/463, loss: 0.10456369817256927 2023-01-22 10:19:05.791976: step: 116/463, loss: 0.24857363104820251 2023-01-22 10:19:06.432223: step: 118/463, loss: 0.19830164313316345 2023-01-22 10:19:07.052669: step: 120/463, loss: 0.8026825785636902 2023-01-22 10:19:07.621116: step: 122/463, loss: 0.19431965053081512 2023-01-22 10:19:08.256542: step: 124/463, loss: 0.473052054643631 2023-01-22 10:19:08.857199: step: 126/463, loss: 0.23367270827293396 2023-01-22 10:19:09.421365: step: 128/463, loss: 0.1543530970811844 2023-01-22 10:19:09.969541: step: 130/463, loss: 0.04371014982461929 2023-01-22 10:19:10.599473: step: 132/463, loss: 0.39058345556259155 2023-01-22 10:19:11.188421: step: 134/463, loss: 0.11146162450313568 2023-01-22 10:19:11.757358: step: 136/463, loss: 0.15062418580055237 2023-01-22 10:19:12.410381: step: 138/463, loss: 0.14504064619541168 2023-01-22 10:19:12.978790: step: 140/463, loss: 0.11511898785829544 2023-01-22 10:19:13.626971: step: 142/463, loss: 0.5905314087867737 2023-01-22 10:19:14.278794: step: 144/463, loss: 1.299288272857666 2023-01-22 10:19:14.900353: step: 146/463, loss: 1.1239265203475952 2023-01-22 10:19:15.510078: step: 148/463, loss: 0.4312412440776825 2023-01-22 10:19:16.137299: step: 150/463, loss: 2.4450268745422363 2023-01-22 10:19:16.741436: step: 152/463, loss: 0.14248216152191162 2023-01-22 10:19:17.316894: step: 154/463, loss: 0.18731839954853058 2023-01-22 10:19:18.112064: step: 156/463, loss: 0.23078471422195435 2023-01-22 10:19:18.702283: step: 158/463, loss: 0.14505517482757568 2023-01-22 10:19:19.329228: step: 160/463, loss: 0.1900167167186737 2023-01-22 10:19:19.906778: step: 162/463, loss: 0.3618879020214081 2023-01-22 10:19:20.510806: step: 164/463, loss: 0.3488426208496094 2023-01-22 10:19:21.156636: step: 166/463, loss: 0.081473208963871 2023-01-22 10:19:21.793456: step: 168/463, loss: 0.17818912863731384 2023-01-22 10:19:22.375595: step: 170/463, loss: 0.8628257513046265 2023-01-22 10:19:22.970644: step: 172/463, loss: 0.2598787248134613 2023-01-22 10:19:23.580743: step: 174/463, loss: 0.17548741400241852 2023-01-22 10:19:24.176995: step: 176/463, loss: 0.2954922914505005 2023-01-22 10:19:24.809493: step: 178/463, loss: 0.15275795757770538 2023-01-22 10:19:25.398782: step: 180/463, loss: 0.3680973947048187 2023-01-22 10:19:26.093931: step: 182/463, loss: 0.34629032015800476 2023-01-22 10:19:26.708100: step: 184/463, loss: 0.15489205718040466 2023-01-22 10:19:27.340416: step: 186/463, loss: 0.25586605072021484 2023-01-22 10:19:27.990050: step: 188/463, loss: 0.25235360860824585 2023-01-22 10:19:28.581566: step: 190/463, loss: 0.3492814600467682 2023-01-22 10:19:29.216439: step: 192/463, loss: 0.7814868688583374 2023-01-22 10:19:29.837494: step: 194/463, loss: 0.1842203289270401 2023-01-22 10:19:30.466026: step: 196/463, loss: 0.23700620234012604 2023-01-22 10:19:31.115932: step: 198/463, loss: 0.15423765778541565 2023-01-22 10:19:31.757972: step: 200/463, loss: 0.5841436386108398 2023-01-22 10:19:32.439778: step: 202/463, loss: 0.43622252345085144 2023-01-22 10:19:33.031343: step: 204/463, loss: 0.18335981667041779 2023-01-22 10:19:33.604064: step: 206/463, loss: 0.13302578032016754 2023-01-22 10:19:34.212071: step: 208/463, loss: 0.08598335087299347 2023-01-22 10:19:34.859107: step: 210/463, loss: 0.13705244660377502 2023-01-22 10:19:35.471516: step: 212/463, loss: 0.24282808601856232 2023-01-22 10:19:36.115126: step: 214/463, loss: 0.07543016225099564 2023-01-22 10:19:36.699115: step: 216/463, loss: 0.26602137088775635 2023-01-22 10:19:37.267713: step: 218/463, loss: 0.18413710594177246 2023-01-22 10:19:37.916456: step: 220/463, loss: 0.5791482925415039 2023-01-22 10:19:38.525801: step: 222/463, loss: 0.16703982651233673 2023-01-22 10:19:39.181000: step: 224/463, loss: 0.14707669615745544 2023-01-22 10:19:39.819593: step: 226/463, loss: 0.20876748859882355 2023-01-22 10:19:40.472650: step: 228/463, loss: 0.23287977278232574 2023-01-22 10:19:41.076012: step: 230/463, loss: 0.20351676642894745 2023-01-22 10:19:41.698946: step: 232/463, loss: 0.897995114326477 2023-01-22 10:19:42.306565: step: 234/463, loss: 0.30385085940361023 2023-01-22 10:19:42.925335: step: 236/463, loss: 0.09378073364496231 2023-01-22 10:19:43.555375: step: 238/463, loss: 0.22015248239040375 2023-01-22 10:19:44.151093: step: 240/463, loss: 0.21884196996688843 2023-01-22 10:19:44.858070: step: 242/463, loss: 0.6067631244659424 2023-01-22 10:19:45.529061: step: 244/463, loss: 0.5320797562599182 2023-01-22 10:19:46.148299: step: 246/463, loss: 0.17964306473731995 2023-01-22 10:19:46.796690: step: 248/463, loss: 1.1467785835266113 2023-01-22 10:19:47.488131: step: 250/463, loss: 0.25201496481895447 2023-01-22 10:19:48.117118: step: 252/463, loss: 0.1855405867099762 2023-01-22 10:19:48.761603: step: 254/463, loss: 0.22568370401859283 2023-01-22 10:19:49.344129: step: 256/463, loss: 0.2871897518634796 2023-01-22 10:19:50.074455: step: 258/463, loss: 0.0970970019698143 2023-01-22 10:19:50.630457: step: 260/463, loss: 0.28054147958755493 2023-01-22 10:19:51.211912: step: 262/463, loss: 0.21849049627780914 2023-01-22 10:19:51.826565: step: 264/463, loss: 0.25020843744277954 2023-01-22 10:19:52.447899: step: 266/463, loss: 0.16708190739154816 2023-01-22 10:19:53.052246: step: 268/463, loss: 0.15879333019256592 2023-01-22 10:19:53.656593: step: 270/463, loss: 0.900672197341919 2023-01-22 10:19:54.322660: step: 272/463, loss: 0.14571386575698853 2023-01-22 10:19:55.028816: step: 274/463, loss: 0.4086513817310333 2023-01-22 10:19:55.696908: step: 276/463, loss: 0.45200464129447937 2023-01-22 10:19:56.234717: step: 278/463, loss: 0.27273863554000854 2023-01-22 10:19:56.858087: step: 280/463, loss: 0.3633576035499573 2023-01-22 10:19:57.504083: step: 282/463, loss: 0.15236657857894897 2023-01-22 10:19:58.103593: step: 284/463, loss: 0.23210710287094116 2023-01-22 10:19:58.711479: step: 286/463, loss: 0.2957319915294647 2023-01-22 10:19:59.347373: step: 288/463, loss: 0.19323796033859253 2023-01-22 10:19:59.946106: step: 290/463, loss: 0.21700216829776764 2023-01-22 10:20:00.517137: step: 292/463, loss: 0.14365844428539276 2023-01-22 10:20:01.194584: step: 294/463, loss: 0.17817026376724243 2023-01-22 10:20:01.763133: step: 296/463, loss: 0.25463876128196716 2023-01-22 10:20:02.369701: step: 298/463, loss: 0.10557904839515686 2023-01-22 10:20:03.014610: step: 300/463, loss: 0.2822621166706085 2023-01-22 10:20:03.631682: step: 302/463, loss: 0.1397036612033844 2023-01-22 10:20:04.233832: step: 304/463, loss: 0.2683257758617401 2023-01-22 10:20:04.884314: step: 306/463, loss: 0.2542690336704254 2023-01-22 10:20:05.543669: step: 308/463, loss: 0.263831228017807 2023-01-22 10:20:06.243686: step: 310/463, loss: 1.9460499286651611 2023-01-22 10:20:06.824460: step: 312/463, loss: 0.2618602514266968 2023-01-22 10:20:07.426401: step: 314/463, loss: 0.13222338259220123 2023-01-22 10:20:07.994262: step: 316/463, loss: 0.11337538808584213 2023-01-22 10:20:08.542883: step: 318/463, loss: 0.2254640758037567 2023-01-22 10:20:09.160085: step: 320/463, loss: 0.11492456495761871 2023-01-22 10:20:09.780109: step: 322/463, loss: 1.0367741584777832 2023-01-22 10:20:10.386368: step: 324/463, loss: 0.25470981001853943 2023-01-22 10:20:10.959692: step: 326/463, loss: 0.05796697363257408 2023-01-22 10:20:11.580662: step: 328/463, loss: 0.2255239188671112 2023-01-22 10:20:12.219911: step: 330/463, loss: 0.30796173214912415 2023-01-22 10:20:12.821388: step: 332/463, loss: 0.24017490446567535 2023-01-22 10:20:13.441058: step: 334/463, loss: 0.08183560520410538 2023-01-22 10:20:14.046926: step: 336/463, loss: 0.3996495008468628 2023-01-22 10:20:14.644608: step: 338/463, loss: 0.4513796269893646 2023-01-22 10:20:15.220078: step: 340/463, loss: 0.3590623438358307 2023-01-22 10:20:15.843882: step: 342/463, loss: 0.21036890149116516 2023-01-22 10:20:16.476979: step: 344/463, loss: 0.5369996428489685 2023-01-22 10:20:17.057214: step: 346/463, loss: 0.18285968899726868 2023-01-22 10:20:17.697915: step: 348/463, loss: 0.21790096163749695 2023-01-22 10:20:18.302555: step: 350/463, loss: 0.22410736978054047 2023-01-22 10:20:18.945144: step: 352/463, loss: 0.1446080207824707 2023-01-22 10:20:19.584627: step: 354/463, loss: 0.7221202254295349 2023-01-22 10:20:20.240342: step: 356/463, loss: 0.17447739839553833 2023-01-22 10:20:20.924752: step: 358/463, loss: 0.26540112495422363 2023-01-22 10:20:21.547222: step: 360/463, loss: 0.14820483326911926 2023-01-22 10:20:22.159483: step: 362/463, loss: 0.11222131550312042 2023-01-22 10:20:22.828424: step: 364/463, loss: 0.1932050883769989 2023-01-22 10:20:23.412142: step: 366/463, loss: 0.09613216668367386 2023-01-22 10:20:24.103324: step: 368/463, loss: 0.32115912437438965 2023-01-22 10:20:24.726812: step: 370/463, loss: 0.22972719371318817 2023-01-22 10:20:25.295967: step: 372/463, loss: 0.07318393886089325 2023-01-22 10:20:25.896663: step: 374/463, loss: 0.30374351143836975 2023-01-22 10:20:26.462071: step: 376/463, loss: 0.4182368814945221 2023-01-22 10:20:27.044871: step: 378/463, loss: 0.15355642139911652 2023-01-22 10:20:27.644328: step: 380/463, loss: 0.053889188915491104 2023-01-22 10:20:28.243133: step: 382/463, loss: 0.23191863298416138 2023-01-22 10:20:28.882077: step: 384/463, loss: 0.2678071856498718 2023-01-22 10:20:29.504420: step: 386/463, loss: 0.15507501363754272 2023-01-22 10:20:30.057600: step: 388/463, loss: 0.17070400714874268 2023-01-22 10:20:30.707234: step: 390/463, loss: 0.2675144672393799 2023-01-22 10:20:31.411175: step: 392/463, loss: 0.18658870458602905 2023-01-22 10:20:32.040898: step: 394/463, loss: 0.09802158176898956 2023-01-22 10:20:32.675297: step: 396/463, loss: 0.5530169010162354 2023-01-22 10:20:33.278134: step: 398/463, loss: 0.13036175072193146 2023-01-22 10:20:33.887731: step: 400/463, loss: 0.5727664232254028 2023-01-22 10:20:34.500662: step: 402/463, loss: 0.09940128028392792 2023-01-22 10:20:35.116705: step: 404/463, loss: 0.31986093521118164 2023-01-22 10:20:35.727568: step: 406/463, loss: 0.7030962109565735 2023-01-22 10:20:36.386503: step: 408/463, loss: 0.11078163981437683 2023-01-22 10:20:37.063240: step: 410/463, loss: 0.3519532084465027 2023-01-22 10:20:37.671025: step: 412/463, loss: 0.23051252961158752 2023-01-22 10:20:38.350532: step: 414/463, loss: 0.1121712401509285 2023-01-22 10:20:38.933827: step: 416/463, loss: 0.3549158573150635 2023-01-22 10:20:39.528645: step: 418/463, loss: 0.31530246138572693 2023-01-22 10:20:40.074686: step: 420/463, loss: 0.3870757520198822 2023-01-22 10:20:40.641151: step: 422/463, loss: 0.16960963606834412 2023-01-22 10:20:41.311078: step: 424/463, loss: 0.5314084887504578 2023-01-22 10:20:41.948620: step: 426/463, loss: 0.14330002665519714 2023-01-22 10:20:42.546197: step: 428/463, loss: 0.5325902700424194 2023-01-22 10:20:43.270491: step: 430/463, loss: 0.2526671886444092 2023-01-22 10:20:43.867422: step: 432/463, loss: 0.3225879669189453 2023-01-22 10:20:44.488602: step: 434/463, loss: 0.2309187948703766 2023-01-22 10:20:45.100495: step: 436/463, loss: 0.16792495548725128 2023-01-22 10:20:45.725357: step: 438/463, loss: 0.6884794235229492 2023-01-22 10:20:46.360598: step: 440/463, loss: 0.059158552438020706 2023-01-22 10:20:46.947330: step: 442/463, loss: 0.31807082891464233 2023-01-22 10:20:47.556730: step: 444/463, loss: 0.8425359725952148 2023-01-22 10:20:48.137201: step: 446/463, loss: 0.22956913709640503 2023-01-22 10:20:48.731692: step: 448/463, loss: 0.11970395594835281 2023-01-22 10:20:49.337526: step: 450/463, loss: 0.4807383120059967 2023-01-22 10:20:49.978252: step: 452/463, loss: 0.21160563826560974 2023-01-22 10:20:50.627885: step: 454/463, loss: 0.633194625377655 2023-01-22 10:20:51.239248: step: 456/463, loss: 0.1652614027261734 2023-01-22 10:20:51.894602: step: 458/463, loss: 0.13365940749645233 2023-01-22 10:20:52.559224: step: 460/463, loss: 0.22545641660690308 2023-01-22 10:20:53.189060: step: 462/463, loss: 0.6370010375976562 2023-01-22 10:20:53.792615: step: 464/463, loss: 0.08026273548603058 2023-01-22 10:20:54.459160: step: 466/463, loss: 0.5588064789772034 2023-01-22 10:20:55.089834: step: 468/463, loss: 0.9731622338294983 2023-01-22 10:20:55.744297: step: 470/463, loss: 0.12446781992912292 2023-01-22 10:20:56.383173: step: 472/463, loss: 0.45057204365730286 2023-01-22 10:20:57.045650: step: 474/463, loss: 0.0737568810582161 2023-01-22 10:20:57.636100: step: 476/463, loss: 0.43063193559646606 2023-01-22 10:20:58.238878: step: 478/463, loss: 0.46717628836631775 2023-01-22 10:20:58.878459: step: 480/463, loss: 0.22403323650360107 2023-01-22 10:20:59.440671: step: 482/463, loss: 0.7971420288085938 2023-01-22 10:21:00.017525: step: 484/463, loss: 0.10911144316196442 2023-01-22 10:21:00.647955: step: 486/463, loss: 0.3648187518119812 2023-01-22 10:21:01.258962: step: 488/463, loss: 0.1820109486579895 2023-01-22 10:21:01.926592: step: 490/463, loss: 0.05152883008122444 2023-01-22 10:21:02.577235: step: 492/463, loss: 0.3507932126522064 2023-01-22 10:21:03.288966: step: 494/463, loss: 0.980003833770752 2023-01-22 10:21:03.841990: step: 496/463, loss: 0.06188702583312988 2023-01-22 10:21:04.542293: step: 498/463, loss: 0.12341655045747757 2023-01-22 10:21:05.215291: step: 500/463, loss: 0.3014623522758484 2023-01-22 10:21:05.788420: step: 502/463, loss: 0.22810029983520508 2023-01-22 10:21:06.372238: step: 504/463, loss: 0.24539735913276672 2023-01-22 10:21:07.053523: step: 506/463, loss: 0.25342482328414917 2023-01-22 10:21:07.687299: step: 508/463, loss: 0.6143503785133362 2023-01-22 10:21:08.382638: step: 510/463, loss: 0.08440279960632324 2023-01-22 10:21:09.029110: step: 512/463, loss: 0.5813785791397095 2023-01-22 10:21:09.691533: step: 514/463, loss: 0.18750399351119995 2023-01-22 10:21:10.311943: step: 516/463, loss: 0.12383110076189041 2023-01-22 10:21:10.899739: step: 518/463, loss: 0.1619398444890976 2023-01-22 10:21:11.564753: step: 520/463, loss: 0.36592888832092285 2023-01-22 10:21:12.264332: step: 522/463, loss: 0.18676599860191345 2023-01-22 10:21:12.808502: step: 524/463, loss: 0.20253662765026093 2023-01-22 10:21:13.480276: step: 526/463, loss: 0.28952017426490784 2023-01-22 10:21:14.097694: step: 528/463, loss: 0.0837351381778717 2023-01-22 10:21:14.755179: step: 530/463, loss: 0.1996394693851471 2023-01-22 10:21:15.422821: step: 532/463, loss: 0.3002845346927643 2023-01-22 10:21:16.038667: step: 534/463, loss: 0.14645184576511383 2023-01-22 10:21:16.635890: step: 536/463, loss: 0.067990243434906 2023-01-22 10:21:17.246565: step: 538/463, loss: 0.4760712683200836 2023-01-22 10:21:17.832660: step: 540/463, loss: 0.5932899117469788 2023-01-22 10:21:18.384795: step: 542/463, loss: 0.16231553256511688 2023-01-22 10:21:19.006834: step: 544/463, loss: 0.8529051542282104 2023-01-22 10:21:19.600038: step: 546/463, loss: 0.13931874930858612 2023-01-22 10:21:20.234662: step: 548/463, loss: 0.13295228779315948 2023-01-22 10:21:20.837124: step: 550/463, loss: 0.43543460965156555 2023-01-22 10:21:21.468680: step: 552/463, loss: 0.6473884582519531 2023-01-22 10:21:22.084655: step: 554/463, loss: 0.6208295822143555 2023-01-22 10:21:22.728656: step: 556/463, loss: 0.32245805859565735 2023-01-22 10:21:23.333680: step: 558/463, loss: 1.770228385925293 2023-01-22 10:21:24.020480: step: 560/463, loss: 0.20916807651519775 2023-01-22 10:21:24.604645: step: 562/463, loss: 0.14589859545230865 2023-01-22 10:21:25.188768: step: 564/463, loss: 0.24990589916706085 2023-01-22 10:21:25.924301: step: 566/463, loss: 0.3019903898239136 2023-01-22 10:21:26.466384: step: 568/463, loss: 0.11575135588645935 2023-01-22 10:21:27.030935: step: 570/463, loss: 0.083357073366642 2023-01-22 10:21:27.573717: step: 572/463, loss: 0.05302351340651512 2023-01-22 10:21:28.180323: step: 574/463, loss: 0.2778109312057495 2023-01-22 10:21:28.786039: step: 576/463, loss: 0.05298285931348801 2023-01-22 10:21:29.383842: step: 578/463, loss: 0.8646448850631714 2023-01-22 10:21:29.946169: step: 580/463, loss: 0.5422811508178711 2023-01-22 10:21:30.511419: step: 582/463, loss: 0.14728735387325287 2023-01-22 10:21:31.112737: step: 584/463, loss: 0.187782421708107 2023-01-22 10:21:31.780884: step: 586/463, loss: 0.16480490565299988 2023-01-22 10:21:32.389572: step: 588/463, loss: 0.2632267475128174 2023-01-22 10:21:32.990326: step: 590/463, loss: 0.0823042243719101 2023-01-22 10:21:33.547641: step: 592/463, loss: 0.3487679064273834 2023-01-22 10:21:34.172302: step: 594/463, loss: 0.1836131364107132 2023-01-22 10:21:34.728461: step: 596/463, loss: 0.219617560505867 2023-01-22 10:21:35.342416: step: 598/463, loss: 0.760192334651947 2023-01-22 10:21:35.964363: step: 600/463, loss: 0.25891590118408203 2023-01-22 10:21:36.604751: step: 602/463, loss: 0.7730326652526855 2023-01-22 10:21:37.213758: step: 604/463, loss: 0.26593461632728577 2023-01-22 10:21:37.845260: step: 606/463, loss: 0.37019580602645874 2023-01-22 10:21:38.491280: step: 608/463, loss: 0.2653299868106842 2023-01-22 10:21:39.084498: step: 610/463, loss: 0.2394438236951828 2023-01-22 10:21:39.696065: step: 612/463, loss: 0.44029101729393005 2023-01-22 10:21:40.355881: step: 614/463, loss: 0.05722891539335251 2023-01-22 10:21:40.944697: step: 616/463, loss: 0.3484257161617279 2023-01-22 10:21:41.545528: step: 618/463, loss: 0.08210018277168274 2023-01-22 10:21:42.183193: step: 620/463, loss: 0.19306351244449615 2023-01-22 10:21:42.800046: step: 622/463, loss: 0.1366589218378067 2023-01-22 10:21:43.445494: step: 624/463, loss: 0.25840088725090027 2023-01-22 10:21:43.985402: step: 626/463, loss: 0.06171680614352226 2023-01-22 10:21:44.690262: step: 628/463, loss: 0.14395509660243988 2023-01-22 10:21:45.363914: step: 630/463, loss: 0.20327600836753845 2023-01-22 10:21:45.951017: step: 632/463, loss: 0.13372549414634705 2023-01-22 10:21:46.592336: step: 634/463, loss: 0.09817241132259369 2023-01-22 10:21:47.223760: step: 636/463, loss: 0.537545919418335 2023-01-22 10:21:47.883259: step: 638/463, loss: 1.0766065120697021 2023-01-22 10:21:48.552078: step: 640/463, loss: 0.10995373129844666 2023-01-22 10:21:49.178352: step: 642/463, loss: 0.154397651553154 2023-01-22 10:21:49.781413: step: 644/463, loss: 0.06673179566860199 2023-01-22 10:21:50.411463: step: 646/463, loss: 0.13693280518054962 2023-01-22 10:21:51.012643: step: 648/463, loss: 0.23969599604606628 2023-01-22 10:21:51.620219: step: 650/463, loss: 0.290406733751297 2023-01-22 10:21:52.243379: step: 652/463, loss: 0.7215564846992493 2023-01-22 10:21:52.885718: step: 654/463, loss: 0.9090859889984131 2023-01-22 10:21:53.458347: step: 656/463, loss: 0.24216076731681824 2023-01-22 10:21:54.066289: step: 658/463, loss: 0.14913325011730194 2023-01-22 10:21:54.804408: step: 660/463, loss: 0.1350444257259369 2023-01-22 10:21:55.454753: step: 662/463, loss: 0.09829533845186234 2023-01-22 10:21:56.104341: step: 664/463, loss: 0.05164027586579323 2023-01-22 10:21:56.690347: step: 666/463, loss: 0.9949164986610413 2023-01-22 10:21:57.314541: step: 668/463, loss: 0.28633543848991394 2023-01-22 10:21:57.889768: step: 670/463, loss: 0.10687119513750076 2023-01-22 10:21:58.486452: step: 672/463, loss: 0.14936110377311707 2023-01-22 10:21:59.103361: step: 674/463, loss: 0.2001819610595703 2023-01-22 10:21:59.695602: step: 676/463, loss: 0.275068461894989 2023-01-22 10:22:00.313847: step: 678/463, loss: 0.25014397501945496 2023-01-22 10:22:00.979716: step: 680/463, loss: 0.4746881127357483 2023-01-22 10:22:01.626681: step: 682/463, loss: 0.6380590200424194 2023-01-22 10:22:02.231162: step: 684/463, loss: 0.20501750707626343 2023-01-22 10:22:02.953467: step: 686/463, loss: 0.45265549421310425 2023-01-22 10:22:03.612721: step: 688/463, loss: 0.0979803279042244 2023-01-22 10:22:04.225821: step: 690/463, loss: 0.2585076689720154 2023-01-22 10:22:04.809699: step: 692/463, loss: 0.8459354639053345 2023-01-22 10:22:05.463192: step: 694/463, loss: 0.30131617188453674 2023-01-22 10:22:06.037604: step: 696/463, loss: 0.3636021316051483 2023-01-22 10:22:06.614738: step: 698/463, loss: 0.17327608168125153 2023-01-22 10:22:07.243596: step: 700/463, loss: 0.11873305588960648 2023-01-22 10:22:07.928044: step: 702/463, loss: 0.2941829562187195 2023-01-22 10:22:08.615735: step: 704/463, loss: 0.48770639300346375 2023-01-22 10:22:09.221117: step: 706/463, loss: 0.21048739552497864 2023-01-22 10:22:09.786417: step: 708/463, loss: 0.26141446828842163 2023-01-22 10:22:10.472286: step: 710/463, loss: 1.0029133558273315 2023-01-22 10:22:11.086028: step: 712/463, loss: 0.825661301612854 2023-01-22 10:22:11.703825: step: 714/463, loss: 0.27143594622612 2023-01-22 10:22:12.299467: step: 716/463, loss: 0.12954536080360413 2023-01-22 10:22:12.897358: step: 718/463, loss: 0.12344024330377579 2023-01-22 10:22:13.503008: step: 720/463, loss: 0.11319894343614578 2023-01-22 10:22:14.060484: step: 722/463, loss: 0.037569086998701096 2023-01-22 10:22:14.646448: step: 724/463, loss: 0.12392205744981766 2023-01-22 10:22:15.301131: step: 726/463, loss: 0.14336884021759033 2023-01-22 10:22:15.837078: step: 728/463, loss: 0.25060126185417175 2023-01-22 10:22:16.426696: step: 730/463, loss: 0.2917499244213104 2023-01-22 10:22:17.035901: step: 732/463, loss: 0.11173426359891891 2023-01-22 10:22:17.654436: step: 734/463, loss: 0.12138108164072037 2023-01-22 10:22:18.289861: step: 736/463, loss: 0.450734943151474 2023-01-22 10:22:18.845167: step: 738/463, loss: 0.9772469997406006 2023-01-22 10:22:19.484118: step: 740/463, loss: 2.3889918327331543 2023-01-22 10:22:20.094590: step: 742/463, loss: 0.15132781863212585 2023-01-22 10:22:20.698942: step: 744/463, loss: 0.16488295793533325 2023-01-22 10:22:21.261465: step: 746/463, loss: 0.11444897204637527 2023-01-22 10:22:21.902121: step: 748/463, loss: 0.18337833881378174 2023-01-22 10:22:22.504654: step: 750/463, loss: 0.2182713747024536 2023-01-22 10:22:23.126306: step: 752/463, loss: 0.29011327028274536 2023-01-22 10:22:23.793955: step: 754/463, loss: 0.08822859823703766 2023-01-22 10:22:24.401953: step: 756/463, loss: 0.14828428626060486 2023-01-22 10:22:24.973209: step: 758/463, loss: 0.22737431526184082 2023-01-22 10:22:25.528728: step: 760/463, loss: 0.107344850897789 2023-01-22 10:22:26.102155: step: 762/463, loss: 0.37095901370048523 2023-01-22 10:22:26.739735: step: 764/463, loss: 0.22544699907302856 2023-01-22 10:22:27.348236: step: 766/463, loss: 0.3135944902896881 2023-01-22 10:22:27.969062: step: 768/463, loss: 0.318268358707428 2023-01-22 10:22:28.545362: step: 770/463, loss: 0.8681970238685608 2023-01-22 10:22:29.166818: step: 772/463, loss: 0.17022764682769775 2023-01-22 10:22:29.758045: step: 774/463, loss: 0.12838152050971985 2023-01-22 10:22:30.465326: step: 776/463, loss: 0.10882436484098434 2023-01-22 10:22:31.115127: step: 778/463, loss: 0.10818567126989365 2023-01-22 10:22:31.813768: step: 780/463, loss: 0.14951752126216888 2023-01-22 10:22:32.469597: step: 782/463, loss: 0.6003298163414001 2023-01-22 10:22:33.245444: step: 784/463, loss: 2.19765567779541 2023-01-22 10:22:33.877021: step: 786/463, loss: 0.15947005152702332 2023-01-22 10:22:34.454228: step: 788/463, loss: 0.07240942120552063 2023-01-22 10:22:35.086632: step: 790/463, loss: 0.13658851385116577 2023-01-22 10:22:35.712259: step: 792/463, loss: 0.15842501819133759 2023-01-22 10:22:36.396855: step: 794/463, loss: 0.12694789469242096 2023-01-22 10:22:37.056886: step: 796/463, loss: 0.19413883984088898 2023-01-22 10:22:37.725080: step: 798/463, loss: 0.42260080575942993 2023-01-22 10:22:38.349400: step: 800/463, loss: 0.1677320897579193 2023-01-22 10:22:38.976265: step: 802/463, loss: 0.5036795139312744 2023-01-22 10:22:39.626433: step: 804/463, loss: 0.19367152452468872 2023-01-22 10:22:40.208640: step: 806/463, loss: 0.1507994681596756 2023-01-22 10:22:40.808125: step: 808/463, loss: 0.05353556573390961 2023-01-22 10:22:41.406972: step: 810/463, loss: 0.4089425504207611 2023-01-22 10:22:42.000135: step: 812/463, loss: 0.5463809370994568 2023-01-22 10:22:42.582989: step: 814/463, loss: 0.06974062323570251 2023-01-22 10:22:43.212321: step: 816/463, loss: 0.35333529114723206 2023-01-22 10:22:43.799326: step: 818/463, loss: 0.07739656418561935 2023-01-22 10:22:44.473061: step: 820/463, loss: 0.34687095880508423 2023-01-22 10:22:45.107819: step: 822/463, loss: 0.3159623146057129 2023-01-22 10:22:45.696259: step: 824/463, loss: 0.4063344895839691 2023-01-22 10:22:46.319470: step: 826/463, loss: 0.1810424029827118 2023-01-22 10:22:46.955024: step: 828/463, loss: 0.2012706995010376 2023-01-22 10:22:47.643187: step: 830/463, loss: 0.09158824384212494 2023-01-22 10:22:48.257465: step: 832/463, loss: 0.6169873476028442 2023-01-22 10:22:48.943691: step: 834/463, loss: 0.16272945702075958 2023-01-22 10:22:49.599212: step: 836/463, loss: 0.15982531011104584 2023-01-22 10:22:50.213718: step: 838/463, loss: 0.06779561191797256 2023-01-22 10:22:50.866345: step: 840/463, loss: 0.3595781624317169 2023-01-22 10:22:51.453212: step: 842/463, loss: 0.0791391134262085 2023-01-22 10:22:52.084332: step: 844/463, loss: 0.27549371123313904 2023-01-22 10:22:52.685727: step: 846/463, loss: 0.4131931960582733 2023-01-22 10:22:53.250514: step: 848/463, loss: 0.6292086243629456 2023-01-22 10:22:53.915120: step: 850/463, loss: 0.30800503492355347 2023-01-22 10:22:54.475418: step: 852/463, loss: 0.1196153312921524 2023-01-22 10:22:55.082645: step: 854/463, loss: 0.13580776751041412 2023-01-22 10:22:55.773975: step: 856/463, loss: 0.5255787968635559 2023-01-22 10:22:56.420300: step: 858/463, loss: 0.1318511813879013 2023-01-22 10:22:57.030615: step: 860/463, loss: 0.18214105069637299 2023-01-22 10:22:57.667767: step: 862/463, loss: 0.36088091135025024 2023-01-22 10:22:58.305379: step: 864/463, loss: 0.7797210216522217 2023-01-22 10:22:58.905775: step: 866/463, loss: 0.4265013337135315 2023-01-22 10:22:59.541206: step: 868/463, loss: 0.7816517949104309 2023-01-22 10:23:00.131892: step: 870/463, loss: 0.25371304154396057 2023-01-22 10:23:00.752020: step: 872/463, loss: 0.18142247200012207 2023-01-22 10:23:01.446944: step: 874/463, loss: 0.2577288746833801 2023-01-22 10:23:02.038835: step: 876/463, loss: 0.11698098480701447 2023-01-22 10:23:02.625496: step: 878/463, loss: 0.20003195106983185 2023-01-22 10:23:03.235389: step: 880/463, loss: 0.1782340556383133 2023-01-22 10:23:03.834414: step: 882/463, loss: 0.1248849630355835 2023-01-22 10:23:04.430779: step: 884/463, loss: 0.4253688454627991 2023-01-22 10:23:05.074225: step: 886/463, loss: 0.07649129629135132 2023-01-22 10:23:05.662613: step: 888/463, loss: 0.5954459309577942 2023-01-22 10:23:06.312334: step: 890/463, loss: 0.19676120579242706 2023-01-22 10:23:06.873793: step: 892/463, loss: 0.4825001358985901 2023-01-22 10:23:07.484067: step: 894/463, loss: 0.29770323634147644 2023-01-22 10:23:08.082119: step: 896/463, loss: 0.2951374650001526 2023-01-22 10:23:08.720672: step: 898/463, loss: 0.24668170511722565 2023-01-22 10:23:09.362211: step: 900/463, loss: 0.32290175557136536 2023-01-22 10:23:09.946802: step: 902/463, loss: 0.20828649401664734 2023-01-22 10:23:10.553440: step: 904/463, loss: 0.09835977107286453 2023-01-22 10:23:11.139958: step: 906/463, loss: 0.4512248933315277 2023-01-22 10:23:11.725555: step: 908/463, loss: 1.2370660305023193 2023-01-22 10:23:12.329763: step: 910/463, loss: 0.4330728054046631 2023-01-22 10:23:12.946027: step: 912/463, loss: 0.1754506230354309 2023-01-22 10:23:13.544215: step: 914/463, loss: 0.2189464271068573 2023-01-22 10:23:14.088708: step: 916/463, loss: 0.05806979164481163 2023-01-22 10:23:14.704891: step: 918/463, loss: 0.45422038435935974 2023-01-22 10:23:15.332967: step: 920/463, loss: 0.22595399618148804 2023-01-22 10:23:15.920648: step: 922/463, loss: 1.0573160648345947 2023-01-22 10:23:16.517754: step: 924/463, loss: 0.2549581527709961 2023-01-22 10:23:17.151674: step: 926/463, loss: 0.2506772577762604 ================================================== Loss: 0.327 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2949267317939609, 'r': 0.3150735294117647, 'f1': 0.3046674311926605}, 'combined': 0.22449179140511824, 'epoch': 10} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3376481677994374, 'r': 0.39816415375613806, 'f1': 0.36541762909040765}, 'combined': 0.2832423727877801, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2642291344667697, 'r': 0.3243951612903226, 'f1': 0.29123722316865414}, 'combined': 0.21459584865058726, 'epoch': 10} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.326678037972693, 'r': 0.40564524751940095, 'f1': 0.3619040830677394}, 'combined': 0.2805189543395875, 'epoch': 10} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2671913725285436, 'r': 0.31941283622956823, 'f1': 0.29097763991872505}, 'combined': 0.21440457678221844, 'epoch': 10} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32113335504985907, 'r': 0.3837071337911919, 'f1': 0.34964268137756854}, 'combined': 0.2710149013548618, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24728260869565216, 'r': 0.325, 'f1': 0.28086419753086417}, 'combined': 0.18724279835390945, 'epoch': 10} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.275, 'r': 0.4782608695652174, 'f1': 0.34920634920634924}, 'combined': 0.17460317460317462, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.3103448275862069, 'f1': 0.339622641509434}, 'combined': 0.22641509433962265, 'epoch': 10} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29855383301254707, 'r': 0.34444161379815674, 'f1': 0.3198603180116804}, 'combined': 0.23568655011386977, 'epoch': 7} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.30988948788864984, 'r': 0.3867922100668993, 'f1': 0.34409642236532006}, 'combined': 0.2667158871922577, 'epoch': 7} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2636363636363636, 'r': 0.4142857142857143, 'f1': 0.3222222222222222}, 'combined': 0.2148148148148148, 'epoch': 7} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.294349755002927, 'r': 0.3345645222898544, 'f1': 0.3131714089640378}, 'combined': 0.23075788028929098, 'epoch': 6} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3358950473035951, 'r': 0.33466466251493726, 'f1': 0.3352787261158821}, 'combined': 0.25988111785058804, 'epoch': 6} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 6} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2671913725285436, 'r': 0.31941283622956823, 'f1': 0.29097763991872505}, 'combined': 0.21440457678221844, 'epoch': 10} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32113335504985907, 'r': 0.3837071337911919, 'f1': 0.34964268137756854}, 'combined': 0.2710149013548618, 'epoch': 10} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.3103448275862069, 'f1': 0.339622641509434}, 'combined': 0.22641509433962265, 'epoch': 10} ****************************** Epoch: 11 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 10:26:07.062842: step: 2/463, loss: 0.38464900851249695 2023-01-22 10:26:07.731309: step: 4/463, loss: 0.24363644421100616 2023-01-22 10:26:08.404271: step: 6/463, loss: 0.12040785700082779 2023-01-22 10:26:09.038939: step: 8/463, loss: 0.09246577322483063 2023-01-22 10:26:09.636631: step: 10/463, loss: 0.3931816518306732 2023-01-22 10:26:10.235570: step: 12/463, loss: 0.2293281853199005 2023-01-22 10:26:10.838999: step: 14/463, loss: 0.13477280735969543 2023-01-22 10:26:11.491496: step: 16/463, loss: 0.22993196547031403 2023-01-22 10:26:12.243368: step: 18/463, loss: 0.16220282018184662 2023-01-22 10:26:12.863770: step: 20/463, loss: 0.07360216230154037 2023-01-22 10:26:13.514310: step: 22/463, loss: 0.12905588746070862 2023-01-22 10:26:14.105866: step: 24/463, loss: 0.2561604976654053 2023-01-22 10:26:14.673434: step: 26/463, loss: 0.6398051977157593 2023-01-22 10:26:15.324011: step: 28/463, loss: 0.3928891122341156 2023-01-22 10:26:15.918100: step: 30/463, loss: 0.21045248210430145 2023-01-22 10:26:16.560649: step: 32/463, loss: 0.5834149718284607 2023-01-22 10:26:17.151760: step: 34/463, loss: 0.29112717509269714 2023-01-22 10:26:17.711059: step: 36/463, loss: 0.3674471378326416 2023-01-22 10:26:18.333311: step: 38/463, loss: 0.16936175525188446 2023-01-22 10:26:18.911788: step: 40/463, loss: 0.18998980522155762 2023-01-22 10:26:19.511303: step: 42/463, loss: 0.12141333520412445 2023-01-22 10:26:20.104471: step: 44/463, loss: 0.4282970130443573 2023-01-22 10:26:20.735269: step: 46/463, loss: 0.5694759488105774 2023-01-22 10:26:21.357250: step: 48/463, loss: 0.24875542521476746 2023-01-22 10:26:21.936461: step: 50/463, loss: 0.17153924703598022 2023-01-22 10:26:22.555437: step: 52/463, loss: 0.07150880247354507 2023-01-22 10:26:23.187884: step: 54/463, loss: 0.862336277961731 2023-01-22 10:26:23.815818: step: 56/463, loss: 0.41236478090286255 2023-01-22 10:26:24.399127: step: 58/463, loss: 0.14219239354133606 2023-01-22 10:26:25.146829: step: 60/463, loss: 0.11710482835769653 2023-01-22 10:26:25.836621: step: 62/463, loss: 0.10768640786409378 2023-01-22 10:26:26.442139: step: 64/463, loss: 0.17711031436920166 2023-01-22 10:26:26.976135: step: 66/463, loss: 0.14531758427619934 2023-01-22 10:26:27.621626: step: 68/463, loss: 1.847318410873413 2023-01-22 10:26:28.233834: step: 70/463, loss: 0.1776900440454483 2023-01-22 10:26:28.855483: step: 72/463, loss: 0.32158163189888 2023-01-22 10:26:29.481018: step: 74/463, loss: 0.5123260617256165 2023-01-22 10:26:30.053057: step: 76/463, loss: 0.7484617829322815 2023-01-22 10:26:30.653115: step: 78/463, loss: 0.35480397939682007 2023-01-22 10:26:31.272345: step: 80/463, loss: 0.2403063327074051 2023-01-22 10:26:31.886403: step: 82/463, loss: 0.18592043220996857 2023-01-22 10:26:32.485374: step: 84/463, loss: 0.25290006399154663 2023-01-22 10:26:33.131398: step: 86/463, loss: 0.21364113688468933 2023-01-22 10:26:33.740867: step: 88/463, loss: 0.3772701025009155 2023-01-22 10:26:34.408749: step: 90/463, loss: 0.2814018428325653 2023-01-22 10:26:34.995396: step: 92/463, loss: 0.14555954933166504 2023-01-22 10:26:35.596635: step: 94/463, loss: 0.16270212829113007 2023-01-22 10:26:36.190765: step: 96/463, loss: 0.2507583498954773 2023-01-22 10:26:36.771481: step: 98/463, loss: 0.05216884985566139 2023-01-22 10:26:37.311516: step: 100/463, loss: 0.202926903963089 2023-01-22 10:26:37.950683: step: 102/463, loss: 0.09507398307323456 2023-01-22 10:26:38.591680: step: 104/463, loss: 0.13441112637519836 2023-01-22 10:26:39.258332: step: 106/463, loss: 1.1544842720031738 2023-01-22 10:26:39.885884: step: 108/463, loss: 0.09096304327249527 2023-01-22 10:26:40.493169: step: 110/463, loss: 0.165726438164711 2023-01-22 10:26:41.100529: step: 112/463, loss: 0.2630023956298828 2023-01-22 10:26:41.732339: step: 114/463, loss: 0.3290812373161316 2023-01-22 10:26:42.352561: step: 116/463, loss: 0.47309359908103943 2023-01-22 10:26:42.970351: step: 118/463, loss: 0.8138670921325684 2023-01-22 10:26:43.582609: step: 120/463, loss: 0.595111608505249 2023-01-22 10:26:44.272711: step: 122/463, loss: 0.1392076462507248 2023-01-22 10:26:44.915182: step: 124/463, loss: 0.2151304930448532 2023-01-22 10:26:45.579397: step: 126/463, loss: 0.1979188323020935 2023-01-22 10:26:46.166297: step: 128/463, loss: 0.2573375105857849 2023-01-22 10:26:46.812006: step: 130/463, loss: 0.15503670275211334 2023-01-22 10:26:47.377884: step: 132/463, loss: 0.06725090742111206 2023-01-22 10:26:47.955072: step: 134/463, loss: 0.36167651414871216 2023-01-22 10:26:48.584810: step: 136/463, loss: 0.14665812253952026 2023-01-22 10:26:49.224512: step: 138/463, loss: 0.062385838478803635 2023-01-22 10:26:49.809175: step: 140/463, loss: 0.07878714054822922 2023-01-22 10:26:50.408614: step: 142/463, loss: 0.1546541303396225 2023-01-22 10:26:51.109821: step: 144/463, loss: 0.175470232963562 2023-01-22 10:26:51.761591: step: 146/463, loss: 0.07201854139566422 2023-01-22 10:26:52.354917: step: 148/463, loss: 0.26908040046691895 2023-01-22 10:26:52.916677: step: 150/463, loss: 0.20902375876903534 2023-01-22 10:26:53.526705: step: 152/463, loss: 0.32269707322120667 2023-01-22 10:26:54.135096: step: 154/463, loss: 0.07607974112033844 2023-01-22 10:26:54.689477: step: 156/463, loss: 0.23623090982437134 2023-01-22 10:26:55.303324: step: 158/463, loss: 0.1768122762441635 2023-01-22 10:26:55.914679: step: 160/463, loss: 0.11020787060260773 2023-01-22 10:26:56.532980: step: 162/463, loss: 0.0947912409901619 2023-01-22 10:26:57.148662: step: 164/463, loss: 0.31853047013282776 2023-01-22 10:26:57.748365: step: 166/463, loss: 0.12231225520372391 2023-01-22 10:26:58.432627: step: 168/463, loss: 0.11263152956962585 2023-01-22 10:26:59.037916: step: 170/463, loss: 0.026019282639026642 2023-01-22 10:26:59.586218: step: 172/463, loss: 0.18614421784877777 2023-01-22 10:27:00.237825: step: 174/463, loss: 0.18698790669441223 2023-01-22 10:27:00.824214: step: 176/463, loss: 0.28849923610687256 2023-01-22 10:27:01.403351: step: 178/463, loss: 0.08335006237030029 2023-01-22 10:27:02.063902: step: 180/463, loss: 0.15389671921730042 2023-01-22 10:27:02.667373: step: 182/463, loss: 0.2285962849855423 2023-01-22 10:27:03.239625: step: 184/463, loss: 0.08953993767499924 2023-01-22 10:27:03.881482: step: 186/463, loss: 0.09249279648065567 2023-01-22 10:27:04.572036: step: 188/463, loss: 0.07478576898574829 2023-01-22 10:27:05.242662: step: 190/463, loss: 0.17967252433300018 2023-01-22 10:27:05.916354: step: 192/463, loss: 0.12968912720680237 2023-01-22 10:27:06.535222: step: 194/463, loss: 0.11943177878856659 2023-01-22 10:27:07.136658: step: 196/463, loss: 0.18584540486335754 2023-01-22 10:27:07.704556: step: 198/463, loss: 0.17804957926273346 2023-01-22 10:27:08.333853: step: 200/463, loss: 0.469115287065506 2023-01-22 10:27:08.948158: step: 202/463, loss: 0.19804894924163818 2023-01-22 10:27:09.576858: step: 204/463, loss: 0.05510970577597618 2023-01-22 10:27:10.163250: step: 206/463, loss: 0.3034987449645996 2023-01-22 10:27:10.773828: step: 208/463, loss: 0.0651090145111084 2023-01-22 10:27:11.344477: step: 210/463, loss: 0.0950593501329422 2023-01-22 10:27:11.981006: step: 212/463, loss: 0.3335636854171753 2023-01-22 10:27:12.569300: step: 214/463, loss: 1.0063867568969727 2023-01-22 10:27:13.222670: step: 216/463, loss: 0.1597270667552948 2023-01-22 10:27:13.795202: step: 218/463, loss: 0.18164093792438507 2023-01-22 10:27:14.421249: step: 220/463, loss: 0.13767415285110474 2023-01-22 10:27:15.037751: step: 222/463, loss: 0.031589508056640625 2023-01-22 10:27:15.722265: step: 224/463, loss: 0.34365224838256836 2023-01-22 10:27:16.379720: step: 226/463, loss: 0.0505225732922554 2023-01-22 10:27:16.966317: step: 228/463, loss: 0.5094326734542847 2023-01-22 10:27:17.524596: step: 230/463, loss: 0.1921921670436859 2023-01-22 10:27:18.134252: step: 232/463, loss: 0.17939475178718567 2023-01-22 10:27:18.730989: step: 234/463, loss: 0.07661590725183487 2023-01-22 10:27:19.358129: step: 236/463, loss: 0.5066153407096863 2023-01-22 10:27:19.957921: step: 238/463, loss: 0.28491726517677307 2023-01-22 10:27:20.597217: step: 240/463, loss: 0.5721750855445862 2023-01-22 10:27:21.220543: step: 242/463, loss: 0.20861269533634186 2023-01-22 10:27:21.843223: step: 244/463, loss: 0.37435194849967957 2023-01-22 10:27:22.504329: step: 246/463, loss: 0.09008286148309708 2023-01-22 10:27:23.104749: step: 248/463, loss: 0.10733573883771896 2023-01-22 10:27:23.653078: step: 250/463, loss: 0.05971158295869827 2023-01-22 10:27:24.302880: step: 252/463, loss: 0.06712017953395844 2023-01-22 10:27:24.966969: step: 254/463, loss: 0.149140402674675 2023-01-22 10:27:25.594601: step: 256/463, loss: 0.15263497829437256 2023-01-22 10:27:26.221708: step: 258/463, loss: 0.9096626043319702 2023-01-22 10:27:26.824430: step: 260/463, loss: 0.09592042118310928 2023-01-22 10:27:27.429465: step: 262/463, loss: 0.3063996732234955 2023-01-22 10:27:28.025967: step: 264/463, loss: 0.3671858310699463 2023-01-22 10:27:28.686666: step: 266/463, loss: 0.16143399477005005 2023-01-22 10:27:29.289818: step: 268/463, loss: 0.05929265916347504 2023-01-22 10:27:29.907140: step: 270/463, loss: 0.1495087444782257 2023-01-22 10:27:30.536293: step: 272/463, loss: 0.09115415066480637 2023-01-22 10:27:31.168528: step: 274/463, loss: 0.30453792214393616 2023-01-22 10:27:31.754639: step: 276/463, loss: 0.38079485297203064 2023-01-22 10:27:32.503146: step: 278/463, loss: 0.342390775680542 2023-01-22 10:27:33.145486: step: 280/463, loss: 0.2871167063713074 2023-01-22 10:27:33.866444: step: 282/463, loss: 0.6299222111701965 2023-01-22 10:27:34.554570: step: 284/463, loss: 0.18496885895729065 2023-01-22 10:27:35.198055: step: 286/463, loss: 0.5490970015525818 2023-01-22 10:27:35.861797: step: 288/463, loss: 0.10340451449155807 2023-01-22 10:27:36.534723: step: 290/463, loss: 0.16580542922019958 2023-01-22 10:27:37.207401: step: 292/463, loss: 0.13065865635871887 2023-01-22 10:27:37.794302: step: 294/463, loss: 0.10927838832139969 2023-01-22 10:27:38.402720: step: 296/463, loss: 0.1008647158741951 2023-01-22 10:27:38.980687: step: 298/463, loss: 0.11493542045354843 2023-01-22 10:27:39.673796: step: 300/463, loss: 0.18604464828968048 2023-01-22 10:27:40.220149: step: 302/463, loss: 0.1425403505563736 2023-01-22 10:27:40.862316: step: 304/463, loss: 0.03212430328130722 2023-01-22 10:27:41.487409: step: 306/463, loss: 0.34877368807792664 2023-01-22 10:27:42.090418: step: 308/463, loss: 0.15047882497310638 2023-01-22 10:27:42.729128: step: 310/463, loss: 0.07522471249103546 2023-01-22 10:27:43.309199: step: 312/463, loss: 0.49591168761253357 2023-01-22 10:27:43.932218: step: 314/463, loss: 0.5783676505088806 2023-01-22 10:27:44.547000: step: 316/463, loss: 0.1600651890039444 2023-01-22 10:27:45.155900: step: 318/463, loss: 0.4939728379249573 2023-01-22 10:27:45.730385: step: 320/463, loss: 0.07901965081691742 2023-01-22 10:27:46.323772: step: 322/463, loss: 0.13412442803382874 2023-01-22 10:27:46.914615: step: 324/463, loss: 0.8046720027923584 2023-01-22 10:27:47.508325: step: 326/463, loss: 0.24747377634048462 2023-01-22 10:27:48.138816: step: 328/463, loss: 0.12344200909137726 2023-01-22 10:27:48.784933: step: 330/463, loss: 0.05452829971909523 2023-01-22 10:27:49.366237: step: 332/463, loss: 0.23342633247375488 2023-01-22 10:27:50.003897: step: 334/463, loss: 0.15630567073822021 2023-01-22 10:27:50.570077: step: 336/463, loss: 0.10500635951757431 2023-01-22 10:27:51.192508: step: 338/463, loss: 0.3273847997188568 2023-01-22 10:27:51.811625: step: 340/463, loss: 0.06708574295043945 2023-01-22 10:27:52.505270: step: 342/463, loss: 0.3618435859680176 2023-01-22 10:27:53.150611: step: 344/463, loss: 0.1889757513999939 2023-01-22 10:27:53.729714: step: 346/463, loss: 0.0756753608584404 2023-01-22 10:27:54.342909: step: 348/463, loss: 0.08635926991701126 2023-01-22 10:27:54.986273: step: 350/463, loss: 0.7102369070053101 2023-01-22 10:27:55.632480: step: 352/463, loss: 0.08359764516353607 2023-01-22 10:27:56.278227: step: 354/463, loss: 0.07731806486845016 2023-01-22 10:27:56.854254: step: 356/463, loss: 0.24834422767162323 2023-01-22 10:27:57.465224: step: 358/463, loss: 0.12789073586463928 2023-01-22 10:27:58.178838: step: 360/463, loss: 0.3180234730243683 2023-01-22 10:27:58.734369: step: 362/463, loss: 0.10265457630157471 2023-01-22 10:27:59.348248: step: 364/463, loss: 0.07504995912313461 2023-01-22 10:27:59.914004: step: 366/463, loss: 0.06866217404603958 2023-01-22 10:28:00.593676: step: 368/463, loss: 0.07730881869792938 2023-01-22 10:28:01.185155: step: 370/463, loss: 0.6829435229301453 2023-01-22 10:28:01.777542: step: 372/463, loss: 0.114606574177742 2023-01-22 10:28:02.385171: step: 374/463, loss: 0.17876236140727997 2023-01-22 10:28:02.968967: step: 376/463, loss: 0.07609118521213531 2023-01-22 10:28:03.580070: step: 378/463, loss: 0.13067157566547394 2023-01-22 10:28:04.211866: step: 380/463, loss: 0.12436798214912415 2023-01-22 10:28:04.798536: step: 382/463, loss: 0.07902736961841583 2023-01-22 10:28:05.505096: step: 384/463, loss: 0.8428719639778137 2023-01-22 10:28:06.062363: step: 386/463, loss: 0.12940949201583862 2023-01-22 10:28:06.614298: step: 388/463, loss: 0.5174376964569092 2023-01-22 10:28:07.199911: step: 390/463, loss: 0.08864948898553848 2023-01-22 10:28:07.823721: step: 392/463, loss: 0.1381201446056366 2023-01-22 10:28:08.413822: step: 394/463, loss: 0.2679230868816376 2023-01-22 10:28:09.094265: step: 396/463, loss: 0.12718528509140015 2023-01-22 10:28:09.766934: step: 398/463, loss: 0.3463614881038666 2023-01-22 10:28:10.338231: step: 400/463, loss: 0.33551958203315735 2023-01-22 10:28:10.965970: step: 402/463, loss: 0.1317533254623413 2023-01-22 10:28:11.580370: step: 404/463, loss: 0.2918298840522766 2023-01-22 10:28:12.265500: step: 406/463, loss: 0.1276000589132309 2023-01-22 10:28:12.827930: step: 408/463, loss: 0.16821874678134918 2023-01-22 10:28:13.421996: step: 410/463, loss: 0.07860761135816574 2023-01-22 10:28:13.974438: step: 412/463, loss: 0.11053800582885742 2023-01-22 10:28:14.633758: step: 414/463, loss: 0.49289923906326294 2023-01-22 10:28:15.274428: step: 416/463, loss: 0.7267265915870667 2023-01-22 10:28:15.966639: step: 418/463, loss: 0.24886372685432434 2023-01-22 10:28:16.561124: step: 420/463, loss: 0.1400095373392105 2023-01-22 10:28:17.140281: step: 422/463, loss: 0.4976447820663452 2023-01-22 10:28:17.707960: step: 424/463, loss: 0.1688401997089386 2023-01-22 10:28:18.305625: step: 426/463, loss: 0.22347904741764069 2023-01-22 10:28:18.925408: step: 428/463, loss: 0.640465259552002 2023-01-22 10:28:19.554800: step: 430/463, loss: 0.3505455255508423 2023-01-22 10:28:20.157986: step: 432/463, loss: 0.27340126037597656 2023-01-22 10:28:20.786042: step: 434/463, loss: 0.11721798777580261 2023-01-22 10:28:21.379837: step: 436/463, loss: 0.07473170757293701 2023-01-22 10:28:21.914295: step: 438/463, loss: 0.04971243068575859 2023-01-22 10:28:22.487069: step: 440/463, loss: 0.19187520444393158 2023-01-22 10:28:23.121489: step: 442/463, loss: 0.4347236454486847 2023-01-22 10:28:23.721556: step: 444/463, loss: 0.10797803103923798 2023-01-22 10:28:24.341269: step: 446/463, loss: 0.09673857688903809 2023-01-22 10:28:24.946732: step: 448/463, loss: 0.1593562811613083 2023-01-22 10:28:25.640936: step: 450/463, loss: 0.15798698365688324 2023-01-22 10:28:26.280242: step: 452/463, loss: 0.6502160429954529 2023-01-22 10:28:26.887846: step: 454/463, loss: 0.2936711609363556 2023-01-22 10:28:27.512250: step: 456/463, loss: 0.24623417854309082 2023-01-22 10:28:28.193003: step: 458/463, loss: 0.031187953427433968 2023-01-22 10:28:28.775570: step: 460/463, loss: 0.1089807003736496 2023-01-22 10:28:29.486482: step: 462/463, loss: 0.1410052627325058 2023-01-22 10:28:30.127061: step: 464/463, loss: 0.5307961106300354 2023-01-22 10:28:30.764541: step: 466/463, loss: 1.121724247932434 2023-01-22 10:28:31.374097: step: 468/463, loss: 0.19724996387958527 2023-01-22 10:28:32.042226: step: 470/463, loss: 0.11353181302547455 2023-01-22 10:28:32.687829: step: 472/463, loss: 0.19163569808006287 2023-01-22 10:28:33.289732: step: 474/463, loss: 0.20167407393455505 2023-01-22 10:28:33.908024: step: 476/463, loss: 0.07260171324014664 2023-01-22 10:28:34.513762: step: 478/463, loss: 0.29350942373275757 2023-01-22 10:28:35.140698: step: 480/463, loss: 0.513546884059906 2023-01-22 10:28:35.706593: step: 482/463, loss: 0.0812089666724205 2023-01-22 10:28:36.273912: step: 484/463, loss: 0.2336966097354889 2023-01-22 10:28:36.850497: step: 486/463, loss: 0.13227874040603638 2023-01-22 10:28:37.434993: step: 488/463, loss: 0.12969566881656647 2023-01-22 10:28:38.024208: step: 490/463, loss: 0.2063014656305313 2023-01-22 10:28:38.640696: step: 492/463, loss: 0.19133536517620087 2023-01-22 10:28:39.276962: step: 494/463, loss: 0.4032283425331116 2023-01-22 10:28:39.923890: step: 496/463, loss: 0.12233567237854004 2023-01-22 10:28:40.439114: step: 498/463, loss: 0.2952040135860443 2023-01-22 10:28:41.033263: step: 500/463, loss: 0.2888779938220978 2023-01-22 10:28:41.596841: step: 502/463, loss: 0.6097636222839355 2023-01-22 10:28:42.296104: step: 504/463, loss: 0.14834658801555634 2023-01-22 10:28:42.907745: step: 506/463, loss: 0.15530027449131012 2023-01-22 10:28:43.517885: step: 508/463, loss: 0.16488200426101685 2023-01-22 10:28:44.118100: step: 510/463, loss: 0.03965010866522789 2023-01-22 10:28:44.643244: step: 512/463, loss: 0.7460254430770874 2023-01-22 10:28:45.282452: step: 514/463, loss: 0.09015417844057083 2023-01-22 10:28:45.882003: step: 516/463, loss: 0.09670256078243256 2023-01-22 10:28:46.491229: step: 518/463, loss: 0.5697557330131531 2023-01-22 10:28:47.115432: step: 520/463, loss: 0.10155784338712692 2023-01-22 10:28:47.684027: step: 522/463, loss: 0.17859096825122833 2023-01-22 10:28:48.275443: step: 524/463, loss: 0.25560787320137024 2023-01-22 10:28:48.870268: step: 526/463, loss: 0.153215229511261 2023-01-22 10:28:49.539123: step: 528/463, loss: 0.23221251368522644 2023-01-22 10:28:50.163782: step: 530/463, loss: 0.1168912872672081 2023-01-22 10:28:50.780370: step: 532/463, loss: 0.19208881258964539 2023-01-22 10:28:51.409097: step: 534/463, loss: 0.2637074887752533 2023-01-22 10:28:52.085016: step: 536/463, loss: 0.15540830790996552 2023-01-22 10:28:52.679682: step: 538/463, loss: 0.3781346082687378 2023-01-22 10:28:53.313388: step: 540/463, loss: 0.14690083265304565 2023-01-22 10:28:53.904690: step: 542/463, loss: 0.256965696811676 2023-01-22 10:28:54.523571: step: 544/463, loss: 0.15526212751865387 2023-01-22 10:28:55.180369: step: 546/463, loss: 0.1643633097410202 2023-01-22 10:28:55.783570: step: 548/463, loss: 0.12368601560592651 2023-01-22 10:28:56.462655: step: 550/463, loss: 0.6030208468437195 2023-01-22 10:28:57.056096: step: 552/463, loss: 0.2335471659898758 2023-01-22 10:28:57.610369: step: 554/463, loss: 0.13969428837299347 2023-01-22 10:28:58.248748: step: 556/463, loss: 0.2825043797492981 2023-01-22 10:28:58.867220: step: 558/463, loss: 0.29415857791900635 2023-01-22 10:28:59.512791: step: 560/463, loss: 0.3851672410964966 2023-01-22 10:29:00.126387: step: 562/463, loss: 0.1347467005252838 2023-01-22 10:29:00.795422: step: 564/463, loss: 0.2585058808326721 2023-01-22 10:29:01.508208: step: 566/463, loss: 0.1628204882144928 2023-01-22 10:29:02.099183: step: 568/463, loss: 0.2897280752658844 2023-01-22 10:29:02.747566: step: 570/463, loss: 0.18531332910060883 2023-01-22 10:29:03.394911: step: 572/463, loss: 0.09126812219619751 2023-01-22 10:29:04.019834: step: 574/463, loss: 0.2615398168563843 2023-01-22 10:29:04.658876: step: 576/463, loss: 0.1360349804162979 2023-01-22 10:29:05.285013: step: 578/463, loss: 0.2566956877708435 2023-01-22 10:29:05.885712: step: 580/463, loss: 0.24119022488594055 2023-01-22 10:29:06.511127: step: 582/463, loss: 0.1254299432039261 2023-01-22 10:29:07.101531: step: 584/463, loss: 0.10666186362504959 2023-01-22 10:29:07.719742: step: 586/463, loss: 0.10470999777317047 2023-01-22 10:29:08.309679: step: 588/463, loss: 1.404681921005249 2023-01-22 10:29:08.935412: step: 590/463, loss: 0.06586433202028275 2023-01-22 10:29:09.637183: step: 592/463, loss: 0.8000977635383606 2023-01-22 10:29:10.217253: step: 594/463, loss: 0.6390179395675659 2023-01-22 10:29:10.822463: step: 596/463, loss: 0.9234566688537598 2023-01-22 10:29:11.544545: step: 598/463, loss: 0.15058062970638275 2023-01-22 10:29:12.208766: step: 600/463, loss: 0.146126389503479 2023-01-22 10:29:12.860414: step: 602/463, loss: 0.11970563232898712 2023-01-22 10:29:13.472515: step: 604/463, loss: 0.1082342341542244 2023-01-22 10:29:14.064829: step: 606/463, loss: 0.1443565934896469 2023-01-22 10:29:14.713723: step: 608/463, loss: 0.5981305837631226 2023-01-22 10:29:15.348618: step: 610/463, loss: 0.05745577812194824 2023-01-22 10:29:15.927220: step: 612/463, loss: 0.24875128269195557 2023-01-22 10:29:16.523423: step: 614/463, loss: 0.09177330881357193 2023-01-22 10:29:17.176002: step: 616/463, loss: 0.09690413624048233 2023-01-22 10:29:17.888984: step: 618/463, loss: 0.18338355422019958 2023-01-22 10:29:18.530977: step: 620/463, loss: 0.13161371648311615 2023-01-22 10:29:19.139244: step: 622/463, loss: 0.13530658185482025 2023-01-22 10:29:19.760345: step: 624/463, loss: 0.10445586591959 2023-01-22 10:29:20.403958: step: 626/463, loss: 0.6663369536399841 2023-01-22 10:29:21.049277: step: 628/463, loss: 6.0904669761657715 2023-01-22 10:29:21.731288: step: 630/463, loss: 0.0396406427025795 2023-01-22 10:29:22.409378: step: 632/463, loss: 0.15473344922065735 2023-01-22 10:29:23.014090: step: 634/463, loss: 0.12488384544849396 2023-01-22 10:29:23.620101: step: 636/463, loss: 0.43946170806884766 2023-01-22 10:29:24.240191: step: 638/463, loss: 0.07207348942756653 2023-01-22 10:29:24.901639: step: 640/463, loss: 0.11279551684856415 2023-01-22 10:29:25.494888: step: 642/463, loss: 0.4018101096153259 2023-01-22 10:29:26.106586: step: 644/463, loss: 0.14402031898498535 2023-01-22 10:29:26.702798: step: 646/463, loss: 0.2797985076904297 2023-01-22 10:29:27.314185: step: 648/463, loss: 0.18237920105457306 2023-01-22 10:29:27.918785: step: 650/463, loss: 0.21783073246479034 2023-01-22 10:29:28.583350: step: 652/463, loss: 1.066781759262085 2023-01-22 10:29:29.329992: step: 654/463, loss: 0.24042648077011108 2023-01-22 10:29:30.037628: step: 656/463, loss: 0.15281209349632263 2023-01-22 10:29:30.685392: step: 658/463, loss: 0.156751811504364 2023-01-22 10:29:31.292452: step: 660/463, loss: 0.24920238554477692 2023-01-22 10:29:31.902191: step: 662/463, loss: 0.2024933099746704 2023-01-22 10:29:32.491711: step: 664/463, loss: 0.14370478689670563 2023-01-22 10:29:33.177133: step: 666/463, loss: 0.38280215859413147 2023-01-22 10:29:33.788183: step: 668/463, loss: 0.06891478598117828 2023-01-22 10:29:34.417363: step: 670/463, loss: 0.18897640705108643 2023-01-22 10:29:35.059531: step: 672/463, loss: 0.6059722900390625 2023-01-22 10:29:35.688245: step: 674/463, loss: 0.22382034361362457 2023-01-22 10:29:36.365100: step: 676/463, loss: 0.3186624348163605 2023-01-22 10:29:36.978493: step: 678/463, loss: 0.06853877007961273 2023-01-22 10:29:37.574839: step: 680/463, loss: 0.23257888853549957 2023-01-22 10:29:38.120148: step: 682/463, loss: 0.04693547263741493 2023-01-22 10:29:38.753881: step: 684/463, loss: 0.25548672676086426 2023-01-22 10:29:39.430610: step: 686/463, loss: 0.06591357290744781 2023-01-22 10:29:40.026467: step: 688/463, loss: 0.1754855513572693 2023-01-22 10:29:40.611824: step: 690/463, loss: 0.15456275641918182 2023-01-22 10:29:41.207909: step: 692/463, loss: 0.34810635447502136 2023-01-22 10:29:41.802975: step: 694/463, loss: 0.3315097689628601 2023-01-22 10:29:42.442077: step: 696/463, loss: 0.4954250752925873 2023-01-22 10:29:43.045047: step: 698/463, loss: 0.3658079504966736 2023-01-22 10:29:43.719500: step: 700/463, loss: 0.5540649890899658 2023-01-22 10:29:44.314630: step: 702/463, loss: 0.020063428208231926 2023-01-22 10:29:44.941331: step: 704/463, loss: 0.7302101850509644 2023-01-22 10:29:45.556406: step: 706/463, loss: 0.09799230098724365 2023-01-22 10:29:46.174215: step: 708/463, loss: 0.43738266825675964 2023-01-22 10:29:46.889870: step: 710/463, loss: 1.3673121929168701 2023-01-22 10:29:47.489012: step: 712/463, loss: 0.13889220356941223 2023-01-22 10:29:48.084530: step: 714/463, loss: 0.16936029493808746 2023-01-22 10:29:48.685324: step: 716/463, loss: 0.2813599109649658 2023-01-22 10:29:49.380391: step: 718/463, loss: 0.2729129493236542 2023-01-22 10:29:49.988918: step: 720/463, loss: 0.12637236714363098 2023-01-22 10:29:50.570868: step: 722/463, loss: 0.17539240419864655 2023-01-22 10:29:51.221369: step: 724/463, loss: 0.21712090075016022 2023-01-22 10:29:51.867736: step: 726/463, loss: 0.07533413916826248 2023-01-22 10:29:52.418749: step: 728/463, loss: 0.3124937415122986 2023-01-22 10:29:53.046664: step: 730/463, loss: 0.054267656058073044 2023-01-22 10:29:53.662636: step: 732/463, loss: 0.19126291573047638 2023-01-22 10:29:54.177549: step: 734/463, loss: 0.10401225090026855 2023-01-22 10:29:54.814294: step: 736/463, loss: 0.2681736946105957 2023-01-22 10:29:55.431385: step: 738/463, loss: 0.04870062693953514 2023-01-22 10:29:56.091353: step: 740/463, loss: 0.16951194405555725 2023-01-22 10:29:56.734147: step: 742/463, loss: 0.05111130699515343 2023-01-22 10:29:57.329708: step: 744/463, loss: 0.22381947934627533 2023-01-22 10:29:57.951556: step: 746/463, loss: 0.21593955159187317 2023-01-22 10:29:58.592430: step: 748/463, loss: 0.2164674699306488 2023-01-22 10:29:59.175322: step: 750/463, loss: 0.17841942608356476 2023-01-22 10:29:59.790280: step: 752/463, loss: 0.37297749519348145 2023-01-22 10:30:00.372687: step: 754/463, loss: 0.026182163506746292 2023-01-22 10:30:01.009045: step: 756/463, loss: 0.18546372652053833 2023-01-22 10:30:01.609646: step: 758/463, loss: 0.31787431240081787 2023-01-22 10:30:02.245682: step: 760/463, loss: 0.16469746828079224 2023-01-22 10:30:02.813651: step: 762/463, loss: 0.08531952649354935 2023-01-22 10:30:03.455328: step: 764/463, loss: 0.44032901525497437 2023-01-22 10:30:03.999702: step: 766/463, loss: 0.05457795411348343 2023-01-22 10:30:04.699970: step: 768/463, loss: 0.4040237069129944 2023-01-22 10:30:05.281786: step: 770/463, loss: 0.17169338464736938 2023-01-22 10:30:05.865870: step: 772/463, loss: 0.07890637964010239 2023-01-22 10:30:06.534672: step: 774/463, loss: 0.21412260830402374 2023-01-22 10:30:07.107710: step: 776/463, loss: 0.09458138793706894 2023-01-22 10:30:07.754781: step: 778/463, loss: 0.4084387421607971 2023-01-22 10:30:08.415728: step: 780/463, loss: 2.2510499954223633 2023-01-22 10:30:09.104878: step: 782/463, loss: 0.13423019647598267 2023-01-22 10:30:09.690421: step: 784/463, loss: 0.38092344999313354 2023-01-22 10:30:10.291319: step: 786/463, loss: 0.15195366740226746 2023-01-22 10:30:11.001361: step: 788/463, loss: 0.16202634572982788 2023-01-22 10:30:11.614045: step: 790/463, loss: 0.04834723472595215 2023-01-22 10:30:12.260156: step: 792/463, loss: 0.5601105690002441 2023-01-22 10:30:12.912125: step: 794/463, loss: 0.12197083234786987 2023-01-22 10:30:13.512816: step: 796/463, loss: 0.17657619714736938 2023-01-22 10:30:14.085605: step: 798/463, loss: 0.3934799134731293 2023-01-22 10:30:14.745247: step: 800/463, loss: 0.21426966786384583 2023-01-22 10:30:15.381832: step: 802/463, loss: 0.25814029574394226 2023-01-22 10:30:16.055098: step: 804/463, loss: 0.307193785905838 2023-01-22 10:30:16.689018: step: 806/463, loss: 0.22508278489112854 2023-01-22 10:30:17.312709: step: 808/463, loss: 0.16606535017490387 2023-01-22 10:30:17.961371: step: 810/463, loss: 0.8540472388267517 2023-01-22 10:30:18.585389: step: 812/463, loss: 0.1355004459619522 2023-01-22 10:30:19.192291: step: 814/463, loss: 0.214835524559021 2023-01-22 10:30:19.735768: step: 816/463, loss: 0.21123486757278442 2023-01-22 10:30:20.425447: step: 818/463, loss: 0.684298038482666 2023-01-22 10:30:21.056401: step: 820/463, loss: 0.20782868564128876 2023-01-22 10:30:21.661259: step: 822/463, loss: 0.08836875110864639 2023-01-22 10:30:22.265746: step: 824/463, loss: 0.22916240990161896 2023-01-22 10:30:22.890335: step: 826/463, loss: 0.09399758279323578 2023-01-22 10:30:23.466695: step: 828/463, loss: 0.42939338088035583 2023-01-22 10:30:24.074025: step: 830/463, loss: 0.18317995965480804 2023-01-22 10:30:24.788949: step: 832/463, loss: 0.4541419446468353 2023-01-22 10:30:25.416971: step: 834/463, loss: 0.198395237326622 2023-01-22 10:30:26.169839: step: 836/463, loss: 0.2802852988243103 2023-01-22 10:30:26.777163: step: 838/463, loss: 0.0982271358370781 2023-01-22 10:30:27.400603: step: 840/463, loss: 0.13374069333076477 2023-01-22 10:30:28.020275: step: 842/463, loss: 0.26857098937034607 2023-01-22 10:30:28.613846: step: 844/463, loss: 0.14647513628005981 2023-01-22 10:30:29.233368: step: 846/463, loss: 0.1926325559616089 2023-01-22 10:30:29.829665: step: 848/463, loss: 0.5408521890640259 2023-01-22 10:30:30.474270: step: 850/463, loss: 0.14338821172714233 2023-01-22 10:30:30.986818: step: 852/463, loss: 0.3919200003147125 2023-01-22 10:30:31.572858: step: 854/463, loss: 0.09635207802057266 2023-01-22 10:30:32.196652: step: 856/463, loss: 0.1431223601102829 2023-01-22 10:30:32.810938: step: 858/463, loss: 0.12814195454120636 2023-01-22 10:30:33.426913: step: 860/463, loss: 2.128169059753418 2023-01-22 10:30:33.978413: step: 862/463, loss: 0.37324991822242737 2023-01-22 10:30:34.548871: step: 864/463, loss: 0.20899564027786255 2023-01-22 10:30:35.185940: step: 866/463, loss: 0.18874073028564453 2023-01-22 10:30:35.798423: step: 868/463, loss: 0.24432745575904846 2023-01-22 10:30:36.406604: step: 870/463, loss: 0.3748307526111603 2023-01-22 10:30:37.027545: step: 872/463, loss: 0.10357166081666946 2023-01-22 10:30:37.669963: step: 874/463, loss: 0.5530768036842346 2023-01-22 10:30:38.225006: step: 876/463, loss: 0.292721688747406 2023-01-22 10:30:38.807141: step: 878/463, loss: 0.2837667763233185 2023-01-22 10:30:39.421338: step: 880/463, loss: 0.18089216947555542 2023-01-22 10:30:40.057248: step: 882/463, loss: 0.7346042990684509 2023-01-22 10:30:40.685219: step: 884/463, loss: 0.21073144674301147 2023-01-22 10:30:41.452543: step: 886/463, loss: 0.46338459849357605 2023-01-22 10:30:42.144194: step: 888/463, loss: 0.7236040234565735 2023-01-22 10:30:42.762788: step: 890/463, loss: 0.17800061404705048 2023-01-22 10:30:43.365904: step: 892/463, loss: 0.2058294415473938 2023-01-22 10:30:44.039634: step: 894/463, loss: 0.28168317675590515 2023-01-22 10:30:44.624165: step: 896/463, loss: 0.19440966844558716 2023-01-22 10:30:45.247746: step: 898/463, loss: 0.38313883543014526 2023-01-22 10:30:45.822229: step: 900/463, loss: 0.3498222827911377 2023-01-22 10:30:46.435032: step: 902/463, loss: 0.1202121451497078 2023-01-22 10:30:47.053324: step: 904/463, loss: 0.36205485463142395 2023-01-22 10:30:47.643562: step: 906/463, loss: 0.7415978312492371 2023-01-22 10:30:48.209451: step: 908/463, loss: 0.09956806898117065 2023-01-22 10:30:48.909206: step: 910/463, loss: 0.3380570113658905 2023-01-22 10:30:49.624813: step: 912/463, loss: 0.3525811433792114 2023-01-22 10:30:50.196497: step: 914/463, loss: 0.11312661319971085 2023-01-22 10:30:50.853569: step: 916/463, loss: 0.13564646244049072 2023-01-22 10:30:51.456390: step: 918/463, loss: 0.39960673451423645 2023-01-22 10:30:52.077313: step: 920/463, loss: 0.11281851679086685 2023-01-22 10:30:52.675364: step: 922/463, loss: 0.15116821229457855 2023-01-22 10:30:53.302129: step: 924/463, loss: 0.14059405028820038 2023-01-22 10:30:53.937756: step: 926/463, loss: 0.25888195633888245 ================================================== Loss: 0.276 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3069909886051305, 'r': 0.34485515228508024, 'f1': 0.32482335166083515}, 'combined': 0.23934352227640482, 'epoch': 11} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33880100990551887, 'r': 0.4020148012757581, 'f1': 0.36771089011183256}, 'combined': 0.28501992439290375, 'epoch': 11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27154036912865687, 'r': 0.3560424953850131, 'f1': 0.3081024549555039}, 'combined': 0.22702286154616078, 'epoch': 11} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.31882382699072975, 'r': 0.4055626622749724, 'f1': 0.3570001428439886}, 'combined': 0.2767178140704601, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2826169984385876, 'r': 0.3534053168330726, 'f1': 0.31407184143512523}, 'combined': 0.23142135684693438, 'epoch': 11} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.31953084957822697, 'r': 0.38972190936609113, 'f1': 0.3511531572590536}, 'combined': 0.2721857008419459, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2, 'r': 0.2857142857142857, 'f1': 0.23529411764705882}, 'combined': 0.1568627450980392, 'epoch': 11} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.1858974358974359, 'r': 0.31521739130434784, 'f1': 0.23387096774193547}, 'combined': 0.11693548387096774, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.21739130434782608, 'r': 0.1724137931034483, 'f1': 0.19230769230769232}, 'combined': 0.1282051282051282, 'epoch': 11} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29855383301254707, 'r': 0.34444161379815674, 'f1': 0.3198603180116804}, 'combined': 0.23568655011386977, 'epoch': 7} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.30988948788864984, 'r': 0.3867922100668993, 'f1': 0.34409642236532006}, 'combined': 0.2667158871922577, 'epoch': 7} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2636363636363636, 'r': 0.4142857142857143, 'f1': 0.3222222222222222}, 'combined': 0.2148148148148148, 'epoch': 7} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.294349755002927, 'r': 0.3345645222898544, 'f1': 0.3131714089640378}, 'combined': 0.23075788028929098, 'epoch': 6} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3358950473035951, 'r': 0.33466466251493726, 'f1': 0.3352787261158821}, 'combined': 0.25988111785058804, 'epoch': 6} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 6} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2671913725285436, 'r': 0.31941283622956823, 'f1': 0.29097763991872505}, 'combined': 0.21440457678221844, 'epoch': 10} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32113335504985907, 'r': 0.3837071337911919, 'f1': 0.34964268137756854}, 'combined': 0.2710149013548618, 'epoch': 10} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.3103448275862069, 'f1': 0.339622641509434}, 'combined': 0.22641509433962265, 'epoch': 10} ****************************** Epoch: 12 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 10:33:33.714941: step: 2/463, loss: 0.04709136486053467 2023-01-22 10:33:34.325481: step: 4/463, loss: 0.2920854091644287 2023-01-22 10:33:34.977005: step: 6/463, loss: 0.09253279119729996 2023-01-22 10:33:35.586906: step: 8/463, loss: 0.1371663361787796 2023-01-22 10:33:36.279066: step: 10/463, loss: 0.4698277711868286 2023-01-22 10:33:36.922757: step: 12/463, loss: 0.09835254400968552 2023-01-22 10:33:37.602854: step: 14/463, loss: 0.17487305402755737 2023-01-22 10:33:38.199492: step: 16/463, loss: 0.09919741749763489 2023-01-22 10:33:38.813432: step: 18/463, loss: 0.2199268639087677 2023-01-22 10:33:39.445002: step: 20/463, loss: 0.37634027004241943 2023-01-22 10:33:40.033091: step: 22/463, loss: 0.06089381128549576 2023-01-22 10:33:40.666594: step: 24/463, loss: 0.09642766416072845 2023-01-22 10:33:41.254775: step: 26/463, loss: 0.09353947639465332 2023-01-22 10:33:41.858806: step: 28/463, loss: 0.03183140978217125 2023-01-22 10:33:42.479331: step: 30/463, loss: 0.10085129737854004 2023-01-22 10:33:43.152417: step: 32/463, loss: 0.3874399960041046 2023-01-22 10:33:43.776105: step: 34/463, loss: 0.16750377416610718 2023-01-22 10:33:44.490675: step: 36/463, loss: 0.1750829517841339 2023-01-22 10:33:45.136811: step: 38/463, loss: 0.22163164615631104 2023-01-22 10:33:45.794658: step: 40/463, loss: 0.08728481829166412 2023-01-22 10:33:46.351276: step: 42/463, loss: 0.1055922657251358 2023-01-22 10:33:47.019191: step: 44/463, loss: 0.06161033362150192 2023-01-22 10:33:47.622063: step: 46/463, loss: 0.1445070207118988 2023-01-22 10:33:48.228690: step: 48/463, loss: 0.11023698002099991 2023-01-22 10:33:48.835957: step: 50/463, loss: 0.03614789992570877 2023-01-22 10:33:49.500530: step: 52/463, loss: 0.1414175033569336 2023-01-22 10:33:50.075414: step: 54/463, loss: 0.12846750020980835 2023-01-22 10:33:50.706440: step: 56/463, loss: 0.1323210746049881 2023-01-22 10:33:51.317836: step: 58/463, loss: 0.14289246499538422 2023-01-22 10:33:51.926344: step: 60/463, loss: 0.2360147386789322 2023-01-22 10:33:52.473106: step: 62/463, loss: 0.12843173742294312 2023-01-22 10:33:53.071688: step: 64/463, loss: 0.3064770996570587 2023-01-22 10:33:53.721830: step: 66/463, loss: 0.07110176235437393 2023-01-22 10:33:54.391805: step: 68/463, loss: 0.5093967318534851 2023-01-22 10:33:54.961893: step: 70/463, loss: 0.02749595232307911 2023-01-22 10:33:55.523303: step: 72/463, loss: 0.15308287739753723 2023-01-22 10:33:56.172573: step: 74/463, loss: 0.12487535923719406 2023-01-22 10:33:56.791190: step: 76/463, loss: 0.1353198140859604 2023-01-22 10:33:57.424853: step: 78/463, loss: 0.24977631866931915 2023-01-22 10:33:58.003548: step: 80/463, loss: 0.23683463037014008 2023-01-22 10:33:58.571918: step: 82/463, loss: 0.15805213153362274 2023-01-22 10:33:59.206521: step: 84/463, loss: 0.1394563913345337 2023-01-22 10:33:59.870636: step: 86/463, loss: 0.06980950385332108 2023-01-22 10:34:00.493895: step: 88/463, loss: 0.15665149688720703 2023-01-22 10:34:01.150143: step: 90/463, loss: 0.2898462414741516 2023-01-22 10:34:01.818587: step: 92/463, loss: 0.06743495166301727 2023-01-22 10:34:02.457384: step: 94/463, loss: 0.2256937026977539 2023-01-22 10:34:03.179116: step: 96/463, loss: 0.14297865331172943 2023-01-22 10:34:03.923540: step: 98/463, loss: 0.1014813780784607 2023-01-22 10:34:04.521062: step: 100/463, loss: 0.40818488597869873 2023-01-22 10:34:05.104057: step: 102/463, loss: 0.06096276640892029 2023-01-22 10:34:05.687136: step: 104/463, loss: 0.5698774456977844 2023-01-22 10:34:06.265964: step: 106/463, loss: 0.12469528615474701 2023-01-22 10:34:06.859549: step: 108/463, loss: 0.16977208852767944 2023-01-22 10:34:07.464045: step: 110/463, loss: 0.11705274879932404 2023-01-22 10:34:08.026660: step: 112/463, loss: 0.18914425373077393 2023-01-22 10:34:08.636880: step: 114/463, loss: 0.3135256767272949 2023-01-22 10:34:09.262365: step: 116/463, loss: 0.08335922658443451 2023-01-22 10:34:09.932017: step: 118/463, loss: 0.10146239399909973 2023-01-22 10:34:10.546187: step: 120/463, loss: 0.5673781633377075 2023-01-22 10:34:11.199327: step: 122/463, loss: 0.1318645030260086 2023-01-22 10:34:11.781518: step: 124/463, loss: 0.06457088142633438 2023-01-22 10:34:12.338528: step: 126/463, loss: 0.1355675607919693 2023-01-22 10:34:12.912847: step: 128/463, loss: 0.2858072519302368 2023-01-22 10:34:13.611018: step: 130/463, loss: 0.2889961004257202 2023-01-22 10:34:14.209995: step: 132/463, loss: 0.0627320408821106 2023-01-22 10:34:14.779330: step: 134/463, loss: 0.029495416209101677 2023-01-22 10:34:15.463764: step: 136/463, loss: 0.06627761572599411 2023-01-22 10:34:16.110075: step: 138/463, loss: 0.4275059103965759 2023-01-22 10:34:16.640083: step: 140/463, loss: 0.45004311203956604 2023-01-22 10:34:17.295620: step: 142/463, loss: 0.24258460104465485 2023-01-22 10:34:17.985315: step: 144/463, loss: 0.15017956495285034 2023-01-22 10:34:18.568879: step: 146/463, loss: 0.2845919132232666 2023-01-22 10:34:19.150809: step: 148/463, loss: 0.1482020616531372 2023-01-22 10:34:19.773546: step: 150/463, loss: 0.07862148433923721 2023-01-22 10:34:20.366816: step: 152/463, loss: 0.24219466745853424 2023-01-22 10:34:20.994298: step: 154/463, loss: 0.11889304220676422 2023-01-22 10:34:21.621640: step: 156/463, loss: 0.3058384656906128 2023-01-22 10:34:22.196705: step: 158/463, loss: 0.11666297912597656 2023-01-22 10:34:22.816084: step: 160/463, loss: 0.07814386487007141 2023-01-22 10:34:23.493867: step: 162/463, loss: 0.1686367243528366 2023-01-22 10:34:24.116761: step: 164/463, loss: 0.11788913607597351 2023-01-22 10:34:24.687329: step: 166/463, loss: 0.03485008329153061 2023-01-22 10:34:25.209584: step: 168/463, loss: 0.1416996717453003 2023-01-22 10:34:25.826086: step: 170/463, loss: 0.9084913730621338 2023-01-22 10:34:26.495204: step: 172/463, loss: 0.8498354554176331 2023-01-22 10:34:27.104035: step: 174/463, loss: 0.26384708285331726 2023-01-22 10:34:27.673334: step: 176/463, loss: 1.0271313190460205 2023-01-22 10:34:28.297813: step: 178/463, loss: 0.12873773276805878 2023-01-22 10:34:28.879478: step: 180/463, loss: 0.0576903410255909 2023-01-22 10:34:29.470251: step: 182/463, loss: 0.11037561297416687 2023-01-22 10:34:30.081807: step: 184/463, loss: 0.11606007069349289 2023-01-22 10:34:30.658912: step: 186/463, loss: 0.13565154373645782 2023-01-22 10:34:31.271572: step: 188/463, loss: 0.24710839986801147 2023-01-22 10:34:31.926573: step: 190/463, loss: 0.2567966878414154 2023-01-22 10:34:32.550670: step: 192/463, loss: 0.30008643865585327 2023-01-22 10:34:33.206642: step: 194/463, loss: 0.4114149808883667 2023-01-22 10:34:33.823511: step: 196/463, loss: 0.11192763596773148 2023-01-22 10:34:34.500411: step: 198/463, loss: 0.1867704540491104 2023-01-22 10:34:35.120054: step: 200/463, loss: 0.04718553274869919 2023-01-22 10:34:35.727434: step: 202/463, loss: 0.07107377797365189 2023-01-22 10:34:36.280288: step: 204/463, loss: 0.04993832856416702 2023-01-22 10:34:36.838705: step: 206/463, loss: 0.16794365644454956 2023-01-22 10:34:37.410540: step: 208/463, loss: 0.10026872903108597 2023-01-22 10:34:38.030966: step: 210/463, loss: 0.29637181758880615 2023-01-22 10:34:38.624043: step: 212/463, loss: 0.08233652263879776 2023-01-22 10:34:39.273276: step: 214/463, loss: 0.14846354722976685 2023-01-22 10:34:39.885537: step: 216/463, loss: 0.07847565412521362 2023-01-22 10:34:40.544073: step: 218/463, loss: 0.45206472277641296 2023-01-22 10:34:41.164695: step: 220/463, loss: 0.6849433779716492 2023-01-22 10:34:41.785994: step: 222/463, loss: 0.0510321743786335 2023-01-22 10:34:42.501976: step: 224/463, loss: 0.10107812285423279 2023-01-22 10:34:43.094854: step: 226/463, loss: 0.3550601899623871 2023-01-22 10:34:43.726302: step: 228/463, loss: 0.14961254596710205 2023-01-22 10:34:44.334523: step: 230/463, loss: 0.1446916162967682 2023-01-22 10:34:44.996229: step: 232/463, loss: 0.12215256690979004 2023-01-22 10:34:45.599349: step: 234/463, loss: 0.5524691939353943 2023-01-22 10:34:46.301755: step: 236/463, loss: 0.37820738554000854 2023-01-22 10:34:46.945199: step: 238/463, loss: 0.3266206383705139 2023-01-22 10:34:47.556745: step: 240/463, loss: 0.10210172086954117 2023-01-22 10:34:48.120486: step: 242/463, loss: 0.09358435124158859 2023-01-22 10:34:48.721527: step: 244/463, loss: 0.15797604620456696 2023-01-22 10:34:49.341331: step: 246/463, loss: 0.11809127032756805 2023-01-22 10:34:49.983456: step: 248/463, loss: 0.12133277207612991 2023-01-22 10:34:50.583294: step: 250/463, loss: 0.11855854839086533 2023-01-22 10:34:51.224602: step: 252/463, loss: 0.09213931858539581 2023-01-22 10:34:51.946560: step: 254/463, loss: 0.16537536680698395 2023-01-22 10:34:52.541636: step: 256/463, loss: 0.053565863519907 2023-01-22 10:34:53.171585: step: 258/463, loss: 0.05515823885798454 2023-01-22 10:34:53.826535: step: 260/463, loss: 0.93522709608078 2023-01-22 10:34:54.466601: step: 262/463, loss: 0.1171746551990509 2023-01-22 10:34:55.123866: step: 264/463, loss: 0.0768083930015564 2023-01-22 10:34:55.736501: step: 266/463, loss: 0.5953385233879089 2023-01-22 10:34:56.433854: step: 268/463, loss: 1.0117777585983276 2023-01-22 10:34:56.991064: step: 270/463, loss: 0.08235375583171844 2023-01-22 10:34:57.608745: step: 272/463, loss: 0.3589954078197479 2023-01-22 10:34:58.275830: step: 274/463, loss: 0.12249688059091568 2023-01-22 10:34:58.964684: step: 276/463, loss: 0.21151240170001984 2023-01-22 10:34:59.596662: step: 278/463, loss: 0.12480197101831436 2023-01-22 10:35:00.295904: step: 280/463, loss: 0.1926862895488739 2023-01-22 10:35:00.867225: step: 282/463, loss: 0.05210735276341438 2023-01-22 10:35:01.551458: step: 284/463, loss: 0.13047321140766144 2023-01-22 10:35:02.141117: step: 286/463, loss: 0.4637804329395294 2023-01-22 10:35:02.805259: step: 288/463, loss: 0.21883392333984375 2023-01-22 10:35:03.368837: step: 290/463, loss: 0.574614405632019 2023-01-22 10:35:04.141254: step: 292/463, loss: 0.09791631996631622 2023-01-22 10:35:04.775319: step: 294/463, loss: 0.36240866780281067 2023-01-22 10:35:05.446927: step: 296/463, loss: 0.3311035633087158 2023-01-22 10:35:06.123575: step: 298/463, loss: 0.07071972638368607 2023-01-22 10:35:06.721331: step: 300/463, loss: 0.09666219353675842 2023-01-22 10:35:07.381230: step: 302/463, loss: 0.0765746682882309 2023-01-22 10:35:07.975503: step: 304/463, loss: 0.22984886169433594 2023-01-22 10:35:08.700472: step: 306/463, loss: 0.07878180593252182 2023-01-22 10:35:09.325153: step: 308/463, loss: 0.042481642216444016 2023-01-22 10:35:09.922105: step: 310/463, loss: 0.19018863141536713 2023-01-22 10:35:10.610905: step: 312/463, loss: 0.37769585847854614 2023-01-22 10:35:11.235382: step: 314/463, loss: 0.10424766689538956 2023-01-22 10:35:11.795145: step: 316/463, loss: 0.16901977360248566 2023-01-22 10:35:12.436994: step: 318/463, loss: 0.11063750833272934 2023-01-22 10:35:13.014544: step: 320/463, loss: 0.0661756843328476 2023-01-22 10:35:13.619537: step: 322/463, loss: 0.08627904951572418 2023-01-22 10:35:14.167636: step: 324/463, loss: 0.1924169510602951 2023-01-22 10:35:14.749222: step: 326/463, loss: 0.3969847559928894 2023-01-22 10:35:15.408602: step: 328/463, loss: 0.05523085966706276 2023-01-22 10:35:16.020607: step: 330/463, loss: 0.17462460696697235 2023-01-22 10:35:16.618769: step: 332/463, loss: 0.06294368207454681 2023-01-22 10:35:17.229079: step: 334/463, loss: 0.03935914486646652 2023-01-22 10:35:17.792460: step: 336/463, loss: 0.11624235659837723 2023-01-22 10:35:18.492299: step: 338/463, loss: 0.28423333168029785 2023-01-22 10:35:19.128724: step: 340/463, loss: 0.2618388533592224 2023-01-22 10:35:19.743114: step: 342/463, loss: 0.5247267484664917 2023-01-22 10:35:20.391862: step: 344/463, loss: 0.35072705149650574 2023-01-22 10:35:20.934218: step: 346/463, loss: 0.11733942478895187 2023-01-22 10:35:21.523684: step: 348/463, loss: 0.05037592723965645 2023-01-22 10:35:22.098709: step: 350/463, loss: 0.21319517493247986 2023-01-22 10:35:22.747989: step: 352/463, loss: 0.2764868438243866 2023-01-22 10:35:23.370209: step: 354/463, loss: 0.0621664896607399 2023-01-22 10:35:23.967323: step: 356/463, loss: 0.20700062811374664 2023-01-22 10:35:24.572214: step: 358/463, loss: 0.14665798842906952 2023-01-22 10:35:25.198410: step: 360/463, loss: 0.19634947180747986 2023-01-22 10:35:25.851425: step: 362/463, loss: 0.31635794043540955 2023-01-22 10:35:26.528385: step: 364/463, loss: 0.14732970297336578 2023-01-22 10:35:27.069774: step: 366/463, loss: 0.055862780660390854 2023-01-22 10:35:27.688270: step: 368/463, loss: 0.1924988329410553 2023-01-22 10:35:28.319720: step: 370/463, loss: 0.18968096375465393 2023-01-22 10:35:28.938106: step: 372/463, loss: 0.12090527266263962 2023-01-22 10:35:29.528031: step: 374/463, loss: 0.17040404677391052 2023-01-22 10:35:30.116821: step: 376/463, loss: 0.10571619868278503 2023-01-22 10:35:30.685517: step: 378/463, loss: 0.4054567813873291 2023-01-22 10:35:31.265854: step: 380/463, loss: 0.13476382195949554 2023-01-22 10:35:31.914379: step: 382/463, loss: 0.32289326190948486 2023-01-22 10:35:32.508396: step: 384/463, loss: 0.23520106077194214 2023-01-22 10:35:33.189799: step: 386/463, loss: 0.21489021182060242 2023-01-22 10:35:33.751110: step: 388/463, loss: 0.33941173553466797 2023-01-22 10:35:34.404207: step: 390/463, loss: 0.2983942925930023 2023-01-22 10:35:35.014770: step: 392/463, loss: 0.08099149912595749 2023-01-22 10:35:35.634478: step: 394/463, loss: 0.5526613593101501 2023-01-22 10:35:36.228101: step: 396/463, loss: 0.07397932559251785 2023-01-22 10:35:36.818812: step: 398/463, loss: 0.3973013460636139 2023-01-22 10:35:37.541971: step: 400/463, loss: 0.19855327904224396 2023-01-22 10:35:38.177259: step: 402/463, loss: 0.5161593556404114 2023-01-22 10:35:38.747369: step: 404/463, loss: 0.21995356678962708 2023-01-22 10:35:39.333686: step: 406/463, loss: 0.18139362335205078 2023-01-22 10:35:39.972754: step: 408/463, loss: 0.12144064903259277 2023-01-22 10:35:40.567767: step: 410/463, loss: 0.6702880263328552 2023-01-22 10:35:41.201583: step: 412/463, loss: 0.18607521057128906 2023-01-22 10:35:41.735946: step: 414/463, loss: 0.0619998537003994 2023-01-22 10:35:42.339069: step: 416/463, loss: 0.1705656796693802 2023-01-22 10:35:42.943919: step: 418/463, loss: 0.15435650944709778 2023-01-22 10:35:43.526579: step: 420/463, loss: 0.17575670778751373 2023-01-22 10:35:44.168457: step: 422/463, loss: 0.3229943811893463 2023-01-22 10:35:44.774595: step: 424/463, loss: 0.08034990727901459 2023-01-22 10:35:45.443049: step: 426/463, loss: 0.2815340757369995 2023-01-22 10:35:46.029699: step: 428/463, loss: 0.4721074402332306 2023-01-22 10:35:46.627519: step: 430/463, loss: 0.1263284832239151 2023-01-22 10:35:47.271146: step: 432/463, loss: 0.13392148911952972 2023-01-22 10:35:47.901687: step: 434/463, loss: 0.25827857851982117 2023-01-22 10:35:48.524832: step: 436/463, loss: 0.09906576573848724 2023-01-22 10:35:49.131968: step: 438/463, loss: 0.13283367455005646 2023-01-22 10:35:49.783889: step: 440/463, loss: 0.2914736568927765 2023-01-22 10:35:50.426584: step: 442/463, loss: 0.09527754783630371 2023-01-22 10:35:51.068852: step: 444/463, loss: 0.13543950021266937 2023-01-22 10:35:51.718369: step: 446/463, loss: 0.1158260628581047 2023-01-22 10:35:52.360878: step: 448/463, loss: 0.13117797672748566 2023-01-22 10:35:53.024159: step: 450/463, loss: 0.2001490443944931 2023-01-22 10:35:53.673810: step: 452/463, loss: 0.0987393781542778 2023-01-22 10:35:54.262104: step: 454/463, loss: 0.3383936882019043 2023-01-22 10:35:54.871801: step: 456/463, loss: 0.5780913829803467 2023-01-22 10:35:55.531111: step: 458/463, loss: 0.040081772953271866 2023-01-22 10:35:56.167477: step: 460/463, loss: 0.13392862677574158 2023-01-22 10:35:56.777195: step: 462/463, loss: 0.1242854967713356 2023-01-22 10:35:57.424223: step: 464/463, loss: 0.16392742097377777 2023-01-22 10:35:58.042056: step: 466/463, loss: 0.1988963633775711 2023-01-22 10:35:58.673696: step: 468/463, loss: 0.43538421392440796 2023-01-22 10:35:59.334300: step: 470/463, loss: 0.19662325084209442 2023-01-22 10:35:59.893141: step: 472/463, loss: 0.1487337201833725 2023-01-22 10:36:00.529815: step: 474/463, loss: 0.13890081644058228 2023-01-22 10:36:01.141859: step: 476/463, loss: 0.13792787492275238 2023-01-22 10:36:01.787808: step: 478/463, loss: 0.10439697653055191 2023-01-22 10:36:02.393420: step: 480/463, loss: 0.40365296602249146 2023-01-22 10:36:02.995887: step: 482/463, loss: 0.10915505886077881 2023-01-22 10:36:03.574326: step: 484/463, loss: 0.1635630875825882 2023-01-22 10:36:04.206487: step: 486/463, loss: 0.15946418046951294 2023-01-22 10:36:04.770392: step: 488/463, loss: 0.04774601384997368 2023-01-22 10:36:05.354474: step: 490/463, loss: 0.08587408065795898 2023-01-22 10:36:05.960002: step: 492/463, loss: 0.4820919930934906 2023-01-22 10:36:06.606794: step: 494/463, loss: 0.08232135325670242 2023-01-22 10:36:07.239690: step: 496/463, loss: 0.3370683491230011 2023-01-22 10:36:07.827161: step: 498/463, loss: 0.28991711139678955 2023-01-22 10:36:08.466663: step: 500/463, loss: 0.05478794872760773 2023-01-22 10:36:09.089853: step: 502/463, loss: 0.12191804498434067 2023-01-22 10:36:09.710881: step: 504/463, loss: 0.1375276893377304 2023-01-22 10:36:10.350909: step: 506/463, loss: 0.10110702365636826 2023-01-22 10:36:10.944506: step: 508/463, loss: 0.09454682469367981 2023-01-22 10:36:11.536363: step: 510/463, loss: 0.08920510113239288 2023-01-22 10:36:12.120416: step: 512/463, loss: 0.12301144003868103 2023-01-22 10:36:12.743626: step: 514/463, loss: 0.0665707141160965 2023-01-22 10:36:13.367296: step: 516/463, loss: 3.8497567176818848 2023-01-22 10:36:14.013711: step: 518/463, loss: 0.13835231959819794 2023-01-22 10:36:14.660585: step: 520/463, loss: 0.18506622314453125 2023-01-22 10:36:15.325688: step: 522/463, loss: 0.1813444197177887 2023-01-22 10:36:16.011347: step: 524/463, loss: 0.4440092444419861 2023-01-22 10:36:16.567481: step: 526/463, loss: 0.08467331528663635 2023-01-22 10:36:17.186587: step: 528/463, loss: 0.1103239357471466 2023-01-22 10:36:17.919450: step: 530/463, loss: 0.17476728558540344 2023-01-22 10:36:18.642342: step: 532/463, loss: 0.20910315215587616 2023-01-22 10:36:19.231626: step: 534/463, loss: 0.18270151317119598 2023-01-22 10:36:19.831904: step: 536/463, loss: 0.02789652906358242 2023-01-22 10:36:20.435505: step: 538/463, loss: 0.1144997775554657 2023-01-22 10:36:21.065356: step: 540/463, loss: 0.2162732183933258 2023-01-22 10:36:21.704344: step: 542/463, loss: 0.06295991688966751 2023-01-22 10:36:22.316886: step: 544/463, loss: 1.3117586374282837 2023-01-22 10:36:22.957295: step: 546/463, loss: 0.17998982965946198 2023-01-22 10:36:23.630899: step: 548/463, loss: 0.335711270570755 2023-01-22 10:36:24.270298: step: 550/463, loss: 3.912230968475342 2023-01-22 10:36:24.860933: step: 552/463, loss: 1.2657710313796997 2023-01-22 10:36:25.455204: step: 554/463, loss: 0.06327852606773376 2023-01-22 10:36:26.062050: step: 556/463, loss: 0.2076507955789566 2023-01-22 10:36:26.673910: step: 558/463, loss: 0.1588555872440338 2023-01-22 10:36:27.271954: step: 560/463, loss: 0.1802477389574051 2023-01-22 10:36:27.848167: step: 562/463, loss: 0.1872415542602539 2023-01-22 10:36:28.405039: step: 564/463, loss: 0.5986431241035461 2023-01-22 10:36:29.044539: step: 566/463, loss: 0.21616558730602264 2023-01-22 10:36:29.650615: step: 568/463, loss: 0.43030881881713867 2023-01-22 10:36:30.236703: step: 570/463, loss: 0.061145681887865067 2023-01-22 10:36:30.941967: step: 572/463, loss: 0.15694135427474976 2023-01-22 10:36:31.516966: step: 574/463, loss: 0.1301918774843216 2023-01-22 10:36:32.094370: step: 576/463, loss: 0.0886097326874733 2023-01-22 10:36:32.697954: step: 578/463, loss: 2.15177583694458 2023-01-22 10:36:33.389327: step: 580/463, loss: 0.17664887011051178 2023-01-22 10:36:33.993723: step: 582/463, loss: 0.22328969836235046 2023-01-22 10:36:34.581015: step: 584/463, loss: 0.18291985988616943 2023-01-22 10:36:35.151103: step: 586/463, loss: 0.03667150437831879 2023-01-22 10:36:35.810434: step: 588/463, loss: 0.31059208512306213 2023-01-22 10:36:36.394781: step: 590/463, loss: 0.9711023569107056 2023-01-22 10:36:37.007549: step: 592/463, loss: 0.04945782944560051 2023-01-22 10:36:37.582951: step: 594/463, loss: 1.0641658306121826 2023-01-22 10:36:38.255647: step: 596/463, loss: 0.21667850017547607 2023-01-22 10:36:38.833726: step: 598/463, loss: 0.036601223051548004 2023-01-22 10:36:39.431691: step: 600/463, loss: 0.2349911332130432 2023-01-22 10:36:40.032470: step: 602/463, loss: 0.3330759108066559 2023-01-22 10:36:40.625127: step: 604/463, loss: 0.03820441663265228 2023-01-22 10:36:41.228370: step: 606/463, loss: 0.2384144812822342 2023-01-22 10:36:41.863391: step: 608/463, loss: 0.13914506137371063 2023-01-22 10:36:42.532645: step: 610/463, loss: 0.333781361579895 2023-01-22 10:36:43.171806: step: 612/463, loss: 0.12820909917354584 2023-01-22 10:36:43.786274: step: 614/463, loss: 0.09873773157596588 2023-01-22 10:36:44.466679: step: 616/463, loss: 0.1655219942331314 2023-01-22 10:36:45.057807: step: 618/463, loss: 0.08512184023857117 2023-01-22 10:36:45.732571: step: 620/463, loss: 0.5956094264984131 2023-01-22 10:36:46.369746: step: 622/463, loss: 0.059289418160915375 2023-01-22 10:36:46.930529: step: 624/463, loss: 0.12848839163780212 2023-01-22 10:36:47.519558: step: 626/463, loss: 0.24718551337718964 2023-01-22 10:36:48.162343: step: 628/463, loss: 0.1663002222776413 2023-01-22 10:36:48.729728: step: 630/463, loss: 0.23154708743095398 2023-01-22 10:36:49.405995: step: 632/463, loss: 0.26078882813453674 2023-01-22 10:36:50.074758: step: 634/463, loss: 0.10760688781738281 2023-01-22 10:36:50.723737: step: 636/463, loss: 0.08595269173383713 2023-01-22 10:36:51.386439: step: 638/463, loss: 0.16869382560253143 2023-01-22 10:36:52.019033: step: 640/463, loss: 0.2620159089565277 2023-01-22 10:36:52.607807: step: 642/463, loss: 0.43244874477386475 2023-01-22 10:36:53.208752: step: 644/463, loss: 0.6806952357292175 2023-01-22 10:36:53.894128: step: 646/463, loss: 0.1981651782989502 2023-01-22 10:36:54.464539: step: 648/463, loss: 0.16820865869522095 2023-01-22 10:36:55.174297: step: 650/463, loss: 0.20617768168449402 2023-01-22 10:36:55.778044: step: 652/463, loss: 0.14653196930885315 2023-01-22 10:36:56.297972: step: 654/463, loss: 0.16634276509284973 2023-01-22 10:36:56.863210: step: 656/463, loss: 0.19455400109291077 2023-01-22 10:36:57.529489: step: 658/463, loss: 0.6135392785072327 2023-01-22 10:36:58.138961: step: 660/463, loss: 0.2134610414505005 2023-01-22 10:36:58.758567: step: 662/463, loss: 0.047566983848810196 2023-01-22 10:36:59.378474: step: 664/463, loss: 0.16085729002952576 2023-01-22 10:37:00.014141: step: 666/463, loss: 0.20550133287906647 2023-01-22 10:37:00.599841: step: 668/463, loss: 0.3620936870574951 2023-01-22 10:37:01.159160: step: 670/463, loss: 0.5609982013702393 2023-01-22 10:37:01.756490: step: 672/463, loss: 0.6223273277282715 2023-01-22 10:37:02.407472: step: 674/463, loss: 0.6914418935775757 2023-01-22 10:37:03.058342: step: 676/463, loss: 0.36965328454971313 2023-01-22 10:37:03.617971: step: 678/463, loss: 0.13208265602588654 2023-01-22 10:37:04.292231: step: 680/463, loss: 5.155648231506348 2023-01-22 10:37:04.923136: step: 682/463, loss: 0.503275454044342 2023-01-22 10:37:05.535894: step: 684/463, loss: 0.15227098762989044 2023-01-22 10:37:06.187948: step: 686/463, loss: 0.12176734954118729 2023-01-22 10:37:06.821369: step: 688/463, loss: 0.09478053450584412 2023-01-22 10:37:07.414519: step: 690/463, loss: 0.2884616553783417 2023-01-22 10:37:07.993701: step: 692/463, loss: 1.4465655088424683 2023-01-22 10:37:08.540963: step: 694/463, loss: 1.026667833328247 2023-01-22 10:37:09.188490: step: 696/463, loss: 0.12842486798763275 2023-01-22 10:37:09.787784: step: 698/463, loss: 0.09896834194660187 2023-01-22 10:37:10.394544: step: 700/463, loss: 0.07807404547929764 2023-01-22 10:37:11.006090: step: 702/463, loss: 0.04168181121349335 2023-01-22 10:37:11.663538: step: 704/463, loss: 0.1086229458451271 2023-01-22 10:37:12.293880: step: 706/463, loss: 0.10747560113668442 2023-01-22 10:37:12.890706: step: 708/463, loss: 0.4578455090522766 2023-01-22 10:37:13.586205: step: 710/463, loss: 0.23231656849384308 2023-01-22 10:37:14.209647: step: 712/463, loss: 0.3161855936050415 2023-01-22 10:37:14.864686: step: 714/463, loss: 0.10036582499742508 2023-01-22 10:37:15.481854: step: 716/463, loss: 0.06386017054319382 2023-01-22 10:37:16.096875: step: 718/463, loss: 0.27133670449256897 2023-01-22 10:37:16.695997: step: 720/463, loss: 0.16375049948692322 2023-01-22 10:37:17.295977: step: 722/463, loss: 0.12791162729263306 2023-01-22 10:37:17.885724: step: 724/463, loss: 0.11244847625494003 2023-01-22 10:37:18.491128: step: 726/463, loss: 0.06352280080318451 2023-01-22 10:37:19.118368: step: 728/463, loss: 0.161606565117836 2023-01-22 10:37:19.673640: step: 730/463, loss: 0.11433148384094238 2023-01-22 10:37:20.275598: step: 732/463, loss: 0.19788090884685516 2023-01-22 10:37:20.936469: step: 734/463, loss: 0.08007632195949554 2023-01-22 10:37:21.562763: step: 736/463, loss: 0.1446334272623062 2023-01-22 10:37:22.208546: step: 738/463, loss: 0.29650601744651794 2023-01-22 10:37:22.887031: step: 740/463, loss: 0.7098445892333984 2023-01-22 10:37:23.472749: step: 742/463, loss: 0.21318648755550385 2023-01-22 10:37:24.090642: step: 744/463, loss: 0.1889760047197342 2023-01-22 10:37:24.753568: step: 746/463, loss: 0.12877604365348816 2023-01-22 10:37:25.349035: step: 748/463, loss: 0.12182393670082092 2023-01-22 10:37:25.963903: step: 750/463, loss: 0.2841183543205261 2023-01-22 10:37:26.545540: step: 752/463, loss: 0.15512509644031525 2023-01-22 10:37:27.126899: step: 754/463, loss: 0.12062467634677887 2023-01-22 10:37:27.719492: step: 756/463, loss: 0.15865936875343323 2023-01-22 10:37:28.328130: step: 758/463, loss: 0.12482234090566635 2023-01-22 10:37:28.948753: step: 760/463, loss: 0.06911307573318481 2023-01-22 10:37:29.580512: step: 762/463, loss: 0.7442855834960938 2023-01-22 10:37:30.228608: step: 764/463, loss: 0.24386733770370483 2023-01-22 10:37:30.808037: step: 766/463, loss: 0.09315664321184158 2023-01-22 10:37:31.366332: step: 768/463, loss: 0.15519924461841583 2023-01-22 10:37:31.940296: step: 770/463, loss: 0.12154491245746613 2023-01-22 10:37:32.529122: step: 772/463, loss: 0.08926746994256973 2023-01-22 10:37:33.176490: step: 774/463, loss: 0.04717402160167694 2023-01-22 10:37:33.919293: step: 776/463, loss: 0.16514192521572113 2023-01-22 10:37:34.523900: step: 778/463, loss: 0.2481343299150467 2023-01-22 10:37:35.071444: step: 780/463, loss: 0.1458498239517212 2023-01-22 10:37:35.694040: step: 782/463, loss: 0.12890134751796722 2023-01-22 10:37:36.287222: step: 784/463, loss: 0.2519560158252716 2023-01-22 10:37:36.968706: step: 786/463, loss: 0.10948509722948074 2023-01-22 10:37:37.646643: step: 788/463, loss: 7.444417476654053 2023-01-22 10:37:38.224415: step: 790/463, loss: 0.13372346758842468 2023-01-22 10:37:38.825548: step: 792/463, loss: 0.09975311905145645 2023-01-22 10:37:39.423591: step: 794/463, loss: 0.1279948651790619 2023-01-22 10:37:40.044241: step: 796/463, loss: 0.2808547616004944 2023-01-22 10:37:40.687347: step: 798/463, loss: 0.13118933141231537 2023-01-22 10:37:41.272983: step: 800/463, loss: 0.5269955396652222 2023-01-22 10:37:41.883184: step: 802/463, loss: 0.1489705890417099 2023-01-22 10:37:42.605218: step: 804/463, loss: 0.09192366898059845 2023-01-22 10:37:43.194960: step: 806/463, loss: 0.6658681631088257 2023-01-22 10:37:43.802736: step: 808/463, loss: 0.3161330819129944 2023-01-22 10:37:44.481083: step: 810/463, loss: 0.8518253564834595 2023-01-22 10:37:45.125127: step: 812/463, loss: 0.29379013180732727 2023-01-22 10:37:45.732559: step: 814/463, loss: 0.9104217886924744 2023-01-22 10:37:46.327595: step: 816/463, loss: 0.3292897641658783 2023-01-22 10:37:46.894159: step: 818/463, loss: 0.10330037027597427 2023-01-22 10:37:47.508470: step: 820/463, loss: 0.17516545951366425 2023-01-22 10:37:48.112141: step: 822/463, loss: 0.13572344183921814 2023-01-22 10:37:48.687301: step: 824/463, loss: 0.3371018171310425 2023-01-22 10:37:49.420033: step: 826/463, loss: 0.09697898477315903 2023-01-22 10:37:50.129845: step: 828/463, loss: 0.1890784353017807 2023-01-22 10:37:50.772134: step: 830/463, loss: 0.11863324791193008 2023-01-22 10:37:51.425918: step: 832/463, loss: 0.06436974555253983 2023-01-22 10:37:51.998422: step: 834/463, loss: 0.1450974941253662 2023-01-22 10:37:52.579459: step: 836/463, loss: 1.2689837217330933 2023-01-22 10:37:53.212022: step: 838/463, loss: 0.2716820538043976 2023-01-22 10:37:53.859049: step: 840/463, loss: 0.14493253827095032 2023-01-22 10:37:54.476420: step: 842/463, loss: 0.1309964507818222 2023-01-22 10:37:55.101403: step: 844/463, loss: 0.20912817120552063 2023-01-22 10:37:55.729166: step: 846/463, loss: 0.5990986824035645 2023-01-22 10:37:56.420924: step: 848/463, loss: 0.2713647484779358 2023-01-22 10:37:57.076049: step: 850/463, loss: 0.13493691384792328 2023-01-22 10:37:57.696216: step: 852/463, loss: 0.07767636328935623 2023-01-22 10:37:58.317263: step: 854/463, loss: 0.4309062957763672 2023-01-22 10:37:58.889595: step: 856/463, loss: 0.26253634691238403 2023-01-22 10:37:59.500377: step: 858/463, loss: 0.057649221271276474 2023-01-22 10:38:00.149882: step: 860/463, loss: 0.41135984659194946 2023-01-22 10:38:00.775916: step: 862/463, loss: 0.4964521527290344 2023-01-22 10:38:01.450666: step: 864/463, loss: 0.1342688947916031 2023-01-22 10:38:02.079659: step: 866/463, loss: 0.03948821872472763 2023-01-22 10:38:02.677449: step: 868/463, loss: 0.07460054755210876 2023-01-22 10:38:03.281319: step: 870/463, loss: 0.25301721692085266 2023-01-22 10:38:03.900573: step: 872/463, loss: 0.1827431321144104 2023-01-22 10:38:04.481288: step: 874/463, loss: 0.1411261409521103 2023-01-22 10:38:05.122010: step: 876/463, loss: 0.13656504452228546 2023-01-22 10:38:05.755223: step: 878/463, loss: 0.5958837270736694 2023-01-22 10:38:06.325695: step: 880/463, loss: 0.36162328720092773 2023-01-22 10:38:06.976758: step: 882/463, loss: 0.09074178338050842 2023-01-22 10:38:07.574288: step: 884/463, loss: 0.38318493962287903 2023-01-22 10:38:08.186503: step: 886/463, loss: 0.0527978278696537 2023-01-22 10:38:08.849668: step: 888/463, loss: 0.27555370330810547 2023-01-22 10:38:09.478486: step: 890/463, loss: 0.28918150067329407 2023-01-22 10:38:10.079019: step: 892/463, loss: 0.07164045423269272 2023-01-22 10:38:10.674879: step: 894/463, loss: 0.5946390628814697 2023-01-22 10:38:11.323019: step: 896/463, loss: 0.09591452777385712 2023-01-22 10:38:11.952651: step: 898/463, loss: 0.2889423370361328 2023-01-22 10:38:12.577247: step: 900/463, loss: 0.10507835447788239 2023-01-22 10:38:13.183463: step: 902/463, loss: 0.8069683313369751 2023-01-22 10:38:13.861444: step: 904/463, loss: 0.09550615400075912 2023-01-22 10:38:14.453621: step: 906/463, loss: 0.3159022629261017 2023-01-22 10:38:15.121510: step: 908/463, loss: 0.39066919684410095 2023-01-22 10:38:15.710235: step: 910/463, loss: 0.1605747938156128 2023-01-22 10:38:16.383352: step: 912/463, loss: 0.12333541363477707 2023-01-22 10:38:17.007506: step: 914/463, loss: 0.14795920252799988 2023-01-22 10:38:17.552229: step: 916/463, loss: 0.0860903412103653 2023-01-22 10:38:18.106153: step: 918/463, loss: 0.7397198677062988 2023-01-22 10:38:18.682130: step: 920/463, loss: 0.6861542463302612 2023-01-22 10:38:19.346338: step: 922/463, loss: 0.2602579891681671 2023-01-22 10:38:19.991585: step: 924/463, loss: 0.2313385158777237 2023-01-22 10:38:20.622457: step: 926/463, loss: 0.20819246768951416 ================================================== Loss: 0.276 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32697583547632153, 'r': 0.3493119456796376, 'f1': 0.33777503738196146}, 'combined': 0.24888686964986634, 'epoch': 12} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32704014522535685, 'r': 0.4148119489071622, 'f1': 0.3657337118403505}, 'combined': 0.2834873747279272, 'epoch': 12} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28096831137112466, 'r': 0.3550757027953872, 'f1': 0.3137047701142817}, 'combined': 0.2311508832421023, 'epoch': 12} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.31140868246618736, 'r': 0.4173105322019312, 'f1': 0.3566644611435202}, 'combined': 0.2764576205992836, 'epoch': 12} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2924916708884511, 'r': 0.3468829113952219, 'f1': 0.31737377483555895}, 'combined': 0.23385436040514868, 'epoch': 12} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3140376709485158, 'r': 0.4049585039896762, 'f1': 0.35374937963931574}, 'combined': 0.27419808373956533, 'epoch': 12} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.20907738095238093, 'r': 0.286734693877551, 'f1': 0.24182444061962133}, 'combined': 0.16121629374641422, 'epoch': 12} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.27702702702702703, 'r': 0.44565217391304346, 'f1': 0.3416666666666667}, 'combined': 0.17083333333333334, 'epoch': 12} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36904761904761907, 'r': 0.2672413793103448, 'f1': 0.31}, 'combined': 0.20666666666666667, 'epoch': 12} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29855383301254707, 'r': 0.34444161379815674, 'f1': 0.3198603180116804}, 'combined': 0.23568655011386977, 'epoch': 7} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.30988948788864984, 'r': 0.3867922100668993, 'f1': 0.34409642236532006}, 'combined': 0.2667158871922577, 'epoch': 7} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2636363636363636, 'r': 0.4142857142857143, 'f1': 0.3222222222222222}, 'combined': 0.2148148148148148, 'epoch': 7} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.294349755002927, 'r': 0.3345645222898544, 'f1': 0.3131714089640378}, 'combined': 0.23075788028929098, 'epoch': 6} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3358950473035951, 'r': 0.33466466251493726, 'f1': 0.3352787261158821}, 'combined': 0.25988111785058804, 'epoch': 6} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 6} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2671913725285436, 'r': 0.31941283622956823, 'f1': 0.29097763991872505}, 'combined': 0.21440457678221844, 'epoch': 10} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32113335504985907, 'r': 0.3837071337911919, 'f1': 0.34964268137756854}, 'combined': 0.2710149013548618, 'epoch': 10} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.3103448275862069, 'f1': 0.339622641509434}, 'combined': 0.22641509433962265, 'epoch': 10} ****************************** Epoch: 13 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 10:41:00.076222: step: 2/463, loss: 2.565711498260498 2023-01-22 10:41:00.782313: step: 4/463, loss: 0.16664744913578033 2023-01-22 10:41:01.408599: step: 6/463, loss: 0.1379290223121643 2023-01-22 10:41:01.992982: step: 8/463, loss: 0.1690729409456253 2023-01-22 10:41:02.568646: step: 10/463, loss: 0.17966759204864502 2023-01-22 10:41:03.161385: step: 12/463, loss: 0.22286882996559143 2023-01-22 10:41:03.753519: step: 14/463, loss: 0.15766093134880066 2023-01-22 10:41:04.417653: step: 16/463, loss: 0.1296265721321106 2023-01-22 10:41:05.059803: step: 18/463, loss: 0.07797739654779434 2023-01-22 10:41:05.668369: step: 20/463, loss: 0.14893348515033722 2023-01-22 10:41:06.222476: step: 22/463, loss: 0.08049022406339645 2023-01-22 10:41:06.820519: step: 24/463, loss: 0.8503106236457825 2023-01-22 10:41:07.412254: step: 26/463, loss: 0.1962115317583084 2023-01-22 10:41:08.043938: step: 28/463, loss: 0.13229620456695557 2023-01-22 10:41:08.700639: step: 30/463, loss: 0.47463545203208923 2023-01-22 10:41:09.308632: step: 32/463, loss: 0.2727740705013275 2023-01-22 10:41:09.884453: step: 34/463, loss: 0.0912417396903038 2023-01-22 10:41:10.523437: step: 36/463, loss: 0.04754031449556351 2023-01-22 10:41:11.120329: step: 38/463, loss: 0.3348187804222107 2023-01-22 10:41:11.717112: step: 40/463, loss: 0.09286195039749146 2023-01-22 10:41:12.423123: step: 42/463, loss: 0.23952889442443848 2023-01-22 10:41:12.988316: step: 44/463, loss: 0.07650396227836609 2023-01-22 10:41:13.613663: step: 46/463, loss: 0.15996961295604706 2023-01-22 10:41:14.243377: step: 48/463, loss: 0.05410662665963173 2023-01-22 10:41:14.931326: step: 50/463, loss: 0.33852213621139526 2023-01-22 10:41:15.563351: step: 52/463, loss: 0.04223308339715004 2023-01-22 10:41:16.144322: step: 54/463, loss: 0.5379517078399658 2023-01-22 10:41:16.782153: step: 56/463, loss: 0.1742713451385498 2023-01-22 10:41:17.464003: step: 58/463, loss: 0.21306444704532623 2023-01-22 10:41:18.056645: step: 60/463, loss: 0.11296823620796204 2023-01-22 10:41:18.728915: step: 62/463, loss: 0.28409895300865173 2023-01-22 10:41:19.462624: step: 64/463, loss: 0.060683343559503555 2023-01-22 10:41:20.105128: step: 66/463, loss: 0.08932936936616898 2023-01-22 10:41:20.747997: step: 68/463, loss: 0.21831552684307098 2023-01-22 10:41:21.348696: step: 70/463, loss: 0.08629943430423737 2023-01-22 10:41:21.978009: step: 72/463, loss: 0.3023335933685303 2023-01-22 10:41:22.688672: step: 74/463, loss: 0.11173388361930847 2023-01-22 10:41:23.303195: step: 76/463, loss: 0.09676092118024826 2023-01-22 10:41:23.982050: step: 78/463, loss: 0.10400616377592087 2023-01-22 10:41:24.571479: step: 80/463, loss: 0.6862213015556335 2023-01-22 10:41:25.218643: step: 82/463, loss: 0.14357812702655792 2023-01-22 10:41:25.840146: step: 84/463, loss: 0.0835581049323082 2023-01-22 10:41:26.419447: step: 86/463, loss: 0.08401966094970703 2023-01-22 10:41:26.996341: step: 88/463, loss: 0.3063279092311859 2023-01-22 10:41:27.613451: step: 90/463, loss: 0.12771596014499664 2023-01-22 10:41:28.252907: step: 92/463, loss: 0.15308119356632233 2023-01-22 10:41:28.873537: step: 94/463, loss: 0.05297008529305458 2023-01-22 10:41:29.531778: step: 96/463, loss: 0.1168137788772583 2023-01-22 10:41:30.107229: step: 98/463, loss: 0.06922732293605804 2023-01-22 10:41:30.664387: step: 100/463, loss: 0.16214075684547424 2023-01-22 10:41:31.267308: step: 102/463, loss: 0.07149788737297058 2023-01-22 10:41:31.957427: step: 104/463, loss: 0.27030134201049805 2023-01-22 10:41:32.575925: step: 106/463, loss: 0.07457955926656723 2023-01-22 10:41:33.169942: step: 108/463, loss: 0.022927306592464447 2023-01-22 10:41:33.816966: step: 110/463, loss: 0.14719931781291962 2023-01-22 10:41:34.366654: step: 112/463, loss: 0.799362301826477 2023-01-22 10:41:34.916315: step: 114/463, loss: 0.05564933270215988 2023-01-22 10:41:35.523561: step: 116/463, loss: 0.07638891041278839 2023-01-22 10:41:36.118332: step: 118/463, loss: 0.24894677102565765 2023-01-22 10:41:36.708367: step: 120/463, loss: 0.06474931538105011 2023-01-22 10:41:37.321717: step: 122/463, loss: 0.12605683505535126 2023-01-22 10:41:37.871361: step: 124/463, loss: 0.12097879499197006 2023-01-22 10:41:38.504909: step: 126/463, loss: 0.2464277297258377 2023-01-22 10:41:39.103193: step: 128/463, loss: 0.264183908700943 2023-01-22 10:41:39.714324: step: 130/463, loss: 0.18785572052001953 2023-01-22 10:41:40.294818: step: 132/463, loss: 0.05885416641831398 2023-01-22 10:41:40.934375: step: 134/463, loss: 0.11870820075273514 2023-01-22 10:41:41.571571: step: 136/463, loss: 0.12184017151594162 2023-01-22 10:41:42.206148: step: 138/463, loss: 0.11965828388929367 2023-01-22 10:41:42.820714: step: 140/463, loss: 0.3557186424732208 2023-01-22 10:41:43.364313: step: 142/463, loss: 0.04448102414608002 2023-01-22 10:41:43.952114: step: 144/463, loss: 0.19215553998947144 2023-01-22 10:41:44.598105: step: 146/463, loss: 0.10128043591976166 2023-01-22 10:41:45.228858: step: 148/463, loss: 6.752254009246826 2023-01-22 10:41:45.884010: step: 150/463, loss: 0.06240927055478096 2023-01-22 10:41:46.493715: step: 152/463, loss: 0.5488643646240234 2023-01-22 10:41:47.043756: step: 154/463, loss: 0.2403147667646408 2023-01-22 10:41:47.635104: step: 156/463, loss: 0.1681583970785141 2023-01-22 10:41:48.202301: step: 158/463, loss: 0.10901160538196564 2023-01-22 10:41:48.827797: step: 160/463, loss: 0.22435398399829865 2023-01-22 10:41:49.510406: step: 162/463, loss: 0.09708784520626068 2023-01-22 10:41:50.131621: step: 164/463, loss: 0.11457079648971558 2023-01-22 10:41:50.699410: step: 166/463, loss: 0.3900364637374878 2023-01-22 10:41:51.273254: step: 168/463, loss: 0.28347858786582947 2023-01-22 10:41:51.847115: step: 170/463, loss: 0.12434875220060349 2023-01-22 10:41:52.477382: step: 172/463, loss: 0.022395463660359383 2023-01-22 10:41:53.128914: step: 174/463, loss: 0.08362209796905518 2023-01-22 10:41:53.699042: step: 176/463, loss: 0.05168960988521576 2023-01-22 10:41:54.307393: step: 178/463, loss: 0.12967200577259064 2023-01-22 10:41:54.911368: step: 180/463, loss: 0.11016476154327393 2023-01-22 10:41:55.483734: step: 182/463, loss: 0.14662586152553558 2023-01-22 10:41:56.087314: step: 184/463, loss: 0.3271436095237732 2023-01-22 10:41:56.757936: step: 186/463, loss: 0.04441945254802704 2023-01-22 10:41:57.364053: step: 188/463, loss: 0.11458349972963333 2023-01-22 10:41:57.937861: step: 190/463, loss: 0.14119161665439606 2023-01-22 10:41:58.563447: step: 192/463, loss: 0.08486742526292801 2023-01-22 10:41:59.176904: step: 194/463, loss: 0.08645319938659668 2023-01-22 10:41:59.811191: step: 196/463, loss: 0.12720341980457306 2023-01-22 10:42:00.367463: step: 198/463, loss: 0.13414324820041656 2023-01-22 10:42:00.983309: step: 200/463, loss: 0.12985605001449585 2023-01-22 10:42:01.589827: step: 202/463, loss: 0.2638033926486969 2023-01-22 10:42:02.256743: step: 204/463, loss: 0.3215266168117523 2023-01-22 10:42:02.964967: step: 206/463, loss: 0.11334839463233948 2023-01-22 10:42:03.595594: step: 208/463, loss: 0.30918675661087036 2023-01-22 10:42:04.134350: step: 210/463, loss: 0.06669747084379196 2023-01-22 10:42:04.793562: step: 212/463, loss: 0.17311087250709534 2023-01-22 10:42:05.412277: step: 214/463, loss: 0.26042595505714417 2023-01-22 10:42:06.033733: step: 216/463, loss: 0.3607120215892792 2023-01-22 10:42:06.588116: step: 218/463, loss: 0.07826436311006546 2023-01-22 10:42:07.183400: step: 220/463, loss: 0.1280171126127243 2023-01-22 10:42:07.792648: step: 222/463, loss: 0.07049118727445602 2023-01-22 10:42:08.428733: step: 224/463, loss: 0.1527566909790039 2023-01-22 10:42:09.054721: step: 226/463, loss: 0.11164062470197678 2023-01-22 10:42:09.609285: step: 228/463, loss: 0.07164878398180008 2023-01-22 10:42:10.229169: step: 230/463, loss: 0.10502752661705017 2023-01-22 10:42:10.846093: step: 232/463, loss: 0.17939122021198273 2023-01-22 10:42:11.476681: step: 234/463, loss: 0.33820387721061707 2023-01-22 10:42:12.006960: step: 236/463, loss: 0.352577805519104 2023-01-22 10:42:12.602625: step: 238/463, loss: 0.07940854877233505 2023-01-22 10:42:13.227085: step: 240/463, loss: 0.2308470606803894 2023-01-22 10:42:13.807963: step: 242/463, loss: 0.17906461656093597 2023-01-22 10:42:14.428083: step: 244/463, loss: 0.1280842423439026 2023-01-22 10:42:14.962564: step: 246/463, loss: 0.2776082158088684 2023-01-22 10:42:15.567817: step: 248/463, loss: 0.30667030811309814 2023-01-22 10:42:16.162141: step: 250/463, loss: 0.04092853143811226 2023-01-22 10:42:16.822004: step: 252/463, loss: 0.3598691523075104 2023-01-22 10:42:17.432919: step: 254/463, loss: 0.061636462807655334 2023-01-22 10:42:18.101770: step: 256/463, loss: 0.12093415856361389 2023-01-22 10:42:18.750969: step: 258/463, loss: 0.10726702958345413 2023-01-22 10:42:19.330647: step: 260/463, loss: 0.20799991488456726 2023-01-22 10:42:19.924247: step: 262/463, loss: 0.11698877066373825 2023-01-22 10:42:20.605532: step: 264/463, loss: 0.9746891856193542 2023-01-22 10:42:21.301192: step: 266/463, loss: 0.11387989670038223 2023-01-22 10:42:21.890448: step: 268/463, loss: 0.25322672724723816 2023-01-22 10:42:22.536493: step: 270/463, loss: 0.35835787653923035 2023-01-22 10:42:23.112696: step: 272/463, loss: 0.10283799469470978 2023-01-22 10:42:23.774338: step: 274/463, loss: 0.10783140361309052 2023-01-22 10:42:24.430504: step: 276/463, loss: 0.2683259844779968 2023-01-22 10:42:25.148348: step: 278/463, loss: 0.3214755058288574 2023-01-22 10:42:25.764346: step: 280/463, loss: 0.04341986030340195 2023-01-22 10:42:26.448482: step: 282/463, loss: 0.1504833847284317 2023-01-22 10:42:27.062958: step: 284/463, loss: 0.09483832120895386 2023-01-22 10:42:27.689135: step: 286/463, loss: 0.013112193904817104 2023-01-22 10:42:28.332831: step: 288/463, loss: 0.20012256503105164 2023-01-22 10:42:28.977264: step: 290/463, loss: 0.3879971206188202 2023-01-22 10:42:29.610340: step: 292/463, loss: 0.07313261926174164 2023-01-22 10:42:30.213406: step: 294/463, loss: 0.03994950279593468 2023-01-22 10:42:30.926266: step: 296/463, loss: 0.17103610932826996 2023-01-22 10:42:31.609062: step: 298/463, loss: 0.13750629127025604 2023-01-22 10:42:32.252409: step: 300/463, loss: 0.067560113966465 2023-01-22 10:42:32.889472: step: 302/463, loss: 0.3391919434070587 2023-01-22 10:42:33.542942: step: 304/463, loss: 0.12707072496414185 2023-01-22 10:42:34.166894: step: 306/463, loss: 0.4030746817588806 2023-01-22 10:42:34.795953: step: 308/463, loss: 0.24013054370880127 2023-01-22 10:42:35.436673: step: 310/463, loss: 0.11444075405597687 2023-01-22 10:42:35.996265: step: 312/463, loss: 0.05910976603627205 2023-01-22 10:42:36.672147: step: 314/463, loss: 0.18328982591629028 2023-01-22 10:42:37.258886: step: 316/463, loss: 0.21811260282993317 2023-01-22 10:42:37.869932: step: 318/463, loss: 0.08503499627113342 2023-01-22 10:42:38.475253: step: 320/463, loss: 0.05776338651776314 2023-01-22 10:42:39.042421: step: 322/463, loss: 0.1270158886909485 2023-01-22 10:42:39.759169: step: 324/463, loss: 0.11699722707271576 2023-01-22 10:42:40.511435: step: 326/463, loss: 0.07528286427259445 2023-01-22 10:42:41.133146: step: 328/463, loss: 0.0936053916811943 2023-01-22 10:42:41.780736: step: 330/463, loss: 0.14466506242752075 2023-01-22 10:42:42.355542: step: 332/463, loss: 0.13197006285190582 2023-01-22 10:42:42.970292: step: 334/463, loss: 0.10153637081384659 2023-01-22 10:42:43.537688: step: 336/463, loss: 0.06331086158752441 2023-01-22 10:42:44.149701: step: 338/463, loss: 0.1506926715373993 2023-01-22 10:42:44.763415: step: 340/463, loss: 0.38452062010765076 2023-01-22 10:42:45.337794: step: 342/463, loss: 0.08930423855781555 2023-01-22 10:42:45.940929: step: 344/463, loss: 0.14809022843837738 2023-01-22 10:42:46.576119: step: 346/463, loss: 0.12939181923866272 2023-01-22 10:42:47.169078: step: 348/463, loss: 0.24422359466552734 2023-01-22 10:42:47.840557: step: 350/463, loss: 0.555176854133606 2023-01-22 10:42:48.443007: step: 352/463, loss: 0.03883303701877594 2023-01-22 10:42:49.010659: step: 354/463, loss: 0.07291294634342194 2023-01-22 10:42:49.668385: step: 356/463, loss: 0.19484737515449524 2023-01-22 10:42:50.292435: step: 358/463, loss: 0.20809970796108246 2023-01-22 10:42:50.925903: step: 360/463, loss: 0.18012313544750214 2023-01-22 10:42:51.541444: step: 362/463, loss: 0.11634792387485504 2023-01-22 10:42:52.140614: step: 364/463, loss: 0.7800766825675964 2023-01-22 10:42:52.687461: step: 366/463, loss: 8.991044044494629 2023-01-22 10:42:53.292826: step: 368/463, loss: 0.3084280788898468 2023-01-22 10:42:53.882060: step: 370/463, loss: 0.07614462822675705 2023-01-22 10:42:54.521365: step: 372/463, loss: 0.3468588590621948 2023-01-22 10:42:55.166607: step: 374/463, loss: 0.1128867119550705 2023-01-22 10:42:55.771042: step: 376/463, loss: 0.12678956985473633 2023-01-22 10:42:56.362639: step: 378/463, loss: 0.19237515330314636 2023-01-22 10:42:57.020337: step: 380/463, loss: 0.10420754551887512 2023-01-22 10:42:57.628652: step: 382/463, loss: 0.6848708391189575 2023-01-22 10:42:58.197999: step: 384/463, loss: 0.26128289103507996 2023-01-22 10:42:58.831438: step: 386/463, loss: 0.037828296422958374 2023-01-22 10:42:59.568278: step: 388/463, loss: 0.49120399355888367 2023-01-22 10:43:00.130931: step: 390/463, loss: 0.03331886976957321 2023-01-22 10:43:00.755133: step: 392/463, loss: 0.1993192881345749 2023-01-22 10:43:01.405019: step: 394/463, loss: 0.06030919402837753 2023-01-22 10:43:02.001433: step: 396/463, loss: 0.10487332195043564 2023-01-22 10:43:02.598407: step: 398/463, loss: 0.03509514406323433 2023-01-22 10:43:03.297882: step: 400/463, loss: 0.11517808586359024 2023-01-22 10:43:03.875154: step: 402/463, loss: 0.05602710321545601 2023-01-22 10:43:04.491858: step: 404/463, loss: 0.43672317266464233 2023-01-22 10:43:05.063454: step: 406/463, loss: 0.1379372626543045 2023-01-22 10:43:05.654940: step: 408/463, loss: 0.07176533341407776 2023-01-22 10:43:06.217588: step: 410/463, loss: 0.07386907935142517 2023-01-22 10:43:06.898548: step: 412/463, loss: 0.1997169405221939 2023-01-22 10:43:07.493648: step: 414/463, loss: 0.6408401727676392 2023-01-22 10:43:08.117039: step: 416/463, loss: 0.11517812311649323 2023-01-22 10:43:08.746274: step: 418/463, loss: 0.2481084167957306 2023-01-22 10:43:09.347204: step: 420/463, loss: 0.07898946851491928 2023-01-22 10:43:09.957501: step: 422/463, loss: 0.09962379187345505 2023-01-22 10:43:10.591929: step: 424/463, loss: 0.20499686896800995 2023-01-22 10:43:11.242344: step: 426/463, loss: 0.0725511908531189 2023-01-22 10:43:11.897178: step: 428/463, loss: 0.1533195674419403 2023-01-22 10:43:12.484615: step: 430/463, loss: 0.22300267219543457 2023-01-22 10:43:13.111203: step: 432/463, loss: 0.3045209050178528 2023-01-22 10:43:13.749428: step: 434/463, loss: 0.1204148530960083 2023-01-22 10:43:14.371676: step: 436/463, loss: 0.19843950867652893 2023-01-22 10:43:14.913481: step: 438/463, loss: 0.12317971885204315 2023-01-22 10:43:15.505882: step: 440/463, loss: 0.058026187121868134 2023-01-22 10:43:16.089970: step: 442/463, loss: 0.1778186559677124 2023-01-22 10:43:16.793217: step: 444/463, loss: 0.10294865071773529 2023-01-22 10:43:17.469198: step: 446/463, loss: 0.1299281120300293 2023-01-22 10:43:18.037194: step: 448/463, loss: 0.07985076308250427 2023-01-22 10:43:18.603428: step: 450/463, loss: 0.17704281210899353 2023-01-22 10:43:19.177889: step: 452/463, loss: 0.3871375322341919 2023-01-22 10:43:19.741624: step: 454/463, loss: 0.21685855090618134 2023-01-22 10:43:20.351383: step: 456/463, loss: 0.2827184498310089 2023-01-22 10:43:20.986651: step: 458/463, loss: 2.6488826274871826 2023-01-22 10:43:21.575234: step: 460/463, loss: 0.09979060292243958 2023-01-22 10:43:22.156664: step: 462/463, loss: 0.1777031421661377 2023-01-22 10:43:22.776422: step: 464/463, loss: 0.22616173326969147 2023-01-22 10:43:23.386419: step: 466/463, loss: 0.35037845373153687 2023-01-22 10:43:24.050996: step: 468/463, loss: 0.26178237795829773 2023-01-22 10:43:24.753331: step: 470/463, loss: 0.11901715397834778 2023-01-22 10:43:25.334649: step: 472/463, loss: 0.17046403884887695 2023-01-22 10:43:25.964145: step: 474/463, loss: 0.35896649956703186 2023-01-22 10:43:26.630003: step: 476/463, loss: 0.1533816158771515 2023-01-22 10:43:27.273281: step: 478/463, loss: 0.817365825176239 2023-01-22 10:43:27.968147: step: 480/463, loss: 0.5074924826622009 2023-01-22 10:43:28.548247: step: 482/463, loss: 0.06507206708192825 2023-01-22 10:43:29.258035: step: 484/463, loss: 0.09565138816833496 2023-01-22 10:43:29.842523: step: 486/463, loss: 0.10228903591632843 2023-01-22 10:43:30.421467: step: 488/463, loss: 0.11880123615264893 2023-01-22 10:43:31.099595: step: 490/463, loss: 0.40800732374191284 2023-01-22 10:43:31.718039: step: 492/463, loss: 0.14800399541854858 2023-01-22 10:43:32.357752: step: 494/463, loss: 0.09144240617752075 2023-01-22 10:43:32.965992: step: 496/463, loss: 0.30299872159957886 2023-01-22 10:43:33.627683: step: 498/463, loss: 0.3642329275608063 2023-01-22 10:43:34.201313: step: 500/463, loss: 0.12590834498405457 2023-01-22 10:43:34.840980: step: 502/463, loss: 0.05030178278684616 2023-01-22 10:43:35.521134: step: 504/463, loss: 0.09050225466489792 2023-01-22 10:43:36.135778: step: 506/463, loss: 0.17131109535694122 2023-01-22 10:43:36.774417: step: 508/463, loss: 0.0957876667380333 2023-01-22 10:43:37.333044: step: 510/463, loss: 0.5671736598014832 2023-01-22 10:43:37.931560: step: 512/463, loss: 0.19126763939857483 2023-01-22 10:43:38.595786: step: 514/463, loss: 0.08095432072877884 2023-01-22 10:43:39.262565: step: 516/463, loss: 0.1317719668149948 2023-01-22 10:43:39.911025: step: 518/463, loss: 0.40390151739120483 2023-01-22 10:43:40.595289: step: 520/463, loss: 0.08847786486148834 2023-01-22 10:43:41.215761: step: 522/463, loss: 0.15618719160556793 2023-01-22 10:43:41.907706: step: 524/463, loss: 0.20740501582622528 2023-01-22 10:43:42.499982: step: 526/463, loss: 0.19934247434139252 2023-01-22 10:43:43.171358: step: 528/463, loss: 0.18491405248641968 2023-01-22 10:43:43.784881: step: 530/463, loss: 0.07651141285896301 2023-01-22 10:43:44.367946: step: 532/463, loss: 1.7004739046096802 2023-01-22 10:43:45.028519: step: 534/463, loss: 0.059145186096429825 2023-01-22 10:43:45.628988: step: 536/463, loss: 0.11134350299835205 2023-01-22 10:43:46.236585: step: 538/463, loss: 0.08535700291395187 2023-01-22 10:43:46.856099: step: 540/463, loss: 0.5215968489646912 2023-01-22 10:43:47.501249: step: 542/463, loss: 0.12605799734592438 2023-01-22 10:43:48.100277: step: 544/463, loss: 0.11664626747369766 2023-01-22 10:43:48.664660: step: 546/463, loss: 0.2695358395576477 2023-01-22 10:43:49.332974: step: 548/463, loss: 0.23393937945365906 2023-01-22 10:43:49.931326: step: 550/463, loss: 0.2841077148914337 2023-01-22 10:43:50.499757: step: 552/463, loss: 4.392961025238037 2023-01-22 10:43:51.130772: step: 554/463, loss: 2.2758395671844482 2023-01-22 10:43:51.724104: step: 556/463, loss: 0.08077837526798248 2023-01-22 10:43:52.351344: step: 558/463, loss: 0.1420605629682541 2023-01-22 10:43:53.033297: step: 560/463, loss: 0.1339176446199417 2023-01-22 10:43:53.613591: step: 562/463, loss: 0.11798575520515442 2023-01-22 10:43:54.274066: step: 564/463, loss: 0.9329640865325928 2023-01-22 10:43:54.832945: step: 566/463, loss: 0.07212428003549576 2023-01-22 10:43:55.438117: step: 568/463, loss: 0.3512352705001831 2023-01-22 10:43:56.096462: step: 570/463, loss: 0.15040907263755798 2023-01-22 10:43:56.660276: step: 572/463, loss: 0.4386076331138611 2023-01-22 10:43:57.253672: step: 574/463, loss: 0.08159884810447693 2023-01-22 10:43:57.901731: step: 576/463, loss: 0.16141971945762634 2023-01-22 10:43:58.463012: step: 578/463, loss: 0.1143106147646904 2023-01-22 10:43:59.093842: step: 580/463, loss: 0.37543439865112305 2023-01-22 10:43:59.734033: step: 582/463, loss: 0.16019099950790405 2023-01-22 10:44:00.352481: step: 584/463, loss: 0.18394871056079865 2023-01-22 10:44:01.008749: step: 586/463, loss: 0.08112835884094238 2023-01-22 10:44:01.582694: step: 588/463, loss: 0.17310790717601776 2023-01-22 10:44:02.156047: step: 590/463, loss: 0.12453050166368484 2023-01-22 10:44:02.674115: step: 592/463, loss: 0.16222529113292694 2023-01-22 10:44:03.320342: step: 594/463, loss: 0.2559312880039215 2023-01-22 10:44:03.929873: step: 596/463, loss: 0.3091009259223938 2023-01-22 10:44:04.557566: step: 598/463, loss: 0.06659483164548874 2023-01-22 10:44:05.147061: step: 600/463, loss: 0.15208786725997925 2023-01-22 10:44:05.825016: step: 602/463, loss: 0.09993623197078705 2023-01-22 10:44:06.440372: step: 604/463, loss: 0.22920849919319153 2023-01-22 10:44:07.062476: step: 606/463, loss: 0.11052244901657104 2023-01-22 10:44:07.665349: step: 608/463, loss: 0.3802890479564667 2023-01-22 10:44:08.329175: step: 610/463, loss: 0.1383506804704666 2023-01-22 10:44:09.016874: step: 612/463, loss: 0.028482681140303612 2023-01-22 10:44:09.610687: step: 614/463, loss: 1.3009333610534668 2023-01-22 10:44:10.220121: step: 616/463, loss: 0.18642878532409668 2023-01-22 10:44:10.807189: step: 618/463, loss: 0.12663745880126953 2023-01-22 10:44:11.381843: step: 620/463, loss: 0.05997009947896004 2023-01-22 10:44:11.960286: step: 622/463, loss: 0.12205195426940918 2023-01-22 10:44:12.635795: step: 624/463, loss: 0.3809179365634918 2023-01-22 10:44:13.172766: step: 626/463, loss: 0.18063528835773468 2023-01-22 10:44:13.816324: step: 628/463, loss: 0.14560022950172424 2023-01-22 10:44:14.562594: step: 630/463, loss: 0.36161816120147705 2023-01-22 10:44:15.170895: step: 632/463, loss: 0.7624815702438354 2023-01-22 10:44:15.757556: step: 634/463, loss: 0.042645346373319626 2023-01-22 10:44:16.370247: step: 636/463, loss: 0.1863245815038681 2023-01-22 10:44:17.001159: step: 638/463, loss: 0.06144614890217781 2023-01-22 10:44:17.538126: step: 640/463, loss: 0.12099575996398926 2023-01-22 10:44:18.125951: step: 642/463, loss: 0.06944125890731812 2023-01-22 10:44:18.711000: step: 644/463, loss: 0.09125297516584396 2023-01-22 10:44:19.377905: step: 646/463, loss: 0.31644928455352783 2023-01-22 10:44:19.968837: step: 648/463, loss: 0.244612917304039 2023-01-22 10:44:20.571921: step: 650/463, loss: 0.1229962483048439 2023-01-22 10:44:21.196255: step: 652/463, loss: 0.16088464856147766 2023-01-22 10:44:21.934667: step: 654/463, loss: 0.592374861240387 2023-01-22 10:44:22.605452: step: 656/463, loss: 0.12203109264373779 2023-01-22 10:44:23.251104: step: 658/463, loss: 0.15243643522262573 2023-01-22 10:44:23.870308: step: 660/463, loss: 0.06060110405087471 2023-01-22 10:44:24.487756: step: 662/463, loss: 0.1305978149175644 2023-01-22 10:44:25.123417: step: 664/463, loss: 0.0422486811876297 2023-01-22 10:44:25.728860: step: 666/463, loss: 0.04774981364607811 2023-01-22 10:44:26.369563: step: 668/463, loss: 0.361824631690979 2023-01-22 10:44:27.005517: step: 670/463, loss: 0.1629701405763626 2023-01-22 10:44:27.620440: step: 672/463, loss: 0.10527696460485458 2023-01-22 10:44:28.302910: step: 674/463, loss: 0.10681945085525513 2023-01-22 10:44:28.913466: step: 676/463, loss: 0.4135546088218689 2023-01-22 10:44:29.545695: step: 678/463, loss: 0.08351342380046844 2023-01-22 10:44:30.144931: step: 680/463, loss: 0.07854975014925003 2023-01-22 10:44:30.875516: step: 682/463, loss: 0.5218799710273743 2023-01-22 10:44:31.491981: step: 684/463, loss: 0.06561725586652756 2023-01-22 10:44:32.124900: step: 686/463, loss: 0.40641576051712036 2023-01-22 10:44:32.725860: step: 688/463, loss: 0.07957141101360321 2023-01-22 10:44:33.377995: step: 690/463, loss: 0.14579223096370697 2023-01-22 10:44:33.926273: step: 692/463, loss: 0.6082996726036072 2023-01-22 10:44:34.531175: step: 694/463, loss: 0.1418827623128891 2023-01-22 10:44:35.173021: step: 696/463, loss: 0.2510085105895996 2023-01-22 10:44:35.800366: step: 698/463, loss: 0.0828835517168045 2023-01-22 10:44:36.320738: step: 700/463, loss: 0.1153096854686737 2023-01-22 10:44:36.938662: step: 702/463, loss: 0.29370972514152527 2023-01-22 10:44:37.604557: step: 704/463, loss: 0.15233348309993744 2023-01-22 10:44:38.199828: step: 706/463, loss: 0.199041947722435 2023-01-22 10:44:38.816990: step: 708/463, loss: 0.08511388301849365 2023-01-22 10:44:39.430160: step: 710/463, loss: 0.5866748094558716 2023-01-22 10:44:40.061490: step: 712/463, loss: 0.11261086165904999 2023-01-22 10:44:40.626548: step: 714/463, loss: 0.1824646145105362 2023-01-22 10:44:41.226694: step: 716/463, loss: 0.566473126411438 2023-01-22 10:44:41.790646: step: 718/463, loss: 0.21250124275684357 2023-01-22 10:44:42.343799: step: 720/463, loss: 0.14525753259658813 2023-01-22 10:44:43.036387: step: 722/463, loss: 0.1874159425497055 2023-01-22 10:44:43.687407: step: 724/463, loss: 0.5277420878410339 2023-01-22 10:44:44.344941: step: 726/463, loss: 0.08111932873725891 2023-01-22 10:44:44.953942: step: 728/463, loss: 0.5192026495933533 2023-01-22 10:44:45.518820: step: 730/463, loss: 0.05693024396896362 2023-01-22 10:44:46.137724: step: 732/463, loss: 0.10415806621313095 2023-01-22 10:44:46.729943: step: 734/463, loss: 0.14154361188411713 2023-01-22 10:44:47.371435: step: 736/463, loss: 0.24166518449783325 2023-01-22 10:44:48.044571: step: 738/463, loss: 0.057798732072114944 2023-01-22 10:44:48.682181: step: 740/463, loss: 0.47920742630958557 2023-01-22 10:44:49.235331: step: 742/463, loss: 0.13112948834896088 2023-01-22 10:44:49.888450: step: 744/463, loss: 0.13557542860507965 2023-01-22 10:44:50.504723: step: 746/463, loss: 0.07506858557462692 2023-01-22 10:44:51.162459: step: 748/463, loss: 0.09090665727853775 2023-01-22 10:44:51.809653: step: 750/463, loss: 0.1906386762857437 2023-01-22 10:44:52.473090: step: 752/463, loss: 0.05091777443885803 2023-01-22 10:44:53.025678: step: 754/463, loss: 0.8419276475906372 2023-01-22 10:44:53.597230: step: 756/463, loss: 0.06863037496805191 2023-01-22 10:44:54.283912: step: 758/463, loss: 0.08943196386098862 2023-01-22 10:44:54.917298: step: 760/463, loss: 0.2722793221473694 2023-01-22 10:44:55.547421: step: 762/463, loss: 0.23512597382068634 2023-01-22 10:44:56.144932: step: 764/463, loss: 0.10137669742107391 2023-01-22 10:44:56.717134: step: 766/463, loss: 0.21107909083366394 2023-01-22 10:44:57.417081: step: 768/463, loss: 0.042703546583652496 2023-01-22 10:44:58.099898: step: 770/463, loss: 0.18837223947048187 2023-01-22 10:44:58.687573: step: 772/463, loss: 0.4225706160068512 2023-01-22 10:44:59.325198: step: 774/463, loss: 0.07106520980596542 2023-01-22 10:45:00.041099: step: 776/463, loss: 0.19308072328567505 2023-01-22 10:45:00.639183: step: 778/463, loss: 0.21760398149490356 2023-01-22 10:45:01.313294: step: 780/463, loss: 0.29596397280693054 2023-01-22 10:45:02.002570: step: 782/463, loss: 0.1805022656917572 2023-01-22 10:45:02.638339: step: 784/463, loss: 0.4072858691215515 2023-01-22 10:45:03.226398: step: 786/463, loss: 0.04376452416181564 2023-01-22 10:45:04.003540: step: 788/463, loss: 0.2397332340478897 2023-01-22 10:45:04.648794: step: 790/463, loss: 0.11247055232524872 2023-01-22 10:45:05.247931: step: 792/463, loss: 0.11076261848211288 2023-01-22 10:45:05.900170: step: 794/463, loss: 0.06936417520046234 2023-01-22 10:45:06.534585: step: 796/463, loss: 0.17803461849689484 2023-01-22 10:45:07.108098: step: 798/463, loss: 0.5655487775802612 2023-01-22 10:45:07.737910: step: 800/463, loss: 0.20942959189414978 2023-01-22 10:45:08.364219: step: 802/463, loss: 0.04037226736545563 2023-01-22 10:45:08.913300: step: 804/463, loss: 0.08091913163661957 2023-01-22 10:45:09.596659: step: 806/463, loss: 0.08412367105484009 2023-01-22 10:45:10.200758: step: 808/463, loss: 0.12440893054008484 2023-01-22 10:45:10.850447: step: 810/463, loss: 0.17068389058113098 2023-01-22 10:45:11.529907: step: 812/463, loss: 0.09930028021335602 2023-01-22 10:45:12.097055: step: 814/463, loss: 0.17937523126602173 2023-01-22 10:45:12.730989: step: 816/463, loss: 0.2971479296684265 2023-01-22 10:45:13.327051: step: 818/463, loss: 0.08814814686775208 2023-01-22 10:45:13.915155: step: 820/463, loss: 0.046063054352998734 2023-01-22 10:45:14.519487: step: 822/463, loss: 0.24929530918598175 2023-01-22 10:45:15.123838: step: 824/463, loss: 0.24770347774028778 2023-01-22 10:45:15.686685: step: 826/463, loss: 0.09377054125070572 2023-01-22 10:45:16.292933: step: 828/463, loss: 0.06481131166219711 2023-01-22 10:45:16.877121: step: 830/463, loss: 0.6607604026794434 2023-01-22 10:45:17.539106: step: 832/463, loss: 0.1551063507795334 2023-01-22 10:45:18.215517: step: 834/463, loss: 0.09614669531583786 2023-01-22 10:45:18.834821: step: 836/463, loss: 0.19514885544776917 2023-01-22 10:45:19.452718: step: 838/463, loss: 0.09270910173654556 2023-01-22 10:45:20.079834: step: 840/463, loss: 0.10282081365585327 2023-01-22 10:45:20.716620: step: 842/463, loss: 0.11490132659673691 2023-01-22 10:45:21.325212: step: 844/463, loss: 0.07945281267166138 2023-01-22 10:45:21.900192: step: 846/463, loss: 0.08512766659259796 2023-01-22 10:45:22.675017: step: 848/463, loss: 0.07688874006271362 2023-01-22 10:45:23.271857: step: 850/463, loss: 0.14501172304153442 2023-01-22 10:45:23.882679: step: 852/463, loss: 0.20960292220115662 2023-01-22 10:45:24.551560: step: 854/463, loss: 0.31516411900520325 2023-01-22 10:45:25.192463: step: 856/463, loss: 0.09838423132896423 2023-01-22 10:45:25.747157: step: 858/463, loss: 0.09700152277946472 2023-01-22 10:45:26.439242: step: 860/463, loss: 0.31320977210998535 2023-01-22 10:45:27.074671: step: 862/463, loss: 0.14346487820148468 2023-01-22 10:45:27.758813: step: 864/463, loss: 0.06369202584028244 2023-01-22 10:45:28.291911: step: 866/463, loss: 0.4695272445678711 2023-01-22 10:45:28.853791: step: 868/463, loss: 0.06738274544477463 2023-01-22 10:45:29.490585: step: 870/463, loss: 0.07605982571840286 2023-01-22 10:45:30.135278: step: 872/463, loss: 0.053483352065086365 2023-01-22 10:45:30.821270: step: 874/463, loss: 0.06770220398902893 2023-01-22 10:45:31.525106: step: 876/463, loss: 0.09027217328548431 2023-01-22 10:45:32.116573: step: 878/463, loss: 0.7285488843917847 2023-01-22 10:45:32.836654: step: 880/463, loss: 0.18153871595859528 2023-01-22 10:45:33.430666: step: 882/463, loss: 0.05441578850150108 2023-01-22 10:45:34.040892: step: 884/463, loss: 0.17478787899017334 2023-01-22 10:45:34.668707: step: 886/463, loss: 0.0539497546851635 2023-01-22 10:45:35.283148: step: 888/463, loss: 0.2606639266014099 2023-01-22 10:45:35.873703: step: 890/463, loss: 0.05971585959196091 2023-01-22 10:45:36.501513: step: 892/463, loss: 0.11647982895374298 2023-01-22 10:45:37.088558: step: 894/463, loss: 0.19568830728530884 2023-01-22 10:45:37.760267: step: 896/463, loss: 0.10186778008937836 2023-01-22 10:45:38.357194: step: 898/463, loss: 0.15877921879291534 2023-01-22 10:45:38.965194: step: 900/463, loss: 0.059198394417762756 2023-01-22 10:45:39.609712: step: 902/463, loss: 0.17944201827049255 2023-01-22 10:45:40.189285: step: 904/463, loss: 0.913094162940979 2023-01-22 10:45:40.774062: step: 906/463, loss: 0.16600652039051056 2023-01-22 10:45:41.410509: step: 908/463, loss: 0.034814320504665375 2023-01-22 10:45:42.023348: step: 910/463, loss: 0.1060621440410614 2023-01-22 10:45:42.672990: step: 912/463, loss: 0.05400104448199272 2023-01-22 10:45:43.269319: step: 914/463, loss: 0.13888785243034363 2023-01-22 10:45:43.921252: step: 916/463, loss: 0.2110578715801239 2023-01-22 10:45:44.548224: step: 918/463, loss: 0.11863532662391663 2023-01-22 10:45:45.127680: step: 920/463, loss: 0.19453021883964539 2023-01-22 10:45:45.702032: step: 922/463, loss: 0.0719323381781578 2023-01-22 10:45:46.333774: step: 924/463, loss: 0.22827577590942383 2023-01-22 10:45:46.943273: step: 926/463, loss: 0.26101136207580566 ================================================== Loss: 0.254 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3300500706661212, 'r': 0.3137667654719672, 'f1': 0.32170250078546053}, 'combined': 0.23704394794718142, 'epoch': 13} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33990473252466064, 'r': 0.39520173404751446, 'f1': 0.36547342681147105}, 'combined': 0.2832856226959728, 'epoch': 13} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29021588054776093, 'r': 0.325460313859064, 'f1': 0.30682931199235547}, 'combined': 0.2260847562048935, 'epoch': 13} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32837198696491027, 'r': 0.4053341714098111, 'f1': 0.3628166009822085}, 'combined': 0.28122626487616165, 'epoch': 13} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2910352429868532, 'r': 0.31478195161765904, 'f1': 0.3024431877894372}, 'combined': 0.2228528752132695, 'epoch': 13} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3225473871355501, 'r': 0.3848037761966397, 'f1': 0.35093588306952556}, 'combined': 0.27201728735532604, 'epoch': 13} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2682926829268293, 'r': 0.3142857142857143, 'f1': 0.2894736842105263}, 'combined': 0.19298245614035087, 'epoch': 13} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3333333333333333, 'r': 0.4782608695652174, 'f1': 0.3928571428571428}, 'combined': 0.1964285714285714, 'epoch': 13} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.23275862068965517, 'f1': 0.2872340425531915}, 'combined': 0.19148936170212766, 'epoch': 13} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29855383301254707, 'r': 0.34444161379815674, 'f1': 0.3198603180116804}, 'combined': 0.23568655011386977, 'epoch': 7} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.30988948788864984, 'r': 0.3867922100668993, 'f1': 0.34409642236532006}, 'combined': 0.2667158871922577, 'epoch': 7} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2636363636363636, 'r': 0.4142857142857143, 'f1': 0.3222222222222222}, 'combined': 0.2148148148148148, 'epoch': 7} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29021588054776093, 'r': 0.325460313859064, 'f1': 0.30682931199235547}, 'combined': 0.2260847562048935, 'epoch': 13} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32837198696491027, 'r': 0.4053341714098111, 'f1': 0.3628166009822085}, 'combined': 0.28122626487616165, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3333333333333333, 'r': 0.4782608695652174, 'f1': 0.3928571428571428}, 'combined': 0.1964285714285714, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2671913725285436, 'r': 0.31941283622956823, 'f1': 0.29097763991872505}, 'combined': 0.21440457678221844, 'epoch': 10} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32113335504985907, 'r': 0.3837071337911919, 'f1': 0.34964268137756854}, 'combined': 0.2710149013548618, 'epoch': 10} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.3103448275862069, 'f1': 0.339622641509434}, 'combined': 0.22641509433962265, 'epoch': 10} ****************************** Epoch: 14 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 10:48:30.247581: step: 2/463, loss: 0.10308526456356049 2023-01-22 10:48:30.932034: step: 4/463, loss: 0.16714175045490265 2023-01-22 10:48:31.550455: step: 6/463, loss: 0.010249671526253223 2023-01-22 10:48:32.189292: step: 8/463, loss: 0.34679824113845825 2023-01-22 10:48:32.830519: step: 10/463, loss: 0.05703078210353851 2023-01-22 10:48:33.425620: step: 12/463, loss: 0.14334838092327118 2023-01-22 10:48:34.020377: step: 14/463, loss: 0.10722551494836807 2023-01-22 10:48:34.585843: step: 16/463, loss: 0.23808908462524414 2023-01-22 10:48:35.266250: step: 18/463, loss: 0.18627379834651947 2023-01-22 10:48:35.810571: step: 20/463, loss: 0.06190408393740654 2023-01-22 10:48:36.432388: step: 22/463, loss: 0.2690027356147766 2023-01-22 10:48:37.034487: step: 24/463, loss: 0.029313528910279274 2023-01-22 10:48:37.632276: step: 26/463, loss: 0.08370479941368103 2023-01-22 10:48:38.250739: step: 28/463, loss: 0.17957046627998352 2023-01-22 10:48:38.869153: step: 30/463, loss: 0.32404181361198425 2023-01-22 10:48:39.474036: step: 32/463, loss: 0.11481583118438721 2023-01-22 10:48:40.064674: step: 34/463, loss: 0.17802946269512177 2023-01-22 10:48:40.682773: step: 36/463, loss: 0.18063296377658844 2023-01-22 10:48:41.223494: step: 38/463, loss: 0.043350815773010254 2023-01-22 10:48:41.747628: step: 40/463, loss: 0.34427180886268616 2023-01-22 10:48:42.396771: step: 42/463, loss: 0.08118477463722229 2023-01-22 10:48:43.072969: step: 44/463, loss: 0.18487665057182312 2023-01-22 10:48:43.654896: step: 46/463, loss: 0.12369714677333832 2023-01-22 10:48:44.304777: step: 48/463, loss: 1.3812236785888672 2023-01-22 10:48:44.959707: step: 50/463, loss: 0.14374962449073792 2023-01-22 10:48:45.522640: step: 52/463, loss: 0.05366966500878334 2023-01-22 10:48:46.164417: step: 54/463, loss: 3.730415105819702 2023-01-22 10:48:46.856013: step: 56/463, loss: 0.430982768535614 2023-01-22 10:48:47.446100: step: 58/463, loss: 0.12790866196155548 2023-01-22 10:48:48.053694: step: 60/463, loss: 0.3020043671131134 2023-01-22 10:48:48.677103: step: 62/463, loss: 0.9591559767723083 2023-01-22 10:48:49.356539: step: 64/463, loss: 0.014086284674704075 2023-01-22 10:48:49.956897: step: 66/463, loss: 0.6733552813529968 2023-01-22 10:48:50.576067: step: 68/463, loss: 0.14249533414840698 2023-01-22 10:48:51.142444: step: 70/463, loss: 1.796962857246399 2023-01-22 10:48:51.769623: step: 72/463, loss: 0.061169784516096115 2023-01-22 10:48:52.358695: step: 74/463, loss: 0.06592588871717453 2023-01-22 10:48:52.989302: step: 76/463, loss: 0.0839243084192276 2023-01-22 10:48:53.575473: step: 78/463, loss: 0.3629399836063385 2023-01-22 10:48:54.167088: step: 80/463, loss: 0.8248944878578186 2023-01-22 10:48:54.784165: step: 82/463, loss: 0.2846137583255768 2023-01-22 10:48:55.429205: step: 84/463, loss: 0.10227452963590622 2023-01-22 10:48:56.046228: step: 86/463, loss: 1.1886745691299438 2023-01-22 10:48:56.619000: step: 88/463, loss: 0.05253385007381439 2023-01-22 10:48:57.226661: step: 90/463, loss: 0.055734481662511826 2023-01-22 10:48:57.861753: step: 92/463, loss: 0.030808456242084503 2023-01-22 10:48:58.569262: step: 94/463, loss: 0.8350070714950562 2023-01-22 10:48:59.184183: step: 96/463, loss: 0.24728679656982422 2023-01-22 10:48:59.852861: step: 98/463, loss: 0.9745913147926331 2023-01-22 10:49:00.423303: step: 100/463, loss: 0.17144189774990082 2023-01-22 10:49:01.080981: step: 102/463, loss: 0.07628318667411804 2023-01-22 10:49:01.747473: step: 104/463, loss: 0.056118909269571304 2023-01-22 10:49:02.313855: step: 106/463, loss: 0.16399188339710236 2023-01-22 10:49:02.923845: step: 108/463, loss: 0.6762322783470154 2023-01-22 10:49:03.608800: step: 110/463, loss: 0.0123615562915802 2023-01-22 10:49:04.240087: step: 112/463, loss: 0.0478915199637413 2023-01-22 10:49:04.838630: step: 114/463, loss: 0.08068694174289703 2023-01-22 10:49:05.425079: step: 116/463, loss: 0.0741761326789856 2023-01-22 10:49:06.028049: step: 118/463, loss: 1.2439968585968018 2023-01-22 10:49:06.643230: step: 120/463, loss: 0.047961268573999405 2023-01-22 10:49:07.267751: step: 122/463, loss: 0.38738203048706055 2023-01-22 10:49:07.881287: step: 124/463, loss: 0.12012168765068054 2023-01-22 10:49:08.515487: step: 126/463, loss: 0.04441584646701813 2023-01-22 10:49:09.106760: step: 128/463, loss: 0.08495910465717316 2023-01-22 10:49:09.716735: step: 130/463, loss: 0.04328349232673645 2023-01-22 10:49:10.350426: step: 132/463, loss: 0.3903776705265045 2023-01-22 10:49:10.922518: step: 134/463, loss: 0.03481090068817139 2023-01-22 10:49:11.571022: step: 136/463, loss: 0.10361975431442261 2023-01-22 10:49:12.229528: step: 138/463, loss: 0.1403813362121582 2023-01-22 10:49:12.851386: step: 140/463, loss: 0.025142883881926537 2023-01-22 10:49:13.372564: step: 142/463, loss: 0.12225999683141708 2023-01-22 10:49:13.986319: step: 144/463, loss: 0.06028455123305321 2023-01-22 10:49:14.599612: step: 146/463, loss: 0.13699457049369812 2023-01-22 10:49:15.277882: step: 148/463, loss: 0.28219887614250183 2023-01-22 10:49:15.883290: step: 150/463, loss: 0.28693246841430664 2023-01-22 10:49:16.479893: step: 152/463, loss: 0.3915354311466217 2023-01-22 10:49:17.071276: step: 154/463, loss: 0.06182054057717323 2023-01-22 10:49:17.660096: step: 156/463, loss: 0.16470405459403992 2023-01-22 10:49:18.216567: step: 158/463, loss: 0.12182014435529709 2023-01-22 10:49:18.735811: step: 160/463, loss: 0.11554552614688873 2023-01-22 10:49:19.352605: step: 162/463, loss: 0.5084474086761475 2023-01-22 10:49:19.982509: step: 164/463, loss: 0.16285544633865356 2023-01-22 10:49:20.637548: step: 166/463, loss: 2.6137630939483643 2023-01-22 10:49:21.255735: step: 168/463, loss: 0.03740686550736427 2023-01-22 10:49:21.966722: step: 170/463, loss: 0.14077307283878326 2023-01-22 10:49:22.554486: step: 172/463, loss: 0.09140763431787491 2023-01-22 10:49:23.177692: step: 174/463, loss: 0.09143885225057602 2023-01-22 10:49:23.808533: step: 176/463, loss: 0.07552574574947357 2023-01-22 10:49:24.425353: step: 178/463, loss: 0.06250397861003876 2023-01-22 10:49:25.039351: step: 180/463, loss: 0.1933819204568863 2023-01-22 10:49:25.599336: step: 182/463, loss: 0.21095974743366241 2023-01-22 10:49:26.289714: step: 184/463, loss: 0.1560210883617401 2023-01-22 10:49:26.874426: step: 186/463, loss: 0.0751553401350975 2023-01-22 10:49:27.566255: step: 188/463, loss: 0.09765796363353729 2023-01-22 10:49:28.245635: step: 190/463, loss: 0.39050137996673584 2023-01-22 10:49:28.862729: step: 192/463, loss: 0.08853725343942642 2023-01-22 10:49:29.499769: step: 194/463, loss: 0.1773661971092224 2023-01-22 10:49:30.104790: step: 196/463, loss: 0.0525774210691452 2023-01-22 10:49:30.744996: step: 198/463, loss: 0.05022650212049484 2023-01-22 10:49:31.360024: step: 200/463, loss: 0.1793585568666458 2023-01-22 10:49:31.958855: step: 202/463, loss: 0.12643963098526 2023-01-22 10:49:32.610437: step: 204/463, loss: 0.05982673168182373 2023-01-22 10:49:33.221811: step: 206/463, loss: 0.16795258224010468 2023-01-22 10:49:33.818984: step: 208/463, loss: 0.3783799707889557 2023-01-22 10:49:34.355032: step: 210/463, loss: 0.22475412487983704 2023-01-22 10:49:34.972173: step: 212/463, loss: 0.20961827039718628 2023-01-22 10:49:35.546427: step: 214/463, loss: 0.03246244043111801 2023-01-22 10:49:36.129023: step: 216/463, loss: 0.10360752046108246 2023-01-22 10:49:36.684857: step: 218/463, loss: 0.08381180465221405 2023-01-22 10:49:37.377062: step: 220/463, loss: 0.22995568811893463 2023-01-22 10:49:37.989197: step: 222/463, loss: 0.07965733855962753 2023-01-22 10:49:38.594440: step: 224/463, loss: 0.12934575974941254 2023-01-22 10:49:39.163956: step: 226/463, loss: 0.08320559561252594 2023-01-22 10:49:39.810981: step: 228/463, loss: 0.13966448605060577 2023-01-22 10:49:40.410250: step: 230/463, loss: 0.6244457364082336 2023-01-22 10:49:40.974328: step: 232/463, loss: 0.2568169832229614 2023-01-22 10:49:41.553131: step: 234/463, loss: 0.0757133886218071 2023-01-22 10:49:42.217467: step: 236/463, loss: 4.109152317047119 2023-01-22 10:49:42.937343: step: 238/463, loss: 0.5262159109115601 2023-01-22 10:49:43.586936: step: 240/463, loss: 0.09857461601495743 2023-01-22 10:49:44.169685: step: 242/463, loss: 0.4256633222103119 2023-01-22 10:49:44.873214: step: 244/463, loss: 0.04133503884077072 2023-01-22 10:49:45.559931: step: 246/463, loss: 0.35673826932907104 2023-01-22 10:49:46.251967: step: 248/463, loss: 0.12713421881198883 2023-01-22 10:49:46.934208: step: 250/463, loss: 0.10045666247606277 2023-01-22 10:49:47.505781: step: 252/463, loss: 0.05888919532299042 2023-01-22 10:49:48.110137: step: 254/463, loss: 0.0960940271615982 2023-01-22 10:49:48.712621: step: 256/463, loss: 0.12819786369800568 2023-01-22 10:49:49.404053: step: 258/463, loss: 0.10528997331857681 2023-01-22 10:49:50.020853: step: 260/463, loss: 0.16441646218299866 2023-01-22 10:49:50.629980: step: 262/463, loss: 0.1633167564868927 2023-01-22 10:49:51.222712: step: 264/463, loss: 0.8436607718467712 2023-01-22 10:49:51.846265: step: 266/463, loss: 0.1013575866818428 2023-01-22 10:49:52.467834: step: 268/463, loss: 0.10785786807537079 2023-01-22 10:49:53.062663: step: 270/463, loss: 0.34189173579216003 2023-01-22 10:49:53.615223: step: 272/463, loss: 0.04892894998192787 2023-01-22 10:49:54.278632: step: 274/463, loss: 0.049560192972421646 2023-01-22 10:49:54.891961: step: 276/463, loss: 0.018713340163230896 2023-01-22 10:49:55.490750: step: 278/463, loss: 0.0486927255988121 2023-01-22 10:49:56.119606: step: 280/463, loss: 0.15397493541240692 2023-01-22 10:49:56.845023: step: 282/463, loss: 0.1246669590473175 2023-01-22 10:49:57.437347: step: 284/463, loss: 0.023076198995113373 2023-01-22 10:49:58.024751: step: 286/463, loss: 0.038538575172424316 2023-01-22 10:49:58.633971: step: 288/463, loss: 0.09551247954368591 2023-01-22 10:49:59.302083: step: 290/463, loss: 0.11477039009332657 2023-01-22 10:49:59.941629: step: 292/463, loss: 0.11501011252403259 2023-01-22 10:50:00.586570: step: 294/463, loss: 0.1834041327238083 2023-01-22 10:50:01.207993: step: 296/463, loss: 0.1461332142353058 2023-01-22 10:50:01.822602: step: 298/463, loss: 0.19254513084888458 2023-01-22 10:50:02.432336: step: 300/463, loss: 0.08515982329845428 2023-01-22 10:50:03.047905: step: 302/463, loss: 0.23184725642204285 2023-01-22 10:50:03.651013: step: 304/463, loss: 0.10106851905584335 2023-01-22 10:50:04.228531: step: 306/463, loss: 0.09503541886806488 2023-01-22 10:50:04.878356: step: 308/463, loss: 0.4804510176181793 2023-01-22 10:50:05.562547: step: 310/463, loss: 0.6137498617172241 2023-01-22 10:50:06.213440: step: 312/463, loss: 0.14076784253120422 2023-01-22 10:50:06.791854: step: 314/463, loss: 0.08881313353776932 2023-01-22 10:50:07.427327: step: 316/463, loss: 0.26988980174064636 2023-01-22 10:50:08.057446: step: 318/463, loss: 0.14868198335170746 2023-01-22 10:50:08.631070: step: 320/463, loss: 0.13986501097679138 2023-01-22 10:50:09.254945: step: 322/463, loss: 0.082999087870121 2023-01-22 10:50:09.969185: step: 324/463, loss: 0.10994285345077515 2023-01-22 10:50:10.595077: step: 326/463, loss: 0.08804375678300858 2023-01-22 10:50:11.216017: step: 328/463, loss: 0.08210384845733643 2023-01-22 10:50:11.800816: step: 330/463, loss: 2.456934928894043 2023-01-22 10:50:12.434458: step: 332/463, loss: 0.13021592795848846 2023-01-22 10:50:13.076008: step: 334/463, loss: 0.5375983119010925 2023-01-22 10:50:13.691870: step: 336/463, loss: 0.06238596886396408 2023-01-22 10:50:14.325373: step: 338/463, loss: 0.11796872317790985 2023-01-22 10:50:14.941235: step: 340/463, loss: 0.08578043431043625 2023-01-22 10:50:15.536023: step: 342/463, loss: 0.12710529565811157 2023-01-22 10:50:16.149219: step: 344/463, loss: 0.17944364249706268 2023-01-22 10:50:16.741776: step: 346/463, loss: 0.07273881137371063 2023-01-22 10:50:17.374825: step: 348/463, loss: 0.11611678451299667 2023-01-22 10:50:17.980920: step: 350/463, loss: 0.07212795317173004 2023-01-22 10:50:18.491104: step: 352/463, loss: 0.21441487967967987 2023-01-22 10:50:19.144121: step: 354/463, loss: 0.07607550173997879 2023-01-22 10:50:19.765316: step: 356/463, loss: 0.06200196221470833 2023-01-22 10:50:20.370011: step: 358/463, loss: 0.043460387736558914 2023-01-22 10:50:21.110682: step: 360/463, loss: 0.03744329512119293 2023-01-22 10:50:21.747177: step: 362/463, loss: 0.11077764630317688 2023-01-22 10:50:22.364964: step: 364/463, loss: 0.10055799782276154 2023-01-22 10:50:23.049693: step: 366/463, loss: 0.1503055989742279 2023-01-22 10:50:23.699183: step: 368/463, loss: 0.03452283516526222 2023-01-22 10:50:24.277866: step: 370/463, loss: 0.17187127470970154 2023-01-22 10:50:24.939844: step: 372/463, loss: 0.36115145683288574 2023-01-22 10:50:25.543193: step: 374/463, loss: 0.1219983696937561 2023-01-22 10:50:26.174403: step: 376/463, loss: 0.12503086030483246 2023-01-22 10:50:26.757649: step: 378/463, loss: 0.18449562788009644 2023-01-22 10:50:27.405798: step: 380/463, loss: 0.1929616630077362 2023-01-22 10:50:28.070396: step: 382/463, loss: 0.06423379480838776 2023-01-22 10:50:28.812560: step: 384/463, loss: 0.2761771082878113 2023-01-22 10:50:29.419450: step: 386/463, loss: 0.08925159275531769 2023-01-22 10:50:30.101740: step: 388/463, loss: 0.2523607313632965 2023-01-22 10:50:30.816502: step: 390/463, loss: 0.06463966518640518 2023-01-22 10:50:31.411656: step: 392/463, loss: 0.11851001530885696 2023-01-22 10:50:31.995117: step: 394/463, loss: 0.08548794686794281 2023-01-22 10:50:32.618677: step: 396/463, loss: 1.2107921838760376 2023-01-22 10:50:33.243980: step: 398/463, loss: 0.016283638775348663 2023-01-22 10:50:33.838000: step: 400/463, loss: 0.08287622034549713 2023-01-22 10:50:34.373395: step: 402/463, loss: 0.10828357189893723 2023-01-22 10:50:35.032696: step: 404/463, loss: 0.17843776941299438 2023-01-22 10:50:35.672413: step: 406/463, loss: 0.1125880628824234 2023-01-22 10:50:36.289537: step: 408/463, loss: 0.09485150873661041 2023-01-22 10:50:36.934465: step: 410/463, loss: 0.07006898522377014 2023-01-22 10:50:37.542832: step: 412/463, loss: 0.034832850098609924 2023-01-22 10:50:38.207462: step: 414/463, loss: 0.8836945295333862 2023-01-22 10:50:38.908141: step: 416/463, loss: 0.057975392788648605 2023-01-22 10:50:39.563829: step: 418/463, loss: 0.29926618933677673 2023-01-22 10:50:40.191578: step: 420/463, loss: 0.1539737582206726 2023-01-22 10:50:40.813134: step: 422/463, loss: 0.12392600625753403 2023-01-22 10:50:41.436634: step: 424/463, loss: 0.13452214002609253 2023-01-22 10:50:42.050798: step: 426/463, loss: 0.1232166513800621 2023-01-22 10:50:42.660510: step: 428/463, loss: 0.07230601459741592 2023-01-22 10:50:43.302601: step: 430/463, loss: 0.07903006672859192 2023-01-22 10:50:43.963463: step: 432/463, loss: 0.1106240525841713 2023-01-22 10:50:44.591694: step: 434/463, loss: 0.12355376780033112 2023-01-22 10:50:45.125292: step: 436/463, loss: 0.09508159011602402 2023-01-22 10:50:45.738008: step: 438/463, loss: 0.07258486747741699 2023-01-22 10:50:46.347559: step: 440/463, loss: 0.05816726014018059 2023-01-22 10:50:46.970952: step: 442/463, loss: 0.032864321023225784 2023-01-22 10:50:47.567662: step: 444/463, loss: 0.12572872638702393 2023-01-22 10:50:48.192474: step: 446/463, loss: 0.16226233541965485 2023-01-22 10:50:48.840848: step: 448/463, loss: 0.04432658106088638 2023-01-22 10:50:49.438503: step: 450/463, loss: 0.0903104692697525 2023-01-22 10:50:50.172659: step: 452/463, loss: 0.06945456564426422 2023-01-22 10:50:50.801381: step: 454/463, loss: 0.5241070985794067 2023-01-22 10:50:51.415880: step: 456/463, loss: 0.3017179071903229 2023-01-22 10:50:52.056709: step: 458/463, loss: 0.254629909992218 2023-01-22 10:50:52.714440: step: 460/463, loss: 0.33663269877433777 2023-01-22 10:50:53.395996: step: 462/463, loss: 0.09105712920427322 2023-01-22 10:50:54.004608: step: 464/463, loss: 0.07593025267124176 2023-01-22 10:50:54.643149: step: 466/463, loss: 0.0862785205245018 2023-01-22 10:50:55.265384: step: 468/463, loss: 0.1536739468574524 2023-01-22 10:50:55.908934: step: 470/463, loss: 0.12506939470767975 2023-01-22 10:50:56.517443: step: 472/463, loss: 0.11008240282535553 2023-01-22 10:50:57.132384: step: 474/463, loss: 0.3848530650138855 2023-01-22 10:50:57.729220: step: 476/463, loss: 0.3592287302017212 2023-01-22 10:50:58.327775: step: 478/463, loss: 0.0907362625002861 2023-01-22 10:50:58.928380: step: 480/463, loss: 1.080715537071228 2023-01-22 10:50:59.580855: step: 482/463, loss: 0.5033161640167236 2023-01-22 10:51:00.268138: step: 484/463, loss: 0.07604740560054779 2023-01-22 10:51:00.831682: step: 486/463, loss: 0.15551824867725372 2023-01-22 10:51:01.499973: step: 488/463, loss: 0.1979314684867859 2023-01-22 10:51:02.089790: step: 490/463, loss: 0.1543964296579361 2023-01-22 10:51:02.692044: step: 492/463, loss: 0.12534230947494507 2023-01-22 10:51:03.285087: step: 494/463, loss: 0.12120353430509567 2023-01-22 10:51:04.014289: step: 496/463, loss: 0.14934006333351135 2023-01-22 10:51:04.599715: step: 498/463, loss: 0.05352311581373215 2023-01-22 10:51:05.260086: step: 500/463, loss: 0.2460317313671112 2023-01-22 10:51:05.799798: step: 502/463, loss: 0.052115026861429214 2023-01-22 10:51:06.397004: step: 504/463, loss: 0.1804737150669098 2023-01-22 10:51:06.955641: step: 506/463, loss: 0.038035281002521515 2023-01-22 10:51:07.523063: step: 508/463, loss: 0.07762838900089264 2023-01-22 10:51:08.081270: step: 510/463, loss: 0.1987018883228302 2023-01-22 10:51:08.687637: step: 512/463, loss: 0.0974213108420372 2023-01-22 10:51:09.347839: step: 514/463, loss: 0.14953991770744324 2023-01-22 10:51:09.946951: step: 516/463, loss: 0.02866896614432335 2023-01-22 10:51:10.559334: step: 518/463, loss: 0.08622793853282928 2023-01-22 10:51:11.182363: step: 520/463, loss: 0.13547734916210175 2023-01-22 10:51:11.726952: step: 522/463, loss: 0.09220833331346512 2023-01-22 10:51:12.331431: step: 524/463, loss: 0.05490013211965561 2023-01-22 10:51:12.954738: step: 526/463, loss: 0.15554659068584442 2023-01-22 10:51:13.586795: step: 528/463, loss: 0.22622404992580414 2023-01-22 10:51:14.237001: step: 530/463, loss: 0.058216605335474014 2023-01-22 10:51:14.951922: step: 532/463, loss: 0.15217728912830353 2023-01-22 10:51:15.546858: step: 534/463, loss: 0.1438591182231903 2023-01-22 10:51:16.226949: step: 536/463, loss: 0.028094308450818062 2023-01-22 10:51:16.839455: step: 538/463, loss: 0.2892555296421051 2023-01-22 10:51:17.430298: step: 540/463, loss: 0.0639430359005928 2023-01-22 10:51:18.036743: step: 542/463, loss: 0.1315533071756363 2023-01-22 10:51:18.692126: step: 544/463, loss: 0.12075936049222946 2023-01-22 10:51:19.338590: step: 546/463, loss: 0.35180947184562683 2023-01-22 10:51:19.944407: step: 548/463, loss: 0.14668837189674377 2023-01-22 10:51:20.528043: step: 550/463, loss: 4.931124687194824 2023-01-22 10:51:21.123215: step: 552/463, loss: 0.14781951904296875 2023-01-22 10:51:21.747794: step: 554/463, loss: 0.10335811972618103 2023-01-22 10:51:22.348996: step: 556/463, loss: 0.1324312388896942 2023-01-22 10:51:22.978621: step: 558/463, loss: 0.10734470933675766 2023-01-22 10:51:23.598326: step: 560/463, loss: 0.06613200157880783 2023-01-22 10:51:24.230137: step: 562/463, loss: 0.4820294976234436 2023-01-22 10:51:24.865436: step: 564/463, loss: 0.18396775424480438 2023-01-22 10:51:25.439336: step: 566/463, loss: 0.01487127784639597 2023-01-22 10:51:26.056512: step: 568/463, loss: 0.08771449327468872 2023-01-22 10:51:26.614772: step: 570/463, loss: 0.13871484994888306 2023-01-22 10:51:27.216416: step: 572/463, loss: 0.11097869277000427 2023-01-22 10:51:27.844493: step: 574/463, loss: 0.12313664704561234 2023-01-22 10:51:28.505489: step: 576/463, loss: 0.05219290032982826 2023-01-22 10:51:29.170657: step: 578/463, loss: 0.04261372610926628 2023-01-22 10:51:29.842986: step: 580/463, loss: 0.16862012445926666 2023-01-22 10:51:30.463477: step: 582/463, loss: 0.15329620242118835 2023-01-22 10:51:31.035924: step: 584/463, loss: 0.0879918709397316 2023-01-22 10:51:31.633625: step: 586/463, loss: 0.10960620641708374 2023-01-22 10:51:32.349881: step: 588/463, loss: 0.06510740518569946 2023-01-22 10:51:33.029363: step: 590/463, loss: 0.1167898029088974 2023-01-22 10:51:33.702571: step: 592/463, loss: 0.37295305728912354 2023-01-22 10:51:34.438407: step: 594/463, loss: 0.0940602496266365 2023-01-22 10:51:35.033255: step: 596/463, loss: 0.08408602327108383 2023-01-22 10:51:35.633487: step: 598/463, loss: 0.14934015274047852 2023-01-22 10:51:36.178415: step: 600/463, loss: 0.1785050332546234 2023-01-22 10:51:36.793657: step: 602/463, loss: 0.07750960439443588 2023-01-22 10:51:37.430505: step: 604/463, loss: 0.2220855951309204 2023-01-22 10:51:38.058919: step: 606/463, loss: 0.09620335698127747 2023-01-22 10:51:38.677288: step: 608/463, loss: 0.15664805471897125 2023-01-22 10:51:39.350299: step: 610/463, loss: 0.06983480602502823 2023-01-22 10:51:40.025776: step: 612/463, loss: 0.5398403406143188 2023-01-22 10:51:40.669517: step: 614/463, loss: 0.1134791448712349 2023-01-22 10:51:41.251599: step: 616/463, loss: 0.11525683104991913 2023-01-22 10:51:42.073787: step: 618/463, loss: 0.2604376971721649 2023-01-22 10:51:42.715344: step: 620/463, loss: 0.4766441583633423 2023-01-22 10:51:43.337407: step: 622/463, loss: 0.1398719698190689 2023-01-22 10:51:43.959362: step: 624/463, loss: 0.44540050625801086 2023-01-22 10:51:44.521090: step: 626/463, loss: 0.05978726968169212 2023-01-22 10:51:45.156273: step: 628/463, loss: 0.2011292725801468 2023-01-22 10:51:45.802295: step: 630/463, loss: 0.1322762370109558 2023-01-22 10:51:46.508348: step: 632/463, loss: 0.24957512319087982 2023-01-22 10:51:47.061469: step: 634/463, loss: 0.08489439636468887 2023-01-22 10:51:47.676260: step: 636/463, loss: 0.07555403560400009 2023-01-22 10:51:48.314030: step: 638/463, loss: 0.06060856208205223 2023-01-22 10:51:48.926516: step: 640/463, loss: 0.07927530258893967 2023-01-22 10:51:49.527782: step: 642/463, loss: 0.07236307859420776 2023-01-22 10:51:50.147394: step: 644/463, loss: 0.04239349067211151 2023-01-22 10:51:50.826241: step: 646/463, loss: 0.03760574385523796 2023-01-22 10:51:51.418168: step: 648/463, loss: 0.9239044189453125 2023-01-22 10:51:52.066417: step: 650/463, loss: 0.1705137938261032 2023-01-22 10:51:52.695763: step: 652/463, loss: 0.11683175712823868 2023-01-22 10:51:53.319965: step: 654/463, loss: 0.5038808584213257 2023-01-22 10:51:53.945291: step: 656/463, loss: 0.2034892737865448 2023-01-22 10:51:54.576824: step: 658/463, loss: 0.09336759150028229 2023-01-22 10:51:55.204910: step: 660/463, loss: 0.05636840686202049 2023-01-22 10:51:55.788972: step: 662/463, loss: 0.11377755552530289 2023-01-22 10:51:56.491234: step: 664/463, loss: 0.2625134587287903 2023-01-22 10:51:57.134661: step: 666/463, loss: 0.21744701266288757 2023-01-22 10:51:57.750863: step: 668/463, loss: 0.0924687534570694 2023-01-22 10:51:58.330949: step: 670/463, loss: 0.5260862112045288 2023-01-22 10:51:58.931403: step: 672/463, loss: 0.1251498907804489 2023-01-22 10:51:59.594829: step: 674/463, loss: 0.10914484411478043 2023-01-22 10:52:00.190649: step: 676/463, loss: 0.10781820118427277 2023-01-22 10:52:00.811352: step: 678/463, loss: 0.08858411759138107 2023-01-22 10:52:01.485629: step: 680/463, loss: 0.11982058733701706 2023-01-22 10:52:02.079086: step: 682/463, loss: 0.12719088792800903 2023-01-22 10:52:02.678064: step: 684/463, loss: 0.12275876104831696 2023-01-22 10:52:03.259487: step: 686/463, loss: 0.0874873623251915 2023-01-22 10:52:03.844018: step: 688/463, loss: 0.5289443135261536 2023-01-22 10:52:04.538603: step: 690/463, loss: 0.09343475848436356 2023-01-22 10:52:05.146381: step: 692/463, loss: 0.1273340880870819 2023-01-22 10:52:05.824584: step: 694/463, loss: 0.12136916071176529 2023-01-22 10:52:06.390949: step: 696/463, loss: 0.7252759337425232 2023-01-22 10:52:06.963136: step: 698/463, loss: 0.07977968454360962 2023-01-22 10:52:07.602208: step: 700/463, loss: 0.15106070041656494 2023-01-22 10:52:08.150213: step: 702/463, loss: 0.044744301587343216 2023-01-22 10:52:08.717837: step: 704/463, loss: 0.5736659169197083 2023-01-22 10:52:09.334274: step: 706/463, loss: 0.08373929560184479 2023-01-22 10:52:09.922248: step: 708/463, loss: 0.04708670452237129 2023-01-22 10:52:10.573609: step: 710/463, loss: 0.09988915175199509 2023-01-22 10:52:11.252387: step: 712/463, loss: 0.1506214290857315 2023-01-22 10:52:11.846350: step: 714/463, loss: 0.2638477087020874 2023-01-22 10:52:12.463755: step: 716/463, loss: 0.20712675154209137 2023-01-22 10:52:13.115020: step: 718/463, loss: 0.1186700165271759 2023-01-22 10:52:13.752790: step: 720/463, loss: 0.18628276884555817 2023-01-22 10:52:14.317841: step: 722/463, loss: 0.07569295912981033 2023-01-22 10:52:14.941720: step: 724/463, loss: 0.14078210294246674 2023-01-22 10:52:15.530982: step: 726/463, loss: 0.07991614192724228 2023-01-22 10:52:16.193829: step: 728/463, loss: 0.2108035385608673 2023-01-22 10:52:16.852075: step: 730/463, loss: 1.492363452911377 2023-01-22 10:52:17.501099: step: 732/463, loss: 0.09867862612009048 2023-01-22 10:52:18.140952: step: 734/463, loss: 0.04936336353421211 2023-01-22 10:52:18.771639: step: 736/463, loss: 0.5099826455116272 2023-01-22 10:52:19.515955: step: 738/463, loss: 0.07888239622116089 2023-01-22 10:52:20.109681: step: 740/463, loss: 0.16393595933914185 2023-01-22 10:52:20.836935: step: 742/463, loss: 0.14436742663383484 2023-01-22 10:52:21.508249: step: 744/463, loss: 0.07070403546094894 2023-01-22 10:52:22.109631: step: 746/463, loss: 0.6342538595199585 2023-01-22 10:52:22.821329: step: 748/463, loss: 0.1487412005662918 2023-01-22 10:52:23.479494: step: 750/463, loss: 0.14240650832653046 2023-01-22 10:52:24.086246: step: 752/463, loss: 0.2808184027671814 2023-01-22 10:52:24.691976: step: 754/463, loss: 0.07024453580379486 2023-01-22 10:52:25.328406: step: 756/463, loss: 1.5754814147949219 2023-01-22 10:52:25.936262: step: 758/463, loss: 0.42156901955604553 2023-01-22 10:52:26.558052: step: 760/463, loss: 0.05434051528573036 2023-01-22 10:52:27.193535: step: 762/463, loss: 0.06405092030763626 2023-01-22 10:52:27.848862: step: 764/463, loss: 0.13429778814315796 2023-01-22 10:52:28.398705: step: 766/463, loss: 0.15609587728977203 2023-01-22 10:52:29.047951: step: 768/463, loss: 0.26023316383361816 2023-01-22 10:52:29.623895: step: 770/463, loss: 0.17534756660461426 2023-01-22 10:52:30.205478: step: 772/463, loss: 0.03903448209166527 2023-01-22 10:52:30.839796: step: 774/463, loss: 0.21714311838150024 2023-01-22 10:52:31.439530: step: 776/463, loss: 0.13616353273391724 2023-01-22 10:52:32.027602: step: 778/463, loss: 0.1222415417432785 2023-01-22 10:52:32.655939: step: 780/463, loss: 0.09427404403686523 2023-01-22 10:52:33.320346: step: 782/463, loss: 0.14996539056301117 2023-01-22 10:52:33.964925: step: 784/463, loss: 0.24420617520809174 2023-01-22 10:52:34.585879: step: 786/463, loss: 0.5018675923347473 2023-01-22 10:52:35.268970: step: 788/463, loss: 0.2124158889055252 2023-01-22 10:52:35.886709: step: 790/463, loss: 0.3533600866794586 2023-01-22 10:52:36.552364: step: 792/463, loss: 0.22449904680252075 2023-01-22 10:52:37.163792: step: 794/463, loss: 0.13094858825206757 2023-01-22 10:52:37.746544: step: 796/463, loss: 0.6667919754981995 2023-01-22 10:52:38.351345: step: 798/463, loss: 0.03039870783686638 2023-01-22 10:52:38.971148: step: 800/463, loss: 0.08532169461250305 2023-01-22 10:52:39.639842: step: 802/463, loss: 0.0879097655415535 2023-01-22 10:52:40.273722: step: 804/463, loss: 0.039150360971689224 2023-01-22 10:52:40.875655: step: 806/463, loss: 0.12181497365236282 2023-01-22 10:52:41.448299: step: 808/463, loss: 0.409475177526474 2023-01-22 10:52:41.998293: step: 810/463, loss: 0.160431906580925 2023-01-22 10:52:42.646419: step: 812/463, loss: 0.12796978652477264 2023-01-22 10:52:43.249963: step: 814/463, loss: 0.06792762875556946 2023-01-22 10:52:43.795488: step: 816/463, loss: 0.09617536514997482 2023-01-22 10:52:44.435103: step: 818/463, loss: 0.08104352653026581 2023-01-22 10:52:45.057171: step: 820/463, loss: 0.13039171695709229 2023-01-22 10:52:45.720889: step: 822/463, loss: 0.8699960112571716 2023-01-22 10:52:46.310386: step: 824/463, loss: 0.07382344454526901 2023-01-22 10:52:46.885826: step: 826/463, loss: 0.34303969144821167 2023-01-22 10:52:47.507610: step: 828/463, loss: 0.11564187705516815 2023-01-22 10:52:48.100356: step: 830/463, loss: 0.0836898684501648 2023-01-22 10:52:48.708286: step: 832/463, loss: 0.1890992969274521 2023-01-22 10:52:49.326844: step: 834/463, loss: 0.06025129184126854 2023-01-22 10:52:49.965567: step: 836/463, loss: 0.20985692739486694 2023-01-22 10:52:50.585044: step: 838/463, loss: 0.08359216153621674 2023-01-22 10:52:51.293392: step: 840/463, loss: 0.051489606499671936 2023-01-22 10:52:51.861102: step: 842/463, loss: 0.11594334989786148 2023-01-22 10:52:52.431522: step: 844/463, loss: 0.1484472006559372 2023-01-22 10:52:53.053859: step: 846/463, loss: 0.06749340146780014 2023-01-22 10:52:53.673800: step: 848/463, loss: 0.1993170529603958 2023-01-22 10:52:54.273755: step: 850/463, loss: 0.2687409222126007 2023-01-22 10:52:54.969087: step: 852/463, loss: 0.09031051397323608 2023-01-22 10:52:55.660386: step: 854/463, loss: 0.11227161437273026 2023-01-22 10:52:56.296091: step: 856/463, loss: 0.048474475741386414 2023-01-22 10:52:57.044356: step: 858/463, loss: 0.08880440145730972 2023-01-22 10:52:57.726775: step: 860/463, loss: 3.4016387462615967 2023-01-22 10:52:58.322979: step: 862/463, loss: 0.205790713429451 2023-01-22 10:52:58.949033: step: 864/463, loss: 0.14389650523662567 2023-01-22 10:52:59.608116: step: 866/463, loss: 0.01701684482395649 2023-01-22 10:53:00.162503: step: 868/463, loss: 0.07375527173280716 2023-01-22 10:53:00.794507: step: 870/463, loss: 0.05160212516784668 2023-01-22 10:53:01.379731: step: 872/463, loss: 0.20682241022586823 2023-01-22 10:53:01.989096: step: 874/463, loss: 0.0913391262292862 2023-01-22 10:53:02.606644: step: 876/463, loss: 0.13339069485664368 2023-01-22 10:53:03.213200: step: 878/463, loss: 0.04677043855190277 2023-01-22 10:53:03.758160: step: 880/463, loss: 0.16638848185539246 2023-01-22 10:53:04.371009: step: 882/463, loss: 0.08223778009414673 2023-01-22 10:53:04.963342: step: 884/463, loss: 0.07895812392234802 2023-01-22 10:53:05.661311: step: 886/463, loss: 0.0934305414557457 2023-01-22 10:53:06.244653: step: 888/463, loss: 0.10494174808263779 2023-01-22 10:53:06.890819: step: 890/463, loss: 0.046117182821035385 2023-01-22 10:53:07.475099: step: 892/463, loss: 0.1175730749964714 2023-01-22 10:53:08.103151: step: 894/463, loss: 0.2696318030357361 2023-01-22 10:53:08.706930: step: 896/463, loss: 0.04674027860164642 2023-01-22 10:53:09.334964: step: 898/463, loss: 0.12179773300886154 2023-01-22 10:53:09.934769: step: 900/463, loss: 0.1435602456331253 2023-01-22 10:53:10.537843: step: 902/463, loss: 0.11121556162834167 2023-01-22 10:53:11.185932: step: 904/463, loss: 0.10599341988563538 2023-01-22 10:53:11.730690: step: 906/463, loss: 0.19182957708835602 2023-01-22 10:53:12.345847: step: 908/463, loss: 0.09481100738048553 2023-01-22 10:53:13.038435: step: 910/463, loss: 0.28158435225486755 2023-01-22 10:53:13.631052: step: 912/463, loss: 0.08506053686141968 2023-01-22 10:53:14.240129: step: 914/463, loss: 0.2001275271177292 2023-01-22 10:53:14.860832: step: 916/463, loss: 0.06941293925046921 2023-01-22 10:53:15.501636: step: 918/463, loss: 0.8327589631080627 2023-01-22 10:53:16.032855: step: 920/463, loss: 0.17840911448001862 2023-01-22 10:53:16.630055: step: 922/463, loss: 0.12436456233263016 2023-01-22 10:53:17.243076: step: 924/463, loss: 0.13938581943511963 2023-01-22 10:53:17.888085: step: 926/463, loss: 0.19173870980739594 ================================================== Loss: 0.234 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3308816758747698, 'r': 0.34092741935483867, 'f1': 0.33582943925233644}, 'combined': 0.24745327102803735, 'epoch': 14} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3321056865422275, 'r': 0.38399720006445054, 'f1': 0.3561713160018091}, 'combined': 0.27607537412580424, 'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29796259842519685, 'r': 0.35902514231499055, 'f1': 0.3256561962134251}, 'combined': 0.23995719720989217, 'epoch': 14} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3205281375222519, 'r': 0.39417890441615167, 'f1': 0.35355865457936764}, 'combined': 0.2740502490041032, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3027309708605665, 'r': 0.35155854680581916, 'f1': 0.3253228343576237}, 'combined': 0.23971156215824904, 'epoch': 14} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32515220389793503, 'r': 0.38402627022871927, 'f1': 0.35214545470614966}, 'combined': 0.2729548500593122, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.27325581395348836, 'r': 0.3357142857142857, 'f1': 0.30128205128205127}, 'combined': 0.20085470085470084, 'epoch': 14} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3106060606060606, 'r': 0.44565217391304346, 'f1': 0.3660714285714286}, 'combined': 0.1830357142857143, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36904761904761907, 'r': 0.2672413793103448, 'f1': 0.31}, 'combined': 0.20666666666666667, 'epoch': 14} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29855383301254707, 'r': 0.34444161379815674, 'f1': 0.3198603180116804}, 'combined': 0.23568655011386977, 'epoch': 7} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.30988948788864984, 'r': 0.3867922100668993, 'f1': 0.34409642236532006}, 'combined': 0.2667158871922577, 'epoch': 7} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2636363636363636, 'r': 0.4142857142857143, 'f1': 0.3222222222222222}, 'combined': 0.2148148148148148, 'epoch': 7} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29021588054776093, 'r': 0.325460313859064, 'f1': 0.30682931199235547}, 'combined': 0.2260847562048935, 'epoch': 13} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32837198696491027, 'r': 0.4053341714098111, 'f1': 0.3628166009822085}, 'combined': 0.28122626487616165, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3333333333333333, 'r': 0.4782608695652174, 'f1': 0.3928571428571428}, 'combined': 0.1964285714285714, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3027309708605665, 'r': 0.35155854680581916, 'f1': 0.3253228343576237}, 'combined': 0.23971156215824904, 'epoch': 14} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32515220389793503, 'r': 0.38402627022871927, 'f1': 0.35214545470614966}, 'combined': 0.2729548500593122, 'epoch': 14} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36904761904761907, 'r': 0.2672413793103448, 'f1': 0.31}, 'combined': 0.20666666666666667, 'epoch': 14} ****************************** Epoch: 15 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 10:56:05.629221: step: 2/463, loss: 0.15447042882442474 2023-01-22 10:56:06.207721: step: 4/463, loss: 0.02770250476896763 2023-01-22 10:56:06.826975: step: 6/463, loss: 0.08984176069498062 2023-01-22 10:56:07.407733: step: 8/463, loss: 0.03866231441497803 2023-01-22 10:56:08.021424: step: 10/463, loss: 0.10324466228485107 2023-01-22 10:56:08.655705: step: 12/463, loss: 0.046043239533901215 2023-01-22 10:56:09.236558: step: 14/463, loss: 0.2281324863433838 2023-01-22 10:56:09.850904: step: 16/463, loss: 0.08534029871225357 2023-01-22 10:56:10.480375: step: 18/463, loss: 0.0729091614484787 2023-01-22 10:56:11.152394: step: 20/463, loss: 0.0972452163696289 2023-01-22 10:56:11.779747: step: 22/463, loss: 0.6486608386039734 2023-01-22 10:56:12.367118: step: 24/463, loss: 0.09822855144739151 2023-01-22 10:56:12.949124: step: 26/463, loss: 0.06654565781354904 2023-01-22 10:56:13.624904: step: 28/463, loss: 0.07373248040676117 2023-01-22 10:56:14.283992: step: 30/463, loss: 0.10150497406721115 2023-01-22 10:56:14.984481: step: 32/463, loss: 0.11966792494058609 2023-01-22 10:56:15.538599: step: 34/463, loss: 0.2044639140367508 2023-01-22 10:56:16.126222: step: 36/463, loss: 0.027446717023849487 2023-01-22 10:56:16.749571: step: 38/463, loss: 0.031536053866147995 2023-01-22 10:56:17.386263: step: 40/463, loss: 0.1815873235464096 2023-01-22 10:56:18.006342: step: 42/463, loss: 0.11311647295951843 2023-01-22 10:56:18.586978: step: 44/463, loss: 0.1141190305352211 2023-01-22 10:56:19.178757: step: 46/463, loss: 0.15072199702262878 2023-01-22 10:56:19.907131: step: 48/463, loss: 0.17301441729068756 2023-01-22 10:56:20.613940: step: 50/463, loss: 0.24043482542037964 2023-01-22 10:56:21.197080: step: 52/463, loss: 0.17363114655017853 2023-01-22 10:56:21.818691: step: 54/463, loss: 0.04408092424273491 2023-01-22 10:56:22.441361: step: 56/463, loss: 0.5342665910720825 2023-01-22 10:56:23.084521: step: 58/463, loss: 0.06617753207683563 2023-01-22 10:56:23.778379: step: 60/463, loss: 0.18174047768115997 2023-01-22 10:56:24.372273: step: 62/463, loss: 0.07808267325162888 2023-01-22 10:56:25.020993: step: 64/463, loss: 0.6816559433937073 2023-01-22 10:56:25.631405: step: 66/463, loss: 0.09485840797424316 2023-01-22 10:56:26.242465: step: 68/463, loss: 0.20093004405498505 2023-01-22 10:56:26.897333: step: 70/463, loss: 0.08908053487539291 2023-01-22 10:56:27.481627: step: 72/463, loss: 0.6995729804039001 2023-01-22 10:56:28.127039: step: 74/463, loss: 0.3616619408130646 2023-01-22 10:56:28.731100: step: 76/463, loss: 0.08824338763952255 2023-01-22 10:56:29.377245: step: 78/463, loss: 0.11003921926021576 2023-01-22 10:56:30.020397: step: 80/463, loss: 0.06630346924066544 2023-01-22 10:56:30.684579: step: 82/463, loss: 0.06528856605291367 2023-01-22 10:56:31.277718: step: 84/463, loss: 0.09057553112506866 2023-01-22 10:56:31.858345: step: 86/463, loss: 0.5225180983543396 2023-01-22 10:56:32.544969: step: 88/463, loss: 0.19920146465301514 2023-01-22 10:56:33.141859: step: 90/463, loss: 0.06880192458629608 2023-01-22 10:56:33.773800: step: 92/463, loss: 0.028462907299399376 2023-01-22 10:56:34.419674: step: 94/463, loss: 0.0857849195599556 2023-01-22 10:56:35.061459: step: 96/463, loss: 0.13245193660259247 2023-01-22 10:56:35.672287: step: 98/463, loss: 0.1076107993721962 2023-01-22 10:56:36.269714: step: 100/463, loss: 0.20794677734375 2023-01-22 10:56:36.916070: step: 102/463, loss: 0.0402422659099102 2023-01-22 10:56:37.466876: step: 104/463, loss: 0.055854782462120056 2023-01-22 10:56:38.093460: step: 106/463, loss: 0.05146775022149086 2023-01-22 10:56:38.674610: step: 108/463, loss: 0.09846664220094681 2023-01-22 10:56:39.311146: step: 110/463, loss: 0.06401441246271133 2023-01-22 10:56:39.930070: step: 112/463, loss: 0.0930890366435051 2023-01-22 10:56:40.525305: step: 114/463, loss: 0.4546594023704529 2023-01-22 10:56:41.097688: step: 116/463, loss: 0.12890735268592834 2023-01-22 10:56:41.679927: step: 118/463, loss: 0.16613930463790894 2023-01-22 10:56:42.238350: step: 120/463, loss: 0.040514469146728516 2023-01-22 10:56:42.912613: step: 122/463, loss: 0.048164013773202896 2023-01-22 10:56:43.531070: step: 124/463, loss: 0.040693316608667374 2023-01-22 10:56:44.167610: step: 126/463, loss: 0.06072457507252693 2023-01-22 10:56:44.810148: step: 128/463, loss: 0.023524239659309387 2023-01-22 10:56:45.399528: step: 130/463, loss: 0.07855761796236038 2023-01-22 10:56:46.033020: step: 132/463, loss: 0.12775106728076935 2023-01-22 10:56:46.598971: step: 134/463, loss: 0.041023239493370056 2023-01-22 10:56:47.199238: step: 136/463, loss: 0.5008127093315125 2023-01-22 10:56:47.903212: step: 138/463, loss: 0.07672007381916046 2023-01-22 10:56:48.523886: step: 140/463, loss: 0.10578087717294693 2023-01-22 10:56:49.155430: step: 142/463, loss: 0.028427373617887497 2023-01-22 10:56:49.745063: step: 144/463, loss: 0.06272473186254501 2023-01-22 10:56:50.336473: step: 146/463, loss: 0.1564113348722458 2023-01-22 10:56:50.955347: step: 148/463, loss: 0.07387494295835495 2023-01-22 10:56:51.574159: step: 150/463, loss: 0.056282248347997665 2023-01-22 10:56:52.180310: step: 152/463, loss: 0.06835278123617172 2023-01-22 10:56:52.789228: step: 154/463, loss: 0.1928885132074356 2023-01-22 10:56:53.379776: step: 156/463, loss: 0.07516981661319733 2023-01-22 10:56:53.978922: step: 158/463, loss: 0.7094244360923767 2023-01-22 10:56:54.594253: step: 160/463, loss: 0.038302235305309296 2023-01-22 10:56:55.176454: step: 162/463, loss: 0.05817342922091484 2023-01-22 10:56:55.798879: step: 164/463, loss: 0.01996573433279991 2023-01-22 10:56:56.455341: step: 166/463, loss: 0.47693943977355957 2023-01-22 10:56:57.087627: step: 168/463, loss: 0.12544715404510498 2023-01-22 10:56:57.674492: step: 170/463, loss: 0.12725156545639038 2023-01-22 10:56:58.277613: step: 172/463, loss: 0.06777847558259964 2023-01-22 10:56:58.864030: step: 174/463, loss: 0.09384545683860779 2023-01-22 10:56:59.458964: step: 176/463, loss: 0.08972682803869247 2023-01-22 10:57:00.046707: step: 178/463, loss: 0.5601344704627991 2023-01-22 10:57:00.637636: step: 180/463, loss: 0.11076360940933228 2023-01-22 10:57:01.235462: step: 182/463, loss: 0.14465606212615967 2023-01-22 10:57:01.811684: step: 184/463, loss: 0.03793802484869957 2023-01-22 10:57:02.417783: step: 186/463, loss: 0.15447227656841278 2023-01-22 10:57:03.048280: step: 188/463, loss: 0.05333952605724335 2023-01-22 10:57:03.668371: step: 190/463, loss: 0.16936078667640686 2023-01-22 10:57:04.288329: step: 192/463, loss: 0.12389491498470306 2023-01-22 10:57:04.908488: step: 194/463, loss: 0.12573525309562683 2023-01-22 10:57:05.475150: step: 196/463, loss: 0.32096216082572937 2023-01-22 10:57:06.096881: step: 198/463, loss: 0.09927521646022797 2023-01-22 10:57:06.717593: step: 200/463, loss: 0.10896749049425125 2023-01-22 10:57:07.305863: step: 202/463, loss: 0.1163899302482605 2023-01-22 10:57:07.951840: step: 204/463, loss: 0.10979020595550537 2023-01-22 10:57:08.544821: step: 206/463, loss: 0.05496586486697197 2023-01-22 10:57:09.144383: step: 208/463, loss: 0.10316412150859833 2023-01-22 10:57:09.705186: step: 210/463, loss: 0.35646650195121765 2023-01-22 10:57:10.269932: step: 212/463, loss: 0.2738085985183716 2023-01-22 10:57:10.904201: step: 214/463, loss: 5.22773551940918 2023-01-22 10:57:11.502111: step: 216/463, loss: 0.09675916284322739 2023-01-22 10:57:12.117528: step: 218/463, loss: 0.1906154900789261 2023-01-22 10:57:12.732716: step: 220/463, loss: 0.07194644957780838 2023-01-22 10:57:13.374263: step: 222/463, loss: 0.10693216323852539 2023-01-22 10:57:13.892303: step: 224/463, loss: 0.11566611379384995 2023-01-22 10:57:14.430981: step: 226/463, loss: 0.22901403903961182 2023-01-22 10:57:15.018941: step: 228/463, loss: 0.21474911272525787 2023-01-22 10:57:15.586610: step: 230/463, loss: 0.08919728547334671 2023-01-22 10:57:16.253503: step: 232/463, loss: 0.0692751333117485 2023-01-22 10:57:16.856813: step: 234/463, loss: 0.07684727758169174 2023-01-22 10:57:17.441218: step: 236/463, loss: 0.3953125476837158 2023-01-22 10:57:18.029930: step: 238/463, loss: 0.07253973931074142 2023-01-22 10:57:18.654647: step: 240/463, loss: 0.17964813113212585 2023-01-22 10:57:19.248391: step: 242/463, loss: 0.06846963614225388 2023-01-22 10:57:19.828603: step: 244/463, loss: 0.08263533562421799 2023-01-22 10:57:20.502772: step: 246/463, loss: 0.12028109282255173 2023-01-22 10:57:21.118820: step: 248/463, loss: 0.09234782308340073 2023-01-22 10:57:21.762108: step: 250/463, loss: 0.12176024168729782 2023-01-22 10:57:22.333131: step: 252/463, loss: 0.06890027970075607 2023-01-22 10:57:22.956858: step: 254/463, loss: 0.3822815418243408 2023-01-22 10:57:23.509034: step: 256/463, loss: 0.029156535863876343 2023-01-22 10:57:24.177851: step: 258/463, loss: 0.05815451592206955 2023-01-22 10:57:24.749595: step: 260/463, loss: 0.08179517090320587 2023-01-22 10:57:25.372377: step: 262/463, loss: 0.37647223472595215 2023-01-22 10:57:25.962174: step: 264/463, loss: 0.2492261528968811 2023-01-22 10:57:26.694331: step: 266/463, loss: 0.20476193726062775 2023-01-22 10:57:27.310509: step: 268/463, loss: 0.039153389632701874 2023-01-22 10:57:27.951604: step: 270/463, loss: 0.054536063224077225 2023-01-22 10:57:28.542848: step: 272/463, loss: 0.2026681751012802 2023-01-22 10:57:29.192575: step: 274/463, loss: 0.2152506560087204 2023-01-22 10:57:29.815442: step: 276/463, loss: 0.24596165120601654 2023-01-22 10:57:30.484789: step: 278/463, loss: 0.36492782831192017 2023-01-22 10:57:31.107743: step: 280/463, loss: 0.06065182387828827 2023-01-22 10:57:31.695300: step: 282/463, loss: 0.5141716599464417 2023-01-22 10:57:32.320419: step: 284/463, loss: 0.07557240128517151 2023-01-22 10:57:32.962612: step: 286/463, loss: 0.15395209193229675 2023-01-22 10:57:33.539627: step: 288/463, loss: 0.061565786600112915 2023-01-22 10:57:34.163027: step: 290/463, loss: 0.13776887953281403 2023-01-22 10:57:34.764799: step: 292/463, loss: 0.035926662385463715 2023-01-22 10:57:35.343921: step: 294/463, loss: 0.07954820990562439 2023-01-22 10:57:36.001858: step: 296/463, loss: 0.0456632599234581 2023-01-22 10:57:36.630841: step: 298/463, loss: 0.088347427546978 2023-01-22 10:57:37.271688: step: 300/463, loss: 0.11988343298435211 2023-01-22 10:57:37.906855: step: 302/463, loss: 0.14200922846794128 2023-01-22 10:57:38.492485: step: 304/463, loss: 0.07645564526319504 2023-01-22 10:57:39.106190: step: 306/463, loss: 0.11405491083860397 2023-01-22 10:57:39.678299: step: 308/463, loss: 0.14642997086048126 2023-01-22 10:57:40.283655: step: 310/463, loss: 0.15205056965351105 2023-01-22 10:57:40.872375: step: 312/463, loss: 0.03897145390510559 2023-01-22 10:57:41.488337: step: 314/463, loss: 0.20041872560977936 2023-01-22 10:57:42.143765: step: 316/463, loss: 0.03764211758971214 2023-01-22 10:57:42.797280: step: 318/463, loss: 0.2205481082201004 2023-01-22 10:57:43.448212: step: 320/463, loss: 0.028743749484419823 2023-01-22 10:57:44.061717: step: 322/463, loss: 0.04010086506605148 2023-01-22 10:57:44.680880: step: 324/463, loss: 0.08388634026050568 2023-01-22 10:57:45.376093: step: 326/463, loss: 0.04325639829039574 2023-01-22 10:57:46.044036: step: 328/463, loss: 0.3005983829498291 2023-01-22 10:57:46.616400: step: 330/463, loss: 0.07320655882358551 2023-01-22 10:57:47.191429: step: 332/463, loss: 0.04171394929289818 2023-01-22 10:57:47.842071: step: 334/463, loss: 0.21179060637950897 2023-01-22 10:57:48.483554: step: 336/463, loss: 0.031535230576992035 2023-01-22 10:57:49.086663: step: 338/463, loss: 0.059583790600299835 2023-01-22 10:57:49.659054: step: 340/463, loss: 0.23425628244876862 2023-01-22 10:57:50.260388: step: 342/463, loss: 0.4230680763721466 2023-01-22 10:57:50.812599: step: 344/463, loss: 0.026921847835183144 2023-01-22 10:57:51.432970: step: 346/463, loss: 0.03770776465535164 2023-01-22 10:57:52.049803: step: 348/463, loss: 0.044509172439575195 2023-01-22 10:57:52.637890: step: 350/463, loss: 0.25164082646369934 2023-01-22 10:57:53.201389: step: 352/463, loss: 3.1193528175354004 2023-01-22 10:57:54.452906: step: 354/463, loss: 0.008484726771712303 2023-01-22 10:57:55.046892: step: 356/463, loss: 0.14153073728084564 2023-01-22 10:57:55.728842: step: 358/463, loss: 0.09729599207639694 2023-01-22 10:57:56.361616: step: 360/463, loss: 0.01014737132936716 2023-01-22 10:57:57.004046: step: 362/463, loss: 0.6379090547561646 2023-01-22 10:57:57.617473: step: 364/463, loss: 0.06369558721780777 2023-01-22 10:57:58.229422: step: 366/463, loss: 0.17121385037899017 2023-01-22 10:57:58.779166: step: 368/463, loss: 0.09536228328943253 2023-01-22 10:57:59.366291: step: 370/463, loss: 0.0385630838572979 2023-01-22 10:57:59.947910: step: 372/463, loss: 0.02957984246313572 2023-01-22 10:58:00.543830: step: 374/463, loss: 0.08003193140029907 2023-01-22 10:58:01.221016: step: 376/463, loss: 0.09847112745046616 2023-01-22 10:58:01.895062: step: 378/463, loss: 0.33379536867141724 2023-01-22 10:58:02.532889: step: 380/463, loss: 0.08243055641651154 2023-01-22 10:58:03.178895: step: 382/463, loss: 0.08104237914085388 2023-01-22 10:58:03.823214: step: 384/463, loss: 0.061112821102142334 2023-01-22 10:58:04.376352: step: 386/463, loss: 0.13808482885360718 2023-01-22 10:58:05.032632: step: 388/463, loss: 0.07363472878932953 2023-01-22 10:58:05.642668: step: 390/463, loss: 0.04818311333656311 2023-01-22 10:58:06.207236: step: 392/463, loss: 0.10252054035663605 2023-01-22 10:58:06.783002: step: 394/463, loss: 0.10292989015579224 2023-01-22 10:58:07.459291: step: 396/463, loss: 0.02241295576095581 2023-01-22 10:58:08.040096: step: 398/463, loss: 0.018636174499988556 2023-01-22 10:58:08.664378: step: 400/463, loss: 0.06365367025136948 2023-01-22 10:58:09.251403: step: 402/463, loss: 0.08667969703674316 2023-01-22 10:58:09.895839: step: 404/463, loss: 0.1124776229262352 2023-01-22 10:58:10.452748: step: 406/463, loss: 1.2408829927444458 2023-01-22 10:58:11.058047: step: 408/463, loss: 0.0829954668879509 2023-01-22 10:58:11.703057: step: 410/463, loss: 0.1821594089269638 2023-01-22 10:58:12.329188: step: 412/463, loss: 0.17024026811122894 2023-01-22 10:58:12.934092: step: 414/463, loss: 0.13009324669837952 2023-01-22 10:58:13.615165: step: 416/463, loss: 0.08420077711343765 2023-01-22 10:58:14.223320: step: 418/463, loss: 0.2093186378479004 2023-01-22 10:58:14.784592: step: 420/463, loss: 0.12217625975608826 2023-01-22 10:58:15.445271: step: 422/463, loss: 0.08837151527404785 2023-01-22 10:58:16.029109: step: 424/463, loss: 0.14958344399929047 2023-01-22 10:58:16.676292: step: 426/463, loss: 0.04692188277840614 2023-01-22 10:58:17.302336: step: 428/463, loss: 0.2951260209083557 2023-01-22 10:58:17.915050: step: 430/463, loss: 0.19845955073833466 2023-01-22 10:58:18.535858: step: 432/463, loss: 0.03155672177672386 2023-01-22 10:58:19.126798: step: 434/463, loss: 0.7305673360824585 2023-01-22 10:58:19.714161: step: 436/463, loss: 0.11973927915096283 2023-01-22 10:58:20.406642: step: 438/463, loss: 0.19857928156852722 2023-01-22 10:58:21.051774: step: 440/463, loss: 0.30530673265457153 2023-01-22 10:58:21.722650: step: 442/463, loss: 0.106770820915699 2023-01-22 10:58:22.269699: step: 444/463, loss: 0.06208482384681702 2023-01-22 10:58:22.834150: step: 446/463, loss: 0.6903566122055054 2023-01-22 10:58:23.488628: step: 448/463, loss: 0.06192076578736305 2023-01-22 10:58:24.116231: step: 450/463, loss: 0.061335619539022446 2023-01-22 10:58:24.732245: step: 452/463, loss: 0.036922723054885864 2023-01-22 10:58:25.370715: step: 454/463, loss: 0.20546886324882507 2023-01-22 10:58:25.984556: step: 456/463, loss: 0.14055319130420685 2023-01-22 10:58:26.595536: step: 458/463, loss: 0.072812020778656 2023-01-22 10:58:27.233198: step: 460/463, loss: 0.15294283628463745 2023-01-22 10:58:27.841357: step: 462/463, loss: 0.029308443889021873 2023-01-22 10:58:28.493206: step: 464/463, loss: 0.2965143322944641 2023-01-22 10:58:29.058753: step: 466/463, loss: 0.039474621415138245 2023-01-22 10:58:29.724017: step: 468/463, loss: 0.16768600046634674 2023-01-22 10:58:30.335639: step: 470/463, loss: 0.13155882060527802 2023-01-22 10:58:30.998894: step: 472/463, loss: 0.0809381976723671 2023-01-22 10:58:31.593366: step: 474/463, loss: 0.5181536674499512 2023-01-22 10:58:32.205095: step: 476/463, loss: 0.14347179234027863 2023-01-22 10:58:32.837359: step: 478/463, loss: 0.10302814096212387 2023-01-22 10:58:33.457209: step: 480/463, loss: 2.829655647277832 2023-01-22 10:58:34.063407: step: 482/463, loss: 0.06911370158195496 2023-01-22 10:58:34.639903: step: 484/463, loss: 0.26369795203208923 2023-01-22 10:58:35.263236: step: 486/463, loss: 0.11180444806814194 2023-01-22 10:58:35.914725: step: 488/463, loss: 0.10409456491470337 2023-01-22 10:58:36.568868: step: 490/463, loss: 0.12288526445627213 2023-01-22 10:58:37.234530: step: 492/463, loss: 0.04631849005818367 2023-01-22 10:58:37.865481: step: 494/463, loss: 0.5771387219429016 2023-01-22 10:58:38.482650: step: 496/463, loss: 0.27514976263046265 2023-01-22 10:58:39.128203: step: 498/463, loss: 0.16434890031814575 2023-01-22 10:58:39.772920: step: 500/463, loss: 0.43808069825172424 2023-01-22 10:58:40.346902: step: 502/463, loss: 0.4952459931373596 2023-01-22 10:58:40.939418: step: 504/463, loss: 0.19927358627319336 2023-01-22 10:58:41.664972: step: 506/463, loss: 0.19992642104625702 2023-01-22 10:58:42.269962: step: 508/463, loss: 0.031801093369722366 2023-01-22 10:58:42.847752: step: 510/463, loss: 0.12465578317642212 2023-01-22 10:58:43.527581: step: 512/463, loss: 0.07475098967552185 2023-01-22 10:58:44.175403: step: 514/463, loss: 0.10618216544389725 2023-01-22 10:58:44.781054: step: 516/463, loss: 0.019582342356443405 2023-01-22 10:58:45.333400: step: 518/463, loss: 0.10032596439123154 2023-01-22 10:58:45.954776: step: 520/463, loss: 0.11950547993183136 2023-01-22 10:58:46.555889: step: 522/463, loss: 0.043447431176900864 2023-01-22 10:58:47.221211: step: 524/463, loss: 0.10636359453201294 2023-01-22 10:58:47.846508: step: 526/463, loss: 0.031716953963041306 2023-01-22 10:58:48.460285: step: 528/463, loss: 0.11483673006296158 2023-01-22 10:58:49.054194: step: 530/463, loss: 0.04162720963358879 2023-01-22 10:58:49.699525: step: 532/463, loss: 0.01935102976858616 2023-01-22 10:58:50.265305: step: 534/463, loss: 0.7199362516403198 2023-01-22 10:58:50.894776: step: 536/463, loss: 0.07453575730323792 2023-01-22 10:58:51.458684: step: 538/463, loss: 0.11602141708135605 2023-01-22 10:58:52.017829: step: 540/463, loss: 0.14062979817390442 2023-01-22 10:58:52.583757: step: 542/463, loss: 0.09119509905576706 2023-01-22 10:58:53.238083: step: 544/463, loss: 0.5877248644828796 2023-01-22 10:58:53.922742: step: 546/463, loss: 0.07616828382015228 2023-01-22 10:58:54.568140: step: 548/463, loss: 0.10570330172777176 2023-01-22 10:58:55.299892: step: 550/463, loss: 0.06280627101659775 2023-01-22 10:58:55.911081: step: 552/463, loss: 5.030261993408203 2023-01-22 10:58:56.543616: step: 554/463, loss: 0.03418249636888504 2023-01-22 10:58:57.142506: step: 556/463, loss: 0.34527361392974854 2023-01-22 10:58:57.785007: step: 558/463, loss: 0.19497722387313843 2023-01-22 10:58:58.432149: step: 560/463, loss: 0.05754159763455391 2023-01-22 10:58:59.083279: step: 562/463, loss: 0.07024494558572769 2023-01-22 10:58:59.710297: step: 564/463, loss: 0.19119054079055786 2023-01-22 10:59:00.300848: step: 566/463, loss: 0.07003539800643921 2023-01-22 10:59:00.957703: step: 568/463, loss: 0.07729244977235794 2023-01-22 10:59:01.635009: step: 570/463, loss: 0.047074176371097565 2023-01-22 10:59:02.226383: step: 572/463, loss: 0.06070127338171005 2023-01-22 10:59:02.811953: step: 574/463, loss: 0.04916515201330185 2023-01-22 10:59:03.426387: step: 576/463, loss: 0.1154446229338646 2023-01-22 10:59:04.089666: step: 578/463, loss: 0.33982861042022705 2023-01-22 10:59:04.706789: step: 580/463, loss: 0.3637949526309967 2023-01-22 10:59:05.305360: step: 582/463, loss: 0.09657775610685349 2023-01-22 10:59:05.935296: step: 584/463, loss: 0.11097947508096695 2023-01-22 10:59:06.510935: step: 586/463, loss: 0.1281764805316925 2023-01-22 10:59:07.143925: step: 588/463, loss: 0.40551435947418213 2023-01-22 10:59:07.762456: step: 590/463, loss: 0.03508146479725838 2023-01-22 10:59:08.438651: step: 592/463, loss: 0.14648066461086273 2023-01-22 10:59:09.051451: step: 594/463, loss: 0.06095085293054581 2023-01-22 10:59:09.609034: step: 596/463, loss: 0.08103873580694199 2023-01-22 10:59:10.222444: step: 598/463, loss: 0.2542259693145752 2023-01-22 10:59:10.837569: step: 600/463, loss: 0.04838281124830246 2023-01-22 10:59:11.414979: step: 602/463, loss: 0.11283738166093826 2023-01-22 10:59:12.031483: step: 604/463, loss: 0.07624384015798569 2023-01-22 10:59:12.646059: step: 606/463, loss: 0.12532417476177216 2023-01-22 10:59:13.241868: step: 608/463, loss: 0.9456416368484497 2023-01-22 10:59:13.892387: step: 610/463, loss: 0.08015812933444977 2023-01-22 10:59:14.478360: step: 612/463, loss: 0.06637389212846756 2023-01-22 10:59:15.144326: step: 614/463, loss: 0.5557191371917725 2023-01-22 10:59:15.755745: step: 616/463, loss: 0.2358798235654831 2023-01-22 10:59:16.358843: step: 618/463, loss: 0.0686626061797142 2023-01-22 10:59:16.972706: step: 620/463, loss: 0.05433014780282974 2023-01-22 10:59:17.636880: step: 622/463, loss: 0.10262155532836914 2023-01-22 10:59:18.278437: step: 624/463, loss: 0.17306746542453766 2023-01-22 10:59:18.892108: step: 626/463, loss: 0.14202268421649933 2023-01-22 10:59:19.486556: step: 628/463, loss: 0.02132358029484749 2023-01-22 10:59:20.089849: step: 630/463, loss: 0.11165513098239899 2023-01-22 10:59:20.666986: step: 632/463, loss: 0.05399324744939804 2023-01-22 10:59:21.291177: step: 634/463, loss: 0.13710065186023712 2023-01-22 10:59:21.909239: step: 636/463, loss: 0.3590250611305237 2023-01-22 10:59:22.455869: step: 638/463, loss: 0.016618745401501656 2023-01-22 10:59:23.097737: step: 640/463, loss: 0.25671032071113586 2023-01-22 10:59:23.711090: step: 642/463, loss: 0.12181492894887924 2023-01-22 10:59:24.319970: step: 644/463, loss: 0.028102317824959755 2023-01-22 10:59:24.898454: step: 646/463, loss: 0.22035276889801025 2023-01-22 10:59:25.575010: step: 648/463, loss: 0.07939334958791733 2023-01-22 10:59:26.273762: step: 650/463, loss: 0.1954226940870285 2023-01-22 10:59:26.910956: step: 652/463, loss: 0.08822217583656311 2023-01-22 10:59:27.561762: step: 654/463, loss: 0.0699228048324585 2023-01-22 10:59:28.169162: step: 656/463, loss: 0.1583009511232376 2023-01-22 10:59:28.913633: step: 658/463, loss: 0.2637269198894501 2023-01-22 10:59:29.587672: step: 660/463, loss: 0.1944698989391327 2023-01-22 10:59:30.207954: step: 662/463, loss: 0.161536306142807 2023-01-22 10:59:30.850960: step: 664/463, loss: 0.09331867098808289 2023-01-22 10:59:31.466727: step: 666/463, loss: 0.014847720973193645 2023-01-22 10:59:32.127492: step: 668/463, loss: 0.12522071599960327 2023-01-22 10:59:32.896968: step: 670/463, loss: 0.06093982979655266 2023-01-22 10:59:33.567725: step: 672/463, loss: 0.06558510661125183 2023-01-22 10:59:34.186430: step: 674/463, loss: 0.05113206058740616 2023-01-22 10:59:34.905032: step: 676/463, loss: 0.20429395139217377 2023-01-22 10:59:35.629316: step: 678/463, loss: 0.9285648465156555 2023-01-22 10:59:36.318273: step: 680/463, loss: 0.03893982246518135 2023-01-22 10:59:36.998554: step: 682/463, loss: 0.07223246246576309 2023-01-22 10:59:37.694075: step: 684/463, loss: 0.0838775485754013 2023-01-22 10:59:38.344393: step: 686/463, loss: 0.05167365074157715 2023-01-22 10:59:38.873827: step: 688/463, loss: 0.11468897759914398 2023-01-22 10:59:39.563496: step: 690/463, loss: 0.0827876478433609 2023-01-22 10:59:40.254155: step: 692/463, loss: 0.04692939668893814 2023-01-22 10:59:40.812021: step: 694/463, loss: 0.09748489409685135 2023-01-22 10:59:41.590959: step: 696/463, loss: 0.1157601922750473 2023-01-22 10:59:42.232308: step: 698/463, loss: 0.058855295181274414 2023-01-22 10:59:42.855557: step: 700/463, loss: 0.041207291185855865 2023-01-22 10:59:43.454742: step: 702/463, loss: 0.43969887495040894 2023-01-22 10:59:44.106667: step: 704/463, loss: 0.0555635541677475 2023-01-22 10:59:44.744730: step: 706/463, loss: 0.18539059162139893 2023-01-22 10:59:45.344341: step: 708/463, loss: 0.058708589524030685 2023-01-22 10:59:46.033606: step: 710/463, loss: 0.09798247367143631 2023-01-22 10:59:46.683902: step: 712/463, loss: 0.11035928875207901 2023-01-22 10:59:47.302300: step: 714/463, loss: 0.09508532285690308 2023-01-22 10:59:47.870216: step: 716/463, loss: 0.14314141869544983 2023-01-22 10:59:48.491643: step: 718/463, loss: 0.09973669797182083 2023-01-22 10:59:49.113498: step: 720/463, loss: 0.29856204986572266 2023-01-22 10:59:49.697466: step: 722/463, loss: 0.4054211676120758 2023-01-22 10:59:50.356456: step: 724/463, loss: 0.0859195664525032 2023-01-22 10:59:50.953415: step: 726/463, loss: 0.09558829665184021 2023-01-22 10:59:51.580825: step: 728/463, loss: 0.20391599833965302 2023-01-22 10:59:52.130810: step: 730/463, loss: 0.0953231006860733 2023-01-22 10:59:52.789265: step: 732/463, loss: 0.07102783024311066 2023-01-22 10:59:53.421497: step: 734/463, loss: 0.0794430747628212 2023-01-22 10:59:54.031639: step: 736/463, loss: 0.07178302854299545 2023-01-22 10:59:54.628009: step: 738/463, loss: 0.07235711812973022 2023-01-22 10:59:55.209133: step: 740/463, loss: 0.12912996113300323 2023-01-22 10:59:55.881658: step: 742/463, loss: 0.06768649816513062 2023-01-22 10:59:56.489043: step: 744/463, loss: 0.029639367014169693 2023-01-22 10:59:57.105238: step: 746/463, loss: 0.1089198887348175 2023-01-22 10:59:57.736499: step: 748/463, loss: 0.2974899709224701 2023-01-22 10:59:58.397961: step: 750/463, loss: 0.111813023686409 2023-01-22 10:59:59.061805: step: 752/463, loss: 0.2780773937702179 2023-01-22 10:59:59.662186: step: 754/463, loss: 0.0642269179224968 2023-01-22 11:00:00.277831: step: 756/463, loss: 0.08696962147951126 2023-01-22 11:00:00.921306: step: 758/463, loss: 0.05171702057123184 2023-01-22 11:00:01.542121: step: 760/463, loss: 0.15012021362781525 2023-01-22 11:00:02.171897: step: 762/463, loss: 0.022429676726460457 2023-01-22 11:00:02.815105: step: 764/463, loss: 0.051252152770757675 2023-01-22 11:00:03.381409: step: 766/463, loss: 0.14745859801769257 2023-01-22 11:00:04.017259: step: 768/463, loss: 0.1641344130039215 2023-01-22 11:00:04.622471: step: 770/463, loss: 0.040366142988204956 2023-01-22 11:00:05.245788: step: 772/463, loss: 0.11375197023153305 2023-01-22 11:00:05.865626: step: 774/463, loss: 0.19613416492938995 2023-01-22 11:00:06.440414: step: 776/463, loss: 0.1344902366399765 2023-01-22 11:00:07.046607: step: 778/463, loss: 0.32456353306770325 2023-01-22 11:00:07.714747: step: 780/463, loss: 0.04453514516353607 2023-01-22 11:00:08.325704: step: 782/463, loss: 0.234833762049675 2023-01-22 11:00:08.932892: step: 784/463, loss: 0.03816795349121094 2023-01-22 11:00:09.649257: step: 786/463, loss: 0.16284196078777313 2023-01-22 11:00:10.260664: step: 788/463, loss: 0.07719583809375763 2023-01-22 11:00:10.905406: step: 790/463, loss: 0.14916366338729858 2023-01-22 11:00:11.542024: step: 792/463, loss: 0.2743769884109497 2023-01-22 11:00:12.144525: step: 794/463, loss: 0.2819214165210724 2023-01-22 11:00:12.780956: step: 796/463, loss: 0.13958901166915894 2023-01-22 11:00:13.467990: step: 798/463, loss: 0.024825511500239372 2023-01-22 11:00:14.116902: step: 800/463, loss: 0.11520198732614517 2023-01-22 11:00:14.731762: step: 802/463, loss: 0.5912827849388123 2023-01-22 11:00:15.376911: step: 804/463, loss: 0.11832356452941895 2023-01-22 11:00:15.957898: step: 806/463, loss: 1.5584622621536255 2023-01-22 11:00:16.606460: step: 808/463, loss: 0.11311966180801392 2023-01-22 11:00:17.269938: step: 810/463, loss: 0.1519646793603897 2023-01-22 11:00:17.951979: step: 812/463, loss: 0.1936693787574768 2023-01-22 11:00:18.633939: step: 814/463, loss: 0.0492548905313015 2023-01-22 11:00:19.320852: step: 816/463, loss: 0.45868703722953796 2023-01-22 11:00:20.017431: step: 818/463, loss: 0.08923227339982986 2023-01-22 11:00:20.659094: step: 820/463, loss: 0.08991425484418869 2023-01-22 11:00:21.270910: step: 822/463, loss: 0.06471320986747742 2023-01-22 11:00:21.803295: step: 824/463, loss: 0.052482862025499344 2023-01-22 11:00:22.406682: step: 826/463, loss: 0.19096697866916656 2023-01-22 11:00:22.968548: step: 828/463, loss: 0.15873336791992188 2023-01-22 11:00:23.588652: step: 830/463, loss: 0.16968882083892822 2023-01-22 11:00:24.196394: step: 832/463, loss: 0.11979441344738007 2023-01-22 11:00:24.825965: step: 834/463, loss: 0.316781610250473 2023-01-22 11:00:25.445172: step: 836/463, loss: 0.11227911710739136 2023-01-22 11:00:26.072896: step: 838/463, loss: 0.07925921678543091 2023-01-22 11:00:26.730200: step: 840/463, loss: 0.1379840224981308 2023-01-22 11:00:27.320263: step: 842/463, loss: 0.07753778249025345 2023-01-22 11:00:27.911316: step: 844/463, loss: 0.5877388715744019 2023-01-22 11:00:28.551419: step: 846/463, loss: 0.21845382452011108 2023-01-22 11:00:29.185466: step: 848/463, loss: 0.10501208901405334 2023-01-22 11:00:29.841221: step: 850/463, loss: 0.05404500290751457 2023-01-22 11:00:30.464859: step: 852/463, loss: 0.20136189460754395 2023-01-22 11:00:31.155494: step: 854/463, loss: 0.09578698128461838 2023-01-22 11:00:31.770224: step: 856/463, loss: 0.2713068723678589 2023-01-22 11:00:32.373859: step: 858/463, loss: 0.16097477078437805 2023-01-22 11:00:32.992698: step: 860/463, loss: 0.21929654479026794 2023-01-22 11:00:33.567863: step: 862/463, loss: 0.10486377775669098 2023-01-22 11:00:34.180227: step: 864/463, loss: 0.11905966699123383 2023-01-22 11:00:34.768077: step: 866/463, loss: 0.04707135632634163 2023-01-22 11:00:35.415815: step: 868/463, loss: 0.15588407218456268 2023-01-22 11:00:36.091031: step: 870/463, loss: 0.07511145621538162 2023-01-22 11:00:36.673585: step: 872/463, loss: 0.021590089425444603 2023-01-22 11:00:37.309294: step: 874/463, loss: 0.04562314599752426 2023-01-22 11:00:37.869585: step: 876/463, loss: 0.9890525341033936 2023-01-22 11:00:38.489844: step: 878/463, loss: 0.06867709010839462 2023-01-22 11:00:39.095495: step: 880/463, loss: 0.04112846776843071 2023-01-22 11:00:39.699657: step: 882/463, loss: 0.2877635955810547 2023-01-22 11:00:40.298298: step: 884/463, loss: 0.0786876529455185 2023-01-22 11:00:40.863247: step: 886/463, loss: 0.06392823904752731 2023-01-22 11:00:41.486465: step: 888/463, loss: 0.03156006336212158 2023-01-22 11:00:42.174869: step: 890/463, loss: 0.5516294240951538 2023-01-22 11:00:42.736275: step: 892/463, loss: 0.11540041118860245 2023-01-22 11:00:43.478234: step: 894/463, loss: 0.21750399470329285 2023-01-22 11:00:44.100687: step: 896/463, loss: 0.37393686175346375 2023-01-22 11:00:44.718522: step: 898/463, loss: 0.17360295355319977 2023-01-22 11:00:45.263231: step: 900/463, loss: 0.16414818167686462 2023-01-22 11:00:45.815390: step: 902/463, loss: 0.10427167266607285 2023-01-22 11:00:46.464864: step: 904/463, loss: 0.116034135222435 2023-01-22 11:00:47.090664: step: 906/463, loss: 0.3298241198062897 2023-01-22 11:00:47.704597: step: 908/463, loss: 0.1641688346862793 2023-01-22 11:00:48.300429: step: 910/463, loss: 0.0668342337012291 2023-01-22 11:00:48.871778: step: 912/463, loss: 0.052535999566316605 2023-01-22 11:00:49.422394: step: 914/463, loss: 0.07598011195659637 2023-01-22 11:00:50.001192: step: 916/463, loss: 0.05164685100317001 2023-01-22 11:00:50.588694: step: 918/463, loss: 0.0815548375248909 2023-01-22 11:00:51.181777: step: 920/463, loss: 0.20160195231437683 2023-01-22 11:00:51.800744: step: 922/463, loss: 0.07057924568653107 2023-01-22 11:00:52.415225: step: 924/463, loss: 0.10972698777914047 2023-01-22 11:00:53.021057: step: 926/463, loss: 0.046164121478796005 ================================================== Loss: 0.190 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.307815143824027, 'r': 0.3451968690702087, 'f1': 0.3254360465116279}, 'combined': 0.23979498164014684, 'epoch': 15} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3428013097703563, 'r': 0.3765143062275513, 'f1': 0.35886777501145495}, 'combined': 0.2781654524012235, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27384963768115944, 'r': 0.35855075901328276, 'f1': 0.31052793755135577}, 'combined': 0.2288100592483674, 'epoch': 15} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3231743107138612, 'r': 0.3816902474883379, 'f1': 0.3500033622143377}, 'combined': 0.2712944721469986, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27448744745979536, 'r': 0.35001055918972007, 'f1': 0.3076823431075605}, 'combined': 0.22671330544767612, 'epoch': 15} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3279096426751969, 'r': 0.36919973554882, 'f1': 0.3473318739966417}, 'combined': 0.2692237492222773, 'epoch': 15} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2632978723404255, 'r': 0.3535714285714286, 'f1': 0.3018292682926829}, 'combined': 0.20121951219512194, 'epoch': 15} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.41304347826086957, 'f1': 0.31147540983606553}, 'combined': 0.15573770491803277, 'epoch': 15} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.425, 'r': 0.29310344827586204, 'f1': 0.3469387755102041}, 'combined': 0.2312925170068027, 'epoch': 15} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29855383301254707, 'r': 0.34444161379815674, 'f1': 0.3198603180116804}, 'combined': 0.23568655011386977, 'epoch': 7} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.30988948788864984, 'r': 0.3867922100668993, 'f1': 0.34409642236532006}, 'combined': 0.2667158871922577, 'epoch': 7} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2636363636363636, 'r': 0.4142857142857143, 'f1': 0.3222222222222222}, 'combined': 0.2148148148148148, 'epoch': 7} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29021588054776093, 'r': 0.325460313859064, 'f1': 0.30682931199235547}, 'combined': 0.2260847562048935, 'epoch': 13} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32837198696491027, 'r': 0.4053341714098111, 'f1': 0.3628166009822085}, 'combined': 0.28122626487616165, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3333333333333333, 'r': 0.4782608695652174, 'f1': 0.3928571428571428}, 'combined': 0.1964285714285714, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27448744745979536, 'r': 0.35001055918972007, 'f1': 0.3076823431075605}, 'combined': 0.22671330544767612, 'epoch': 15} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3279096426751969, 'r': 0.36919973554882, 'f1': 0.3473318739966417}, 'combined': 0.2692237492222773, 'epoch': 15} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.425, 'r': 0.29310344827586204, 'f1': 0.3469387755102041}, 'combined': 0.2312925170068027, 'epoch': 15} ****************************** Epoch: 16 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 11:03:38.176399: step: 2/463, loss: 0.09392962604761124 2023-01-22 11:03:38.838039: step: 4/463, loss: 0.09815418720245361 2023-01-22 11:03:39.521572: step: 6/463, loss: 0.035261865705251694 2023-01-22 11:03:40.208079: step: 8/463, loss: 0.1329983025789261 2023-01-22 11:03:40.804640: step: 10/463, loss: 0.03475189581513405 2023-01-22 11:03:41.460464: step: 12/463, loss: 0.1617887318134308 2023-01-22 11:03:42.111659: step: 14/463, loss: 0.07033581286668777 2023-01-22 11:03:42.803554: step: 16/463, loss: 0.05658769607543945 2023-01-22 11:03:43.418971: step: 18/463, loss: 0.1192563846707344 2023-01-22 11:03:43.964748: step: 20/463, loss: 0.12088558822870255 2023-01-22 11:03:44.566413: step: 22/463, loss: 0.11392351984977722 2023-01-22 11:03:45.225623: step: 24/463, loss: 0.47715991735458374 2023-01-22 11:03:45.805682: step: 26/463, loss: 0.057405758649110794 2023-01-22 11:03:46.373028: step: 28/463, loss: 0.0925820916891098 2023-01-22 11:03:47.075303: step: 30/463, loss: 0.01833358220756054 2023-01-22 11:03:47.765686: step: 32/463, loss: 0.1886693388223648 2023-01-22 11:03:48.387442: step: 34/463, loss: 0.1294589638710022 2023-01-22 11:03:48.930558: step: 36/463, loss: 0.034382808953523636 2023-01-22 11:03:49.572301: step: 38/463, loss: 0.2707841992378235 2023-01-22 11:03:50.214265: step: 40/463, loss: 0.042352087795734406 2023-01-22 11:03:50.876183: step: 42/463, loss: 0.1567608267068863 2023-01-22 11:03:51.630601: step: 44/463, loss: 0.041665706783533096 2023-01-22 11:03:52.229381: step: 46/463, loss: 0.5418013334274292 2023-01-22 11:03:52.833096: step: 48/463, loss: 0.03634791821241379 2023-01-22 11:03:53.420828: step: 50/463, loss: 0.041979484260082245 2023-01-22 11:03:53.992156: step: 52/463, loss: 0.08751584589481354 2023-01-22 11:03:54.632250: step: 54/463, loss: 0.07277601212263107 2023-01-22 11:03:55.245191: step: 56/463, loss: 0.018951643258333206 2023-01-22 11:03:55.851448: step: 58/463, loss: 0.029824329540133476 2023-01-22 11:03:56.481313: step: 60/463, loss: 0.103655606508255 2023-01-22 11:03:57.068942: step: 62/463, loss: 0.1494414210319519 2023-01-22 11:03:57.705342: step: 64/463, loss: 0.19174297153949738 2023-01-22 11:03:58.270366: step: 66/463, loss: 0.01808498427271843 2023-01-22 11:03:58.894933: step: 68/463, loss: 0.2951965928077698 2023-01-22 11:03:59.523247: step: 70/463, loss: 1.166751742362976 2023-01-22 11:04:00.140599: step: 72/463, loss: 0.10040746629238129 2023-01-22 11:04:00.702042: step: 74/463, loss: 0.18768052756786346 2023-01-22 11:04:01.334699: step: 76/463, loss: 0.11471167951822281 2023-01-22 11:04:01.946936: step: 78/463, loss: 0.1377413272857666 2023-01-22 11:04:02.600484: step: 80/463, loss: 0.04321456700563431 2023-01-22 11:04:03.230761: step: 82/463, loss: 0.02770810015499592 2023-01-22 11:04:03.890088: step: 84/463, loss: 0.06217678263783455 2023-01-22 11:04:04.476984: step: 86/463, loss: 0.048308681696653366 2023-01-22 11:04:05.100967: step: 88/463, loss: 0.2211398482322693 2023-01-22 11:04:05.729444: step: 90/463, loss: 0.058094583451747894 2023-01-22 11:04:06.378345: step: 92/463, loss: 4.702839374542236 2023-01-22 11:04:07.010222: step: 94/463, loss: 0.16564664244651794 2023-01-22 11:04:07.570693: step: 96/463, loss: 0.07649922370910645 2023-01-22 11:04:08.177033: step: 98/463, loss: 0.18419580161571503 2023-01-22 11:04:08.758974: step: 100/463, loss: 0.009880303405225277 2023-01-22 11:04:09.382216: step: 102/463, loss: 0.05654880404472351 2023-01-22 11:04:10.027858: step: 104/463, loss: 0.08590541034936905 2023-01-22 11:04:10.704029: step: 106/463, loss: 0.015450265258550644 2023-01-22 11:04:11.333842: step: 108/463, loss: 0.09660845249891281 2023-01-22 11:04:12.009808: step: 110/463, loss: 0.07592320442199707 2023-01-22 11:04:12.596533: step: 112/463, loss: 0.13545340299606323 2023-01-22 11:04:13.195947: step: 114/463, loss: 0.10005438327789307 2023-01-22 11:04:13.842656: step: 116/463, loss: 0.027361372485756874 2023-01-22 11:04:14.489148: step: 118/463, loss: 0.06829187273979187 2023-01-22 11:04:15.113751: step: 120/463, loss: 0.02588752657175064 2023-01-22 11:04:15.758847: step: 122/463, loss: 0.23005206882953644 2023-01-22 11:04:16.412417: step: 124/463, loss: 0.14744412899017334 2023-01-22 11:04:17.090481: step: 126/463, loss: 0.06101863831281662 2023-01-22 11:04:17.735081: step: 128/463, loss: 0.04215003550052643 2023-01-22 11:04:18.321050: step: 130/463, loss: 0.10857198387384415 2023-01-22 11:04:18.870020: step: 132/463, loss: 0.01935398392379284 2023-01-22 11:04:19.443361: step: 134/463, loss: 0.03415912762284279 2023-01-22 11:04:20.002916: step: 136/463, loss: 0.09079161286354065 2023-01-22 11:04:20.647381: step: 138/463, loss: 0.04729504883289337 2023-01-22 11:04:21.338167: step: 140/463, loss: 0.20324020087718964 2023-01-22 11:04:22.030264: step: 142/463, loss: 0.1295374631881714 2023-01-22 11:04:22.640952: step: 144/463, loss: 0.08116629719734192 2023-01-22 11:04:23.235335: step: 146/463, loss: 0.10156693309545517 2023-01-22 11:04:23.837977: step: 148/463, loss: 0.07977838814258575 2023-01-22 11:04:24.432107: step: 150/463, loss: 0.09160180389881134 2023-01-22 11:04:25.038461: step: 152/463, loss: 0.0389992855489254 2023-01-22 11:04:25.684888: step: 154/463, loss: 0.16848435997962952 2023-01-22 11:04:26.330010: step: 156/463, loss: 0.04797053709626198 2023-01-22 11:04:26.987720: step: 158/463, loss: 0.15641289949417114 2023-01-22 11:04:27.593239: step: 160/463, loss: 0.03032768703997135 2023-01-22 11:04:28.217396: step: 162/463, loss: 0.17668519914150238 2023-01-22 11:04:28.878755: step: 164/463, loss: 0.16846053302288055 2023-01-22 11:04:29.511616: step: 166/463, loss: 0.1726134717464447 2023-01-22 11:04:30.115095: step: 168/463, loss: 0.04289911314845085 2023-01-22 11:04:30.639903: step: 170/463, loss: 0.02088414877653122 2023-01-22 11:04:31.221594: step: 172/463, loss: 0.11820492148399353 2023-01-22 11:04:31.852528: step: 174/463, loss: 0.023546187207102776 2023-01-22 11:04:32.478852: step: 176/463, loss: 0.03653356060385704 2023-01-22 11:04:33.086152: step: 178/463, loss: 0.05195843055844307 2023-01-22 11:04:33.679612: step: 180/463, loss: 0.1093713566660881 2023-01-22 11:04:34.271098: step: 182/463, loss: 0.09825106710195541 2023-01-22 11:04:34.920976: step: 184/463, loss: 0.13142557442188263 2023-01-22 11:04:35.575607: step: 186/463, loss: 0.05736227706074715 2023-01-22 11:04:36.151221: step: 188/463, loss: 0.06639780849218369 2023-01-22 11:04:36.817859: step: 190/463, loss: 0.03851672634482384 2023-01-22 11:04:37.406126: step: 192/463, loss: 0.038353074342012405 2023-01-22 11:04:38.001654: step: 194/463, loss: 0.0741320252418518 2023-01-22 11:04:38.586502: step: 196/463, loss: 0.0817531943321228 2023-01-22 11:04:39.148279: step: 198/463, loss: 0.10499387979507446 2023-01-22 11:04:39.750975: step: 200/463, loss: 0.010208223946392536 2023-01-22 11:04:40.382789: step: 202/463, loss: 0.16373755037784576 2023-01-22 11:04:40.988515: step: 204/463, loss: 0.08796074241399765 2023-01-22 11:04:41.651064: step: 206/463, loss: 0.061089225113391876 2023-01-22 11:04:42.228851: step: 208/463, loss: 0.1279318928718567 2023-01-22 11:04:42.829895: step: 210/463, loss: 0.04245173931121826 2023-01-22 11:04:43.445945: step: 212/463, loss: 0.05689255893230438 2023-01-22 11:04:44.064321: step: 214/463, loss: 0.2360590696334839 2023-01-22 11:04:44.630029: step: 216/463, loss: 0.8640976548194885 2023-01-22 11:04:45.249488: step: 218/463, loss: 0.7225779891014099 2023-01-22 11:04:45.887893: step: 220/463, loss: 0.2748264968395233 2023-01-22 11:04:46.530504: step: 222/463, loss: 0.0847368836402893 2023-01-22 11:04:47.154036: step: 224/463, loss: 0.06542827188968658 2023-01-22 11:04:47.767142: step: 226/463, loss: 0.09589657187461853 2023-01-22 11:04:48.380307: step: 228/463, loss: 0.16839241981506348 2023-01-22 11:04:48.980604: step: 230/463, loss: 0.09850162267684937 2023-01-22 11:04:49.707122: step: 232/463, loss: 0.10379438102245331 2023-01-22 11:04:50.265403: step: 234/463, loss: 0.04850511625409126 2023-01-22 11:04:50.921990: step: 236/463, loss: 0.0336916521191597 2023-01-22 11:04:51.488001: step: 238/463, loss: 0.09501839429140091 2023-01-22 11:04:52.139336: step: 240/463, loss: 0.16434313356876373 2023-01-22 11:04:52.718919: step: 242/463, loss: 0.0782540887594223 2023-01-22 11:04:53.322941: step: 244/463, loss: 0.11280696839094162 2023-01-22 11:04:53.946658: step: 246/463, loss: 0.08513292670249939 2023-01-22 11:04:54.543138: step: 248/463, loss: 0.05924632027745247 2023-01-22 11:04:55.161691: step: 250/463, loss: 0.07422038167715073 2023-01-22 11:04:55.787393: step: 252/463, loss: 1.486720323562622 2023-01-22 11:04:56.458813: step: 254/463, loss: 0.07059735059738159 2023-01-22 11:04:57.109593: step: 256/463, loss: 0.10897721350193024 2023-01-22 11:04:57.738540: step: 258/463, loss: 0.13812844455242157 2023-01-22 11:04:58.430459: step: 260/463, loss: 0.18931354582309723 2023-01-22 11:04:59.026513: step: 262/463, loss: 0.04782324656844139 2023-01-22 11:04:59.620489: step: 264/463, loss: 0.06735178083181381 2023-01-22 11:05:00.297803: step: 266/463, loss: 0.17109337449073792 2023-01-22 11:05:00.878450: step: 268/463, loss: 0.06339196860790253 2023-01-22 11:05:01.526742: step: 270/463, loss: 0.06292379647493362 2023-01-22 11:05:02.159784: step: 272/463, loss: 0.18615616858005524 2023-01-22 11:05:02.779382: step: 274/463, loss: 0.02608204260468483 2023-01-22 11:05:03.398705: step: 276/463, loss: 0.07891713827848434 2023-01-22 11:05:03.934814: step: 278/463, loss: 0.5470752120018005 2023-01-22 11:05:04.517487: step: 280/463, loss: 0.0880608856678009 2023-01-22 11:05:05.089678: step: 282/463, loss: 0.11302398890256882 2023-01-22 11:05:05.775822: step: 284/463, loss: 0.7214975357055664 2023-01-22 11:05:06.441666: step: 286/463, loss: 0.19971822202205658 2023-01-22 11:05:07.027098: step: 288/463, loss: 0.016864435747265816 2023-01-22 11:05:07.645043: step: 290/463, loss: 0.05873178690671921 2023-01-22 11:05:08.259159: step: 292/463, loss: 0.0906856432557106 2023-01-22 11:05:08.915257: step: 294/463, loss: 0.07318469882011414 2023-01-22 11:05:09.508135: step: 296/463, loss: 0.007730225101113319 2023-01-22 11:05:10.093308: step: 298/463, loss: 0.02793276682496071 2023-01-22 11:05:10.725116: step: 300/463, loss: 0.08700327575206757 2023-01-22 11:05:11.345937: step: 302/463, loss: 0.2188662439584732 2023-01-22 11:05:11.973351: step: 304/463, loss: 0.14005441963672638 2023-01-22 11:05:12.617703: step: 306/463, loss: 0.049777571111917496 2023-01-22 11:05:13.243550: step: 308/463, loss: 0.0704931691288948 2023-01-22 11:05:13.812700: step: 310/463, loss: 0.03475925326347351 2023-01-22 11:05:14.346637: step: 312/463, loss: 0.4202437996864319 2023-01-22 11:05:14.934501: step: 314/463, loss: 0.11502720415592194 2023-01-22 11:05:15.537790: step: 316/463, loss: 0.06981195509433746 2023-01-22 11:05:16.157397: step: 318/463, loss: 0.38931483030319214 2023-01-22 11:05:16.737699: step: 320/463, loss: 0.04184393957257271 2023-01-22 11:05:17.368455: step: 322/463, loss: 0.022175397723913193 2023-01-22 11:05:17.933936: step: 324/463, loss: 0.08440104871988297 2023-01-22 11:05:18.553718: step: 326/463, loss: 0.07439716160297394 2023-01-22 11:05:19.117270: step: 328/463, loss: 0.6232091784477234 2023-01-22 11:05:19.739315: step: 330/463, loss: 0.12134966999292374 2023-01-22 11:05:20.366711: step: 332/463, loss: 0.20825999975204468 2023-01-22 11:05:20.982926: step: 334/463, loss: 0.020665599033236504 2023-01-22 11:05:21.568519: step: 336/463, loss: 0.038438279181718826 2023-01-22 11:05:22.194995: step: 338/463, loss: 0.03745255991816521 2023-01-22 11:05:22.832342: step: 340/463, loss: 0.7567254304885864 2023-01-22 11:05:23.472803: step: 342/463, loss: 0.0475521981716156 2023-01-22 11:05:24.117565: step: 344/463, loss: 0.12182119488716125 2023-01-22 11:05:24.744104: step: 346/463, loss: 0.27585577964782715 2023-01-22 11:05:25.354507: step: 348/463, loss: 0.08076296001672745 2023-01-22 11:05:26.010113: step: 350/463, loss: 0.05759488791227341 2023-01-22 11:05:26.580604: step: 352/463, loss: 0.07504604756832123 2023-01-22 11:05:27.206257: step: 354/463, loss: 0.3895440399646759 2023-01-22 11:05:27.801765: step: 356/463, loss: 0.04366806149482727 2023-01-22 11:05:28.381188: step: 358/463, loss: 0.13325652480125427 2023-01-22 11:05:28.985589: step: 360/463, loss: 0.7767708897590637 2023-01-22 11:05:29.540598: step: 362/463, loss: 0.045997437089681625 2023-01-22 11:05:30.124036: step: 364/463, loss: 0.09603548049926758 2023-01-22 11:05:30.714904: step: 366/463, loss: 0.08423476666212082 2023-01-22 11:05:31.291316: step: 368/463, loss: 0.12476283311843872 2023-01-22 11:05:31.855859: step: 370/463, loss: 0.1248638927936554 2023-01-22 11:05:32.551089: step: 372/463, loss: 0.9874676465988159 2023-01-22 11:05:33.160539: step: 374/463, loss: 0.9051044583320618 2023-01-22 11:05:33.812271: step: 376/463, loss: 0.02435396797955036 2023-01-22 11:05:34.457601: step: 378/463, loss: 0.054532065987586975 2023-01-22 11:05:35.052544: step: 380/463, loss: 1.1708176136016846 2023-01-22 11:05:35.623797: step: 382/463, loss: 0.06749589741230011 2023-01-22 11:05:36.365771: step: 384/463, loss: 0.07421965897083282 2023-01-22 11:05:37.030933: step: 386/463, loss: 0.06768511235713959 2023-01-22 11:05:37.592837: step: 388/463, loss: 0.05945784971117973 2023-01-22 11:05:38.201358: step: 390/463, loss: 0.08609744161367416 2023-01-22 11:05:38.803553: step: 392/463, loss: 0.06459945440292358 2023-01-22 11:05:39.517844: step: 394/463, loss: 0.11390979588031769 2023-01-22 11:05:40.090749: step: 396/463, loss: 0.09988781809806824 2023-01-22 11:05:40.708985: step: 398/463, loss: 0.03948311507701874 2023-01-22 11:05:41.509823: step: 400/463, loss: 0.13762977719306946 2023-01-22 11:05:42.104220: step: 402/463, loss: 0.30338630080223083 2023-01-22 11:05:42.688707: step: 404/463, loss: 0.11689325422048569 2023-01-22 11:05:43.298625: step: 406/463, loss: 0.07996908575296402 2023-01-22 11:05:43.901195: step: 408/463, loss: 0.0428166538476944 2023-01-22 11:05:44.509848: step: 410/463, loss: 0.06921526789665222 2023-01-22 11:05:45.157872: step: 412/463, loss: 0.06507103890180588 2023-01-22 11:05:45.803172: step: 414/463, loss: 0.11402113735675812 2023-01-22 11:05:46.444999: step: 416/463, loss: 0.266824334859848 2023-01-22 11:05:47.086531: step: 418/463, loss: 0.07551302760839462 2023-01-22 11:05:47.659047: step: 420/463, loss: 0.0763019323348999 2023-01-22 11:05:48.235534: step: 422/463, loss: 0.1426866203546524 2023-01-22 11:05:48.850586: step: 424/463, loss: 0.0955159068107605 2023-01-22 11:05:49.447852: step: 426/463, loss: 0.046397652477025986 2023-01-22 11:05:50.158893: step: 428/463, loss: 0.05934610962867737 2023-01-22 11:05:50.755947: step: 430/463, loss: 0.1134750247001648 2023-01-22 11:05:51.347205: step: 432/463, loss: 0.585870087146759 2023-01-22 11:05:51.958837: step: 434/463, loss: 0.028768330812454224 2023-01-22 11:05:52.563544: step: 436/463, loss: 0.21743199229240417 2023-01-22 11:05:53.136775: step: 438/463, loss: 0.11486555635929108 2023-01-22 11:05:53.830698: step: 440/463, loss: 0.12417805939912796 2023-01-22 11:05:54.424747: step: 442/463, loss: 0.11379312723875046 2023-01-22 11:05:55.062035: step: 444/463, loss: 0.24372287094593048 2023-01-22 11:05:55.747388: step: 446/463, loss: 0.010859759524464607 2023-01-22 11:05:56.411106: step: 448/463, loss: 0.2165071666240692 2023-01-22 11:05:56.967147: step: 450/463, loss: 0.04450880363583565 2023-01-22 11:05:57.590541: step: 452/463, loss: 0.14124315977096558 2023-01-22 11:05:58.199697: step: 454/463, loss: 0.14413774013519287 2023-01-22 11:05:58.848156: step: 456/463, loss: 0.10476325452327728 2023-01-22 11:05:59.536302: step: 458/463, loss: 0.030846182256937027 2023-01-22 11:06:00.151380: step: 460/463, loss: 0.13036273419857025 2023-01-22 11:06:00.800496: step: 462/463, loss: 0.17384564876556396 2023-01-22 11:06:01.377364: step: 464/463, loss: 0.08751040697097778 2023-01-22 11:06:01.974127: step: 466/463, loss: 0.06706613302230835 2023-01-22 11:06:02.647570: step: 468/463, loss: 0.08410689979791641 2023-01-22 11:06:03.294350: step: 470/463, loss: 0.3431842625141144 2023-01-22 11:06:03.898470: step: 472/463, loss: 0.035335078835487366 2023-01-22 11:06:04.528337: step: 474/463, loss: 0.11014202237129211 2023-01-22 11:06:05.129413: step: 476/463, loss: 0.08605427294969559 2023-01-22 11:06:05.785854: step: 478/463, loss: 0.1027742400765419 2023-01-22 11:06:06.431132: step: 480/463, loss: 0.1063171923160553 2023-01-22 11:06:07.042086: step: 482/463, loss: 0.003478048136457801 2023-01-22 11:06:07.691128: step: 484/463, loss: 0.1457296758890152 2023-01-22 11:06:08.315683: step: 486/463, loss: 0.1212291270494461 2023-01-22 11:06:08.917187: step: 488/463, loss: 0.04395725578069687 2023-01-22 11:06:09.538123: step: 490/463, loss: 0.0438603051006794 2023-01-22 11:06:10.149900: step: 492/463, loss: 0.06670898199081421 2023-01-22 11:06:10.804356: step: 494/463, loss: 0.06832463294267654 2023-01-22 11:06:11.475968: step: 496/463, loss: 0.10354993492364883 2023-01-22 11:06:12.111469: step: 498/463, loss: 0.1462126076221466 2023-01-22 11:06:12.767305: step: 500/463, loss: 0.49212706089019775 2023-01-22 11:06:13.297322: step: 502/463, loss: 0.20970354974269867 2023-01-22 11:06:14.015732: step: 504/463, loss: 0.062238797545433044 2023-01-22 11:06:14.635949: step: 506/463, loss: 0.3281267285346985 2023-01-22 11:06:15.318731: step: 508/463, loss: 0.08876488357782364 2023-01-22 11:06:15.899326: step: 510/463, loss: 0.0857429951429367 2023-01-22 11:06:16.503154: step: 512/463, loss: 0.027456985786557198 2023-01-22 11:06:17.132203: step: 514/463, loss: 0.8833217620849609 2023-01-22 11:06:17.867370: step: 516/463, loss: 0.11567305773496628 2023-01-22 11:06:18.492934: step: 518/463, loss: 0.3653855323791504 2023-01-22 11:06:19.113941: step: 520/463, loss: 0.08590216934680939 2023-01-22 11:06:19.718791: step: 522/463, loss: 0.06966375559568405 2023-01-22 11:06:20.324874: step: 524/463, loss: 0.12032757699489594 2023-01-22 11:06:20.980564: step: 526/463, loss: 0.1204896792769432 2023-01-22 11:06:21.617726: step: 528/463, loss: 0.16273993253707886 2023-01-22 11:06:22.202101: step: 530/463, loss: 0.0689341276884079 2023-01-22 11:06:22.759771: step: 532/463, loss: 0.08280076831579208 2023-01-22 11:06:23.368229: step: 534/463, loss: 0.06909555941820145 2023-01-22 11:06:23.946456: step: 536/463, loss: 0.08990427851676941 2023-01-22 11:06:24.529483: step: 538/463, loss: 0.120759017765522 2023-01-22 11:06:25.018429: step: 540/463, loss: 0.08502160757780075 2023-01-22 11:06:25.643803: step: 542/463, loss: 0.06618203967809677 2023-01-22 11:06:26.321220: step: 544/463, loss: 0.16904030740261078 2023-01-22 11:06:26.993621: step: 546/463, loss: 0.05971629172563553 2023-01-22 11:06:27.565097: step: 548/463, loss: 0.0433935821056366 2023-01-22 11:06:28.184309: step: 550/463, loss: 0.89015793800354 2023-01-22 11:06:28.849184: step: 552/463, loss: 2.8742613792419434 2023-01-22 11:06:29.475761: step: 554/463, loss: 0.5891845226287842 2023-01-22 11:06:30.155202: step: 556/463, loss: 1.4074040651321411 2023-01-22 11:06:30.804739: step: 558/463, loss: 0.15140140056610107 2023-01-22 11:06:31.402678: step: 560/463, loss: 0.01083358284085989 2023-01-22 11:06:32.107847: step: 562/463, loss: 0.16573470830917358 2023-01-22 11:06:32.737006: step: 564/463, loss: 0.09017710387706757 2023-01-22 11:06:33.387896: step: 566/463, loss: 0.11884922534227371 2023-01-22 11:06:33.966808: step: 568/463, loss: 0.18361549079418182 2023-01-22 11:06:34.519188: step: 570/463, loss: 0.0705447569489479 2023-01-22 11:06:35.162936: step: 572/463, loss: 0.10155633836984634 2023-01-22 11:06:35.762789: step: 574/463, loss: 0.0580846406519413 2023-01-22 11:06:36.369744: step: 576/463, loss: 0.07159799337387085 2023-01-22 11:06:37.023133: step: 578/463, loss: 0.17857329547405243 2023-01-22 11:06:37.672281: step: 580/463, loss: 0.12237653136253357 2023-01-22 11:06:38.364020: step: 582/463, loss: 0.04622486233711243 2023-01-22 11:06:38.950021: step: 584/463, loss: 0.04272199794650078 2023-01-22 11:06:39.537204: step: 586/463, loss: 0.4913056194782257 2023-01-22 11:06:40.189742: step: 588/463, loss: 0.13528907299041748 2023-01-22 11:06:40.728098: step: 590/463, loss: 0.0324455164372921 2023-01-22 11:06:41.319048: step: 592/463, loss: 0.21316123008728027 2023-01-22 11:06:41.955140: step: 594/463, loss: 0.06118357554078102 2023-01-22 11:06:42.657412: step: 596/463, loss: 0.023242700845003128 2023-01-22 11:06:43.295773: step: 598/463, loss: 0.11060090363025665 2023-01-22 11:06:43.911655: step: 600/463, loss: 0.05608632043004036 2023-01-22 11:06:44.477174: step: 602/463, loss: 0.10846617072820663 2023-01-22 11:06:45.139502: step: 604/463, loss: 0.2460298091173172 2023-01-22 11:06:45.749041: step: 606/463, loss: 0.13271866738796234 2023-01-22 11:06:46.407260: step: 608/463, loss: 0.1562288999557495 2023-01-22 11:06:46.948778: step: 610/463, loss: 0.050401438027620316 2023-01-22 11:06:47.649581: step: 612/463, loss: 0.2274414449930191 2023-01-22 11:06:48.240351: step: 614/463, loss: 0.11631855368614197 2023-01-22 11:06:48.779625: step: 616/463, loss: 0.04366886988282204 2023-01-22 11:06:49.374261: step: 618/463, loss: 0.17520736157894135 2023-01-22 11:06:50.048383: step: 620/463, loss: 0.4167800545692444 2023-01-22 11:06:50.619706: step: 622/463, loss: 0.040646426379680634 2023-01-22 11:06:51.241968: step: 624/463, loss: 0.29388168454170227 2023-01-22 11:06:51.896758: step: 626/463, loss: 0.08017006516456604 2023-01-22 11:06:52.547730: step: 628/463, loss: 0.14051766693592072 2023-01-22 11:06:53.225461: step: 630/463, loss: 0.12048596888780594 2023-01-22 11:06:53.806336: step: 632/463, loss: 0.0794791504740715 2023-01-22 11:06:54.371204: step: 634/463, loss: 0.10982103645801544 2023-01-22 11:06:55.008537: step: 636/463, loss: 0.08861525356769562 2023-01-22 11:06:55.578452: step: 638/463, loss: 0.11401385068893433 2023-01-22 11:06:56.158758: step: 640/463, loss: 0.19314491748809814 2023-01-22 11:06:56.682599: step: 642/463, loss: 0.028576646000146866 2023-01-22 11:06:57.264463: step: 644/463, loss: 0.14793406426906586 2023-01-22 11:06:57.884927: step: 646/463, loss: 0.15287043154239655 2023-01-22 11:06:58.496017: step: 648/463, loss: 0.08074107021093369 2023-01-22 11:06:59.096626: step: 650/463, loss: 0.24262771010398865 2023-01-22 11:06:59.728080: step: 652/463, loss: 0.13698968291282654 2023-01-22 11:07:00.340755: step: 654/463, loss: 0.03198055550456047 2023-01-22 11:07:01.064080: step: 656/463, loss: 0.06725078821182251 2023-01-22 11:07:01.691139: step: 658/463, loss: 0.056147851049900055 2023-01-22 11:07:02.227123: step: 660/463, loss: 0.11801397055387497 2023-01-22 11:07:02.895489: step: 662/463, loss: 0.07209347188472748 2023-01-22 11:07:03.505171: step: 664/463, loss: 0.13116048276424408 2023-01-22 11:07:04.133933: step: 666/463, loss: 0.18685312569141388 2023-01-22 11:07:04.768151: step: 668/463, loss: 0.09314344078302383 2023-01-22 11:07:05.419861: step: 670/463, loss: 0.032252855598926544 2023-01-22 11:07:06.058538: step: 672/463, loss: 0.08380568772554398 2023-01-22 11:07:06.665720: step: 674/463, loss: 0.08002911508083344 2023-01-22 11:07:07.392878: step: 676/463, loss: 0.20918309688568115 2023-01-22 11:07:07.936568: step: 678/463, loss: 0.12389720231294632 2023-01-22 11:07:08.569145: step: 680/463, loss: 0.05499038100242615 2023-01-22 11:07:09.167426: step: 682/463, loss: 0.12249012291431427 2023-01-22 11:07:09.773275: step: 684/463, loss: 0.1472024917602539 2023-01-22 11:07:10.395970: step: 686/463, loss: 0.12132182717323303 2023-01-22 11:07:11.021051: step: 688/463, loss: 0.1617874652147293 2023-01-22 11:07:11.675860: step: 690/463, loss: 0.04404882341623306 2023-01-22 11:07:12.316267: step: 692/463, loss: 0.10959150642156601 2023-01-22 11:07:12.998416: step: 694/463, loss: 0.5979035496711731 2023-01-22 11:07:13.627660: step: 696/463, loss: 0.051047198474407196 2023-01-22 11:07:14.317119: step: 698/463, loss: 0.06753871589899063 2023-01-22 11:07:15.026490: step: 700/463, loss: 0.08026600629091263 2023-01-22 11:07:15.609219: step: 702/463, loss: 0.10995573550462723 2023-01-22 11:07:16.244518: step: 704/463, loss: 0.24805115163326263 2023-01-22 11:07:16.887207: step: 706/463, loss: 0.08290494978427887 2023-01-22 11:07:17.525324: step: 708/463, loss: 1.1645796298980713 2023-01-22 11:07:18.152105: step: 710/463, loss: 0.02313629724085331 2023-01-22 11:07:18.753523: step: 712/463, loss: 0.12914200127124786 2023-01-22 11:07:19.390250: step: 714/463, loss: 0.036950692534446716 2023-01-22 11:07:19.981293: step: 716/463, loss: 0.2037443071603775 2023-01-22 11:07:20.551663: step: 718/463, loss: 0.05308006331324577 2023-01-22 11:07:21.141174: step: 720/463, loss: 0.016386108472943306 2023-01-22 11:07:21.665022: step: 722/463, loss: 0.04003317281603813 2023-01-22 11:07:22.212698: step: 724/463, loss: 0.0642763078212738 2023-01-22 11:07:22.800161: step: 726/463, loss: 27.997026443481445 2023-01-22 11:07:23.424687: step: 728/463, loss: 0.11276333779096603 2023-01-22 11:07:24.052826: step: 730/463, loss: 0.06293115764856339 2023-01-22 11:07:24.674616: step: 732/463, loss: 0.13065288960933685 2023-01-22 11:07:25.311942: step: 734/463, loss: 0.0786309465765953 2023-01-22 11:07:25.892037: step: 736/463, loss: 0.03341026231646538 2023-01-22 11:07:26.506353: step: 738/463, loss: 0.11513106524944305 2023-01-22 11:07:27.103650: step: 740/463, loss: 0.09458275139331818 2023-01-22 11:07:27.758093: step: 742/463, loss: 0.07368090748786926 2023-01-22 11:07:28.332017: step: 744/463, loss: 0.032300472259521484 2023-01-22 11:07:28.927439: step: 746/463, loss: 0.051260169595479965 2023-01-22 11:07:29.546748: step: 748/463, loss: 0.10917123407125473 2023-01-22 11:07:30.146277: step: 750/463, loss: 0.07715870440006256 2023-01-22 11:07:30.729268: step: 752/463, loss: 0.06744939088821411 2023-01-22 11:07:31.333702: step: 754/463, loss: 0.0620088204741478 2023-01-22 11:07:31.978852: step: 756/463, loss: 0.12787121534347534 2023-01-22 11:07:32.666828: step: 758/463, loss: 0.27201634645462036 2023-01-22 11:07:33.308158: step: 760/463, loss: 0.45338839292526245 2023-01-22 11:07:33.932867: step: 762/463, loss: 0.2032330334186554 2023-01-22 11:07:34.517666: step: 764/463, loss: 0.09003154188394547 2023-01-22 11:07:35.126782: step: 766/463, loss: 0.11048809438943863 2023-01-22 11:07:35.848838: step: 768/463, loss: 0.05917493253946304 2023-01-22 11:07:36.482246: step: 770/463, loss: 0.2775425910949707 2023-01-22 11:07:37.101561: step: 772/463, loss: 0.18640881776809692 2023-01-22 11:07:37.729197: step: 774/463, loss: 0.664874255657196 2023-01-22 11:07:38.374658: step: 776/463, loss: 0.1160912811756134 2023-01-22 11:07:38.936525: step: 778/463, loss: 0.11691008508205414 2023-01-22 11:07:39.516378: step: 780/463, loss: 0.694681704044342 2023-01-22 11:07:40.140610: step: 782/463, loss: 0.06484073400497437 2023-01-22 11:07:40.758002: step: 784/463, loss: 0.12963417172431946 2023-01-22 11:07:41.361535: step: 786/463, loss: 0.3227337598800659 2023-01-22 11:07:42.001964: step: 788/463, loss: 0.11776954680681229 2023-01-22 11:07:42.600828: step: 790/463, loss: 0.5221192836761475 2023-01-22 11:07:43.197958: step: 792/463, loss: 0.20026175677776337 2023-01-22 11:07:43.790505: step: 794/463, loss: 0.1020629033446312 2023-01-22 11:07:44.380262: step: 796/463, loss: 0.04261652007699013 2023-01-22 11:07:45.015739: step: 798/463, loss: 0.12003949284553528 2023-01-22 11:07:45.651212: step: 800/463, loss: 0.12371769547462463 2023-01-22 11:07:46.297106: step: 802/463, loss: 0.04024579003453255 2023-01-22 11:07:46.905153: step: 804/463, loss: 0.09820590913295746 2023-01-22 11:07:47.530375: step: 806/463, loss: 0.08650613576173782 2023-01-22 11:07:48.176079: step: 808/463, loss: 0.0983637124300003 2023-01-22 11:07:48.793803: step: 810/463, loss: 0.058388736099004745 2023-01-22 11:07:49.352108: step: 812/463, loss: 0.07422322779893875 2023-01-22 11:07:49.976975: step: 814/463, loss: 0.1438947468996048 2023-01-22 11:07:50.580495: step: 816/463, loss: 0.7769674062728882 2023-01-22 11:07:51.170680: step: 818/463, loss: 0.16482600569725037 2023-01-22 11:07:51.773716: step: 820/463, loss: 0.0664047971367836 2023-01-22 11:07:52.455743: step: 822/463, loss: 0.06674491614103317 2023-01-22 11:07:53.042677: step: 824/463, loss: 0.1059534102678299 2023-01-22 11:07:53.695123: step: 826/463, loss: 0.15020324289798737 2023-01-22 11:07:54.299992: step: 828/463, loss: 0.10606904327869415 2023-01-22 11:07:54.929183: step: 830/463, loss: 0.0406748466193676 2023-01-22 11:07:55.531161: step: 832/463, loss: 0.054141566157341 2023-01-22 11:07:56.109628: step: 834/463, loss: 0.1280638575553894 2023-01-22 11:07:56.726422: step: 836/463, loss: 0.12861491739749908 2023-01-22 11:07:57.323620: step: 838/463, loss: 0.10322591662406921 2023-01-22 11:07:57.963190: step: 840/463, loss: 0.2662043273448944 2023-01-22 11:07:58.576116: step: 842/463, loss: 0.06580877304077148 2023-01-22 11:07:59.207041: step: 844/463, loss: 0.18206185102462769 2023-01-22 11:07:59.875142: step: 846/463, loss: 0.15396100282669067 2023-01-22 11:08:00.498582: step: 848/463, loss: 0.108707956969738 2023-01-22 11:08:01.129531: step: 850/463, loss: 0.23792918026447296 2023-01-22 11:08:01.728013: step: 852/463, loss: 0.27361077070236206 2023-01-22 11:08:02.292246: step: 854/463, loss: 0.35999900102615356 2023-01-22 11:08:02.946642: step: 856/463, loss: 0.09687886387109756 2023-01-22 11:08:03.621321: step: 858/463, loss: 0.10489597916603088 2023-01-22 11:08:04.189130: step: 860/463, loss: 0.11961831152439117 2023-01-22 11:08:04.748227: step: 862/463, loss: 0.014973568730056286 2023-01-22 11:08:05.377948: step: 864/463, loss: 0.3130744695663452 2023-01-22 11:08:05.997066: step: 866/463, loss: 0.2770446836948395 2023-01-22 11:08:06.592503: step: 868/463, loss: 0.7259390950202942 2023-01-22 11:08:07.204009: step: 870/463, loss: 0.12542423605918884 2023-01-22 11:08:07.858779: step: 872/463, loss: 0.013127334415912628 2023-01-22 11:08:08.426201: step: 874/463, loss: 0.4161779284477234 2023-01-22 11:08:09.038071: step: 876/463, loss: 0.03206418827176094 2023-01-22 11:08:09.724751: step: 878/463, loss: 0.042767807841300964 2023-01-22 11:08:10.403830: step: 880/463, loss: 0.08185069262981415 2023-01-22 11:08:11.030903: step: 882/463, loss: 0.46371862292289734 2023-01-22 11:08:11.700270: step: 884/463, loss: 0.09322955459356308 2023-01-22 11:08:12.301405: step: 886/463, loss: 0.07903829216957092 2023-01-22 11:08:12.935311: step: 888/463, loss: 0.04770863801240921 2023-01-22 11:08:13.546900: step: 890/463, loss: 0.4304206669330597 2023-01-22 11:08:14.184080: step: 892/463, loss: 0.1135517805814743 2023-01-22 11:08:14.802654: step: 894/463, loss: 0.4329846203327179 2023-01-22 11:08:15.464073: step: 896/463, loss: 0.09617350250482559 2023-01-22 11:08:16.056565: step: 898/463, loss: 0.06635677814483643 2023-01-22 11:08:16.647557: step: 900/463, loss: 0.05550463870167732 2023-01-22 11:08:17.318109: step: 902/463, loss: 0.2550792992115021 2023-01-22 11:08:17.912280: step: 904/463, loss: 0.27705320715904236 2023-01-22 11:08:18.602253: step: 906/463, loss: 0.10798818618059158 2023-01-22 11:08:19.240507: step: 908/463, loss: 0.33742591738700867 2023-01-22 11:08:19.802541: step: 910/463, loss: 0.13067403435707092 2023-01-22 11:08:20.505759: step: 912/463, loss: 1.6436805725097656 2023-01-22 11:08:21.162997: step: 914/463, loss: 0.15067541599273682 2023-01-22 11:08:21.874092: step: 916/463, loss: 0.30853891372680664 2023-01-22 11:08:22.436112: step: 918/463, loss: 0.1106971800327301 2023-01-22 11:08:23.086617: step: 920/463, loss: 0.5722981691360474 2023-01-22 11:08:23.715820: step: 922/463, loss: 0.28582245111465454 2023-01-22 11:08:24.422716: step: 924/463, loss: 0.9868047833442688 2023-01-22 11:08:25.066383: step: 926/463, loss: 0.275545209646225 ================================================== Loss: 0.236 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.323670913884007, 'r': 0.3494663187855787, 'f1': 0.3360743613138686}, 'combined': 0.2476337399154821, 'epoch': 16} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3329677160544025, 'r': 0.37244642503511755, 'f1': 0.35160235179020205}, 'combined': 0.27253388033498915, 'epoch': 16} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2846045727136432, 'r': 0.36021110056926, 'f1': 0.31797529313232836}, 'combined': 0.23429758441329457, 'epoch': 16} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.31510370598716464, 'r': 0.37505450299023735, 'f1': 0.3424752826297761}, 'combined': 0.2654593099809748, 'epoch': 16} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2911107121183462, 'r': 0.3513214666172072, 'f1': 0.31839451918704764}, 'combined': 0.23460648782203508, 'epoch': 16} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3159075640832669, 'r': 0.3673006144901954, 'f1': 0.3396711165026202}, 'combined': 0.26328574580585873, 'epoch': 16} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2403846153846154, 'r': 0.35714285714285715, 'f1': 0.28735632183908044}, 'combined': 0.19157088122605362, 'epoch': 16} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.30405405405405406, 'r': 0.4891304347826087, 'f1': 0.375}, 'combined': 0.1875, 'epoch': 16} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3888888888888889, 'r': 0.2413793103448276, 'f1': 0.2978723404255319}, 'combined': 0.19858156028368792, 'epoch': 16} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29855383301254707, 'r': 0.34444161379815674, 'f1': 0.3198603180116804}, 'combined': 0.23568655011386977, 'epoch': 7} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.30988948788864984, 'r': 0.3867922100668993, 'f1': 0.34409642236532006}, 'combined': 0.2667158871922577, 'epoch': 7} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2636363636363636, 'r': 0.4142857142857143, 'f1': 0.3222222222222222}, 'combined': 0.2148148148148148, 'epoch': 7} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29021588054776093, 'r': 0.325460313859064, 'f1': 0.30682931199235547}, 'combined': 0.2260847562048935, 'epoch': 13} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32837198696491027, 'r': 0.4053341714098111, 'f1': 0.3628166009822085}, 'combined': 0.28122626487616165, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3333333333333333, 'r': 0.4782608695652174, 'f1': 0.3928571428571428}, 'combined': 0.1964285714285714, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27448744745979536, 'r': 0.35001055918972007, 'f1': 0.3076823431075605}, 'combined': 0.22671330544767612, 'epoch': 15} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3279096426751969, 'r': 0.36919973554882, 'f1': 0.3473318739966417}, 'combined': 0.2692237492222773, 'epoch': 15} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.425, 'r': 0.29310344827586204, 'f1': 0.3469387755102041}, 'combined': 0.2312925170068027, 'epoch': 15} ****************************** Epoch: 17 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 11:11:05.506464: step: 2/463, loss: 0.061208248138427734 2023-01-22 11:11:06.209563: step: 4/463, loss: 1.429548740386963 2023-01-22 11:11:06.830971: step: 6/463, loss: 0.07827788591384888 2023-01-22 11:11:07.457950: step: 8/463, loss: 0.10401318967342377 2023-01-22 11:11:08.069741: step: 10/463, loss: 0.07103412598371506 2023-01-22 11:11:08.668762: step: 12/463, loss: 0.05524838715791702 2023-01-22 11:11:09.287047: step: 14/463, loss: 0.2792140543460846 2023-01-22 11:11:09.931929: step: 16/463, loss: 0.19342955946922302 2023-01-22 11:11:10.551448: step: 18/463, loss: 0.043455831706523895 2023-01-22 11:11:11.201769: step: 20/463, loss: 0.04472591355443001 2023-01-22 11:11:11.844885: step: 22/463, loss: 0.28313031792640686 2023-01-22 11:11:12.440523: step: 24/463, loss: 0.2675344944000244 2023-01-22 11:11:13.097049: step: 26/463, loss: 0.014542317017912865 2023-01-22 11:11:13.685359: step: 28/463, loss: 0.058553364127874374 2023-01-22 11:11:14.274832: step: 30/463, loss: 0.04602428898215294 2023-01-22 11:11:14.979967: step: 32/463, loss: 0.8514562845230103 2023-01-22 11:11:15.567272: step: 34/463, loss: 0.15211881697177887 2023-01-22 11:11:16.134561: step: 36/463, loss: 0.16342812776565552 2023-01-22 11:11:16.723636: step: 38/463, loss: 0.9615288376808167 2023-01-22 11:11:17.322774: step: 40/463, loss: 0.41481366753578186 2023-01-22 11:11:17.909973: step: 42/463, loss: 0.09782220423221588 2023-01-22 11:11:18.458163: step: 44/463, loss: 0.04602811112999916 2023-01-22 11:11:19.038997: step: 46/463, loss: 0.11853159219026566 2023-01-22 11:11:19.679827: step: 48/463, loss: 0.05424831435084343 2023-01-22 11:11:20.276547: step: 50/463, loss: 0.08900800347328186 2023-01-22 11:11:20.940583: step: 52/463, loss: 0.3510980010032654 2023-01-22 11:11:21.532995: step: 54/463, loss: 0.09976553916931152 2023-01-22 11:11:22.215886: step: 56/463, loss: 0.23481567203998566 2023-01-22 11:11:22.845282: step: 58/463, loss: 0.010560777969658375 2023-01-22 11:11:23.498196: step: 60/463, loss: 0.11461454629898071 2023-01-22 11:11:24.109772: step: 62/463, loss: 0.1466471254825592 2023-01-22 11:11:24.657953: step: 64/463, loss: 0.054992325603961945 2023-01-22 11:11:25.233896: step: 66/463, loss: 0.1268271952867508 2023-01-22 11:11:25.876229: step: 68/463, loss: 0.019168691709637642 2023-01-22 11:11:26.449543: step: 70/463, loss: 0.006661098450422287 2023-01-22 11:11:27.032968: step: 72/463, loss: 0.10852862894535065 2023-01-22 11:11:27.698682: step: 74/463, loss: 0.10976220667362213 2023-01-22 11:11:28.358233: step: 76/463, loss: 0.026197120547294617 2023-01-22 11:11:28.968395: step: 78/463, loss: 0.06628680229187012 2023-01-22 11:11:29.594404: step: 80/463, loss: 0.12858764827251434 2023-01-22 11:11:30.163831: step: 82/463, loss: 0.11108959466218948 2023-01-22 11:11:30.812666: step: 84/463, loss: 0.08075893670320511 2023-01-22 11:11:31.467712: step: 86/463, loss: 0.010406739078462124 2023-01-22 11:11:32.087497: step: 88/463, loss: 0.061715152114629745 2023-01-22 11:11:32.645612: step: 90/463, loss: 0.0024050285574048758 2023-01-22 11:11:33.282480: step: 92/463, loss: 0.01013980619609356 2023-01-22 11:11:33.878874: step: 94/463, loss: 0.07551796734333038 2023-01-22 11:11:34.564067: step: 96/463, loss: 0.24460965394973755 2023-01-22 11:11:35.168006: step: 98/463, loss: 0.20156948268413544 2023-01-22 11:11:35.803424: step: 100/463, loss: 0.033421777188777924 2023-01-22 11:11:36.371825: step: 102/463, loss: 0.07469838857650757 2023-01-22 11:11:36.951626: step: 104/463, loss: 0.016260556876659393 2023-01-22 11:11:37.596890: step: 106/463, loss: 0.29703056812286377 2023-01-22 11:11:38.188185: step: 108/463, loss: 0.022552143782377243 2023-01-22 11:11:38.757427: step: 110/463, loss: 0.03729711100459099 2023-01-22 11:11:39.294314: step: 112/463, loss: 0.03979526460170746 2023-01-22 11:11:39.883432: step: 114/463, loss: 0.08763419836759567 2023-01-22 11:11:40.467503: step: 116/463, loss: 0.03372212126851082 2023-01-22 11:11:41.107865: step: 118/463, loss: 0.044284623116254807 2023-01-22 11:11:41.709366: step: 120/463, loss: 0.08314429968595505 2023-01-22 11:11:42.368785: step: 122/463, loss: 0.10130926221609116 2023-01-22 11:11:42.974781: step: 124/463, loss: 0.1690724939107895 2023-01-22 11:11:43.539986: step: 126/463, loss: 0.07167366147041321 2023-01-22 11:11:44.133915: step: 128/463, loss: 0.020160706713795662 2023-01-22 11:11:44.685787: step: 130/463, loss: 0.09696692228317261 2023-01-22 11:11:45.338374: step: 132/463, loss: 0.027926130220294 2023-01-22 11:11:45.967123: step: 134/463, loss: 0.13102640211582184 2023-01-22 11:11:46.630099: step: 136/463, loss: 0.23273417353630066 2023-01-22 11:11:47.322118: step: 138/463, loss: 0.0639929547905922 2023-01-22 11:11:47.967842: step: 140/463, loss: 0.023816965520381927 2023-01-22 11:11:48.578335: step: 142/463, loss: 0.05975884944200516 2023-01-22 11:11:49.209835: step: 144/463, loss: 0.09767508506774902 2023-01-22 11:11:49.847499: step: 146/463, loss: 0.05500223487615585 2023-01-22 11:11:50.491594: step: 148/463, loss: 0.21780887246131897 2023-01-22 11:11:51.130463: step: 150/463, loss: 0.08378569036722183 2023-01-22 11:11:51.748790: step: 152/463, loss: 0.049797382205724716 2023-01-22 11:11:52.341285: step: 154/463, loss: 0.2314986288547516 2023-01-22 11:11:52.961417: step: 156/463, loss: 0.04122598469257355 2023-01-22 11:11:53.567362: step: 158/463, loss: 0.058828771114349365 2023-01-22 11:11:54.256663: step: 160/463, loss: 0.05163532868027687 2023-01-22 11:11:54.871879: step: 162/463, loss: 0.2564520835876465 2023-01-22 11:11:55.472012: step: 164/463, loss: 0.06570015102624893 2023-01-22 11:11:56.083188: step: 166/463, loss: 0.06088108569383621 2023-01-22 11:11:56.734443: step: 168/463, loss: 0.05846810340881348 2023-01-22 11:11:57.303875: step: 170/463, loss: 0.011461921036243439 2023-01-22 11:11:57.835376: step: 172/463, loss: 0.05215035751461983 2023-01-22 11:11:58.389409: step: 174/463, loss: 0.019592925906181335 2023-01-22 11:11:58.977801: step: 176/463, loss: 0.9721873998641968 2023-01-22 11:11:59.544702: step: 178/463, loss: 0.04376525804400444 2023-01-22 11:12:00.159963: step: 180/463, loss: 0.065320685505867 2023-01-22 11:12:00.762083: step: 182/463, loss: 0.011112179607152939 2023-01-22 11:12:01.354456: step: 184/463, loss: 0.07126247137784958 2023-01-22 11:12:01.931336: step: 186/463, loss: 0.0482366569340229 2023-01-22 11:12:02.525354: step: 188/463, loss: 0.40055400133132935 2023-01-22 11:12:03.114518: step: 190/463, loss: 0.07437822222709656 2023-01-22 11:12:03.684092: step: 192/463, loss: 0.04190584644675255 2023-01-22 11:12:04.323881: step: 194/463, loss: 0.036455072462558746 2023-01-22 11:12:04.947000: step: 196/463, loss: 0.037047579884529114 2023-01-22 11:12:05.554149: step: 198/463, loss: 0.05403919517993927 2023-01-22 11:12:06.178710: step: 200/463, loss: 0.21770811080932617 2023-01-22 11:12:06.861558: step: 202/463, loss: 0.026273254305124283 2023-01-22 11:12:07.480728: step: 204/463, loss: 0.18321312963962555 2023-01-22 11:12:08.100086: step: 206/463, loss: 0.08238894492387772 2023-01-22 11:12:08.709180: step: 208/463, loss: 0.14070747792720795 2023-01-22 11:12:09.311059: step: 210/463, loss: 0.033078089356422424 2023-01-22 11:12:09.880588: step: 212/463, loss: 0.017914224416017532 2023-01-22 11:12:10.518494: step: 214/463, loss: 0.08555115759372711 2023-01-22 11:12:11.164323: step: 216/463, loss: 0.042873188853263855 2023-01-22 11:12:11.795709: step: 218/463, loss: 0.12525737285614014 2023-01-22 11:12:12.453866: step: 220/463, loss: 0.0575256310403347 2023-01-22 11:12:13.121374: step: 222/463, loss: 0.23342075943946838 2023-01-22 11:12:13.718319: step: 224/463, loss: 0.15358057618141174 2023-01-22 11:12:14.345667: step: 226/463, loss: 0.10575661808252335 2023-01-22 11:12:14.987964: step: 228/463, loss: 0.0845039114356041 2023-01-22 11:12:15.598648: step: 230/463, loss: 0.07489588111639023 2023-01-22 11:12:16.227513: step: 232/463, loss: 0.07380463182926178 2023-01-22 11:12:16.807831: step: 234/463, loss: 0.06168938800692558 2023-01-22 11:12:17.436704: step: 236/463, loss: 0.0815376564860344 2023-01-22 11:12:18.037351: step: 238/463, loss: 0.03183263912796974 2023-01-22 11:12:18.596122: step: 240/463, loss: 0.12306354194879532 2023-01-22 11:12:19.210504: step: 242/463, loss: 0.7525889873504639 2023-01-22 11:12:19.911229: step: 244/463, loss: 0.07208044081926346 2023-01-22 11:12:20.518327: step: 246/463, loss: 0.14129307866096497 2023-01-22 11:12:21.114810: step: 248/463, loss: 0.04030977934598923 2023-01-22 11:12:21.710942: step: 250/463, loss: 0.040892407298088074 2023-01-22 11:12:22.449597: step: 252/463, loss: 0.0148267587646842 2023-01-22 11:12:23.265526: step: 254/463, loss: 0.10283830761909485 2023-01-22 11:12:23.893489: step: 256/463, loss: 0.05003456771373749 2023-01-22 11:12:24.516999: step: 258/463, loss: 0.04177189990878105 2023-01-22 11:12:25.060755: step: 260/463, loss: 0.04389839619398117 2023-01-22 11:12:25.680932: step: 262/463, loss: 0.14525310695171356 2023-01-22 11:12:26.304278: step: 264/463, loss: 0.131211519241333 2023-01-22 11:12:26.941956: step: 266/463, loss: 0.09023848176002502 2023-01-22 11:12:27.575560: step: 268/463, loss: 0.030174342915415764 2023-01-22 11:12:28.164363: step: 270/463, loss: 0.07709553092718124 2023-01-22 11:12:28.774639: step: 272/463, loss: 0.060623396188020706 2023-01-22 11:12:29.399345: step: 274/463, loss: 0.06450683623552322 2023-01-22 11:12:30.037766: step: 276/463, loss: 0.09293755888938904 2023-01-22 11:12:30.654369: step: 278/463, loss: 0.2938070595264435 2023-01-22 11:12:31.263447: step: 280/463, loss: 0.02832983247935772 2023-01-22 11:12:31.836694: step: 282/463, loss: 0.01586303487420082 2023-01-22 11:12:32.435644: step: 284/463, loss: 0.03767448663711548 2023-01-22 11:12:33.065717: step: 286/463, loss: 0.119881272315979 2023-01-22 11:12:33.697778: step: 288/463, loss: 0.16706766188144684 2023-01-22 11:12:34.286215: step: 290/463, loss: 0.0495007298886776 2023-01-22 11:12:34.915230: step: 292/463, loss: 0.018432429060339928 2023-01-22 11:12:35.551973: step: 294/463, loss: 0.05264729633927345 2023-01-22 11:12:36.168300: step: 296/463, loss: 0.028099611401557922 2023-01-22 11:12:36.772182: step: 298/463, loss: 0.18391457200050354 2023-01-22 11:12:37.341198: step: 300/463, loss: 0.16782978177070618 2023-01-22 11:12:38.028855: step: 302/463, loss: 0.28628191351890564 2023-01-22 11:12:38.554429: step: 304/463, loss: 0.1303740292787552 2023-01-22 11:12:39.141812: step: 306/463, loss: 0.10783737152814865 2023-01-22 11:12:39.742317: step: 308/463, loss: 0.06982791423797607 2023-01-22 11:12:40.430766: step: 310/463, loss: 0.18841378390789032 2023-01-22 11:12:41.018489: step: 312/463, loss: 0.14633822441101074 2023-01-22 11:12:41.669191: step: 314/463, loss: 0.12172915786504745 2023-01-22 11:12:42.342881: step: 316/463, loss: 0.024316532537341118 2023-01-22 11:12:42.996812: step: 318/463, loss: 0.23092934489250183 2023-01-22 11:12:43.594392: step: 320/463, loss: 0.09777764230966568 2023-01-22 11:12:44.194349: step: 322/463, loss: 0.9154044389724731 2023-01-22 11:12:44.743927: step: 324/463, loss: 0.09682703763246536 2023-01-22 11:12:45.405050: step: 326/463, loss: 0.06696777790784836 2023-01-22 11:12:45.954597: step: 328/463, loss: 0.1595633625984192 2023-01-22 11:12:46.573794: step: 330/463, loss: 0.026845725253224373 2023-01-22 11:12:47.126657: step: 332/463, loss: 0.06576258689165115 2023-01-22 11:12:47.708371: step: 334/463, loss: 0.14095371961593628 2023-01-22 11:12:48.329185: step: 336/463, loss: 0.05589592084288597 2023-01-22 11:12:48.898765: step: 338/463, loss: 0.08479630202054977 2023-01-22 11:12:49.504334: step: 340/463, loss: 0.10343464463949203 2023-01-22 11:12:50.143802: step: 342/463, loss: 0.07250874489545822 2023-01-22 11:12:50.766163: step: 344/463, loss: 0.03034813329577446 2023-01-22 11:12:51.320398: step: 346/463, loss: 0.037292033433914185 2023-01-22 11:12:51.923862: step: 348/463, loss: 0.4737735986709595 2023-01-22 11:12:52.510994: step: 350/463, loss: 0.03390367329120636 2023-01-22 11:12:53.055602: step: 352/463, loss: 0.08879923820495605 2023-01-22 11:12:53.689664: step: 354/463, loss: 0.37008336186408997 2023-01-22 11:12:54.310410: step: 356/463, loss: 0.15540654957294464 2023-01-22 11:12:54.934858: step: 358/463, loss: 0.10053622722625732 2023-01-22 11:12:55.532229: step: 360/463, loss: 0.16275997459888458 2023-01-22 11:12:56.079389: step: 362/463, loss: 0.05168945714831352 2023-01-22 11:12:56.679721: step: 364/463, loss: 0.0929010733962059 2023-01-22 11:12:57.375181: step: 366/463, loss: 0.03830442950129509 2023-01-22 11:12:58.066818: step: 368/463, loss: 0.05620065703988075 2023-01-22 11:12:58.662326: step: 370/463, loss: 0.028583606705069542 2023-01-22 11:12:59.221150: step: 372/463, loss: 0.038274135440588 2023-01-22 11:12:59.808444: step: 374/463, loss: 0.04940010979771614 2023-01-22 11:13:00.440167: step: 376/463, loss: 0.026470765471458435 2023-01-22 11:13:00.995912: step: 378/463, loss: 0.3079555034637451 2023-01-22 11:13:01.651110: step: 380/463, loss: 0.026415301486849785 2023-01-22 11:13:02.201626: step: 382/463, loss: 0.02228335663676262 2023-01-22 11:13:02.763055: step: 384/463, loss: 0.024316811934113503 2023-01-22 11:13:03.360152: step: 386/463, loss: 0.21048268675804138 2023-01-22 11:13:03.965009: step: 388/463, loss: 0.18815036118030548 2023-01-22 11:13:04.604377: step: 390/463, loss: 0.2901209890842438 2023-01-22 11:13:05.211894: step: 392/463, loss: 0.08952522277832031 2023-01-22 11:13:05.797226: step: 394/463, loss: 0.13785718381404877 2023-01-22 11:13:06.453925: step: 396/463, loss: 0.015936898067593575 2023-01-22 11:13:07.010352: step: 398/463, loss: 1.0970044136047363 2023-01-22 11:13:07.648347: step: 400/463, loss: 0.051290832459926605 2023-01-22 11:13:08.257501: step: 402/463, loss: 0.09496838599443436 2023-01-22 11:13:08.935417: step: 404/463, loss: 0.10430146753787994 2023-01-22 11:13:09.504269: step: 406/463, loss: 0.0883362665772438 2023-01-22 11:13:10.145236: step: 408/463, loss: 0.047782305628061295 2023-01-22 11:13:10.799477: step: 410/463, loss: 0.06281021237373352 2023-01-22 11:13:11.467408: step: 412/463, loss: 0.28949886560440063 2023-01-22 11:13:12.129921: step: 414/463, loss: 0.11550695449113846 2023-01-22 11:13:12.786663: step: 416/463, loss: 0.08808974176645279 2023-01-22 11:13:13.469888: step: 418/463, loss: 0.0716627985239029 2023-01-22 11:13:14.100942: step: 420/463, loss: 0.12860070168972015 2023-01-22 11:13:14.697474: step: 422/463, loss: 0.05852087587118149 2023-01-22 11:13:15.429517: step: 424/463, loss: 0.0697203278541565 2023-01-22 11:13:16.100352: step: 426/463, loss: 0.10973231494426727 2023-01-22 11:13:16.680593: step: 428/463, loss: 0.5625197291374207 2023-01-22 11:13:17.294547: step: 430/463, loss: 0.006370704621076584 2023-01-22 11:13:17.893548: step: 432/463, loss: 0.04488911107182503 2023-01-22 11:13:18.537998: step: 434/463, loss: 0.12623180449008942 2023-01-22 11:13:19.144231: step: 436/463, loss: 0.10643579810857773 2023-01-22 11:13:19.721158: step: 438/463, loss: 0.0876208022236824 2023-01-22 11:13:20.332160: step: 440/463, loss: 0.1903076171875 2023-01-22 11:13:20.964833: step: 442/463, loss: 0.08365371823310852 2023-01-22 11:13:21.584508: step: 444/463, loss: 0.07737105339765549 2023-01-22 11:13:22.206042: step: 446/463, loss: 0.14239755272865295 2023-01-22 11:13:22.808933: step: 448/463, loss: 0.19661210477352142 2023-01-22 11:13:23.427649: step: 450/463, loss: 0.0818001851439476 2023-01-22 11:13:24.119350: step: 452/463, loss: 0.3861251473426819 2023-01-22 11:13:24.780282: step: 454/463, loss: 0.030177662149071693 2023-01-22 11:13:25.413901: step: 456/463, loss: 0.026532568037509918 2023-01-22 11:13:26.035026: step: 458/463, loss: 0.12874577939510345 2023-01-22 11:13:26.655470: step: 460/463, loss: 0.08066501468420029 2023-01-22 11:13:27.246045: step: 462/463, loss: 0.05050784349441528 2023-01-22 11:13:27.870889: step: 464/463, loss: 0.018761588260531425 2023-01-22 11:13:28.481967: step: 466/463, loss: 0.09819670021533966 2023-01-22 11:13:29.063076: step: 468/463, loss: 0.014587967656552792 2023-01-22 11:13:29.769137: step: 470/463, loss: 0.10897120088338852 2023-01-22 11:13:30.477970: step: 472/463, loss: 0.02867637760937214 2023-01-22 11:13:31.048836: step: 474/463, loss: 0.012765491381287575 2023-01-22 11:13:31.618988: step: 476/463, loss: 0.8871848583221436 2023-01-22 11:13:32.220947: step: 478/463, loss: 0.11894292384386063 2023-01-22 11:13:33.028933: step: 480/463, loss: 0.08654782176017761 2023-01-22 11:13:33.640025: step: 482/463, loss: 0.047143712639808655 2023-01-22 11:13:34.321682: step: 484/463, loss: 0.08438290655612946 2023-01-22 11:13:34.954555: step: 486/463, loss: 0.11088824272155762 2023-01-22 11:13:35.553680: step: 488/463, loss: 0.08681084960699081 2023-01-22 11:13:36.149483: step: 490/463, loss: 0.057544413954019547 2023-01-22 11:13:36.699790: step: 492/463, loss: 0.021170584484934807 2023-01-22 11:13:37.265935: step: 494/463, loss: 0.04923864081501961 2023-01-22 11:13:37.916701: step: 496/463, loss: 0.06782519817352295 2023-01-22 11:13:38.584341: step: 498/463, loss: 0.7223324179649353 2023-01-22 11:13:39.192238: step: 500/463, loss: 0.35374197363853455 2023-01-22 11:13:39.796256: step: 502/463, loss: 0.12288373708724976 2023-01-22 11:13:40.397381: step: 504/463, loss: 0.044332731515169144 2023-01-22 11:13:41.008471: step: 506/463, loss: 0.07677768915891647 2023-01-22 11:13:41.651102: step: 508/463, loss: 0.16519592702388763 2023-01-22 11:13:42.297248: step: 510/463, loss: 0.09459847956895828 2023-01-22 11:13:42.928294: step: 512/463, loss: 0.026427011936903 2023-01-22 11:13:43.573792: step: 514/463, loss: 0.07196318358182907 2023-01-22 11:13:44.156165: step: 516/463, loss: 0.21120774745941162 2023-01-22 11:13:44.841413: step: 518/463, loss: 0.04945854842662811 2023-01-22 11:13:45.439881: step: 520/463, loss: 0.03394310548901558 2023-01-22 11:13:46.079306: step: 522/463, loss: 0.0673530250787735 2023-01-22 11:13:46.674959: step: 524/463, loss: 0.07183351367712021 2023-01-22 11:13:47.275571: step: 526/463, loss: 0.10946710407733917 2023-01-22 11:13:47.946847: step: 528/463, loss: 0.16529658436775208 2023-01-22 11:13:48.521626: step: 530/463, loss: 0.147574782371521 2023-01-22 11:13:49.140003: step: 532/463, loss: 0.22300377488136292 2023-01-22 11:13:49.738700: step: 534/463, loss: 0.01837911456823349 2023-01-22 11:13:50.300963: step: 536/463, loss: 0.044504567980766296 2023-01-22 11:13:50.942593: step: 538/463, loss: 0.039813846349716187 2023-01-22 11:13:51.556123: step: 540/463, loss: 0.05233435705304146 2023-01-22 11:13:52.159712: step: 542/463, loss: 0.11505598574876785 2023-01-22 11:13:52.756263: step: 544/463, loss: 0.061726927757263184 2023-01-22 11:13:53.384539: step: 546/463, loss: 0.09867498278617859 2023-01-22 11:13:54.006905: step: 548/463, loss: 0.054983608424663544 2023-01-22 11:13:54.593927: step: 550/463, loss: 0.057428546249866486 2023-01-22 11:13:55.328711: step: 552/463, loss: 0.0610651895403862 2023-01-22 11:13:55.901641: step: 554/463, loss: 0.06848292052745819 2023-01-22 11:13:56.565209: step: 556/463, loss: 0.1013367623090744 2023-01-22 11:13:57.145666: step: 558/463, loss: 0.011504033580422401 2023-01-22 11:13:57.760000: step: 560/463, loss: 0.05420619994401932 2023-01-22 11:13:58.390095: step: 562/463, loss: 0.09809090942144394 2023-01-22 11:13:59.003307: step: 564/463, loss: 0.004097132943570614 2023-01-22 11:13:59.714060: step: 566/463, loss: 0.051969267427921295 2023-01-22 11:14:00.389426: step: 568/463, loss: 0.09839022904634476 2023-01-22 11:14:01.074647: step: 570/463, loss: 0.1341186761856079 2023-01-22 11:14:01.686526: step: 572/463, loss: 0.16783669590950012 2023-01-22 11:14:02.322911: step: 574/463, loss: 0.09007496386766434 2023-01-22 11:14:02.924819: step: 576/463, loss: 0.0610160268843174 2023-01-22 11:14:03.598053: step: 578/463, loss: 0.03092244826257229 2023-01-22 11:14:04.173744: step: 580/463, loss: 0.10315368324518204 2023-01-22 11:14:04.814194: step: 582/463, loss: 0.15601463615894318 2023-01-22 11:14:05.438409: step: 584/463, loss: 0.11801301687955856 2023-01-22 11:14:06.073297: step: 586/463, loss: 0.10612158477306366 2023-01-22 11:14:06.756441: step: 588/463, loss: 0.07959010452032089 2023-01-22 11:14:07.454470: step: 590/463, loss: 0.12788362801074982 2023-01-22 11:14:08.089392: step: 592/463, loss: 0.09778382629156113 2023-01-22 11:14:08.721897: step: 594/463, loss: 1.2908345460891724 2023-01-22 11:14:09.361967: step: 596/463, loss: 0.07603273540735245 2023-01-22 11:14:10.050136: step: 598/463, loss: 0.16689461469650269 2023-01-22 11:14:10.636121: step: 600/463, loss: 0.036326803267002106 2023-01-22 11:14:11.124098: step: 602/463, loss: 0.00665775453671813 2023-01-22 11:14:11.766222: step: 604/463, loss: 0.11159547418355942 2023-01-22 11:14:12.382605: step: 606/463, loss: 0.03686422482132912 2023-01-22 11:14:12.959878: step: 608/463, loss: 0.1336565911769867 2023-01-22 11:14:13.542858: step: 610/463, loss: 0.18711890280246735 2023-01-22 11:14:14.095420: step: 612/463, loss: 0.019359368830919266 2023-01-22 11:14:14.713419: step: 614/463, loss: 0.06999849528074265 2023-01-22 11:14:15.290525: step: 616/463, loss: 0.15988655388355255 2023-01-22 11:14:15.926882: step: 618/463, loss: 0.026183636859059334 2023-01-22 11:14:16.616486: step: 620/463, loss: 0.03431953489780426 2023-01-22 11:14:17.244363: step: 622/463, loss: 0.03576991334557533 2023-01-22 11:14:17.835372: step: 624/463, loss: 0.04971986263990402 2023-01-22 11:14:18.412122: step: 626/463, loss: 0.08186327666044235 2023-01-22 11:14:19.007989: step: 628/463, loss: 0.4426536560058594 2023-01-22 11:14:19.566477: step: 630/463, loss: 0.060494065284729004 2023-01-22 11:14:20.198056: step: 632/463, loss: 0.10471770167350769 2023-01-22 11:14:20.838824: step: 634/463, loss: 0.08326321095228195 2023-01-22 11:14:21.475015: step: 636/463, loss: 0.07041725516319275 2023-01-22 11:14:22.147760: step: 638/463, loss: 0.06596620380878448 2023-01-22 11:14:22.776041: step: 640/463, loss: 0.06585359573364258 2023-01-22 11:14:23.404441: step: 642/463, loss: 0.0484500490128994 2023-01-22 11:14:24.041351: step: 644/463, loss: 0.11510353535413742 2023-01-22 11:14:24.725123: step: 646/463, loss: 0.13813678920269012 2023-01-22 11:14:25.321843: step: 648/463, loss: 0.16530629992485046 2023-01-22 11:14:25.914936: step: 650/463, loss: 0.1415550410747528 2023-01-22 11:14:26.557697: step: 652/463, loss: 0.11802859604358673 2023-01-22 11:14:27.169941: step: 654/463, loss: 0.07877757400274277 2023-01-22 11:14:27.758176: step: 656/463, loss: 0.04003753885626793 2023-01-22 11:14:28.368670: step: 658/463, loss: 0.017675675451755524 2023-01-22 11:14:29.073104: step: 660/463, loss: 0.10854945331811905 2023-01-22 11:14:29.732893: step: 662/463, loss: 0.2632792294025421 2023-01-22 11:14:30.324671: step: 664/463, loss: 0.08814460784196854 2023-01-22 11:14:30.900418: step: 666/463, loss: 0.187367781996727 2023-01-22 11:14:31.611766: step: 668/463, loss: 0.024920079857110977 2023-01-22 11:14:32.164085: step: 670/463, loss: 0.10781847685575485 2023-01-22 11:14:32.839856: step: 672/463, loss: 0.03659043833613396 2023-01-22 11:14:33.549691: step: 674/463, loss: 0.3417806029319763 2023-01-22 11:14:34.136071: step: 676/463, loss: 0.13100674748420715 2023-01-22 11:14:34.736816: step: 678/463, loss: 0.17757271230220795 2023-01-22 11:14:35.422350: step: 680/463, loss: 0.10012602806091309 2023-01-22 11:14:35.999209: step: 682/463, loss: 0.12737083435058594 2023-01-22 11:14:36.575562: step: 684/463, loss: 0.035889822989702225 2023-01-22 11:14:37.162343: step: 686/463, loss: 0.26752403378486633 2023-01-22 11:14:37.731577: step: 688/463, loss: 0.37662583589553833 2023-01-22 11:14:38.318519: step: 690/463, loss: 0.07922212779521942 2023-01-22 11:14:39.011148: step: 692/463, loss: 0.13277605175971985 2023-01-22 11:14:39.591246: step: 694/463, loss: 0.1505248099565506 2023-01-22 11:14:40.183353: step: 696/463, loss: 0.0532086007297039 2023-01-22 11:14:40.827577: step: 698/463, loss: 0.14752548933029175 2023-01-22 11:14:41.412686: step: 700/463, loss: 0.01776857115328312 2023-01-22 11:14:42.005602: step: 702/463, loss: 0.046205390244722366 2023-01-22 11:14:42.599564: step: 704/463, loss: 0.09470363706350327 2023-01-22 11:14:43.227037: step: 706/463, loss: 0.08440268784761429 2023-01-22 11:14:43.868124: step: 708/463, loss: 0.057107601314783096 2023-01-22 11:14:44.512985: step: 710/463, loss: 0.21635369956493378 2023-01-22 11:14:45.112625: step: 712/463, loss: 0.09460914880037308 2023-01-22 11:14:45.716255: step: 714/463, loss: 0.06444735080003738 2023-01-22 11:14:46.368627: step: 716/463, loss: 0.08464868366718292 2023-01-22 11:14:46.956610: step: 718/463, loss: 0.052589572966098785 2023-01-22 11:14:47.559471: step: 720/463, loss: 0.03821743279695511 2023-01-22 11:14:48.204228: step: 722/463, loss: 0.08025336265563965 2023-01-22 11:14:48.807604: step: 724/463, loss: 0.09823353588581085 2023-01-22 11:14:49.522326: step: 726/463, loss: 0.08961719274520874 2023-01-22 11:14:50.083148: step: 728/463, loss: 0.09130401164293289 2023-01-22 11:14:50.695082: step: 730/463, loss: 0.03249844163656235 2023-01-22 11:14:51.243935: step: 732/463, loss: 0.10508283972740173 2023-01-22 11:14:51.940134: step: 734/463, loss: 0.040572043508291245 2023-01-22 11:14:52.523152: step: 736/463, loss: 0.054280102252960205 2023-01-22 11:14:53.141454: step: 738/463, loss: 0.08831977099180222 2023-01-22 11:14:53.760565: step: 740/463, loss: 0.09214296191930771 2023-01-22 11:14:54.434392: step: 742/463, loss: 0.0737830400466919 2023-01-22 11:14:55.092684: step: 744/463, loss: 0.07194742560386658 2023-01-22 11:14:55.704819: step: 746/463, loss: 0.15930859744548798 2023-01-22 11:14:56.349082: step: 748/463, loss: 0.025986719876527786 2023-01-22 11:14:57.047916: step: 750/463, loss: 0.07572995871305466 2023-01-22 11:14:57.726316: step: 752/463, loss: 0.0746951624751091 2023-01-22 11:14:58.383519: step: 754/463, loss: 0.04142388328909874 2023-01-22 11:14:59.046252: step: 756/463, loss: 0.10964342206716537 2023-01-22 11:14:59.627457: step: 758/463, loss: 0.1708681434392929 2023-01-22 11:15:00.337428: step: 760/463, loss: 0.040345918387174606 2023-01-22 11:15:00.931115: step: 762/463, loss: 0.0728670209646225 2023-01-22 11:15:01.519413: step: 764/463, loss: 0.0607268251478672 2023-01-22 11:15:02.138560: step: 766/463, loss: 0.09249906986951828 2023-01-22 11:15:02.774012: step: 768/463, loss: 5.884969711303711 2023-01-22 11:15:03.382431: step: 770/463, loss: 0.06606926769018173 2023-01-22 11:15:04.009191: step: 772/463, loss: 0.14891093969345093 2023-01-22 11:15:04.657875: step: 774/463, loss: 0.08733320981264114 2023-01-22 11:15:05.259820: step: 776/463, loss: 0.023043058812618256 2023-01-22 11:15:05.893846: step: 778/463, loss: 0.05227816849946976 2023-01-22 11:15:06.506408: step: 780/463, loss: 0.044602736830711365 2023-01-22 11:15:07.132258: step: 782/463, loss: 0.35288313031196594 2023-01-22 11:15:07.772110: step: 784/463, loss: 0.2534416913986206 2023-01-22 11:15:08.364789: step: 786/463, loss: 0.15967564284801483 2023-01-22 11:15:08.989085: step: 788/463, loss: 0.1509036272764206 2023-01-22 11:15:09.630296: step: 790/463, loss: 0.09032673388719559 2023-01-22 11:15:10.251657: step: 792/463, loss: 0.25398415327072144 2023-01-22 11:15:10.885323: step: 794/463, loss: 0.26440057158470154 2023-01-22 11:15:11.552568: step: 796/463, loss: 0.08120163530111313 2023-01-22 11:15:12.164255: step: 798/463, loss: 0.05926734209060669 2023-01-22 11:15:12.815346: step: 800/463, loss: 0.15492422878742218 2023-01-22 11:15:13.422896: step: 802/463, loss: 0.0764470025897026 2023-01-22 11:15:14.037548: step: 804/463, loss: 0.1691388338804245 2023-01-22 11:15:14.620510: step: 806/463, loss: 0.06764068454504013 2023-01-22 11:15:15.340409: step: 808/463, loss: 0.031133485957980156 2023-01-22 11:15:15.917325: step: 810/463, loss: 0.2223028838634491 2023-01-22 11:15:16.509951: step: 812/463, loss: 0.06927622854709625 2023-01-22 11:15:17.119400: step: 814/463, loss: 0.0654175728559494 2023-01-22 11:15:17.716771: step: 816/463, loss: 0.09919803589582443 2023-01-22 11:15:18.421774: step: 818/463, loss: 0.14725886285305023 2023-01-22 11:15:19.119720: step: 820/463, loss: 1.3823851346969604 2023-01-22 11:15:19.712542: step: 822/463, loss: 0.14381439983844757 2023-01-22 11:15:20.324955: step: 824/463, loss: 0.1574321687221527 2023-01-22 11:15:20.984101: step: 826/463, loss: 0.16179075837135315 2023-01-22 11:15:21.571981: step: 828/463, loss: 0.12029152363538742 2023-01-22 11:15:22.253532: step: 830/463, loss: 0.24406377971172333 2023-01-22 11:15:22.831808: step: 832/463, loss: 0.04816749319434166 2023-01-22 11:15:23.364685: step: 834/463, loss: 0.11215320229530334 2023-01-22 11:15:23.899038: step: 836/463, loss: 0.7431363463401794 2023-01-22 11:15:24.526387: step: 838/463, loss: 0.11409196257591248 2023-01-22 11:15:25.058775: step: 840/463, loss: 0.0903141126036644 2023-01-22 11:15:25.639434: step: 842/463, loss: 0.023744821548461914 2023-01-22 11:15:26.281955: step: 844/463, loss: 0.0793248787522316 2023-01-22 11:15:27.008205: step: 846/463, loss: 0.06717075407505035 2023-01-22 11:15:27.647609: step: 848/463, loss: 0.03410543128848076 2023-01-22 11:15:28.281465: step: 850/463, loss: 0.256104052066803 2023-01-22 11:15:28.863822: step: 852/463, loss: 0.15324832499027252 2023-01-22 11:15:29.517606: step: 854/463, loss: 0.07798101752996445 2023-01-22 11:15:30.198307: step: 856/463, loss: 0.10182260721921921 2023-01-22 11:15:30.798321: step: 858/463, loss: 0.14487837255001068 2023-01-22 11:15:31.375211: step: 860/463, loss: 0.08484792709350586 2023-01-22 11:15:31.974321: step: 862/463, loss: 0.10247684270143509 2023-01-22 11:15:32.608508: step: 864/463, loss: 0.0686623752117157 2023-01-22 11:15:33.228991: step: 866/463, loss: 0.013124987483024597 2023-01-22 11:15:33.908031: step: 868/463, loss: 0.11345875263214111 2023-01-22 11:15:34.552626: step: 870/463, loss: 0.02788414992392063 2023-01-22 11:15:35.226897: step: 872/463, loss: 0.05029228329658508 2023-01-22 11:15:35.881306: step: 874/463, loss: 0.036897026002407074 2023-01-22 11:15:36.575696: step: 876/463, loss: 0.09647022932767868 2023-01-22 11:15:37.267773: step: 878/463, loss: 0.1591930389404297 2023-01-22 11:15:37.884920: step: 880/463, loss: 0.3004050850868225 2023-01-22 11:15:38.524002: step: 882/463, loss: 0.05016656965017319 2023-01-22 11:15:39.200113: step: 884/463, loss: 0.062221091240644455 2023-01-22 11:15:39.866407: step: 886/463, loss: 0.045141298323869705 2023-01-22 11:15:40.495206: step: 888/463, loss: 0.6509191393852234 2023-01-22 11:15:41.068018: step: 890/463, loss: 0.09943657368421555 2023-01-22 11:15:41.688709: step: 892/463, loss: 0.058808110654354095 2023-01-22 11:15:42.265707: step: 894/463, loss: 0.3118111193180084 2023-01-22 11:15:42.836686: step: 896/463, loss: 0.4183562695980072 2023-01-22 11:15:43.550526: step: 898/463, loss: 0.19850215315818787 2023-01-22 11:15:44.130591: step: 900/463, loss: 0.061391137540340424 2023-01-22 11:15:44.724176: step: 902/463, loss: 0.1711278259754181 2023-01-22 11:15:45.382889: step: 904/463, loss: 0.046011898666620255 2023-01-22 11:15:46.089945: step: 906/463, loss: 0.039038825780153275 2023-01-22 11:15:46.669306: step: 908/463, loss: 0.007601679768413305 2023-01-22 11:15:47.332967: step: 910/463, loss: 0.30937790870666504 2023-01-22 11:15:47.990713: step: 912/463, loss: 0.039402756839990616 2023-01-22 11:15:48.688178: step: 914/463, loss: 0.05828939005732536 2023-01-22 11:15:49.367960: step: 916/463, loss: 0.09467930346727371 2023-01-22 11:15:49.956149: step: 918/463, loss: 0.2072845995426178 2023-01-22 11:15:50.536164: step: 920/463, loss: 0.05487317591905594 2023-01-22 11:15:51.117678: step: 922/463, loss: 0.09829351305961609 2023-01-22 11:15:51.723334: step: 924/463, loss: 0.07700677961111069 2023-01-22 11:15:52.434991: step: 926/463, loss: 0.6743320226669312 ================================================== Loss: 0.140 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3279711174242424, 'r': 0.3285934535104364, 'f1': 0.32828199052132695}, 'combined': 0.24189199301571457, 'epoch': 17} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33699381251830834, 'r': 0.40575541764612494, 'f1': 0.3681917384478598}, 'combined': 0.2853926393710684, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28891680064308684, 'r': 0.3409985768500949, 'f1': 0.3128046127067015}, 'combined': 0.2304876093628327, 'epoch': 17} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3240768317992097, 'r': 0.41254265812675134, 'f1': 0.3629975026622769}, 'combined': 0.2813664853171716, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2932561166829384, 'r': 0.3299826891707447, 'f1': 0.31053728070175435}, 'combined': 0.22881694367497687, 'epoch': 17} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3259682586613274, 'r': 0.40056945021157603, 'f1': 0.35943881388057297}, 'combined': 0.27860807583087477, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32926829268292684, 'r': 0.38571428571428573, 'f1': 0.35526315789473684}, 'combined': 0.23684210526315788, 'epoch': 17} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3125, 'r': 0.4891304347826087, 'f1': 0.38135593220338987}, 'combined': 0.19067796610169493, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4473684210526316, 'r': 0.29310344827586204, 'f1': 0.35416666666666663}, 'combined': 0.23611111111111108, 'epoch': 17} New best chinese model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3279711174242424, 'r': 0.3285934535104364, 'f1': 0.32828199052132695}, 'combined': 0.24189199301571457, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33699381251830834, 'r': 0.40575541764612494, 'f1': 0.3681917384478598}, 'combined': 0.2853926393710684, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32926829268292684, 'r': 0.38571428571428573, 'f1': 0.35526315789473684}, 'combined': 0.23684210526315788, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29021588054776093, 'r': 0.325460313859064, 'f1': 0.30682931199235547}, 'combined': 0.2260847562048935, 'epoch': 13} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32837198696491027, 'r': 0.4053341714098111, 'f1': 0.3628166009822085}, 'combined': 0.28122626487616165, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3333333333333333, 'r': 0.4782608695652174, 'f1': 0.3928571428571428}, 'combined': 0.1964285714285714, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2932561166829384, 'r': 0.3299826891707447, 'f1': 0.31053728070175435}, 'combined': 0.22881694367497687, 'epoch': 17} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3259682586613274, 'r': 0.40056945021157603, 'f1': 0.35943881388057297}, 'combined': 0.27860807583087477, 'epoch': 17} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4473684210526316, 'r': 0.29310344827586204, 'f1': 0.35416666666666663}, 'combined': 0.23611111111111108, 'epoch': 17} ****************************** Epoch: 18 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 11:18:53.435997: step: 2/463, loss: 0.016212042421102524 2023-01-22 11:18:54.094055: step: 4/463, loss: 0.9640198945999146 2023-01-22 11:18:54.654423: step: 6/463, loss: 0.033647093921899796 2023-01-22 11:18:55.270934: step: 8/463, loss: 0.05437152087688446 2023-01-22 11:18:55.879240: step: 10/463, loss: 0.07215385138988495 2023-01-22 11:18:56.476141: step: 12/463, loss: 0.0493975393474102 2023-01-22 11:18:57.017053: step: 14/463, loss: 0.058827850967645645 2023-01-22 11:18:57.697632: step: 16/463, loss: 0.09795331209897995 2023-01-22 11:18:58.346712: step: 18/463, loss: 0.020355330780148506 2023-01-22 11:18:58.911388: step: 20/463, loss: 0.03013799898326397 2023-01-22 11:18:59.545901: step: 22/463, loss: 0.05115118250250816 2023-01-22 11:19:00.153210: step: 24/463, loss: 0.010236588306725025 2023-01-22 11:19:00.728139: step: 26/463, loss: 0.048115234822034836 2023-01-22 11:19:01.384245: step: 28/463, loss: 0.10292096436023712 2023-01-22 11:19:02.004563: step: 30/463, loss: 0.37770336866378784 2023-01-22 11:19:02.588945: step: 32/463, loss: 0.07796694338321686 2023-01-22 11:19:03.196817: step: 34/463, loss: 0.03693462908267975 2023-01-22 11:19:03.815896: step: 36/463, loss: 0.042251937091350555 2023-01-22 11:19:04.514988: step: 38/463, loss: 0.18346579372882843 2023-01-22 11:19:05.145304: step: 40/463, loss: 0.4044155776500702 2023-01-22 11:19:05.721975: step: 42/463, loss: 0.08191284537315369 2023-01-22 11:19:06.322521: step: 44/463, loss: 0.07532992213964462 2023-01-22 11:19:06.974329: step: 46/463, loss: 0.11294304579496384 2023-01-22 11:19:07.569802: step: 48/463, loss: 0.05083438381552696 2023-01-22 11:19:08.209565: step: 50/463, loss: 0.05494581162929535 2023-01-22 11:19:08.788035: step: 52/463, loss: 0.06806744635105133 2023-01-22 11:19:09.438543: step: 54/463, loss: 0.32477229833602905 2023-01-22 11:19:10.053883: step: 56/463, loss: 0.07577423751354218 2023-01-22 11:19:10.646547: step: 58/463, loss: 0.04088554531335831 2023-01-22 11:19:11.260775: step: 60/463, loss: 0.034088753163814545 2023-01-22 11:19:11.852995: step: 62/463, loss: 0.019968640059232712 2023-01-22 11:19:12.495096: step: 64/463, loss: 0.11061062663793564 2023-01-22 11:19:13.090114: step: 66/463, loss: 0.05607874318957329 2023-01-22 11:19:13.691590: step: 68/463, loss: 0.03767048195004463 2023-01-22 11:19:14.291285: step: 70/463, loss: 0.039938606321811676 2023-01-22 11:19:14.953817: step: 72/463, loss: 0.01798577792942524 2023-01-22 11:19:15.551852: step: 74/463, loss: 0.03460743650794029 2023-01-22 11:19:16.149904: step: 76/463, loss: 0.018315061926841736 2023-01-22 11:19:16.710181: step: 78/463, loss: 0.09156130254268646 2023-01-22 11:19:17.265937: step: 80/463, loss: 0.20378705859184265 2023-01-22 11:19:17.927666: step: 82/463, loss: 0.105721615254879 2023-01-22 11:19:18.533294: step: 84/463, loss: 0.560261070728302 2023-01-22 11:19:19.135313: step: 86/463, loss: 0.046659983694553375 2023-01-22 11:19:19.740742: step: 88/463, loss: 0.028879733756184578 2023-01-22 11:19:20.337896: step: 90/463, loss: 0.0708206444978714 2023-01-22 11:19:20.914464: step: 92/463, loss: 0.03559279069304466 2023-01-22 11:19:21.535333: step: 94/463, loss: 0.09447220712900162 2023-01-22 11:19:22.183923: step: 96/463, loss: 0.13801820576190948 2023-01-22 11:19:22.782698: step: 98/463, loss: 0.08263944834470749 2023-01-22 11:19:23.477388: step: 100/463, loss: 0.015989182516932487 2023-01-22 11:19:24.117327: step: 102/463, loss: 0.09653551876544952 2023-01-22 11:19:24.724693: step: 104/463, loss: 0.07864303141832352 2023-01-22 11:19:25.317427: step: 106/463, loss: 0.18088604509830475 2023-01-22 11:19:25.947714: step: 108/463, loss: 0.06670957803726196 2023-01-22 11:19:26.581132: step: 110/463, loss: 0.10206970572471619 2023-01-22 11:19:27.217484: step: 112/463, loss: 0.07119515538215637 2023-01-22 11:19:27.767319: step: 114/463, loss: 0.02532247081398964 2023-01-22 11:19:28.362534: step: 116/463, loss: 1.8201171159744263 2023-01-22 11:19:28.965508: step: 118/463, loss: 0.048613276332616806 2023-01-22 11:19:29.556946: step: 120/463, loss: 0.01938631944358349 2023-01-22 11:19:30.187526: step: 122/463, loss: 0.0514845997095108 2023-01-22 11:19:30.768871: step: 124/463, loss: 0.09292665123939514 2023-01-22 11:19:31.399132: step: 126/463, loss: 0.1530778855085373 2023-01-22 11:19:32.027506: step: 128/463, loss: 0.2171950787305832 2023-01-22 11:19:32.622770: step: 130/463, loss: 0.03372624143958092 2023-01-22 11:19:33.269236: step: 132/463, loss: 0.04612148553133011 2023-01-22 11:19:33.842575: step: 134/463, loss: 0.09772387892007828 2023-01-22 11:19:34.495967: step: 136/463, loss: 0.043327052146196365 2023-01-22 11:19:35.166987: step: 138/463, loss: 0.05234206095337868 2023-01-22 11:19:35.820144: step: 140/463, loss: 0.09337692707777023 2023-01-22 11:19:36.483364: step: 142/463, loss: 0.175821915268898 2023-01-22 11:19:37.155254: step: 144/463, loss: 0.07792326807975769 2023-01-22 11:19:37.817048: step: 146/463, loss: 0.04891330748796463 2023-01-22 11:19:38.509872: step: 148/463, loss: 0.03033485822379589 2023-01-22 11:19:39.143880: step: 150/463, loss: 0.12295791506767273 2023-01-22 11:19:39.747031: step: 152/463, loss: 0.03542214632034302 2023-01-22 11:19:40.358468: step: 154/463, loss: 0.07410010695457458 2023-01-22 11:19:40.945593: step: 156/463, loss: 0.05069688707590103 2023-01-22 11:19:41.550081: step: 158/463, loss: 1.4251694679260254 2023-01-22 11:19:42.144441: step: 160/463, loss: 0.10487470030784607 2023-01-22 11:19:42.861528: step: 162/463, loss: 0.2275792956352234 2023-01-22 11:19:43.399722: step: 164/463, loss: 0.00966346450150013 2023-01-22 11:19:43.966566: step: 166/463, loss: 0.012949720025062561 2023-01-22 11:19:44.591543: step: 168/463, loss: 0.06521248072385788 2023-01-22 11:19:45.232816: step: 170/463, loss: 0.03360576182603836 2023-01-22 11:19:45.839799: step: 172/463, loss: 0.1602082997560501 2023-01-22 11:19:46.397591: step: 174/463, loss: 0.0936887189745903 2023-01-22 11:19:47.019220: step: 176/463, loss: 0.10705532133579254 2023-01-22 11:19:47.641822: step: 178/463, loss: 0.04392546787858009 2023-01-22 11:19:48.287681: step: 180/463, loss: 0.00891173630952835 2023-01-22 11:19:48.960681: step: 182/463, loss: 0.08424145728349686 2023-01-22 11:19:49.643293: step: 184/463, loss: 0.025389881804585457 2023-01-22 11:19:50.247967: step: 186/463, loss: 0.06496134400367737 2023-01-22 11:19:50.899331: step: 188/463, loss: 0.028190920129418373 2023-01-22 11:19:51.556475: step: 190/463, loss: 0.0437450185418129 2023-01-22 11:19:52.174532: step: 192/463, loss: 0.07979384809732437 2023-01-22 11:19:52.773448: step: 194/463, loss: 0.01194043830037117 2023-01-22 11:19:53.425671: step: 196/463, loss: 0.07126058638095856 2023-01-22 11:19:54.018624: step: 198/463, loss: 0.31532159447669983 2023-01-22 11:19:54.632891: step: 200/463, loss: 0.04363734647631645 2023-01-22 11:19:55.182387: step: 202/463, loss: 0.011161845177412033 2023-01-22 11:19:55.834253: step: 204/463, loss: 0.18528276681900024 2023-01-22 11:19:56.459182: step: 206/463, loss: 0.17572219669818878 2023-01-22 11:19:57.093395: step: 208/463, loss: 0.04270785301923752 2023-01-22 11:19:57.672215: step: 210/463, loss: 0.3265828788280487 2023-01-22 11:19:58.340247: step: 212/463, loss: 0.24936145544052124 2023-01-22 11:19:58.959645: step: 214/463, loss: 0.0223789531737566 2023-01-22 11:19:59.563128: step: 216/463, loss: 0.10817316919565201 2023-01-22 11:20:00.197460: step: 218/463, loss: 0.05690561607480049 2023-01-22 11:20:00.882233: step: 220/463, loss: 0.16372817754745483 2023-01-22 11:20:01.426437: step: 222/463, loss: 0.01704958640038967 2023-01-22 11:20:02.126852: step: 224/463, loss: 0.03656533360481262 2023-01-22 11:20:02.716564: step: 226/463, loss: 0.05308743566274643 2023-01-22 11:20:03.355053: step: 228/463, loss: 0.1447989046573639 2023-01-22 11:20:03.918358: step: 230/463, loss: 0.015387726947665215 2023-01-22 11:20:04.482360: step: 232/463, loss: 0.032462846487760544 2023-01-22 11:20:05.169056: step: 234/463, loss: 0.05053481459617615 2023-01-22 11:20:05.842200: step: 236/463, loss: 0.03542039170861244 2023-01-22 11:20:06.435392: step: 238/463, loss: 0.0714607983827591 2023-01-22 11:20:07.023642: step: 240/463, loss: 0.17304441332817078 2023-01-22 11:20:07.634530: step: 242/463, loss: 0.028681520372629166 2023-01-22 11:20:08.206617: step: 244/463, loss: 0.23255522549152374 2023-01-22 11:20:08.834813: step: 246/463, loss: 0.11861496418714523 2023-01-22 11:20:09.405580: step: 248/463, loss: 0.0869048461318016 2023-01-22 11:20:10.067286: step: 250/463, loss: 0.062077596783638 2023-01-22 11:20:10.703527: step: 252/463, loss: 0.12153871357440948 2023-01-22 11:20:11.298138: step: 254/463, loss: 0.12055100500583649 2023-01-22 11:20:11.828569: step: 256/463, loss: 0.038677528500556946 2023-01-22 11:20:12.480593: step: 258/463, loss: 0.28849631547927856 2023-01-22 11:20:13.083285: step: 260/463, loss: 0.053183041512966156 2023-01-22 11:20:13.675036: step: 262/463, loss: 0.11701661348342896 2023-01-22 11:20:14.317636: step: 264/463, loss: 0.15511856973171234 2023-01-22 11:20:14.946355: step: 266/463, loss: 0.03338862583041191 2023-01-22 11:20:15.598424: step: 268/463, loss: 0.271430104970932 2023-01-22 11:20:16.199461: step: 270/463, loss: 0.1402658075094223 2023-01-22 11:20:16.832018: step: 272/463, loss: 0.05355101078748703 2023-01-22 11:20:17.481320: step: 274/463, loss: 0.05734524503350258 2023-01-22 11:20:18.098928: step: 276/463, loss: 0.11237657070159912 2023-01-22 11:20:18.737690: step: 278/463, loss: 0.025945687666535378 2023-01-22 11:20:19.335625: step: 280/463, loss: 0.2567163109779358 2023-01-22 11:20:19.943555: step: 282/463, loss: 0.053242068737745285 2023-01-22 11:20:20.547469: step: 284/463, loss: 0.03981756046414375 2023-01-22 11:20:21.144225: step: 286/463, loss: 0.07972758263349533 2023-01-22 11:20:21.734600: step: 288/463, loss: 0.12114940583705902 2023-01-22 11:20:22.349681: step: 290/463, loss: 0.06699173897504807 2023-01-22 11:20:22.940273: step: 292/463, loss: 0.18497785925865173 2023-01-22 11:20:23.523167: step: 294/463, loss: 0.08582198619842529 2023-01-22 11:20:24.133884: step: 296/463, loss: 0.1405123770236969 2023-01-22 11:20:24.765457: step: 298/463, loss: 0.12506988644599915 2023-01-22 11:20:25.400770: step: 300/463, loss: 0.0543283186852932 2023-01-22 11:20:26.005829: step: 302/463, loss: 0.0586734302341938 2023-01-22 11:20:26.605915: step: 304/463, loss: 0.08253441005945206 2023-01-22 11:20:27.246297: step: 306/463, loss: 0.05644260719418526 2023-01-22 11:20:27.844734: step: 308/463, loss: 0.08561031520366669 2023-01-22 11:20:28.516186: step: 310/463, loss: 0.045978374779224396 2023-01-22 11:20:29.121029: step: 312/463, loss: 0.06379242986440659 2023-01-22 11:20:29.779959: step: 314/463, loss: 0.07658006250858307 2023-01-22 11:20:30.315098: step: 316/463, loss: 0.03514092415571213 2023-01-22 11:20:30.931357: step: 318/463, loss: 0.01809585653245449 2023-01-22 11:20:31.549366: step: 320/463, loss: 0.04561099037528038 2023-01-22 11:20:32.115133: step: 322/463, loss: 0.09169846028089523 2023-01-22 11:20:32.695499: step: 324/463, loss: 0.03276433050632477 2023-01-22 11:20:33.383285: step: 326/463, loss: 0.02641312964260578 2023-01-22 11:20:34.002030: step: 328/463, loss: 0.012321699410676956 2023-01-22 11:20:34.602879: step: 330/463, loss: 0.04649710655212402 2023-01-22 11:20:35.212084: step: 332/463, loss: 0.13455890119075775 2023-01-22 11:20:35.835602: step: 334/463, loss: 0.29181528091430664 2023-01-22 11:20:36.444416: step: 336/463, loss: 0.4780136048793793 2023-01-22 11:20:37.157072: step: 338/463, loss: 0.04577526077628136 2023-01-22 11:20:37.839626: step: 340/463, loss: 0.025612493976950645 2023-01-22 11:20:38.434761: step: 342/463, loss: 0.6556715369224548 2023-01-22 11:20:39.044858: step: 344/463, loss: 0.06692571938037872 2023-01-22 11:20:39.717226: step: 346/463, loss: 0.08582320809364319 2023-01-22 11:20:40.336085: step: 348/463, loss: 0.034909769892692566 2023-01-22 11:20:40.993460: step: 350/463, loss: 0.059673845767974854 2023-01-22 11:20:41.568771: step: 352/463, loss: 0.037033919245004654 2023-01-22 11:20:42.127855: step: 354/463, loss: 0.06435783207416534 2023-01-22 11:20:42.730981: step: 356/463, loss: 0.07478026300668716 2023-01-22 11:20:43.358769: step: 358/463, loss: 0.4015183746814728 2023-01-22 11:20:44.038065: step: 360/463, loss: 0.10015476495027542 2023-01-22 11:20:44.656710: step: 362/463, loss: 0.3877939283847809 2023-01-22 11:20:45.251725: step: 364/463, loss: 0.13038840889930725 2023-01-22 11:20:45.886856: step: 366/463, loss: 0.01645221933722496 2023-01-22 11:20:46.528092: step: 368/463, loss: 0.013122377917170525 2023-01-22 11:20:47.146309: step: 370/463, loss: 0.058484334498643875 2023-01-22 11:20:47.747428: step: 372/463, loss: 0.036616064608097076 2023-01-22 11:20:48.364287: step: 374/463, loss: 0.019679535180330276 2023-01-22 11:20:48.948370: step: 376/463, loss: 0.10396323353052139 2023-01-22 11:20:49.521364: step: 378/463, loss: 0.07096651941537857 2023-01-22 11:20:50.180528: step: 380/463, loss: 0.038530830293893814 2023-01-22 11:20:50.764711: step: 382/463, loss: 0.056489501148462296 2023-01-22 11:20:51.358876: step: 384/463, loss: 0.053779445588588715 2023-01-22 11:20:51.936696: step: 386/463, loss: 0.04143083095550537 2023-01-22 11:20:52.550127: step: 388/463, loss: 0.05594902113080025 2023-01-22 11:20:53.190419: step: 390/463, loss: 0.0597904734313488 2023-01-22 11:20:53.800189: step: 392/463, loss: 0.16412444412708282 2023-01-22 11:20:54.451176: step: 394/463, loss: 0.22092363238334656 2023-01-22 11:20:55.091714: step: 396/463, loss: 0.1443624645471573 2023-01-22 11:20:55.769126: step: 398/463, loss: 0.15668828785419464 2023-01-22 11:20:56.392476: step: 400/463, loss: 0.09394413232803345 2023-01-22 11:20:57.005332: step: 402/463, loss: 0.2287728488445282 2023-01-22 11:20:57.681917: step: 404/463, loss: 1.1928030252456665 2023-01-22 11:20:58.277892: step: 406/463, loss: 0.03721761330962181 2023-01-22 11:20:58.879572: step: 408/463, loss: 0.14985324442386627 2023-01-22 11:20:59.484879: step: 410/463, loss: 0.06386313587427139 2023-01-22 11:21:00.023939: step: 412/463, loss: 0.038746338337659836 2023-01-22 11:21:00.614012: step: 414/463, loss: 0.04794111102819443 2023-01-22 11:21:01.242785: step: 416/463, loss: 0.03729976713657379 2023-01-22 11:21:01.812791: step: 418/463, loss: 0.12474322319030762 2023-01-22 11:21:02.462444: step: 420/463, loss: 0.1808999478816986 2023-01-22 11:21:03.079142: step: 422/463, loss: 0.01970197632908821 2023-01-22 11:21:03.671126: step: 424/463, loss: 0.2853885889053345 2023-01-22 11:21:04.256994: step: 426/463, loss: 0.6189596056938171 2023-01-22 11:21:04.839831: step: 428/463, loss: 0.03852459043264389 2023-01-22 11:21:05.394360: step: 430/463, loss: 0.02537391521036625 2023-01-22 11:21:05.988201: step: 432/463, loss: 0.04569804668426514 2023-01-22 11:21:06.638203: step: 434/463, loss: 0.12830261886119843 2023-01-22 11:21:07.318672: step: 436/463, loss: 0.3350285291671753 2023-01-22 11:21:07.968987: step: 438/463, loss: 0.1817103922367096 2023-01-22 11:21:08.716701: step: 440/463, loss: 0.08359193801879883 2023-01-22 11:21:09.245687: step: 442/463, loss: 0.13854703307151794 2023-01-22 11:21:09.880716: step: 444/463, loss: 0.11180835217237473 2023-01-22 11:21:10.404991: step: 446/463, loss: 0.0782872810959816 2023-01-22 11:21:10.981660: step: 448/463, loss: 0.22110621631145477 2023-01-22 11:21:11.592720: step: 450/463, loss: 0.06191756948828697 2023-01-22 11:21:12.253602: step: 452/463, loss: 0.2135303020477295 2023-01-22 11:21:12.884893: step: 454/463, loss: 0.033807553350925446 2023-01-22 11:21:13.521445: step: 456/463, loss: 0.25119462609291077 2023-01-22 11:21:14.149504: step: 458/463, loss: 0.05710972100496292 2023-01-22 11:21:14.701146: step: 460/463, loss: 0.08755708485841751 2023-01-22 11:21:15.296764: step: 462/463, loss: 0.07173552364110947 2023-01-22 11:21:15.969599: step: 464/463, loss: 0.1192893236875534 2023-01-22 11:21:16.570245: step: 466/463, loss: 1.0184739828109741 2023-01-22 11:21:17.189497: step: 468/463, loss: 0.31705155968666077 2023-01-22 11:21:17.761133: step: 470/463, loss: 0.017777632921934128 2023-01-22 11:21:18.410676: step: 472/463, loss: 0.06604321300983429 2023-01-22 11:21:18.984487: step: 474/463, loss: 0.062269970774650574 2023-01-22 11:21:19.529222: step: 476/463, loss: 0.07097317278385162 2023-01-22 11:21:20.196174: step: 478/463, loss: 0.06228772923350334 2023-01-22 11:21:20.757297: step: 480/463, loss: 0.8452541828155518 2023-01-22 11:21:21.402249: step: 482/463, loss: 0.18494856357574463 2023-01-22 11:21:22.002381: step: 484/463, loss: 0.08495352417230606 2023-01-22 11:21:22.519798: step: 486/463, loss: 0.037212520837783813 2023-01-22 11:21:23.175539: step: 488/463, loss: 0.04034935683012009 2023-01-22 11:21:23.863980: step: 490/463, loss: 0.04735064506530762 2023-01-22 11:21:24.472931: step: 492/463, loss: 0.0665123462677002 2023-01-22 11:21:25.132989: step: 494/463, loss: 0.04500744864344597 2023-01-22 11:21:25.698089: step: 496/463, loss: 0.08867621421813965 2023-01-22 11:21:26.280913: step: 498/463, loss: 0.09523636102676392 2023-01-22 11:21:26.878955: step: 500/463, loss: 0.128495991230011 2023-01-22 11:21:27.421560: step: 502/463, loss: 0.26585331559181213 2023-01-22 11:21:27.991285: step: 504/463, loss: 0.09859692305326462 2023-01-22 11:21:28.576633: step: 506/463, loss: 0.05489655211567879 2023-01-22 11:21:29.193398: step: 508/463, loss: 0.07878757268190384 2023-01-22 11:21:29.923548: step: 510/463, loss: 0.09481282532215118 2023-01-22 11:21:30.561851: step: 512/463, loss: 0.14488010108470917 2023-01-22 11:21:31.121098: step: 514/463, loss: 0.22927995026111603 2023-01-22 11:21:31.764955: step: 516/463, loss: 0.03839798644185066 2023-01-22 11:21:32.437668: step: 518/463, loss: 0.08230244368314743 2023-01-22 11:21:33.054900: step: 520/463, loss: 0.03852389380335808 2023-01-22 11:21:33.683046: step: 522/463, loss: 0.07344438135623932 2023-01-22 11:21:34.306134: step: 524/463, loss: 0.13041947782039642 2023-01-22 11:21:34.950293: step: 526/463, loss: 0.08121833950281143 2023-01-22 11:21:35.575877: step: 528/463, loss: 0.0040967268869280815 2023-01-22 11:21:36.207698: step: 530/463, loss: 0.04251902177929878 2023-01-22 11:21:36.780197: step: 532/463, loss: 0.03161359205842018 2023-01-22 11:21:37.353398: step: 534/463, loss: 0.02550979144871235 2023-01-22 11:21:37.962206: step: 536/463, loss: 0.044608063995838165 2023-01-22 11:21:38.537867: step: 538/463, loss: 0.12192875891923904 2023-01-22 11:21:39.154714: step: 540/463, loss: 0.06912586838006973 2023-01-22 11:21:39.728781: step: 542/463, loss: 0.2503063678741455 2023-01-22 11:21:40.346111: step: 544/463, loss: 0.041538435965776443 2023-01-22 11:21:40.928824: step: 546/463, loss: 0.010474284179508686 2023-01-22 11:21:41.601293: step: 548/463, loss: 0.03558509051799774 2023-01-22 11:21:42.187960: step: 550/463, loss: 0.16151836514472961 2023-01-22 11:21:42.821775: step: 552/463, loss: 0.11909962445497513 2023-01-22 11:21:43.488647: step: 554/463, loss: 0.08550700545310974 2023-01-22 11:21:44.129228: step: 556/463, loss: 0.08892244845628738 2023-01-22 11:21:44.728464: step: 558/463, loss: 0.020676758140325546 2023-01-22 11:21:45.274985: step: 560/463, loss: 0.0018308733124285936 2023-01-22 11:21:45.863614: step: 562/463, loss: 0.08395480364561081 2023-01-22 11:21:46.540980: step: 564/463, loss: 0.07625464349985123 2023-01-22 11:21:47.158466: step: 566/463, loss: 0.07002590596675873 2023-01-22 11:21:47.879867: step: 568/463, loss: 0.1519494354724884 2023-01-22 11:21:48.508297: step: 570/463, loss: 0.1494431048631668 2023-01-22 11:21:49.153582: step: 572/463, loss: 0.04557216167449951 2023-01-22 11:21:49.831527: step: 574/463, loss: 0.1067340224981308 2023-01-22 11:21:50.511078: step: 576/463, loss: 0.22042644023895264 2023-01-22 11:21:51.160285: step: 578/463, loss: 0.08238231390714645 2023-01-22 11:21:51.797290: step: 580/463, loss: 0.07242780178785324 2023-01-22 11:21:52.432315: step: 582/463, loss: 0.099951833486557 2023-01-22 11:21:53.086026: step: 584/463, loss: 0.04533222317695618 2023-01-22 11:21:53.663506: step: 586/463, loss: 0.030246000736951828 2023-01-22 11:21:54.255079: step: 588/463, loss: 0.771239697933197 2023-01-22 11:21:54.873666: step: 590/463, loss: 0.07724495977163315 2023-01-22 11:21:55.465989: step: 592/463, loss: 0.07560603320598602 2023-01-22 11:21:56.100982: step: 594/463, loss: 0.09971284121274948 2023-01-22 11:21:56.696069: step: 596/463, loss: 0.05480746924877167 2023-01-22 11:21:57.323492: step: 598/463, loss: 0.0808015763759613 2023-01-22 11:21:57.957228: step: 600/463, loss: 0.09976930916309357 2023-01-22 11:21:58.577217: step: 602/463, loss: 0.24950411915779114 2023-01-22 11:21:59.219610: step: 604/463, loss: 0.2291785180568695 2023-01-22 11:21:59.905532: step: 606/463, loss: 0.03308527544140816 2023-01-22 11:22:00.490755: step: 608/463, loss: 0.13219588994979858 2023-01-22 11:22:01.072767: step: 610/463, loss: 0.2507569491863251 2023-01-22 11:22:01.650546: step: 612/463, loss: 0.03770134225487709 2023-01-22 11:22:02.311759: step: 614/463, loss: 0.19599707424640656 2023-01-22 11:22:02.994726: step: 616/463, loss: 0.257695734500885 2023-01-22 11:22:03.654611: step: 618/463, loss: 0.2742365896701813 2023-01-22 11:22:04.287414: step: 620/463, loss: 0.08532330393791199 2023-01-22 11:22:04.832689: step: 622/463, loss: 0.02215675637125969 2023-01-22 11:22:05.521346: step: 624/463, loss: 0.3949394226074219 2023-01-22 11:22:06.324753: step: 626/463, loss: 0.06408332288265228 2023-01-22 11:22:06.977100: step: 628/463, loss: 0.05704617127776146 2023-01-22 11:22:07.580640: step: 630/463, loss: 0.36017370223999023 2023-01-22 11:22:08.208073: step: 632/463, loss: 0.0625026598572731 2023-01-22 11:22:08.954266: step: 634/463, loss: 0.20242901146411896 2023-01-22 11:22:09.539766: step: 636/463, loss: 0.07182762771844864 2023-01-22 11:22:10.191578: step: 638/463, loss: 0.01648804545402527 2023-01-22 11:22:10.820326: step: 640/463, loss: 0.03705969080328941 2023-01-22 11:22:11.471210: step: 642/463, loss: 0.09607403725385666 2023-01-22 11:22:12.116038: step: 644/463, loss: 0.08543264120817184 2023-01-22 11:22:12.731072: step: 646/463, loss: 0.03443874791264534 2023-01-22 11:22:13.366769: step: 648/463, loss: 0.04173902049660683 2023-01-22 11:22:13.929255: step: 650/463, loss: 0.10508611053228378 2023-01-22 11:22:14.623359: step: 652/463, loss: 0.04642002284526825 2023-01-22 11:22:15.280934: step: 654/463, loss: 0.023942647501826286 2023-01-22 11:22:15.896739: step: 656/463, loss: 0.17312268912792206 2023-01-22 11:22:16.506692: step: 658/463, loss: 0.058510906994342804 2023-01-22 11:22:17.039289: step: 660/463, loss: 0.09554650634527206 2023-01-22 11:22:17.686853: step: 662/463, loss: 0.10357148945331573 2023-01-22 11:22:18.357205: step: 664/463, loss: 0.10483022034168243 2023-01-22 11:22:18.965584: step: 666/463, loss: 0.04725139960646629 2023-01-22 11:22:19.606689: step: 668/463, loss: 0.08748379349708557 2023-01-22 11:22:20.190692: step: 670/463, loss: 0.02116716280579567 2023-01-22 11:22:20.778784: step: 672/463, loss: 0.04394834116101265 2023-01-22 11:22:21.407187: step: 674/463, loss: 0.2515764534473419 2023-01-22 11:22:22.042093: step: 676/463, loss: 0.050028614699840546 2023-01-22 11:22:22.693611: step: 678/463, loss: 0.12193179130554199 2023-01-22 11:22:23.367723: step: 680/463, loss: 0.08774694800376892 2023-01-22 11:22:23.988072: step: 682/463, loss: 0.2395859658718109 2023-01-22 11:22:24.595042: step: 684/463, loss: 0.20482489466667175 2023-01-22 11:22:25.226751: step: 686/463, loss: 0.0740254819393158 2023-01-22 11:22:25.895722: step: 688/463, loss: 0.1632409244775772 2023-01-22 11:22:26.523455: step: 690/463, loss: 0.05424309894442558 2023-01-22 11:22:27.148029: step: 692/463, loss: 0.14362530410289764 2023-01-22 11:22:27.785731: step: 694/463, loss: 0.06453726440668106 2023-01-22 11:22:28.386827: step: 696/463, loss: 0.08554147183895111 2023-01-22 11:22:29.029901: step: 698/463, loss: 0.0859757587313652 2023-01-22 11:22:29.626334: step: 700/463, loss: 0.2012406885623932 2023-01-22 11:22:30.227124: step: 702/463, loss: 0.16985918581485748 2023-01-22 11:22:30.856757: step: 704/463, loss: 0.10331309586763382 2023-01-22 11:22:31.463583: step: 706/463, loss: 0.07001883536577225 2023-01-22 11:22:32.101053: step: 708/463, loss: 0.3086925745010376 2023-01-22 11:22:32.743521: step: 710/463, loss: 0.06393751502037048 2023-01-22 11:22:33.313642: step: 712/463, loss: 0.02271709032356739 2023-01-22 11:22:33.955631: step: 714/463, loss: 0.046378519386053085 2023-01-22 11:22:34.563032: step: 716/463, loss: 0.1023581251502037 2023-01-22 11:22:35.179672: step: 718/463, loss: 0.5596896409988403 2023-01-22 11:22:35.752979: step: 720/463, loss: 0.4274919033050537 2023-01-22 11:22:36.369535: step: 722/463, loss: 0.0569419339299202 2023-01-22 11:22:36.959271: step: 724/463, loss: 0.057337820529937744 2023-01-22 11:22:37.658087: step: 726/463, loss: 0.06475400179624557 2023-01-22 11:22:38.238456: step: 728/463, loss: 0.22655194997787476 2023-01-22 11:22:38.831491: step: 730/463, loss: 0.043507952243089676 2023-01-22 11:22:39.424814: step: 732/463, loss: 0.07175178825855255 2023-01-22 11:22:40.114079: step: 734/463, loss: 0.06373856961727142 2023-01-22 11:22:40.707847: step: 736/463, loss: 0.2925908863544464 2023-01-22 11:22:41.368564: step: 738/463, loss: 0.05292638763785362 2023-01-22 11:22:42.036120: step: 740/463, loss: 0.03942211717367172 2023-01-22 11:22:42.681143: step: 742/463, loss: 0.04859014227986336 2023-01-22 11:22:43.289740: step: 744/463, loss: 0.02931671403348446 2023-01-22 11:22:43.915795: step: 746/463, loss: 0.08924669772386551 2023-01-22 11:22:44.534814: step: 748/463, loss: 0.04821545630693436 2023-01-22 11:22:45.129572: step: 750/463, loss: 0.09331602603197098 2023-01-22 11:22:45.784670: step: 752/463, loss: 0.06827334314584732 2023-01-22 11:22:46.376467: step: 754/463, loss: 0.2204732447862625 2023-01-22 11:22:47.025206: step: 756/463, loss: 0.03531024977564812 2023-01-22 11:22:47.605071: step: 758/463, loss: 0.006756413262337446 2023-01-22 11:22:48.201672: step: 760/463, loss: 0.009446326643228531 2023-01-22 11:22:48.795781: step: 762/463, loss: 0.027397599071264267 2023-01-22 11:22:49.396047: step: 764/463, loss: 0.12816797196865082 2023-01-22 11:22:50.007814: step: 766/463, loss: 0.0505727082490921 2023-01-22 11:22:50.717801: step: 768/463, loss: 0.174888014793396 2023-01-22 11:22:51.326959: step: 770/463, loss: 0.07031068950891495 2023-01-22 11:22:51.905580: step: 772/463, loss: 0.06723949313163757 2023-01-22 11:22:52.510993: step: 774/463, loss: 0.05044790729880333 2023-01-22 11:22:53.072536: step: 776/463, loss: 0.00863361544907093 2023-01-22 11:22:53.695253: step: 778/463, loss: 0.028967279940843582 2023-01-22 11:22:54.329240: step: 780/463, loss: 0.040094662457704544 2023-01-22 11:22:54.976633: step: 782/463, loss: 0.1270979940891266 2023-01-22 11:22:55.574967: step: 784/463, loss: 0.03343029320240021 2023-01-22 11:22:56.126151: step: 786/463, loss: 0.0570879690349102 2023-01-22 11:22:56.758533: step: 788/463, loss: 0.10782936215400696 2023-01-22 11:22:57.378767: step: 790/463, loss: 0.4224686324596405 2023-01-22 11:22:57.977650: step: 792/463, loss: 0.025654686614871025 2023-01-22 11:22:58.563120: step: 794/463, loss: 0.12931935489177704 2023-01-22 11:22:59.146140: step: 796/463, loss: 0.6991729736328125 2023-01-22 11:22:59.765127: step: 798/463, loss: 0.055386822670698166 2023-01-22 11:23:00.386460: step: 800/463, loss: 0.0575665645301342 2023-01-22 11:23:00.996295: step: 802/463, loss: 0.1003890335559845 2023-01-22 11:23:01.589015: step: 804/463, loss: 0.04175858199596405 2023-01-22 11:23:02.127995: step: 806/463, loss: 0.05459296330809593 2023-01-22 11:23:02.813611: step: 808/463, loss: 0.08588755875825882 2023-01-22 11:23:03.467465: step: 810/463, loss: 0.09460239112377167 2023-01-22 11:23:04.020791: step: 812/463, loss: 0.0710568055510521 2023-01-22 11:23:04.718098: step: 814/463, loss: 0.06560572236776352 2023-01-22 11:23:05.350793: step: 816/463, loss: 0.04303162544965744 2023-01-22 11:23:05.969623: step: 818/463, loss: 0.06561961024999619 2023-01-22 11:23:06.633210: step: 820/463, loss: 0.05641253665089607 2023-01-22 11:23:07.255414: step: 822/463, loss: 0.09744884818792343 2023-01-22 11:23:07.886875: step: 824/463, loss: 0.035097185522317886 2023-01-22 11:23:08.514378: step: 826/463, loss: 0.14613856375217438 2023-01-22 11:23:09.155359: step: 828/463, loss: 0.1462436467409134 2023-01-22 11:23:09.788330: step: 830/463, loss: 0.06405829638242722 2023-01-22 11:23:10.417718: step: 832/463, loss: 0.07300765812397003 2023-01-22 11:23:10.976036: step: 834/463, loss: 0.07874969393014908 2023-01-22 11:23:11.563494: step: 836/463, loss: 0.21054033935070038 2023-01-22 11:23:12.216369: step: 838/463, loss: 0.08521483093500137 2023-01-22 11:23:12.831691: step: 840/463, loss: 0.05369347706437111 2023-01-22 11:23:13.477839: step: 842/463, loss: 0.075861357152462 2023-01-22 11:23:14.075533: step: 844/463, loss: 0.10476447641849518 2023-01-22 11:23:14.651991: step: 846/463, loss: 0.10134749859571457 2023-01-22 11:23:15.318243: step: 848/463, loss: 0.01853303797543049 2023-01-22 11:23:15.886917: step: 850/463, loss: 0.09675605595111847 2023-01-22 11:23:16.500621: step: 852/463, loss: 0.04180556535720825 2023-01-22 11:23:17.052860: step: 854/463, loss: 0.016176365315914154 2023-01-22 11:23:17.709073: step: 856/463, loss: 0.07607618719339371 2023-01-22 11:23:18.336967: step: 858/463, loss: 0.02337942086160183 2023-01-22 11:23:18.950189: step: 860/463, loss: 0.055050596594810486 2023-01-22 11:23:19.541345: step: 862/463, loss: 0.20243799686431885 2023-01-22 11:23:20.132606: step: 864/463, loss: 0.10927688330411911 2023-01-22 11:23:20.732243: step: 866/463, loss: 0.09429860860109329 2023-01-22 11:23:21.281774: step: 868/463, loss: 0.05700754374265671 2023-01-22 11:23:21.872888: step: 870/463, loss: 0.13355594873428345 2023-01-22 11:23:22.467307: step: 872/463, loss: 0.03869052976369858 2023-01-22 11:23:23.131492: step: 874/463, loss: 0.04819696396589279 2023-01-22 11:23:23.808161: step: 876/463, loss: 0.07907703518867493 2023-01-22 11:23:24.496843: step: 878/463, loss: 0.31548523902893066 2023-01-22 11:23:25.110991: step: 880/463, loss: 0.17865057289600372 2023-01-22 11:23:25.773010: step: 882/463, loss: 0.20975464582443237 2023-01-22 11:23:26.493691: step: 884/463, loss: 0.041771065443754196 2023-01-22 11:23:27.094698: step: 886/463, loss: 0.10529671609401703 2023-01-22 11:23:27.740696: step: 888/463, loss: 0.10200495272874832 2023-01-22 11:23:28.370342: step: 890/463, loss: 0.12388349324464798 2023-01-22 11:23:28.954717: step: 892/463, loss: 0.1299099624156952 2023-01-22 11:23:29.551095: step: 894/463, loss: 0.03455130010843277 2023-01-22 11:23:30.135257: step: 896/463, loss: 0.17462481558322906 2023-01-22 11:23:30.772925: step: 898/463, loss: 0.06822777539491653 2023-01-22 11:23:31.386169: step: 900/463, loss: 0.08120286464691162 2023-01-22 11:23:32.076511: step: 902/463, loss: 0.12477584928274155 2023-01-22 11:23:32.761233: step: 904/463, loss: 0.031804341822862625 2023-01-22 11:23:33.396421: step: 906/463, loss: 0.14419203996658325 2023-01-22 11:23:33.990855: step: 908/463, loss: 0.08351092785596848 2023-01-22 11:23:34.640422: step: 910/463, loss: 0.1912321001291275 2023-01-22 11:23:35.201918: step: 912/463, loss: 0.03968960419297218 2023-01-22 11:23:35.748449: step: 914/463, loss: 0.04105373099446297 2023-01-22 11:23:36.335998: step: 916/463, loss: 0.22343170642852783 2023-01-22 11:23:36.932447: step: 918/463, loss: 0.08241358399391174 2023-01-22 11:23:37.513284: step: 920/463, loss: 0.058043330907821655 2023-01-22 11:23:38.088027: step: 922/463, loss: 0.008563480339944363 2023-01-22 11:23:38.704175: step: 924/463, loss: 0.03655325248837471 2023-01-22 11:23:39.322237: step: 926/463, loss: 0.07631516456604004 ================================================== Loss: 0.117 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3165311338289963, 'r': 0.32313804554079695, 'f1': 0.31980046948356805}, 'combined': 0.23564245119841856, 'epoch': 18} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3411376476053596, 'r': 0.38942367493185354, 'f1': 0.36368494276897567}, 'combined': 0.28189933362954095, 'epoch': 18} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28958662974683547, 'r': 0.34728415559772297, 'f1': 0.31582182916307167}, 'combined': 0.2327108214885791, 'epoch': 18} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3284778070802762, 'r': 0.4009361468773959, 'f1': 0.3611080528167275}, 'combined': 0.2799019356761237, 'epoch': 18} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30241528588023536, 'r': 0.33799355480732185, 'f1': 0.319216135095804}, 'combined': 0.2352118890179608, 'epoch': 18} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3270213595028262, 'r': 0.3847310111797955, 'f1': 0.3535366048679202}, 'combined': 0.2740331578402061, 'epoch': 18} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25, 'r': 0.3142857142857143, 'f1': 0.27848101265822783}, 'combined': 0.18565400843881855, 'epoch': 18} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3106060606060606, 'r': 0.44565217391304346, 'f1': 0.3660714285714286}, 'combined': 0.1830357142857143, 'epoch': 18} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 18} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3279711174242424, 'r': 0.3285934535104364, 'f1': 0.32828199052132695}, 'combined': 0.24189199301571457, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33699381251830834, 'r': 0.40575541764612494, 'f1': 0.3681917384478598}, 'combined': 0.2853926393710684, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32926829268292684, 'r': 0.38571428571428573, 'f1': 0.35526315789473684}, 'combined': 0.23684210526315788, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29021588054776093, 'r': 0.325460313859064, 'f1': 0.30682931199235547}, 'combined': 0.2260847562048935, 'epoch': 13} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32837198696491027, 'r': 0.4053341714098111, 'f1': 0.3628166009822085}, 'combined': 0.28122626487616165, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3333333333333333, 'r': 0.4782608695652174, 'f1': 0.3928571428571428}, 'combined': 0.1964285714285714, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30241528588023536, 'r': 0.33799355480732185, 'f1': 0.319216135095804}, 'combined': 0.2352118890179608, 'epoch': 18} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3270213595028262, 'r': 0.3847310111797955, 'f1': 0.3535366048679202}, 'combined': 0.2740331578402061, 'epoch': 18} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 18} ****************************** Epoch: 19 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 11:26:27.866715: step: 2/463, loss: 0.09843173623085022 2023-01-22 11:26:28.477891: step: 4/463, loss: 0.10880041122436523 2023-01-22 11:26:29.160394: step: 6/463, loss: 0.08595127612352371 2023-01-22 11:26:29.765468: step: 8/463, loss: 0.017172206193208694 2023-01-22 11:26:30.434375: step: 10/463, loss: 0.047578807920217514 2023-01-22 11:26:31.095681: step: 12/463, loss: 0.021150914952158928 2023-01-22 11:26:31.741320: step: 14/463, loss: 0.056047938764095306 2023-01-22 11:26:32.446173: step: 16/463, loss: 0.1916477531194687 2023-01-22 11:26:33.038735: step: 18/463, loss: 0.0209858026355505 2023-01-22 11:26:33.620600: step: 20/463, loss: 0.07440309226512909 2023-01-22 11:26:34.259772: step: 22/463, loss: 0.13617275655269623 2023-01-22 11:26:34.865451: step: 24/463, loss: 0.3860137462615967 2023-01-22 11:26:35.514851: step: 26/463, loss: 0.01971534825861454 2023-01-22 11:26:36.107747: step: 28/463, loss: 0.10272450745105743 2023-01-22 11:26:36.759252: step: 30/463, loss: 0.020555952563881874 2023-01-22 11:26:37.447715: step: 32/463, loss: 0.05352829024195671 2023-01-22 11:26:38.067378: step: 34/463, loss: 0.040844038128852844 2023-01-22 11:26:38.705990: step: 36/463, loss: 0.2471899688243866 2023-01-22 11:26:39.290325: step: 38/463, loss: 0.11653846502304077 2023-01-22 11:26:39.903389: step: 40/463, loss: 0.07643802464008331 2023-01-22 11:26:40.540685: step: 42/463, loss: 0.04667147621512413 2023-01-22 11:26:41.174671: step: 44/463, loss: 0.20017176866531372 2023-01-22 11:26:41.720491: step: 46/463, loss: 0.031188489869236946 2023-01-22 11:26:42.363266: step: 48/463, loss: 0.08504738658666611 2023-01-22 11:26:43.039789: step: 50/463, loss: 0.17975234985351562 2023-01-22 11:26:43.644122: step: 52/463, loss: 0.09627849608659744 2023-01-22 11:26:44.299501: step: 54/463, loss: 0.03888966515660286 2023-01-22 11:26:44.904470: step: 56/463, loss: 0.023116713389754295 2023-01-22 11:26:45.530630: step: 58/463, loss: 0.45193710923194885 2023-01-22 11:26:46.134533: step: 60/463, loss: 0.11137518286705017 2023-01-22 11:26:46.725057: step: 62/463, loss: 0.015942148864269257 2023-01-22 11:26:47.333078: step: 64/463, loss: 0.31105291843414307 2023-01-22 11:26:47.934929: step: 66/463, loss: 0.04012041166424751 2023-01-22 11:26:48.495213: step: 68/463, loss: 1.0168992280960083 2023-01-22 11:26:49.087695: step: 70/463, loss: 0.043375808745622635 2023-01-22 11:26:49.700877: step: 72/463, loss: 0.15089629590511322 2023-01-22 11:26:50.311986: step: 74/463, loss: 0.08969930559396744 2023-01-22 11:26:50.947385: step: 76/463, loss: 0.029811233282089233 2023-01-22 11:26:51.530970: step: 78/463, loss: 0.09008944779634476 2023-01-22 11:26:52.121559: step: 80/463, loss: 0.24686199426651 2023-01-22 11:26:52.693739: step: 82/463, loss: 0.06390325725078583 2023-01-22 11:26:53.272978: step: 84/463, loss: 0.0450165830552578 2023-01-22 11:26:53.849843: step: 86/463, loss: 0.037968046963214874 2023-01-22 11:26:54.493800: step: 88/463, loss: 0.02627360075712204 2023-01-22 11:26:55.090193: step: 90/463, loss: 0.07466509938240051 2023-01-22 11:26:55.700229: step: 92/463, loss: 0.1126580536365509 2023-01-22 11:26:56.293602: step: 94/463, loss: 0.1909865438938141 2023-01-22 11:26:56.928724: step: 96/463, loss: 0.16507361829280853 2023-01-22 11:26:57.548625: step: 98/463, loss: 0.04729074612259865 2023-01-22 11:26:58.309731: step: 100/463, loss: 0.06562110781669617 2023-01-22 11:26:58.912725: step: 102/463, loss: 0.023107284680008888 2023-01-22 11:26:59.525778: step: 104/463, loss: 0.017375657334923744 2023-01-22 11:27:00.115469: step: 106/463, loss: 0.04252321645617485 2023-01-22 11:27:00.777936: step: 108/463, loss: 0.12977533042430878 2023-01-22 11:27:01.324910: step: 110/463, loss: 0.27735015749931335 2023-01-22 11:27:01.964060: step: 112/463, loss: 0.24240757524967194 2023-01-22 11:27:02.562261: step: 114/463, loss: 0.04336678236722946 2023-01-22 11:27:03.154564: step: 116/463, loss: 0.02648986130952835 2023-01-22 11:27:03.760024: step: 118/463, loss: 0.0028780458960682154 2023-01-22 11:27:04.344426: step: 120/463, loss: 0.3205176889896393 2023-01-22 11:27:04.975605: step: 122/463, loss: 0.07898540049791336 2023-01-22 11:27:05.543773: step: 124/463, loss: 0.13012634217739105 2023-01-22 11:27:06.177947: step: 126/463, loss: 0.05570770427584648 2023-01-22 11:27:06.785261: step: 128/463, loss: 0.13161610066890717 2023-01-22 11:27:07.343538: step: 130/463, loss: 0.024745089933276176 2023-01-22 11:27:08.047998: step: 132/463, loss: 0.07149546593427658 2023-01-22 11:27:08.663622: step: 134/463, loss: 0.02650449052453041 2023-01-22 11:27:09.274911: step: 136/463, loss: 0.057667359709739685 2023-01-22 11:27:09.791243: step: 138/463, loss: 0.07903765141963959 2023-01-22 11:27:10.406537: step: 140/463, loss: 0.03494666889309883 2023-01-22 11:27:11.119385: step: 142/463, loss: 0.017760442569851875 2023-01-22 11:27:11.722068: step: 144/463, loss: 0.01631774567067623 2023-01-22 11:27:12.341754: step: 146/463, loss: 0.32027414441108704 2023-01-22 11:27:12.932174: step: 148/463, loss: 0.12576505541801453 2023-01-22 11:27:13.499262: step: 150/463, loss: 0.011223982088267803 2023-01-22 11:27:14.069856: step: 152/463, loss: 0.07385571300983429 2023-01-22 11:27:14.656191: step: 154/463, loss: 0.07310042530298233 2023-01-22 11:27:15.286826: step: 156/463, loss: 0.021011769771575928 2023-01-22 11:27:15.931569: step: 158/463, loss: 0.5341897010803223 2023-01-22 11:27:16.535732: step: 160/463, loss: 0.02520231157541275 2023-01-22 11:27:17.151294: step: 162/463, loss: 0.03399830311536789 2023-01-22 11:27:17.789536: step: 164/463, loss: 0.030097810551524162 2023-01-22 11:27:18.403688: step: 166/463, loss: 0.05835844203829765 2023-01-22 11:27:19.030139: step: 168/463, loss: 0.0858881026506424 2023-01-22 11:27:19.665848: step: 170/463, loss: 0.026198573410511017 2023-01-22 11:27:20.301739: step: 172/463, loss: 0.01906190626323223 2023-01-22 11:27:20.917962: step: 174/463, loss: 0.05735083296895027 2023-01-22 11:27:21.538857: step: 176/463, loss: 0.1060795858502388 2023-01-22 11:27:22.068055: step: 178/463, loss: 0.007871311157941818 2023-01-22 11:27:22.680267: step: 180/463, loss: 0.0605691596865654 2023-01-22 11:27:23.358862: step: 182/463, loss: 0.06482990831136703 2023-01-22 11:27:24.008597: step: 184/463, loss: 0.03421315550804138 2023-01-22 11:27:24.554715: step: 186/463, loss: 0.07118727266788483 2023-01-22 11:27:25.153911: step: 188/463, loss: 0.09793812036514282 2023-01-22 11:27:25.782446: step: 190/463, loss: 0.05160688981413841 2023-01-22 11:27:26.400536: step: 192/463, loss: 0.03019959107041359 2023-01-22 11:27:26.995526: step: 194/463, loss: 0.014281491748988628 2023-01-22 11:27:27.581435: step: 196/463, loss: 0.020693454891443253 2023-01-22 11:27:28.239727: step: 198/463, loss: 0.17934651672840118 2023-01-22 11:27:28.841824: step: 200/463, loss: 0.0876389816403389 2023-01-22 11:27:29.488774: step: 202/463, loss: 0.1064068153500557 2023-01-22 11:27:30.122611: step: 204/463, loss: 0.2992803156375885 2023-01-22 11:27:30.733973: step: 206/463, loss: 0.03400499001145363 2023-01-22 11:27:31.296731: step: 208/463, loss: 0.017595138400793076 2023-01-22 11:27:32.022372: step: 210/463, loss: 0.041268959641456604 2023-01-22 11:27:32.647577: step: 212/463, loss: 0.00404773373156786 2023-01-22 11:27:33.281981: step: 214/463, loss: 0.08111508190631866 2023-01-22 11:27:33.805054: step: 216/463, loss: 0.12711355090141296 2023-01-22 11:27:34.408585: step: 218/463, loss: 0.08298701792955399 2023-01-22 11:27:35.018740: step: 220/463, loss: 0.13042567670345306 2023-01-22 11:27:35.631566: step: 222/463, loss: 0.05210493877530098 2023-01-22 11:27:36.200408: step: 224/463, loss: 0.035714223980903625 2023-01-22 11:27:36.840222: step: 226/463, loss: 0.04237968847155571 2023-01-22 11:27:37.496045: step: 228/463, loss: 0.04566330462694168 2023-01-22 11:27:38.049516: step: 230/463, loss: 0.13896937668323517 2023-01-22 11:27:38.659158: step: 232/463, loss: 0.0792575404047966 2023-01-22 11:27:39.295038: step: 234/463, loss: 0.13525839149951935 2023-01-22 11:27:39.911338: step: 236/463, loss: 0.061421819031238556 2023-01-22 11:27:40.525870: step: 238/463, loss: 0.025934318080544472 2023-01-22 11:27:41.102309: step: 240/463, loss: 0.08406510204076767 2023-01-22 11:27:41.692180: step: 242/463, loss: 0.01069964561611414 2023-01-22 11:27:42.297308: step: 244/463, loss: 0.19616185128688812 2023-01-22 11:27:42.946939: step: 246/463, loss: 0.06312242895364761 2023-01-22 11:27:43.621021: step: 248/463, loss: 0.04364520311355591 2023-01-22 11:27:44.246766: step: 250/463, loss: 0.0789756253361702 2023-01-22 11:27:44.844991: step: 252/463, loss: 0.8129016757011414 2023-01-22 11:27:45.480337: step: 254/463, loss: 0.34075844287872314 2023-01-22 11:27:46.146239: step: 256/463, loss: 0.032831981778144836 2023-01-22 11:27:46.780921: step: 258/463, loss: 0.06537775695323944 2023-01-22 11:27:47.425829: step: 260/463, loss: 0.014605416916310787 2023-01-22 11:27:48.003315: step: 262/463, loss: 0.20081882178783417 2023-01-22 11:27:48.679921: step: 264/463, loss: 0.06995569914579391 2023-01-22 11:27:49.337708: step: 266/463, loss: 0.05390030890703201 2023-01-22 11:27:49.938453: step: 268/463, loss: 0.03299311175942421 2023-01-22 11:27:50.610077: step: 270/463, loss: 0.2626551389694214 2023-01-22 11:27:51.301165: step: 272/463, loss: 0.04602523520588875 2023-01-22 11:27:51.886217: step: 274/463, loss: 0.19189557433128357 2023-01-22 11:27:52.483737: step: 276/463, loss: 0.0708315297961235 2023-01-22 11:27:53.173406: step: 278/463, loss: 0.07911559194326401 2023-01-22 11:27:53.790907: step: 280/463, loss: 0.07578671723604202 2023-01-22 11:27:54.403725: step: 282/463, loss: 0.07183322310447693 2023-01-22 11:27:55.076430: step: 284/463, loss: 0.015800967812538147 2023-01-22 11:27:55.695964: step: 286/463, loss: 0.021861545741558075 2023-01-22 11:27:56.304479: step: 288/463, loss: 0.0278567336499691 2023-01-22 11:27:57.053281: step: 290/463, loss: 0.061614371836185455 2023-01-22 11:27:57.642769: step: 292/463, loss: 0.06986402720212936 2023-01-22 11:27:58.268910: step: 294/463, loss: 0.08326896280050278 2023-01-22 11:27:58.890673: step: 296/463, loss: 0.09929545968770981 2023-01-22 11:27:59.541355: step: 298/463, loss: 0.012018418870866299 2023-01-22 11:28:00.149846: step: 300/463, loss: 0.05839642137289047 2023-01-22 11:28:00.796083: step: 302/463, loss: 0.09725894033908844 2023-01-22 11:28:01.456516: step: 304/463, loss: 0.04040858522057533 2023-01-22 11:28:02.031450: step: 306/463, loss: 0.045502495020627975 2023-01-22 11:28:02.747129: step: 308/463, loss: 0.04373488575220108 2023-01-22 11:28:03.415677: step: 310/463, loss: 0.01789252460002899 2023-01-22 11:28:03.981287: step: 312/463, loss: 0.04127953201532364 2023-01-22 11:28:04.636151: step: 314/463, loss: 0.07758328318595886 2023-01-22 11:28:05.208398: step: 316/463, loss: 0.10204042494297028 2023-01-22 11:28:05.836540: step: 318/463, loss: 0.07615073025226593 2023-01-22 11:28:06.464192: step: 320/463, loss: 0.0411040261387825 2023-01-22 11:28:07.082496: step: 322/463, loss: 0.03426584601402283 2023-01-22 11:28:07.695779: step: 324/463, loss: 0.03321380540728569 2023-01-22 11:28:08.315620: step: 326/463, loss: 0.021382590755820274 2023-01-22 11:28:08.983765: step: 328/463, loss: 0.08947504311800003 2023-01-22 11:28:09.554785: step: 330/463, loss: 0.07809731364250183 2023-01-22 11:28:10.184063: step: 332/463, loss: 0.04807485267519951 2023-01-22 11:28:10.766417: step: 334/463, loss: 0.04300491884350777 2023-01-22 11:28:11.425099: step: 336/463, loss: 0.12432550638914108 2023-01-22 11:28:12.027313: step: 338/463, loss: 0.03360001742839813 2023-01-22 11:28:12.698253: step: 340/463, loss: 0.03814920410513878 2023-01-22 11:28:13.299232: step: 342/463, loss: 0.7751902937889099 2023-01-22 11:28:13.838535: step: 344/463, loss: 0.025956541299819946 2023-01-22 11:28:14.457490: step: 346/463, loss: 0.0020128029864281416 2023-01-22 11:28:15.037032: step: 348/463, loss: 0.09925977885723114 2023-01-22 11:28:15.632735: step: 350/463, loss: 0.07244475185871124 2023-01-22 11:28:16.267616: step: 352/463, loss: 3.778965950012207 2023-01-22 11:28:16.861374: step: 354/463, loss: 0.02549927495419979 2023-01-22 11:28:17.484814: step: 356/463, loss: 0.03316636011004448 2023-01-22 11:28:18.125294: step: 358/463, loss: 0.06820200383663177 2023-01-22 11:28:18.755952: step: 360/463, loss: 0.08836186677217484 2023-01-22 11:28:19.360904: step: 362/463, loss: 0.052920252084732056 2023-01-22 11:28:19.932735: step: 364/463, loss: 0.0044011990539729595 2023-01-22 11:28:20.528942: step: 366/463, loss: 0.09695777297019958 2023-01-22 11:28:21.120240: step: 368/463, loss: 0.02842007204890251 2023-01-22 11:28:21.754681: step: 370/463, loss: 0.029343625530600548 2023-01-22 11:28:22.414650: step: 372/463, loss: 0.4363693594932556 2023-01-22 11:28:23.048588: step: 374/463, loss: 0.02570498362183571 2023-01-22 11:28:23.663999: step: 376/463, loss: 0.11292698234319687 2023-01-22 11:28:24.304938: step: 378/463, loss: 0.13322654366493225 2023-01-22 11:28:24.892912: step: 380/463, loss: 0.06595969200134277 2023-01-22 11:28:25.524939: step: 382/463, loss: 0.018003471195697784 2023-01-22 11:28:26.180424: step: 384/463, loss: 0.08815129101276398 2023-01-22 11:28:26.826902: step: 386/463, loss: 1.4322130680084229 2023-01-22 11:28:27.463332: step: 388/463, loss: 0.11667540669441223 2023-01-22 11:28:28.071556: step: 390/463, loss: 0.10367243736982346 2023-01-22 11:28:28.743252: step: 392/463, loss: 0.3529355525970459 2023-01-22 11:28:29.377626: step: 394/463, loss: 0.053055375814437866 2023-01-22 11:28:30.043890: step: 396/463, loss: 0.04888347536325455 2023-01-22 11:28:30.663135: step: 398/463, loss: 0.06784988939762115 2023-01-22 11:28:31.255579: step: 400/463, loss: 0.031823400408029556 2023-01-22 11:28:31.863927: step: 402/463, loss: 0.12803302705287933 2023-01-22 11:28:32.474822: step: 404/463, loss: 0.31832724809646606 2023-01-22 11:28:33.125205: step: 406/463, loss: 0.019247863441705704 2023-01-22 11:28:33.703065: step: 408/463, loss: 0.6069300174713135 2023-01-22 11:28:34.403256: step: 410/463, loss: 0.05580489709973335 2023-01-22 11:28:35.056915: step: 412/463, loss: 0.017603939399123192 2023-01-22 11:28:35.595310: step: 414/463, loss: 0.03991398215293884 2023-01-22 11:28:36.254571: step: 416/463, loss: 0.5047932863235474 2023-01-22 11:28:36.870435: step: 418/463, loss: 0.01245469506829977 2023-01-22 11:28:37.522885: step: 420/463, loss: 0.514163613319397 2023-01-22 11:28:38.192964: step: 422/463, loss: 0.06172584369778633 2023-01-22 11:28:38.807202: step: 424/463, loss: 0.04152445122599602 2023-01-22 11:28:39.408288: step: 426/463, loss: 0.03773270919919014 2023-01-22 11:28:40.079112: step: 428/463, loss: 0.03397948667407036 2023-01-22 11:28:40.666082: step: 430/463, loss: 0.03543838858604431 2023-01-22 11:28:41.297671: step: 432/463, loss: 0.03854277729988098 2023-01-22 11:28:41.918623: step: 434/463, loss: 0.0606447234749794 2023-01-22 11:28:42.670153: step: 436/463, loss: 0.04678954929113388 2023-01-22 11:28:43.351252: step: 438/463, loss: 0.08492782711982727 2023-01-22 11:28:43.967844: step: 440/463, loss: 0.05202166736125946 2023-01-22 11:28:44.581238: step: 442/463, loss: 0.0697241723537445 2023-01-22 11:28:45.243489: step: 444/463, loss: 0.023754039779305458 2023-01-22 11:28:45.896497: step: 446/463, loss: 0.08370796591043472 2023-01-22 11:28:46.491604: step: 448/463, loss: 0.008632220327854156 2023-01-22 11:28:47.046168: step: 450/463, loss: 0.01273120567202568 2023-01-22 11:28:47.689565: step: 452/463, loss: 0.20612354576587677 2023-01-22 11:28:48.240474: step: 454/463, loss: 0.03448454290628433 2023-01-22 11:28:48.941303: step: 456/463, loss: 0.09639697521924973 2023-01-22 11:28:49.543846: step: 458/463, loss: 0.008333135396242142 2023-01-22 11:28:50.164111: step: 460/463, loss: 0.07454507052898407 2023-01-22 11:28:50.737910: step: 462/463, loss: 0.03628313168883324 2023-01-22 11:28:51.472344: step: 464/463, loss: 0.0862494558095932 2023-01-22 11:28:52.110933: step: 466/463, loss: 0.06211767718195915 2023-01-22 11:28:52.697920: step: 468/463, loss: 0.046333055943250656 2023-01-22 11:28:53.319421: step: 470/463, loss: 0.07183966785669327 2023-01-22 11:28:53.863789: step: 472/463, loss: 0.0159163698554039 2023-01-22 11:28:54.426401: step: 474/463, loss: 0.0667303204536438 2023-01-22 11:28:55.029307: step: 476/463, loss: 0.12091360986232758 2023-01-22 11:28:55.650549: step: 478/463, loss: 0.03381025046110153 2023-01-22 11:28:56.299646: step: 480/463, loss: 0.18052664399147034 2023-01-22 11:28:56.883560: step: 482/463, loss: 0.1478506624698639 2023-01-22 11:28:57.501478: step: 484/463, loss: 0.11141731590032578 2023-01-22 11:28:58.063381: step: 486/463, loss: 0.015993352979421616 2023-01-22 11:28:58.677232: step: 488/463, loss: 0.03572019934654236 2023-01-22 11:28:59.258315: step: 490/463, loss: 0.00907127559185028 2023-01-22 11:28:59.864835: step: 492/463, loss: 0.04498286172747612 2023-01-22 11:29:00.535617: step: 494/463, loss: 0.0726713165640831 2023-01-22 11:29:01.215784: step: 496/463, loss: 0.09351115673780441 2023-01-22 11:29:01.782764: step: 498/463, loss: 0.08858353644609451 2023-01-22 11:29:02.389114: step: 500/463, loss: 0.10741402208805084 2023-01-22 11:29:03.028969: step: 502/463, loss: 0.018738150596618652 2023-01-22 11:29:03.668029: step: 504/463, loss: 0.010571149177849293 2023-01-22 11:29:04.313900: step: 506/463, loss: 0.05955428630113602 2023-01-22 11:29:04.945874: step: 508/463, loss: 0.23404785990715027 2023-01-22 11:29:05.551379: step: 510/463, loss: 0.01985454186797142 2023-01-22 11:29:06.137636: step: 512/463, loss: 0.29550686478614807 2023-01-22 11:29:06.828941: step: 514/463, loss: 0.04698484018445015 2023-01-22 11:29:07.498405: step: 516/463, loss: 0.1044471338391304 2023-01-22 11:29:08.087285: step: 518/463, loss: 0.03806724771857262 2023-01-22 11:29:08.647064: step: 520/463, loss: 0.017785893753170967 2023-01-22 11:29:09.322860: step: 522/463, loss: 0.04650108516216278 2023-01-22 11:29:09.981176: step: 524/463, loss: 0.12200185656547546 2023-01-22 11:29:10.608041: step: 526/463, loss: 0.156696617603302 2023-01-22 11:29:11.246341: step: 528/463, loss: 0.055220555514097214 2023-01-22 11:29:11.872535: step: 530/463, loss: 0.0432281568646431 2023-01-22 11:29:12.509334: step: 532/463, loss: 0.09745674580335617 2023-01-22 11:29:13.142212: step: 534/463, loss: 0.016220947727560997 2023-01-22 11:29:13.790327: step: 536/463, loss: 0.027817638590931892 2023-01-22 11:29:14.371342: step: 538/463, loss: 0.016521908342838287 2023-01-22 11:29:14.950004: step: 540/463, loss: 0.05771870166063309 2023-01-22 11:29:15.688618: step: 542/463, loss: 0.517961859703064 2023-01-22 11:29:16.320342: step: 544/463, loss: 0.14686883985996246 2023-01-22 11:29:16.869487: step: 546/463, loss: 0.059000756591558456 2023-01-22 11:29:17.536689: step: 548/463, loss: 0.05594101548194885 2023-01-22 11:29:18.161819: step: 550/463, loss: 0.036989595741033554 2023-01-22 11:29:18.793768: step: 552/463, loss: 0.05881727114319801 2023-01-22 11:29:19.416433: step: 554/463, loss: 0.03861016780138016 2023-01-22 11:29:20.051112: step: 556/463, loss: 0.12573686242103577 2023-01-22 11:29:20.664808: step: 558/463, loss: 0.013770697638392448 2023-01-22 11:29:21.279050: step: 560/463, loss: 0.03228835016489029 2023-01-22 11:29:21.927660: step: 562/463, loss: 0.12452410906553268 2023-01-22 11:29:22.550544: step: 564/463, loss: 0.03419553115963936 2023-01-22 11:29:23.208987: step: 566/463, loss: 0.25904738903045654 2023-01-22 11:29:23.866160: step: 568/463, loss: 0.14948739111423492 2023-01-22 11:29:24.472021: step: 570/463, loss: 0.10230248421430588 2023-01-22 11:29:25.115440: step: 572/463, loss: 0.046159714460372925 2023-01-22 11:29:25.674570: step: 574/463, loss: 0.03526247292757034 2023-01-22 11:29:26.309213: step: 576/463, loss: 0.037601035088300705 2023-01-22 11:29:26.917806: step: 578/463, loss: 0.07432859390974045 2023-01-22 11:29:27.537477: step: 580/463, loss: 0.043209258466959 2023-01-22 11:29:28.178070: step: 582/463, loss: 0.2695164680480957 2023-01-22 11:29:28.810271: step: 584/463, loss: 0.09182669222354889 2023-01-22 11:29:29.381546: step: 586/463, loss: 0.05101155489683151 2023-01-22 11:29:30.013454: step: 588/463, loss: 0.06139244884252548 2023-01-22 11:29:30.610447: step: 590/463, loss: 0.121304452419281 2023-01-22 11:29:31.211638: step: 592/463, loss: 0.019609736278653145 2023-01-22 11:29:31.887369: step: 594/463, loss: 0.19669921696186066 2023-01-22 11:29:32.461468: step: 596/463, loss: 0.1184077039361 2023-01-22 11:29:32.983831: step: 598/463, loss: 0.02450251765549183 2023-01-22 11:29:33.585060: step: 600/463, loss: 0.08885645866394043 2023-01-22 11:29:34.269441: step: 602/463, loss: 0.03356620669364929 2023-01-22 11:29:34.907524: step: 604/463, loss: 0.03580283746123314 2023-01-22 11:29:35.555738: step: 606/463, loss: 0.08973096311092377 2023-01-22 11:29:36.172122: step: 608/463, loss: 0.1778457909822464 2023-01-22 11:29:36.751171: step: 610/463, loss: 0.004249132238328457 2023-01-22 11:29:37.408271: step: 612/463, loss: 0.04661310464143753 2023-01-22 11:29:38.081898: step: 614/463, loss: 0.0789538323879242 2023-01-22 11:29:38.699916: step: 616/463, loss: 0.021688908338546753 2023-01-22 11:29:39.359781: step: 618/463, loss: 0.06403544545173645 2023-01-22 11:29:40.038462: step: 620/463, loss: 0.023572752252221107 2023-01-22 11:29:40.632838: step: 622/463, loss: 0.028784917667508125 2023-01-22 11:29:41.247771: step: 624/463, loss: 0.06966814398765564 2023-01-22 11:29:41.889877: step: 626/463, loss: 0.04270821064710617 2023-01-22 11:29:42.524955: step: 628/463, loss: 0.030175073072314262 2023-01-22 11:29:43.105643: step: 630/463, loss: 0.06828122586011887 2023-01-22 11:29:43.691720: step: 632/463, loss: 0.11224208027124405 2023-01-22 11:29:44.313003: step: 634/463, loss: 0.00522087886929512 2023-01-22 11:29:44.965547: step: 636/463, loss: 0.7217215895652771 2023-01-22 11:29:45.655073: step: 638/463, loss: 0.04315922036767006 2023-01-22 11:29:46.329358: step: 640/463, loss: 0.05577242374420166 2023-01-22 11:29:46.908830: step: 642/463, loss: 0.11067011207342148 2023-01-22 11:29:47.538353: step: 644/463, loss: 0.0692674070596695 2023-01-22 11:29:48.191824: step: 646/463, loss: 0.08684011548757553 2023-01-22 11:29:48.850043: step: 648/463, loss: 0.15701603889465332 2023-01-22 11:29:49.454618: step: 650/463, loss: 0.07535672932863235 2023-01-22 11:29:50.101640: step: 652/463, loss: 0.08991323411464691 2023-01-22 11:29:50.792655: step: 654/463, loss: 0.10656873136758804 2023-01-22 11:29:51.419557: step: 656/463, loss: 0.2171446979045868 2023-01-22 11:29:52.050150: step: 658/463, loss: 0.10648135840892792 2023-01-22 11:29:52.662753: step: 660/463, loss: 0.14075885713100433 2023-01-22 11:29:53.270775: step: 662/463, loss: 0.018190212547779083 2023-01-22 11:29:53.904499: step: 664/463, loss: 0.22377075254917145 2023-01-22 11:29:54.485020: step: 666/463, loss: 0.0262388177216053 2023-01-22 11:29:55.046652: step: 668/463, loss: 7.585052013397217 2023-01-22 11:29:55.661825: step: 670/463, loss: 0.11799109727144241 2023-01-22 11:29:56.273729: step: 672/463, loss: 0.05876987427473068 2023-01-22 11:29:56.888593: step: 674/463, loss: 0.05100049823522568 2023-01-22 11:29:57.504394: step: 676/463, loss: 0.04835920035839081 2023-01-22 11:29:58.171752: step: 678/463, loss: 0.1110786646604538 2023-01-22 11:29:58.778295: step: 680/463, loss: 0.05327126011252403 2023-01-22 11:29:59.412444: step: 682/463, loss: 0.02951670251786709 2023-01-22 11:30:00.015698: step: 684/463, loss: 0.06278248131275177 2023-01-22 11:30:00.649115: step: 686/463, loss: 0.1263350546360016 2023-01-22 11:30:01.293418: step: 688/463, loss: 0.08174838870763779 2023-01-22 11:30:01.890439: step: 690/463, loss: 0.0589042492210865 2023-01-22 11:30:02.548365: step: 692/463, loss: 0.2909718155860901 2023-01-22 11:30:03.192251: step: 694/463, loss: 0.13094452023506165 2023-01-22 11:30:03.784677: step: 696/463, loss: 0.046198680996894836 2023-01-22 11:30:04.412232: step: 698/463, loss: 0.039940908551216125 2023-01-22 11:30:05.012729: step: 700/463, loss: 0.06243208795785904 2023-01-22 11:30:05.586798: step: 702/463, loss: 0.04296639561653137 2023-01-22 11:30:06.187361: step: 704/463, loss: 0.011986475437879562 2023-01-22 11:30:06.784716: step: 706/463, loss: 0.05324605852365494 2023-01-22 11:30:07.399527: step: 708/463, loss: 0.0964583232998848 2023-01-22 11:30:08.030442: step: 710/463, loss: 0.050626084208488464 2023-01-22 11:30:08.654569: step: 712/463, loss: 0.0022668896708637476 2023-01-22 11:30:09.329931: step: 714/463, loss: 0.10261612385511398 2023-01-22 11:30:09.956369: step: 716/463, loss: 0.006185358390212059 2023-01-22 11:30:10.596797: step: 718/463, loss: 0.14385487139225006 2023-01-22 11:30:11.194208: step: 720/463, loss: 0.0318455770611763 2023-01-22 11:30:11.774733: step: 722/463, loss: 0.07089218497276306 2023-01-22 11:30:12.378766: step: 724/463, loss: 0.16315022110939026 2023-01-22 11:30:12.975969: step: 726/463, loss: 0.3529326915740967 2023-01-22 11:30:13.579299: step: 728/463, loss: 0.43606990575790405 2023-01-22 11:30:14.147243: step: 730/463, loss: 0.11184833198785782 2023-01-22 11:30:14.702687: step: 732/463, loss: 0.16461403667926788 2023-01-22 11:30:15.296945: step: 734/463, loss: 0.08659656345844269 2023-01-22 11:30:15.998115: step: 736/463, loss: 0.12101877480745316 2023-01-22 11:30:16.666560: step: 738/463, loss: 0.33059343695640564 2023-01-22 11:30:17.268003: step: 740/463, loss: 0.027007615193724632 2023-01-22 11:30:17.908807: step: 742/463, loss: 0.02912762761116028 2023-01-22 11:30:18.600982: step: 744/463, loss: 0.05541957542300224 2023-01-22 11:30:19.175448: step: 746/463, loss: 0.04138094559311867 2023-01-22 11:30:19.818381: step: 748/463, loss: 0.0278069656342268 2023-01-22 11:30:20.417616: step: 750/463, loss: 0.04563678056001663 2023-01-22 11:30:21.008037: step: 752/463, loss: 0.12673847377300262 2023-01-22 11:30:21.645977: step: 754/463, loss: 0.040106259286403656 2023-01-22 11:30:22.234511: step: 756/463, loss: 0.0527421310544014 2023-01-22 11:30:22.839063: step: 758/463, loss: 0.005679921247065067 2023-01-22 11:30:23.445452: step: 760/463, loss: 0.045885588973760605 2023-01-22 11:30:24.050028: step: 762/463, loss: 0.02683146297931671 2023-01-22 11:30:24.681282: step: 764/463, loss: 0.4279610812664032 2023-01-22 11:30:25.326088: step: 766/463, loss: 0.046658970415592194 2023-01-22 11:30:26.018940: step: 768/463, loss: 0.03990132734179497 2023-01-22 11:30:26.624161: step: 770/463, loss: 0.06619829684495926 2023-01-22 11:30:27.210113: step: 772/463, loss: 0.048530030995607376 2023-01-22 11:30:27.787050: step: 774/463, loss: 0.022241076454520226 2023-01-22 11:30:28.396595: step: 776/463, loss: 0.08825267851352692 2023-01-22 11:30:28.983451: step: 778/463, loss: 0.03845822066068649 2023-01-22 11:30:29.631109: step: 780/463, loss: 0.11483372002840042 2023-01-22 11:30:30.215998: step: 782/463, loss: 0.12185422331094742 2023-01-22 11:30:30.793145: step: 784/463, loss: 0.040376223623752594 2023-01-22 11:30:31.427054: step: 786/463, loss: 0.026955559849739075 2023-01-22 11:30:32.043531: step: 788/463, loss: 0.019123699516057968 2023-01-22 11:30:32.661824: step: 790/463, loss: 0.1206919401884079 2023-01-22 11:30:33.315144: step: 792/463, loss: 0.13978567719459534 2023-01-22 11:30:33.899261: step: 794/463, loss: 0.06431294977664948 2023-01-22 11:30:34.450166: step: 796/463, loss: 0.12576158344745636 2023-01-22 11:30:35.071379: step: 798/463, loss: 0.029137829318642616 2023-01-22 11:30:35.787280: step: 800/463, loss: 0.0604996532201767 2023-01-22 11:30:36.434071: step: 802/463, loss: 0.042811159044504166 2023-01-22 11:30:37.051302: step: 804/463, loss: 0.29408660531044006 2023-01-22 11:30:37.753651: step: 806/463, loss: 0.025647656992077827 2023-01-22 11:30:38.441639: step: 808/463, loss: 0.05955564230680466 2023-01-22 11:30:39.059418: step: 810/463, loss: 0.07902026921510696 2023-01-22 11:30:39.612316: step: 812/463, loss: 0.06509557366371155 2023-01-22 11:30:40.189624: step: 814/463, loss: 0.03752699866890907 2023-01-22 11:30:40.806123: step: 816/463, loss: 0.03674310818314552 2023-01-22 11:30:41.489794: step: 818/463, loss: 0.02645660936832428 2023-01-22 11:30:42.083384: step: 820/463, loss: 0.04556189477443695 2023-01-22 11:30:42.673629: step: 822/463, loss: 0.013146854937076569 2023-01-22 11:30:43.238896: step: 824/463, loss: 0.04506433382630348 2023-01-22 11:30:43.834284: step: 826/463, loss: 0.03950195759534836 2023-01-22 11:30:44.474326: step: 828/463, loss: 0.10667979717254639 2023-01-22 11:30:45.053334: step: 830/463, loss: 0.02954714745283127 2023-01-22 11:30:45.619860: step: 832/463, loss: 0.05457380786538124 2023-01-22 11:30:46.245669: step: 834/463, loss: 0.026683717966079712 2023-01-22 11:30:46.788430: step: 836/463, loss: 0.08873116970062256 2023-01-22 11:30:47.439815: step: 838/463, loss: 0.08901484310626984 2023-01-22 11:30:48.059115: step: 840/463, loss: 0.06199616566300392 2023-01-22 11:30:48.648731: step: 842/463, loss: 0.03429458290338516 2023-01-22 11:30:49.317918: step: 844/463, loss: 0.09406204521656036 2023-01-22 11:30:49.990594: step: 846/463, loss: 0.04288970306515694 2023-01-22 11:30:50.622523: step: 848/463, loss: 0.02828223817050457 2023-01-22 11:30:51.247540: step: 850/463, loss: 0.3204517066478729 2023-01-22 11:30:51.911881: step: 852/463, loss: 0.021954761818051338 2023-01-22 11:30:52.472821: step: 854/463, loss: 0.19784387946128845 2023-01-22 11:30:53.070139: step: 856/463, loss: 0.03635372593998909 2023-01-22 11:30:53.755775: step: 858/463, loss: 0.033746834844350815 2023-01-22 11:30:54.402890: step: 860/463, loss: 0.022397411987185478 2023-01-22 11:30:54.990021: step: 862/463, loss: 0.14670518040657043 2023-01-22 11:30:55.587397: step: 864/463, loss: 0.0737113505601883 2023-01-22 11:30:56.238542: step: 866/463, loss: 0.059790484607219696 2023-01-22 11:30:56.866003: step: 868/463, loss: 0.013940228149294853 2023-01-22 11:30:57.480527: step: 870/463, loss: 0.042724523693323135 2023-01-22 11:30:58.113375: step: 872/463, loss: 0.08587834984064102 2023-01-22 11:30:58.677959: step: 874/463, loss: 0.009747837670147419 2023-01-22 11:30:59.327916: step: 876/463, loss: 0.07691461592912674 2023-01-22 11:30:59.923295: step: 878/463, loss: 0.008721565827727318 2023-01-22 11:31:00.590801: step: 880/463, loss: 0.08181518316268921 2023-01-22 11:31:01.210716: step: 882/463, loss: 0.0121048828586936 2023-01-22 11:31:01.952861: step: 884/463, loss: 0.042846113443374634 2023-01-22 11:31:02.579671: step: 886/463, loss: 0.09717530012130737 2023-01-22 11:31:03.192689: step: 888/463, loss: 0.03697590529918671 2023-01-22 11:31:03.863909: step: 890/463, loss: 0.05085385590791702 2023-01-22 11:31:04.472569: step: 892/463, loss: 0.03842558339238167 2023-01-22 11:31:05.052691: step: 894/463, loss: 0.05055253580212593 2023-01-22 11:31:05.638382: step: 896/463, loss: 0.03438316285610199 2023-01-22 11:31:06.265858: step: 898/463, loss: 0.059900399297475815 2023-01-22 11:31:06.794331: step: 900/463, loss: 0.09600280225276947 2023-01-22 11:31:07.464837: step: 902/463, loss: 0.06828293204307556 2023-01-22 11:31:08.172217: step: 904/463, loss: 0.02007468417286873 2023-01-22 11:31:08.834953: step: 906/463, loss: 0.07203416526317596 2023-01-22 11:31:09.461630: step: 908/463, loss: 0.05091244354844093 2023-01-22 11:31:10.116517: step: 910/463, loss: 0.010370898991823196 2023-01-22 11:31:10.706836: step: 912/463, loss: 0.679309606552124 2023-01-22 11:31:11.367624: step: 914/463, loss: 0.14878763258457184 2023-01-22 11:31:11.993644: step: 916/463, loss: 0.02331198751926422 2023-01-22 11:31:12.620113: step: 918/463, loss: 0.1366574913263321 2023-01-22 11:31:13.218984: step: 920/463, loss: 0.02488672360777855 2023-01-22 11:31:13.816255: step: 922/463, loss: 0.15476688742637634 2023-01-22 11:31:14.507154: step: 924/463, loss: 0.003639964619651437 2023-01-22 11:31:15.118971: step: 926/463, loss: 0.07893939316272736 ================================================== Loss: 0.119 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33879190962099126, 'r': 0.31500576036866357, 'f1': 0.3264661469307487}, 'combined': 0.2405540030016043, 'epoch': 19} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3563446702139818, 'r': 0.3789437347771847, 'f1': 0.3672969117483981}, 'combined': 0.28469904164229903, 'epoch': 19} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31116149339316884, 'r': 0.338321699647601, 'f1': 0.3241737012987013}, 'combined': 0.23886483253588517, 'epoch': 19} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34198250079879633, 'r': 0.3881878570648102, 'f1': 0.36362323589023976}, 'combined': 0.281851503417315, 'epoch': 19} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30681078809122453, 'r': 0.32718721614282387, 'f1': 0.31667155722179646}, 'combined': 0.23333693690027107, 'epoch': 19} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3348861280923331, 'r': 0.3696674998519228, 'f1': 0.3514182960584465}, 'combined': 0.27239121512664277, 'epoch': 19} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2926829268292683, 'r': 0.34285714285714286, 'f1': 0.31578947368421056}, 'combined': 0.2105263157894737, 'epoch': 19} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3203125, 'r': 0.44565217391304346, 'f1': 0.3727272727272727}, 'combined': 0.18636363636363634, 'epoch': 19} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5961538461538461, 'r': 0.2672413793103448, 'f1': 0.36904761904761896}, 'combined': 0.24603174603174596, 'epoch': 19} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3279711174242424, 'r': 0.3285934535104364, 'f1': 0.32828199052132695}, 'combined': 0.24189199301571457, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33699381251830834, 'r': 0.40575541764612494, 'f1': 0.3681917384478598}, 'combined': 0.2853926393710684, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32926829268292684, 'r': 0.38571428571428573, 'f1': 0.35526315789473684}, 'combined': 0.23684210526315788, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29021588054776093, 'r': 0.325460313859064, 'f1': 0.30682931199235547}, 'combined': 0.2260847562048935, 'epoch': 13} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32837198696491027, 'r': 0.4053341714098111, 'f1': 0.3628166009822085}, 'combined': 0.28122626487616165, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3333333333333333, 'r': 0.4782608695652174, 'f1': 0.3928571428571428}, 'combined': 0.1964285714285714, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30241528588023536, 'r': 0.33799355480732185, 'f1': 0.319216135095804}, 'combined': 0.2352118890179608, 'epoch': 18} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3270213595028262, 'r': 0.3847310111797955, 'f1': 0.3535366048679202}, 'combined': 0.2740331578402061, 'epoch': 18} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 18} ****************************** Epoch: 20 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 11:33:56.085996: step: 2/463, loss: 0.04250545799732208 2023-01-22 11:33:56.715802: step: 4/463, loss: 0.08801089972257614 2023-01-22 11:33:57.316249: step: 6/463, loss: 0.10532476752996445 2023-01-22 11:33:57.905496: step: 8/463, loss: 0.0667368546128273 2023-01-22 11:33:58.502491: step: 10/463, loss: 0.04844643548130989 2023-01-22 11:33:59.067571: step: 12/463, loss: 2.2428369522094727 2023-01-22 11:33:59.685727: step: 14/463, loss: 0.38902151584625244 2023-01-22 11:34:00.277653: step: 16/463, loss: 0.03707941621541977 2023-01-22 11:34:00.893468: step: 18/463, loss: 0.08036953955888748 2023-01-22 11:34:01.474586: step: 20/463, loss: 0.02767416276037693 2023-01-22 11:34:02.046069: step: 22/463, loss: 0.09718021005392075 2023-01-22 11:34:02.647151: step: 24/463, loss: 0.04521965980529785 2023-01-22 11:34:03.282851: step: 26/463, loss: 0.09202714264392853 2023-01-22 11:34:03.862570: step: 28/463, loss: 0.055955443531274796 2023-01-22 11:34:04.470678: step: 30/463, loss: 0.03415090963244438 2023-01-22 11:34:05.119747: step: 32/463, loss: 0.6268648505210876 2023-01-22 11:34:05.809409: step: 34/463, loss: 0.059366077184677124 2023-01-22 11:34:06.383671: step: 36/463, loss: 0.0190225038677454 2023-01-22 11:34:07.010365: step: 38/463, loss: 0.046397119760513306 2023-01-22 11:34:07.595604: step: 40/463, loss: 0.08980638533830643 2023-01-22 11:34:08.175223: step: 42/463, loss: 0.7615835666656494 2023-01-22 11:34:08.785848: step: 44/463, loss: 0.04266585409641266 2023-01-22 11:34:09.419014: step: 46/463, loss: 0.09545864164829254 2023-01-22 11:34:10.022548: step: 48/463, loss: 0.040855683386325836 2023-01-22 11:34:10.626936: step: 50/463, loss: 0.04735015705227852 2023-01-22 11:34:11.189605: step: 52/463, loss: 0.11715692281723022 2023-01-22 11:34:11.822568: step: 54/463, loss: 0.01710144430398941 2023-01-22 11:34:12.530344: step: 56/463, loss: 0.045234616845846176 2023-01-22 11:34:13.075349: step: 58/463, loss: 0.04355061054229736 2023-01-22 11:34:13.625282: step: 60/463, loss: 0.029900608584284782 2023-01-22 11:34:14.250488: step: 62/463, loss: 0.07774461805820465 2023-01-22 11:34:14.809989: step: 64/463, loss: 0.09202646464109421 2023-01-22 11:34:15.461643: step: 66/463, loss: 0.05168768763542175 2023-01-22 11:34:16.080451: step: 68/463, loss: 0.5962936878204346 2023-01-22 11:34:16.732190: step: 70/463, loss: 0.06609019637107849 2023-01-22 11:34:17.375571: step: 72/463, loss: 0.059408511966466904 2023-01-22 11:34:18.046434: step: 74/463, loss: 0.049335163086652756 2023-01-22 11:34:18.701878: step: 76/463, loss: 0.010667647235095501 2023-01-22 11:34:19.307784: step: 78/463, loss: 0.15229101479053497 2023-01-22 11:34:20.034034: step: 80/463, loss: 0.035849448293447495 2023-01-22 11:34:20.634617: step: 82/463, loss: 0.025260496884584427 2023-01-22 11:34:21.240760: step: 84/463, loss: 0.058970965445041656 2023-01-22 11:34:21.827475: step: 86/463, loss: 0.029864775016903877 2023-01-22 11:34:22.430094: step: 88/463, loss: 0.0796518623828888 2023-01-22 11:34:22.964687: step: 90/463, loss: 0.14115923643112183 2023-01-22 11:34:23.581729: step: 92/463, loss: 0.05154859274625778 2023-01-22 11:34:24.225621: step: 94/463, loss: 0.3532062768936157 2023-01-22 11:34:24.885222: step: 96/463, loss: 0.04024605453014374 2023-01-22 11:34:25.481348: step: 98/463, loss: 0.04854840412735939 2023-01-22 11:34:26.122138: step: 100/463, loss: 0.9367901682853699 2023-01-22 11:34:26.688299: step: 102/463, loss: 0.01247659046202898 2023-01-22 11:34:27.307800: step: 104/463, loss: 0.9505859017372131 2023-01-22 11:34:27.933305: step: 106/463, loss: 0.05853164568543434 2023-01-22 11:34:28.571861: step: 108/463, loss: 0.012301057577133179 2023-01-22 11:34:29.205920: step: 110/463, loss: 0.07176526635885239 2023-01-22 11:34:29.842931: step: 112/463, loss: 0.36822131276130676 2023-01-22 11:34:30.457138: step: 114/463, loss: 3.4288408756256104 2023-01-22 11:34:31.048210: step: 116/463, loss: 0.07779096066951752 2023-01-22 11:34:31.725370: step: 118/463, loss: 0.06815287470817566 2023-01-22 11:34:32.373843: step: 120/463, loss: 0.10610940307378769 2023-01-22 11:34:33.022425: step: 122/463, loss: 0.09823642671108246 2023-01-22 11:34:33.666671: step: 124/463, loss: 0.025018181651830673 2023-01-22 11:34:34.176971: step: 126/463, loss: 0.034618958830833435 2023-01-22 11:34:34.853409: step: 128/463, loss: 0.0320744588971138 2023-01-22 11:34:35.502120: step: 130/463, loss: 0.019785603508353233 2023-01-22 11:34:36.142627: step: 132/463, loss: 0.06175101920962334 2023-01-22 11:34:36.731440: step: 134/463, loss: 0.04201485216617584 2023-01-22 11:34:37.390811: step: 136/463, loss: 0.07357841730117798 2023-01-22 11:34:37.981672: step: 138/463, loss: 0.0372995026409626 2023-01-22 11:34:38.587216: step: 140/463, loss: 0.07504367083311081 2023-01-22 11:34:39.161563: step: 142/463, loss: 0.06548241525888443 2023-01-22 11:34:39.726741: step: 144/463, loss: 0.038432713598012924 2023-01-22 11:34:40.325656: step: 146/463, loss: 0.020114658400416374 2023-01-22 11:34:40.953719: step: 148/463, loss: 0.025309262797236443 2023-01-22 11:34:41.547101: step: 150/463, loss: 0.2232677936553955 2023-01-22 11:34:42.129690: step: 152/463, loss: 0.05970882624387741 2023-01-22 11:34:42.765064: step: 154/463, loss: 0.0957968458533287 2023-01-22 11:34:43.367053: step: 156/463, loss: 0.019476965069770813 2023-01-22 11:34:43.955752: step: 158/463, loss: 0.006689433474093676 2023-01-22 11:34:44.626558: step: 160/463, loss: 0.02838699333369732 2023-01-22 11:34:45.271614: step: 162/463, loss: 0.06594093143939972 2023-01-22 11:34:45.897943: step: 164/463, loss: 0.03180518001317978 2023-01-22 11:34:46.510594: step: 166/463, loss: 0.02431909553706646 2023-01-22 11:34:47.147074: step: 168/463, loss: 0.0583120621740818 2023-01-22 11:34:47.746180: step: 170/463, loss: 0.06335896253585815 2023-01-22 11:34:48.357486: step: 172/463, loss: 0.08673340082168579 2023-01-22 11:34:48.985170: step: 174/463, loss: 0.07836554944515228 2023-01-22 11:34:49.619316: step: 176/463, loss: 0.0231226347386837 2023-01-22 11:34:50.243929: step: 178/463, loss: 0.027215857058763504 2023-01-22 11:34:50.818311: step: 180/463, loss: 0.010634574107825756 2023-01-22 11:34:51.436048: step: 182/463, loss: 0.05315183475613594 2023-01-22 11:34:52.024087: step: 184/463, loss: 0.062360480427742004 2023-01-22 11:34:52.671838: step: 186/463, loss: 0.03155219554901123 2023-01-22 11:34:53.227834: step: 188/463, loss: 0.06969751417636871 2023-01-22 11:34:53.800914: step: 190/463, loss: 0.012527299113571644 2023-01-22 11:34:54.379891: step: 192/463, loss: 0.06745979934930801 2023-01-22 11:34:55.002505: step: 194/463, loss: 0.03917837515473366 2023-01-22 11:34:55.636939: step: 196/463, loss: 0.03979508578777313 2023-01-22 11:34:56.267783: step: 198/463, loss: 0.033088989555835724 2023-01-22 11:34:56.865257: step: 200/463, loss: 0.013852034695446491 2023-01-22 11:34:57.589525: step: 202/463, loss: 0.004486322868615389 2023-01-22 11:34:58.162241: step: 204/463, loss: 0.04540802910923958 2023-01-22 11:34:58.755684: step: 206/463, loss: 0.027821453288197517 2023-01-22 11:34:59.423634: step: 208/463, loss: 0.19811707735061646 2023-01-22 11:35:00.045241: step: 210/463, loss: 0.032117076218128204 2023-01-22 11:35:00.674885: step: 212/463, loss: 0.07389821112155914 2023-01-22 11:35:01.230060: step: 214/463, loss: 0.020998641848564148 2023-01-22 11:35:01.912052: step: 216/463, loss: 0.09907980263233185 2023-01-22 11:35:02.559621: step: 218/463, loss: 0.02236035466194153 2023-01-22 11:35:03.154314: step: 220/463, loss: 0.031027527526021004 2023-01-22 11:35:03.764865: step: 222/463, loss: 0.010798958130180836 2023-01-22 11:35:04.420911: step: 224/463, loss: 0.08692353218793869 2023-01-22 11:35:05.000494: step: 226/463, loss: 0.04171307384967804 2023-01-22 11:35:05.624627: step: 228/463, loss: 0.017737584188580513 2023-01-22 11:35:06.220790: step: 230/463, loss: 0.08151774853467941 2023-01-22 11:35:06.794145: step: 232/463, loss: 0.05702713504433632 2023-01-22 11:35:07.423481: step: 234/463, loss: 0.03371307998895645 2023-01-22 11:35:08.053564: step: 236/463, loss: 0.03732139244675636 2023-01-22 11:35:08.666651: step: 238/463, loss: 0.05353681743144989 2023-01-22 11:35:09.234899: step: 240/463, loss: 0.11210224777460098 2023-01-22 11:35:09.839182: step: 242/463, loss: 0.13348416984081268 2023-01-22 11:35:10.517578: step: 244/463, loss: 0.6534484028816223 2023-01-22 11:35:11.145945: step: 246/463, loss: 0.02931036613881588 2023-01-22 11:35:11.795932: step: 248/463, loss: 0.17572371661663055 2023-01-22 11:35:12.423462: step: 250/463, loss: 0.0404517836868763 2023-01-22 11:35:13.049653: step: 252/463, loss: 0.12899602949619293 2023-01-22 11:35:13.694763: step: 254/463, loss: 0.04055695980787277 2023-01-22 11:35:14.363558: step: 256/463, loss: 0.8264485597610474 2023-01-22 11:35:15.041993: step: 258/463, loss: 0.0633826032280922 2023-01-22 11:35:15.686625: step: 260/463, loss: 0.02122495137155056 2023-01-22 11:35:16.293903: step: 262/463, loss: 0.025803962722420692 2023-01-22 11:35:16.924601: step: 264/463, loss: 0.022498609498143196 2023-01-22 11:35:17.429485: step: 266/463, loss: 0.04139091446995735 2023-01-22 11:35:18.079075: step: 268/463, loss: 0.04816873371601105 2023-01-22 11:35:18.769480: step: 270/463, loss: 0.3591598570346832 2023-01-22 11:35:19.374458: step: 272/463, loss: 0.006877565290778875 2023-01-22 11:35:20.019112: step: 274/463, loss: 0.19716450572013855 2023-01-22 11:35:20.623899: step: 276/463, loss: 0.44062668085098267 2023-01-22 11:35:21.257792: step: 278/463, loss: 0.07158097624778748 2023-01-22 11:35:21.845062: step: 280/463, loss: 0.004711037967354059 2023-01-22 11:35:22.502580: step: 282/463, loss: 0.0697772428393364 2023-01-22 11:35:23.073497: step: 284/463, loss: 0.2478795349597931 2023-01-22 11:35:23.670594: step: 286/463, loss: 0.09423603117465973 2023-01-22 11:35:24.298961: step: 288/463, loss: 0.06738671660423279 2023-01-22 11:35:25.057962: step: 290/463, loss: 0.02402724325656891 2023-01-22 11:35:25.746812: step: 292/463, loss: 0.4583995044231415 2023-01-22 11:35:26.343032: step: 294/463, loss: 0.05294394493103027 2023-01-22 11:35:26.952973: step: 296/463, loss: 0.08964475244283676 2023-01-22 11:35:27.618867: step: 298/463, loss: 0.010876458138227463 2023-01-22 11:35:28.291894: step: 300/463, loss: 0.020989466458559036 2023-01-22 11:35:28.891036: step: 302/463, loss: 0.021016955375671387 2023-01-22 11:35:29.620775: step: 304/463, loss: 0.24152059853076935 2023-01-22 11:35:30.278333: step: 306/463, loss: 0.006914257071912289 2023-01-22 11:35:30.965741: step: 308/463, loss: 0.47118011116981506 2023-01-22 11:35:31.548761: step: 310/463, loss: 0.0913260355591774 2023-01-22 11:35:32.273694: step: 312/463, loss: 0.023262247443199158 2023-01-22 11:35:32.939795: step: 314/463, loss: 0.13974708318710327 2023-01-22 11:35:33.558990: step: 316/463, loss: 0.014410230331122875 2023-01-22 11:35:34.230339: step: 318/463, loss: 0.029110895469784737 2023-01-22 11:35:34.834660: step: 320/463, loss: 0.04063471406698227 2023-01-22 11:35:35.556859: step: 322/463, loss: 0.0740271732211113 2023-01-22 11:35:36.250913: step: 324/463, loss: 0.026210526004433632 2023-01-22 11:35:36.811613: step: 326/463, loss: 0.052800897508859634 2023-01-22 11:35:37.455633: step: 328/463, loss: 0.13307073712348938 2023-01-22 11:35:38.146829: step: 330/463, loss: 0.03350155055522919 2023-01-22 11:35:38.800105: step: 332/463, loss: 0.09340842068195343 2023-01-22 11:35:39.496163: step: 334/463, loss: 0.09332094341516495 2023-01-22 11:35:40.126866: step: 336/463, loss: 0.02414083294570446 2023-01-22 11:35:40.702696: step: 338/463, loss: 0.0835174098610878 2023-01-22 11:35:41.363993: step: 340/463, loss: 0.04582780972123146 2023-01-22 11:35:42.024453: step: 342/463, loss: 0.3220927119255066 2023-01-22 11:35:42.603941: step: 344/463, loss: 0.035379808396101 2023-01-22 11:35:43.270133: step: 346/463, loss: 0.09873564541339874 2023-01-22 11:35:43.859129: step: 348/463, loss: 0.02120424248278141 2023-01-22 11:35:44.506406: step: 350/463, loss: 0.0415227934718132 2023-01-22 11:35:45.121604: step: 352/463, loss: 0.11348246037960052 2023-01-22 11:35:45.707828: step: 354/463, loss: 0.035063933581113815 2023-01-22 11:35:46.286782: step: 356/463, loss: 0.06592081487178802 2023-01-22 11:35:46.887437: step: 358/463, loss: 0.034271836280822754 2023-01-22 11:35:47.479568: step: 360/463, loss: 0.03443994000554085 2023-01-22 11:35:48.127934: step: 362/463, loss: 0.0638202503323555 2023-01-22 11:35:48.739485: step: 364/463, loss: 0.027193138375878334 2023-01-22 11:35:49.322122: step: 366/463, loss: 0.04081645607948303 2023-01-22 11:35:49.968348: step: 368/463, loss: 0.032549817115068436 2023-01-22 11:35:50.657357: step: 370/463, loss: 0.025189336389303207 2023-01-22 11:35:51.386303: step: 372/463, loss: 0.16192680597305298 2023-01-22 11:35:52.045821: step: 374/463, loss: 0.05168411508202553 2023-01-22 11:35:52.680401: step: 376/463, loss: 0.04189447686076164 2023-01-22 11:35:53.421357: step: 378/463, loss: 0.01261840295046568 2023-01-22 11:35:54.073106: step: 380/463, loss: 0.017110368236899376 2023-01-22 11:35:54.668586: step: 382/463, loss: 0.046921804547309875 2023-01-22 11:35:55.231292: step: 384/463, loss: 0.43964457511901855 2023-01-22 11:35:55.845778: step: 386/463, loss: 0.03576177358627319 2023-01-22 11:35:56.436923: step: 388/463, loss: 0.579998791217804 2023-01-22 11:35:57.062233: step: 390/463, loss: 0.13867127895355225 2023-01-22 11:35:57.689778: step: 392/463, loss: 0.0749794989824295 2023-01-22 11:35:58.373363: step: 394/463, loss: 0.09178011864423752 2023-01-22 11:35:58.959589: step: 396/463, loss: 0.25698941946029663 2023-01-22 11:35:59.596990: step: 398/463, loss: 0.09495487064123154 2023-01-22 11:36:00.278482: step: 400/463, loss: 0.01723267324268818 2023-01-22 11:36:00.865042: step: 402/463, loss: 0.024175819009542465 2023-01-22 11:36:01.470827: step: 404/463, loss: 0.03752123937010765 2023-01-22 11:36:02.128447: step: 406/463, loss: 0.017812224105000496 2023-01-22 11:36:02.776223: step: 408/463, loss: 0.0754508301615715 2023-01-22 11:36:03.415362: step: 410/463, loss: 0.025097239762544632 2023-01-22 11:36:03.996403: step: 412/463, loss: 0.05229254439473152 2023-01-22 11:36:04.699705: step: 414/463, loss: 0.031025778502225876 2023-01-22 11:36:05.282986: step: 416/463, loss: 0.5713721513748169 2023-01-22 11:36:05.867966: step: 418/463, loss: 0.3474188446998596 2023-01-22 11:36:06.498364: step: 420/463, loss: 0.076308973133564 2023-01-22 11:36:07.173360: step: 422/463, loss: 0.2900967597961426 2023-01-22 11:36:07.794072: step: 424/463, loss: 0.01907758042216301 2023-01-22 11:36:08.467685: step: 426/463, loss: 0.019886748865246773 2023-01-22 11:36:09.071813: step: 428/463, loss: 0.07937715202569962 2023-01-22 11:36:09.657660: step: 430/463, loss: 0.040851034224033356 2023-01-22 11:36:10.248136: step: 432/463, loss: 0.020795831456780434 2023-01-22 11:36:10.840355: step: 434/463, loss: 0.21069321036338806 2023-01-22 11:36:11.486833: step: 436/463, loss: 0.1839502602815628 2023-01-22 11:36:12.103621: step: 438/463, loss: 0.04914311692118645 2023-01-22 11:36:12.694294: step: 440/463, loss: 0.006894220598042011 2023-01-22 11:36:13.325429: step: 442/463, loss: 0.7006751298904419 2023-01-22 11:36:13.937833: step: 444/463, loss: 0.018299002200365067 2023-01-22 11:36:14.584375: step: 446/463, loss: 0.010620677843689919 2023-01-22 11:36:15.146516: step: 448/463, loss: 0.018639035522937775 2023-01-22 11:36:15.758215: step: 450/463, loss: 0.002787784906104207 2023-01-22 11:36:16.392521: step: 452/463, loss: 0.030481547117233276 2023-01-22 11:36:16.952114: step: 454/463, loss: 0.15855535864830017 2023-01-22 11:36:17.550648: step: 456/463, loss: 0.10882546007633209 2023-01-22 11:36:18.146934: step: 458/463, loss: 0.13556592166423798 2023-01-22 11:36:18.783665: step: 460/463, loss: 0.013005250133574009 2023-01-22 11:36:19.380674: step: 462/463, loss: 0.1138322651386261 2023-01-22 11:36:19.997016: step: 464/463, loss: 0.06282660365104675 2023-01-22 11:36:20.622183: step: 466/463, loss: 0.03192172199487686 2023-01-22 11:36:21.226637: step: 468/463, loss: 0.015465976670384407 2023-01-22 11:36:21.819077: step: 470/463, loss: 0.04728606715798378 2023-01-22 11:36:22.474066: step: 472/463, loss: 0.023267779499292374 2023-01-22 11:36:23.063285: step: 474/463, loss: 0.09839379042387009 2023-01-22 11:36:23.667028: step: 476/463, loss: 0.03243930637836456 2023-01-22 11:36:24.299657: step: 478/463, loss: 0.24113982915878296 2023-01-22 11:36:24.908660: step: 480/463, loss: 0.02225543186068535 2023-01-22 11:36:25.451174: step: 482/463, loss: 0.022931937128305435 2023-01-22 11:36:26.013739: step: 484/463, loss: 0.09219290316104889 2023-01-22 11:36:26.652221: step: 486/463, loss: 0.04385121166706085 2023-01-22 11:36:27.238205: step: 488/463, loss: 0.5618979334831238 2023-01-22 11:36:27.874832: step: 490/463, loss: 0.02479241043329239 2023-01-22 11:36:28.482794: step: 492/463, loss: 0.12881645560264587 2023-01-22 11:36:29.109402: step: 494/463, loss: 0.006323047913610935 2023-01-22 11:36:29.708027: step: 496/463, loss: 0.004740823991596699 2023-01-22 11:36:30.288023: step: 498/463, loss: 0.13583901524543762 2023-01-22 11:36:30.879089: step: 500/463, loss: 0.10019021481275558 2023-01-22 11:36:31.482001: step: 502/463, loss: 0.07118593901395798 2023-01-22 11:36:32.088242: step: 504/463, loss: 0.17805695533752441 2023-01-22 11:36:32.688851: step: 506/463, loss: 0.04027596116065979 2023-01-22 11:36:33.343236: step: 508/463, loss: 0.03283923491835594 2023-01-22 11:36:33.968649: step: 510/463, loss: 0.02354482188820839 2023-01-22 11:36:34.600324: step: 512/463, loss: 0.035854145884513855 2023-01-22 11:36:35.216583: step: 514/463, loss: 0.11175724118947983 2023-01-22 11:36:35.871802: step: 516/463, loss: 0.028801606968045235 2023-01-22 11:36:36.496363: step: 518/463, loss: 0.022815560922026634 2023-01-22 11:36:37.050905: step: 520/463, loss: 0.3163912296295166 2023-01-22 11:36:37.654950: step: 522/463, loss: 0.0188890527933836 2023-01-22 11:36:38.248819: step: 524/463, loss: 0.0735621452331543 2023-01-22 11:36:38.863872: step: 526/463, loss: 0.0456823967397213 2023-01-22 11:36:39.425441: step: 528/463, loss: 0.05895118787884712 2023-01-22 11:36:40.081383: step: 530/463, loss: 0.06408503651618958 2023-01-22 11:36:40.702025: step: 532/463, loss: 0.029579926282167435 2023-01-22 11:36:41.329009: step: 534/463, loss: 0.05187416821718216 2023-01-22 11:36:41.947132: step: 536/463, loss: 0.043843403458595276 2023-01-22 11:36:42.592012: step: 538/463, loss: 0.043029166758060455 2023-01-22 11:36:43.238608: step: 540/463, loss: 0.1379927098751068 2023-01-22 11:36:43.866472: step: 542/463, loss: 0.031104056164622307 2023-01-22 11:36:44.490223: step: 544/463, loss: 0.047198131680488586 2023-01-22 11:36:45.128082: step: 546/463, loss: 0.05940878763794899 2023-01-22 11:36:45.767542: step: 548/463, loss: 0.0956902876496315 2023-01-22 11:36:46.391571: step: 550/463, loss: 0.01451434288173914 2023-01-22 11:36:46.973045: step: 552/463, loss: 0.08825964480638504 2023-01-22 11:36:47.599421: step: 554/463, loss: 0.07416350394487381 2023-01-22 11:36:48.159140: step: 556/463, loss: 0.12103522568941116 2023-01-22 11:36:48.871527: step: 558/463, loss: 0.042535293847322464 2023-01-22 11:36:49.462942: step: 560/463, loss: 0.00959598459303379 2023-01-22 11:36:50.096869: step: 562/463, loss: 0.05251099914312363 2023-01-22 11:36:50.737319: step: 564/463, loss: 0.0059568071737885475 2023-01-22 11:36:51.317047: step: 566/463, loss: 0.054183561354875565 2023-01-22 11:36:51.951026: step: 568/463, loss: 0.07713527977466583 2023-01-22 11:36:52.546660: step: 570/463, loss: 0.06466488540172577 2023-01-22 11:36:53.127659: step: 572/463, loss: 0.0050270589999854565 2023-01-22 11:36:53.779065: step: 574/463, loss: 0.0593777559697628 2023-01-22 11:36:54.496468: step: 576/463, loss: 0.0652415007352829 2023-01-22 11:36:55.200024: step: 578/463, loss: 0.10233879089355469 2023-01-22 11:36:55.761783: step: 580/463, loss: 0.054444149136543274 2023-01-22 11:36:56.391063: step: 582/463, loss: 0.41499340534210205 2023-01-22 11:36:57.016207: step: 584/463, loss: 0.03818292170763016 2023-01-22 11:36:57.645842: step: 586/463, loss: 0.042066749185323715 2023-01-22 11:36:58.393148: step: 588/463, loss: 0.040170393884181976 2023-01-22 11:36:58.991987: step: 590/463, loss: 0.0734221488237381 2023-01-22 11:36:59.542079: step: 592/463, loss: 0.05708552896976471 2023-01-22 11:37:00.234456: step: 594/463, loss: 0.14176659286022186 2023-01-22 11:37:00.881881: step: 596/463, loss: 0.12767548859119415 2023-01-22 11:37:01.459067: step: 598/463, loss: 0.05253032594919205 2023-01-22 11:37:02.117736: step: 600/463, loss: 0.028156928718090057 2023-01-22 11:37:02.715229: step: 602/463, loss: 0.216679185628891 2023-01-22 11:37:03.323656: step: 604/463, loss: 0.08172070235013962 2023-01-22 11:37:03.981330: step: 606/463, loss: 0.0089958431199193 2023-01-22 11:37:04.526022: step: 608/463, loss: 0.0861603319644928 2023-01-22 11:37:05.184560: step: 610/463, loss: 0.015388678759336472 2023-01-22 11:37:05.845594: step: 612/463, loss: 1.600328803062439 2023-01-22 11:37:06.500636: step: 614/463, loss: 0.06564446538686752 2023-01-22 11:37:07.156272: step: 616/463, loss: 0.09172430634498596 2023-01-22 11:37:07.748366: step: 618/463, loss: 0.017240114510059357 2023-01-22 11:37:08.375605: step: 620/463, loss: 0.048070311546325684 2023-01-22 11:37:09.098926: step: 622/463, loss: 0.0269217137247324 2023-01-22 11:37:09.697294: step: 624/463, loss: 0.04842456802725792 2023-01-22 11:37:10.279068: step: 626/463, loss: 0.015573249198496342 2023-01-22 11:37:10.886473: step: 628/463, loss: 0.07935141772031784 2023-01-22 11:37:11.507006: step: 630/463, loss: 0.06779279559850693 2023-01-22 11:37:12.114781: step: 632/463, loss: 0.09676484763622284 2023-01-22 11:37:12.711111: step: 634/463, loss: 0.07864780724048615 2023-01-22 11:37:13.380385: step: 636/463, loss: 0.018157804384827614 2023-01-22 11:37:13.956403: step: 638/463, loss: 0.05776529759168625 2023-01-22 11:37:14.526709: step: 640/463, loss: 0.10632098466157913 2023-01-22 11:37:15.137787: step: 642/463, loss: 0.06286674737930298 2023-01-22 11:37:15.734577: step: 644/463, loss: 0.23010165989398956 2023-01-22 11:37:16.354291: step: 646/463, loss: 0.06769032031297684 2023-01-22 11:37:16.954303: step: 648/463, loss: 0.1818058341741562 2023-01-22 11:37:17.655409: step: 650/463, loss: 0.08866716176271439 2023-01-22 11:37:18.223604: step: 652/463, loss: 0.017704658210277557 2023-01-22 11:37:18.784249: step: 654/463, loss: 0.001529622240923345 2023-01-22 11:37:19.295894: step: 656/463, loss: 0.061790235340595245 2023-01-22 11:37:19.952537: step: 658/463, loss: 0.17239807546138763 2023-01-22 11:37:20.589146: step: 660/463, loss: 0.09937804192304611 2023-01-22 11:37:21.184229: step: 662/463, loss: 0.15803074836730957 2023-01-22 11:37:21.793625: step: 664/463, loss: 0.04156778007745743 2023-01-22 11:37:22.418952: step: 666/463, loss: 0.031508464366197586 2023-01-22 11:37:23.086730: step: 668/463, loss: 0.02762809582054615 2023-01-22 11:37:23.694940: step: 670/463, loss: 0.00946555845439434 2023-01-22 11:37:24.277347: step: 672/463, loss: 0.1350875049829483 2023-01-22 11:37:24.864934: step: 674/463, loss: 0.0069588604383170605 2023-01-22 11:37:25.475456: step: 676/463, loss: 0.07308116555213928 2023-01-22 11:37:26.012827: step: 678/463, loss: 0.006525191944092512 2023-01-22 11:37:26.564739: step: 680/463, loss: 0.8197893500328064 2023-01-22 11:37:27.177348: step: 682/463, loss: 0.023802677169442177 2023-01-22 11:37:27.796112: step: 684/463, loss: 1.0044938325881958 2023-01-22 11:37:28.467720: step: 686/463, loss: 0.051745470613241196 2023-01-22 11:37:29.109404: step: 688/463, loss: 0.041098348796367645 2023-01-22 11:37:29.625481: step: 690/463, loss: 0.024875445291399956 2023-01-22 11:37:30.209207: step: 692/463, loss: 0.042112741619348526 2023-01-22 11:37:30.809206: step: 694/463, loss: 0.035496532917022705 2023-01-22 11:37:31.400074: step: 696/463, loss: 0.03009173460304737 2023-01-22 11:37:31.962076: step: 698/463, loss: 0.06345498561859131 2023-01-22 11:37:32.590710: step: 700/463, loss: 0.06617318093776703 2023-01-22 11:37:33.247276: step: 702/463, loss: 0.05781959742307663 2023-01-22 11:37:33.840434: step: 704/463, loss: 0.29335883259773254 2023-01-22 11:37:34.425985: step: 706/463, loss: 0.026401255279779434 2023-01-22 11:37:35.042283: step: 708/463, loss: 0.06644775718450546 2023-01-22 11:37:35.641093: step: 710/463, loss: 0.015934163704514503 2023-01-22 11:37:36.263244: step: 712/463, loss: 0.03380304574966431 2023-01-22 11:37:36.857702: step: 714/463, loss: 0.15150348842144012 2023-01-22 11:37:37.495682: step: 716/463, loss: 0.026380375027656555 2023-01-22 11:37:38.037690: step: 718/463, loss: 0.09296206384897232 2023-01-22 11:37:38.644262: step: 720/463, loss: 0.12324699014425278 2023-01-22 11:37:39.280850: step: 722/463, loss: 0.0270185898989439 2023-01-22 11:37:39.935926: step: 724/463, loss: 0.12142333388328552 2023-01-22 11:37:40.563747: step: 726/463, loss: 0.03840349242091179 2023-01-22 11:37:41.175607: step: 728/463, loss: 0.0963706523180008 2023-01-22 11:37:41.795735: step: 730/463, loss: 0.180616095662117 2023-01-22 11:37:42.407550: step: 732/463, loss: 0.027679789811372757 2023-01-22 11:37:43.039884: step: 734/463, loss: 0.10157615691423416 2023-01-22 11:37:43.660230: step: 736/463, loss: 0.13992363214492798 2023-01-22 11:37:44.202354: step: 738/463, loss: 0.10379081964492798 2023-01-22 11:37:44.714018: step: 740/463, loss: 0.32321059703826904 2023-01-22 11:37:45.294373: step: 742/463, loss: 0.0481429360806942 2023-01-22 11:37:45.896359: step: 744/463, loss: 0.27094048261642456 2023-01-22 11:37:46.503992: step: 746/463, loss: 0.061478398740291595 2023-01-22 11:37:47.106059: step: 748/463, loss: 0.11156271398067474 2023-01-22 11:37:47.746324: step: 750/463, loss: 0.13841915130615234 2023-01-22 11:37:48.434765: step: 752/463, loss: 0.06979890912771225 2023-01-22 11:37:49.089571: step: 754/463, loss: 0.0640893206000328 2023-01-22 11:37:49.758295: step: 756/463, loss: 0.10758936405181885 2023-01-22 11:37:50.373963: step: 758/463, loss: 0.41406506299972534 2023-01-22 11:37:51.003617: step: 760/463, loss: 0.07428723573684692 2023-01-22 11:37:51.593570: step: 762/463, loss: 0.10503371804952621 2023-01-22 11:37:52.203798: step: 764/463, loss: 0.029797162860631943 2023-01-22 11:37:52.808507: step: 766/463, loss: 0.05734733119606972 2023-01-22 11:37:53.452185: step: 768/463, loss: 0.054886799305677414 2023-01-22 11:37:54.162115: step: 770/463, loss: 0.03895945847034454 2023-01-22 11:37:54.791947: step: 772/463, loss: 0.10972396284341812 2023-01-22 11:37:55.345620: step: 774/463, loss: 0.03161252662539482 2023-01-22 11:37:55.970076: step: 776/463, loss: 0.03737922012805939 2023-01-22 11:37:56.518799: step: 778/463, loss: 0.08636228740215302 2023-01-22 11:37:57.151026: step: 780/463, loss: 0.08123937249183655 2023-01-22 11:37:57.786919: step: 782/463, loss: 0.005904227960854769 2023-01-22 11:37:58.401812: step: 784/463, loss: 0.023396696895360947 2023-01-22 11:37:58.972772: step: 786/463, loss: 0.020909195765852928 2023-01-22 11:37:59.562049: step: 788/463, loss: 0.010047647170722485 2023-01-22 11:38:00.173025: step: 790/463, loss: 0.040804263204336166 2023-01-22 11:38:00.816791: step: 792/463, loss: 0.06735529005527496 2023-01-22 11:38:01.408435: step: 794/463, loss: 0.029419753700494766 2023-01-22 11:38:02.034133: step: 796/463, loss: 0.03022507205605507 2023-01-22 11:38:02.687125: step: 798/463, loss: 0.13375067710876465 2023-01-22 11:38:03.337122: step: 800/463, loss: 0.006585591472685337 2023-01-22 11:38:03.927172: step: 802/463, loss: 0.03674134984612465 2023-01-22 11:38:04.541736: step: 804/463, loss: 0.04075662046670914 2023-01-22 11:38:05.130153: step: 806/463, loss: 0.0020204675383865833 2023-01-22 11:38:05.735721: step: 808/463, loss: 0.029373684898018837 2023-01-22 11:38:06.282533: step: 810/463, loss: 0.017697075381875038 2023-01-22 11:38:06.866042: step: 812/463, loss: 0.021982727572321892 2023-01-22 11:38:07.505604: step: 814/463, loss: 0.040190767496824265 2023-01-22 11:38:08.112220: step: 816/463, loss: 0.1584051251411438 2023-01-22 11:38:08.633938: step: 818/463, loss: 0.007187770213931799 2023-01-22 11:38:09.292262: step: 820/463, loss: 0.06534581631422043 2023-01-22 11:38:09.928558: step: 822/463, loss: 0.07006745040416718 2023-01-22 11:38:10.560797: step: 824/463, loss: 0.055165085941553116 2023-01-22 11:38:11.201851: step: 826/463, loss: 0.05553819239139557 2023-01-22 11:38:11.857780: step: 828/463, loss: 0.05101989582180977 2023-01-22 11:38:12.488862: step: 830/463, loss: 0.012289288453757763 2023-01-22 11:38:13.178964: step: 832/463, loss: 0.04550894349813461 2023-01-22 11:38:13.772993: step: 834/463, loss: 1.0833523273468018 2023-01-22 11:38:14.373393: step: 836/463, loss: 0.012613041326403618 2023-01-22 11:38:15.044288: step: 838/463, loss: 0.08594474196434021 2023-01-22 11:38:15.664796: step: 840/463, loss: 0.006145032122731209 2023-01-22 11:38:16.223371: step: 842/463, loss: 0.02410002052783966 2023-01-22 11:38:16.847610: step: 844/463, loss: 0.08066446334123611 2023-01-22 11:38:17.483079: step: 846/463, loss: 0.048206426203250885 2023-01-22 11:38:18.106581: step: 848/463, loss: 0.0734802708029747 2023-01-22 11:38:18.741915: step: 850/463, loss: 0.41602522134780884 2023-01-22 11:38:19.385020: step: 852/463, loss: 0.028285112231969833 2023-01-22 11:38:19.989542: step: 854/463, loss: 0.5295729041099548 2023-01-22 11:38:20.644110: step: 856/463, loss: 0.022632591426372528 2023-01-22 11:38:21.305900: step: 858/463, loss: 0.052682146430015564 2023-01-22 11:38:22.076224: step: 860/463, loss: 0.2440948784351349 2023-01-22 11:38:22.687780: step: 862/463, loss: 0.005891554988920689 2023-01-22 11:38:23.318783: step: 864/463, loss: 0.23199871182441711 2023-01-22 11:38:23.892594: step: 866/463, loss: 0.04123353585600853 2023-01-22 11:38:24.559631: step: 868/463, loss: 0.04443017765879631 2023-01-22 11:38:25.119635: step: 870/463, loss: 0.00010082490189233795 2023-01-22 11:38:25.744573: step: 872/463, loss: 0.027467235922813416 2023-01-22 11:38:26.346800: step: 874/463, loss: 0.011563832871615887 2023-01-22 11:38:26.936496: step: 876/463, loss: 0.14248572289943695 2023-01-22 11:38:27.502124: step: 878/463, loss: 0.10942830890417099 2023-01-22 11:38:28.120018: step: 880/463, loss: 0.06527270376682281 2023-01-22 11:38:28.779503: step: 882/463, loss: 0.014301876537501812 2023-01-22 11:38:29.478803: step: 884/463, loss: 0.045024458318948746 2023-01-22 11:38:30.116410: step: 886/463, loss: 0.15976426005363464 2023-01-22 11:38:30.687726: step: 888/463, loss: 0.03387608006596565 2023-01-22 11:38:31.292870: step: 890/463, loss: 0.1103304773569107 2023-01-22 11:38:31.886210: step: 892/463, loss: 0.08368083089590073 2023-01-22 11:38:32.497435: step: 894/463, loss: 0.04028048366308212 2023-01-22 11:38:33.138824: step: 896/463, loss: 0.12643633782863617 2023-01-22 11:38:33.710548: step: 898/463, loss: 0.06794684380292892 2023-01-22 11:38:34.302685: step: 900/463, loss: 0.06601326912641525 2023-01-22 11:38:34.865736: step: 902/463, loss: 0.06228647381067276 2023-01-22 11:38:35.431475: step: 904/463, loss: 0.07344299554824829 2023-01-22 11:38:35.987436: step: 906/463, loss: 0.051804061979055405 2023-01-22 11:38:36.588284: step: 908/463, loss: 0.07039500772953033 2023-01-22 11:38:37.212891: step: 910/463, loss: 0.060677725821733475 2023-01-22 11:38:37.797149: step: 912/463, loss: 0.08781065791845322 2023-01-22 11:38:38.382907: step: 914/463, loss: 0.004770986270159483 2023-01-22 11:38:38.987965: step: 916/463, loss: 0.0813886970281601 2023-01-22 11:38:39.649668: step: 918/463, loss: 0.02861078456044197 2023-01-22 11:38:40.271430: step: 920/463, loss: 0.03386520966887474 2023-01-22 11:38:40.909279: step: 922/463, loss: 0.043594326823949814 2023-01-22 11:38:41.530065: step: 924/463, loss: 0.06822507828474045 2023-01-22 11:38:42.167196: step: 926/463, loss: 0.07693836092948914 ================================================== Loss: 0.111 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31452981651376144, 'r': 0.32527277039848196, 'f1': 0.3198111007462686}, 'combined': 0.23565028476040845, 'epoch': 20} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3361268522007431, 'r': 0.39915063698838243, 'f1': 0.36493772524652107}, 'combined': 0.282870389904002, 'epoch': 20} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28494718309859157, 'r': 0.3455052182163188, 'f1': 0.31231775300171527}, 'combined': 0.23012887063284282, 'epoch': 20} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32847039899936065, 'r': 0.4166260575543361, 'f1': 0.367333185266708}, 'combined': 0.2847271579579268, 'epoch': 20} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28808988951011716, 'r': 0.34220924256799495, 'f1': 0.31282614194854}, 'combined': 0.2305034730147137, 'epoch': 20} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3269426723020062, 'r': 0.40266836478372087, 'f1': 0.3608757667913413}, 'combined': 0.2797218862210397, 'epoch': 20} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24444444444444444, 'r': 0.3142857142857143, 'f1': 0.27499999999999997}, 'combined': 0.1833333333333333, 'epoch': 20} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.24324324324324326, 'r': 0.391304347826087, 'f1': 0.30000000000000004}, 'combined': 0.15000000000000002, 'epoch': 20} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.3017241379310345, 'f1': 0.35}, 'combined': 0.2333333333333333, 'epoch': 20} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3279711174242424, 'r': 0.3285934535104364, 'f1': 0.32828199052132695}, 'combined': 0.24189199301571457, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33699381251830834, 'r': 0.40575541764612494, 'f1': 0.3681917384478598}, 'combined': 0.2853926393710684, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32926829268292684, 'r': 0.38571428571428573, 'f1': 0.35526315789473684}, 'combined': 0.23684210526315788, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29021588054776093, 'r': 0.325460313859064, 'f1': 0.30682931199235547}, 'combined': 0.2260847562048935, 'epoch': 13} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32837198696491027, 'r': 0.4053341714098111, 'f1': 0.3628166009822085}, 'combined': 0.28122626487616165, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3333333333333333, 'r': 0.4782608695652174, 'f1': 0.3928571428571428}, 'combined': 0.1964285714285714, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30241528588023536, 'r': 0.33799355480732185, 'f1': 0.319216135095804}, 'combined': 0.2352118890179608, 'epoch': 18} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3270213595028262, 'r': 0.3847310111797955, 'f1': 0.3535366048679202}, 'combined': 0.2740331578402061, 'epoch': 18} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 18} ****************************** Epoch: 21 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 11:41:23.019521: step: 2/463, loss: 0.10842946171760559 2023-01-22 11:41:23.709058: step: 4/463, loss: 0.32674673199653625 2023-01-22 11:41:24.336872: step: 6/463, loss: 0.1941383183002472 2023-01-22 11:41:25.035639: step: 8/463, loss: 0.14849162101745605 2023-01-22 11:41:25.639569: step: 10/463, loss: 0.0326245054602623 2023-01-22 11:41:26.284842: step: 12/463, loss: 0.03208509460091591 2023-01-22 11:41:26.910909: step: 14/463, loss: 0.04421968013048172 2023-01-22 11:41:27.450633: step: 16/463, loss: 0.011603240855038166 2023-01-22 11:41:28.058716: step: 18/463, loss: 0.5193854570388794 2023-01-22 11:41:28.611259: step: 20/463, loss: 0.13878828287124634 2023-01-22 11:41:29.201935: step: 22/463, loss: 0.04849619045853615 2023-01-22 11:41:29.802604: step: 24/463, loss: 0.03346783295273781 2023-01-22 11:41:30.422903: step: 26/463, loss: 0.018788419663906097 2023-01-22 11:41:31.009515: step: 28/463, loss: 0.011331932619214058 2023-01-22 11:41:31.631598: step: 30/463, loss: 0.015411270782351494 2023-01-22 11:41:32.265388: step: 32/463, loss: 0.11832278221845627 2023-01-22 11:41:32.838906: step: 34/463, loss: 0.006202243268489838 2023-01-22 11:41:33.437795: step: 36/463, loss: 0.1826816201210022 2023-01-22 11:41:34.035083: step: 38/463, loss: 0.025419119745492935 2023-01-22 11:41:34.639717: step: 40/463, loss: 0.030588358640670776 2023-01-22 11:41:35.241527: step: 42/463, loss: 0.05161495879292488 2023-01-22 11:41:35.871817: step: 44/463, loss: 0.03308122605085373 2023-01-22 11:41:36.483872: step: 46/463, loss: 0.026122014969587326 2023-01-22 11:41:37.112221: step: 48/463, loss: 0.013910619542002678 2023-01-22 11:41:37.789255: step: 50/463, loss: 1.746422529220581 2023-01-22 11:41:38.403850: step: 52/463, loss: 0.09099697321653366 2023-01-22 11:41:39.019997: step: 54/463, loss: 0.12887287139892578 2023-01-22 11:41:39.660298: step: 56/463, loss: 0.02142437919974327 2023-01-22 11:41:40.241879: step: 58/463, loss: 0.013874801807105541 2023-01-22 11:41:40.883107: step: 60/463, loss: 0.03262742981314659 2023-01-22 11:41:41.488072: step: 62/463, loss: 0.24533799290657043 2023-01-22 11:41:42.260498: step: 64/463, loss: 0.02403237856924534 2023-01-22 11:41:42.850704: step: 66/463, loss: 0.0285759586840868 2023-01-22 11:41:43.487583: step: 68/463, loss: 0.20021891593933105 2023-01-22 11:41:44.079364: step: 70/463, loss: 0.02089514583349228 2023-01-22 11:41:44.669910: step: 72/463, loss: 0.19935274124145508 2023-01-22 11:41:45.275082: step: 74/463, loss: 0.014258578419685364 2023-01-22 11:41:45.890163: step: 76/463, loss: 0.009998084045946598 2023-01-22 11:41:46.484769: step: 78/463, loss: 0.010598234832286835 2023-01-22 11:41:47.125150: step: 80/463, loss: 0.3295132517814636 2023-01-22 11:41:47.750011: step: 82/463, loss: 0.06613471359014511 2023-01-22 11:41:48.365223: step: 84/463, loss: 0.05376961827278137 2023-01-22 11:41:49.053395: step: 86/463, loss: 0.17067649960517883 2023-01-22 11:41:49.711276: step: 88/463, loss: 0.010756397619843483 2023-01-22 11:41:50.406707: step: 90/463, loss: 0.06448723375797272 2023-01-22 11:41:51.047909: step: 92/463, loss: 0.08899065107107162 2023-01-22 11:41:51.689466: step: 94/463, loss: 0.010376730002462864 2023-01-22 11:41:52.259438: step: 96/463, loss: 0.0562828928232193 2023-01-22 11:41:52.850514: step: 98/463, loss: 0.07136587053537369 2023-01-22 11:41:53.424228: step: 100/463, loss: 0.025876399129629135 2023-01-22 11:41:54.050263: step: 102/463, loss: 0.04205651208758354 2023-01-22 11:41:54.642515: step: 104/463, loss: 0.0895906314253807 2023-01-22 11:41:55.211209: step: 106/463, loss: 0.4598084092140198 2023-01-22 11:41:55.834466: step: 108/463, loss: 0.021469302475452423 2023-01-22 11:41:56.458190: step: 110/463, loss: 0.12939992547035217 2023-01-22 11:41:57.037131: step: 112/463, loss: 0.026458879932761192 2023-01-22 11:41:57.639325: step: 114/463, loss: 0.03705986589193344 2023-01-22 11:41:58.270510: step: 116/463, loss: 0.07023543119430542 2023-01-22 11:41:58.825925: step: 118/463, loss: 0.0032180838752537966 2023-01-22 11:41:59.446582: step: 120/463, loss: 0.052397649735212326 2023-01-22 11:42:00.156377: step: 122/463, loss: 0.022886991500854492 2023-01-22 11:42:00.780103: step: 124/463, loss: 0.1497543901205063 2023-01-22 11:42:01.411266: step: 126/463, loss: 0.17021460831165314 2023-01-22 11:42:02.026120: step: 128/463, loss: 0.036867544054985046 2023-01-22 11:42:02.616167: step: 130/463, loss: 0.38709887862205505 2023-01-22 11:42:03.250136: step: 132/463, loss: 0.0445268377661705 2023-01-22 11:42:03.877932: step: 134/463, loss: 0.05580996349453926 2023-01-22 11:42:04.499395: step: 136/463, loss: 0.02718171663582325 2023-01-22 11:42:05.042521: step: 138/463, loss: 0.037013012915849686 2023-01-22 11:42:05.618459: step: 140/463, loss: 0.07311011105775833 2023-01-22 11:42:06.287328: step: 142/463, loss: 0.03917272388935089 2023-01-22 11:42:06.893035: step: 144/463, loss: 0.017371615394949913 2023-01-22 11:42:07.516275: step: 146/463, loss: 0.36177748441696167 2023-01-22 11:42:08.094848: step: 148/463, loss: 0.03513642027974129 2023-01-22 11:42:08.717445: step: 150/463, loss: 0.041847191751003265 2023-01-22 11:42:09.328156: step: 152/463, loss: 0.2530669569969177 2023-01-22 11:42:09.938679: step: 154/463, loss: 0.05493073910474777 2023-01-22 11:42:10.535666: step: 156/463, loss: 10.177796363830566 2023-01-22 11:42:11.146230: step: 158/463, loss: 0.06300440430641174 2023-01-22 11:42:11.728104: step: 160/463, loss: 0.0221159178763628 2023-01-22 11:42:12.308625: step: 162/463, loss: 0.04124729707837105 2023-01-22 11:42:12.855400: step: 164/463, loss: 0.02808583527803421 2023-01-22 11:42:13.387337: step: 166/463, loss: 0.008351719006896019 2023-01-22 11:42:14.017998: step: 168/463, loss: 0.07916621118783951 2023-01-22 11:42:14.688469: step: 170/463, loss: 0.08258962631225586 2023-01-22 11:42:15.410327: step: 172/463, loss: 0.03964458405971527 2023-01-22 11:42:16.039480: step: 174/463, loss: 0.022951669991016388 2023-01-22 11:42:16.711962: step: 176/463, loss: 0.029361989349126816 2023-01-22 11:42:17.357024: step: 178/463, loss: 0.35478201508522034 2023-01-22 11:42:17.926290: step: 180/463, loss: 0.02392745018005371 2023-01-22 11:42:18.527367: step: 182/463, loss: 0.0789598599076271 2023-01-22 11:42:19.110361: step: 184/463, loss: 0.019819023087620735 2023-01-22 11:42:19.777671: step: 186/463, loss: 0.02444101870059967 2023-01-22 11:42:20.325059: step: 188/463, loss: 0.002599931787699461 2023-01-22 11:42:21.023409: step: 190/463, loss: 0.1535947620868683 2023-01-22 11:42:21.655629: step: 192/463, loss: 0.24095189571380615 2023-01-22 11:42:22.220742: step: 194/463, loss: 0.019466117024421692 2023-01-22 11:42:22.873330: step: 196/463, loss: 0.11408529430627823 2023-01-22 11:42:23.439329: step: 198/463, loss: 0.05276991054415703 2023-01-22 11:42:24.045034: step: 200/463, loss: 0.012610968202352524 2023-01-22 11:42:24.693636: step: 202/463, loss: 0.02691648341715336 2023-01-22 11:42:25.321565: step: 204/463, loss: 0.12883315980434418 2023-01-22 11:42:25.910369: step: 206/463, loss: 0.006722901947796345 2023-01-22 11:42:26.485330: step: 208/463, loss: 0.006243908312171698 2023-01-22 11:42:27.038673: step: 210/463, loss: 0.027313238009810448 2023-01-22 11:42:27.679251: step: 212/463, loss: 0.04436987265944481 2023-01-22 11:42:28.323614: step: 214/463, loss: 0.663070559501648 2023-01-22 11:42:29.048488: step: 216/463, loss: 0.003890047548338771 2023-01-22 11:42:29.670458: step: 218/463, loss: 0.26263242959976196 2023-01-22 11:42:30.263016: step: 220/463, loss: 3.3298776149749756 2023-01-22 11:42:30.868680: step: 222/463, loss: 0.03333837166428566 2023-01-22 11:42:31.506835: step: 224/463, loss: 0.03925684466958046 2023-01-22 11:42:32.109443: step: 226/463, loss: 0.0636269748210907 2023-01-22 11:42:32.678167: step: 228/463, loss: 0.04158621281385422 2023-01-22 11:42:33.248019: step: 230/463, loss: 0.09085356444120407 2023-01-22 11:42:33.915582: step: 232/463, loss: 0.06672497093677521 2023-01-22 11:42:34.518288: step: 234/463, loss: 0.02141600102186203 2023-01-22 11:42:35.153854: step: 236/463, loss: 0.04261079058051109 2023-01-22 11:42:35.722112: step: 238/463, loss: 0.004412642680108547 2023-01-22 11:42:36.365953: step: 240/463, loss: 0.02278420701622963 2023-01-22 11:42:37.020860: step: 242/463, loss: 0.18767501413822174 2023-01-22 11:42:37.653859: step: 244/463, loss: 0.04235748201608658 2023-01-22 11:42:38.317303: step: 246/463, loss: 0.03558219596743584 2023-01-22 11:42:38.893327: step: 248/463, loss: 0.10046972334384918 2023-01-22 11:42:39.521083: step: 250/463, loss: 0.1349455863237381 2023-01-22 11:42:40.141149: step: 252/463, loss: 0.07888194173574448 2023-01-22 11:42:40.793095: step: 254/463, loss: 0.021533489227294922 2023-01-22 11:42:41.348895: step: 256/463, loss: 0.003026509890332818 2023-01-22 11:42:41.925397: step: 258/463, loss: 0.02129051648080349 2023-01-22 11:42:42.465297: step: 260/463, loss: 0.5292913913726807 2023-01-22 11:42:43.086334: step: 262/463, loss: 0.05269792675971985 2023-01-22 11:42:43.690629: step: 264/463, loss: 0.07120180130004883 2023-01-22 11:42:44.281037: step: 266/463, loss: 0.18456529080867767 2023-01-22 11:42:45.019549: step: 268/463, loss: 0.029535286128520966 2023-01-22 11:42:45.678596: step: 270/463, loss: 0.011946989223361015 2023-01-22 11:42:46.305855: step: 272/463, loss: 0.034383513033390045 2023-01-22 11:42:46.983458: step: 274/463, loss: 0.21425393223762512 2023-01-22 11:42:47.550972: step: 276/463, loss: 0.10387646406888962 2023-01-22 11:42:48.189147: step: 278/463, loss: 0.03382065147161484 2023-01-22 11:42:48.846058: step: 280/463, loss: 0.09736025333404541 2023-01-22 11:42:49.480746: step: 282/463, loss: 0.26629090309143066 2023-01-22 11:42:50.010863: step: 284/463, loss: 0.02002924680709839 2023-01-22 11:42:50.615711: step: 286/463, loss: 0.07871459424495697 2023-01-22 11:42:51.253129: step: 288/463, loss: 0.0659560114145279 2023-01-22 11:42:51.871280: step: 290/463, loss: 0.2082950621843338 2023-01-22 11:42:52.588653: step: 292/463, loss: 0.07427112013101578 2023-01-22 11:42:53.242445: step: 294/463, loss: 0.133941188454628 2023-01-22 11:42:53.828733: step: 296/463, loss: 0.41010451316833496 2023-01-22 11:42:54.436787: step: 298/463, loss: 0.5334764122962952 2023-01-22 11:42:55.037657: step: 300/463, loss: 0.08961945027112961 2023-01-22 11:42:55.759748: step: 302/463, loss: 0.03199724107980728 2023-01-22 11:42:56.326498: step: 304/463, loss: 0.011130590923130512 2023-01-22 11:42:57.030473: step: 306/463, loss: 0.025210335850715637 2023-01-22 11:42:57.722794: step: 308/463, loss: 0.013276624493300915 2023-01-22 11:42:58.354754: step: 310/463, loss: 0.09065524488687515 2023-01-22 11:42:59.000354: step: 312/463, loss: 0.14581745862960815 2023-01-22 11:42:59.631603: step: 314/463, loss: 0.0038363952189683914 2023-01-22 11:43:00.249498: step: 316/463, loss: 0.06042252480983734 2023-01-22 11:43:00.837805: step: 318/463, loss: 0.05053623020648956 2023-01-22 11:43:01.461545: step: 320/463, loss: 0.27989882230758667 2023-01-22 11:43:02.090161: step: 322/463, loss: 0.036740854382514954 2023-01-22 11:43:02.746285: step: 324/463, loss: 0.014833400957286358 2023-01-22 11:43:03.368790: step: 326/463, loss: 0.024892011657357216 2023-01-22 11:43:03.909062: step: 328/463, loss: 0.03105071187019348 2023-01-22 11:43:04.466599: step: 330/463, loss: 0.004904492292553186 2023-01-22 11:43:05.050071: step: 332/463, loss: 0.05492234230041504 2023-01-22 11:43:05.644702: step: 334/463, loss: 0.02544984593987465 2023-01-22 11:43:06.389526: step: 336/463, loss: 0.04948979243636131 2023-01-22 11:43:06.981130: step: 338/463, loss: 0.06087706610560417 2023-01-22 11:43:07.591233: step: 340/463, loss: 0.058417242020368576 2023-01-22 11:43:08.269018: step: 342/463, loss: 0.041188452392816544 2023-01-22 11:43:08.876919: step: 344/463, loss: 0.08050759881734848 2023-01-22 11:43:09.488920: step: 346/463, loss: 0.07650867104530334 2023-01-22 11:43:10.045573: step: 348/463, loss: 0.02573893405497074 2023-01-22 11:43:10.671072: step: 350/463, loss: 0.05392170324921608 2023-01-22 11:43:11.244942: step: 352/463, loss: 0.05369649454951286 2023-01-22 11:43:11.821562: step: 354/463, loss: 0.01662009209394455 2023-01-22 11:43:12.487787: step: 356/463, loss: 0.17258664965629578 2023-01-22 11:43:13.163120: step: 358/463, loss: 0.03924667462706566 2023-01-22 11:43:13.770225: step: 360/463, loss: 0.06593135744333267 2023-01-22 11:43:14.424097: step: 362/463, loss: 0.08732613176107407 2023-01-22 11:43:15.025201: step: 364/463, loss: 0.04834168031811714 2023-01-22 11:43:15.719078: step: 366/463, loss: 0.11759504675865173 2023-01-22 11:43:16.335645: step: 368/463, loss: 0.053821392357349396 2023-01-22 11:43:17.031618: step: 370/463, loss: 0.3536268174648285 2023-01-22 11:43:17.640619: step: 372/463, loss: 0.11019283533096313 2023-01-22 11:43:18.289363: step: 374/463, loss: 0.16470111906528473 2023-01-22 11:43:18.882413: step: 376/463, loss: 0.02005312591791153 2023-01-22 11:43:19.573148: step: 378/463, loss: 0.07273054867982864 2023-01-22 11:43:20.123850: step: 380/463, loss: 0.012648736126720905 2023-01-22 11:43:20.777923: step: 382/463, loss: 0.03662315383553505 2023-01-22 11:43:21.435528: step: 384/463, loss: 0.026740530505776405 2023-01-22 11:43:21.980023: step: 386/463, loss: 0.016627658158540726 2023-01-22 11:43:22.656602: step: 388/463, loss: 0.03042297065258026 2023-01-22 11:43:23.280875: step: 390/463, loss: 0.04296514019370079 2023-01-22 11:43:23.899928: step: 392/463, loss: 0.029243730008602142 2023-01-22 11:43:24.565857: step: 394/463, loss: 0.022035405039787292 2023-01-22 11:43:25.209399: step: 396/463, loss: 0.031127285212278366 2023-01-22 11:43:25.787520: step: 398/463, loss: 0.03823459520936012 2023-01-22 11:43:26.412882: step: 400/463, loss: 0.06997408717870712 2023-01-22 11:43:27.039424: step: 402/463, loss: 0.9979336857795715 2023-01-22 11:43:27.661796: step: 404/463, loss: 0.02430730313062668 2023-01-22 11:43:28.283783: step: 406/463, loss: 0.019080815836787224 2023-01-22 11:43:28.889977: step: 408/463, loss: 0.011614823713898659 2023-01-22 11:43:29.555454: step: 410/463, loss: 0.04508303105831146 2023-01-22 11:43:30.111202: step: 412/463, loss: 0.05451278015971184 2023-01-22 11:43:30.691342: step: 414/463, loss: 0.019044803455471992 2023-01-22 11:43:31.250074: step: 416/463, loss: 0.008757064118981361 2023-01-22 11:43:31.842444: step: 418/463, loss: 0.006078549660742283 2023-01-22 11:43:32.478693: step: 420/463, loss: 0.031054750084877014 2023-01-22 11:43:33.127846: step: 422/463, loss: 0.010163325816392899 2023-01-22 11:43:33.693531: step: 424/463, loss: 0.26389050483703613 2023-01-22 11:43:34.317592: step: 426/463, loss: 0.016442103311419487 2023-01-22 11:43:34.900741: step: 428/463, loss: 0.09762738645076752 2023-01-22 11:43:35.544232: step: 430/463, loss: 0.08909660577774048 2023-01-22 11:43:36.185940: step: 432/463, loss: 0.027438441291451454 2023-01-22 11:43:36.882380: step: 434/463, loss: 0.0046327016316354275 2023-01-22 11:43:37.530042: step: 436/463, loss: 0.10689191520214081 2023-01-22 11:43:38.150696: step: 438/463, loss: 0.061745233833789825 2023-01-22 11:43:38.718784: step: 440/463, loss: 0.017984019592404366 2023-01-22 11:43:39.319505: step: 442/463, loss: 0.010926088318228722 2023-01-22 11:43:39.941349: step: 444/463, loss: 0.09590668976306915 2023-01-22 11:43:40.495666: step: 446/463, loss: 0.01594839245080948 2023-01-22 11:43:41.135280: step: 448/463, loss: 0.04712144285440445 2023-01-22 11:43:41.725526: step: 450/463, loss: 0.024126214906573296 2023-01-22 11:43:42.401483: step: 452/463, loss: 0.08683865517377853 2023-01-22 11:43:43.044788: step: 454/463, loss: 0.00911947526037693 2023-01-22 11:43:43.679408: step: 456/463, loss: 0.019975975155830383 2023-01-22 11:43:44.252372: step: 458/463, loss: 0.07355379313230515 2023-01-22 11:43:44.875140: step: 460/463, loss: 0.04055371508002281 2023-01-22 11:43:45.489525: step: 462/463, loss: 0.07014424353837967 2023-01-22 11:43:46.138889: step: 464/463, loss: 0.46137484908103943 2023-01-22 11:43:46.721193: step: 466/463, loss: 0.04279604181647301 2023-01-22 11:43:47.353123: step: 468/463, loss: 0.02388113923370838 2023-01-22 11:43:47.951168: step: 470/463, loss: 0.07741028070449829 2023-01-22 11:43:48.543873: step: 472/463, loss: 0.04212034493684769 2023-01-22 11:43:49.177742: step: 474/463, loss: 0.033438291400671005 2023-01-22 11:43:49.814268: step: 476/463, loss: 0.011724998243153095 2023-01-22 11:43:50.417204: step: 478/463, loss: 0.038654226809740067 2023-01-22 11:43:51.034009: step: 480/463, loss: 0.04388156160712242 2023-01-22 11:43:51.686784: step: 482/463, loss: 0.028641149401664734 2023-01-22 11:43:52.307425: step: 484/463, loss: 0.14782431721687317 2023-01-22 11:43:52.922687: step: 486/463, loss: 0.14872324466705322 2023-01-22 11:43:53.544339: step: 488/463, loss: 0.19952279329299927 2023-01-22 11:43:54.113673: step: 490/463, loss: 0.05978836119174957 2023-01-22 11:43:54.685902: step: 492/463, loss: 0.007217899430543184 2023-01-22 11:43:55.258923: step: 494/463, loss: 0.011317849159240723 2023-01-22 11:43:55.894315: step: 496/463, loss: 0.05255355313420296 2023-01-22 11:43:56.531368: step: 498/463, loss: 0.04206959530711174 2023-01-22 11:43:57.103848: step: 500/463, loss: 0.08046004176139832 2023-01-22 11:43:57.754808: step: 502/463, loss: 0.27565500140190125 2023-01-22 11:43:58.430264: step: 504/463, loss: 0.0955238789319992 2023-01-22 11:43:59.008438: step: 506/463, loss: 0.22641132771968842 2023-01-22 11:43:59.593564: step: 508/463, loss: 0.03411635383963585 2023-01-22 11:44:00.244931: step: 510/463, loss: 0.010817231610417366 2023-01-22 11:44:00.835563: step: 512/463, loss: 0.09243965893983841 2023-01-22 11:44:01.432707: step: 514/463, loss: 0.0375051274895668 2023-01-22 11:44:02.166610: step: 516/463, loss: 0.10578754544258118 2023-01-22 11:44:02.818078: step: 518/463, loss: 0.031016208231449127 2023-01-22 11:44:03.491345: step: 520/463, loss: 0.037243012338876724 2023-01-22 11:44:04.094828: step: 522/463, loss: 0.044066064059734344 2023-01-22 11:44:04.708174: step: 524/463, loss: 0.013921495527029037 2023-01-22 11:44:05.323344: step: 526/463, loss: 0.14074905216693878 2023-01-22 11:44:06.021451: step: 528/463, loss: 0.02002963051199913 2023-01-22 11:44:06.585957: step: 530/463, loss: 0.038184359669685364 2023-01-22 11:44:07.167990: step: 532/463, loss: 0.01914183236658573 2023-01-22 11:44:07.734500: step: 534/463, loss: 0.09031576663255692 2023-01-22 11:44:08.305859: step: 536/463, loss: 0.06424997746944427 2023-01-22 11:44:08.910059: step: 538/463, loss: 0.046822234988212585 2023-01-22 11:44:09.491641: step: 540/463, loss: 0.19398356974124908 2023-01-22 11:44:10.062797: step: 542/463, loss: 0.005893049295991659 2023-01-22 11:44:10.681608: step: 544/463, loss: 0.02531193383038044 2023-01-22 11:44:11.316322: step: 546/463, loss: 0.0798172876238823 2023-01-22 11:44:11.860244: step: 548/463, loss: 0.008896934799849987 2023-01-22 11:44:12.482378: step: 550/463, loss: 0.04455339536070824 2023-01-22 11:44:13.073768: step: 552/463, loss: 1.1011356115341187 2023-01-22 11:44:13.656116: step: 554/463, loss: 0.0317896269261837 2023-01-22 11:44:14.255739: step: 556/463, loss: 0.05265568569302559 2023-01-22 11:44:14.852369: step: 558/463, loss: 0.11968901753425598 2023-01-22 11:44:15.419489: step: 560/463, loss: 0.026453066617250443 2023-01-22 11:44:16.084022: step: 562/463, loss: 0.01327671017497778 2023-01-22 11:44:16.740259: step: 564/463, loss: 0.008031188510358334 2023-01-22 11:44:17.408208: step: 566/463, loss: 0.07956541329622269 2023-01-22 11:44:18.006476: step: 568/463, loss: 0.0658179223537445 2023-01-22 11:44:18.681771: step: 570/463, loss: 0.034820958971977234 2023-01-22 11:44:19.327822: step: 572/463, loss: 0.04435864835977554 2023-01-22 11:44:19.993719: step: 574/463, loss: 0.06307648122310638 2023-01-22 11:44:20.645608: step: 576/463, loss: 0.08670160919427872 2023-01-22 11:44:21.289457: step: 578/463, loss: 0.247022807598114 2023-01-22 11:44:21.947783: step: 580/463, loss: 0.04984545707702637 2023-01-22 11:44:22.588035: step: 582/463, loss: 0.013959272764623165 2023-01-22 11:44:23.222413: step: 584/463, loss: 0.03683488443493843 2023-01-22 11:44:23.854393: step: 586/463, loss: 0.07650494575500488 2023-01-22 11:44:24.435727: step: 588/463, loss: 0.19359926879405975 2023-01-22 11:44:25.012774: step: 590/463, loss: 0.01624019630253315 2023-01-22 11:44:25.672832: step: 592/463, loss: 0.01648665778338909 2023-01-22 11:44:26.367404: step: 594/463, loss: 0.022035900503396988 2023-01-22 11:44:27.003031: step: 596/463, loss: 0.04287360981106758 2023-01-22 11:44:27.568067: step: 598/463, loss: 0.02396414428949356 2023-01-22 11:44:28.160545: step: 600/463, loss: 0.05802258849143982 2023-01-22 11:44:28.782342: step: 602/463, loss: 0.12769955396652222 2023-01-22 11:44:29.418327: step: 604/463, loss: 0.0201233122497797 2023-01-22 11:44:30.040117: step: 606/463, loss: 0.1204633116722107 2023-01-22 11:44:30.676339: step: 608/463, loss: 0.004853670485317707 2023-01-22 11:44:31.386294: step: 610/463, loss: 0.0698622316122055 2023-01-22 11:44:32.005230: step: 612/463, loss: 0.050907183438539505 2023-01-22 11:44:32.617289: step: 614/463, loss: 0.0035860727075487375 2023-01-22 11:44:33.322152: step: 616/463, loss: 0.06869044899940491 2023-01-22 11:44:33.956862: step: 618/463, loss: 0.059697411954402924 2023-01-22 11:44:34.592053: step: 620/463, loss: 0.015558258630335331 2023-01-22 11:44:35.265195: step: 622/463, loss: 0.044381264597177505 2023-01-22 11:44:35.843881: step: 624/463, loss: 0.03939526528120041 2023-01-22 11:44:36.422756: step: 626/463, loss: 0.04848700761795044 2023-01-22 11:44:37.004483: step: 628/463, loss: 0.09361636638641357 2023-01-22 11:44:37.651883: step: 630/463, loss: 0.017986519262194633 2023-01-22 11:44:38.341560: step: 632/463, loss: 0.047542911022901535 2023-01-22 11:44:39.019815: step: 634/463, loss: 0.286553293466568 2023-01-22 11:44:39.665221: step: 636/463, loss: 0.05607404559850693 2023-01-22 11:44:40.369174: step: 638/463, loss: 0.07178696990013123 2023-01-22 11:44:41.004998: step: 640/463, loss: 0.45497363805770874 2023-01-22 11:44:41.648283: step: 642/463, loss: 0.055788442492485046 2023-01-22 11:44:42.261474: step: 644/463, loss: 0.36743780970573425 2023-01-22 11:44:42.935750: step: 646/463, loss: 0.03997711464762688 2023-01-22 11:44:43.511720: step: 648/463, loss: 0.01330278068780899 2023-01-22 11:44:44.196833: step: 650/463, loss: 0.07009090483188629 2023-01-22 11:44:44.777380: step: 652/463, loss: 0.06956334412097931 2023-01-22 11:44:45.477958: step: 654/463, loss: 0.3698880970478058 2023-01-22 11:44:46.114519: step: 656/463, loss: 0.10815276205539703 2023-01-22 11:44:46.655801: step: 658/463, loss: 0.06989423930644989 2023-01-22 11:44:47.236950: step: 660/463, loss: 0.015422336757183075 2023-01-22 11:44:47.831309: step: 662/463, loss: 0.07557134330272675 2023-01-22 11:44:48.445895: step: 664/463, loss: 0.0898468941450119 2023-01-22 11:44:49.033449: step: 666/463, loss: 0.024765724316239357 2023-01-22 11:44:49.656715: step: 668/463, loss: 0.032509028911590576 2023-01-22 11:44:50.273163: step: 670/463, loss: 0.0723862424492836 2023-01-22 11:44:50.879141: step: 672/463, loss: 0.0674487054347992 2023-01-22 11:44:51.428961: step: 674/463, loss: 0.3608826696872711 2023-01-22 11:44:52.049968: step: 676/463, loss: 0.019310543313622475 2023-01-22 11:44:52.712464: step: 678/463, loss: 0.05626765638589859 2023-01-22 11:44:53.307292: step: 680/463, loss: 0.01838117279112339 2023-01-22 11:44:53.926160: step: 682/463, loss: 0.24786485731601715 2023-01-22 11:44:54.496825: step: 684/463, loss: 0.08619832992553711 2023-01-22 11:44:55.125046: step: 686/463, loss: 0.04205776005983353 2023-01-22 11:44:55.692236: step: 688/463, loss: 0.04023507609963417 2023-01-22 11:44:56.302400: step: 690/463, loss: 0.06675650924444199 2023-01-22 11:44:56.843515: step: 692/463, loss: 0.023906243965029716 2023-01-22 11:44:57.447798: step: 694/463, loss: 0.08106192201375961 2023-01-22 11:44:58.050622: step: 696/463, loss: 0.030015112832188606 2023-01-22 11:44:58.749830: step: 698/463, loss: 0.09135319292545319 2023-01-22 11:44:59.417855: step: 700/463, loss: 0.0055384705774486065 2023-01-22 11:45:00.062425: step: 702/463, loss: 0.1287613809108734 2023-01-22 11:45:00.675232: step: 704/463, loss: 0.11701430380344391 2023-01-22 11:45:01.277301: step: 706/463, loss: 0.009921292774379253 2023-01-22 11:45:01.920505: step: 708/463, loss: 0.03269552066922188 2023-01-22 11:45:02.573605: step: 710/463, loss: 0.15854285657405853 2023-01-22 11:45:03.165130: step: 712/463, loss: 0.054304350167512894 2023-01-22 11:45:03.746216: step: 714/463, loss: 0.00865735299885273 2023-01-22 11:45:04.342643: step: 716/463, loss: 0.09294646233320236 2023-01-22 11:45:05.003437: step: 718/463, loss: 0.10276513546705246 2023-01-22 11:45:05.651328: step: 720/463, loss: 0.40639618039131165 2023-01-22 11:45:06.219324: step: 722/463, loss: 0.14518621563911438 2023-01-22 11:45:06.869159: step: 724/463, loss: 0.06630782037973404 2023-01-22 11:45:07.448948: step: 726/463, loss: 0.05585341528058052 2023-01-22 11:45:08.038060: step: 728/463, loss: 0.008550072088837624 2023-01-22 11:45:08.620125: step: 730/463, loss: 0.017883699387311935 2023-01-22 11:45:09.278670: step: 732/463, loss: 0.03385250270366669 2023-01-22 11:45:09.910781: step: 734/463, loss: 0.2274763584136963 2023-01-22 11:45:10.519841: step: 736/463, loss: 0.007525038439780474 2023-01-22 11:45:11.135773: step: 738/463, loss: 0.06738711148500443 2023-01-22 11:45:11.718173: step: 740/463, loss: 0.10399018228054047 2023-01-22 11:45:12.318206: step: 742/463, loss: 0.3323141038417816 2023-01-22 11:45:12.917115: step: 744/463, loss: 0.051415394991636276 2023-01-22 11:45:13.572781: step: 746/463, loss: 0.11459618806838989 2023-01-22 11:45:14.180685: step: 748/463, loss: 0.0683571845293045 2023-01-22 11:45:14.802775: step: 750/463, loss: 0.04533589631319046 2023-01-22 11:45:15.457227: step: 752/463, loss: 0.02209385484457016 2023-01-22 11:45:16.111784: step: 754/463, loss: 0.012202427722513676 2023-01-22 11:45:16.731447: step: 756/463, loss: 0.028971416875720024 2023-01-22 11:45:17.268414: step: 758/463, loss: 0.052366968244314194 2023-01-22 11:45:17.914754: step: 760/463, loss: 0.046881161630153656 2023-01-22 11:45:18.554139: step: 762/463, loss: 0.051236484199762344 2023-01-22 11:45:19.201131: step: 764/463, loss: 0.147416353225708 2023-01-22 11:45:19.805547: step: 766/463, loss: 0.01865893229842186 2023-01-22 11:45:20.345587: step: 768/463, loss: 0.01721896044909954 2023-01-22 11:45:20.998945: step: 770/463, loss: 0.02192249521613121 2023-01-22 11:45:21.694260: step: 772/463, loss: 0.20242924988269806 2023-01-22 11:45:22.338005: step: 774/463, loss: 0.0168751310557127 2023-01-22 11:45:22.960150: step: 776/463, loss: 0.05086857080459595 2023-01-22 11:45:23.527061: step: 778/463, loss: 0.04998449608683586 2023-01-22 11:45:24.184969: step: 780/463, loss: 0.10142890363931656 2023-01-22 11:45:24.796995: step: 782/463, loss: 0.10507799685001373 2023-01-22 11:45:25.396254: step: 784/463, loss: 0.015436510555446148 2023-01-22 11:45:26.094693: step: 786/463, loss: 7.141009330749512 2023-01-22 11:45:26.677200: step: 788/463, loss: 0.02300797775387764 2023-01-22 11:45:27.375754: step: 790/463, loss: 0.025129621848464012 2023-01-22 11:45:28.018208: step: 792/463, loss: 0.2690727412700653 2023-01-22 11:45:28.626496: step: 794/463, loss: 0.07071518152952194 2023-01-22 11:45:29.305529: step: 796/463, loss: 0.2657509446144104 2023-01-22 11:45:29.951538: step: 798/463, loss: 0.01510376762598753 2023-01-22 11:45:30.648848: step: 800/463, loss: 0.05102163180708885 2023-01-22 11:45:31.257702: step: 802/463, loss: 0.3241473436355591 2023-01-22 11:45:31.879333: step: 804/463, loss: 0.01089604664593935 2023-01-22 11:45:32.462195: step: 806/463, loss: 0.01734016090631485 2023-01-22 11:45:33.092083: step: 808/463, loss: 0.4902510941028595 2023-01-22 11:45:33.715983: step: 810/463, loss: 0.04687965661287308 2023-01-22 11:45:34.326642: step: 812/463, loss: 0.05287744104862213 2023-01-22 11:45:35.037248: step: 814/463, loss: 0.0641651302576065 2023-01-22 11:45:35.599237: step: 816/463, loss: 0.6312924027442932 2023-01-22 11:45:36.185426: step: 818/463, loss: 0.05096278712153435 2023-01-22 11:45:36.776140: step: 820/463, loss: 0.23992712795734406 2023-01-22 11:45:37.346784: step: 822/463, loss: 0.05443326383829117 2023-01-22 11:45:37.982710: step: 824/463, loss: 0.04396352916955948 2023-01-22 11:45:38.564035: step: 826/463, loss: 0.04003969579935074 2023-01-22 11:45:39.160847: step: 828/463, loss: 0.06067924574017525 2023-01-22 11:45:39.769646: step: 830/463, loss: 0.19403262436389923 2023-01-22 11:45:40.389746: step: 832/463, loss: 0.012331809848546982 2023-01-22 11:45:41.002372: step: 834/463, loss: 0.2989160418510437 2023-01-22 11:45:41.600830: step: 836/463, loss: 0.10023240000009537 2023-01-22 11:45:42.206352: step: 838/463, loss: 0.06966780126094818 2023-01-22 11:45:42.811722: step: 840/463, loss: 0.045513153076171875 2023-01-22 11:45:43.456915: step: 842/463, loss: 0.018077896907925606 2023-01-22 11:45:44.133995: step: 844/463, loss: 0.05654609948396683 2023-01-22 11:45:44.820867: step: 846/463, loss: 0.07529649883508682 2023-01-22 11:45:45.498856: step: 848/463, loss: 0.04621685668826103 2023-01-22 11:45:46.068393: step: 850/463, loss: 0.04698904976248741 2023-01-22 11:45:46.708174: step: 852/463, loss: 0.035035859793424606 2023-01-22 11:45:47.327160: step: 854/463, loss: 0.030948083847761154 2023-01-22 11:45:47.962495: step: 856/463, loss: 0.033439572900533676 2023-01-22 11:45:48.615151: step: 858/463, loss: 0.054421015083789825 2023-01-22 11:45:49.264409: step: 860/463, loss: 0.07792049646377563 2023-01-22 11:45:49.874325: step: 862/463, loss: 0.04047496244311333 2023-01-22 11:45:50.480008: step: 864/463, loss: 0.045256830751895905 2023-01-22 11:45:51.074559: step: 866/463, loss: 0.186228409409523 2023-01-22 11:45:51.689358: step: 868/463, loss: 0.4676240384578705 2023-01-22 11:45:52.358730: step: 870/463, loss: 0.33627933263778687 2023-01-22 11:45:52.952260: step: 872/463, loss: 0.04321770742535591 2023-01-22 11:45:53.616869: step: 874/463, loss: 0.02136065997183323 2023-01-22 11:45:54.171609: step: 876/463, loss: 0.14712314307689667 2023-01-22 11:45:54.787268: step: 878/463, loss: 0.25933483242988586 2023-01-22 11:45:55.598563: step: 880/463, loss: 0.18477784097194672 2023-01-22 11:45:56.239113: step: 882/463, loss: 0.2101510763168335 2023-01-22 11:45:56.857612: step: 884/463, loss: 0.04477240517735481 2023-01-22 11:45:57.457605: step: 886/463, loss: 0.030523166060447693 2023-01-22 11:45:58.089106: step: 888/463, loss: 0.07675585150718689 2023-01-22 11:45:58.728017: step: 890/463, loss: 0.02099417708814144 2023-01-22 11:45:59.299109: step: 892/463, loss: 0.06957526504993439 2023-01-22 11:45:59.913840: step: 894/463, loss: 0.006169005297124386 2023-01-22 11:46:00.534920: step: 896/463, loss: 0.04441026225686073 2023-01-22 11:46:01.107062: step: 898/463, loss: 0.020933441817760468 2023-01-22 11:46:01.745372: step: 900/463, loss: 0.03602498024702072 2023-01-22 11:46:02.377336: step: 902/463, loss: 0.45958322286605835 2023-01-22 11:46:02.986797: step: 904/463, loss: 0.0485958494246006 2023-01-22 11:46:03.586989: step: 906/463, loss: 0.015496374107897282 2023-01-22 11:46:04.199381: step: 908/463, loss: 0.038306329399347305 2023-01-22 11:46:04.920924: step: 910/463, loss: 0.008287390694022179 2023-01-22 11:46:05.521490: step: 912/463, loss: 0.06858644634485245 2023-01-22 11:46:06.114325: step: 914/463, loss: 0.04479951784014702 2023-01-22 11:46:06.695565: step: 916/463, loss: 0.037823405116796494 2023-01-22 11:46:07.290917: step: 918/463, loss: 0.07592744380235672 2023-01-22 11:46:08.002835: step: 920/463, loss: 0.10402562469244003 2023-01-22 11:46:08.544564: step: 922/463, loss: 0.2988271713256836 2023-01-22 11:46:09.148932: step: 924/463, loss: 0.013648463413119316 2023-01-22 11:46:09.699135: step: 926/463, loss: 0.031043766066432 ================================================== Loss: 0.136 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3259977064220183, 'r': 0.33713235294117644, 'f1': 0.33147154850746263}, 'combined': 0.24424219363707772, 'epoch': 21} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34115144521892055, 'r': 0.4135834156652171, 'f1': 0.37389177918052036}, 'combined': 0.28981085276193447, 'epoch': 21} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28278576982892695, 'r': 0.345030834914611, 'f1': 0.3108226495726496}, 'combined': 0.2290272154745839, 'epoch': 21} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3296157309344655, 'r': 0.4195935545443334, 'f1': 0.3692016072335097}, 'combined': 0.28617540847764866, 'epoch': 21} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2891425563197767, 'r': 0.3379730829088851, 'f1': 0.3116567186228914}, 'combined': 0.2296417926694989, 'epoch': 21} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32708897633734824, 'r': 0.40314918866579413, 'f1': 0.36115793929055906}, 'combined': 0.27994060366062473, 'epoch': 21} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.26785714285714285, 'r': 0.375, 'f1': 0.31250000000000006}, 'combined': 0.20833333333333337, 'epoch': 21} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.28205128205128205, 'r': 0.4782608695652174, 'f1': 0.3548387096774194}, 'combined': 0.1774193548387097, 'epoch': 21} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39285714285714285, 'r': 0.28448275862068967, 'f1': 0.33}, 'combined': 0.22, 'epoch': 21} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3279711174242424, 'r': 0.3285934535104364, 'f1': 0.32828199052132695}, 'combined': 0.24189199301571457, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33699381251830834, 'r': 0.40575541764612494, 'f1': 0.3681917384478598}, 'combined': 0.2853926393710684, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32926829268292684, 'r': 0.38571428571428573, 'f1': 0.35526315789473684}, 'combined': 0.23684210526315788, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29021588054776093, 'r': 0.325460313859064, 'f1': 0.30682931199235547}, 'combined': 0.2260847562048935, 'epoch': 13} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32837198696491027, 'r': 0.4053341714098111, 'f1': 0.3628166009822085}, 'combined': 0.28122626487616165, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3333333333333333, 'r': 0.4782608695652174, 'f1': 0.3928571428571428}, 'combined': 0.1964285714285714, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30241528588023536, 'r': 0.33799355480732185, 'f1': 0.319216135095804}, 'combined': 0.2352118890179608, 'epoch': 18} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3270213595028262, 'r': 0.3847310111797955, 'f1': 0.3535366048679202}, 'combined': 0.2740331578402061, 'epoch': 18} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 18} ****************************** Epoch: 22 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 11:48:49.791357: step: 2/463, loss: 0.0845867320895195 2023-01-22 11:48:50.445089: step: 4/463, loss: 0.055136144161224365 2023-01-22 11:48:51.093845: step: 6/463, loss: 0.10939284414052963 2023-01-22 11:48:51.700953: step: 8/463, loss: 0.008023769594728947 2023-01-22 11:48:52.366502: step: 10/463, loss: 0.22477668523788452 2023-01-22 11:48:52.985559: step: 12/463, loss: 0.01714356429874897 2023-01-22 11:48:53.668875: step: 14/463, loss: 0.02461835741996765 2023-01-22 11:48:54.321453: step: 16/463, loss: 0.010595309548079967 2023-01-22 11:48:54.929043: step: 18/463, loss: 0.025409379974007607 2023-01-22 11:48:55.543535: step: 20/463, loss: 0.010673393495380878 2023-01-22 11:48:56.156803: step: 22/463, loss: 0.1065206527709961 2023-01-22 11:48:56.750308: step: 24/463, loss: 0.024548014625906944 2023-01-22 11:48:57.415617: step: 26/463, loss: 0.06019691377878189 2023-01-22 11:48:58.047730: step: 28/463, loss: 0.049314118921756744 2023-01-22 11:48:58.627188: step: 30/463, loss: 0.02850780449807644 2023-01-22 11:48:59.299786: step: 32/463, loss: 0.07022781670093536 2023-01-22 11:48:59.947984: step: 34/463, loss: 0.012563971802592278 2023-01-22 11:49:00.592934: step: 36/463, loss: 0.046172142028808594 2023-01-22 11:49:01.155417: step: 38/463, loss: 0.2738581895828247 2023-01-22 11:49:01.782150: step: 40/463, loss: 0.02885405160486698 2023-01-22 11:49:02.437633: step: 42/463, loss: 0.03181665018200874 2023-01-22 11:49:03.037850: step: 44/463, loss: 0.029062461107969284 2023-01-22 11:49:03.604024: step: 46/463, loss: 0.002969985129311681 2023-01-22 11:49:04.221876: step: 48/463, loss: 0.017055639997124672 2023-01-22 11:49:04.836489: step: 50/463, loss: 0.03472554683685303 2023-01-22 11:49:05.443769: step: 52/463, loss: 0.6524677276611328 2023-01-22 11:49:06.144910: step: 54/463, loss: 0.06620614975690842 2023-01-22 11:49:06.774912: step: 56/463, loss: 0.005133425816893578 2023-01-22 11:49:07.363413: step: 58/463, loss: 0.36377400159835815 2023-01-22 11:49:07.933806: step: 60/463, loss: 0.07737980037927628 2023-01-22 11:49:08.599436: step: 62/463, loss: 0.033549461513757706 2023-01-22 11:49:09.237069: step: 64/463, loss: 0.011558969505131245 2023-01-22 11:49:09.880602: step: 66/463, loss: 0.12483873218297958 2023-01-22 11:49:10.454383: step: 68/463, loss: 0.013231134973466396 2023-01-22 11:49:11.052952: step: 70/463, loss: 0.007984398864209652 2023-01-22 11:49:11.619655: step: 72/463, loss: 0.0069300392642617226 2023-01-22 11:49:12.283758: step: 74/463, loss: 0.02041485719382763 2023-01-22 11:49:12.888816: step: 76/463, loss: 0.015719905495643616 2023-01-22 11:49:13.469219: step: 78/463, loss: 0.10667863488197327 2023-01-22 11:49:14.124819: step: 80/463, loss: 0.05963480472564697 2023-01-22 11:49:14.736372: step: 82/463, loss: 0.06292406469583511 2023-01-22 11:49:15.388150: step: 84/463, loss: 0.04972478002309799 2023-01-22 11:49:16.091292: step: 86/463, loss: 0.019618812948465347 2023-01-22 11:49:16.691864: step: 88/463, loss: 0.055557891726493835 2023-01-22 11:49:17.204178: step: 90/463, loss: 0.0877290740609169 2023-01-22 11:49:17.852386: step: 92/463, loss: 0.005274410359561443 2023-01-22 11:49:18.453759: step: 94/463, loss: 0.08651704341173172 2023-01-22 11:49:19.039965: step: 96/463, loss: 0.061573419719934464 2023-01-22 11:49:19.726163: step: 98/463, loss: 0.03633836284279823 2023-01-22 11:49:20.348305: step: 100/463, loss: 0.05434788763523102 2023-01-22 11:49:20.947012: step: 102/463, loss: 0.053024712949991226 2023-01-22 11:49:21.527468: step: 104/463, loss: 1.0160967111587524 2023-01-22 11:49:22.192996: step: 106/463, loss: 0.07882880419492722 2023-01-22 11:49:22.762969: step: 108/463, loss: 0.2912052273750305 2023-01-22 11:49:23.334641: step: 110/463, loss: 0.05966493487358093 2023-01-22 11:49:24.026466: step: 112/463, loss: 0.010922667570412159 2023-01-22 11:49:24.587666: step: 114/463, loss: 14.686832427978516 2023-01-22 11:49:25.106099: step: 116/463, loss: 0.0011662731412798166 2023-01-22 11:49:25.688071: step: 118/463, loss: 0.0961623340845108 2023-01-22 11:49:26.418641: step: 120/463, loss: 0.5014403462409973 2023-01-22 11:49:27.086307: step: 122/463, loss: 0.750453531742096 2023-01-22 11:49:27.664354: step: 124/463, loss: 0.2161422073841095 2023-01-22 11:49:28.237120: step: 126/463, loss: 0.12633661925792694 2023-01-22 11:49:28.828051: step: 128/463, loss: 0.012642335146665573 2023-01-22 11:49:29.451287: step: 130/463, loss: 0.0704883486032486 2023-01-22 11:49:30.079709: step: 132/463, loss: 0.01433706097304821 2023-01-22 11:49:30.730916: step: 134/463, loss: 0.001600269926711917 2023-01-22 11:49:31.352848: step: 136/463, loss: 0.04002080857753754 2023-01-22 11:49:31.966275: step: 138/463, loss: 0.17413103580474854 2023-01-22 11:49:32.601387: step: 140/463, loss: 0.030261939391493797 2023-01-22 11:49:33.301295: step: 142/463, loss: 0.002093089744448662 2023-01-22 11:49:33.879892: step: 144/463, loss: 0.05285588651895523 2023-01-22 11:49:34.487866: step: 146/463, loss: 0.03583652526140213 2023-01-22 11:49:35.112431: step: 148/463, loss: 0.01902391016483307 2023-01-22 11:49:35.686611: step: 150/463, loss: 0.004319375846534967 2023-01-22 11:49:36.441723: step: 152/463, loss: 0.18238073587417603 2023-01-22 11:49:37.101146: step: 154/463, loss: 0.022598445415496826 2023-01-22 11:49:37.708262: step: 156/463, loss: 0.04447271302342415 2023-01-22 11:49:38.340508: step: 158/463, loss: 0.015802208334207535 2023-01-22 11:49:39.006042: step: 160/463, loss: 0.4238336980342865 2023-01-22 11:49:39.641030: step: 162/463, loss: 0.0012278840877115726 2023-01-22 11:49:40.280643: step: 164/463, loss: 0.023539898917078972 2023-01-22 11:49:40.879715: step: 166/463, loss: 0.07528958469629288 2023-01-22 11:49:41.529089: step: 168/463, loss: 0.07155793905258179 2023-01-22 11:49:42.155137: step: 170/463, loss: 0.18612246215343475 2023-01-22 11:49:42.825507: step: 172/463, loss: 0.016407344490289688 2023-01-22 11:49:43.469941: step: 174/463, loss: 0.17127607762813568 2023-01-22 11:49:44.059867: step: 176/463, loss: 0.049892738461494446 2023-01-22 11:49:44.743248: step: 178/463, loss: 0.03789215534925461 2023-01-22 11:49:45.311272: step: 180/463, loss: 0.002035361248999834 2023-01-22 11:49:45.980498: step: 182/463, loss: 0.051167696714401245 2023-01-22 11:49:46.640820: step: 184/463, loss: 0.041595276445150375 2023-01-22 11:49:47.242713: step: 186/463, loss: 0.062073949724435806 2023-01-22 11:49:47.826061: step: 188/463, loss: 0.0916956290602684 2023-01-22 11:49:48.447277: step: 190/463, loss: 0.03840848430991173 2023-01-22 11:49:49.042244: step: 192/463, loss: 0.2149547040462494 2023-01-22 11:49:49.714859: step: 194/463, loss: 0.035140734165906906 2023-01-22 11:49:50.311325: step: 196/463, loss: 0.009174899198114872 2023-01-22 11:49:50.975469: step: 198/463, loss: 0.03373410925269127 2023-01-22 11:49:51.599819: step: 200/463, loss: 0.05076950788497925 2023-01-22 11:49:52.222415: step: 202/463, loss: 0.04133080318570137 2023-01-22 11:49:52.838011: step: 204/463, loss: 0.07168225944042206 2023-01-22 11:49:53.485240: step: 206/463, loss: 0.017578035593032837 2023-01-22 11:49:54.089443: step: 208/463, loss: 0.05091509595513344 2023-01-22 11:49:54.716176: step: 210/463, loss: 0.14064140617847443 2023-01-22 11:49:55.310919: step: 212/463, loss: 0.014854749664664268 2023-01-22 11:49:55.868186: step: 214/463, loss: 0.018066704273223877 2023-01-22 11:49:56.530230: step: 216/463, loss: 0.077924445271492 2023-01-22 11:49:57.095715: step: 218/463, loss: 0.045320775359869 2023-01-22 11:49:57.785288: step: 220/463, loss: 0.02095767669379711 2023-01-22 11:49:58.407303: step: 222/463, loss: 0.11336018145084381 2023-01-22 11:49:59.104137: step: 224/463, loss: 0.06618199497461319 2023-01-22 11:49:59.719529: step: 226/463, loss: 0.032144319266080856 2023-01-22 11:50:00.402032: step: 228/463, loss: 0.01780652441084385 2023-01-22 11:50:00.957203: step: 230/463, loss: 0.02497301995754242 2023-01-22 11:50:01.541281: step: 232/463, loss: 0.056469835340976715 2023-01-22 11:50:02.208728: step: 234/463, loss: 0.01768629625439644 2023-01-22 11:50:02.839491: step: 236/463, loss: 0.04045190289616585 2023-01-22 11:50:03.489322: step: 238/463, loss: 0.08207670599222183 2023-01-22 11:50:04.082186: step: 240/463, loss: 0.10335548222064972 2023-01-22 11:50:04.664017: step: 242/463, loss: 0.003417340340092778 2023-01-22 11:50:05.313331: step: 244/463, loss: 0.0884276032447815 2023-01-22 11:50:05.848561: step: 246/463, loss: 0.20539116859436035 2023-01-22 11:50:06.470034: step: 248/463, loss: 0.04645257815718651 2023-01-22 11:50:07.113581: step: 250/463, loss: 0.008458147756755352 2023-01-22 11:50:07.781990: step: 252/463, loss: 0.024484852328896523 2023-01-22 11:50:08.399761: step: 254/463, loss: 0.05897795781493187 2023-01-22 11:50:09.015345: step: 256/463, loss: 0.10937213897705078 2023-01-22 11:50:09.605815: step: 258/463, loss: 0.026765946298837662 2023-01-22 11:50:10.201549: step: 260/463, loss: 0.009288814850151539 2023-01-22 11:50:10.821236: step: 262/463, loss: 0.0523076094686985 2023-01-22 11:50:11.425673: step: 264/463, loss: 0.05381142720580101 2023-01-22 11:50:12.067010: step: 266/463, loss: 0.35319727659225464 2023-01-22 11:50:12.717755: step: 268/463, loss: 0.10190211236476898 2023-01-22 11:50:13.358053: step: 270/463, loss: 0.018330315127968788 2023-01-22 11:50:13.933024: step: 272/463, loss: 0.07109789550304413 2023-01-22 11:50:14.505187: step: 274/463, loss: 0.016778625547885895 2023-01-22 11:50:15.084033: step: 276/463, loss: 0.01825481280684471 2023-01-22 11:50:15.676436: step: 278/463, loss: 0.013317803852260113 2023-01-22 11:50:16.313461: step: 280/463, loss: 0.0818164125084877 2023-01-22 11:50:16.929221: step: 282/463, loss: 0.04808545857667923 2023-01-22 11:50:17.591363: step: 284/463, loss: 0.010904652997851372 2023-01-22 11:50:18.182558: step: 286/463, loss: 0.075593963265419 2023-01-22 11:50:18.768576: step: 288/463, loss: 0.3891074061393738 2023-01-22 11:50:19.365416: step: 290/463, loss: 0.01172602642327547 2023-01-22 11:50:19.948771: step: 292/463, loss: 0.007304557599127293 2023-01-22 11:50:20.592571: step: 294/463, loss: 0.024865420535206795 2023-01-22 11:50:21.242148: step: 296/463, loss: 0.41033780574798584 2023-01-22 11:50:21.813881: step: 298/463, loss: 0.030303271487355232 2023-01-22 11:50:22.449724: step: 300/463, loss: 0.07504013925790787 2023-01-22 11:50:23.005194: step: 302/463, loss: 0.0629776269197464 2023-01-22 11:50:23.616793: step: 304/463, loss: 0.024631312116980553 2023-01-22 11:50:24.252326: step: 306/463, loss: 0.3940405547618866 2023-01-22 11:50:24.840755: step: 308/463, loss: 0.011382722295820713 2023-01-22 11:50:25.448527: step: 310/463, loss: 0.008826471865177155 2023-01-22 11:50:26.052190: step: 312/463, loss: 0.021369939669966698 2023-01-22 11:50:26.686479: step: 314/463, loss: 0.022341836243867874 2023-01-22 11:50:27.208408: step: 316/463, loss: 0.021838104352355003 2023-01-22 11:50:27.786130: step: 318/463, loss: 0.031893715262413025 2023-01-22 11:50:28.416831: step: 320/463, loss: 0.02486550249159336 2023-01-22 11:50:29.006296: step: 322/463, loss: 0.033111006021499634 2023-01-22 11:50:29.605163: step: 324/463, loss: 0.00798120815306902 2023-01-22 11:50:30.279212: step: 326/463, loss: 0.010326381772756577 2023-01-22 11:50:30.858136: step: 328/463, loss: 0.0234376210719347 2023-01-22 11:50:31.408604: step: 330/463, loss: 0.02511567622423172 2023-01-22 11:50:31.956375: step: 332/463, loss: 0.010141482576727867 2023-01-22 11:50:32.541757: step: 334/463, loss: 0.14301179349422455 2023-01-22 11:50:33.221809: step: 336/463, loss: 0.0386841744184494 2023-01-22 11:50:33.822041: step: 338/463, loss: 0.018559901043772697 2023-01-22 11:50:34.449695: step: 340/463, loss: 0.07188437879085541 2023-01-22 11:50:35.050430: step: 342/463, loss: 0.035321999341249466 2023-01-22 11:50:35.619860: step: 344/463, loss: 0.01174361165612936 2023-01-22 11:50:36.174025: step: 346/463, loss: 0.01247880794107914 2023-01-22 11:50:36.831983: step: 348/463, loss: 0.020676346495747566 2023-01-22 11:50:37.423200: step: 350/463, loss: 0.04405326768755913 2023-01-22 11:50:37.977801: step: 352/463, loss: 0.026704121381044388 2023-01-22 11:50:38.635397: step: 354/463, loss: 0.04153049737215042 2023-01-22 11:50:39.264365: step: 356/463, loss: 0.034516237676143646 2023-01-22 11:50:39.871890: step: 358/463, loss: 0.04867745563387871 2023-01-22 11:50:40.426366: step: 360/463, loss: 0.03840473294258118 2023-01-22 11:50:41.007155: step: 362/463, loss: 0.04419498145580292 2023-01-22 11:50:41.713880: step: 364/463, loss: 0.10025046765804291 2023-01-22 11:50:42.331544: step: 366/463, loss: 0.008328991942107677 2023-01-22 11:50:42.959830: step: 368/463, loss: 0.4963262677192688 2023-01-22 11:50:43.590851: step: 370/463, loss: 0.023153891786932945 2023-01-22 11:50:44.238075: step: 372/463, loss: 0.06186239793896675 2023-01-22 11:50:44.866498: step: 374/463, loss: 0.07661768794059753 2023-01-22 11:50:45.484950: step: 376/463, loss: 0.03685622289776802 2023-01-22 11:50:46.030983: step: 378/463, loss: 0.5296953916549683 2023-01-22 11:50:46.593898: step: 380/463, loss: 0.04537447914481163 2023-01-22 11:50:47.283703: step: 382/463, loss: 0.07284008711576462 2023-01-22 11:50:47.857902: step: 384/463, loss: 0.0784781202673912 2023-01-22 11:50:48.470667: step: 386/463, loss: 0.004668379668146372 2023-01-22 11:50:49.147821: step: 388/463, loss: 0.04326092451810837 2023-01-22 11:50:49.807202: step: 390/463, loss: 0.29147958755493164 2023-01-22 11:50:50.379778: step: 392/463, loss: 0.07475181668996811 2023-01-22 11:50:51.039108: step: 394/463, loss: 0.036760374903678894 2023-01-22 11:50:51.636122: step: 396/463, loss: 0.033690277487039566 2023-01-22 11:50:52.293092: step: 398/463, loss: 0.045227665454149246 2023-01-22 11:50:52.846364: step: 400/463, loss: 0.0044243293814361095 2023-01-22 11:50:53.471835: step: 402/463, loss: 0.0560561865568161 2023-01-22 11:50:54.101178: step: 404/463, loss: 0.08145659416913986 2023-01-22 11:50:54.691851: step: 406/463, loss: 0.05191114544868469 2023-01-22 11:50:55.293025: step: 408/463, loss: 0.016322242096066475 2023-01-22 11:50:55.938386: step: 410/463, loss: 0.054643746465444565 2023-01-22 11:50:56.538092: step: 412/463, loss: 0.5975512266159058 2023-01-22 11:50:57.171591: step: 414/463, loss: 0.05992380157113075 2023-01-22 11:50:57.788678: step: 416/463, loss: 0.08229263126850128 2023-01-22 11:50:58.501430: step: 418/463, loss: 0.2635195851325989 2023-01-22 11:50:59.081715: step: 420/463, loss: 0.027859417721629143 2023-01-22 11:50:59.678478: step: 422/463, loss: 0.9496694207191467 2023-01-22 11:51:00.304364: step: 424/463, loss: 0.09949032962322235 2023-01-22 11:51:00.881790: step: 426/463, loss: 0.019798634573817253 2023-01-22 11:51:01.521488: step: 428/463, loss: 0.2418743222951889 2023-01-22 11:51:02.114233: step: 430/463, loss: 0.04883335158228874 2023-01-22 11:51:02.607377: step: 432/463, loss: 0.015203049406409264 2023-01-22 11:51:03.259623: step: 434/463, loss: 0.09791691601276398 2023-01-22 11:51:03.842115: step: 436/463, loss: 0.027008995413780212 2023-01-22 11:51:04.495738: step: 438/463, loss: 0.0531264990568161 2023-01-22 11:51:05.134664: step: 440/463, loss: 0.027465471997857094 2023-01-22 11:51:05.808312: step: 442/463, loss: 0.04369494691491127 2023-01-22 11:51:06.375902: step: 444/463, loss: 1.1143391132354736 2023-01-22 11:51:07.051344: step: 446/463, loss: 0.02195393294095993 2023-01-22 11:51:07.631967: step: 448/463, loss: 0.030329877510666847 2023-01-22 11:51:08.160633: step: 450/463, loss: 0.17108339071273804 2023-01-22 11:51:08.817598: step: 452/463, loss: 0.0349968820810318 2023-01-22 11:51:09.425299: step: 454/463, loss: 0.07262814044952393 2023-01-22 11:51:10.064183: step: 456/463, loss: 0.007454673293977976 2023-01-22 11:51:10.650307: step: 458/463, loss: 0.054338742047548294 2023-01-22 11:51:11.281384: step: 460/463, loss: 0.01876160316169262 2023-01-22 11:51:11.907883: step: 462/463, loss: 0.03726070001721382 2023-01-22 11:51:12.530274: step: 464/463, loss: 0.0870211124420166 2023-01-22 11:51:13.048597: step: 466/463, loss: 0.016749411821365356 2023-01-22 11:51:13.693455: step: 468/463, loss: 0.026697583496570587 2023-01-22 11:51:14.349887: step: 470/463, loss: 0.0754585713148117 2023-01-22 11:51:14.942987: step: 472/463, loss: 0.030796436592936516 2023-01-22 11:51:15.591072: step: 474/463, loss: 0.04457510635256767 2023-01-22 11:51:16.207074: step: 476/463, loss: 0.07331067323684692 2023-01-22 11:51:16.809811: step: 478/463, loss: 0.019977036863565445 2023-01-22 11:51:17.418641: step: 480/463, loss: 0.017528826370835304 2023-01-22 11:51:18.026019: step: 482/463, loss: 0.04221212863922119 2023-01-22 11:51:18.634393: step: 484/463, loss: 0.015823766589164734 2023-01-22 11:51:19.256451: step: 486/463, loss: 0.03506346419453621 2023-01-22 11:51:19.855555: step: 488/463, loss: 0.09809498488903046 2023-01-22 11:51:20.485068: step: 490/463, loss: 0.2927551865577698 2023-01-22 11:51:21.047520: step: 492/463, loss: 0.6947611570358276 2023-01-22 11:51:21.667283: step: 494/463, loss: 0.7500637769699097 2023-01-22 11:51:22.323271: step: 496/463, loss: 0.13056710362434387 2023-01-22 11:51:23.010871: step: 498/463, loss: 0.013169165700674057 2023-01-22 11:51:23.586475: step: 500/463, loss: 0.01657673716545105 2023-01-22 11:51:24.175905: step: 502/463, loss: 0.08607902377843857 2023-01-22 11:51:24.815492: step: 504/463, loss: 0.04229207709431648 2023-01-22 11:51:25.435823: step: 506/463, loss: 0.01435808651149273 2023-01-22 11:51:26.107394: step: 508/463, loss: 0.0861494243144989 2023-01-22 11:51:26.736596: step: 510/463, loss: 0.03750699758529663 2023-01-22 11:51:27.348539: step: 512/463, loss: 0.20145943760871887 2023-01-22 11:51:27.939175: step: 514/463, loss: 0.20735104382038116 2023-01-22 11:51:28.488930: step: 516/463, loss: 0.06900104135274887 2023-01-22 11:51:29.067022: step: 518/463, loss: 0.02022702246904373 2023-01-22 11:51:29.693804: step: 520/463, loss: 0.1689458042383194 2023-01-22 11:51:30.342462: step: 522/463, loss: 0.0423530638217926 2023-01-22 11:51:30.887782: step: 524/463, loss: 0.02521570771932602 2023-01-22 11:51:31.484526: step: 526/463, loss: 0.019922278821468353 2023-01-22 11:51:32.068822: step: 528/463, loss: 0.5382683873176575 2023-01-22 11:51:32.629651: step: 530/463, loss: 8.46787166595459 2023-01-22 11:51:33.253497: step: 532/463, loss: 0.08376821875572205 2023-01-22 11:51:33.917804: step: 534/463, loss: 0.0022371658124029636 2023-01-22 11:51:34.566346: step: 536/463, loss: 0.08728696405887604 2023-01-22 11:51:35.181993: step: 538/463, loss: 0.06019327789545059 2023-01-22 11:51:35.792333: step: 540/463, loss: 0.06385639309883118 2023-01-22 11:51:36.437817: step: 542/463, loss: 0.03904721513390541 2023-01-22 11:51:37.051446: step: 544/463, loss: 0.15966156125068665 2023-01-22 11:51:37.637434: step: 546/463, loss: 0.06001008674502373 2023-01-22 11:51:38.169177: step: 548/463, loss: 0.01917579211294651 2023-01-22 11:51:38.796172: step: 550/463, loss: 0.057754792273044586 2023-01-22 11:51:39.519359: step: 552/463, loss: 0.21190311014652252 2023-01-22 11:51:40.239639: step: 554/463, loss: 0.021517438814044 2023-01-22 11:51:40.921581: step: 556/463, loss: 0.05135061591863632 2023-01-22 11:51:41.554884: step: 558/463, loss: 0.4718446731567383 2023-01-22 11:51:42.178830: step: 560/463, loss: 0.029273126274347305 2023-01-22 11:51:42.790017: step: 562/463, loss: 0.028569400310516357 2023-01-22 11:51:43.433910: step: 564/463, loss: 0.25605496764183044 2023-01-22 11:51:43.985811: step: 566/463, loss: 0.016277015209197998 2023-01-22 11:51:44.561902: step: 568/463, loss: 0.06891877204179764 2023-01-22 11:51:45.145179: step: 570/463, loss: 0.3750668168067932 2023-01-22 11:51:45.794927: step: 572/463, loss: 0.0220035370439291 2023-01-22 11:51:46.381492: step: 574/463, loss: 0.040341295301914215 2023-01-22 11:51:46.960876: step: 576/463, loss: 0.10551276803016663 2023-01-22 11:51:47.567639: step: 578/463, loss: 0.06054641678929329 2023-01-22 11:51:48.211656: step: 580/463, loss: 0.11303631961345673 2023-01-22 11:51:48.894378: step: 582/463, loss: 0.012128137052059174 2023-01-22 11:51:49.495930: step: 584/463, loss: 0.0325421467423439 2023-01-22 11:51:50.130697: step: 586/463, loss: 0.03865436464548111 2023-01-22 11:51:50.793355: step: 588/463, loss: 0.021554769948124886 2023-01-22 11:51:51.439208: step: 590/463, loss: 0.060039252042770386 2023-01-22 11:51:52.083940: step: 592/463, loss: 0.1342383623123169 2023-01-22 11:51:52.662097: step: 594/463, loss: 0.023065274581313133 2023-01-22 11:51:53.237446: step: 596/463, loss: 0.043455976992845535 2023-01-22 11:51:53.849052: step: 598/463, loss: 0.05807381495833397 2023-01-22 11:51:54.460235: step: 600/463, loss: 0.18533416092395782 2023-01-22 11:51:54.988324: step: 602/463, loss: 0.059211406856775284 2023-01-22 11:51:55.615257: step: 604/463, loss: 0.011857549659907818 2023-01-22 11:51:56.245576: step: 606/463, loss: 0.09485988318920135 2023-01-22 11:51:56.885179: step: 608/463, loss: 0.08637198805809021 2023-01-22 11:51:57.450883: step: 610/463, loss: 0.041609227657318115 2023-01-22 11:51:58.083239: step: 612/463, loss: 0.06289152055978775 2023-01-22 11:51:58.700492: step: 614/463, loss: 0.14114490151405334 2023-01-22 11:51:59.300259: step: 616/463, loss: 0.07924644649028778 2023-01-22 11:51:59.806417: step: 618/463, loss: 0.013395792804658413 2023-01-22 11:52:00.372957: step: 620/463, loss: 0.027372779324650764 2023-01-22 11:52:01.056765: step: 622/463, loss: 0.012691386975347996 2023-01-22 11:52:01.669227: step: 624/463, loss: 0.18363018333911896 2023-01-22 11:52:02.192604: step: 626/463, loss: 0.021132204681634903 2023-01-22 11:52:02.862016: step: 628/463, loss: 0.06695834547281265 2023-01-22 11:52:03.459232: step: 630/463, loss: 0.10878331959247589 2023-01-22 11:52:04.103465: step: 632/463, loss: 0.05908231437206268 2023-01-22 11:52:04.678424: step: 634/463, loss: 0.04573296383023262 2023-01-22 11:52:05.255571: step: 636/463, loss: 0.06109517440199852 2023-01-22 11:52:05.927458: step: 638/463, loss: 0.047686848789453506 2023-01-22 11:52:06.463096: step: 640/463, loss: 0.04905715957283974 2023-01-22 11:52:07.088080: step: 642/463, loss: 0.017701629549264908 2023-01-22 11:52:07.716439: step: 644/463, loss: 0.01931239850819111 2023-01-22 11:52:08.324340: step: 646/463, loss: 0.014090669341385365 2023-01-22 11:52:08.951118: step: 648/463, loss: 0.46045982837677 2023-01-22 11:52:09.553820: step: 650/463, loss: 0.011598912999033928 2023-01-22 11:52:10.221123: step: 652/463, loss: 0.4908623993396759 2023-01-22 11:52:10.899803: step: 654/463, loss: 0.04658355563879013 2023-01-22 11:52:11.542321: step: 656/463, loss: 0.03780944272875786 2023-01-22 11:52:12.150483: step: 658/463, loss: 0.043316230177879333 2023-01-22 11:52:12.771051: step: 660/463, loss: 0.03826010972261429 2023-01-22 11:52:13.504564: step: 662/463, loss: 0.25017714500427246 2023-01-22 11:52:14.095264: step: 664/463, loss: 0.06542626023292542 2023-01-22 11:52:14.738571: step: 666/463, loss: 0.01193663664162159 2023-01-22 11:52:15.359236: step: 668/463, loss: 0.7353683710098267 2023-01-22 11:52:15.971143: step: 670/463, loss: 0.11667991429567337 2023-01-22 11:52:16.566791: step: 672/463, loss: 0.09642888605594635 2023-01-22 11:52:17.182386: step: 674/463, loss: 0.0075606112368404865 2023-01-22 11:52:17.838576: step: 676/463, loss: 0.012751021422445774 2023-01-22 11:52:18.411560: step: 678/463, loss: 0.09923321008682251 2023-01-22 11:52:19.137600: step: 680/463, loss: 0.004397675395011902 2023-01-22 11:52:19.817180: step: 682/463, loss: 0.1764136701822281 2023-01-22 11:52:20.446001: step: 684/463, loss: 0.0556979775428772 2023-01-22 11:52:21.006792: step: 686/463, loss: 0.15813681483268738 2023-01-22 11:52:21.547484: step: 688/463, loss: 0.02768089435994625 2023-01-22 11:52:22.156669: step: 690/463, loss: 0.5395810008049011 2023-01-22 11:52:22.823584: step: 692/463, loss: 0.06268332153558731 2023-01-22 11:52:23.445379: step: 694/463, loss: 0.01339671015739441 2023-01-22 11:52:24.032447: step: 696/463, loss: 0.025722624734044075 2023-01-22 11:52:24.633117: step: 698/463, loss: 0.06820755451917648 2023-01-22 11:52:25.269955: step: 700/463, loss: 0.09104981273412704 2023-01-22 11:52:25.886795: step: 702/463, loss: 0.7487509846687317 2023-01-22 11:52:26.539012: step: 704/463, loss: 0.2617780566215515 2023-01-22 11:52:27.109594: step: 706/463, loss: 0.004109368193894625 2023-01-22 11:52:27.756756: step: 708/463, loss: 0.15713359415531158 2023-01-22 11:52:28.358073: step: 710/463, loss: 0.9107391238212585 2023-01-22 11:52:29.037648: step: 712/463, loss: 0.13343945145606995 2023-01-22 11:52:29.584053: step: 714/463, loss: 0.006756212562322617 2023-01-22 11:52:30.226300: step: 716/463, loss: 0.01382492296397686 2023-01-22 11:52:30.865127: step: 718/463, loss: 0.030257657170295715 2023-01-22 11:52:31.505836: step: 720/463, loss: 0.06503091752529144 2023-01-22 11:52:32.142071: step: 722/463, loss: 0.012537148781120777 2023-01-22 11:52:32.764944: step: 724/463, loss: 0.025673488155007362 2023-01-22 11:52:33.340845: step: 726/463, loss: 0.03498581424355507 2023-01-22 11:52:33.950494: step: 728/463, loss: 0.0922277495265007 2023-01-22 11:52:34.551324: step: 730/463, loss: 0.01603466272354126 2023-01-22 11:52:35.157971: step: 732/463, loss: 0.10051598399877548 2023-01-22 11:52:35.795584: step: 734/463, loss: 0.04973762854933739 2023-01-22 11:52:36.491083: step: 736/463, loss: 0.04688718169927597 2023-01-22 11:52:37.092215: step: 738/463, loss: 0.12407837063074112 2023-01-22 11:52:37.653901: step: 740/463, loss: 0.005452557932585478 2023-01-22 11:52:38.252083: step: 742/463, loss: 0.039612412452697754 2023-01-22 11:52:38.886587: step: 744/463, loss: 0.0330551452934742 2023-01-22 11:52:39.518309: step: 746/463, loss: 0.038212452083826065 2023-01-22 11:52:40.127803: step: 748/463, loss: 0.0795302540063858 2023-01-22 11:52:40.716475: step: 750/463, loss: 0.01583004929125309 2023-01-22 11:52:41.295694: step: 752/463, loss: 0.019439078867435455 2023-01-22 11:52:41.877617: step: 754/463, loss: 0.11109906435012817 2023-01-22 11:52:42.496258: step: 756/463, loss: 0.10784592479467392 2023-01-22 11:52:43.153524: step: 758/463, loss: 0.04472780600190163 2023-01-22 11:52:43.774732: step: 760/463, loss: 0.05449187755584717 2023-01-22 11:52:44.386583: step: 762/463, loss: 0.022585907950997353 2023-01-22 11:52:45.226994: step: 764/463, loss: 1.8076122999191284 2023-01-22 11:52:45.775447: step: 766/463, loss: 0.01235184259712696 2023-01-22 11:52:46.463433: step: 768/463, loss: 0.03791683912277222 2023-01-22 11:52:47.037778: step: 770/463, loss: 0.038350652903318405 2023-01-22 11:52:47.629186: step: 772/463, loss: 0.015076812356710434 2023-01-22 11:52:48.205218: step: 774/463, loss: 0.05062127113342285 2023-01-22 11:52:48.838570: step: 776/463, loss: 0.11897704005241394 2023-01-22 11:52:49.447131: step: 778/463, loss: 0.025867735967040062 2023-01-22 11:52:50.022641: step: 780/463, loss: 0.010863203555345535 2023-01-22 11:52:50.622654: step: 782/463, loss: 0.0549979992210865 2023-01-22 11:52:51.272347: step: 784/463, loss: 0.07117492705583572 2023-01-22 11:52:51.884895: step: 786/463, loss: 0.05387565866112709 2023-01-22 11:52:52.556906: step: 788/463, loss: 0.024039993062615395 2023-01-22 11:52:53.287978: step: 790/463, loss: 0.04744725301861763 2023-01-22 11:52:53.913462: step: 792/463, loss: 0.09313289821147919 2023-01-22 11:52:54.528994: step: 794/463, loss: 0.020370492711663246 2023-01-22 11:52:55.128757: step: 796/463, loss: 0.1677919179201126 2023-01-22 11:52:55.698305: step: 798/463, loss: 0.015784895047545433 2023-01-22 11:52:56.270875: step: 800/463, loss: 0.045689504593610764 2023-01-22 11:52:56.877235: step: 802/463, loss: 0.0012720375088974833 2023-01-22 11:52:57.433905: step: 804/463, loss: 0.040277477353811264 2023-01-22 11:52:58.068266: step: 806/463, loss: 0.05956878513097763 2023-01-22 11:52:58.668867: step: 808/463, loss: 0.07988844066858292 2023-01-22 11:52:59.310402: step: 810/463, loss: 0.024668551981449127 2023-01-22 11:52:59.925542: step: 812/463, loss: 0.027776291593909264 2023-01-22 11:53:00.592419: step: 814/463, loss: 0.06997717171907425 2023-01-22 11:53:01.251115: step: 816/463, loss: 0.04439396783709526 2023-01-22 11:53:01.850868: step: 818/463, loss: 0.10873216390609741 2023-01-22 11:53:02.575303: step: 820/463, loss: 0.3765268921852112 2023-01-22 11:53:03.183116: step: 822/463, loss: 0.089633509516716 2023-01-22 11:53:03.780754: step: 824/463, loss: 0.012148439884185791 2023-01-22 11:53:04.396274: step: 826/463, loss: 0.018590301275253296 2023-01-22 11:53:05.047787: step: 828/463, loss: 0.03449949622154236 2023-01-22 11:53:05.626502: step: 830/463, loss: 0.048406459391117096 2023-01-22 11:53:06.272869: step: 832/463, loss: 0.013000254519283772 2023-01-22 11:53:06.913624: step: 834/463, loss: 0.016134848818182945 2023-01-22 11:53:07.551171: step: 836/463, loss: 0.023261623457074165 2023-01-22 11:53:08.153062: step: 838/463, loss: 0.04515976086258888 2023-01-22 11:53:08.791113: step: 840/463, loss: 0.04853673651814461 2023-01-22 11:53:09.511593: step: 842/463, loss: 0.03299034386873245 2023-01-22 11:53:10.138724: step: 844/463, loss: 0.0691247284412384 2023-01-22 11:53:10.817311: step: 846/463, loss: 0.03056233562529087 2023-01-22 11:53:11.391369: step: 848/463, loss: 0.023293346166610718 2023-01-22 11:53:12.067499: step: 850/463, loss: 0.22583970427513123 2023-01-22 11:53:12.683877: step: 852/463, loss: 0.07189960032701492 2023-01-22 11:53:13.296473: step: 854/463, loss: 0.010108246468007565 2023-01-22 11:53:13.919470: step: 856/463, loss: 0.025587771087884903 2023-01-22 11:53:14.559156: step: 858/463, loss: 0.01010210532695055 2023-01-22 11:53:15.292215: step: 860/463, loss: 1.3255618810653687 2023-01-22 11:53:15.952516: step: 862/463, loss: 0.10920163244009018 2023-01-22 11:53:16.589273: step: 864/463, loss: 0.042911626398563385 2023-01-22 11:53:17.205789: step: 866/463, loss: 0.0149594871327281 2023-01-22 11:53:17.827360: step: 868/463, loss: 0.026271982118487358 2023-01-22 11:53:18.460724: step: 870/463, loss: 0.1612677127122879 2023-01-22 11:53:19.055159: step: 872/463, loss: 0.011072476394474506 2023-01-22 11:53:19.735480: step: 874/463, loss: 0.02402637153863907 2023-01-22 11:53:20.329905: step: 876/463, loss: 0.011492603458464146 2023-01-22 11:53:20.918635: step: 878/463, loss: 0.04561467096209526 2023-01-22 11:53:21.521667: step: 880/463, loss: 0.03588796406984329 2023-01-22 11:53:22.151180: step: 882/463, loss: 0.00598631938919425 2023-01-22 11:53:22.717849: step: 884/463, loss: 0.01217762753367424 2023-01-22 11:53:23.268012: step: 886/463, loss: 0.033257659524679184 2023-01-22 11:53:23.845919: step: 888/463, loss: 0.02424241602420807 2023-01-22 11:53:24.515056: step: 890/463, loss: 0.14332902431488037 2023-01-22 11:53:25.013512: step: 892/463, loss: 0.08317562937736511 2023-01-22 11:53:25.607075: step: 894/463, loss: 0.010957978665828705 2023-01-22 11:53:26.252495: step: 896/463, loss: 0.25936129689216614 2023-01-22 11:53:26.953528: step: 898/463, loss: 0.3190154731273651 2023-01-22 11:53:27.624791: step: 900/463, loss: 0.02404329553246498 2023-01-22 11:53:28.316522: step: 902/463, loss: 0.006931992247700691 2023-01-22 11:53:28.918896: step: 904/463, loss: 0.05773597210645676 2023-01-22 11:53:29.540866: step: 906/463, loss: 0.06446735560894012 2023-01-22 11:53:30.164265: step: 908/463, loss: 0.0477750338613987 2023-01-22 11:53:30.762608: step: 910/463, loss: 0.018832825124263763 2023-01-22 11:53:31.357843: step: 912/463, loss: 0.16611790657043457 2023-01-22 11:53:31.982201: step: 914/463, loss: 0.11244844645261765 2023-01-22 11:53:32.556092: step: 916/463, loss: 0.06754232197999954 2023-01-22 11:53:33.270735: step: 918/463, loss: 0.5503419637680054 2023-01-22 11:53:33.882835: step: 920/463, loss: 0.01725826971232891 2023-01-22 11:53:34.561068: step: 922/463, loss: 0.09535322338342667 2023-01-22 11:53:35.165020: step: 924/463, loss: 0.029331080615520477 2023-01-22 11:53:35.801179: step: 926/463, loss: 0.01061540748924017 ================================================== Loss: 0.146 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3179433946679544, 'r': 0.3082904642795535, 'f1': 0.3130425330931112}, 'combined': 0.23066291912123982, 'epoch': 22} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33037013294124856, 'r': 0.4005130563874144, 'f1': 0.36207578342293883}, 'combined': 0.28065204265318705, 'epoch': 22} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.27927520848716497, 'r': 0.3169005212055496, 'f1': 0.29690057720057716}, 'combined': 0.21876884635832, 'epoch': 22} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3170561067009344, 'r': 0.40535390112224245, 'f1': 0.355808829706333}, 'combined': 0.2757944038872055, 'epoch': 22} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29493217109843906, 'r': 0.32011613257743293, 'f1': 0.30700855663022225}, 'combined': 0.22621683120121638, 'epoch': 22} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3102676306098442, 'r': 0.3884048831715145, 'f1': 0.34496694929845534}, 'combined': 0.26739064969545345, 'epoch': 22} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31756756756756754, 'r': 0.3357142857142857, 'f1': 0.3263888888888889}, 'combined': 0.2175925925925926, 'epoch': 22} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.22727272727272727, 'r': 0.32608695652173914, 'f1': 0.26785714285714285}, 'combined': 0.13392857142857142, 'epoch': 22} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.40789473684210525, 'r': 0.2672413793103448, 'f1': 0.3229166666666667}, 'combined': 0.2152777777777778, 'epoch': 22} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3279711174242424, 'r': 0.3285934535104364, 'f1': 0.32828199052132695}, 'combined': 0.24189199301571457, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33699381251830834, 'r': 0.40575541764612494, 'f1': 0.3681917384478598}, 'combined': 0.2853926393710684, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32926829268292684, 'r': 0.38571428571428573, 'f1': 0.35526315789473684}, 'combined': 0.23684210526315788, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29021588054776093, 'r': 0.325460313859064, 'f1': 0.30682931199235547}, 'combined': 0.2260847562048935, 'epoch': 13} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32837198696491027, 'r': 0.4053341714098111, 'f1': 0.3628166009822085}, 'combined': 0.28122626487616165, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3333333333333333, 'r': 0.4782608695652174, 'f1': 0.3928571428571428}, 'combined': 0.1964285714285714, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30241528588023536, 'r': 0.33799355480732185, 'f1': 0.319216135095804}, 'combined': 0.2352118890179608, 'epoch': 18} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3270213595028262, 'r': 0.3847310111797955, 'f1': 0.3535366048679202}, 'combined': 0.2740331578402061, 'epoch': 18} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 18} ****************************** Epoch: 23 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 11:56:14.641023: step: 2/463, loss: 0.014017235487699509 2023-01-22 11:56:15.298548: step: 4/463, loss: 0.03739186003804207 2023-01-22 11:56:15.877079: step: 6/463, loss: 0.005126425065100193 2023-01-22 11:56:16.578179: step: 8/463, loss: 0.17005835473537445 2023-01-22 11:56:17.187591: step: 10/463, loss: 0.05091765895485878 2023-01-22 11:56:17.843340: step: 12/463, loss: 0.10212653875350952 2023-01-22 11:56:18.416832: step: 14/463, loss: 0.10305936634540558 2023-01-22 11:56:19.003474: step: 16/463, loss: 0.05862724408507347 2023-01-22 11:56:19.578168: step: 18/463, loss: 0.1589479297399521 2023-01-22 11:56:20.224657: step: 20/463, loss: 0.07201798260211945 2023-01-22 11:56:20.826231: step: 22/463, loss: 0.014142402447760105 2023-01-22 11:56:21.452891: step: 24/463, loss: 0.16887494921684265 2023-01-22 11:56:22.061707: step: 26/463, loss: 0.024210674688220024 2023-01-22 11:56:22.679480: step: 28/463, loss: 0.03990819677710533 2023-01-22 11:56:23.270016: step: 30/463, loss: 0.045158859342336655 2023-01-22 11:56:23.867563: step: 32/463, loss: 0.033288709819316864 2023-01-22 11:56:24.458743: step: 34/463, loss: 0.016043180599808693 2023-01-22 11:56:25.092863: step: 36/463, loss: 0.011747236363589764 2023-01-22 11:56:25.733776: step: 38/463, loss: 0.4941384494304657 2023-01-22 11:56:26.408946: step: 40/463, loss: 0.013136547058820724 2023-01-22 11:56:27.049842: step: 42/463, loss: 0.02664070390164852 2023-01-22 11:56:27.673257: step: 44/463, loss: 0.0012856022221967578 2023-01-22 11:56:28.287318: step: 46/463, loss: 0.049490366131067276 2023-01-22 11:56:28.891670: step: 48/463, loss: 0.2771098017692566 2023-01-22 11:56:29.508659: step: 50/463, loss: 0.13030368089675903 2023-01-22 11:56:30.160761: step: 52/463, loss: 0.27063220739364624 2023-01-22 11:56:30.798796: step: 54/463, loss: 0.36873123049736023 2023-01-22 11:56:31.385653: step: 56/463, loss: 0.006062122527509928 2023-01-22 11:56:32.008521: step: 58/463, loss: 0.053145766258239746 2023-01-22 11:56:32.704230: step: 60/463, loss: 0.02280028536915779 2023-01-22 11:56:33.317089: step: 62/463, loss: 0.04428766295313835 2023-01-22 11:56:33.925328: step: 64/463, loss: 0.03361102566123009 2023-01-22 11:56:34.507642: step: 66/463, loss: 0.07194822281599045 2023-01-22 11:56:35.116801: step: 68/463, loss: 0.021001052111387253 2023-01-22 11:56:35.758602: step: 70/463, loss: 0.037330612540245056 2023-01-22 11:56:36.441054: step: 72/463, loss: 0.040480900555849075 2023-01-22 11:56:37.096332: step: 74/463, loss: 0.036421723663806915 2023-01-22 11:56:37.727434: step: 76/463, loss: 0.18305079638957977 2023-01-22 11:56:38.344739: step: 78/463, loss: 0.029274918138980865 2023-01-22 11:56:38.996691: step: 80/463, loss: 0.04791848734021187 2023-01-22 11:56:39.626179: step: 82/463, loss: 0.042708199471235275 2023-01-22 11:56:40.198300: step: 84/463, loss: 0.00914548896253109 2023-01-22 11:56:40.783237: step: 86/463, loss: 0.09204190969467163 2023-01-22 11:56:41.319886: step: 88/463, loss: 0.07539556175470352 2023-01-22 11:56:41.940100: step: 90/463, loss: 0.05940885469317436 2023-01-22 11:56:42.648054: step: 92/463, loss: 0.02225893922150135 2023-01-22 11:56:43.275916: step: 94/463, loss: 0.013536185957491398 2023-01-22 11:56:43.877248: step: 96/463, loss: 0.028028864413499832 2023-01-22 11:56:44.447141: step: 98/463, loss: 0.03247951716184616 2023-01-22 11:56:45.132912: step: 100/463, loss: 0.17127849161624908 2023-01-22 11:56:45.716079: step: 102/463, loss: 0.014644253998994827 2023-01-22 11:56:46.333896: step: 104/463, loss: 0.03772616758942604 2023-01-22 11:56:46.985048: step: 106/463, loss: 0.05379296839237213 2023-01-22 11:56:47.561258: step: 108/463, loss: 0.052070584148168564 2023-01-22 11:56:48.127043: step: 110/463, loss: 0.02765987627208233 2023-01-22 11:56:48.762530: step: 112/463, loss: 0.006331499200314283 2023-01-22 11:56:49.407463: step: 114/463, loss: 0.006876499857753515 2023-01-22 11:56:50.010598: step: 116/463, loss: 0.02339460887014866 2023-01-22 11:56:50.604642: step: 118/463, loss: 0.01504096295684576 2023-01-22 11:56:51.216096: step: 120/463, loss: 0.011009931564331055 2023-01-22 11:56:51.887006: step: 122/463, loss: 0.06653671711683273 2023-01-22 11:56:52.460494: step: 124/463, loss: 0.06888808310031891 2023-01-22 11:56:53.072842: step: 126/463, loss: 0.09010745584964752 2023-01-22 11:56:53.669398: step: 128/463, loss: 0.02575107105076313 2023-01-22 11:56:54.267144: step: 130/463, loss: 0.03347647190093994 2023-01-22 11:56:54.924452: step: 132/463, loss: 0.03668920695781708 2023-01-22 11:56:55.517100: step: 134/463, loss: 0.24982771277427673 2023-01-22 11:56:56.180581: step: 136/463, loss: 0.013085935264825821 2023-01-22 11:56:56.797669: step: 138/463, loss: 0.017120778560638428 2023-01-22 11:56:57.381821: step: 140/463, loss: 0.005012511275708675 2023-01-22 11:56:57.990490: step: 142/463, loss: 0.022262532263994217 2023-01-22 11:56:58.658964: step: 144/463, loss: 0.026318036019802094 2023-01-22 11:56:59.204617: step: 146/463, loss: 0.10604467242956161 2023-01-22 11:56:59.765762: step: 148/463, loss: 0.022634495049715042 2023-01-22 11:57:00.309020: step: 150/463, loss: 0.02065012790262699 2023-01-22 11:57:00.895038: step: 152/463, loss: 0.014927259646356106 2023-01-22 11:57:01.533084: step: 154/463, loss: 0.052358001470565796 2023-01-22 11:57:02.181898: step: 156/463, loss: 0.0679406225681305 2023-01-22 11:57:02.829770: step: 158/463, loss: 0.0314582921564579 2023-01-22 11:57:03.460740: step: 160/463, loss: 0.19728310406208038 2023-01-22 11:57:04.026786: step: 162/463, loss: 0.03380490839481354 2023-01-22 11:57:04.652515: step: 164/463, loss: 0.037048306316137314 2023-01-22 11:57:05.211026: step: 166/463, loss: 0.06547125428915024 2023-01-22 11:57:05.792266: step: 168/463, loss: 0.019977785646915436 2023-01-22 11:57:06.430842: step: 170/463, loss: 0.0842510312795639 2023-01-22 11:57:07.053463: step: 172/463, loss: 0.01584748551249504 2023-01-22 11:57:07.654013: step: 174/463, loss: 0.01813764125108719 2023-01-22 11:57:08.388566: step: 176/463, loss: 0.03895140811800957 2023-01-22 11:57:08.968106: step: 178/463, loss: 0.16105754673480988 2023-01-22 11:57:09.589600: step: 180/463, loss: 0.00824294425547123 2023-01-22 11:57:10.107024: step: 182/463, loss: 0.011547056958079338 2023-01-22 11:57:10.728329: step: 184/463, loss: 0.047988519072532654 2023-01-22 11:57:11.295862: step: 186/463, loss: 0.07069321721792221 2023-01-22 11:57:11.921933: step: 188/463, loss: 0.011686659418046474 2023-01-22 11:57:12.514420: step: 190/463, loss: 0.03281664475798607 2023-01-22 11:57:13.103633: step: 192/463, loss: 0.021711131557822227 2023-01-22 11:57:13.685819: step: 194/463, loss: 0.00901354942470789 2023-01-22 11:57:14.322081: step: 196/463, loss: 0.0044713797979056835 2023-01-22 11:57:14.917377: step: 198/463, loss: 0.06275229156017303 2023-01-22 11:57:15.577185: step: 200/463, loss: 0.09739736467599869 2023-01-22 11:57:16.191776: step: 202/463, loss: 0.024020083248615265 2023-01-22 11:57:16.793217: step: 204/463, loss: 0.029642995446920395 2023-01-22 11:57:17.411109: step: 206/463, loss: 0.08722873777151108 2023-01-22 11:57:17.979124: step: 208/463, loss: 0.04675403982400894 2023-01-22 11:57:18.527066: step: 210/463, loss: 0.0379650853574276 2023-01-22 11:57:19.126453: step: 212/463, loss: 0.05018343776464462 2023-01-22 11:57:19.765363: step: 214/463, loss: 0.00901576317846775 2023-01-22 11:57:20.317770: step: 216/463, loss: 0.0041709220968186855 2023-01-22 11:57:20.945228: step: 218/463, loss: 0.049517132341861725 2023-01-22 11:57:21.585283: step: 220/463, loss: 0.049965955317020416 2023-01-22 11:57:22.192358: step: 222/463, loss: 0.04325241968035698 2023-01-22 11:57:22.795805: step: 224/463, loss: 0.0521477572619915 2023-01-22 11:57:23.466313: step: 226/463, loss: 0.012771560810506344 2023-01-22 11:57:24.002530: step: 228/463, loss: 0.06846003234386444 2023-01-22 11:57:24.613196: step: 230/463, loss: 0.04135928675532341 2023-01-22 11:57:25.199450: step: 232/463, loss: 0.039707936346530914 2023-01-22 11:57:25.843054: step: 234/463, loss: 0.860537588596344 2023-01-22 11:57:26.512600: step: 236/463, loss: 0.4724205434322357 2023-01-22 11:57:27.054641: step: 238/463, loss: 0.11152902245521545 2023-01-22 11:57:27.655636: step: 240/463, loss: 0.2870498299598694 2023-01-22 11:57:28.292412: step: 242/463, loss: 0.08885440230369568 2023-01-22 11:57:28.994233: step: 244/463, loss: 0.025001036003232002 2023-01-22 11:57:29.508035: step: 246/463, loss: 0.016671447083353996 2023-01-22 11:57:30.121114: step: 248/463, loss: 0.02292332425713539 2023-01-22 11:57:30.738217: step: 250/463, loss: 0.058331362903118134 2023-01-22 11:57:31.375132: step: 252/463, loss: 0.4987170696258545 2023-01-22 11:57:31.954748: step: 254/463, loss: 0.008618383668363094 2023-01-22 11:57:32.507293: step: 256/463, loss: 0.0457315631210804 2023-01-22 11:57:33.188395: step: 258/463, loss: 0.046473924070596695 2023-01-22 11:57:33.818592: step: 260/463, loss: 0.028331683948636055 2023-01-22 11:57:34.450026: step: 262/463, loss: 0.014770310372114182 2023-01-22 11:57:35.081080: step: 264/463, loss: 0.11662633717060089 2023-01-22 11:57:35.669628: step: 266/463, loss: 0.023096852004528046 2023-01-22 11:57:36.454842: step: 268/463, loss: 0.015185844153165817 2023-01-22 11:57:37.091874: step: 270/463, loss: 0.026402989402413368 2023-01-22 11:57:37.782476: step: 272/463, loss: 0.007479024585336447 2023-01-22 11:57:38.354355: step: 274/463, loss: 0.03724350035190582 2023-01-22 11:57:39.091435: step: 276/463, loss: 0.0006274238112382591 2023-01-22 11:57:39.733953: step: 278/463, loss: 0.01200056355446577 2023-01-22 11:57:40.316735: step: 280/463, loss: 0.07404100149869919 2023-01-22 11:57:40.905936: step: 282/463, loss: 0.052078571170568466 2023-01-22 11:57:41.537809: step: 284/463, loss: 0.05625596269965172 2023-01-22 11:57:42.174821: step: 286/463, loss: 0.08361687511205673 2023-01-22 11:57:42.779711: step: 288/463, loss: 0.0884314775466919 2023-01-22 11:57:43.356725: step: 290/463, loss: 0.006218751892447472 2023-01-22 11:57:43.942546: step: 292/463, loss: 0.01155450101941824 2023-01-22 11:57:44.562894: step: 294/463, loss: 0.02805587835609913 2023-01-22 11:57:45.150096: step: 296/463, loss: 0.01961454004049301 2023-01-22 11:57:45.798354: step: 298/463, loss: 0.03049517422914505 2023-01-22 11:57:46.359248: step: 300/463, loss: 0.012401064857840538 2023-01-22 11:57:46.947393: step: 302/463, loss: 0.06635325402021408 2023-01-22 11:57:47.533421: step: 304/463, loss: 0.04234880208969116 2023-01-22 11:57:48.092291: step: 306/463, loss: 0.24127665162086487 2023-01-22 11:57:48.679944: step: 308/463, loss: 0.03843383118510246 2023-01-22 11:57:49.322046: step: 310/463, loss: 0.0485689714550972 2023-01-22 11:57:49.969113: step: 312/463, loss: 0.05422547459602356 2023-01-22 11:57:50.663473: step: 314/463, loss: 0.020614638924598694 2023-01-22 11:57:51.231836: step: 316/463, loss: 0.04707881063222885 2023-01-22 11:57:51.862483: step: 318/463, loss: 0.1666991263628006 2023-01-22 11:57:52.458875: step: 320/463, loss: 0.023372367024421692 2023-01-22 11:57:53.033051: step: 322/463, loss: 0.013147193938493729 2023-01-22 11:57:53.658392: step: 324/463, loss: 0.40948766469955444 2023-01-22 11:57:54.232947: step: 326/463, loss: 0.11535700410604477 2023-01-22 11:57:54.820174: step: 328/463, loss: 0.04028856009244919 2023-01-22 11:57:55.389516: step: 330/463, loss: 1.2974011898040771 2023-01-22 11:57:55.995333: step: 332/463, loss: 0.1290600448846817 2023-01-22 11:57:56.601593: step: 334/463, loss: 0.022823981940746307 2023-01-22 11:57:57.418602: step: 336/463, loss: 0.007736002095043659 2023-01-22 11:57:58.025910: step: 338/463, loss: 0.008360418491065502 2023-01-22 11:57:58.634704: step: 340/463, loss: 0.0717049241065979 2023-01-22 11:57:59.181275: step: 342/463, loss: 0.009852655231952667 2023-01-22 11:57:59.780749: step: 344/463, loss: 0.039784081280231476 2023-01-22 11:58:00.472442: step: 346/463, loss: 0.0010238605318590999 2023-01-22 11:58:01.107367: step: 348/463, loss: 0.01217496208846569 2023-01-22 11:58:01.671717: step: 350/463, loss: 0.03211135417222977 2023-01-22 11:58:02.341865: step: 352/463, loss: 0.007680355105549097 2023-01-22 11:58:02.961054: step: 354/463, loss: 0.022676914930343628 2023-01-22 11:58:03.552412: step: 356/463, loss: 0.1013747975230217 2023-01-22 11:58:04.110599: step: 358/463, loss: 0.0569462925195694 2023-01-22 11:58:04.689353: step: 360/463, loss: 0.038382649421691895 2023-01-22 11:58:05.325419: step: 362/463, loss: 0.027897289022803307 2023-01-22 11:58:05.960285: step: 364/463, loss: 0.012575906701385975 2023-01-22 11:58:06.619537: step: 366/463, loss: 1.4045130014419556 2023-01-22 11:58:07.242872: step: 368/463, loss: 0.03215888887643814 2023-01-22 11:58:07.849592: step: 370/463, loss: 0.042204152792692184 2023-01-22 11:58:08.437710: step: 372/463, loss: 0.08064927160739899 2023-01-22 11:58:09.068569: step: 374/463, loss: 0.030682578682899475 2023-01-22 11:58:09.715255: step: 376/463, loss: 0.020798666402697563 2023-01-22 11:58:10.372488: step: 378/463, loss: 0.013296670280396938 2023-01-22 11:58:11.026997: step: 380/463, loss: 0.009307530708611012 2023-01-22 11:58:11.643576: step: 382/463, loss: 0.050121158361434937 2023-01-22 11:58:12.257363: step: 384/463, loss: 0.033195070922374725 2023-01-22 11:58:12.905272: step: 386/463, loss: 0.09362410008907318 2023-01-22 11:58:13.557445: step: 388/463, loss: 0.024581091478466988 2023-01-22 11:58:14.218814: step: 390/463, loss: 0.015465802513062954 2023-01-22 11:58:14.878354: step: 392/463, loss: 0.00540462089702487 2023-01-22 11:58:15.487008: step: 394/463, loss: 0.011925828643143177 2023-01-22 11:58:16.103285: step: 396/463, loss: 0.06564971059560776 2023-01-22 11:58:16.698690: step: 398/463, loss: 0.3053116500377655 2023-01-22 11:58:17.269539: step: 400/463, loss: 0.015455009415745735 2023-01-22 11:58:17.863349: step: 402/463, loss: 0.04941024258732796 2023-01-22 11:58:18.481414: step: 404/463, loss: 0.34712353348731995 2023-01-22 11:58:19.094709: step: 406/463, loss: 0.029072783887386322 2023-01-22 11:58:19.649838: step: 408/463, loss: 0.01681593991816044 2023-01-22 11:58:20.250418: step: 410/463, loss: 0.7202590107917786 2023-01-22 11:58:20.884668: step: 412/463, loss: 0.6915916204452515 2023-01-22 11:58:21.527977: step: 414/463, loss: 0.005883147940039635 2023-01-22 11:58:22.101252: step: 416/463, loss: 0.9827921986579895 2023-01-22 11:58:22.689243: step: 418/463, loss: 0.03431384637951851 2023-01-22 11:58:23.314506: step: 420/463, loss: 0.052616264671087265 2023-01-22 11:58:23.903216: step: 422/463, loss: 0.1803695410490036 2023-01-22 11:58:24.526845: step: 424/463, loss: 0.13254913687705994 2023-01-22 11:58:25.116973: step: 426/463, loss: 0.0035180964041501284 2023-01-22 11:58:25.720351: step: 428/463, loss: 0.026401324197649956 2023-01-22 11:58:26.308251: step: 430/463, loss: 0.044410549104213715 2023-01-22 11:58:26.923311: step: 432/463, loss: 0.049371637403964996 2023-01-22 11:58:27.503427: step: 434/463, loss: 0.041617993265390396 2023-01-22 11:58:28.120473: step: 436/463, loss: 0.015298226848244667 2023-01-22 11:58:28.782740: step: 438/463, loss: 0.011941668577492237 2023-01-22 11:58:29.410734: step: 440/463, loss: 0.056415703147649765 2023-01-22 11:58:30.020727: step: 442/463, loss: 0.1737140566110611 2023-01-22 11:58:30.571048: step: 444/463, loss: 0.05153241753578186 2023-01-22 11:58:31.157189: step: 446/463, loss: 0.01888580061495304 2023-01-22 11:58:31.751161: step: 448/463, loss: 0.029533660039305687 2023-01-22 11:58:32.297546: step: 450/463, loss: 0.10582781583070755 2023-01-22 11:58:32.872118: step: 452/463, loss: 1.2449015378952026 2023-01-22 11:58:33.462285: step: 454/463, loss: 0.017602896317839622 2023-01-22 11:58:34.023901: step: 456/463, loss: 0.10646458715200424 2023-01-22 11:58:34.642713: step: 458/463, loss: 0.08195698261260986 2023-01-22 11:58:35.278859: step: 460/463, loss: 0.002595989964902401 2023-01-22 11:58:35.873425: step: 462/463, loss: 0.047766849398612976 2023-01-22 11:58:36.436123: step: 464/463, loss: 0.02091420255601406 2023-01-22 11:58:37.014295: step: 466/463, loss: 0.014015436172485352 2023-01-22 11:58:37.622813: step: 468/463, loss: 0.016696209087967873 2023-01-22 11:58:38.267182: step: 470/463, loss: 0.10650330781936646 2023-01-22 11:58:38.878521: step: 472/463, loss: 0.04598746821284294 2023-01-22 11:58:39.452816: step: 474/463, loss: 0.045018021017313004 2023-01-22 11:58:40.048151: step: 476/463, loss: 0.01934024691581726 2023-01-22 11:58:40.667067: step: 478/463, loss: 0.16567553579807281 2023-01-22 11:58:41.323790: step: 480/463, loss: 0.05272384732961655 2023-01-22 11:58:41.974074: step: 482/463, loss: 0.007351537235081196 2023-01-22 11:58:42.662478: step: 484/463, loss: 0.11825446784496307 2023-01-22 11:58:43.321260: step: 486/463, loss: 0.049544557929039 2023-01-22 11:58:43.947294: step: 488/463, loss: 0.3693927526473999 2023-01-22 11:58:44.559283: step: 490/463, loss: 0.03016485832631588 2023-01-22 11:58:45.208395: step: 492/463, loss: 0.036366693675518036 2023-01-22 11:58:45.813075: step: 494/463, loss: 0.09071648120880127 2023-01-22 11:58:46.329484: step: 496/463, loss: 0.012566464953124523 2023-01-22 11:58:46.957530: step: 498/463, loss: 0.010331925936043262 2023-01-22 11:58:47.546764: step: 500/463, loss: 0.028316188603639603 2023-01-22 11:58:48.179275: step: 502/463, loss: 0.022236144170165062 2023-01-22 11:58:48.797612: step: 504/463, loss: 0.016863614320755005 2023-01-22 11:58:49.386069: step: 506/463, loss: 0.10762310773134232 2023-01-22 11:58:49.993371: step: 508/463, loss: 0.0328141450881958 2023-01-22 11:58:50.578494: step: 510/463, loss: 0.014507602900266647 2023-01-22 11:58:51.184515: step: 512/463, loss: 0.3011797368526459 2023-01-22 11:58:51.779082: step: 514/463, loss: 0.01493346318602562 2023-01-22 11:58:52.353639: step: 516/463, loss: 0.005622792057693005 2023-01-22 11:58:52.900745: step: 518/463, loss: 0.5354915261268616 2023-01-22 11:58:53.428525: step: 520/463, loss: 0.015625547617673874 2023-01-22 11:58:53.989371: step: 522/463, loss: 0.0063484408892691135 2023-01-22 11:58:54.599179: step: 524/463, loss: 0.14776206016540527 2023-01-22 11:58:55.182733: step: 526/463, loss: 0.0332455188035965 2023-01-22 11:58:55.750661: step: 528/463, loss: 0.03845450282096863 2023-01-22 11:58:56.440486: step: 530/463, loss: 0.11550804227590561 2023-01-22 11:58:57.102735: step: 532/463, loss: 0.11595935374498367 2023-01-22 11:58:57.747194: step: 534/463, loss: 0.02594004198908806 2023-01-22 11:58:58.319487: step: 536/463, loss: 0.04169909283518791 2023-01-22 11:58:58.938767: step: 538/463, loss: 0.027592720463871956 2023-01-22 11:58:59.546761: step: 540/463, loss: 0.07582306116819382 2023-01-22 11:59:00.201468: step: 542/463, loss: 0.013903995975852013 2023-01-22 11:59:00.762704: step: 544/463, loss: 0.00185700214933604 2023-01-22 11:59:01.398656: step: 546/463, loss: 0.030083443969488144 2023-01-22 11:59:01.980066: step: 548/463, loss: 0.019753124564886093 2023-01-22 11:59:02.595509: step: 550/463, loss: 0.0333632193505764 2023-01-22 11:59:03.240700: step: 552/463, loss: 0.13638849556446075 2023-01-22 11:59:03.904550: step: 554/463, loss: 0.05351114273071289 2023-01-22 11:59:04.493088: step: 556/463, loss: 0.0046550920233130455 2023-01-22 11:59:05.165662: step: 558/463, loss: 0.018831003457307816 2023-01-22 11:59:05.714023: step: 560/463, loss: 0.010704170912504196 2023-01-22 11:59:06.333724: step: 562/463, loss: 0.09039629250764847 2023-01-22 11:59:06.950187: step: 564/463, loss: 0.0254190806299448 2023-01-22 11:59:07.514061: step: 566/463, loss: 0.055956847965717316 2023-01-22 11:59:08.083428: step: 568/463, loss: 0.05081801488995552 2023-01-22 11:59:08.684540: step: 570/463, loss: 0.043163515627384186 2023-01-22 11:59:09.319532: step: 572/463, loss: 0.01678292080760002 2023-01-22 11:59:09.963519: step: 574/463, loss: 0.023207440972328186 2023-01-22 11:59:10.568910: step: 576/463, loss: 0.4035690724849701 2023-01-22 11:59:11.163737: step: 578/463, loss: 0.1154913380742073 2023-01-22 11:59:11.805170: step: 580/463, loss: 0.12089009582996368 2023-01-22 11:59:12.498272: step: 582/463, loss: 0.17366036772727966 2023-01-22 11:59:13.137873: step: 584/463, loss: 0.2882901728153229 2023-01-22 11:59:13.697250: step: 586/463, loss: 0.05161074548959732 2023-01-22 11:59:14.298955: step: 588/463, loss: 0.35056570172309875 2023-01-22 11:59:14.931993: step: 590/463, loss: 0.0208967886865139 2023-01-22 11:59:15.565524: step: 592/463, loss: 0.20979025959968567 2023-01-22 11:59:16.191685: step: 594/463, loss: 0.06464440375566483 2023-01-22 11:59:16.784880: step: 596/463, loss: 0.03202921897172928 2023-01-22 11:59:17.322888: step: 598/463, loss: 0.07376474887132645 2023-01-22 11:59:17.891031: step: 600/463, loss: 0.04743989184498787 2023-01-22 11:59:18.495605: step: 602/463, loss: 0.055407650768756866 2023-01-22 11:59:19.204069: step: 604/463, loss: 0.05285326763987541 2023-01-22 11:59:19.864260: step: 606/463, loss: 0.10778897255659103 2023-01-22 11:59:20.494920: step: 608/463, loss: 0.038896068930625916 2023-01-22 11:59:21.121898: step: 610/463, loss: 0.08925910294055939 2023-01-22 11:59:21.724786: step: 612/463, loss: 0.1635931432247162 2023-01-22 11:59:22.305587: step: 614/463, loss: 0.8457882404327393 2023-01-22 11:59:22.877180: step: 616/463, loss: 0.03410865738987923 2023-01-22 11:59:23.425850: step: 618/463, loss: 0.015545263886451721 2023-01-22 11:59:24.068780: step: 620/463, loss: 0.06418374925851822 2023-01-22 11:59:24.699279: step: 622/463, loss: 0.009307267144322395 2023-01-22 11:59:25.265034: step: 624/463, loss: 0.00849175825715065 2023-01-22 11:59:25.860736: step: 626/463, loss: 0.028681280091404915 2023-01-22 11:59:26.463260: step: 628/463, loss: 0.0491660051047802 2023-01-22 11:59:27.085703: step: 630/463, loss: 0.47927841544151306 2023-01-22 11:59:27.671103: step: 632/463, loss: 0.009912054054439068 2023-01-22 11:59:28.261317: step: 634/463, loss: 0.0031794968526810408 2023-01-22 11:59:28.891043: step: 636/463, loss: 0.03298630937933922 2023-01-22 11:59:29.501745: step: 638/463, loss: 0.5963751077651978 2023-01-22 11:59:30.106744: step: 640/463, loss: 1.1689939498901367 2023-01-22 11:59:30.728163: step: 642/463, loss: 0.008170985616743565 2023-01-22 11:59:31.330666: step: 644/463, loss: 0.04075191915035248 2023-01-22 11:59:31.923733: step: 646/463, loss: 0.00829495582729578 2023-01-22 11:59:32.513135: step: 648/463, loss: 0.4498771131038666 2023-01-22 11:59:33.143661: step: 650/463, loss: 0.14536608755588531 2023-01-22 11:59:33.740115: step: 652/463, loss: 0.024806663393974304 2023-01-22 11:59:34.388611: step: 654/463, loss: 0.05230482667684555 2023-01-22 11:59:34.967688: step: 656/463, loss: 0.16959525644779205 2023-01-22 11:59:35.637009: step: 658/463, loss: 0.026518968865275383 2023-01-22 11:59:36.302814: step: 660/463, loss: 0.010883325710892677 2023-01-22 11:59:36.910255: step: 662/463, loss: 0.039032381027936935 2023-01-22 11:59:37.544991: step: 664/463, loss: 0.014130848459899426 2023-01-22 11:59:38.230448: step: 666/463, loss: 0.07402977347373962 2023-01-22 11:59:38.899205: step: 668/463, loss: 0.14945565164089203 2023-01-22 11:59:39.491417: step: 670/463, loss: 0.311570405960083 2023-01-22 11:59:40.048477: step: 672/463, loss: 0.8940517902374268 2023-01-22 11:59:40.640955: step: 674/463, loss: 0.06886366754770279 2023-01-22 11:59:41.249391: step: 676/463, loss: 0.10833122581243515 2023-01-22 11:59:41.870754: step: 678/463, loss: 0.09355586022138596 2023-01-22 11:59:42.511037: step: 680/463, loss: 0.06463313847780228 2023-01-22 11:59:43.063931: step: 682/463, loss: 0.014167838729918003 2023-01-22 11:59:43.695718: step: 684/463, loss: 0.06615443527698517 2023-01-22 11:59:44.360071: step: 686/463, loss: 0.3571716248989105 2023-01-22 11:59:44.931662: step: 688/463, loss: 0.07164973020553589 2023-01-22 11:59:45.544894: step: 690/463, loss: 0.894069254398346 2023-01-22 11:59:46.114622: step: 692/463, loss: 0.010855305008590221 2023-01-22 11:59:46.656192: step: 694/463, loss: 0.019030166789889336 2023-01-22 11:59:47.268347: step: 696/463, loss: 0.044320087879896164 2023-01-22 11:59:47.855939: step: 698/463, loss: 0.0061429706402122974 2023-01-22 11:59:48.458975: step: 700/463, loss: 0.01441947091370821 2023-01-22 11:59:49.064514: step: 702/463, loss: 0.050518304109573364 2023-01-22 11:59:49.657080: step: 704/463, loss: 0.013496499508619308 2023-01-22 11:59:50.338321: step: 706/463, loss: 0.04813800007104874 2023-01-22 11:59:50.933146: step: 708/463, loss: 0.09693578630685806 2023-01-22 11:59:51.520242: step: 710/463, loss: 0.03551693633198738 2023-01-22 11:59:52.068005: step: 712/463, loss: 0.007124726660549641 2023-01-22 11:59:52.698971: step: 714/463, loss: 0.006127381697297096 2023-01-22 11:59:53.278250: step: 716/463, loss: 0.023651884868741035 2023-01-22 11:59:53.885750: step: 718/463, loss: 0.042162906378507614 2023-01-22 11:59:54.530859: step: 720/463, loss: 0.011224256828427315 2023-01-22 11:59:55.091016: step: 722/463, loss: 0.4536055326461792 2023-01-22 11:59:55.705295: step: 724/463, loss: 0.06699256598949432 2023-01-22 11:59:56.317407: step: 726/463, loss: 0.08349325507879257 2023-01-22 11:59:56.881451: step: 728/463, loss: 0.009899456053972244 2023-01-22 11:59:57.492496: step: 730/463, loss: 0.010055750608444214 2023-01-22 11:59:58.102680: step: 732/463, loss: 0.05743797495961189 2023-01-22 11:59:58.814404: step: 734/463, loss: 0.023769082501530647 2023-01-22 11:59:59.475215: step: 736/463, loss: 0.10105634480714798 2023-01-22 12:00:00.124614: step: 738/463, loss: 0.5656456351280212 2023-01-22 12:00:00.757678: step: 740/463, loss: 0.0425361767411232 2023-01-22 12:00:01.359532: step: 742/463, loss: 0.07228134572505951 2023-01-22 12:00:01.998468: step: 744/463, loss: 0.003149681957438588 2023-01-22 12:00:02.582485: step: 746/463, loss: 0.003421169938519597 2023-01-22 12:00:03.216382: step: 748/463, loss: 0.42067599296569824 2023-01-22 12:00:03.789811: step: 750/463, loss: 0.1624079793691635 2023-01-22 12:00:04.379487: step: 752/463, loss: 0.04837449640035629 2023-01-22 12:00:04.982828: step: 754/463, loss: 0.012449271976947784 2023-01-22 12:00:05.626779: step: 756/463, loss: 0.0254416074603796 2023-01-22 12:00:06.263861: step: 758/463, loss: 0.023787543177604675 2023-01-22 12:00:06.907158: step: 760/463, loss: 0.06165437400341034 2023-01-22 12:00:07.515433: step: 762/463, loss: 0.20485974848270416 2023-01-22 12:00:08.141990: step: 764/463, loss: 0.28936567902565 2023-01-22 12:00:08.763583: step: 766/463, loss: 0.113552987575531 2023-01-22 12:00:09.345245: step: 768/463, loss: 0.040712811052799225 2023-01-22 12:00:09.981018: step: 770/463, loss: 0.02676175907254219 2023-01-22 12:00:10.576450: step: 772/463, loss: 0.06688287109136581 2023-01-22 12:00:11.138427: step: 774/463, loss: 0.7004122138023376 2023-01-22 12:00:11.659136: step: 776/463, loss: 0.07582084089517593 2023-01-22 12:00:12.265347: step: 778/463, loss: 0.1933680921792984 2023-01-22 12:00:12.807366: step: 780/463, loss: 0.04180564358830452 2023-01-22 12:00:13.424087: step: 782/463, loss: 0.055355582386255264 2023-01-22 12:00:14.008498: step: 784/463, loss: 0.08811771124601364 2023-01-22 12:00:14.578592: step: 786/463, loss: 0.028482133522629738 2023-01-22 12:00:15.159130: step: 788/463, loss: 0.004225490614771843 2023-01-22 12:00:15.734142: step: 790/463, loss: 0.036629583686590195 2023-01-22 12:00:16.316785: step: 792/463, loss: 0.054877351969480515 2023-01-22 12:00:17.001515: step: 794/463, loss: 0.3363019824028015 2023-01-22 12:00:17.624015: step: 796/463, loss: 0.05422777682542801 2023-01-22 12:00:18.174888: step: 798/463, loss: 0.006331595126539469 2023-01-22 12:00:18.765123: step: 800/463, loss: 0.07655809819698334 2023-01-22 12:00:19.409640: step: 802/463, loss: 0.07453346997499466 2023-01-22 12:00:20.026583: step: 804/463, loss: 0.04395703598856926 2023-01-22 12:00:20.588153: step: 806/463, loss: 0.045428112149238586 2023-01-22 12:00:21.235735: step: 808/463, loss: 0.01732235588133335 2023-01-22 12:00:21.908603: step: 810/463, loss: 0.0650506392121315 2023-01-22 12:00:22.479340: step: 812/463, loss: 0.009408474899828434 2023-01-22 12:00:23.089252: step: 814/463, loss: 0.06196252629160881 2023-01-22 12:00:23.655178: step: 816/463, loss: 0.4552536606788635 2023-01-22 12:00:24.341721: step: 818/463, loss: 0.15773415565490723 2023-01-22 12:00:24.996911: step: 820/463, loss: 0.03790159150958061 2023-01-22 12:00:25.584533: step: 822/463, loss: 0.037578023970127106 2023-01-22 12:00:26.241312: step: 824/463, loss: 0.011477263644337654 2023-01-22 12:00:26.839773: step: 826/463, loss: 0.09522224217653275 2023-01-22 12:00:27.477536: step: 828/463, loss: 0.03931977599859238 2023-01-22 12:00:28.074339: step: 830/463, loss: 0.00780304754152894 2023-01-22 12:00:28.663324: step: 832/463, loss: 0.03427712991833687 2023-01-22 12:00:29.281444: step: 834/463, loss: 0.035402603447437286 2023-01-22 12:00:29.898896: step: 836/463, loss: 0.2521076202392578 2023-01-22 12:00:30.513699: step: 838/463, loss: 0.03218855708837509 2023-01-22 12:00:31.143735: step: 840/463, loss: 0.09992952644824982 2023-01-22 12:00:31.815800: step: 842/463, loss: 0.044545166194438934 2023-01-22 12:00:32.512539: step: 844/463, loss: 0.028844941407442093 2023-01-22 12:00:33.198311: step: 846/463, loss: 0.003648433368653059 2023-01-22 12:00:33.837501: step: 848/463, loss: 0.08368342369794846 2023-01-22 12:00:34.430912: step: 850/463, loss: 0.048617392778396606 2023-01-22 12:00:35.057923: step: 852/463, loss: 0.01440340280532837 2023-01-22 12:00:35.610856: step: 854/463, loss: 0.025879204273223877 2023-01-22 12:00:36.217807: step: 856/463, loss: 1.2511944770812988 2023-01-22 12:00:36.802239: step: 858/463, loss: 0.04545079171657562 2023-01-22 12:00:37.401021: step: 860/463, loss: 0.017826208844780922 2023-01-22 12:00:38.010220: step: 862/463, loss: 0.05402884632349014 2023-01-22 12:00:38.673839: step: 864/463, loss: 0.16573858261108398 2023-01-22 12:00:39.244519: step: 866/463, loss: 0.00812308769673109 2023-01-22 12:00:39.851196: step: 868/463, loss: 0.026841744780540466 2023-01-22 12:00:40.419078: step: 870/463, loss: 0.02524542063474655 2023-01-22 12:00:41.007353: step: 872/463, loss: 0.0005836548516526818 2023-01-22 12:00:41.601995: step: 874/463, loss: 0.022158058360219002 2023-01-22 12:00:42.264002: step: 876/463, loss: 0.0849095955491066 2023-01-22 12:00:42.805163: step: 878/463, loss: 0.08241454511880875 2023-01-22 12:00:43.389973: step: 880/463, loss: 0.021352414041757584 2023-01-22 12:00:44.108728: step: 882/463, loss: 0.10853380709886551 2023-01-22 12:00:44.722674: step: 884/463, loss: 0.0011751658748835325 2023-01-22 12:00:45.363656: step: 886/463, loss: 0.032426316291093826 2023-01-22 12:00:45.969976: step: 888/463, loss: 0.9668465256690979 2023-01-22 12:00:46.565452: step: 890/463, loss: 0.004351734183728695 2023-01-22 12:00:47.204599: step: 892/463, loss: 0.058757588267326355 2023-01-22 12:00:47.851590: step: 894/463, loss: 0.015361789613962173 2023-01-22 12:00:48.463545: step: 896/463, loss: 0.01097427774220705 2023-01-22 12:00:49.086411: step: 898/463, loss: 0.09028850495815277 2023-01-22 12:00:49.676375: step: 900/463, loss: 0.10042666643857956 2023-01-22 12:00:50.343280: step: 902/463, loss: 0.04647265374660492 2023-01-22 12:00:50.962618: step: 904/463, loss: 0.021175991743803024 2023-01-22 12:00:51.567139: step: 906/463, loss: 0.004938920494168997 2023-01-22 12:00:52.142523: step: 908/463, loss: 0.050417378544807434 2023-01-22 12:00:52.793912: step: 910/463, loss: 0.13690118491649628 2023-01-22 12:00:53.392339: step: 912/463, loss: 0.11915592849254608 2023-01-22 12:00:54.043489: step: 914/463, loss: 0.023038644343614578 2023-01-22 12:00:54.608541: step: 916/463, loss: 0.10941333323717117 2023-01-22 12:00:55.273615: step: 918/463, loss: 0.04674834758043289 2023-01-22 12:00:55.869077: step: 920/463, loss: 0.015698300674557686 2023-01-22 12:00:56.481459: step: 922/463, loss: 0.0308549664914608 2023-01-22 12:00:57.055693: step: 924/463, loss: 0.022881057113409042 2023-01-22 12:00:57.643592: step: 926/463, loss: 0.022768784314393997 ================================================== Loss: 0.098 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33574913701540093, 'r': 0.3427571835185687, 'f1': 0.33921696847753185}, 'combined': 0.24994934519397083, 'epoch': 23} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33865712619108307, 'r': 0.4002877429060044, 'f1': 0.366902328796742}, 'combined': 0.2843931926558479, 'epoch': 23} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2939002133872417, 'r': 0.3546879235565194, 'f1': 0.32144546124554724}, 'combined': 0.23685455039145584, 'epoch': 23} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32015959256260984, 'r': 0.4096159493080449, 'f1': 0.35940496197351035}, 'combined': 0.27858183655363006, 'epoch': 23} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2963372970802413, 'r': 0.3452582550422546, 'f1': 0.31893269133614055}, 'combined': 0.23500303572136672, 'epoch': 23} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.31808983337923963, 'r': 0.3932268620359166, 'f1': 0.3516899514139559}, 'combined': 0.2726017805218223, 'epoch': 23} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.265625, 'r': 0.36428571428571427, 'f1': 0.30722891566265054}, 'combined': 0.20481927710843367, 'epoch': 23} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.26282051282051283, 'r': 0.44565217391304346, 'f1': 0.3306451612903226}, 'combined': 0.1653225806451613, 'epoch': 23} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.421875, 'r': 0.23275862068965517, 'f1': 0.3}, 'combined': 0.19999999999999998, 'epoch': 23} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3279711174242424, 'r': 0.3285934535104364, 'f1': 0.32828199052132695}, 'combined': 0.24189199301571457, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33699381251830834, 'r': 0.40575541764612494, 'f1': 0.3681917384478598}, 'combined': 0.2853926393710684, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32926829268292684, 'r': 0.38571428571428573, 'f1': 0.35526315789473684}, 'combined': 0.23684210526315788, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29021588054776093, 'r': 0.325460313859064, 'f1': 0.30682931199235547}, 'combined': 0.2260847562048935, 'epoch': 13} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32837198696491027, 'r': 0.4053341714098111, 'f1': 0.3628166009822085}, 'combined': 0.28122626487616165, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3333333333333333, 'r': 0.4782608695652174, 'f1': 0.3928571428571428}, 'combined': 0.1964285714285714, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30241528588023536, 'r': 0.33799355480732185, 'f1': 0.319216135095804}, 'combined': 0.2352118890179608, 'epoch': 18} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3270213595028262, 'r': 0.3847310111797955, 'f1': 0.3535366048679202}, 'combined': 0.2740331578402061, 'epoch': 18} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 18} ****************************** Epoch: 24 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 12:03:34.489075: step: 2/463, loss: 0.061294905841350555 2023-01-22 12:03:35.071366: step: 4/463, loss: 0.006343673914670944 2023-01-22 12:03:35.703483: step: 6/463, loss: 0.020606111735105515 2023-01-22 12:03:36.296587: step: 8/463, loss: 0.16978049278259277 2023-01-22 12:03:36.902849: step: 10/463, loss: 0.024464059621095657 2023-01-22 12:03:37.556719: step: 12/463, loss: 0.06635589152574539 2023-01-22 12:03:38.127139: step: 14/463, loss: 0.019664715975522995 2023-01-22 12:03:38.715061: step: 16/463, loss: 0.009384295903146267 2023-01-22 12:03:39.332109: step: 18/463, loss: 0.015013431198894978 2023-01-22 12:03:39.929299: step: 20/463, loss: 0.010599148459732533 2023-01-22 12:03:40.512592: step: 22/463, loss: 0.7978048324584961 2023-01-22 12:03:41.085529: step: 24/463, loss: 0.050290998071432114 2023-01-22 12:03:41.715654: step: 26/463, loss: 0.028274554759263992 2023-01-22 12:03:42.314611: step: 28/463, loss: 0.02509673498570919 2023-01-22 12:03:42.880793: step: 30/463, loss: 0.04342924430966377 2023-01-22 12:03:43.524819: step: 32/463, loss: 0.21507760882377625 2023-01-22 12:03:44.099100: step: 34/463, loss: 0.010677285492420197 2023-01-22 12:03:44.700681: step: 36/463, loss: 0.04876670613884926 2023-01-22 12:03:45.427135: step: 38/463, loss: 0.0214841291308403 2023-01-22 12:03:46.066514: step: 40/463, loss: 0.3389042615890503 2023-01-22 12:03:46.714221: step: 42/463, loss: 0.001654837396927178 2023-01-22 12:03:47.276563: step: 44/463, loss: 0.01829889416694641 2023-01-22 12:03:47.855324: step: 46/463, loss: 0.031863272190093994 2023-01-22 12:03:48.481676: step: 48/463, loss: 0.3395855128765106 2023-01-22 12:03:49.110230: step: 50/463, loss: 0.01945796236395836 2023-01-22 12:03:49.672409: step: 52/463, loss: 0.011326232925057411 2023-01-22 12:03:50.271320: step: 54/463, loss: 0.060719557106494904 2023-01-22 12:03:50.833264: step: 56/463, loss: 0.27859875559806824 2023-01-22 12:03:51.527284: step: 58/463, loss: 0.04889040067791939 2023-01-22 12:03:52.202028: step: 60/463, loss: 0.01434397604316473 2023-01-22 12:03:52.829397: step: 62/463, loss: 0.06446627527475357 2023-01-22 12:03:53.392184: step: 64/463, loss: 0.0687406063079834 2023-01-22 12:03:53.928922: step: 66/463, loss: 0.009840857237577438 2023-01-22 12:03:54.540377: step: 68/463, loss: 0.03942156583070755 2023-01-22 12:03:55.113873: step: 70/463, loss: 7.957068010000512e-05 2023-01-22 12:03:55.681307: step: 72/463, loss: 0.1568048745393753 2023-01-22 12:03:56.380564: step: 74/463, loss: 0.018056249246001244 2023-01-22 12:03:57.017030: step: 76/463, loss: 0.16042578220367432 2023-01-22 12:03:57.723424: step: 78/463, loss: 0.02998526021838188 2023-01-22 12:03:58.386607: step: 80/463, loss: 0.03995296731591225 2023-01-22 12:03:58.976982: step: 82/463, loss: 0.07112596929073334 2023-01-22 12:03:59.579181: step: 84/463, loss: 0.45406466722488403 2023-01-22 12:04:00.199458: step: 86/463, loss: 0.07411360740661621 2023-01-22 12:04:00.805017: step: 88/463, loss: 0.06382579356431961 2023-01-22 12:04:01.422554: step: 90/463, loss: 0.05234390124678612 2023-01-22 12:04:02.054298: step: 92/463, loss: 0.02708696387708187 2023-01-22 12:04:02.626894: step: 94/463, loss: 0.06781347841024399 2023-01-22 12:04:03.220450: step: 96/463, loss: 0.0020681589376181364 2023-01-22 12:04:03.835723: step: 98/463, loss: 0.010786955244839191 2023-01-22 12:04:04.399628: step: 100/463, loss: 0.007816801778972149 2023-01-22 12:04:05.027367: step: 102/463, loss: 0.11301437020301819 2023-01-22 12:04:05.738440: step: 104/463, loss: 0.005041416268795729 2023-01-22 12:04:06.293639: step: 106/463, loss: 0.09249261766672134 2023-01-22 12:04:06.837246: step: 108/463, loss: 0.0031062893103808165 2023-01-22 12:04:07.432061: step: 110/463, loss: 0.0008614999242126942 2023-01-22 12:04:08.068091: step: 112/463, loss: 0.03815028816461563 2023-01-22 12:04:08.737022: step: 114/463, loss: 1.0728869438171387 2023-01-22 12:04:09.430118: step: 116/463, loss: 0.04436250776052475 2023-01-22 12:04:10.037716: step: 118/463, loss: 0.023092331364750862 2023-01-22 12:04:10.670123: step: 120/463, loss: 0.026097899302840233 2023-01-22 12:04:11.294286: step: 122/463, loss: 0.03697734698653221 2023-01-22 12:04:11.906835: step: 124/463, loss: 0.06943757086992264 2023-01-22 12:04:12.443031: step: 126/463, loss: 0.09368494153022766 2023-01-22 12:04:12.984779: step: 128/463, loss: 0.1767715960741043 2023-01-22 12:04:13.636660: step: 130/463, loss: 0.020567482337355614 2023-01-22 12:04:14.215040: step: 132/463, loss: 0.0010978621430695057 2023-01-22 12:04:14.816565: step: 134/463, loss: 0.0038466467522084713 2023-01-22 12:04:15.414784: step: 136/463, loss: 0.002895973389968276 2023-01-22 12:04:16.014709: step: 138/463, loss: 0.6470154523849487 2023-01-22 12:04:16.604370: step: 140/463, loss: 0.023963846266269684 2023-01-22 12:04:17.174681: step: 142/463, loss: 0.05351338908076286 2023-01-22 12:04:17.751173: step: 144/463, loss: 0.007462936453521252 2023-01-22 12:04:18.340922: step: 146/463, loss: 0.04259811341762543 2023-01-22 12:04:18.952096: step: 148/463, loss: 0.07926907390356064 2023-01-22 12:04:19.536699: step: 150/463, loss: 0.005697930231690407 2023-01-22 12:04:20.130127: step: 152/463, loss: 0.01964966580271721 2023-01-22 12:04:20.704505: step: 154/463, loss: 0.07333017140626907 2023-01-22 12:04:21.334930: step: 156/463, loss: 0.11011946201324463 2023-01-22 12:04:21.982315: step: 158/463, loss: 0.16936245560646057 2023-01-22 12:04:22.635945: step: 160/463, loss: 0.07704948633909225 2023-01-22 12:04:23.222879: step: 162/463, loss: 0.02215109020471573 2023-01-22 12:04:23.807507: step: 164/463, loss: 0.1850002408027649 2023-01-22 12:04:24.387145: step: 166/463, loss: 0.06054436042904854 2023-01-22 12:04:24.984600: step: 168/463, loss: 0.07248638570308685 2023-01-22 12:04:25.585888: step: 170/463, loss: 0.021736355498433113 2023-01-22 12:04:26.188873: step: 172/463, loss: 0.057320334017276764 2023-01-22 12:04:26.829444: step: 174/463, loss: 0.03244979307055473 2023-01-22 12:04:27.400734: step: 176/463, loss: 0.07547648251056671 2023-01-22 12:04:28.083801: step: 178/463, loss: 0.05297822877764702 2023-01-22 12:04:28.659417: step: 180/463, loss: 0.02737613581120968 2023-01-22 12:04:29.283982: step: 182/463, loss: 0.12709006667137146 2023-01-22 12:04:29.835757: step: 184/463, loss: 0.014109165407717228 2023-01-22 12:04:30.384751: step: 186/463, loss: 0.009420432150363922 2023-01-22 12:04:30.982020: step: 188/463, loss: 0.08026494830846786 2023-01-22 12:04:31.565392: step: 190/463, loss: 0.014953376725316048 2023-01-22 12:04:32.187807: step: 192/463, loss: 0.01593739725649357 2023-01-22 12:04:32.804372: step: 194/463, loss: 0.04336761310696602 2023-01-22 12:04:33.417695: step: 196/463, loss: 0.004452051594853401 2023-01-22 12:04:34.029707: step: 198/463, loss: 0.019622813910245895 2023-01-22 12:04:34.630062: step: 200/463, loss: 0.5644239187240601 2023-01-22 12:04:35.254884: step: 202/463, loss: 0.03331039473414421 2023-01-22 12:04:35.849399: step: 204/463, loss: 0.042713724076747894 2023-01-22 12:04:36.495951: step: 206/463, loss: 0.0993862971663475 2023-01-22 12:04:37.124667: step: 208/463, loss: 0.012197350151836872 2023-01-22 12:04:37.689816: step: 210/463, loss: 0.04980878159403801 2023-01-22 12:04:38.254670: step: 212/463, loss: 0.012405703775584698 2023-01-22 12:04:38.834489: step: 214/463, loss: 0.014293259009718895 2023-01-22 12:04:39.409458: step: 216/463, loss: 0.08689762651920319 2023-01-22 12:04:40.006056: step: 218/463, loss: 0.009880058467388153 2023-01-22 12:04:40.633367: step: 220/463, loss: 0.03535918518900871 2023-01-22 12:04:41.222509: step: 222/463, loss: 0.007327070459723473 2023-01-22 12:04:41.896035: step: 224/463, loss: 0.0013676685048267245 2023-01-22 12:04:42.442498: step: 226/463, loss: 4.08041524887085 2023-01-22 12:04:43.029669: step: 228/463, loss: 0.18339861929416656 2023-01-22 12:04:43.602882: step: 230/463, loss: 0.004719461780041456 2023-01-22 12:04:44.252954: step: 232/463, loss: 0.005354912020266056 2023-01-22 12:04:44.800449: step: 234/463, loss: 0.009334675036370754 2023-01-22 12:04:45.368585: step: 236/463, loss: 0.006596957799047232 2023-01-22 12:04:46.001508: step: 238/463, loss: 0.108518585562706 2023-01-22 12:04:46.657216: step: 240/463, loss: 0.01934421807527542 2023-01-22 12:04:47.260271: step: 242/463, loss: 0.007274204865098 2023-01-22 12:04:47.919634: step: 244/463, loss: 0.002430056221783161 2023-01-22 12:04:48.567783: step: 246/463, loss: 0.018906325101852417 2023-01-22 12:04:49.186736: step: 248/463, loss: 0.09841140359640121 2023-01-22 12:04:49.830066: step: 250/463, loss: 0.2465820610523224 2023-01-22 12:04:50.492607: step: 252/463, loss: 0.09582491964101791 2023-01-22 12:04:51.133344: step: 254/463, loss: 0.023469191044569016 2023-01-22 12:04:51.775130: step: 256/463, loss: 0.01208808645606041 2023-01-22 12:04:52.362021: step: 258/463, loss: 0.1477283090353012 2023-01-22 12:04:53.050782: step: 260/463, loss: 0.03751266375184059 2023-01-22 12:04:53.643582: step: 262/463, loss: 0.05043237656354904 2023-01-22 12:04:54.242874: step: 264/463, loss: 0.0037228099536150694 2023-01-22 12:04:54.880271: step: 266/463, loss: 0.027014896273612976 2023-01-22 12:04:55.509949: step: 268/463, loss: 0.4177056550979614 2023-01-22 12:04:56.155469: step: 270/463, loss: 0.032803405076265335 2023-01-22 12:04:56.797104: step: 272/463, loss: 0.12205017358064651 2023-01-22 12:04:57.360632: step: 274/463, loss: 0.010484627448022366 2023-01-22 12:04:57.978641: step: 276/463, loss: 0.020143119618296623 2023-01-22 12:04:58.603662: step: 278/463, loss: 0.06068652868270874 2023-01-22 12:04:59.186067: step: 280/463, loss: 0.007905379869043827 2023-01-22 12:04:59.757914: step: 282/463, loss: 0.10386954993009567 2023-01-22 12:05:00.371767: step: 284/463, loss: 0.02009562961757183 2023-01-22 12:05:00.965865: step: 286/463, loss: 0.06188122555613518 2023-01-22 12:05:01.561802: step: 288/463, loss: 0.03297923877835274 2023-01-22 12:05:02.166298: step: 290/463, loss: 0.006055665202438831 2023-01-22 12:05:02.758796: step: 292/463, loss: 0.018160022795200348 2023-01-22 12:05:03.297135: step: 294/463, loss: 0.12376287579536438 2023-01-22 12:05:03.927743: step: 296/463, loss: 0.14398975670337677 2023-01-22 12:05:04.513886: step: 298/463, loss: 0.024244364351034164 2023-01-22 12:05:05.027328: step: 300/463, loss: 0.16585154831409454 2023-01-22 12:05:05.632880: step: 302/463, loss: 0.031117623671889305 2023-01-22 12:05:06.280098: step: 304/463, loss: 1.4300962686538696 2023-01-22 12:05:07.011675: step: 306/463, loss: 0.16099242866039276 2023-01-22 12:05:07.622481: step: 308/463, loss: 0.01632041484117508 2023-01-22 12:05:08.226859: step: 310/463, loss: 0.09779483824968338 2023-01-22 12:05:08.858131: step: 312/463, loss: 0.0809023380279541 2023-01-22 12:05:09.452672: step: 314/463, loss: 0.02666088379919529 2023-01-22 12:05:10.008722: step: 316/463, loss: 0.0623222254216671 2023-01-22 12:05:10.590922: step: 318/463, loss: 0.007449913304299116 2023-01-22 12:05:11.192389: step: 320/463, loss: 0.0750175192952156 2023-01-22 12:05:11.870177: step: 322/463, loss: 0.00797069538384676 2023-01-22 12:05:12.525123: step: 324/463, loss: 0.016828155145049095 2023-01-22 12:05:13.134804: step: 326/463, loss: 2.867417097091675 2023-01-22 12:05:13.857493: step: 328/463, loss: 0.07930314540863037 2023-01-22 12:05:14.460440: step: 330/463, loss: 0.016846250742673874 2023-01-22 12:05:15.091774: step: 332/463, loss: 0.01605255901813507 2023-01-22 12:05:15.718557: step: 334/463, loss: 0.46677348017692566 2023-01-22 12:05:16.292405: step: 336/463, loss: 0.0068283299915492535 2023-01-22 12:05:16.913015: step: 338/463, loss: 0.03346501290798187 2023-01-22 12:05:17.703411: step: 340/463, loss: 0.020151332020759583 2023-01-22 12:05:18.262582: step: 342/463, loss: 0.009587912820279598 2023-01-22 12:05:18.863064: step: 344/463, loss: 0.012885798700153828 2023-01-22 12:05:19.604901: step: 346/463, loss: 0.06680378317832947 2023-01-22 12:05:20.242886: step: 348/463, loss: 0.03617333620786667 2023-01-22 12:05:20.912078: step: 350/463, loss: 0.03804966062307358 2023-01-22 12:05:21.494124: step: 352/463, loss: 0.20497827231884003 2023-01-22 12:05:22.117249: step: 354/463, loss: 0.0541839562356472 2023-01-22 12:05:22.789063: step: 356/463, loss: 0.06272148340940475 2023-01-22 12:05:23.421031: step: 358/463, loss: 0.6745994091033936 2023-01-22 12:05:24.051665: step: 360/463, loss: 0.0856032744050026 2023-01-22 12:05:24.594219: step: 362/463, loss: 0.0008191489614546299 2023-01-22 12:05:25.202567: step: 364/463, loss: 0.007314196787774563 2023-01-22 12:05:25.772554: step: 366/463, loss: 0.8927024006843567 2023-01-22 12:05:26.443073: step: 368/463, loss: 0.025360634550452232 2023-01-22 12:05:27.105722: step: 370/463, loss: 0.015439298935234547 2023-01-22 12:05:27.747542: step: 372/463, loss: 0.012043795548379421 2023-01-22 12:05:28.391016: step: 374/463, loss: 0.002002270892262459 2023-01-22 12:05:29.030175: step: 376/463, loss: 0.10055901855230331 2023-01-22 12:05:29.592848: step: 378/463, loss: 0.03908461332321167 2023-01-22 12:05:30.217270: step: 380/463, loss: 0.7719851136207581 2023-01-22 12:05:30.905766: step: 382/463, loss: 0.09145240485668182 2023-01-22 12:05:31.562544: step: 384/463, loss: 0.15544719994068146 2023-01-22 12:05:32.135830: step: 386/463, loss: 0.03723986819386482 2023-01-22 12:05:32.732126: step: 388/463, loss: 0.00631159171462059 2023-01-22 12:05:33.333892: step: 390/463, loss: 0.06047888845205307 2023-01-22 12:05:33.933094: step: 392/463, loss: 0.012118219397962093 2023-01-22 12:05:34.510805: step: 394/463, loss: 0.03209967911243439 2023-01-22 12:05:35.127333: step: 396/463, loss: 0.022075003013014793 2023-01-22 12:05:35.847453: step: 398/463, loss: 0.013022121042013168 2023-01-22 12:05:36.429820: step: 400/463, loss: 0.009034481830894947 2023-01-22 12:05:37.102585: step: 402/463, loss: 0.1878408044576645 2023-01-22 12:05:37.697930: step: 404/463, loss: 0.03177250549197197 2023-01-22 12:05:38.317503: step: 406/463, loss: 0.05853579193353653 2023-01-22 12:05:38.887928: step: 408/463, loss: 0.0261134784668684 2023-01-22 12:05:39.514484: step: 410/463, loss: 1.5228782892227173 2023-01-22 12:05:40.076739: step: 412/463, loss: 0.052909981459379196 2023-01-22 12:05:40.718975: step: 414/463, loss: 0.03807179629802704 2023-01-22 12:05:41.385877: step: 416/463, loss: 0.16231660544872284 2023-01-22 12:05:41.936943: step: 418/463, loss: 0.03277049958705902 2023-01-22 12:05:42.473665: step: 420/463, loss: 0.0011334748705849051 2023-01-22 12:05:43.046365: step: 422/463, loss: 0.03694593161344528 2023-01-22 12:05:43.654252: step: 424/463, loss: 0.06627807021141052 2023-01-22 12:05:44.199955: step: 426/463, loss: 0.009900666773319244 2023-01-22 12:05:44.835136: step: 428/463, loss: 0.027718110010027885 2023-01-22 12:05:45.437964: step: 430/463, loss: 0.07971540838479996 2023-01-22 12:05:46.059109: step: 432/463, loss: 0.01798867993056774 2023-01-22 12:05:46.666994: step: 434/463, loss: 0.03339134529232979 2023-01-22 12:05:47.297738: step: 436/463, loss: 0.01760207675397396 2023-01-22 12:05:47.908854: step: 438/463, loss: 0.01757189631462097 2023-01-22 12:05:48.547667: step: 440/463, loss: 0.022632887586951256 2023-01-22 12:05:49.225226: step: 442/463, loss: 0.009646367281675339 2023-01-22 12:05:49.808952: step: 444/463, loss: 0.035784099251031876 2023-01-22 12:05:50.487331: step: 446/463, loss: 0.041638486087322235 2023-01-22 12:05:51.110888: step: 448/463, loss: 0.03877909481525421 2023-01-22 12:05:51.749597: step: 450/463, loss: 0.007765759713947773 2023-01-22 12:05:52.372436: step: 452/463, loss: 0.01499869953840971 2023-01-22 12:05:52.989499: step: 454/463, loss: 0.11714904010295868 2023-01-22 12:05:53.581868: step: 456/463, loss: 0.0636599063873291 2023-01-22 12:05:54.190570: step: 458/463, loss: 0.05196675285696983 2023-01-22 12:05:54.751930: step: 460/463, loss: 0.028296956792473793 2023-01-22 12:05:55.382435: step: 462/463, loss: 0.04651957377791405 2023-01-22 12:05:55.990577: step: 464/463, loss: 0.028146252036094666 2023-01-22 12:05:56.613558: step: 466/463, loss: 0.14111503958702087 2023-01-22 12:05:57.142966: step: 468/463, loss: 0.05156390741467476 2023-01-22 12:05:57.760365: step: 470/463, loss: 0.013155822642147541 2023-01-22 12:05:58.398869: step: 472/463, loss: 0.06433604657649994 2023-01-22 12:05:58.995363: step: 474/463, loss: 0.0026400901842862368 2023-01-22 12:05:59.601077: step: 476/463, loss: 0.5728375911712646 2023-01-22 12:06:00.172949: step: 478/463, loss: 0.04358116537332535 2023-01-22 12:06:00.836940: step: 480/463, loss: 0.01163279078900814 2023-01-22 12:06:01.495708: step: 482/463, loss: 0.05154048651456833 2023-01-22 12:06:02.094610: step: 484/463, loss: 0.013692816719412804 2023-01-22 12:06:02.661721: step: 486/463, loss: 0.0017231665551662445 2023-01-22 12:06:03.255459: step: 488/463, loss: 0.020986400544643402 2023-01-22 12:06:03.832189: step: 490/463, loss: 0.0148360850289464 2023-01-22 12:06:04.399957: step: 492/463, loss: 0.037350621074438095 2023-01-22 12:06:05.012166: step: 494/463, loss: 0.023542063310742378 2023-01-22 12:06:05.594506: step: 496/463, loss: 0.0386778898537159 2023-01-22 12:06:06.220269: step: 498/463, loss: 0.12982696294784546 2023-01-22 12:06:06.867506: step: 500/463, loss: 0.015460880473256111 2023-01-22 12:06:07.491628: step: 502/463, loss: 0.03337129205465317 2023-01-22 12:06:08.079795: step: 504/463, loss: 0.014453509822487831 2023-01-22 12:06:08.604958: step: 506/463, loss: 0.20806880295276642 2023-01-22 12:06:09.206234: step: 508/463, loss: 0.02205553837120533 2023-01-22 12:06:09.819748: step: 510/463, loss: 0.06790025532245636 2023-01-22 12:06:10.430007: step: 512/463, loss: 0.03640223667025566 2023-01-22 12:06:11.014219: step: 514/463, loss: 0.04309734329581261 2023-01-22 12:06:11.569487: step: 516/463, loss: 0.0029864616226404905 2023-01-22 12:06:12.200999: step: 518/463, loss: 0.13067884743213654 2023-01-22 12:06:12.919356: step: 520/463, loss: 0.009787706658244133 2023-01-22 12:06:13.474645: step: 522/463, loss: 0.010587071068584919 2023-01-22 12:06:14.065228: step: 524/463, loss: 0.03710310533642769 2023-01-22 12:06:14.664590: step: 526/463, loss: 0.008763926103711128 2023-01-22 12:06:15.278843: step: 528/463, loss: 0.05441180616617203 2023-01-22 12:06:15.850125: step: 530/463, loss: 0.06790116429328918 2023-01-22 12:06:16.445220: step: 532/463, loss: 0.04022807627916336 2023-01-22 12:06:17.112841: step: 534/463, loss: 0.037353623658418655 2023-01-22 12:06:17.746795: step: 536/463, loss: 0.07474343478679657 2023-01-22 12:06:18.326998: step: 538/463, loss: 0.01485541369765997 2023-01-22 12:06:18.990253: step: 540/463, loss: 0.07869409024715424 2023-01-22 12:06:19.689101: step: 542/463, loss: 0.018076535314321518 2023-01-22 12:06:20.276708: step: 544/463, loss: 0.013396735303103924 2023-01-22 12:06:20.923154: step: 546/463, loss: 0.019285796210169792 2023-01-22 12:06:21.535780: step: 548/463, loss: 0.003920732531696558 2023-01-22 12:06:22.156957: step: 550/463, loss: 0.34737080335617065 2023-01-22 12:06:22.728045: step: 552/463, loss: 0.006014332640916109 2023-01-22 12:06:23.353967: step: 554/463, loss: 0.06717444956302643 2023-01-22 12:06:23.986181: step: 556/463, loss: 0.007173856254667044 2023-01-22 12:06:24.647343: step: 558/463, loss: 0.026601284742355347 2023-01-22 12:06:25.300495: step: 560/463, loss: 0.14075852930545807 2023-01-22 12:06:25.935341: step: 562/463, loss: 0.4079218804836273 2023-01-22 12:06:26.613830: step: 564/463, loss: 0.01909816823899746 2023-01-22 12:06:27.223888: step: 566/463, loss: 0.09817704558372498 2023-01-22 12:06:27.835950: step: 568/463, loss: 0.006945817265659571 2023-01-22 12:06:28.462144: step: 570/463, loss: 0.033449966460466385 2023-01-22 12:06:29.155440: step: 572/463, loss: 0.08662756532430649 2023-01-22 12:06:29.744188: step: 574/463, loss: 0.04582451656460762 2023-01-22 12:06:30.323081: step: 576/463, loss: 0.09477768838405609 2023-01-22 12:06:30.954518: step: 578/463, loss: 0.042833440005779266 2023-01-22 12:06:31.578356: step: 580/463, loss: 0.012643870897591114 2023-01-22 12:06:32.132651: step: 582/463, loss: 0.03557109832763672 2023-01-22 12:06:32.699753: step: 584/463, loss: 0.007770916912704706 2023-01-22 12:06:33.290365: step: 586/463, loss: 0.011807246133685112 2023-01-22 12:06:33.949286: step: 588/463, loss: 0.11939933151006699 2023-01-22 12:06:34.586146: step: 590/463, loss: 0.016973499208688736 2023-01-22 12:06:35.163070: step: 592/463, loss: 0.01595240831375122 2023-01-22 12:06:35.806204: step: 594/463, loss: 0.01475546695291996 2023-01-22 12:06:36.309573: step: 596/463, loss: 0.022738425061106682 2023-01-22 12:06:36.902926: step: 598/463, loss: 0.00842673797160387 2023-01-22 12:06:37.570907: step: 600/463, loss: 0.04169292002916336 2023-01-22 12:06:38.230703: step: 602/463, loss: 0.023298628628253937 2023-01-22 12:06:38.877730: step: 604/463, loss: 0.008476962335407734 2023-01-22 12:06:39.439619: step: 606/463, loss: 0.037734705954790115 2023-01-22 12:06:40.058315: step: 608/463, loss: 0.03686380758881569 2023-01-22 12:06:40.674200: step: 610/463, loss: 0.24258795380592346 2023-01-22 12:06:41.269772: step: 612/463, loss: 0.009392541833221912 2023-01-22 12:06:41.959413: step: 614/463, loss: 0.03928908705711365 2023-01-22 12:06:42.615159: step: 616/463, loss: 0.05161019787192345 2023-01-22 12:06:43.255120: step: 618/463, loss: 0.04276500269770622 2023-01-22 12:06:43.815824: step: 620/463, loss: 0.01665964536368847 2023-01-22 12:06:44.406758: step: 622/463, loss: 0.02709885872900486 2023-01-22 12:06:44.941388: step: 624/463, loss: 0.003725471207872033 2023-01-22 12:06:45.520568: step: 626/463, loss: 0.007334047928452492 2023-01-22 12:06:46.152569: step: 628/463, loss: 0.06844276934862137 2023-01-22 12:06:46.724432: step: 630/463, loss: 0.0035458628553897142 2023-01-22 12:06:47.342871: step: 632/463, loss: 0.022977720946073532 2023-01-22 12:06:48.060051: step: 634/463, loss: 0.1353224813938141 2023-01-22 12:06:48.708165: step: 636/463, loss: 0.04656210541725159 2023-01-22 12:06:49.379733: step: 638/463, loss: 0.004388928879052401 2023-01-22 12:06:49.973406: step: 640/463, loss: 0.014378667809069157 2023-01-22 12:06:50.544043: step: 642/463, loss: 0.08394616842269897 2023-01-22 12:06:51.256886: step: 644/463, loss: 0.06383013725280762 2023-01-22 12:06:51.896453: step: 646/463, loss: 0.0629916712641716 2023-01-22 12:06:52.466166: step: 648/463, loss: 0.02984936162829399 2023-01-22 12:06:53.038082: step: 650/463, loss: 0.010352713987231255 2023-01-22 12:06:53.754916: step: 652/463, loss: 0.01131836324930191 2023-01-22 12:06:54.404509: step: 654/463, loss: 0.053480587899684906 2023-01-22 12:06:55.010845: step: 656/463, loss: 0.021115224808454514 2023-01-22 12:06:55.590724: step: 658/463, loss: 0.38840916752815247 2023-01-22 12:06:56.199301: step: 660/463, loss: 0.031535156071186066 2023-01-22 12:06:56.807562: step: 662/463, loss: 0.007785444613546133 2023-01-22 12:06:57.438335: step: 664/463, loss: 0.016515525057911873 2023-01-22 12:06:58.040140: step: 666/463, loss: 0.04285736009478569 2023-01-22 12:06:58.571396: step: 668/463, loss: 0.1247667446732521 2023-01-22 12:06:59.200507: step: 670/463, loss: 0.027118753641843796 2023-01-22 12:06:59.783829: step: 672/463, loss: 0.011570295318961143 2023-01-22 12:07:00.393013: step: 674/463, loss: 0.03898542374372482 2023-01-22 12:07:00.993821: step: 676/463, loss: 0.059527862817049026 2023-01-22 12:07:01.520253: step: 678/463, loss: 0.007822910323739052 2023-01-22 12:07:02.168721: step: 680/463, loss: 0.02750343270599842 2023-01-22 12:07:02.797285: step: 682/463, loss: 0.053470540791749954 2023-01-22 12:07:03.411620: step: 684/463, loss: 0.00998302735388279 2023-01-22 12:07:04.036577: step: 686/463, loss: 0.04914086312055588 2023-01-22 12:07:04.691859: step: 688/463, loss: 0.049978163093328476 2023-01-22 12:07:05.321077: step: 690/463, loss: 0.003041710937395692 2023-01-22 12:07:05.903604: step: 692/463, loss: 0.010977196507155895 2023-01-22 12:07:06.504240: step: 694/463, loss: 0.18136140704154968 2023-01-22 12:07:07.170812: step: 696/463, loss: 0.053340643644332886 2023-01-22 12:07:07.777964: step: 698/463, loss: 0.03233860060572624 2023-01-22 12:07:08.399998: step: 700/463, loss: 0.029896695166826248 2023-01-22 12:07:09.033181: step: 702/463, loss: 0.07791373133659363 2023-01-22 12:07:09.626641: step: 704/463, loss: 0.033302806317806244 2023-01-22 12:07:10.295365: step: 706/463, loss: 0.01567240059375763 2023-01-22 12:07:10.910854: step: 708/463, loss: 0.07613213360309601 2023-01-22 12:07:11.613001: step: 710/463, loss: 0.00995422713458538 2023-01-22 12:07:12.186581: step: 712/463, loss: 0.016865411773324013 2023-01-22 12:07:12.806483: step: 714/463, loss: 0.04179783910512924 2023-01-22 12:07:13.415406: step: 716/463, loss: 0.0659717470407486 2023-01-22 12:07:14.029468: step: 718/463, loss: 0.014638642780482769 2023-01-22 12:07:14.654144: step: 720/463, loss: 0.13202062249183655 2023-01-22 12:07:15.238154: step: 722/463, loss: 0.013809260912239552 2023-01-22 12:07:15.873235: step: 724/463, loss: 0.051346078515052795 2023-01-22 12:07:16.514251: step: 726/463, loss: 0.08912888914346695 2023-01-22 12:07:17.180597: step: 728/463, loss: 0.03262588009238243 2023-01-22 12:07:17.794872: step: 730/463, loss: 0.04851076379418373 2023-01-22 12:07:18.371977: step: 732/463, loss: 0.001667431672103703 2023-01-22 12:07:19.081028: step: 734/463, loss: 0.010037771426141262 2023-01-22 12:07:19.686776: step: 736/463, loss: 0.00821502972394228 2023-01-22 12:07:20.315962: step: 738/463, loss: 0.0088306600227952 2023-01-22 12:07:20.961338: step: 740/463, loss: 0.161117285490036 2023-01-22 12:07:21.597671: step: 742/463, loss: 0.07299044728279114 2023-01-22 12:07:22.290869: step: 744/463, loss: 0.01481643971055746 2023-01-22 12:07:22.944782: step: 746/463, loss: 0.07599938660860062 2023-01-22 12:07:23.625356: step: 748/463, loss: 0.07450322061777115 2023-01-22 12:07:24.189952: step: 750/463, loss: 0.00329537782818079 2023-01-22 12:07:24.807197: step: 752/463, loss: 0.4358955919742584 2023-01-22 12:07:25.407465: step: 754/463, loss: 0.17394433915615082 2023-01-22 12:07:25.953198: step: 756/463, loss: 0.05408826470375061 2023-01-22 12:07:26.609366: step: 758/463, loss: 0.1048794835805893 2023-01-22 12:07:27.243893: step: 760/463, loss: 0.4868112802505493 2023-01-22 12:07:27.870837: step: 762/463, loss: 0.07624334841966629 2023-01-22 12:07:28.487763: step: 764/463, loss: 0.04283759370446205 2023-01-22 12:07:29.080451: step: 766/463, loss: 0.0119438786059618 2023-01-22 12:07:29.705244: step: 768/463, loss: 0.07219856977462769 2023-01-22 12:07:30.260658: step: 770/463, loss: 0.026810215786099434 2023-01-22 12:07:30.845772: step: 772/463, loss: 0.011506829410791397 2023-01-22 12:07:31.463184: step: 774/463, loss: 0.07864825427532196 2023-01-22 12:07:32.071673: step: 776/463, loss: 0.04946066811680794 2023-01-22 12:07:32.649405: step: 778/463, loss: 0.007479182444512844 2023-01-22 12:07:33.288932: step: 780/463, loss: 0.023926807567477226 2023-01-22 12:07:33.880423: step: 782/463, loss: 0.07274968177080154 2023-01-22 12:07:34.453376: step: 784/463, loss: 0.00914521049708128 2023-01-22 12:07:35.033653: step: 786/463, loss: 0.021941477432847023 2023-01-22 12:07:35.605767: step: 788/463, loss: 0.020317258313298225 2023-01-22 12:07:36.190437: step: 790/463, loss: 0.028045807033777237 2023-01-22 12:07:36.876428: step: 792/463, loss: 0.07886797189712524 2023-01-22 12:07:37.623100: step: 794/463, loss: 0.028771137818694115 2023-01-22 12:07:38.299935: step: 796/463, loss: 0.09927427768707275 2023-01-22 12:07:38.852705: step: 798/463, loss: 0.027209876105189323 2023-01-22 12:07:39.489246: step: 800/463, loss: 0.02445671521127224 2023-01-22 12:07:40.099739: step: 802/463, loss: 0.04573247954249382 2023-01-22 12:07:40.744388: step: 804/463, loss: 0.045140888541936874 2023-01-22 12:07:41.357925: step: 806/463, loss: 0.00567480456084013 2023-01-22 12:07:42.016211: step: 808/463, loss: 0.017955539748072624 2023-01-22 12:07:42.638701: step: 810/463, loss: 0.03771361708641052 2023-01-22 12:07:43.254325: step: 812/463, loss: 0.05207766219973564 2023-01-22 12:07:43.837708: step: 814/463, loss: 0.49033501744270325 2023-01-22 12:07:44.396177: step: 816/463, loss: 0.09133315831422806 2023-01-22 12:07:44.964790: step: 818/463, loss: 0.04222963750362396 2023-01-22 12:07:45.594744: step: 820/463, loss: 0.012394051998853683 2023-01-22 12:07:46.130078: step: 822/463, loss: 0.06862370669841766 2023-01-22 12:07:46.755390: step: 824/463, loss: 0.3051982820034027 2023-01-22 12:07:47.383787: step: 826/463, loss: 0.02007221430540085 2023-01-22 12:07:48.009133: step: 828/463, loss: 0.01869572326540947 2023-01-22 12:07:48.621620: step: 830/463, loss: 2.292128801345825 2023-01-22 12:07:49.255154: step: 832/463, loss: 0.12170281261205673 2023-01-22 12:07:49.841947: step: 834/463, loss: 0.027322562411427498 2023-01-22 12:07:50.504740: step: 836/463, loss: 0.02945788949728012 2023-01-22 12:07:51.087289: step: 838/463, loss: 0.0032249148935079575 2023-01-22 12:07:51.661013: step: 840/463, loss: 0.1998729556798935 2023-01-22 12:07:52.305092: step: 842/463, loss: 0.01237794104963541 2023-01-22 12:07:52.873699: step: 844/463, loss: 0.051994867622852325 2023-01-22 12:07:53.450697: step: 846/463, loss: 0.030856935307383537 2023-01-22 12:07:54.077708: step: 848/463, loss: 0.025783058255910873 2023-01-22 12:07:54.700776: step: 850/463, loss: 0.3581508696079254 2023-01-22 12:07:55.351526: step: 852/463, loss: 0.6835121512413025 2023-01-22 12:07:55.949277: step: 854/463, loss: 0.04245099052786827 2023-01-22 12:07:56.590834: step: 856/463, loss: 0.006474511232227087 2023-01-22 12:07:57.181248: step: 858/463, loss: 0.0609377883374691 2023-01-22 12:07:57.735698: step: 860/463, loss: 0.025864077731966972 2023-01-22 12:07:58.326362: step: 862/463, loss: 0.006679542362689972 2023-01-22 12:07:58.930223: step: 864/463, loss: 0.014713131822645664 2023-01-22 12:07:59.523807: step: 866/463, loss: 0.028294630348682404 2023-01-22 12:08:00.181990: step: 868/463, loss: 0.0871676504611969 2023-01-22 12:08:00.715889: step: 870/463, loss: 0.004147926811128855 2023-01-22 12:08:01.298730: step: 872/463, loss: 0.038954958319664 2023-01-22 12:08:01.841176: step: 874/463, loss: 0.0804627537727356 2023-01-22 12:08:02.449719: step: 876/463, loss: 0.3137335181236267 2023-01-22 12:08:03.040003: step: 878/463, loss: 0.03683378919959068 2023-01-22 12:08:03.675096: step: 880/463, loss: 0.008188140578567982 2023-01-22 12:08:04.369698: step: 882/463, loss: 0.005750198848545551 2023-01-22 12:08:05.028937: step: 884/463, loss: 0.006674158852547407 2023-01-22 12:08:05.650946: step: 886/463, loss: 0.08540567010641098 2023-01-22 12:08:06.264806: step: 888/463, loss: 0.017784301191568375 2023-01-22 12:08:06.863534: step: 890/463, loss: 0.07429122179746628 2023-01-22 12:08:07.460725: step: 892/463, loss: 0.029044259339571 2023-01-22 12:08:08.123297: step: 894/463, loss: 0.04374846816062927 2023-01-22 12:08:08.816898: step: 896/463, loss: 0.020005352795124054 2023-01-22 12:08:09.454993: step: 898/463, loss: 0.01138212624937296 2023-01-22 12:08:10.058101: step: 900/463, loss: 0.13206884264945984 2023-01-22 12:08:10.614066: step: 902/463, loss: 0.0037485677748918533 2023-01-22 12:08:11.197438: step: 904/463, loss: 0.05094145983457565 2023-01-22 12:08:11.865443: step: 906/463, loss: 0.0736050233244896 2023-01-22 12:08:12.468672: step: 908/463, loss: 0.05084287375211716 2023-01-22 12:08:13.081298: step: 910/463, loss: 0.057781118899583817 2023-01-22 12:08:13.653019: step: 912/463, loss: 0.016191350296139717 2023-01-22 12:08:14.199949: step: 914/463, loss: 0.025147559121251106 2023-01-22 12:08:14.750973: step: 916/463, loss: 0.019232554361224174 2023-01-22 12:08:15.317902: step: 918/463, loss: 0.05542212352156639 2023-01-22 12:08:15.916904: step: 920/463, loss: 0.010585951618850231 2023-01-22 12:08:16.611422: step: 922/463, loss: 0.03650703653693199 2023-01-22 12:08:17.189673: step: 924/463, loss: 0.03709341958165169 2023-01-22 12:08:17.769006: step: 926/463, loss: 0.01807141862809658 ================================================== Loss: 0.096 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34965923036465635, 'r': 0.32909104034320596, 'f1': 0.3390634961111819}, 'combined': 0.24983626029244982, 'epoch': 24} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3370045852496824, 'r': 0.3970954763695706, 'f1': 0.36459061459079567}, 'combined': 0.28260133762540146, 'epoch': 24} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31708680784496246, 'r': 0.34295916598031995, 'f1': 0.329515916994765}, 'combined': 0.24280120199614263, 'epoch': 24} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3252721398667122, 'r': 0.40659017483339027, 'f1': 0.3614134887407913}, 'combined': 0.2801386850526708, 'epoch': 24} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3075627981514609, 'r': 0.32623833807716635, 'f1': 0.31662542203806016}, 'combined': 0.2333029425543601, 'epoch': 24} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32667259547365607, 'r': 0.40083448065931143, 'f1': 0.35997351626688473}, 'combined': 0.279022534139882, 'epoch': 24} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2926829268292683, 'r': 0.34285714285714286, 'f1': 0.31578947368421056}, 'combined': 0.2105263157894737, 'epoch': 24} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2857142857142857, 'r': 0.43478260869565216, 'f1': 0.3448275862068965}, 'combined': 0.17241379310344826, 'epoch': 24} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.45588235294117646, 'r': 0.2672413793103448, 'f1': 0.33695652173913043}, 'combined': 0.2246376811594203, 'epoch': 24} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3279711174242424, 'r': 0.3285934535104364, 'f1': 0.32828199052132695}, 'combined': 0.24189199301571457, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33699381251830834, 'r': 0.40575541764612494, 'f1': 0.3681917384478598}, 'combined': 0.2853926393710684, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32926829268292684, 'r': 0.38571428571428573, 'f1': 0.35526315789473684}, 'combined': 0.23684210526315788, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29021588054776093, 'r': 0.325460313859064, 'f1': 0.30682931199235547}, 'combined': 0.2260847562048935, 'epoch': 13} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32837198696491027, 'r': 0.4053341714098111, 'f1': 0.3628166009822085}, 'combined': 0.28122626487616165, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3333333333333333, 'r': 0.4782608695652174, 'f1': 0.3928571428571428}, 'combined': 0.1964285714285714, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30241528588023536, 'r': 0.33799355480732185, 'f1': 0.319216135095804}, 'combined': 0.2352118890179608, 'epoch': 18} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3270213595028262, 'r': 0.3847310111797955, 'f1': 0.3535366048679202}, 'combined': 0.2740331578402061, 'epoch': 18} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 18} ****************************** Epoch: 25 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 12:10:56.273998: step: 2/463, loss: 0.021109038963913918 2023-01-22 12:10:56.870318: step: 4/463, loss: 0.015032060444355011 2023-01-22 12:10:57.473194: step: 6/463, loss: 0.004206069279462099 2023-01-22 12:10:58.044993: step: 8/463, loss: 0.06405909359455109 2023-01-22 12:10:58.743364: step: 10/463, loss: 0.031621113419532776 2023-01-22 12:10:59.313505: step: 12/463, loss: 0.024908708408474922 2023-01-22 12:10:59.845029: step: 14/463, loss: 0.006067849230021238 2023-01-22 12:11:00.467000: step: 16/463, loss: 0.007873881608247757 2023-01-22 12:11:01.123559: step: 18/463, loss: 0.05631072819232941 2023-01-22 12:11:01.707796: step: 20/463, loss: 0.05310551077127457 2023-01-22 12:11:02.324807: step: 22/463, loss: 0.008314433507621288 2023-01-22 12:11:02.898006: step: 24/463, loss: 0.021149680018424988 2023-01-22 12:11:03.525072: step: 26/463, loss: 0.03197513520717621 2023-01-22 12:11:04.151141: step: 28/463, loss: 0.037717562168836594 2023-01-22 12:11:04.785849: step: 30/463, loss: 0.07420859485864639 2023-01-22 12:11:05.366528: step: 32/463, loss: 0.018345043063163757 2023-01-22 12:11:05.925559: step: 34/463, loss: 0.003830455709248781 2023-01-22 12:11:06.548314: step: 36/463, loss: 0.05385967344045639 2023-01-22 12:11:07.281524: step: 38/463, loss: 0.00831892341375351 2023-01-22 12:11:07.868151: step: 40/463, loss: 0.0027974292170256376 2023-01-22 12:11:08.518125: step: 42/463, loss: 0.01534788403660059 2023-01-22 12:11:09.309051: step: 44/463, loss: 0.0005573664093390107 2023-01-22 12:11:09.905525: step: 46/463, loss: 0.010175914503633976 2023-01-22 12:11:10.538897: step: 48/463, loss: 0.08881822228431702 2023-01-22 12:11:11.103149: step: 50/463, loss: 0.02033882401883602 2023-01-22 12:11:11.672583: step: 52/463, loss: 0.016143348067998886 2023-01-22 12:11:12.291938: step: 54/463, loss: 0.7632724046707153 2023-01-22 12:11:12.889207: step: 56/463, loss: 0.14287510514259338 2023-01-22 12:11:13.471664: step: 58/463, loss: 0.10106900334358215 2023-01-22 12:11:14.114375: step: 60/463, loss: 0.23967446386814117 2023-01-22 12:11:14.748173: step: 62/463, loss: 0.047041356563568115 2023-01-22 12:11:15.323356: step: 64/463, loss: 0.005142926704138517 2023-01-22 12:11:15.975171: step: 66/463, loss: 0.013033051043748856 2023-01-22 12:11:16.648700: step: 68/463, loss: 0.015337551012635231 2023-01-22 12:11:17.228577: step: 70/463, loss: 0.0034709018655121326 2023-01-22 12:11:17.840137: step: 72/463, loss: 0.024042075499892235 2023-01-22 12:11:18.402513: step: 74/463, loss: 0.12544138729572296 2023-01-22 12:11:18.994934: step: 76/463, loss: 0.080272376537323 2023-01-22 12:11:19.552567: step: 78/463, loss: 0.006036247126758099 2023-01-22 12:11:20.132298: step: 80/463, loss: 0.06610135734081268 2023-01-22 12:11:20.764205: step: 82/463, loss: 0.0021496752742677927 2023-01-22 12:11:21.397558: step: 84/463, loss: 0.014056055806577206 2023-01-22 12:11:22.028605: step: 86/463, loss: 0.1626308709383011 2023-01-22 12:11:22.597140: step: 88/463, loss: 0.03438613563776016 2023-01-22 12:11:23.199391: step: 90/463, loss: 0.001762519939802587 2023-01-22 12:11:23.766199: step: 92/463, loss: 0.34651169180870056 2023-01-22 12:11:24.422043: step: 94/463, loss: 0.0530976839363575 2023-01-22 12:11:24.987459: step: 96/463, loss: 0.006734954193234444 2023-01-22 12:11:25.660009: step: 98/463, loss: 0.03821629285812378 2023-01-22 12:11:26.243472: step: 100/463, loss: 0.03260607272386551 2023-01-22 12:11:26.902127: step: 102/463, loss: 0.005948987323790789 2023-01-22 12:11:27.481911: step: 104/463, loss: 0.006378699094057083 2023-01-22 12:11:28.109318: step: 106/463, loss: 0.018693476915359497 2023-01-22 12:11:28.714851: step: 108/463, loss: 0.32867875695228577 2023-01-22 12:11:29.340764: step: 110/463, loss: 0.05938457325100899 2023-01-22 12:11:29.941545: step: 112/463, loss: 0.19775789976119995 2023-01-22 12:11:30.617039: step: 114/463, loss: 0.041698336601257324 2023-01-22 12:11:31.199445: step: 116/463, loss: 0.00804909598082304 2023-01-22 12:11:31.843406: step: 118/463, loss: 0.003847773652523756 2023-01-22 12:11:32.473536: step: 120/463, loss: 0.02876480296254158 2023-01-22 12:11:33.122892: step: 122/463, loss: 0.08666608482599258 2023-01-22 12:11:33.792616: step: 124/463, loss: 0.01279232744127512 2023-01-22 12:11:34.384672: step: 126/463, loss: 0.004803666844964027 2023-01-22 12:11:35.026555: step: 128/463, loss: 0.05707241967320442 2023-01-22 12:11:35.621354: step: 130/463, loss: 0.007230275776237249 2023-01-22 12:11:36.259033: step: 132/463, loss: 0.01328516285866499 2023-01-22 12:11:36.853083: step: 134/463, loss: 0.017243817448616028 2023-01-22 12:11:37.497618: step: 136/463, loss: 0.08714798837900162 2023-01-22 12:11:38.081583: step: 138/463, loss: 0.008012217469513416 2023-01-22 12:11:38.689792: step: 140/463, loss: 0.012429896742105484 2023-01-22 12:11:39.336916: step: 142/463, loss: 0.04306786507368088 2023-01-22 12:11:39.961193: step: 144/463, loss: 0.014547363854944706 2023-01-22 12:11:40.595523: step: 146/463, loss: 0.04231032729148865 2023-01-22 12:11:41.221989: step: 148/463, loss: 0.019984044134616852 2023-01-22 12:11:41.808356: step: 150/463, loss: 0.08253974467515945 2023-01-22 12:11:42.358694: step: 152/463, loss: 0.0031620506197214127 2023-01-22 12:11:42.995333: step: 154/463, loss: 0.21046596765518188 2023-01-22 12:11:43.631038: step: 156/463, loss: 0.36317431926727295 2023-01-22 12:11:44.255318: step: 158/463, loss: 0.014552735723555088 2023-01-22 12:11:44.871170: step: 160/463, loss: 0.03379379212856293 2023-01-22 12:11:45.421317: step: 162/463, loss: 0.007758683990687132 2023-01-22 12:11:46.036024: step: 164/463, loss: 0.037862636148929596 2023-01-22 12:11:46.583628: step: 166/463, loss: 0.11231420934200287 2023-01-22 12:11:47.167488: step: 168/463, loss: 0.7467936873435974 2023-01-22 12:11:47.754099: step: 170/463, loss: 0.036791544407606125 2023-01-22 12:11:48.378833: step: 172/463, loss: 0.06604337692260742 2023-01-22 12:11:49.008761: step: 174/463, loss: 0.05442347005009651 2023-01-22 12:11:49.664408: step: 176/463, loss: 0.018999068066477776 2023-01-22 12:11:50.248995: step: 178/463, loss: 0.027576731517910957 2023-01-22 12:11:50.931968: step: 180/463, loss: 0.013368839398026466 2023-01-22 12:11:51.460175: step: 182/463, loss: 0.03161909058690071 2023-01-22 12:11:52.236180: step: 184/463, loss: 0.39545440673828125 2023-01-22 12:11:52.846483: step: 186/463, loss: 0.024257712066173553 2023-01-22 12:11:53.447854: step: 188/463, loss: 0.013289108872413635 2023-01-22 12:11:54.077271: step: 190/463, loss: 0.02358366549015045 2023-01-22 12:11:54.716916: step: 192/463, loss: 0.023323222994804382 2023-01-22 12:11:55.324602: step: 194/463, loss: 0.03617088869214058 2023-01-22 12:11:55.901952: step: 196/463, loss: 0.4803995192050934 2023-01-22 12:11:56.521167: step: 198/463, loss: 0.012015839107334614 2023-01-22 12:11:57.125181: step: 200/463, loss: 0.009398750960826874 2023-01-22 12:11:57.727790: step: 202/463, loss: 0.0317588709294796 2023-01-22 12:11:58.358505: step: 204/463, loss: 0.026876982301473618 2023-01-22 12:11:58.993130: step: 206/463, loss: 0.014062246307730675 2023-01-22 12:11:59.571578: step: 208/463, loss: 0.01816861890256405 2023-01-22 12:12:00.213905: step: 210/463, loss: 0.005815466400235891 2023-01-22 12:12:00.832920: step: 212/463, loss: 0.0068742139264941216 2023-01-22 12:12:01.489001: step: 214/463, loss: 0.10584881156682968 2023-01-22 12:12:02.075511: step: 216/463, loss: 0.0007289816858246922 2023-01-22 12:12:02.634769: step: 218/463, loss: 0.004263969603925943 2023-01-22 12:12:03.247269: step: 220/463, loss: 0.24089030921459198 2023-01-22 12:12:03.788235: step: 222/463, loss: 0.003271252615377307 2023-01-22 12:12:04.439849: step: 224/463, loss: 0.007085755467414856 2023-01-22 12:12:05.096882: step: 226/463, loss: 0.05333036184310913 2023-01-22 12:12:05.724718: step: 228/463, loss: 0.019386721774935722 2023-01-22 12:12:06.333298: step: 230/463, loss: 0.005321658216416836 2023-01-22 12:12:06.910612: step: 232/463, loss: 0.075748972594738 2023-01-22 12:12:07.459546: step: 234/463, loss: 0.005093716084957123 2023-01-22 12:12:08.090320: step: 236/463, loss: 0.03959015756845474 2023-01-22 12:12:08.648686: step: 238/463, loss: 0.03758808225393295 2023-01-22 12:12:09.251759: step: 240/463, loss: 0.023736048489809036 2023-01-22 12:12:09.934653: step: 242/463, loss: 0.007279905490577221 2023-01-22 12:12:10.502658: step: 244/463, loss: 0.01607891544699669 2023-01-22 12:12:11.133922: step: 246/463, loss: 0.052973464131355286 2023-01-22 12:12:11.714827: step: 248/463, loss: 0.1929202824831009 2023-01-22 12:12:12.302910: step: 250/463, loss: 0.009210491552948952 2023-01-22 12:12:12.909611: step: 252/463, loss: 0.012920624576508999 2023-01-22 12:12:13.529296: step: 254/463, loss: 0.062179360538721085 2023-01-22 12:12:14.176059: step: 256/463, loss: 0.03255145251750946 2023-01-22 12:12:14.751185: step: 258/463, loss: 0.03749460354447365 2023-01-22 12:12:15.344582: step: 260/463, loss: 0.05946195498108864 2023-01-22 12:12:15.943163: step: 262/463, loss: 0.014497810043394566 2023-01-22 12:12:16.518025: step: 264/463, loss: 0.007724605035036802 2023-01-22 12:12:17.150025: step: 266/463, loss: 0.015686891973018646 2023-01-22 12:12:17.839357: step: 268/463, loss: 0.0695074126124382 2023-01-22 12:12:18.498237: step: 270/463, loss: 0.07650492340326309 2023-01-22 12:12:19.028539: step: 272/463, loss: 0.01807987131178379 2023-01-22 12:12:19.681416: step: 274/463, loss: 0.04756436124444008 2023-01-22 12:12:20.295135: step: 276/463, loss: 0.055589038878679276 2023-01-22 12:12:20.869144: step: 278/463, loss: 0.054017044603824615 2023-01-22 12:12:21.470730: step: 280/463, loss: 0.06430082768201828 2023-01-22 12:12:22.086130: step: 282/463, loss: 0.006243611220270395 2023-01-22 12:12:22.661280: step: 284/463, loss: 0.05633990466594696 2023-01-22 12:12:23.328874: step: 286/463, loss: 0.005521700717508793 2023-01-22 12:12:23.925048: step: 288/463, loss: 0.04734700173139572 2023-01-22 12:12:24.540205: step: 290/463, loss: 0.0034006533678621054 2023-01-22 12:12:25.196952: step: 292/463, loss: 0.032506439834833145 2023-01-22 12:12:25.831197: step: 294/463, loss: 0.010944775305688381 2023-01-22 12:12:26.394115: step: 296/463, loss: 0.21538224816322327 2023-01-22 12:12:26.999705: step: 298/463, loss: 0.002352335723116994 2023-01-22 12:12:27.604975: step: 300/463, loss: 0.018740560859441757 2023-01-22 12:12:28.260875: step: 302/463, loss: 0.03530663996934891 2023-01-22 12:12:28.856502: step: 304/463, loss: 0.015676328912377357 2023-01-22 12:12:29.522774: step: 306/463, loss: 0.018409233540296555 2023-01-22 12:12:30.152647: step: 308/463, loss: 0.006620985455811024 2023-01-22 12:12:30.802742: step: 310/463, loss: 0.2368318885564804 2023-01-22 12:12:31.382561: step: 312/463, loss: 0.08788546919822693 2023-01-22 12:12:31.996829: step: 314/463, loss: 0.49794620275497437 2023-01-22 12:12:32.579100: step: 316/463, loss: 0.1598878800868988 2023-01-22 12:12:33.249154: step: 318/463, loss: 0.024894490838050842 2023-01-22 12:12:33.888481: step: 320/463, loss: 0.004775006789714098 2023-01-22 12:12:34.548171: step: 322/463, loss: 0.08404680341482162 2023-01-22 12:12:35.145775: step: 324/463, loss: 0.0188343096524477 2023-01-22 12:12:35.712639: step: 326/463, loss: 0.10189911723136902 2023-01-22 12:12:36.404688: step: 328/463, loss: 1.1384214162826538 2023-01-22 12:12:37.106403: step: 330/463, loss: 0.12365762144327164 2023-01-22 12:12:37.746563: step: 332/463, loss: 0.01701941154897213 2023-01-22 12:12:38.335108: step: 334/463, loss: 0.13388878107070923 2023-01-22 12:12:38.896546: step: 336/463, loss: 0.018471360206604004 2023-01-22 12:12:39.482748: step: 338/463, loss: 0.002054607030004263 2023-01-22 12:12:40.107863: step: 340/463, loss: 0.11944381892681122 2023-01-22 12:12:40.648914: step: 342/463, loss: 0.0077651264145970345 2023-01-22 12:12:41.285315: step: 344/463, loss: 0.020249146968126297 2023-01-22 12:12:41.880192: step: 346/463, loss: 0.022314513102173805 2023-01-22 12:12:42.474712: step: 348/463, loss: 0.04259047284722328 2023-01-22 12:12:43.024050: step: 350/463, loss: 0.025952594354748726 2023-01-22 12:12:43.708782: step: 352/463, loss: 0.06064560264348984 2023-01-22 12:12:44.290804: step: 354/463, loss: 0.021432967856526375 2023-01-22 12:12:44.973087: step: 356/463, loss: 0.055798523128032684 2023-01-22 12:12:45.607107: step: 358/463, loss: 0.008654449135065079 2023-01-22 12:12:46.223786: step: 360/463, loss: 0.017986932769417763 2023-01-22 12:12:46.831884: step: 362/463, loss: 0.017297813668847084 2023-01-22 12:12:47.445471: step: 364/463, loss: 0.06619615107774734 2023-01-22 12:12:48.007425: step: 366/463, loss: 0.07104445993900299 2023-01-22 12:12:48.634716: step: 368/463, loss: 0.0013265646994113922 2023-01-22 12:12:49.289955: step: 370/463, loss: 0.0579276867210865 2023-01-22 12:12:49.860718: step: 372/463, loss: 0.29696202278137207 2023-01-22 12:12:50.508913: step: 374/463, loss: 0.011779862456023693 2023-01-22 12:12:51.130498: step: 376/463, loss: 0.0030276374891400337 2023-01-22 12:12:51.753254: step: 378/463, loss: 0.04143163561820984 2023-01-22 12:12:52.364065: step: 380/463, loss: 0.046900972723960876 2023-01-22 12:12:52.932759: step: 382/463, loss: 0.009964827448129654 2023-01-22 12:12:53.550566: step: 384/463, loss: 0.05939556285738945 2023-01-22 12:12:54.127491: step: 386/463, loss: 0.018957680091261864 2023-01-22 12:12:54.674959: step: 388/463, loss: 0.033746786415576935 2023-01-22 12:12:55.282509: step: 390/463, loss: 0.15031953155994415 2023-01-22 12:12:55.848273: step: 392/463, loss: 0.063644178211689 2023-01-22 12:12:56.500014: step: 394/463, loss: 0.03243599832057953 2023-01-22 12:12:57.068296: step: 396/463, loss: 0.1770835518836975 2023-01-22 12:12:57.677858: step: 398/463, loss: 0.010382718406617641 2023-01-22 12:12:58.235609: step: 400/463, loss: 0.06889568269252777 2023-01-22 12:12:58.890272: step: 402/463, loss: 0.02138284407556057 2023-01-22 12:12:59.568859: step: 404/463, loss: 0.11205138266086578 2023-01-22 12:13:00.140780: step: 406/463, loss: 0.0032653072848916054 2023-01-22 12:13:00.761645: step: 408/463, loss: 0.00014596592518500984 2023-01-22 12:13:01.422379: step: 410/463, loss: 0.04450834542512894 2023-01-22 12:13:02.044047: step: 412/463, loss: 0.06521690636873245 2023-01-22 12:13:02.663426: step: 414/463, loss: 0.00714716874063015 2023-01-22 12:13:03.299237: step: 416/463, loss: 0.0194182638078928 2023-01-22 12:13:03.912135: step: 418/463, loss: 0.0040181344375014305 2023-01-22 12:13:04.508321: step: 420/463, loss: 0.012683051638305187 2023-01-22 12:13:05.082545: step: 422/463, loss: 0.013107290491461754 2023-01-22 12:13:05.697995: step: 424/463, loss: 0.013452489860355854 2023-01-22 12:13:06.244639: step: 426/463, loss: 0.0151299349963665 2023-01-22 12:13:06.817884: step: 428/463, loss: 0.011891954578459263 2023-01-22 12:13:07.361465: step: 430/463, loss: 0.04470183327794075 2023-01-22 12:13:07.988941: step: 432/463, loss: 0.02655380591750145 2023-01-22 12:13:08.572780: step: 434/463, loss: 0.0958181768655777 2023-01-22 12:13:09.161112: step: 436/463, loss: 0.018091343343257904 2023-01-22 12:13:09.748331: step: 438/463, loss: 0.044184423983097076 2023-01-22 12:13:10.332419: step: 440/463, loss: 0.014248855412006378 2023-01-22 12:13:10.864360: step: 442/463, loss: 0.01624976098537445 2023-01-22 12:13:11.441600: step: 444/463, loss: 0.04363221302628517 2023-01-22 12:13:12.072300: step: 446/463, loss: 0.00930438656359911 2023-01-22 12:13:12.703498: step: 448/463, loss: 0.047498274594545364 2023-01-22 12:13:13.276200: step: 450/463, loss: 0.017569968476891518 2023-01-22 12:13:13.855471: step: 452/463, loss: 0.012686186470091343 2023-01-22 12:13:14.467639: step: 454/463, loss: 0.003199036465957761 2023-01-22 12:13:15.063605: step: 456/463, loss: 0.02025398425757885 2023-01-22 12:13:15.665075: step: 458/463, loss: 0.010163580998778343 2023-01-22 12:13:16.293275: step: 460/463, loss: 0.11155115813016891 2023-01-22 12:13:16.974004: step: 462/463, loss: 0.023705290630459785 2023-01-22 12:13:17.522037: step: 464/463, loss: 0.012525953352451324 2023-01-22 12:13:18.136455: step: 466/463, loss: 0.025331133976578712 2023-01-22 12:13:18.750575: step: 468/463, loss: 0.012545017525553703 2023-01-22 12:13:19.404440: step: 470/463, loss: 0.0689067617058754 2023-01-22 12:13:19.995227: step: 472/463, loss: 0.0471944734454155 2023-01-22 12:13:20.610340: step: 474/463, loss: 0.04190956801176071 2023-01-22 12:13:21.232058: step: 476/463, loss: 0.03935954347252846 2023-01-22 12:13:21.892179: step: 478/463, loss: 0.8625763058662415 2023-01-22 12:13:22.459344: step: 480/463, loss: 0.058481764048337936 2023-01-22 12:13:23.059856: step: 482/463, loss: 0.06489741802215576 2023-01-22 12:13:23.663322: step: 484/463, loss: 0.032589737325906754 2023-01-22 12:13:24.250311: step: 486/463, loss: 0.006202941294759512 2023-01-22 12:13:24.798465: step: 488/463, loss: 0.04396497458219528 2023-01-22 12:13:25.432301: step: 490/463, loss: 0.012699656188488007 2023-01-22 12:13:26.030154: step: 492/463, loss: 0.005220841150730848 2023-01-22 12:13:26.586475: step: 494/463, loss: 0.09663787484169006 2023-01-22 12:13:27.209818: step: 496/463, loss: 0.04988407343626022 2023-01-22 12:13:27.868202: step: 498/463, loss: 0.020586606115102768 2023-01-22 12:13:28.432905: step: 500/463, loss: 0.01781546324491501 2023-01-22 12:13:29.081124: step: 502/463, loss: 0.01299281232059002 2023-01-22 12:13:29.702332: step: 504/463, loss: 0.015273337252438068 2023-01-22 12:13:30.281941: step: 506/463, loss: 0.1717660129070282 2023-01-22 12:13:30.881511: step: 508/463, loss: 0.1285363733768463 2023-01-22 12:13:31.449130: step: 510/463, loss: 0.03423415124416351 2023-01-22 12:13:32.144782: step: 512/463, loss: 0.014066099189221859 2023-01-22 12:13:32.748264: step: 514/463, loss: 0.03062060847878456 2023-01-22 12:13:33.319786: step: 516/463, loss: 0.042481083422899246 2023-01-22 12:13:33.968848: step: 518/463, loss: 0.04198916628956795 2023-01-22 12:13:34.569227: step: 520/463, loss: 0.004535003565251827 2023-01-22 12:13:35.185645: step: 522/463, loss: 0.007059819996356964 2023-01-22 12:13:35.769639: step: 524/463, loss: 0.027343953028321266 2023-01-22 12:13:36.420612: step: 526/463, loss: 0.030763663351535797 2023-01-22 12:13:37.013855: step: 528/463, loss: 0.08413953334093094 2023-01-22 12:13:37.628637: step: 530/463, loss: 0.002163316821679473 2023-01-22 12:13:38.290062: step: 532/463, loss: 0.041637860238552094 2023-01-22 12:13:38.895883: step: 534/463, loss: 0.19003233313560486 2023-01-22 12:13:39.488492: step: 536/463, loss: 0.04156087338924408 2023-01-22 12:13:40.090172: step: 538/463, loss: 0.02915462851524353 2023-01-22 12:13:40.685411: step: 540/463, loss: 0.000522002053912729 2023-01-22 12:13:41.297654: step: 542/463, loss: 0.1276184618473053 2023-01-22 12:13:41.921655: step: 544/463, loss: 0.0008053510682657361 2023-01-22 12:13:42.538033: step: 546/463, loss: 0.0077027203515172005 2023-01-22 12:13:43.132051: step: 548/463, loss: 0.042398128658533096 2023-01-22 12:13:43.771997: step: 550/463, loss: 0.019852623343467712 2023-01-22 12:13:44.356956: step: 552/463, loss: 0.6999385952949524 2023-01-22 12:13:44.953793: step: 554/463, loss: 0.026163244619965553 2023-01-22 12:13:45.496247: step: 556/463, loss: 0.019119925796985626 2023-01-22 12:13:46.092491: step: 558/463, loss: 0.03396812453866005 2023-01-22 12:13:46.768123: step: 560/463, loss: 0.02410472184419632 2023-01-22 12:13:47.411617: step: 562/463, loss: 0.37036454677581787 2023-01-22 12:13:48.009971: step: 564/463, loss: 0.05091991648077965 2023-01-22 12:13:48.674766: step: 566/463, loss: 0.046076055616140366 2023-01-22 12:13:49.291145: step: 568/463, loss: 0.017489347606897354 2023-01-22 12:13:49.859180: step: 570/463, loss: 0.0038825231604278088 2023-01-22 12:13:50.425416: step: 572/463, loss: 0.19779585301876068 2023-01-22 12:13:51.019960: step: 574/463, loss: 1.21391761302948 2023-01-22 12:13:51.623907: step: 576/463, loss: 0.11828425526618958 2023-01-22 12:13:52.285209: step: 578/463, loss: 0.038361556828022 2023-01-22 12:13:52.942865: step: 580/463, loss: 0.11542002856731415 2023-01-22 12:13:53.511981: step: 582/463, loss: 0.4270938038825989 2023-01-22 12:13:54.142556: step: 584/463, loss: 0.02628210000693798 2023-01-22 12:13:54.690794: step: 586/463, loss: 0.006752349901944399 2023-01-22 12:13:55.331125: step: 588/463, loss: 0.0343557707965374 2023-01-22 12:13:55.977438: step: 590/463, loss: 0.013685652986168861 2023-01-22 12:13:56.601364: step: 592/463, loss: 0.1740642935037613 2023-01-22 12:13:57.215874: step: 594/463, loss: 0.40346136689186096 2023-01-22 12:13:57.847795: step: 596/463, loss: 0.01499965600669384 2023-01-22 12:13:58.460834: step: 598/463, loss: 0.049805257469415665 2023-01-22 12:13:59.128441: step: 600/463, loss: 0.017432285472750664 2023-01-22 12:13:59.715454: step: 602/463, loss: 0.05167895182967186 2023-01-22 12:14:00.304874: step: 604/463, loss: 0.08549889177083969 2023-01-22 12:14:01.015394: step: 606/463, loss: 0.027763158082962036 2023-01-22 12:14:01.617984: step: 608/463, loss: 0.18631170690059662 2023-01-22 12:14:02.182647: step: 610/463, loss: 0.019247034564614296 2023-01-22 12:14:02.838682: step: 612/463, loss: 0.012513337656855583 2023-01-22 12:14:03.421748: step: 614/463, loss: 0.09164801239967346 2023-01-22 12:14:04.006173: step: 616/463, loss: 0.01599210686981678 2023-01-22 12:14:04.710525: step: 618/463, loss: 0.01972571574151516 2023-01-22 12:14:05.273961: step: 620/463, loss: 0.05408007279038429 2023-01-22 12:14:05.880901: step: 622/463, loss: 0.016336945816874504 2023-01-22 12:14:06.497088: step: 624/463, loss: 0.002914158161729574 2023-01-22 12:14:07.079936: step: 626/463, loss: 0.03858227655291557 2023-01-22 12:14:07.755222: step: 628/463, loss: 0.0351007878780365 2023-01-22 12:14:08.327480: step: 630/463, loss: 0.038395386189222336 2023-01-22 12:14:08.934063: step: 632/463, loss: 0.07069990783929825 2023-01-22 12:14:09.509424: step: 634/463, loss: 0.10475531220436096 2023-01-22 12:14:10.135733: step: 636/463, loss: 6.333949565887451 2023-01-22 12:14:10.754816: step: 638/463, loss: 0.04134509339928627 2023-01-22 12:14:11.423094: step: 640/463, loss: 0.04865711182355881 2023-01-22 12:14:12.070463: step: 642/463, loss: 0.019447248429059982 2023-01-22 12:14:12.661351: step: 644/463, loss: 0.01640874147415161 2023-01-22 12:14:13.295275: step: 646/463, loss: 0.015553612262010574 2023-01-22 12:14:13.856512: step: 648/463, loss: 0.004172757733613253 2023-01-22 12:14:14.446781: step: 650/463, loss: 0.0319741889834404 2023-01-22 12:14:14.982632: step: 652/463, loss: 0.02179543673992157 2023-01-22 12:14:15.667106: step: 654/463, loss: 0.014115767553448677 2023-01-22 12:14:16.257495: step: 656/463, loss: 0.0225237924605608 2023-01-22 12:14:16.874311: step: 658/463, loss: 0.05737101659178734 2023-01-22 12:14:17.509073: step: 660/463, loss: 0.09486011415719986 2023-01-22 12:14:18.125240: step: 662/463, loss: 0.04127555713057518 2023-01-22 12:14:18.733849: step: 664/463, loss: 0.019895924255251884 2023-01-22 12:14:19.322082: step: 666/463, loss: 0.0013534717727452517 2023-01-22 12:14:19.905798: step: 668/463, loss: 0.01699584722518921 2023-01-22 12:14:20.503537: step: 670/463, loss: 0.026871955022215843 2023-01-22 12:14:21.114297: step: 672/463, loss: 0.021039508283138275 2023-01-22 12:14:21.739860: step: 674/463, loss: 0.42728284001350403 2023-01-22 12:14:22.343751: step: 676/463, loss: 0.38508719205856323 2023-01-22 12:14:22.900377: step: 678/463, loss: 0.01717173308134079 2023-01-22 12:14:23.454108: step: 680/463, loss: 0.032114140689373016 2023-01-22 12:14:24.063183: step: 682/463, loss: 0.03041275404393673 2023-01-22 12:14:24.688009: step: 684/463, loss: 0.15438437461853027 2023-01-22 12:14:25.255948: step: 686/463, loss: 0.05008997395634651 2023-01-22 12:14:25.834984: step: 688/463, loss: 0.006459483411163092 2023-01-22 12:14:26.547998: step: 690/463, loss: 0.014323408715426922 2023-01-22 12:14:27.150239: step: 692/463, loss: 0.05100478231906891 2023-01-22 12:14:27.767235: step: 694/463, loss: 0.01353941299021244 2023-01-22 12:14:28.341700: step: 696/463, loss: 0.06035999208688736 2023-01-22 12:14:28.937300: step: 698/463, loss: 0.264396607875824 2023-01-22 12:14:29.583022: step: 700/463, loss: 1.3326385021209717 2023-01-22 12:14:30.198481: step: 702/463, loss: 0.01582072116434574 2023-01-22 12:14:30.804383: step: 704/463, loss: 0.04864613711833954 2023-01-22 12:14:31.401254: step: 706/463, loss: 0.07334981113672256 2023-01-22 12:14:32.017492: step: 708/463, loss: 0.066672183573246 2023-01-22 12:14:32.620950: step: 710/463, loss: 0.10230937600135803 2023-01-22 12:14:33.272850: step: 712/463, loss: 0.05611540749669075 2023-01-22 12:14:33.882359: step: 714/463, loss: 0.0199806597083807 2023-01-22 12:14:34.485298: step: 716/463, loss: 0.0004946466651745141 2023-01-22 12:14:35.066355: step: 718/463, loss: 0.008347051218152046 2023-01-22 12:14:35.688018: step: 720/463, loss: 0.03771701082587242 2023-01-22 12:14:36.290856: step: 722/463, loss: 0.04547347500920296 2023-01-22 12:14:36.898691: step: 724/463, loss: 0.008449878543615341 2023-01-22 12:14:37.506147: step: 726/463, loss: 0.07088819146156311 2023-01-22 12:14:38.102827: step: 728/463, loss: 0.0038683980237692595 2023-01-22 12:14:38.745729: step: 730/463, loss: 0.022889219224452972 2023-01-22 12:14:39.379536: step: 732/463, loss: 0.03200816363096237 2023-01-22 12:14:39.942927: step: 734/463, loss: 0.08053930848836899 2023-01-22 12:14:40.625297: step: 736/463, loss: 0.10302331298589706 2023-01-22 12:14:41.269878: step: 738/463, loss: 0.012389500625431538 2023-01-22 12:14:41.961774: step: 740/463, loss: 0.09445179253816605 2023-01-22 12:14:42.471724: step: 742/463, loss: 0.011306442320346832 2023-01-22 12:14:43.105533: step: 744/463, loss: 0.027445048093795776 2023-01-22 12:14:43.775818: step: 746/463, loss: 0.0002367985580349341 2023-01-22 12:14:44.424988: step: 748/463, loss: 0.03331078588962555 2023-01-22 12:14:45.123594: step: 750/463, loss: 0.07283668965101242 2023-01-22 12:14:45.673690: step: 752/463, loss: 0.011207148432731628 2023-01-22 12:14:46.275801: step: 754/463, loss: 0.05406389385461807 2023-01-22 12:14:46.861672: step: 756/463, loss: 0.06374567747116089 2023-01-22 12:14:47.439145: step: 758/463, loss: 0.11186698079109192 2023-01-22 12:14:48.018296: step: 760/463, loss: 0.0008981148712337017 2023-01-22 12:14:48.689866: step: 762/463, loss: 0.03608124330639839 2023-01-22 12:14:49.274057: step: 764/463, loss: 0.062166716903448105 2023-01-22 12:14:49.898977: step: 766/463, loss: 0.010939487256109715 2023-01-22 12:14:50.561987: step: 768/463, loss: 0.025811409577727318 2023-01-22 12:14:51.118588: step: 770/463, loss: 0.006605848204344511 2023-01-22 12:14:51.655509: step: 772/463, loss: 0.023401781916618347 2023-01-22 12:14:52.269705: step: 774/463, loss: 0.06945550441741943 2023-01-22 12:14:52.802952: step: 776/463, loss: 0.005378463305532932 2023-01-22 12:14:53.378334: step: 778/463, loss: 0.0723014697432518 2023-01-22 12:14:54.006736: step: 780/463, loss: 0.015718448907136917 2023-01-22 12:14:54.555114: step: 782/463, loss: 0.007162286899983883 2023-01-22 12:14:55.161059: step: 784/463, loss: 0.13311530649662018 2023-01-22 12:14:55.797754: step: 786/463, loss: 0.0475311279296875 2023-01-22 12:14:56.363863: step: 788/463, loss: 0.5126577615737915 2023-01-22 12:14:56.958807: step: 790/463, loss: 0.014258397743105888 2023-01-22 12:14:57.581969: step: 792/463, loss: 0.014141318388283253 2023-01-22 12:14:58.163679: step: 794/463, loss: 0.00015304111002478749 2023-01-22 12:14:58.768113: step: 796/463, loss: 0.5294798016548157 2023-01-22 12:14:59.350700: step: 798/463, loss: 0.10145343095064163 2023-01-22 12:15:00.011797: step: 800/463, loss: 0.0023837059270590544 2023-01-22 12:15:00.548981: step: 802/463, loss: 0.021642489358782768 2023-01-22 12:15:01.240182: step: 804/463, loss: 0.095314159989357 2023-01-22 12:15:01.885334: step: 806/463, loss: 0.036659009754657745 2023-01-22 12:15:02.506147: step: 808/463, loss: 0.035530366003513336 2023-01-22 12:15:03.113999: step: 810/463, loss: 0.11219654977321625 2023-01-22 12:15:03.742606: step: 812/463, loss: 0.011211954988539219 2023-01-22 12:15:04.325198: step: 814/463, loss: 0.06999460607767105 2023-01-22 12:15:04.975293: step: 816/463, loss: 0.670013964176178 2023-01-22 12:15:05.630346: step: 818/463, loss: 0.057076532393693924 2023-01-22 12:15:06.305775: step: 820/463, loss: 0.021547608077526093 2023-01-22 12:15:06.891332: step: 822/463, loss: 0.0015010681236162782 2023-01-22 12:15:07.483317: step: 824/463, loss: 0.029652509838342667 2023-01-22 12:15:08.078759: step: 826/463, loss: 0.04036566987633705 2023-01-22 12:15:08.676186: step: 828/463, loss: 0.021810103207826614 2023-01-22 12:15:09.269133: step: 830/463, loss: 0.2934378683567047 2023-01-22 12:15:09.870928: step: 832/463, loss: 0.05934227257966995 2023-01-22 12:15:10.463002: step: 834/463, loss: 0.04169616103172302 2023-01-22 12:15:11.099591: step: 836/463, loss: 0.06327860802412033 2023-01-22 12:15:11.723763: step: 838/463, loss: 0.04495702311396599 2023-01-22 12:15:12.336141: step: 840/463, loss: 1.1065717935562134 2023-01-22 12:15:12.935459: step: 842/463, loss: 1.1235878467559814 2023-01-22 12:15:13.590930: step: 844/463, loss: 0.01518157310783863 2023-01-22 12:15:14.268663: step: 846/463, loss: 0.042465049773454666 2023-01-22 12:15:14.829161: step: 848/463, loss: 0.020969387143850327 2023-01-22 12:15:15.402438: step: 850/463, loss: 0.024549081921577454 2023-01-22 12:15:16.104505: step: 852/463, loss: 0.7430814504623413 2023-01-22 12:15:16.699172: step: 854/463, loss: 0.014579113572835922 2023-01-22 12:15:17.333130: step: 856/463, loss: 0.03826327621936798 2023-01-22 12:15:17.903237: step: 858/463, loss: 0.03686225414276123 2023-01-22 12:15:18.497426: step: 860/463, loss: 0.06891360878944397 2023-01-22 12:15:19.089240: step: 862/463, loss: 0.0070788320153951645 2023-01-22 12:15:19.698044: step: 864/463, loss: 0.007806743495166302 2023-01-22 12:15:20.293414: step: 866/463, loss: 0.06779699772596359 2023-01-22 12:15:20.969900: step: 868/463, loss: 0.012989659793674946 2023-01-22 12:15:21.588114: step: 870/463, loss: 0.08983132243156433 2023-01-22 12:15:22.245130: step: 872/463, loss: 0.0053309425711631775 2023-01-22 12:15:22.912076: step: 874/463, loss: 0.0993824377655983 2023-01-22 12:15:23.582995: step: 876/463, loss: 0.010437862016260624 2023-01-22 12:15:24.146462: step: 878/463, loss: 0.02426217496395111 2023-01-22 12:15:24.751331: step: 880/463, loss: 0.0006782116834074259 2023-01-22 12:15:25.344085: step: 882/463, loss: 0.04411047697067261 2023-01-22 12:15:25.937339: step: 884/463, loss: 0.45282459259033203 2023-01-22 12:15:26.520328: step: 886/463, loss: 0.20769591629505157 2023-01-22 12:15:27.105206: step: 888/463, loss: 0.05739240348339081 2023-01-22 12:15:27.712929: step: 890/463, loss: 0.04293574020266533 2023-01-22 12:15:28.346039: step: 892/463, loss: 0.012517374008893967 2023-01-22 12:15:28.961771: step: 894/463, loss: 0.09229957312345505 2023-01-22 12:15:29.577011: step: 896/463, loss: 0.03911591321229935 2023-01-22 12:15:30.188933: step: 898/463, loss: 0.10301031172275543 2023-01-22 12:15:30.804732: step: 900/463, loss: 0.021950650960206985 2023-01-22 12:15:31.371255: step: 902/463, loss: 0.08346524089574814 2023-01-22 12:15:31.964155: step: 904/463, loss: 0.011329183354973793 2023-01-22 12:15:32.553346: step: 906/463, loss: 0.025982363149523735 2023-01-22 12:15:33.178398: step: 908/463, loss: 0.011318699456751347 2023-01-22 12:15:33.776821: step: 910/463, loss: 0.0502050444483757 2023-01-22 12:15:34.493403: step: 912/463, loss: 0.031187161803245544 2023-01-22 12:15:35.136072: step: 914/463, loss: 0.5288580656051636 2023-01-22 12:15:35.752380: step: 916/463, loss: 0.030677052214741707 2023-01-22 12:15:36.329213: step: 918/463, loss: 0.010374422185122967 2023-01-22 12:15:36.895221: step: 920/463, loss: 0.07955756783485413 2023-01-22 12:15:37.526412: step: 922/463, loss: 0.04105282574892044 2023-01-22 12:15:38.126403: step: 924/463, loss: 0.03244278207421303 2023-01-22 12:15:38.727475: step: 926/463, loss: 0.07540711015462875 ================================================== Loss: 0.091 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33552660666518563, 'r': 0.32342982198465714, 'f1': 0.32936718103558316}, 'combined': 0.24269160707885074, 'epoch': 25} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.342329042309785, 'r': 0.3920422672040369, 'f1': 0.36550298776177564}, 'combined': 0.2833085359684577, 'epoch': 25} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31215268642523897, 'r': 0.3453226113584712, 'f1': 0.3279009300647105}, 'combined': 0.24161121162662877, 'epoch': 25} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3297974541185763, 'r': 0.40769997774768857, 'f1': 0.3646342587665312}, 'combined': 0.28263516708219166, 'epoch': 25} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29629151775092494, 'r': 0.3204671064858201, 'f1': 0.3079054970246622}, 'combined': 0.22687773464975108, 'epoch': 25} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3248457821756092, 'r': 0.3872472237883871, 'f1': 0.3533123517666794}, 'combined': 0.27385933486221087, 'epoch': 25} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.265625, 'r': 0.36428571428571427, 'f1': 0.30722891566265054}, 'combined': 0.20481927710843367, 'epoch': 25} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2569444444444444, 'r': 0.40217391304347827, 'f1': 0.31355932203389825}, 'combined': 0.15677966101694912, 'epoch': 25} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39705882352941174, 'r': 0.23275862068965517, 'f1': 0.2934782608695652}, 'combined': 0.19565217391304346, 'epoch': 25} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3279711174242424, 'r': 0.3285934535104364, 'f1': 0.32828199052132695}, 'combined': 0.24189199301571457, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33699381251830834, 'r': 0.40575541764612494, 'f1': 0.3681917384478598}, 'combined': 0.2853926393710684, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32926829268292684, 'r': 0.38571428571428573, 'f1': 0.35526315789473684}, 'combined': 0.23684210526315788, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29021588054776093, 'r': 0.325460313859064, 'f1': 0.30682931199235547}, 'combined': 0.2260847562048935, 'epoch': 13} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32837198696491027, 'r': 0.4053341714098111, 'f1': 0.3628166009822085}, 'combined': 0.28122626487616165, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3333333333333333, 'r': 0.4782608695652174, 'f1': 0.3928571428571428}, 'combined': 0.1964285714285714, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30241528588023536, 'r': 0.33799355480732185, 'f1': 0.319216135095804}, 'combined': 0.2352118890179608, 'epoch': 18} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3270213595028262, 'r': 0.3847310111797955, 'f1': 0.3535366048679202}, 'combined': 0.2740331578402061, 'epoch': 18} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 18} ****************************** Epoch: 26 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 12:18:16.231446: step: 2/463, loss: 0.060164835304021835 2023-01-22 12:18:16.831287: step: 4/463, loss: 0.02558067813515663 2023-01-22 12:18:17.552252: step: 6/463, loss: 0.025403395295143127 2023-01-22 12:18:18.163913: step: 8/463, loss: 0.03196694701910019 2023-01-22 12:18:18.803973: step: 10/463, loss: 0.03795040398836136 2023-01-22 12:18:19.418354: step: 12/463, loss: 0.01664900593459606 2023-01-22 12:18:20.038313: step: 14/463, loss: 0.06506693363189697 2023-01-22 12:18:20.657815: step: 16/463, loss: 0.00960465706884861 2023-01-22 12:18:21.300055: step: 18/463, loss: 0.04162848740816116 2023-01-22 12:18:21.952112: step: 20/463, loss: 0.012118957936763763 2023-01-22 12:18:22.578521: step: 22/463, loss: 0.01889442279934883 2023-01-22 12:18:23.146977: step: 24/463, loss: 0.0547652430832386 2023-01-22 12:18:23.755499: step: 26/463, loss: 0.02215997315943241 2023-01-22 12:18:24.333043: step: 28/463, loss: 0.0023843380622565746 2023-01-22 12:18:24.937795: step: 30/463, loss: 0.005318155977874994 2023-01-22 12:18:25.509983: step: 32/463, loss: 0.016406558454036713 2023-01-22 12:18:26.124950: step: 34/463, loss: 0.003785031149163842 2023-01-22 12:18:26.699885: step: 36/463, loss: 0.0027621760964393616 2023-01-22 12:18:27.364195: step: 38/463, loss: 0.012590077705681324 2023-01-22 12:18:28.024400: step: 40/463, loss: 0.012336171232163906 2023-01-22 12:18:28.593248: step: 42/463, loss: 0.02353564463555813 2023-01-22 12:18:29.223758: step: 44/463, loss: 0.010895566083490849 2023-01-22 12:18:29.847931: step: 46/463, loss: 0.023744475096464157 2023-01-22 12:18:30.512603: step: 48/463, loss: 0.020291510969400406 2023-01-22 12:18:31.058124: step: 50/463, loss: 0.15573060512542725 2023-01-22 12:18:31.725817: step: 52/463, loss: 0.023421891033649445 2023-01-22 12:18:32.390918: step: 54/463, loss: 0.01674387790262699 2023-01-22 12:18:32.979897: step: 56/463, loss: 0.04184565320611 2023-01-22 12:18:33.594774: step: 58/463, loss: 0.016686532646417618 2023-01-22 12:18:34.180773: step: 60/463, loss: 0.023892097175121307 2023-01-22 12:18:34.819629: step: 62/463, loss: 0.05974258854985237 2023-01-22 12:18:35.419529: step: 64/463, loss: 0.09813424944877625 2023-01-22 12:18:36.004305: step: 66/463, loss: 0.018530558794736862 2023-01-22 12:18:36.635627: step: 68/463, loss: 0.05598936975002289 2023-01-22 12:18:37.219464: step: 70/463, loss: 0.005706106312572956 2023-01-22 12:18:37.863361: step: 72/463, loss: 0.02134627103805542 2023-01-22 12:18:38.443845: step: 74/463, loss: 0.10911321640014648 2023-01-22 12:18:39.021982: step: 76/463, loss: 0.12562420964241028 2023-01-22 12:18:39.858211: step: 78/463, loss: 0.008204949088394642 2023-01-22 12:18:40.518477: step: 80/463, loss: 0.020367205142974854 2023-01-22 12:18:41.183821: step: 82/463, loss: 0.028903182595968246 2023-01-22 12:18:41.738257: step: 84/463, loss: 0.009179981425404549 2023-01-22 12:18:42.336180: step: 86/463, loss: 0.011012246832251549 2023-01-22 12:18:42.919676: step: 88/463, loss: 0.001943445298820734 2023-01-22 12:18:43.502309: step: 90/463, loss: 0.000122124794870615 2023-01-22 12:18:44.064145: step: 92/463, loss: 0.0072256457060575485 2023-01-22 12:18:44.721193: step: 94/463, loss: 0.02838139794766903 2023-01-22 12:18:45.353517: step: 96/463, loss: 0.5995569825172424 2023-01-22 12:18:46.008564: step: 98/463, loss: 0.017027055844664574 2023-01-22 12:18:46.675190: step: 100/463, loss: 0.4004693031311035 2023-01-22 12:18:47.428040: step: 102/463, loss: 0.014569812454283237 2023-01-22 12:18:48.010993: step: 104/463, loss: 0.01985294185578823 2023-01-22 12:18:48.591940: step: 106/463, loss: 0.001956199062988162 2023-01-22 12:18:49.255266: step: 108/463, loss: 0.3121906518936157 2023-01-22 12:18:49.877244: step: 110/463, loss: 0.024625033140182495 2023-01-22 12:18:50.504106: step: 112/463, loss: 0.0011772587895393372 2023-01-22 12:18:51.108186: step: 114/463, loss: 0.012746881693601608 2023-01-22 12:18:51.758778: step: 116/463, loss: 0.011900834739208221 2023-01-22 12:18:52.387010: step: 118/463, loss: 0.021981066092848778 2023-01-22 12:18:53.000221: step: 120/463, loss: 0.022768914699554443 2023-01-22 12:18:53.629307: step: 122/463, loss: 0.033613648265600204 2023-01-22 12:18:54.215604: step: 124/463, loss: 0.0035845350939780474 2023-01-22 12:18:54.815296: step: 126/463, loss: 0.497938871383667 2023-01-22 12:18:55.425547: step: 128/463, loss: 0.03241952508687973 2023-01-22 12:18:55.996879: step: 130/463, loss: 0.007313554175198078 2023-01-22 12:18:56.625606: step: 132/463, loss: 0.04734067991375923 2023-01-22 12:18:57.269356: step: 134/463, loss: 0.06459705531597137 2023-01-22 12:18:57.996545: step: 136/463, loss: 0.01574254035949707 2023-01-22 12:18:58.577910: step: 138/463, loss: 0.023338835686445236 2023-01-22 12:18:59.170353: step: 140/463, loss: 0.01584446057677269 2023-01-22 12:18:59.693728: step: 142/463, loss: 0.09646377712488174 2023-01-22 12:19:00.254989: step: 144/463, loss: 0.011550257913768291 2023-01-22 12:19:00.895080: step: 146/463, loss: 0.03745417296886444 2023-01-22 12:19:01.490597: step: 148/463, loss: 0.057967957109212875 2023-01-22 12:19:02.062740: step: 150/463, loss: 0.023272907361388206 2023-01-22 12:19:02.665352: step: 152/463, loss: 0.10781530290842056 2023-01-22 12:19:03.275237: step: 154/463, loss: 0.09519363939762115 2023-01-22 12:19:03.841840: step: 156/463, loss: 0.04367026686668396 2023-01-22 12:19:04.439138: step: 158/463, loss: 0.052015941590070724 2023-01-22 12:19:05.016673: step: 160/463, loss: 0.02402150258421898 2023-01-22 12:19:05.625570: step: 162/463, loss: 0.021290911361575127 2023-01-22 12:19:06.217786: step: 164/463, loss: 0.0218874029815197 2023-01-22 12:19:06.906192: step: 166/463, loss: 0.010724851861596107 2023-01-22 12:19:07.514949: step: 168/463, loss: 0.021581370383501053 2023-01-22 12:19:08.103549: step: 170/463, loss: 0.006486351136118174 2023-01-22 12:19:08.702098: step: 172/463, loss: 0.06674645841121674 2023-01-22 12:19:09.348461: step: 174/463, loss: 0.024344950914382935 2023-01-22 12:19:09.988531: step: 176/463, loss: 0.0017576382961124182 2023-01-22 12:19:10.592316: step: 178/463, loss: 0.03176195174455643 2023-01-22 12:19:11.222508: step: 180/463, loss: 0.013579603284597397 2023-01-22 12:19:11.805042: step: 182/463, loss: 0.08059349656105042 2023-01-22 12:19:12.382842: step: 184/463, loss: 0.04349227622151375 2023-01-22 12:19:12.951186: step: 186/463, loss: 0.06103519722819328 2023-01-22 12:19:13.523162: step: 188/463, loss: 0.02008599415421486 2023-01-22 12:19:14.097572: step: 190/463, loss: 0.01783793978393078 2023-01-22 12:19:14.715456: step: 192/463, loss: 0.05660560354590416 2023-01-22 12:19:15.343325: step: 194/463, loss: 0.02937992662191391 2023-01-22 12:19:15.996142: step: 196/463, loss: 0.11923783272504807 2023-01-22 12:19:16.635890: step: 198/463, loss: 0.03090454638004303 2023-01-22 12:19:17.224130: step: 200/463, loss: 0.02430352382361889 2023-01-22 12:19:17.831353: step: 202/463, loss: 0.014538311399519444 2023-01-22 12:19:18.497808: step: 204/463, loss: 0.005214911885559559 2023-01-22 12:19:19.104616: step: 206/463, loss: 0.06654265522956848 2023-01-22 12:19:19.688281: step: 208/463, loss: 0.007305143866688013 2023-01-22 12:19:20.269973: step: 210/463, loss: 0.022058090195059776 2023-01-22 12:19:20.843380: step: 212/463, loss: 1.6268070936203003 2023-01-22 12:19:21.407360: step: 214/463, loss: 0.9395773410797119 2023-01-22 12:19:22.037723: step: 216/463, loss: 0.050523996353149414 2023-01-22 12:19:22.694736: step: 218/463, loss: 0.040161505341529846 2023-01-22 12:19:23.276666: step: 220/463, loss: 0.0412868969142437 2023-01-22 12:19:23.819755: step: 222/463, loss: 0.019668761640787125 2023-01-22 12:19:24.459322: step: 224/463, loss: 0.004019651561975479 2023-01-22 12:19:25.128198: step: 226/463, loss: 0.043066561222076416 2023-01-22 12:19:25.701556: step: 228/463, loss: 0.1460483968257904 2023-01-22 12:19:26.290612: step: 230/463, loss: 0.00818039383739233 2023-01-22 12:19:26.855787: step: 232/463, loss: 0.006487493868917227 2023-01-22 12:19:27.442796: step: 234/463, loss: 0.0703156590461731 2023-01-22 12:19:28.115447: step: 236/463, loss: 0.008411692455410957 2023-01-22 12:19:28.671445: step: 238/463, loss: 0.011830761097371578 2023-01-22 12:19:29.291646: step: 240/463, loss: 0.16915813088417053 2023-01-22 12:19:29.881886: step: 242/463, loss: 0.002356280107051134 2023-01-22 12:19:30.500404: step: 244/463, loss: 0.00916910357773304 2023-01-22 12:19:31.092849: step: 246/463, loss: 0.040827035903930664 2023-01-22 12:19:31.780180: step: 248/463, loss: 0.012625100091099739 2023-01-22 12:19:32.388452: step: 250/463, loss: 0.011709875427186489 2023-01-22 12:19:33.054579: step: 252/463, loss: 0.002782276598736644 2023-01-22 12:19:33.673934: step: 254/463, loss: 0.019311297684907913 2023-01-22 12:19:34.354596: step: 256/463, loss: 0.014067348092794418 2023-01-22 12:19:34.979521: step: 258/463, loss: 0.0077292839996516705 2023-01-22 12:19:35.689263: step: 260/463, loss: 0.07472358644008636 2023-01-22 12:19:36.348132: step: 262/463, loss: 0.5500505566596985 2023-01-22 12:19:36.964621: step: 264/463, loss: 0.01905016414821148 2023-01-22 12:19:37.574336: step: 266/463, loss: 0.002901636529713869 2023-01-22 12:19:38.165603: step: 268/463, loss: 0.31093186140060425 2023-01-22 12:19:38.751113: step: 270/463, loss: 0.04626436159014702 2023-01-22 12:19:39.407689: step: 272/463, loss: 0.07923506200313568 2023-01-22 12:19:40.026585: step: 274/463, loss: 0.1362936645746231 2023-01-22 12:19:40.659838: step: 276/463, loss: 0.052740342915058136 2023-01-22 12:19:41.269683: step: 278/463, loss: 0.031582821160554886 2023-01-22 12:19:41.839233: step: 280/463, loss: 0.020128924399614334 2023-01-22 12:19:42.419177: step: 282/463, loss: 0.11607906222343445 2023-01-22 12:19:43.112050: step: 284/463, loss: 0.0046155150048434734 2023-01-22 12:19:43.781949: step: 286/463, loss: 0.0035585558507591486 2023-01-22 12:19:44.434019: step: 288/463, loss: 0.01247965358197689 2023-01-22 12:19:44.950033: step: 290/463, loss: 0.020329592749476433 2023-01-22 12:19:45.653909: step: 292/463, loss: 0.045249029994010925 2023-01-22 12:19:46.163440: step: 294/463, loss: 0.005409767851233482 2023-01-22 12:19:46.745332: step: 296/463, loss: 0.019977781921625137 2023-01-22 12:19:47.371985: step: 298/463, loss: 0.01569727249443531 2023-01-22 12:19:48.010573: step: 300/463, loss: 0.07623255252838135 2023-01-22 12:19:48.650518: step: 302/463, loss: 0.045567549765110016 2023-01-22 12:19:49.253296: step: 304/463, loss: 0.020051337778568268 2023-01-22 12:19:49.879730: step: 306/463, loss: 0.026783756911754608 2023-01-22 12:19:50.494989: step: 308/463, loss: 0.02466309443116188 2023-01-22 12:19:51.058984: step: 310/463, loss: 0.11113037168979645 2023-01-22 12:19:51.645627: step: 312/463, loss: 0.002726368373259902 2023-01-22 12:19:52.275502: step: 314/463, loss: 0.012237334623932838 2023-01-22 12:19:52.862077: step: 316/463, loss: 0.23267483711242676 2023-01-22 12:19:53.455441: step: 318/463, loss: 0.018688658252358437 2023-01-22 12:19:54.000304: step: 320/463, loss: 0.00025364590692333877 2023-01-22 12:19:54.611333: step: 322/463, loss: 0.04208621755242348 2023-01-22 12:19:55.288730: step: 324/463, loss: 0.0012273071333765984 2023-01-22 12:19:55.946239: step: 326/463, loss: 0.0229034423828125 2023-01-22 12:19:56.610624: step: 328/463, loss: 0.004773963242769241 2023-01-22 12:19:57.237127: step: 330/463, loss: 0.04242382198572159 2023-01-22 12:19:57.826626: step: 332/463, loss: 0.10915303975343704 2023-01-22 12:19:58.483774: step: 334/463, loss: 0.0036423001438379288 2023-01-22 12:19:59.113770: step: 336/463, loss: 0.021851707249879837 2023-01-22 12:19:59.764597: step: 338/463, loss: 0.04234820604324341 2023-01-22 12:20:00.425101: step: 340/463, loss: 0.0048447963781654835 2023-01-22 12:20:01.026292: step: 342/463, loss: 0.012705449014902115 2023-01-22 12:20:01.651776: step: 344/463, loss: 0.000829791824799031 2023-01-22 12:20:02.268748: step: 346/463, loss: 0.055353257805109024 2023-01-22 12:20:02.945351: step: 348/463, loss: 0.009019903838634491 2023-01-22 12:20:03.560596: step: 350/463, loss: 0.020258044824004173 2023-01-22 12:20:04.259860: step: 352/463, loss: 0.06482817977666855 2023-01-22 12:20:04.821086: step: 354/463, loss: 0.01929132454097271 2023-01-22 12:20:05.423722: step: 356/463, loss: 0.0025611575692892075 2023-01-22 12:20:06.043022: step: 358/463, loss: 0.04649610444903374 2023-01-22 12:20:06.698344: step: 360/463, loss: 0.016384338960051537 2023-01-22 12:20:07.307008: step: 362/463, loss: 0.08392742276191711 2023-01-22 12:20:07.863921: step: 364/463, loss: 0.024707302451133728 2023-01-22 12:20:08.452621: step: 366/463, loss: 0.0018695153994485736 2023-01-22 12:20:09.042914: step: 368/463, loss: 0.02039937488734722 2023-01-22 12:20:09.659442: step: 370/463, loss: 0.0023016519844532013 2023-01-22 12:20:10.350953: step: 372/463, loss: 0.061753854155540466 2023-01-22 12:20:10.959752: step: 374/463, loss: 0.018502971157431602 2023-01-22 12:20:11.607645: step: 376/463, loss: 0.22654902935028076 2023-01-22 12:20:12.185839: step: 378/463, loss: 0.05308860167860985 2023-01-22 12:20:12.815410: step: 380/463, loss: 0.04791240394115448 2023-01-22 12:20:13.419429: step: 382/463, loss: 0.008300859481096268 2023-01-22 12:20:14.034420: step: 384/463, loss: 0.016533823683857918 2023-01-22 12:20:14.611755: step: 386/463, loss: 0.10109324753284454 2023-01-22 12:20:15.201821: step: 388/463, loss: 0.0027686231769621372 2023-01-22 12:20:15.812467: step: 390/463, loss: 0.13375160098075867 2023-01-22 12:20:16.393147: step: 392/463, loss: 0.031212475150823593 2023-01-22 12:20:16.937175: step: 394/463, loss: 0.02566165290772915 2023-01-22 12:20:17.573133: step: 396/463, loss: 0.020588349550962448 2023-01-22 12:20:18.138585: step: 398/463, loss: 0.0033577983267605305 2023-01-22 12:20:18.779464: step: 400/463, loss: 0.01071096584200859 2023-01-22 12:20:19.398991: step: 402/463, loss: 0.005720159038901329 2023-01-22 12:20:20.047498: step: 404/463, loss: 0.14227540791034698 2023-01-22 12:20:20.647721: step: 406/463, loss: 0.006035880651324987 2023-01-22 12:20:21.259395: step: 408/463, loss: 0.01138804666697979 2023-01-22 12:20:21.850169: step: 410/463, loss: 0.06716389954090118 2023-01-22 12:20:22.492730: step: 412/463, loss: 0.018787872046232224 2023-01-22 12:20:23.255992: step: 414/463, loss: 0.13190680742263794 2023-01-22 12:20:23.851724: step: 416/463, loss: 0.06670462340116501 2023-01-22 12:20:24.472167: step: 418/463, loss: 0.07727526873350143 2023-01-22 12:20:25.067784: step: 420/463, loss: 0.01599184423685074 2023-01-22 12:20:25.648784: step: 422/463, loss: 0.06036541610956192 2023-01-22 12:20:26.152360: step: 424/463, loss: 0.013064800761640072 2023-01-22 12:20:26.695843: step: 426/463, loss: 0.006833591032773256 2023-01-22 12:20:27.330078: step: 428/463, loss: 0.031750891357660294 2023-01-22 12:20:28.006218: step: 430/463, loss: 0.02114529348909855 2023-01-22 12:20:28.663955: step: 432/463, loss: 0.03230300918221474 2023-01-22 12:20:29.248210: step: 434/463, loss: 0.010033736936748028 2023-01-22 12:20:29.872977: step: 436/463, loss: 0.01874353177845478 2023-01-22 12:20:30.472483: step: 438/463, loss: 0.004949798341840506 2023-01-22 12:20:31.079922: step: 440/463, loss: 0.01905561238527298 2023-01-22 12:20:31.701265: step: 442/463, loss: 0.025847643613815308 2023-01-22 12:20:32.266707: step: 444/463, loss: 0.0005596587434411049 2023-01-22 12:20:32.855658: step: 446/463, loss: 0.025552352890372276 2023-01-22 12:20:33.499867: step: 448/463, loss: 0.00875480379909277 2023-01-22 12:20:34.105254: step: 450/463, loss: 0.010331067256629467 2023-01-22 12:20:34.643503: step: 452/463, loss: 0.00190172647126019 2023-01-22 12:20:35.262296: step: 454/463, loss: 0.037141069769859314 2023-01-22 12:20:35.887332: step: 456/463, loss: 0.03838363289833069 2023-01-22 12:20:36.474449: step: 458/463, loss: 0.059563618153333664 2023-01-22 12:20:37.035497: step: 460/463, loss: 0.0016895380103960633 2023-01-22 12:20:37.641430: step: 462/463, loss: 0.06795158237218857 2023-01-22 12:20:38.246149: step: 464/463, loss: 0.007092796266078949 2023-01-22 12:20:38.857697: step: 466/463, loss: 0.06822799891233444 2023-01-22 12:20:39.526873: step: 468/463, loss: 0.020045703276991844 2023-01-22 12:20:40.176662: step: 470/463, loss: 0.03994634747505188 2023-01-22 12:20:40.755442: step: 472/463, loss: 0.007352486252784729 2023-01-22 12:20:41.322807: step: 474/463, loss: 0.08837493509054184 2023-01-22 12:20:41.901602: step: 476/463, loss: 0.031150689348578453 2023-01-22 12:20:42.485930: step: 478/463, loss: 0.06251810491085052 2023-01-22 12:20:43.079896: step: 480/463, loss: 0.031099554151296616 2023-01-22 12:20:43.680619: step: 482/463, loss: 0.07929482311010361 2023-01-22 12:20:44.234140: step: 484/463, loss: 0.005624769255518913 2023-01-22 12:20:44.843873: step: 486/463, loss: 0.5399792790412903 2023-01-22 12:20:45.531531: step: 488/463, loss: 0.04697442799806595 2023-01-22 12:20:46.207548: step: 490/463, loss: 0.07591967284679413 2023-01-22 12:20:46.813500: step: 492/463, loss: 0.04184211418032646 2023-01-22 12:20:47.446380: step: 494/463, loss: 0.014643094502389431 2023-01-22 12:20:47.989504: step: 496/463, loss: 0.007793131750077009 2023-01-22 12:20:48.626304: step: 498/463, loss: 0.09160103648900986 2023-01-22 12:20:49.235562: step: 500/463, loss: 0.035016074776649475 2023-01-22 12:20:49.815976: step: 502/463, loss: 0.024046001955866814 2023-01-22 12:20:50.427142: step: 504/463, loss: 0.009723913855850697 2023-01-22 12:20:51.024160: step: 506/463, loss: 0.004270180594176054 2023-01-22 12:20:51.677376: step: 508/463, loss: 0.016087761148810387 2023-01-22 12:20:52.216208: step: 510/463, loss: 0.006169023457914591 2023-01-22 12:20:52.838051: step: 512/463, loss: 0.05757845193147659 2023-01-22 12:20:53.425364: step: 514/463, loss: 0.00573571166023612 2023-01-22 12:20:53.979651: step: 516/463, loss: 0.07166961580514908 2023-01-22 12:20:54.542788: step: 518/463, loss: 0.004224637523293495 2023-01-22 12:20:55.184501: step: 520/463, loss: 0.0032673669047653675 2023-01-22 12:20:55.816916: step: 522/463, loss: 0.025617776438593864 2023-01-22 12:20:56.418670: step: 524/463, loss: 0.06886880844831467 2023-01-22 12:20:56.960276: step: 526/463, loss: 0.004455070476979017 2023-01-22 12:20:57.629702: step: 528/463, loss: 0.006259072571992874 2023-01-22 12:20:58.261745: step: 530/463, loss: 0.04033125936985016 2023-01-22 12:20:58.868571: step: 532/463, loss: 0.053200703114271164 2023-01-22 12:20:59.435411: step: 534/463, loss: 0.004216287285089493 2023-01-22 12:21:00.157615: step: 536/463, loss: 0.06604675203561783 2023-01-22 12:21:00.769231: step: 538/463, loss: 0.05718152970075607 2023-01-22 12:21:01.311019: step: 540/463, loss: 0.00031880661845207214 2023-01-22 12:21:01.894648: step: 542/463, loss: 0.011241381987929344 2023-01-22 12:21:02.518460: step: 544/463, loss: 0.32326120138168335 2023-01-22 12:21:03.199117: step: 546/463, loss: 0.04331723600625992 2023-01-22 12:21:03.821335: step: 548/463, loss: 0.015126056037843227 2023-01-22 12:21:04.422055: step: 550/463, loss: 0.01892212964594364 2023-01-22 12:21:05.025745: step: 552/463, loss: 0.017604444175958633 2023-01-22 12:21:05.587000: step: 554/463, loss: 0.019526148214936256 2023-01-22 12:21:06.146406: step: 556/463, loss: 0.6796872019767761 2023-01-22 12:21:06.756047: step: 558/463, loss: 0.03267296403646469 2023-01-22 12:21:07.337306: step: 560/463, loss: 0.08375424891710281 2023-01-22 12:21:07.923860: step: 562/463, loss: 0.04032605141401291 2023-01-22 12:21:08.578358: step: 564/463, loss: 0.03152478113770485 2023-01-22 12:21:09.169058: step: 566/463, loss: 0.009987886995077133 2023-01-22 12:21:09.728652: step: 568/463, loss: 0.0707743912935257 2023-01-22 12:21:10.379582: step: 570/463, loss: 0.02383466064929962 2023-01-22 12:21:10.958901: step: 572/463, loss: 0.032773490995168686 2023-01-22 12:21:11.517471: step: 574/463, loss: 0.0003904775658156723 2023-01-22 12:21:12.085294: step: 576/463, loss: 0.003928330261260271 2023-01-22 12:21:12.628428: step: 578/463, loss: 0.33052366971969604 2023-01-22 12:21:13.291517: step: 580/463, loss: 0.0347343273460865 2023-01-22 12:21:13.891748: step: 582/463, loss: 0.05461281165480614 2023-01-22 12:21:14.487923: step: 584/463, loss: 0.1299811154603958 2023-01-22 12:21:15.075768: step: 586/463, loss: 0.007315927185118198 2023-01-22 12:21:15.696912: step: 588/463, loss: 0.0334121473133564 2023-01-22 12:21:16.254259: step: 590/463, loss: 0.08426441997289658 2023-01-22 12:21:16.854157: step: 592/463, loss: 0.050887297838926315 2023-01-22 12:21:17.447510: step: 594/463, loss: 0.10563686490058899 2023-01-22 12:21:18.002955: step: 596/463, loss: 0.07363232970237732 2023-01-22 12:21:18.633935: step: 598/463, loss: 0.054273054003715515 2023-01-22 12:21:19.213900: step: 600/463, loss: 0.07518825680017471 2023-01-22 12:21:19.831292: step: 602/463, loss: 0.04854172468185425 2023-01-22 12:21:20.437228: step: 604/463, loss: 0.007249117363244295 2023-01-22 12:21:21.071528: step: 606/463, loss: 0.06041597202420235 2023-01-22 12:21:21.637212: step: 608/463, loss: 0.5194256901741028 2023-01-22 12:21:22.233980: step: 610/463, loss: 0.039012517780065536 2023-01-22 12:21:22.865192: step: 612/463, loss: 0.028161583468317986 2023-01-22 12:21:23.508398: step: 614/463, loss: 0.0001226312160724774 2023-01-22 12:21:24.093137: step: 616/463, loss: 0.018892131745815277 2023-01-22 12:21:24.703385: step: 618/463, loss: 0.009805514477193356 2023-01-22 12:21:25.297366: step: 620/463, loss: 0.06061761453747749 2023-01-22 12:21:25.885868: step: 622/463, loss: 0.014063299633562565 2023-01-22 12:21:26.512055: step: 624/463, loss: 0.01469547487795353 2023-01-22 12:21:27.142540: step: 626/463, loss: 0.1260693520307541 2023-01-22 12:21:27.671739: step: 628/463, loss: 0.015741469338536263 2023-01-22 12:21:28.340308: step: 630/463, loss: 0.03137549012899399 2023-01-22 12:21:28.925298: step: 632/463, loss: 0.32331040501594543 2023-01-22 12:21:29.511993: step: 634/463, loss: 0.009754701517522335 2023-01-22 12:21:30.194708: step: 636/463, loss: 0.05197267234325409 2023-01-22 12:21:30.794045: step: 638/463, loss: 0.005566580221056938 2023-01-22 12:21:31.385694: step: 640/463, loss: 0.012628620490431786 2023-01-22 12:21:32.123831: step: 642/463, loss: 0.20186926424503326 2023-01-22 12:21:32.755084: step: 644/463, loss: 0.028702430427074432 2023-01-22 12:21:33.346935: step: 646/463, loss: 0.048512354493141174 2023-01-22 12:21:33.998150: step: 648/463, loss: 0.002821398666128516 2023-01-22 12:21:34.608776: step: 650/463, loss: 0.03064820170402527 2023-01-22 12:21:35.231956: step: 652/463, loss: 0.004929949529469013 2023-01-22 12:21:35.878719: step: 654/463, loss: 0.042304202914237976 2023-01-22 12:21:36.459657: step: 656/463, loss: 0.11738227307796478 2023-01-22 12:21:37.018925: step: 658/463, loss: 0.011186796240508556 2023-01-22 12:21:37.634362: step: 660/463, loss: 0.09208541363477707 2023-01-22 12:21:38.337940: step: 662/463, loss: 0.07006802409887314 2023-01-22 12:21:38.875462: step: 664/463, loss: 0.04129285365343094 2023-01-22 12:21:39.470002: step: 666/463, loss: 0.03904534876346588 2023-01-22 12:21:40.063248: step: 668/463, loss: 0.060648489743471146 2023-01-22 12:21:40.645745: step: 670/463, loss: 0.8541082143783569 2023-01-22 12:21:41.243776: step: 672/463, loss: 0.023084085434675217 2023-01-22 12:21:41.826096: step: 674/463, loss: 0.026811780408024788 2023-01-22 12:21:42.457690: step: 676/463, loss: 0.049302905797958374 2023-01-22 12:21:43.108535: step: 678/463, loss: 0.06594472378492355 2023-01-22 12:21:43.696776: step: 680/463, loss: 0.1358116716146469 2023-01-22 12:21:44.287844: step: 682/463, loss: 0.000393335911212489 2023-01-22 12:21:44.929003: step: 684/463, loss: 0.04331202059984207 2023-01-22 12:21:45.479318: step: 686/463, loss: 0.0031838964205235243 2023-01-22 12:21:46.131486: step: 688/463, loss: 0.15263943374156952 2023-01-22 12:21:46.759902: step: 690/463, loss: 0.032084375619888306 2023-01-22 12:21:47.408024: step: 692/463, loss: 0.033500246703624725 2023-01-22 12:21:48.048317: step: 694/463, loss: 0.02280724234879017 2023-01-22 12:21:48.664477: step: 696/463, loss: 0.006036567501723766 2023-01-22 12:21:49.393061: step: 698/463, loss: 0.027374595403671265 2023-01-22 12:21:50.037036: step: 700/463, loss: 0.029568640515208244 2023-01-22 12:21:50.607535: step: 702/463, loss: 0.011715746484696865 2023-01-22 12:21:51.254158: step: 704/463, loss: 0.012809795327484608 2023-01-22 12:21:51.927353: step: 706/463, loss: 0.01322891004383564 2023-01-22 12:21:52.573910: step: 708/463, loss: 0.10790175199508667 2023-01-22 12:21:53.214120: step: 710/463, loss: 0.01220192015171051 2023-01-22 12:21:53.832598: step: 712/463, loss: 0.01692129299044609 2023-01-22 12:21:54.495011: step: 714/463, loss: 0.020427130162715912 2023-01-22 12:21:55.159426: step: 716/463, loss: 0.3314805030822754 2023-01-22 12:21:55.801187: step: 718/463, loss: 0.11914760619401932 2023-01-22 12:21:56.361436: step: 720/463, loss: 0.021928012371063232 2023-01-22 12:21:56.991135: step: 722/463, loss: 0.019401831552386284 2023-01-22 12:21:57.585773: step: 724/463, loss: 0.0754886195063591 2023-01-22 12:21:58.152667: step: 726/463, loss: 0.022181248292326927 2023-01-22 12:21:58.753126: step: 728/463, loss: 0.08377305418252945 2023-01-22 12:21:59.346166: step: 730/463, loss: 0.0491647943854332 2023-01-22 12:22:00.020757: step: 732/463, loss: 0.30081477761268616 2023-01-22 12:22:00.675575: step: 734/463, loss: 0.009934273548424244 2023-01-22 12:22:01.342148: step: 736/463, loss: 0.1489568054676056 2023-01-22 12:22:01.897092: step: 738/463, loss: 0.016113916411995888 2023-01-22 12:22:02.509299: step: 740/463, loss: 0.011813441291451454 2023-01-22 12:22:03.119414: step: 742/463, loss: 0.0539797767996788 2023-01-22 12:22:03.728557: step: 744/463, loss: 0.024049753323197365 2023-01-22 12:22:04.331482: step: 746/463, loss: 0.019513430073857307 2023-01-22 12:22:04.952339: step: 748/463, loss: 0.00860277097672224 2023-01-22 12:22:05.579296: step: 750/463, loss: 0.008631817065179348 2023-01-22 12:22:06.294722: step: 752/463, loss: 0.027108194306492805 2023-01-22 12:22:06.923572: step: 754/463, loss: 0.13367898762226105 2023-01-22 12:22:07.592064: step: 756/463, loss: 0.0070381248369812965 2023-01-22 12:22:08.166416: step: 758/463, loss: 0.053361233323812485 2023-01-22 12:22:08.741650: step: 760/463, loss: 0.06090757995843887 2023-01-22 12:22:09.363394: step: 762/463, loss: 0.007821215316653252 2023-01-22 12:22:09.982930: step: 764/463, loss: 0.1697133630514145 2023-01-22 12:22:10.558090: step: 766/463, loss: 0.008045196533203125 2023-01-22 12:22:11.206802: step: 768/463, loss: 0.02766299992799759 2023-01-22 12:22:11.821747: step: 770/463, loss: 0.008898507803678513 2023-01-22 12:22:12.403430: step: 772/463, loss: 0.0013090830761939287 2023-01-22 12:22:13.024848: step: 774/463, loss: 0.02816668152809143 2023-01-22 12:22:13.629177: step: 776/463, loss: 0.021331140771508217 2023-01-22 12:22:14.198075: step: 778/463, loss: 0.10147601366043091 2023-01-22 12:22:14.938410: step: 780/463, loss: 0.04615384712815285 2023-01-22 12:22:15.505489: step: 782/463, loss: 0.04704257845878601 2023-01-22 12:22:16.128089: step: 784/463, loss: 0.005019376985728741 2023-01-22 12:22:16.858996: step: 786/463, loss: 0.04884747788310051 2023-01-22 12:22:17.412979: step: 788/463, loss: 0.015131869353353977 2023-01-22 12:22:17.979731: step: 790/463, loss: 0.000488363322801888 2023-01-22 12:22:18.567494: step: 792/463, loss: 0.045728642493486404 2023-01-22 12:22:19.173161: step: 794/463, loss: 0.05794370174407959 2023-01-22 12:22:19.835384: step: 796/463, loss: 0.05631436035037041 2023-01-22 12:22:20.453965: step: 798/463, loss: 0.012864883989095688 2023-01-22 12:22:21.046259: step: 800/463, loss: 0.036105845123529434 2023-01-22 12:22:21.741814: step: 802/463, loss: 0.012082905508577824 2023-01-22 12:22:22.376660: step: 804/463, loss: 0.01934638060629368 2023-01-22 12:22:23.041388: step: 806/463, loss: 0.032570015639066696 2023-01-22 12:22:23.706142: step: 808/463, loss: 0.17673109471797943 2023-01-22 12:22:24.238459: step: 810/463, loss: 0.008372608572244644 2023-01-22 12:22:24.862440: step: 812/463, loss: 0.03140426427125931 2023-01-22 12:22:25.476082: step: 814/463, loss: 0.03963291272521019 2023-01-22 12:22:26.083404: step: 816/463, loss: 0.04781262204051018 2023-01-22 12:22:26.700773: step: 818/463, loss: 0.0009222657536156476 2023-01-22 12:22:27.291056: step: 820/463, loss: 0.04003676399588585 2023-01-22 12:22:27.891632: step: 822/463, loss: 0.001807502587325871 2023-01-22 12:22:28.536095: step: 824/463, loss: 0.02812432125210762 2023-01-22 12:22:29.176240: step: 826/463, loss: 0.0046024015173316 2023-01-22 12:22:29.748539: step: 828/463, loss: 0.05028145760297775 2023-01-22 12:22:30.348608: step: 830/463, loss: 0.00679796701297164 2023-01-22 12:22:30.909661: step: 832/463, loss: 0.02829374186694622 2023-01-22 12:22:31.574633: step: 834/463, loss: 0.03555215522646904 2023-01-22 12:22:32.222324: step: 836/463, loss: 0.024895858019590378 2023-01-22 12:22:32.837411: step: 838/463, loss: 0.03511698544025421 2023-01-22 12:22:33.523498: step: 840/463, loss: 0.02095201052725315 2023-01-22 12:22:34.130335: step: 842/463, loss: 0.011551769450306892 2023-01-22 12:22:34.720841: step: 844/463, loss: 0.001830400782637298 2023-01-22 12:22:35.349739: step: 846/463, loss: 0.019419299438595772 2023-01-22 12:22:35.925711: step: 848/463, loss: 0.12913839519023895 2023-01-22 12:22:36.509679: step: 850/463, loss: 0.06914029270410538 2023-01-22 12:22:37.138157: step: 852/463, loss: 0.04706982150673866 2023-01-22 12:22:37.764593: step: 854/463, loss: 0.04811506345868111 2023-01-22 12:22:38.306263: step: 856/463, loss: 0.02079155296087265 2023-01-22 12:22:38.892940: step: 858/463, loss: 0.02727232128381729 2023-01-22 12:22:39.505956: step: 860/463, loss: 0.018009401857852936 2023-01-22 12:22:40.096362: step: 862/463, loss: 0.0789589062333107 2023-01-22 12:22:40.678076: step: 864/463, loss: 0.003665331983938813 2023-01-22 12:22:41.325057: step: 866/463, loss: 0.004751025699079037 2023-01-22 12:22:41.845956: step: 868/463, loss: 0.03943350166082382 2023-01-22 12:22:42.495659: step: 870/463, loss: 0.010919580236077309 2023-01-22 12:22:43.040439: step: 872/463, loss: 0.011350066401064396 2023-01-22 12:22:43.709024: step: 874/463, loss: 0.007064160890877247 2023-01-22 12:22:44.288054: step: 876/463, loss: 0.028873734176158905 2023-01-22 12:22:44.886899: step: 878/463, loss: 0.021631957963109016 2023-01-22 12:22:45.513469: step: 880/463, loss: 0.12724556028842926 2023-01-22 12:22:46.136220: step: 882/463, loss: 0.04799959436058998 2023-01-22 12:22:46.725070: step: 884/463, loss: 0.0035819008480757475 2023-01-22 12:22:47.356107: step: 886/463, loss: 0.006701014004647732 2023-01-22 12:22:47.980591: step: 888/463, loss: 0.08809933811426163 2023-01-22 12:22:48.548924: step: 890/463, loss: 0.018438981845974922 2023-01-22 12:22:49.186561: step: 892/463, loss: 0.27498531341552734 2023-01-22 12:22:49.800288: step: 894/463, loss: 0.05050842463970184 2023-01-22 12:22:50.283989: step: 896/463, loss: 0.018362948670983315 2023-01-22 12:22:50.906668: step: 898/463, loss: 0.028361402451992035 2023-01-22 12:22:51.467053: step: 900/463, loss: 0.01950116828083992 2023-01-22 12:22:52.059198: step: 902/463, loss: 0.02170678600668907 2023-01-22 12:22:52.671685: step: 904/463, loss: 0.056740038096904755 2023-01-22 12:22:53.321589: step: 906/463, loss: 0.018573565408587456 2023-01-22 12:22:53.879979: step: 908/463, loss: 0.023815959692001343 2023-01-22 12:22:54.483169: step: 910/463, loss: 0.09343187510967255 2023-01-22 12:22:55.076913: step: 912/463, loss: 0.11053943634033203 2023-01-22 12:22:55.735977: step: 914/463, loss: 0.0008950205519795418 2023-01-22 12:22:56.350320: step: 916/463, loss: 0.015571742318570614 2023-01-22 12:22:56.950903: step: 918/463, loss: 0.045694444328546524 2023-01-22 12:22:57.549814: step: 920/463, loss: 0.73042231798172 2023-01-22 12:22:58.164836: step: 922/463, loss: 0.013180899433791637 2023-01-22 12:22:58.756428: step: 924/463, loss: 0.014922033064067364 2023-01-22 12:22:59.306063: step: 926/463, loss: 0.015136092901229858 ================================================== Loss: 0.057 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32359513346899355, 'r': 0.3242091659803199, 'f1': 0.32390185871398786}, 'combined': 0.23866452747346473, 'epoch': 26} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3502320024162213, 'r': 0.40495575279375595, 'f1': 0.37561113302609245}, 'combined': 0.29114355765658845, 'epoch': 26} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28923618439311916, 'r': 0.3441197108434264, 'f1': 0.3142999785346373}, 'combined': 0.23158945786762747, 'epoch': 26} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3386384503408636, 'r': 0.41645059426109876, 'f1': 0.37353524035950164}, 'combined': 0.2895344925274606, 'epoch': 26} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2869837318652295, 'r': 0.328914941264893, 'f1': 0.30652197002051035}, 'combined': 0.2258582936993234, 'epoch': 26} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33916795326544374, 'r': 0.3999563272422466, 'f1': 0.3670624074564018}, 'combined': 0.28451727276524924, 'epoch': 26} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25, 'r': 0.3142857142857143, 'f1': 0.27848101265822783}, 'combined': 0.18565400843881855, 'epoch': 26} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2714285714285714, 'r': 0.41304347826086957, 'f1': 0.3275862068965517}, 'combined': 0.16379310344827586, 'epoch': 26} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35526315789473684, 'r': 0.23275862068965517, 'f1': 0.28125}, 'combined': 0.1875, 'epoch': 26} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3279711174242424, 'r': 0.3285934535104364, 'f1': 0.32828199052132695}, 'combined': 0.24189199301571457, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33699381251830834, 'r': 0.40575541764612494, 'f1': 0.3681917384478598}, 'combined': 0.2853926393710684, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32926829268292684, 'r': 0.38571428571428573, 'f1': 0.35526315789473684}, 'combined': 0.23684210526315788, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29021588054776093, 'r': 0.325460313859064, 'f1': 0.30682931199235547}, 'combined': 0.2260847562048935, 'epoch': 13} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32837198696491027, 'r': 0.4053341714098111, 'f1': 0.3628166009822085}, 'combined': 0.28122626487616165, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3333333333333333, 'r': 0.4782608695652174, 'f1': 0.3928571428571428}, 'combined': 0.1964285714285714, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30241528588023536, 'r': 0.33799355480732185, 'f1': 0.319216135095804}, 'combined': 0.2352118890179608, 'epoch': 18} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3270213595028262, 'r': 0.3847310111797955, 'f1': 0.3535366048679202}, 'combined': 0.2740331578402061, 'epoch': 18} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 18} ****************************** Epoch: 27 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 12:25:38.113926: step: 2/463, loss: 0.006801618728786707 2023-01-22 12:25:38.732325: step: 4/463, loss: 0.024481989443302155 2023-01-22 12:25:39.388375: step: 6/463, loss: 0.0005853949696756899 2023-01-22 12:25:39.930755: step: 8/463, loss: 0.017126750200986862 2023-01-22 12:25:40.576594: step: 10/463, loss: 0.0030214719008654356 2023-01-22 12:25:41.151995: step: 12/463, loss: 0.013345425017178059 2023-01-22 12:25:41.706517: step: 14/463, loss: 0.5065322518348694 2023-01-22 12:25:42.261790: step: 16/463, loss: 0.004352352116256952 2023-01-22 12:25:42.871663: step: 18/463, loss: 0.002745497040450573 2023-01-22 12:25:43.503185: step: 20/463, loss: 0.038777854293584824 2023-01-22 12:25:44.113176: step: 22/463, loss: 0.02615339122712612 2023-01-22 12:25:44.727282: step: 24/463, loss: 0.012106833979487419 2023-01-22 12:25:45.325271: step: 26/463, loss: 0.0317654013633728 2023-01-22 12:25:45.911745: step: 28/463, loss: 0.05028447136282921 2023-01-22 12:25:46.486989: step: 30/463, loss: 0.015668125823140144 2023-01-22 12:25:47.129857: step: 32/463, loss: 0.0029103541746735573 2023-01-22 12:25:47.769470: step: 34/463, loss: 0.0645848885178566 2023-01-22 12:25:48.370928: step: 36/463, loss: 0.03229474648833275 2023-01-22 12:25:48.989698: step: 38/463, loss: 0.0023157589603215456 2023-01-22 12:25:49.511367: step: 40/463, loss: 0.02975877933204174 2023-01-22 12:25:50.066725: step: 42/463, loss: 0.004943448584526777 2023-01-22 12:25:50.655181: step: 44/463, loss: 0.0011191468220204115 2023-01-22 12:25:51.250863: step: 46/463, loss: 0.0014116100501269102 2023-01-22 12:25:51.845864: step: 48/463, loss: 0.0008793671731837094 2023-01-22 12:25:52.491775: step: 50/463, loss: 1.3605868816375732 2023-01-22 12:25:53.072813: step: 52/463, loss: 0.02973097562789917 2023-01-22 12:25:53.688109: step: 54/463, loss: 0.0024194391444325447 2023-01-22 12:25:54.309422: step: 56/463, loss: 0.016222387552261353 2023-01-22 12:25:54.922976: step: 58/463, loss: 0.025964708998799324 2023-01-22 12:25:55.501795: step: 60/463, loss: 0.02551180124282837 2023-01-22 12:25:56.071791: step: 62/463, loss: 0.0044490871950984 2023-01-22 12:25:56.653746: step: 64/463, loss: 0.0005965310847386718 2023-01-22 12:25:57.281171: step: 66/463, loss: 0.019502514973282814 2023-01-22 12:25:57.938025: step: 68/463, loss: 0.08221524208784103 2023-01-22 12:25:58.572337: step: 70/463, loss: 0.008652446791529655 2023-01-22 12:25:59.292644: step: 72/463, loss: 0.018750254064798355 2023-01-22 12:25:59.895722: step: 74/463, loss: 0.01460979226976633 2023-01-22 12:26:00.549195: step: 76/463, loss: 0.019064562395215034 2023-01-22 12:26:01.137091: step: 78/463, loss: 0.7012366652488708 2023-01-22 12:26:01.790012: step: 80/463, loss: 0.0074412464164197445 2023-01-22 12:26:02.416727: step: 82/463, loss: 0.018528498709201813 2023-01-22 12:26:02.990186: step: 84/463, loss: 0.11513300985097885 2023-01-22 12:26:03.556880: step: 86/463, loss: 0.007294955663383007 2023-01-22 12:26:04.116756: step: 88/463, loss: 0.013756808824837208 2023-01-22 12:26:04.724043: step: 90/463, loss: 0.013349073007702827 2023-01-22 12:26:05.297508: step: 92/463, loss: 0.0031716362573206425 2023-01-22 12:26:05.902344: step: 94/463, loss: 0.04813292250037193 2023-01-22 12:26:06.626177: step: 96/463, loss: 2.49385404586792 2023-01-22 12:26:07.282650: step: 98/463, loss: 0.012211144901812077 2023-01-22 12:26:07.919484: step: 100/463, loss: 0.04815521091222763 2023-01-22 12:26:08.569754: step: 102/463, loss: 0.0015425365418195724 2023-01-22 12:26:09.180931: step: 104/463, loss: 0.0004060573410242796 2023-01-22 12:26:09.826114: step: 106/463, loss: 0.011621109209954739 2023-01-22 12:26:10.423487: step: 108/463, loss: 0.21835660934448242 2023-01-22 12:26:10.980429: step: 110/463, loss: 0.002669976558536291 2023-01-22 12:26:11.531425: step: 112/463, loss: 0.0043038190342485905 2023-01-22 12:26:12.122140: step: 114/463, loss: 0.36173519492149353 2023-01-22 12:26:12.705387: step: 116/463, loss: 0.0244632326066494 2023-01-22 12:26:13.326335: step: 118/463, loss: 0.004608567804098129 2023-01-22 12:26:13.950023: step: 120/463, loss: 0.045676980167627335 2023-01-22 12:26:14.532381: step: 122/463, loss: 0.008071781136095524 2023-01-22 12:26:15.134538: step: 124/463, loss: 0.0537240132689476 2023-01-22 12:26:15.698564: step: 126/463, loss: 0.022706232964992523 2023-01-22 12:26:16.308373: step: 128/463, loss: 0.08451388776302338 2023-01-22 12:26:16.857754: step: 130/463, loss: 0.005262545309960842 2023-01-22 12:26:17.493684: step: 132/463, loss: 0.018080025911331177 2023-01-22 12:26:18.066738: step: 134/463, loss: 0.12312135845422745 2023-01-22 12:26:18.638825: step: 136/463, loss: 0.016220148652791977 2023-01-22 12:26:19.242960: step: 138/463, loss: 0.01777135580778122 2023-01-22 12:26:19.827356: step: 140/463, loss: 0.005331530701369047 2023-01-22 12:26:20.451653: step: 142/463, loss: 0.09677737206220627 2023-01-22 12:26:21.039504: step: 144/463, loss: 0.26746582984924316 2023-01-22 12:26:21.634400: step: 146/463, loss: 0.007170252501964569 2023-01-22 12:26:22.306089: step: 148/463, loss: 0.001276511582545936 2023-01-22 12:26:22.952657: step: 150/463, loss: 0.07480557262897491 2023-01-22 12:26:23.618263: step: 152/463, loss: 0.017730310559272766 2023-01-22 12:26:24.225793: step: 154/463, loss: 0.054459985345602036 2023-01-22 12:26:24.840498: step: 156/463, loss: 0.03287053108215332 2023-01-22 12:26:25.525759: step: 158/463, loss: 0.08141745626926422 2023-01-22 12:26:26.144869: step: 160/463, loss: 0.09254834800958633 2023-01-22 12:26:26.757602: step: 162/463, loss: 0.0539998933672905 2023-01-22 12:26:27.355378: step: 164/463, loss: 0.0006075088167563081 2023-01-22 12:26:27.939473: step: 166/463, loss: 0.2539507746696472 2023-01-22 12:26:28.553302: step: 168/463, loss: 0.4138997495174408 2023-01-22 12:26:29.150856: step: 170/463, loss: 0.055280543863773346 2023-01-22 12:26:29.835356: step: 172/463, loss: 0.013681205920875072 2023-01-22 12:26:30.474752: step: 174/463, loss: 0.04357951879501343 2023-01-22 12:26:31.143303: step: 176/463, loss: 0.05259454995393753 2023-01-22 12:26:31.710114: step: 178/463, loss: 0.029853422194719315 2023-01-22 12:26:32.323204: step: 180/463, loss: 0.03497356176376343 2023-01-22 12:26:32.976193: step: 182/463, loss: 0.03157559037208557 2023-01-22 12:26:33.547408: step: 184/463, loss: 0.011175544932484627 2023-01-22 12:26:34.182384: step: 186/463, loss: 0.06846903264522552 2023-01-22 12:26:34.784322: step: 188/463, loss: 0.013638272881507874 2023-01-22 12:26:35.402370: step: 190/463, loss: 0.007281886879354715 2023-01-22 12:26:36.001797: step: 192/463, loss: 0.018111051991581917 2023-01-22 12:26:36.547077: step: 194/463, loss: 0.027280418202280998 2023-01-22 12:26:37.148393: step: 196/463, loss: 0.024845674633979797 2023-01-22 12:26:37.812261: step: 198/463, loss: 0.01672469824552536 2023-01-22 12:26:38.408137: step: 200/463, loss: 0.11909183114767075 2023-01-22 12:26:38.994012: step: 202/463, loss: 1.0849922895431519 2023-01-22 12:26:39.586338: step: 204/463, loss: 0.01760043203830719 2023-01-22 12:26:40.137568: step: 206/463, loss: 0.03172234073281288 2023-01-22 12:26:40.808540: step: 208/463, loss: 0.1025736853480339 2023-01-22 12:26:41.450625: step: 210/463, loss: 0.03677133843302727 2023-01-22 12:26:42.128994: step: 212/463, loss: 0.03349140286445618 2023-01-22 12:26:42.775437: step: 214/463, loss: 0.16007618606090546 2023-01-22 12:26:43.357669: step: 216/463, loss: 0.006242662202566862 2023-01-22 12:26:43.975128: step: 218/463, loss: 0.02215118333697319 2023-01-22 12:26:44.550788: step: 220/463, loss: 0.028634389862418175 2023-01-22 12:26:45.097862: step: 222/463, loss: 0.01322889793664217 2023-01-22 12:26:45.706244: step: 224/463, loss: 0.011774728074669838 2023-01-22 12:26:46.273659: step: 226/463, loss: 0.0705314353108406 2023-01-22 12:26:46.900688: step: 228/463, loss: 0.025055989623069763 2023-01-22 12:26:47.511721: step: 230/463, loss: 0.023890916258096695 2023-01-22 12:26:48.107999: step: 232/463, loss: 0.032594770193099976 2023-01-22 12:26:48.729673: step: 234/463, loss: 0.11032899469137192 2023-01-22 12:26:49.395076: step: 236/463, loss: 0.008940963074564934 2023-01-22 12:26:50.025547: step: 238/463, loss: 0.02393309399485588 2023-01-22 12:26:50.595926: step: 240/463, loss: 5.845835039508529e-05 2023-01-22 12:26:51.239335: step: 242/463, loss: 0.02399780973792076 2023-01-22 12:26:51.892897: step: 244/463, loss: 0.03954317048192024 2023-01-22 12:26:52.500129: step: 246/463, loss: 0.0921514555811882 2023-01-22 12:26:53.099315: step: 248/463, loss: 0.0035555388312786818 2023-01-22 12:26:53.698525: step: 250/463, loss: 0.008145295083522797 2023-01-22 12:26:54.300221: step: 252/463, loss: 0.08888237923383713 2023-01-22 12:26:54.970148: step: 254/463, loss: 0.16362272202968597 2023-01-22 12:26:55.645898: step: 256/463, loss: 0.0011100545525550842 2023-01-22 12:26:56.303754: step: 258/463, loss: 0.00926621351391077 2023-01-22 12:26:56.821240: step: 260/463, loss: 0.02183162048459053 2023-01-22 12:26:57.413735: step: 262/463, loss: 0.014006501995027065 2023-01-22 12:26:58.005786: step: 264/463, loss: 0.05721067637205124 2023-01-22 12:26:58.674249: step: 266/463, loss: 0.005617084912955761 2023-01-22 12:26:59.393178: step: 268/463, loss: 0.0004247408942319453 2023-01-22 12:26:59.978165: step: 270/463, loss: 0.022351911291480064 2023-01-22 12:27:00.562286: step: 272/463, loss: 0.07162696123123169 2023-01-22 12:27:01.184507: step: 274/463, loss: 0.09420433640480042 2023-01-22 12:27:01.748417: step: 276/463, loss: 0.052588921040296555 2023-01-22 12:27:02.333634: step: 278/463, loss: 0.05290602520108223 2023-01-22 12:27:03.017162: step: 280/463, loss: 0.0029987809248268604 2023-01-22 12:27:03.653787: step: 282/463, loss: 0.01152290590107441 2023-01-22 12:27:04.214228: step: 284/463, loss: 0.011828247457742691 2023-01-22 12:27:04.847087: step: 286/463, loss: 0.02860492654144764 2023-01-22 12:27:05.417318: step: 288/463, loss: 0.049362607300281525 2023-01-22 12:27:06.033734: step: 290/463, loss: 0.007479547057300806 2023-01-22 12:27:06.615151: step: 292/463, loss: 0.13382774591445923 2023-01-22 12:27:07.244709: step: 294/463, loss: 0.017522688955068588 2023-01-22 12:27:07.821072: step: 296/463, loss: 0.019173359498381615 2023-01-22 12:27:08.457829: step: 298/463, loss: 0.026090065017342567 2023-01-22 12:27:09.063751: step: 300/463, loss: 0.21056485176086426 2023-01-22 12:27:09.694811: step: 302/463, loss: 0.08407945930957794 2023-01-22 12:27:10.300454: step: 304/463, loss: 0.0062902262434363365 2023-01-22 12:27:10.888944: step: 306/463, loss: 0.02413555048406124 2023-01-22 12:27:11.420128: step: 308/463, loss: 0.0023375474847853184 2023-01-22 12:27:12.043486: step: 310/463, loss: 0.063972607254982 2023-01-22 12:27:12.697490: step: 312/463, loss: 0.024366719648241997 2023-01-22 12:27:13.315996: step: 314/463, loss: 0.05372471734881401 2023-01-22 12:27:13.910239: step: 316/463, loss: 0.012292707338929176 2023-01-22 12:27:14.487784: step: 318/463, loss: 0.055671609938144684 2023-01-22 12:27:15.063540: step: 320/463, loss: 0.0017355423187837005 2023-01-22 12:27:15.743597: step: 322/463, loss: 0.6892048120498657 2023-01-22 12:27:16.355637: step: 324/463, loss: 0.003017947543412447 2023-01-22 12:27:16.957207: step: 326/463, loss: 0.03591708466410637 2023-01-22 12:27:17.584144: step: 328/463, loss: 0.06136185675859451 2023-01-22 12:27:18.207692: step: 330/463, loss: 0.3679247200489044 2023-01-22 12:27:18.778650: step: 332/463, loss: 0.05334608629345894 2023-01-22 12:27:19.353467: step: 334/463, loss: 0.012763542123138905 2023-01-22 12:27:19.956084: step: 336/463, loss: 0.02279418334364891 2023-01-22 12:27:20.581222: step: 338/463, loss: 0.24126674234867096 2023-01-22 12:27:21.236606: step: 340/463, loss: 0.024322649464011192 2023-01-22 12:27:21.811713: step: 342/463, loss: 0.07952488958835602 2023-01-22 12:27:22.431677: step: 344/463, loss: 0.0027727605774998665 2023-01-22 12:27:23.118417: step: 346/463, loss: 0.01889275386929512 2023-01-22 12:27:23.792149: step: 348/463, loss: 0.02207927778363228 2023-01-22 12:27:24.370381: step: 350/463, loss: 0.05525800958275795 2023-01-22 12:27:25.009215: step: 352/463, loss: 0.01295787189155817 2023-01-22 12:27:25.637438: step: 354/463, loss: 0.08429592847824097 2023-01-22 12:27:26.234921: step: 356/463, loss: 0.0337245836853981 2023-01-22 12:27:26.816971: step: 358/463, loss: 0.007017158437520266 2023-01-22 12:27:27.427435: step: 360/463, loss: 0.018636619672179222 2023-01-22 12:27:27.968940: step: 362/463, loss: 0.03909965604543686 2023-01-22 12:27:28.548605: step: 364/463, loss: 0.037853535264730453 2023-01-22 12:27:29.197796: step: 366/463, loss: 0.047538187354803085 2023-01-22 12:27:29.808812: step: 368/463, loss: 0.013282271102070808 2023-01-22 12:27:30.447199: step: 370/463, loss: 0.007747524883598089 2023-01-22 12:27:30.998126: step: 372/463, loss: 0.04123048111796379 2023-01-22 12:27:31.623164: step: 374/463, loss: 0.01996202953159809 2023-01-22 12:27:32.189980: step: 376/463, loss: 0.01084333285689354 2023-01-22 12:27:32.793917: step: 378/463, loss: 0.006899149622768164 2023-01-22 12:27:33.437011: step: 380/463, loss: 0.00422108406201005 2023-01-22 12:27:34.092350: step: 382/463, loss: 0.07936445623636246 2023-01-22 12:27:34.832554: step: 384/463, loss: 0.04219507798552513 2023-01-22 12:27:35.420871: step: 386/463, loss: 0.19513475894927979 2023-01-22 12:27:36.007384: step: 388/463, loss: 0.006555625703185797 2023-01-22 12:27:36.632936: step: 390/463, loss: 0.018704913556575775 2023-01-22 12:27:37.178713: step: 392/463, loss: 0.0010827347869053483 2023-01-22 12:27:37.757364: step: 394/463, loss: 0.013838604092597961 2023-01-22 12:27:38.290708: step: 396/463, loss: 0.0020397063344717026 2023-01-22 12:27:38.977241: step: 398/463, loss: 0.05091743916273117 2023-01-22 12:27:39.634394: step: 400/463, loss: 0.04766228422522545 2023-01-22 12:27:40.200366: step: 402/463, loss: 0.04693634435534477 2023-01-22 12:27:40.780348: step: 404/463, loss: 0.01709311082959175 2023-01-22 12:27:41.367084: step: 406/463, loss: 0.046637218445539474 2023-01-22 12:27:41.983087: step: 408/463, loss: 0.009840174578130245 2023-01-22 12:27:42.555347: step: 410/463, loss: 0.06984955072402954 2023-01-22 12:27:43.164139: step: 412/463, loss: 0.0005389424622990191 2023-01-22 12:27:43.835799: step: 414/463, loss: 0.02151060476899147 2023-01-22 12:27:44.421828: step: 416/463, loss: 0.004965255036950111 2023-01-22 12:27:45.065612: step: 418/463, loss: 0.01524600200355053 2023-01-22 12:27:45.645320: step: 420/463, loss: 0.01070441398769617 2023-01-22 12:27:46.253496: step: 422/463, loss: 0.030057663097977638 2023-01-22 12:27:46.878150: step: 424/463, loss: 0.20159301161766052 2023-01-22 12:27:47.470379: step: 426/463, loss: 0.000649317807983607 2023-01-22 12:27:48.067557: step: 428/463, loss: 0.015819711610674858 2023-01-22 12:27:48.682499: step: 430/463, loss: 0.006689096800982952 2023-01-22 12:27:49.257508: step: 432/463, loss: 0.01107083074748516 2023-01-22 12:27:49.892812: step: 434/463, loss: 0.0056923553347587585 2023-01-22 12:27:50.503853: step: 436/463, loss: 0.17163553833961487 2023-01-22 12:27:51.106349: step: 438/463, loss: 0.008319463580846786 2023-01-22 12:27:51.742610: step: 440/463, loss: 0.015703219920396805 2023-01-22 12:27:52.447917: step: 442/463, loss: 0.02817375399172306 2023-01-22 12:27:53.149011: step: 444/463, loss: 0.019657757133245468 2023-01-22 12:27:53.807563: step: 446/463, loss: 0.020900066941976547 2023-01-22 12:27:54.459522: step: 448/463, loss: 0.00202009966596961 2023-01-22 12:27:55.038354: step: 450/463, loss: 0.017271751537919044 2023-01-22 12:27:55.671123: step: 452/463, loss: 0.03575164079666138 2023-01-22 12:27:56.303722: step: 454/463, loss: 0.18235991895198822 2023-01-22 12:27:56.931083: step: 456/463, loss: 0.02527477778494358 2023-01-22 12:27:57.524939: step: 458/463, loss: 0.0724487230181694 2023-01-22 12:27:58.138529: step: 460/463, loss: 0.011069240048527718 2023-01-22 12:27:58.855525: step: 462/463, loss: 0.03572274371981621 2023-01-22 12:27:59.443588: step: 464/463, loss: 0.0008663491462357342 2023-01-22 12:28:00.062069: step: 466/463, loss: 0.04608675464987755 2023-01-22 12:28:00.643838: step: 468/463, loss: 0.0616801381111145 2023-01-22 12:28:01.284856: step: 470/463, loss: 0.007470866199582815 2023-01-22 12:28:01.910218: step: 472/463, loss: 0.00344840157777071 2023-01-22 12:28:02.468809: step: 474/463, loss: 0.10380856692790985 2023-01-22 12:28:03.191293: step: 476/463, loss: 0.0004235657979734242 2023-01-22 12:28:03.850987: step: 478/463, loss: 0.017793817445635796 2023-01-22 12:28:04.491503: step: 480/463, loss: 0.003294572001323104 2023-01-22 12:28:05.097019: step: 482/463, loss: 0.06237761676311493 2023-01-22 12:28:05.690472: step: 484/463, loss: 0.06818075478076935 2023-01-22 12:28:06.269293: step: 486/463, loss: 0.1589372605085373 2023-01-22 12:28:06.903945: step: 488/463, loss: 0.08697634190320969 2023-01-22 12:28:07.500225: step: 490/463, loss: 0.07513870298862457 2023-01-22 12:28:08.068255: step: 492/463, loss: 0.010979725979268551 2023-01-22 12:28:08.732235: step: 494/463, loss: 0.08214328438043594 2023-01-22 12:28:09.380917: step: 496/463, loss: 0.0057365428656339645 2023-01-22 12:28:09.957634: step: 498/463, loss: 0.02907068096101284 2023-01-22 12:28:10.558481: step: 500/463, loss: 0.17985635995864868 2023-01-22 12:28:11.143083: step: 502/463, loss: 0.02707492746412754 2023-01-22 12:28:11.765310: step: 504/463, loss: 0.045984383672475815 2023-01-22 12:28:12.369966: step: 506/463, loss: 0.02451905980706215 2023-01-22 12:28:12.956587: step: 508/463, loss: 0.5614974498748779 2023-01-22 12:28:13.556718: step: 510/463, loss: 0.0032363124191761017 2023-01-22 12:28:14.131221: step: 512/463, loss: 1.7188669443130493 2023-01-22 12:28:14.711239: step: 514/463, loss: 0.06953755021095276 2023-01-22 12:28:15.315110: step: 516/463, loss: 0.013105669990181923 2023-01-22 12:28:15.991565: step: 518/463, loss: 0.2076960951089859 2023-01-22 12:28:16.548573: step: 520/463, loss: 0.02279459685087204 2023-01-22 12:28:17.138993: step: 522/463, loss: 0.02158598229289055 2023-01-22 12:28:17.713988: step: 524/463, loss: 0.038456112146377563 2023-01-22 12:28:18.266526: step: 526/463, loss: 0.37574759125709534 2023-01-22 12:28:18.818912: step: 528/463, loss: 0.025516856461763382 2023-01-22 12:28:19.480118: step: 530/463, loss: 0.013479000888764858 2023-01-22 12:28:20.108849: step: 532/463, loss: 0.015561453998088837 2023-01-22 12:28:20.641265: step: 534/463, loss: 0.012920614331960678 2023-01-22 12:28:21.251016: step: 536/463, loss: 0.03763693571090698 2023-01-22 12:28:21.876067: step: 538/463, loss: 0.025825072079896927 2023-01-22 12:28:22.426484: step: 540/463, loss: 0.015384487807750702 2023-01-22 12:28:23.050893: step: 542/463, loss: 0.06949108839035034 2023-01-22 12:28:23.695712: step: 544/463, loss: 0.0169373769313097 2023-01-22 12:28:24.297821: step: 546/463, loss: 0.0032806831877678633 2023-01-22 12:28:24.904161: step: 548/463, loss: 0.012654570862650871 2023-01-22 12:28:25.450882: step: 550/463, loss: 0.02029086835682392 2023-01-22 12:28:26.056707: step: 552/463, loss: 0.0030985043849796057 2023-01-22 12:28:26.678636: step: 554/463, loss: 0.08785047382116318 2023-01-22 12:28:27.300049: step: 556/463, loss: 0.01286221481859684 2023-01-22 12:28:27.950654: step: 558/463, loss: 0.007285997737199068 2023-01-22 12:28:28.498153: step: 560/463, loss: 0.004571579862385988 2023-01-22 12:28:29.172129: step: 562/463, loss: 0.005525792948901653 2023-01-22 12:28:29.853878: step: 564/463, loss: 0.047876108437776566 2023-01-22 12:28:30.463376: step: 566/463, loss: 2.84997296333313 2023-01-22 12:28:31.051546: step: 568/463, loss: 0.00041170447366312146 2023-01-22 12:28:31.619874: step: 570/463, loss: 0.0005570273497141898 2023-01-22 12:28:32.160436: step: 572/463, loss: 0.4013631343841553 2023-01-22 12:28:32.786462: step: 574/463, loss: 0.13628098368644714 2023-01-22 12:28:33.405771: step: 576/463, loss: 0.17943322658538818 2023-01-22 12:28:34.019793: step: 578/463, loss: 0.02283461205661297 2023-01-22 12:28:34.647582: step: 580/463, loss: 0.3710094690322876 2023-01-22 12:28:35.219921: step: 582/463, loss: 0.10611604899168015 2023-01-22 12:28:35.870948: step: 584/463, loss: 0.013936948962509632 2023-01-22 12:28:36.555601: step: 586/463, loss: 0.05231943726539612 2023-01-22 12:28:37.220474: step: 588/463, loss: 0.0753253623843193 2023-01-22 12:28:37.814712: step: 590/463, loss: 0.001737541169859469 2023-01-22 12:28:38.434024: step: 592/463, loss: 0.0053075747564435005 2023-01-22 12:28:39.072706: step: 594/463, loss: 0.006139205768704414 2023-01-22 12:28:39.717178: step: 596/463, loss: 0.036545101553201675 2023-01-22 12:28:40.319555: step: 598/463, loss: 0.0047314902767539024 2023-01-22 12:28:40.868206: step: 600/463, loss: 0.01959819905459881 2023-01-22 12:28:41.515394: step: 602/463, loss: 0.1848900467157364 2023-01-22 12:28:42.146035: step: 604/463, loss: 0.03643307834863663 2023-01-22 12:28:42.715818: step: 606/463, loss: 0.030170494690537453 2023-01-22 12:28:43.360671: step: 608/463, loss: 0.022433185949921608 2023-01-22 12:28:43.938803: step: 610/463, loss: 0.008484968915581703 2023-01-22 12:28:44.498134: step: 612/463, loss: 0.0015296782366931438 2023-01-22 12:28:45.101129: step: 614/463, loss: 0.03289848938584328 2023-01-22 12:28:45.667482: step: 616/463, loss: 0.05979551747441292 2023-01-22 12:28:46.260775: step: 618/463, loss: 0.04051965847611427 2023-01-22 12:28:46.993931: step: 620/463, loss: 0.035239219665527344 2023-01-22 12:28:47.620415: step: 622/463, loss: 0.06116524338722229 2023-01-22 12:28:48.234402: step: 624/463, loss: 0.026527203619480133 2023-01-22 12:28:48.827030: step: 626/463, loss: 0.016044294461607933 2023-01-22 12:28:49.367605: step: 628/463, loss: 0.012501475401222706 2023-01-22 12:28:49.939307: step: 630/463, loss: 0.2921847701072693 2023-01-22 12:28:50.532724: step: 632/463, loss: 0.04030440375208855 2023-01-22 12:28:51.151981: step: 634/463, loss: 0.010312661528587341 2023-01-22 12:28:51.714513: step: 636/463, loss: 0.025754885748028755 2023-01-22 12:28:52.337853: step: 638/463, loss: 0.04597608745098114 2023-01-22 12:28:52.923469: step: 640/463, loss: 0.009888525120913982 2023-01-22 12:28:53.563869: step: 642/463, loss: 0.018977848812937737 2023-01-22 12:28:54.157017: step: 644/463, loss: 0.025373727083206177 2023-01-22 12:28:54.738901: step: 646/463, loss: 0.013036086224019527 2023-01-22 12:28:55.347988: step: 648/463, loss: 0.006871116813272238 2023-01-22 12:28:55.950883: step: 650/463, loss: 0.030983146280050278 2023-01-22 12:28:56.516701: step: 652/463, loss: 0.014170871116220951 2023-01-22 12:28:57.095197: step: 654/463, loss: 0.027246158570051193 2023-01-22 12:28:57.607510: step: 656/463, loss: 0.052943792194128036 2023-01-22 12:28:58.164643: step: 658/463, loss: 0.0004469984851311892 2023-01-22 12:28:58.725597: step: 660/463, loss: 0.011634543538093567 2023-01-22 12:28:59.325034: step: 662/463, loss: 0.009565292857587337 2023-01-22 12:28:59.969395: step: 664/463, loss: 0.021424278616905212 2023-01-22 12:29:00.560921: step: 666/463, loss: 0.06298372894525528 2023-01-22 12:29:01.176401: step: 668/463, loss: 0.0040954649448394775 2023-01-22 12:29:01.743518: step: 670/463, loss: 0.01814587414264679 2023-01-22 12:29:02.380814: step: 672/463, loss: 0.03335180506110191 2023-01-22 12:29:02.985400: step: 674/463, loss: 0.004504937678575516 2023-01-22 12:29:03.631917: step: 676/463, loss: 0.22467023134231567 2023-01-22 12:29:04.247881: step: 678/463, loss: 0.048772286623716354 2023-01-22 12:29:04.925059: step: 680/463, loss: 0.18958498537540436 2023-01-22 12:29:05.536302: step: 682/463, loss: 0.06526894122362137 2023-01-22 12:29:06.048495: step: 684/463, loss: 0.0037250046152621508 2023-01-22 12:29:06.621817: step: 686/463, loss: 0.02711670473217964 2023-01-22 12:29:07.191141: step: 688/463, loss: 0.02521621808409691 2023-01-22 12:29:07.745637: step: 690/463, loss: 0.009328854270279408 2023-01-22 12:29:08.337690: step: 692/463, loss: 0.009633393958210945 2023-01-22 12:29:08.966809: step: 694/463, loss: 0.007632388733327389 2023-01-22 12:29:09.534119: step: 696/463, loss: 0.010496960952877998 2023-01-22 12:29:10.285521: step: 698/463, loss: 0.014901124872267246 2023-01-22 12:29:10.908682: step: 700/463, loss: 0.007905877195298672 2023-01-22 12:29:11.503704: step: 702/463, loss: 0.05746883526444435 2023-01-22 12:29:12.112218: step: 704/463, loss: 0.036977849900722504 2023-01-22 12:29:12.743351: step: 706/463, loss: 0.029813863337039948 2023-01-22 12:29:13.313763: step: 708/463, loss: 0.027757974341511726 2023-01-22 12:29:13.878663: step: 710/463, loss: 0.027523649856448174 2023-01-22 12:29:14.454656: step: 712/463, loss: 0.15209656953811646 2023-01-22 12:29:15.128020: step: 714/463, loss: 0.0017175053944811225 2023-01-22 12:29:15.744609: step: 716/463, loss: 0.03918301686644554 2023-01-22 12:29:16.331085: step: 718/463, loss: 0.03300347179174423 2023-01-22 12:29:16.953791: step: 720/463, loss: 0.02014309912919998 2023-01-22 12:29:17.562858: step: 722/463, loss: 0.0004547167045529932 2023-01-22 12:29:18.100951: step: 724/463, loss: 0.017180675640702248 2023-01-22 12:29:18.689690: step: 726/463, loss: 0.04160762578248978 2023-01-22 12:29:19.281657: step: 728/463, loss: 0.027141336351633072 2023-01-22 12:29:19.871073: step: 730/463, loss: 0.015554307028651237 2023-01-22 12:29:20.453378: step: 732/463, loss: 0.10072067379951477 2023-01-22 12:29:21.030269: step: 734/463, loss: 0.00099479709751904 2023-01-22 12:29:21.608642: step: 736/463, loss: 0.07003404945135117 2023-01-22 12:29:22.259990: step: 738/463, loss: 0.11261841654777527 2023-01-22 12:29:22.851297: step: 740/463, loss: 0.010142209939658642 2023-01-22 12:29:23.424023: step: 742/463, loss: 0.01635134406387806 2023-01-22 12:29:23.981716: step: 744/463, loss: 0.005189536605030298 2023-01-22 12:29:24.583780: step: 746/463, loss: 0.007973375730216503 2023-01-22 12:29:25.122946: step: 748/463, loss: 0.033141378313302994 2023-01-22 12:29:25.765591: step: 750/463, loss: 0.022291144356131554 2023-01-22 12:29:26.433640: step: 752/463, loss: 0.04240809381008148 2023-01-22 12:29:27.019741: step: 754/463, loss: 0.02537102811038494 2023-01-22 12:29:27.595142: step: 756/463, loss: 0.017941009253263474 2023-01-22 12:29:28.212825: step: 758/463, loss: 0.009395868517458439 2023-01-22 12:29:28.896352: step: 760/463, loss: 0.2223188430070877 2023-01-22 12:29:29.555152: step: 762/463, loss: 0.1926819384098053 2023-01-22 12:29:30.098226: step: 764/463, loss: 0.0008233282715082169 2023-01-22 12:29:30.728649: step: 766/463, loss: 0.4056546688079834 2023-01-22 12:29:31.356852: step: 768/463, loss: 0.01469552330672741 2023-01-22 12:29:31.967300: step: 770/463, loss: 0.06593307852745056 2023-01-22 12:29:32.599026: step: 772/463, loss: 0.002064867876470089 2023-01-22 12:29:33.172108: step: 774/463, loss: 0.01166010182350874 2023-01-22 12:29:33.825604: step: 776/463, loss: 0.14776545763015747 2023-01-22 12:29:34.401498: step: 778/463, loss: 0.07784542441368103 2023-01-22 12:29:34.999694: step: 780/463, loss: 0.014094438403844833 2023-01-22 12:29:35.618864: step: 782/463, loss: 0.050974804908037186 2023-01-22 12:29:36.278496: step: 784/463, loss: 0.022339699789881706 2023-01-22 12:29:36.915981: step: 786/463, loss: 0.008100991137325764 2023-01-22 12:29:37.550604: step: 788/463, loss: 0.02096448838710785 2023-01-22 12:29:38.160373: step: 790/463, loss: 0.03886687755584717 2023-01-22 12:29:38.752170: step: 792/463, loss: 0.0005544420564547181 2023-01-22 12:29:39.307134: step: 794/463, loss: 0.0037787563633173704 2023-01-22 12:29:39.956816: step: 796/463, loss: 0.00595829589292407 2023-01-22 12:29:40.616498: step: 798/463, loss: 0.023152174428105354 2023-01-22 12:29:41.260308: step: 800/463, loss: 0.06829645484685898 2023-01-22 12:29:41.845460: step: 802/463, loss: 0.025881821289658546 2023-01-22 12:29:42.424257: step: 804/463, loss: 0.08149004727602005 2023-01-22 12:29:43.137932: step: 806/463, loss: 0.0035178614780306816 2023-01-22 12:29:43.790626: step: 808/463, loss: 0.010716564022004604 2023-01-22 12:29:44.387838: step: 810/463, loss: 0.11442716419696808 2023-01-22 12:29:45.053344: step: 812/463, loss: 0.003705686191096902 2023-01-22 12:29:45.615976: step: 814/463, loss: 0.03979633376002312 2023-01-22 12:29:46.232429: step: 816/463, loss: 0.12115366011857986 2023-01-22 12:29:46.750994: step: 818/463, loss: 0.0043911864049732685 2023-01-22 12:29:47.356475: step: 820/463, loss: 0.0011728814570233226 2023-01-22 12:29:47.964806: step: 822/463, loss: 0.18231798708438873 2023-01-22 12:29:48.449858: step: 824/463, loss: 0.012082516215741634 2023-01-22 12:29:49.020114: step: 826/463, loss: 0.05960536748170853 2023-01-22 12:29:49.706631: step: 828/463, loss: 0.02781391330063343 2023-01-22 12:29:50.330100: step: 830/463, loss: 0.02283107303082943 2023-01-22 12:29:50.962233: step: 832/463, loss: 0.03302788361907005 2023-01-22 12:29:51.701051: step: 834/463, loss: 0.018646804615855217 2023-01-22 12:29:52.334646: step: 836/463, loss: 0.0058774747885763645 2023-01-22 12:29:52.975080: step: 838/463, loss: 0.01898062415421009 2023-01-22 12:29:53.668831: step: 840/463, loss: 0.006672234740108252 2023-01-22 12:29:54.235781: step: 842/463, loss: 0.037453487515449524 2023-01-22 12:29:54.878642: step: 844/463, loss: 0.07097356021404266 2023-01-22 12:29:55.527028: step: 846/463, loss: 0.0280532855540514 2023-01-22 12:29:56.175129: step: 848/463, loss: 0.09504487365484238 2023-01-22 12:29:56.834897: step: 850/463, loss: 0.03309900313615799 2023-01-22 12:29:57.370408: step: 852/463, loss: 0.028237462043762207 2023-01-22 12:29:57.988940: step: 854/463, loss: 0.009738100692629814 2023-01-22 12:29:58.596138: step: 856/463, loss: 0.006108644884079695 2023-01-22 12:29:59.235044: step: 858/463, loss: 0.05223897472023964 2023-01-22 12:29:59.966463: step: 860/463, loss: 0.05805351957678795 2023-01-22 12:30:00.563187: step: 862/463, loss: 0.028307832777500153 2023-01-22 12:30:01.234014: step: 864/463, loss: 0.015082805417478085 2023-01-22 12:30:01.849713: step: 866/463, loss: 0.011649715714156628 2023-01-22 12:30:02.538896: step: 868/463, loss: 0.004242930095642805 2023-01-22 12:30:03.151789: step: 870/463, loss: 0.04554912820458412 2023-01-22 12:30:03.721912: step: 872/463, loss: 0.014860392548143864 2023-01-22 12:30:04.340356: step: 874/463, loss: 0.06247008591890335 2023-01-22 12:30:05.011005: step: 876/463, loss: 0.07359500229358673 2023-01-22 12:30:05.682592: step: 878/463, loss: 0.017384247854351997 2023-01-22 12:30:06.332319: step: 880/463, loss: 0.20369195938110352 2023-01-22 12:30:06.943291: step: 882/463, loss: 0.03452306240797043 2023-01-22 12:30:07.566232: step: 884/463, loss: 0.002270209603011608 2023-01-22 12:30:08.268539: step: 886/463, loss: 0.011534066870808601 2023-01-22 12:30:08.840751: step: 888/463, loss: 0.0021770147141069174 2023-01-22 12:30:09.477119: step: 890/463, loss: 0.004283386282622814 2023-01-22 12:30:10.032973: step: 892/463, loss: 0.01173703558743 2023-01-22 12:30:10.616513: step: 894/463, loss: 0.005979097448289394 2023-01-22 12:30:11.221509: step: 896/463, loss: 0.004212734289467335 2023-01-22 12:30:11.828916: step: 898/463, loss: 0.016311684623360634 2023-01-22 12:30:12.457941: step: 900/463, loss: 0.002907142508774996 2023-01-22 12:30:13.059137: step: 902/463, loss: 0.035801492631435394 2023-01-22 12:30:13.645489: step: 904/463, loss: 0.01777712069451809 2023-01-22 12:30:14.303368: step: 906/463, loss: 0.07303626835346222 2023-01-22 12:30:15.040433: step: 908/463, loss: 0.011776966974139214 2023-01-22 12:30:15.665514: step: 910/463, loss: 0.010873616673052311 2023-01-22 12:30:16.246081: step: 912/463, loss: 0.03174722567200661 2023-01-22 12:30:16.894559: step: 914/463, loss: 0.016076233237981796 2023-01-22 12:30:17.491928: step: 916/463, loss: 0.014363965019583702 2023-01-22 12:30:18.087077: step: 918/463, loss: 0.036554571241140366 2023-01-22 12:30:18.746400: step: 920/463, loss: 0.015861066058278084 2023-01-22 12:30:19.377910: step: 922/463, loss: 0.012140725739300251 2023-01-22 12:30:19.941407: step: 924/463, loss: 0.032698407769203186 2023-01-22 12:30:20.537783: step: 926/463, loss: 0.013550748117268085 ================================================== Loss: 0.068 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3185845481466289, 'r': 0.35062436038907924, 'f1': 0.33383746689258315}, 'combined': 0.24598550192085072, 'epoch': 27} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3261768755389279, 'r': 0.40142540105388275, 'f1': 0.3599100423128343}, 'combined': 0.27897333423291465, 'epoch': 27} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28869180484693874, 'r': 0.3681231363513147, 'f1': 0.3236044918384368}, 'combined': 0.23844541503884817, 'epoch': 27} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3131684681725742, 'r': 0.4081552278205058, 'f1': 0.3544077317387951}, 'combined': 0.27470838536691294, 'epoch': 27} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28452286822603506, 'r': 0.35038964227456687, 'f1': 0.3140396963923414}, 'combined': 0.23139767102593578, 'epoch': 27} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.31195147718321864, 'r': 0.3971073491716524, 'f1': 0.34941592874950084}, 'combined': 0.27083914094458916, 'epoch': 27} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2391304347826087, 'r': 0.3142857142857143, 'f1': 0.271604938271605}, 'combined': 0.18106995884773663, 'epoch': 27} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2682926829268293, 'r': 0.4782608695652174, 'f1': 0.34375000000000006}, 'combined': 0.17187500000000003, 'epoch': 27} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.34523809523809523, 'r': 0.25, 'f1': 0.29}, 'combined': 0.1933333333333333, 'epoch': 27} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3279711174242424, 'r': 0.3285934535104364, 'f1': 0.32828199052132695}, 'combined': 0.24189199301571457, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33699381251830834, 'r': 0.40575541764612494, 'f1': 0.3681917384478598}, 'combined': 0.2853926393710684, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32926829268292684, 'r': 0.38571428571428573, 'f1': 0.35526315789473684}, 'combined': 0.23684210526315788, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29021588054776093, 'r': 0.325460313859064, 'f1': 0.30682931199235547}, 'combined': 0.2260847562048935, 'epoch': 13} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32837198696491027, 'r': 0.4053341714098111, 'f1': 0.3628166009822085}, 'combined': 0.28122626487616165, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3333333333333333, 'r': 0.4782608695652174, 'f1': 0.3928571428571428}, 'combined': 0.1964285714285714, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30241528588023536, 'r': 0.33799355480732185, 'f1': 0.319216135095804}, 'combined': 0.2352118890179608, 'epoch': 18} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3270213595028262, 'r': 0.3847310111797955, 'f1': 0.3535366048679202}, 'combined': 0.2740331578402061, 'epoch': 18} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 18} ****************************** Epoch: 28 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 12:32:57.424614: step: 2/463, loss: 0.29374492168426514 2023-01-22 12:32:58.054175: step: 4/463, loss: 0.008950339630246162 2023-01-22 12:32:58.556683: step: 6/463, loss: 0.0025884020142257214 2023-01-22 12:32:59.128124: step: 8/463, loss: 0.01560173463076353 2023-01-22 12:32:59.684612: step: 10/463, loss: 0.0012035038089379668 2023-01-22 12:33:00.233982: step: 12/463, loss: 0.013082488439977169 2023-01-22 12:33:00.871821: step: 14/463, loss: 0.0173418577760458 2023-01-22 12:33:01.586498: step: 16/463, loss: 0.030459564179182053 2023-01-22 12:33:02.165891: step: 18/463, loss: 0.0023555199149996042 2023-01-22 12:33:02.754276: step: 20/463, loss: 0.017555763944983482 2023-01-22 12:33:03.395149: step: 22/463, loss: 3.182292938232422 2023-01-22 12:33:04.019173: step: 24/463, loss: 0.0032870571594685316 2023-01-22 12:33:04.749432: step: 26/463, loss: 0.04132572188973427 2023-01-22 12:33:05.341502: step: 28/463, loss: 0.006120634730905294 2023-01-22 12:33:05.914577: step: 30/463, loss: 0.10707765817642212 2023-01-22 12:33:06.523725: step: 32/463, loss: 0.011916041374206543 2023-01-22 12:33:07.156821: step: 34/463, loss: 0.011357552371919155 2023-01-22 12:33:07.753722: step: 36/463, loss: 0.04864846169948578 2023-01-22 12:33:08.388213: step: 38/463, loss: 0.18586254119873047 2023-01-22 12:33:08.989899: step: 40/463, loss: 0.07336920499801636 2023-01-22 12:33:09.575081: step: 42/463, loss: 0.01845753751695156 2023-01-22 12:33:10.167900: step: 44/463, loss: 0.021225254982709885 2023-01-22 12:33:10.718045: step: 46/463, loss: 0.002981279045343399 2023-01-22 12:33:11.227556: step: 48/463, loss: 0.0034501601476222277 2023-01-22 12:33:11.743267: step: 50/463, loss: 0.025557447224855423 2023-01-22 12:33:12.378176: step: 52/463, loss: 0.005544594023376703 2023-01-22 12:33:13.045838: step: 54/463, loss: 0.022663110867142677 2023-01-22 12:33:13.682768: step: 56/463, loss: 0.03357920050621033 2023-01-22 12:33:14.345146: step: 58/463, loss: 0.03633473441004753 2023-01-22 12:33:14.989346: step: 60/463, loss: 0.041334718465805054 2023-01-22 12:33:15.615403: step: 62/463, loss: 0.00767884636297822 2023-01-22 12:33:16.188983: step: 64/463, loss: 0.030544543638825417 2023-01-22 12:33:16.798600: step: 66/463, loss: 0.024337437003850937 2023-01-22 12:33:17.424887: step: 68/463, loss: 0.030278237536549568 2023-01-22 12:33:18.052613: step: 70/463, loss: 0.004849262069910765 2023-01-22 12:33:18.663152: step: 72/463, loss: 0.003848511492833495 2023-01-22 12:33:19.243609: step: 74/463, loss: 0.01226518303155899 2023-01-22 12:33:19.898900: step: 76/463, loss: 0.04755386337637901 2023-01-22 12:33:20.538344: step: 78/463, loss: 0.033736661076545715 2023-01-22 12:33:21.124351: step: 80/463, loss: 0.007528581656515598 2023-01-22 12:33:21.707693: step: 82/463, loss: 0.06172417104244232 2023-01-22 12:33:22.288954: step: 84/463, loss: 0.05943514406681061 2023-01-22 12:33:22.979468: step: 86/463, loss: 0.0009893669048324227 2023-01-22 12:33:23.710135: step: 88/463, loss: 0.0063304295763373375 2023-01-22 12:33:24.269410: step: 90/463, loss: 0.007008117623627186 2023-01-22 12:33:24.913957: step: 92/463, loss: 0.06716267019510269 2023-01-22 12:33:25.497690: step: 94/463, loss: 0.007413616869598627 2023-01-22 12:33:26.161119: step: 96/463, loss: 0.12252495437860489 2023-01-22 12:33:26.776339: step: 98/463, loss: 0.0070847985334694386 2023-01-22 12:33:27.334982: step: 100/463, loss: 0.020843129605054855 2023-01-22 12:33:27.943773: step: 102/463, loss: 0.005596775561571121 2023-01-22 12:33:28.503649: step: 104/463, loss: 0.007876403629779816 2023-01-22 12:33:29.273472: step: 106/463, loss: 4.0363924199482426e-05 2023-01-22 12:33:29.963042: step: 108/463, loss: 0.07214424014091492 2023-01-22 12:33:30.615121: step: 110/463, loss: 0.005892504006624222 2023-01-22 12:33:31.263374: step: 112/463, loss: 0.031808339059352875 2023-01-22 12:33:31.867383: step: 114/463, loss: 0.016272246837615967 2023-01-22 12:33:32.485933: step: 116/463, loss: 0.030042799189686775 2023-01-22 12:33:33.131430: step: 118/463, loss: 0.01700497791171074 2023-01-22 12:33:33.755740: step: 120/463, loss: 0.013594040647149086 2023-01-22 12:33:34.386919: step: 122/463, loss: 0.010613398626446724 2023-01-22 12:33:34.985648: step: 124/463, loss: 0.007190784905105829 2023-01-22 12:33:35.636721: step: 126/463, loss: 0.011261575855314732 2023-01-22 12:33:36.308604: step: 128/463, loss: 0.006488234270364046 2023-01-22 12:33:36.878240: step: 130/463, loss: 0.06378525495529175 2023-01-22 12:33:37.472574: step: 132/463, loss: 0.01844719611108303 2023-01-22 12:33:38.064157: step: 134/463, loss: 0.0003200969658792019 2023-01-22 12:33:38.687667: step: 136/463, loss: 0.04771817475557327 2023-01-22 12:33:39.315083: step: 138/463, loss: 0.02819829247891903 2023-01-22 12:33:39.979953: step: 140/463, loss: 0.0033267666585743427 2023-01-22 12:33:40.564759: step: 142/463, loss: 0.03953809663653374 2023-01-22 12:33:41.201878: step: 144/463, loss: 0.04431101679801941 2023-01-22 12:33:41.791686: step: 146/463, loss: 0.005385186523199081 2023-01-22 12:33:42.323198: step: 148/463, loss: 0.0013555175391957164 2023-01-22 12:33:42.852593: step: 150/463, loss: 0.003086766693741083 2023-01-22 12:33:43.480908: step: 152/463, loss: 0.06461112946271896 2023-01-22 12:33:44.141250: step: 154/463, loss: 0.011391893960535526 2023-01-22 12:33:44.730907: step: 156/463, loss: 0.0019549208227545023 2023-01-22 12:33:45.335712: step: 158/463, loss: 0.053492654114961624 2023-01-22 12:33:45.934905: step: 160/463, loss: 0.0008113268995657563 2023-01-22 12:33:46.524776: step: 162/463, loss: 0.004094821400940418 2023-01-22 12:33:47.128140: step: 164/463, loss: 0.21801036596298218 2023-01-22 12:33:47.729777: step: 166/463, loss: 0.01630508340895176 2023-01-22 12:33:48.332018: step: 168/463, loss: 0.0029705564957112074 2023-01-22 12:33:48.927556: step: 170/463, loss: 0.23823057115077972 2023-01-22 12:33:49.484734: step: 172/463, loss: 0.01696964167058468 2023-01-22 12:33:50.060669: step: 174/463, loss: 0.0002627323556225747 2023-01-22 12:33:50.656920: step: 176/463, loss: 0.03307105600833893 2023-01-22 12:33:51.221908: step: 178/463, loss: 0.31701192259788513 2023-01-22 12:33:51.857705: step: 180/463, loss: 0.003947002813220024 2023-01-22 12:33:52.441458: step: 182/463, loss: 0.34261614084243774 2023-01-22 12:33:53.061982: step: 184/463, loss: 0.0035143662244081497 2023-01-22 12:33:53.747382: step: 186/463, loss: 0.013044084422290325 2023-01-22 12:33:54.363895: step: 188/463, loss: 0.0020450023002922535 2023-01-22 12:33:54.979588: step: 190/463, loss: 0.018712855875492096 2023-01-22 12:33:55.604411: step: 192/463, loss: 0.01126101054251194 2023-01-22 12:33:56.198436: step: 194/463, loss: 0.006964448373764753 2023-01-22 12:33:56.776992: step: 196/463, loss: 0.02322833426296711 2023-01-22 12:33:57.356392: step: 198/463, loss: 0.005938366986811161 2023-01-22 12:33:57.925523: step: 200/463, loss: 0.0042057582177221775 2023-01-22 12:33:58.531630: step: 202/463, loss: 0.028879977762699127 2023-01-22 12:33:59.252053: step: 204/463, loss: 0.012913842685520649 2023-01-22 12:33:59.888035: step: 206/463, loss: 0.009799282997846603 2023-01-22 12:34:00.412186: step: 208/463, loss: 0.03414357081055641 2023-01-22 12:34:01.039192: step: 210/463, loss: 0.14262324571609497 2023-01-22 12:34:01.647627: step: 212/463, loss: 0.029071707278490067 2023-01-22 12:34:02.186481: step: 214/463, loss: 0.00010934104648185894 2023-01-22 12:34:02.798463: step: 216/463, loss: 0.07483761012554169 2023-01-22 12:34:03.479132: step: 218/463, loss: 0.01736161857843399 2023-01-22 12:34:04.076200: step: 220/463, loss: 0.0006832100916653872 2023-01-22 12:34:04.680125: step: 222/463, loss: 0.04779037833213806 2023-01-22 12:34:05.294922: step: 224/463, loss: 0.006258423440158367 2023-01-22 12:34:05.924825: step: 226/463, loss: 0.012496327050030231 2023-01-22 12:34:06.534110: step: 228/463, loss: 0.02091159299015999 2023-01-22 12:34:07.174333: step: 230/463, loss: 0.009413516148924828 2023-01-22 12:34:07.757932: step: 232/463, loss: 0.003302746918052435 2023-01-22 12:34:08.367886: step: 234/463, loss: 0.019160253927111626 2023-01-22 12:34:08.905370: step: 236/463, loss: 0.009196114726364613 2023-01-22 12:34:09.577061: step: 238/463, loss: 0.004442290402948856 2023-01-22 12:34:10.149739: step: 240/463, loss: 0.028344832360744476 2023-01-22 12:34:10.770235: step: 242/463, loss: 0.0005288837128318846 2023-01-22 12:34:11.387789: step: 244/463, loss: 0.03255986422300339 2023-01-22 12:34:11.967955: step: 246/463, loss: 0.05627922713756561 2023-01-22 12:34:12.572673: step: 248/463, loss: 0.21724070608615875 2023-01-22 12:34:13.163774: step: 250/463, loss: 0.025468379259109497 2023-01-22 12:34:13.847245: step: 252/463, loss: 0.03477541729807854 2023-01-22 12:34:14.535351: step: 254/463, loss: 0.009381677024066448 2023-01-22 12:34:15.173761: step: 256/463, loss: 0.02722826972603798 2023-01-22 12:34:15.800536: step: 258/463, loss: 0.01369366142898798 2023-01-22 12:34:16.522005: step: 260/463, loss: 0.00316370720975101 2023-01-22 12:34:17.106721: step: 262/463, loss: 0.009699487127363682 2023-01-22 12:34:17.747145: step: 264/463, loss: 0.018967123702168465 2023-01-22 12:34:18.361778: step: 266/463, loss: 0.013150692917406559 2023-01-22 12:34:18.932604: step: 268/463, loss: 0.0017656952841207385 2023-01-22 12:34:19.496192: step: 270/463, loss: 0.005274574737995863 2023-01-22 12:34:20.116087: step: 272/463, loss: 0.04965560510754585 2023-01-22 12:34:20.680414: step: 274/463, loss: 0.0036977967247366905 2023-01-22 12:34:21.273367: step: 276/463, loss: 0.02303214557468891 2023-01-22 12:34:21.858027: step: 278/463, loss: 0.0029469032306224108 2023-01-22 12:34:22.490879: step: 280/463, loss: 0.02842818759381771 2023-01-22 12:34:23.048506: step: 282/463, loss: 0.007984843105077744 2023-01-22 12:34:23.674336: step: 284/463, loss: 0.00031438929727301 2023-01-22 12:34:24.304981: step: 286/463, loss: 0.00906501803547144 2023-01-22 12:34:24.871638: step: 288/463, loss: 0.0017406274564564228 2023-01-22 12:34:25.441310: step: 290/463, loss: 0.036236152052879333 2023-01-22 12:34:26.019416: step: 292/463, loss: 0.004267120733857155 2023-01-22 12:34:26.627557: step: 294/463, loss: 0.011889545246958733 2023-01-22 12:34:27.264028: step: 296/463, loss: 0.008746356703341007 2023-01-22 12:34:27.966902: step: 298/463, loss: 0.02594771794974804 2023-01-22 12:34:28.596256: step: 300/463, loss: 0.0295711699873209 2023-01-22 12:34:29.189778: step: 302/463, loss: 0.06577367335557938 2023-01-22 12:34:29.841339: step: 304/463, loss: 0.007691263686865568 2023-01-22 12:34:30.468192: step: 306/463, loss: 0.026845639571547508 2023-01-22 12:34:31.109812: step: 308/463, loss: 0.0009184012887999415 2023-01-22 12:34:31.711510: step: 310/463, loss: 0.04784321039915085 2023-01-22 12:34:32.415822: step: 312/463, loss: 0.8103326559066772 2023-01-22 12:34:33.030162: step: 314/463, loss: 0.04451471194624901 2023-01-22 12:34:33.637318: step: 316/463, loss: 0.20168262720108032 2023-01-22 12:34:34.227121: step: 318/463, loss: 0.007925400510430336 2023-01-22 12:34:34.842403: step: 320/463, loss: 0.01127647515386343 2023-01-22 12:34:35.506224: step: 322/463, loss: 0.08415776491165161 2023-01-22 12:34:36.131635: step: 324/463, loss: 0.03440093994140625 2023-01-22 12:34:36.734196: step: 326/463, loss: 0.05367375910282135 2023-01-22 12:34:37.266310: step: 328/463, loss: 0.0526382252573967 2023-01-22 12:34:37.862738: step: 330/463, loss: 0.026704581454396248 2023-01-22 12:34:38.476425: step: 332/463, loss: 0.13789011538028717 2023-01-22 12:34:39.072988: step: 334/463, loss: 0.036408569663763046 2023-01-22 12:34:39.757609: step: 336/463, loss: 0.013833659701049328 2023-01-22 12:34:40.315642: step: 338/463, loss: 0.008015003986656666 2023-01-22 12:34:40.937857: step: 340/463, loss: 0.04638589173555374 2023-01-22 12:34:41.534782: step: 342/463, loss: 0.011765706352889538 2023-01-22 12:34:42.150891: step: 344/463, loss: 0.004654136952012777 2023-01-22 12:34:42.716235: step: 346/463, loss: 0.01899045892059803 2023-01-22 12:34:43.247990: step: 348/463, loss: 0.02097213640809059 2023-01-22 12:34:43.890083: step: 350/463, loss: 0.014730531722307205 2023-01-22 12:34:44.449314: step: 352/463, loss: 0.010041667148470879 2023-01-22 12:34:45.048104: step: 354/463, loss: 0.03531422093510628 2023-01-22 12:34:45.628194: step: 356/463, loss: 0.009784921072423458 2023-01-22 12:34:46.176231: step: 358/463, loss: 0.006354726385325193 2023-01-22 12:34:46.753626: step: 360/463, loss: 0.04932844638824463 2023-01-22 12:34:47.460585: step: 362/463, loss: 0.04441177099943161 2023-01-22 12:34:48.087188: step: 364/463, loss: 0.3677092492580414 2023-01-22 12:34:48.706711: step: 366/463, loss: 0.19194917380809784 2023-01-22 12:34:49.475547: step: 368/463, loss: 0.0059125991538167 2023-01-22 12:34:50.037997: step: 370/463, loss: 0.05111651495099068 2023-01-22 12:34:50.599588: step: 372/463, loss: 0.029977114871144295 2023-01-22 12:34:51.192252: step: 374/463, loss: 0.03323173522949219 2023-01-22 12:34:51.815449: step: 376/463, loss: 0.008927146904170513 2023-01-22 12:34:52.438482: step: 378/463, loss: 0.005805583670735359 2023-01-22 12:34:53.044850: step: 380/463, loss: 0.03207562118768692 2023-01-22 12:34:53.690162: step: 382/463, loss: 0.00576637452468276 2023-01-22 12:34:54.295612: step: 384/463, loss: 0.09505612403154373 2023-01-22 12:34:54.921144: step: 386/463, loss: 0.27356088161468506 2023-01-22 12:34:55.510688: step: 388/463, loss: 0.0012583467178046703 2023-01-22 12:34:56.094218: step: 390/463, loss: 0.020944418385624886 2023-01-22 12:34:56.693230: step: 392/463, loss: 0.10776685178279877 2023-01-22 12:34:57.262818: step: 394/463, loss: 0.009196275845170021 2023-01-22 12:34:57.890006: step: 396/463, loss: 0.01127785537391901 2023-01-22 12:34:58.520775: step: 398/463, loss: 0.014327704906463623 2023-01-22 12:34:59.139700: step: 400/463, loss: 0.008281395770609379 2023-01-22 12:34:59.760679: step: 402/463, loss: 0.0006575637962669134 2023-01-22 12:35:00.357774: step: 404/463, loss: 0.21968667209148407 2023-01-22 12:35:01.035737: step: 406/463, loss: 0.1207488626241684 2023-01-22 12:35:01.647851: step: 408/463, loss: 0.06901227682828903 2023-01-22 12:35:02.256198: step: 410/463, loss: 0.021813420578837395 2023-01-22 12:35:02.850447: step: 412/463, loss: 0.005912352818995714 2023-01-22 12:35:03.482261: step: 414/463, loss: 0.024966519325971603 2023-01-22 12:35:04.087961: step: 416/463, loss: 0.05034935846924782 2023-01-22 12:35:04.735612: step: 418/463, loss: 0.012702380307018757 2023-01-22 12:35:05.318912: step: 420/463, loss: 0.021927962079644203 2023-01-22 12:35:05.987578: step: 422/463, loss: 0.08018112182617188 2023-01-22 12:35:06.655695: step: 424/463, loss: 0.21924570202827454 2023-01-22 12:35:07.260457: step: 426/463, loss: 0.10328881442546844 2023-01-22 12:35:07.833429: step: 428/463, loss: 0.05289173871278763 2023-01-22 12:35:08.389100: step: 430/463, loss: 0.002373060444369912 2023-01-22 12:35:08.977114: step: 432/463, loss: 0.005188320763409138 2023-01-22 12:35:09.566309: step: 434/463, loss: 0.019843287765979767 2023-01-22 12:35:10.222664: step: 436/463, loss: 0.0008458858937956393 2023-01-22 12:35:10.855908: step: 438/463, loss: 0.06668374687433243 2023-01-22 12:35:11.462746: step: 440/463, loss: 0.037590451538562775 2023-01-22 12:35:11.960845: step: 442/463, loss: 0.028690610080957413 2023-01-22 12:35:12.536056: step: 444/463, loss: 0.017187584191560745 2023-01-22 12:35:13.157317: step: 446/463, loss: 0.08459227532148361 2023-01-22 12:35:13.697217: step: 448/463, loss: 0.07163655757904053 2023-01-22 12:35:14.213027: step: 450/463, loss: 0.005111805163323879 2023-01-22 12:35:14.818760: step: 452/463, loss: 0.01331639476120472 2023-01-22 12:35:15.430951: step: 454/463, loss: 0.009346513077616692 2023-01-22 12:35:15.999333: step: 456/463, loss: 0.020700331777334213 2023-01-22 12:35:16.656256: step: 458/463, loss: 0.00588577426970005 2023-01-22 12:35:17.245659: step: 460/463, loss: 0.0042933207005262375 2023-01-22 12:35:17.915015: step: 462/463, loss: 0.023354649543762207 2023-01-22 12:35:18.503042: step: 464/463, loss: 0.5120725035667419 2023-01-22 12:35:19.041178: step: 466/463, loss: 0.0026860798243433237 2023-01-22 12:35:19.663227: step: 468/463, loss: 0.0032239265274256468 2023-01-22 12:35:20.348609: step: 470/463, loss: 0.059078581631183624 2023-01-22 12:35:20.971146: step: 472/463, loss: 0.011114481836557388 2023-01-22 12:35:21.551216: step: 474/463, loss: 0.00791896041482687 2023-01-22 12:35:22.274090: step: 476/463, loss: 0.012504028156399727 2023-01-22 12:35:22.819567: step: 478/463, loss: 0.0002504971926100552 2023-01-22 12:35:23.437464: step: 480/463, loss: 0.012548841536045074 2023-01-22 12:35:23.927819: step: 482/463, loss: 0.0019528002012521029 2023-01-22 12:35:24.499001: step: 484/463, loss: 0.013973971828818321 2023-01-22 12:35:25.155101: step: 486/463, loss: 1.9603378772735596 2023-01-22 12:35:25.809475: step: 488/463, loss: 0.029213057830929756 2023-01-22 12:35:26.400447: step: 490/463, loss: 0.007589337415993214 2023-01-22 12:35:27.015935: step: 492/463, loss: 0.0017818623455241323 2023-01-22 12:35:27.589886: step: 494/463, loss: 0.06031782552599907 2023-01-22 12:35:28.154276: step: 496/463, loss: 0.007109189406037331 2023-01-22 12:35:28.759195: step: 498/463, loss: 0.03351876884698868 2023-01-22 12:35:29.284148: step: 500/463, loss: 0.0027931872755289078 2023-01-22 12:35:29.974577: step: 502/463, loss: 4.954287528991699 2023-01-22 12:35:30.565422: step: 504/463, loss: 0.08425292372703552 2023-01-22 12:35:31.111464: step: 506/463, loss: 0.04355937987565994 2023-01-22 12:35:31.728075: step: 508/463, loss: 0.024976037442684174 2023-01-22 12:35:32.323239: step: 510/463, loss: 0.017483292147517204 2023-01-22 12:35:32.879128: step: 512/463, loss: 0.007971568964421749 2023-01-22 12:35:33.452482: step: 514/463, loss: 0.003153902478516102 2023-01-22 12:35:34.058255: step: 516/463, loss: 0.014386521652340889 2023-01-22 12:35:34.736105: step: 518/463, loss: 0.016516188159585 2023-01-22 12:35:35.395786: step: 520/463, loss: 0.05810907483100891 2023-01-22 12:35:35.973639: step: 522/463, loss: 0.02266695536673069 2023-01-22 12:35:36.586585: step: 524/463, loss: 0.0251521784812212 2023-01-22 12:35:37.229421: step: 526/463, loss: 0.4154609143733978 2023-01-22 12:35:37.797809: step: 528/463, loss: 0.0012287950376048684 2023-01-22 12:35:38.475487: step: 530/463, loss: 0.014101866632699966 2023-01-22 12:35:39.098406: step: 532/463, loss: 0.03135546296834946 2023-01-22 12:35:39.625428: step: 534/463, loss: 0.022536693140864372 2023-01-22 12:35:40.204145: step: 536/463, loss: 0.004082603845745325 2023-01-22 12:35:40.779044: step: 538/463, loss: 0.008292187005281448 2023-01-22 12:35:41.390234: step: 540/463, loss: 0.011333576403558254 2023-01-22 12:35:42.008531: step: 542/463, loss: 0.0026401199866086245 2023-01-22 12:35:42.606859: step: 544/463, loss: 0.49489450454711914 2023-01-22 12:35:43.244198: step: 546/463, loss: 0.0016717016696929932 2023-01-22 12:35:43.818237: step: 548/463, loss: 0.005759804509580135 2023-01-22 12:35:44.446666: step: 550/463, loss: 0.05441486835479736 2023-01-22 12:35:45.012000: step: 552/463, loss: 0.012170879170298576 2023-01-22 12:35:45.519059: step: 554/463, loss: 1.1469770470284857e-05 2023-01-22 12:35:46.095565: step: 556/463, loss: 0.1692005842924118 2023-01-22 12:35:46.715160: step: 558/463, loss: 0.0076745604164898396 2023-01-22 12:35:47.331667: step: 560/463, loss: 0.23432189226150513 2023-01-22 12:35:47.930260: step: 562/463, loss: 0.028413010761141777 2023-01-22 12:35:48.511989: step: 564/463, loss: 0.03602062910795212 2023-01-22 12:35:49.155618: step: 566/463, loss: 0.003380877897143364 2023-01-22 12:35:49.782752: step: 568/463, loss: 0.0471382662653923 2023-01-22 12:35:50.362262: step: 570/463, loss: 0.012956744059920311 2023-01-22 12:35:51.021535: step: 572/463, loss: 0.015133941546082497 2023-01-22 12:35:51.568738: step: 574/463, loss: 0.03462939336895943 2023-01-22 12:35:52.165437: step: 576/463, loss: 0.009973033331334591 2023-01-22 12:35:52.831031: step: 578/463, loss: 0.004711265210062265 2023-01-22 12:35:53.385495: step: 580/463, loss: 0.005789301358163357 2023-01-22 12:35:53.943572: step: 582/463, loss: 0.035336632281541824 2023-01-22 12:35:54.551412: step: 584/463, loss: 0.15327401459217072 2023-01-22 12:35:55.136257: step: 586/463, loss: 0.2596665620803833 2023-01-22 12:35:55.754041: step: 588/463, loss: 0.01266113668680191 2023-01-22 12:35:56.399530: step: 590/463, loss: 0.00028796232072636485 2023-01-22 12:35:57.031057: step: 592/463, loss: 0.05749647319316864 2023-01-22 12:35:57.683936: step: 594/463, loss: 0.06660754233598709 2023-01-22 12:35:58.257963: step: 596/463, loss: 0.006729778368026018 2023-01-22 12:35:58.922310: step: 598/463, loss: 0.006399865727871656 2023-01-22 12:35:59.550159: step: 600/463, loss: 0.04109904170036316 2023-01-22 12:36:00.186370: step: 602/463, loss: 0.004981099162250757 2023-01-22 12:36:00.727790: step: 604/463, loss: 0.04519573971629143 2023-01-22 12:36:01.286036: step: 606/463, loss: 0.009017454460263252 2023-01-22 12:36:01.831111: step: 608/463, loss: 0.00037248252192512155 2023-01-22 12:36:02.413816: step: 610/463, loss: 0.13918378949165344 2023-01-22 12:36:03.046020: step: 612/463, loss: 0.00314846052788198 2023-01-22 12:36:03.667631: step: 614/463, loss: 0.06371580064296722 2023-01-22 12:36:04.223091: step: 616/463, loss: 0.016104543581604958 2023-01-22 12:36:04.846600: step: 618/463, loss: 0.00015639570483472198 2023-01-22 12:36:05.474203: step: 620/463, loss: 0.036811381578445435 2023-01-22 12:36:06.069905: step: 622/463, loss: 0.012018936686217785 2023-01-22 12:36:06.737089: step: 624/463, loss: 0.028855066746473312 2023-01-22 12:36:07.356534: step: 626/463, loss: 0.006511532701551914 2023-01-22 12:36:07.952308: step: 628/463, loss: 0.032886989414691925 2023-01-22 12:36:08.540633: step: 630/463, loss: 0.006207230035215616 2023-01-22 12:36:09.174414: step: 632/463, loss: 0.043157126754522324 2023-01-22 12:36:09.812678: step: 634/463, loss: 0.015785163268446922 2023-01-22 12:36:10.385099: step: 636/463, loss: 0.008867550641298294 2023-01-22 12:36:11.061498: step: 638/463, loss: 0.01249113492667675 2023-01-22 12:36:11.671006: step: 640/463, loss: 0.0402577668428421 2023-01-22 12:36:12.231963: step: 642/463, loss: 0.04310372471809387 2023-01-22 12:36:12.896593: step: 644/463, loss: 0.6034498810768127 2023-01-22 12:36:13.518686: step: 646/463, loss: 0.0632987916469574 2023-01-22 12:36:14.094710: step: 648/463, loss: 0.026047512888908386 2023-01-22 12:36:14.708110: step: 650/463, loss: 0.0029075623024255037 2023-01-22 12:36:15.296673: step: 652/463, loss: 0.007010980974882841 2023-01-22 12:36:15.834514: step: 654/463, loss: 0.0363411121070385 2023-01-22 12:36:16.550342: step: 656/463, loss: 0.0012821756536141038 2023-01-22 12:36:17.168905: step: 658/463, loss: 0.005994519684463739 2023-01-22 12:36:17.743353: step: 660/463, loss: 0.0018810039618983865 2023-01-22 12:36:18.331986: step: 662/463, loss: 0.009131263941526413 2023-01-22 12:36:18.938560: step: 664/463, loss: 0.029725870117545128 2023-01-22 12:36:19.528184: step: 666/463, loss: 0.002402053214609623 2023-01-22 12:36:20.127122: step: 668/463, loss: 0.00358913978561759 2023-01-22 12:36:20.737825: step: 670/463, loss: 0.02485659159719944 2023-01-22 12:36:21.351958: step: 672/463, loss: 0.02293873205780983 2023-01-22 12:36:22.005946: step: 674/463, loss: 0.0862886905670166 2023-01-22 12:36:22.663044: step: 676/463, loss: 0.005062241107225418 2023-01-22 12:36:23.322663: step: 678/463, loss: 0.06858204305171967 2023-01-22 12:36:23.855356: step: 680/463, loss: 0.027368837967514992 2023-01-22 12:36:24.462105: step: 682/463, loss: 0.11823952198028564 2023-01-22 12:36:25.059279: step: 684/463, loss: 0.009590024128556252 2023-01-22 12:36:25.678636: step: 686/463, loss: 0.09056219458580017 2023-01-22 12:36:26.282400: step: 688/463, loss: 0.012177771888673306 2023-01-22 12:36:26.897202: step: 690/463, loss: 0.013546637259423733 2023-01-22 12:36:27.474430: step: 692/463, loss: 0.06701210886240005 2023-01-22 12:36:28.131139: step: 694/463, loss: 0.05578470230102539 2023-01-22 12:36:28.673797: step: 696/463, loss: 0.007104128133505583 2023-01-22 12:36:29.255324: step: 698/463, loss: 0.011056576855480671 2023-01-22 12:36:29.850195: step: 700/463, loss: 0.02336994744837284 2023-01-22 12:36:30.471724: step: 702/463, loss: 0.03981660306453705 2023-01-22 12:36:31.127020: step: 704/463, loss: 0.008403674699366093 2023-01-22 12:36:31.723595: step: 706/463, loss: 0.22430363297462463 2023-01-22 12:36:32.293520: step: 708/463, loss: 0.020615549758076668 2023-01-22 12:36:32.949762: step: 710/463, loss: 0.005166787654161453 2023-01-22 12:36:33.528480: step: 712/463, loss: 0.02336127869784832 2023-01-22 12:36:34.239628: step: 714/463, loss: 0.03210056200623512 2023-01-22 12:36:34.847981: step: 716/463, loss: 0.03605251759290695 2023-01-22 12:36:35.438099: step: 718/463, loss: 0.09759293496608734 2023-01-22 12:36:36.058481: step: 720/463, loss: 0.05771316960453987 2023-01-22 12:36:36.683736: step: 722/463, loss: 0.00856007169932127 2023-01-22 12:36:37.274276: step: 724/463, loss: 0.023083670064806938 2023-01-22 12:36:37.976534: step: 726/463, loss: 0.04058995470404625 2023-01-22 12:36:38.543362: step: 728/463, loss: 0.01586988754570484 2023-01-22 12:36:39.132803: step: 730/463, loss: 0.0280936136841774 2023-01-22 12:36:39.757161: step: 732/463, loss: 0.11945941299200058 2023-01-22 12:36:40.377122: step: 734/463, loss: 0.1073726937174797 2023-01-22 12:36:40.938633: step: 736/463, loss: 0.0005320303607732058 2023-01-22 12:36:41.533306: step: 738/463, loss: 0.005495231598615646 2023-01-22 12:36:42.110535: step: 740/463, loss: 0.010837437584996223 2023-01-22 12:36:42.811287: step: 742/463, loss: 0.008254644460976124 2023-01-22 12:36:43.400775: step: 744/463, loss: 0.013994758017361164 2023-01-22 12:36:44.006039: step: 746/463, loss: 0.004982369020581245 2023-01-22 12:36:44.637862: step: 748/463, loss: 0.07093297690153122 2023-01-22 12:36:45.254342: step: 750/463, loss: 0.06783630698919296 2023-01-22 12:36:45.880450: step: 752/463, loss: 0.010502041317522526 2023-01-22 12:36:46.523183: step: 754/463, loss: 0.025041887536644936 2023-01-22 12:36:47.124528: step: 756/463, loss: 0.01032335963100195 2023-01-22 12:36:47.733932: step: 758/463, loss: 0.02496275119483471 2023-01-22 12:36:48.347026: step: 760/463, loss: 0.023167282342910767 2023-01-22 12:36:49.001861: step: 762/463, loss: 0.014368785545229912 2023-01-22 12:36:49.606140: step: 764/463, loss: 0.0056557306088507175 2023-01-22 12:36:50.235977: step: 766/463, loss: 0.06064826622605324 2023-01-22 12:36:50.811168: step: 768/463, loss: 0.02322997897863388 2023-01-22 12:36:51.390468: step: 770/463, loss: 0.0703640952706337 2023-01-22 12:36:51.989678: step: 772/463, loss: 0.015143807977437973 2023-01-22 12:36:52.613650: step: 774/463, loss: 0.009394689463078976 2023-01-22 12:36:53.199667: step: 776/463, loss: 0.0042193131521344185 2023-01-22 12:36:53.825884: step: 778/463, loss: 0.019165704026818275 2023-01-22 12:36:54.409469: step: 780/463, loss: 0.11084091663360596 2023-01-22 12:36:55.066875: step: 782/463, loss: 0.05546008422970772 2023-01-22 12:36:55.714958: step: 784/463, loss: 0.06199384480714798 2023-01-22 12:36:56.424686: step: 786/463, loss: 0.00725259305909276 2023-01-22 12:36:57.135305: step: 788/463, loss: 0.07570730149745941 2023-01-22 12:36:57.728240: step: 790/463, loss: 0.0014948367606848478 2023-01-22 12:36:58.297626: step: 792/463, loss: 0.0061547704972326756 2023-01-22 12:36:58.879714: step: 794/463, loss: 0.003681823378428817 2023-01-22 12:36:59.435086: step: 796/463, loss: 0.015273437835276127 2023-01-22 12:37:00.065229: step: 798/463, loss: 0.044249873608350754 2023-01-22 12:37:00.821264: step: 800/463, loss: 0.002889686729758978 2023-01-22 12:37:01.434290: step: 802/463, loss: 0.0030433328356593847 2023-01-22 12:37:02.063298: step: 804/463, loss: 0.004954787902534008 2023-01-22 12:37:02.677652: step: 806/463, loss: 0.002291971119120717 2023-01-22 12:37:03.365981: step: 808/463, loss: 0.13242027163505554 2023-01-22 12:37:03.974465: step: 810/463, loss: 0.0024529832880944014 2023-01-22 12:37:04.589667: step: 812/463, loss: 0.34052878618240356 2023-01-22 12:37:05.157833: step: 814/463, loss: 1.5564850568771362 2023-01-22 12:37:05.754701: step: 816/463, loss: 0.007235885597765446 2023-01-22 12:37:06.352567: step: 818/463, loss: 0.023396898061037064 2023-01-22 12:37:06.908278: step: 820/463, loss: 0.01937183551490307 2023-01-22 12:37:07.469902: step: 822/463, loss: 0.0014902740949764848 2023-01-22 12:37:08.137850: step: 824/463, loss: 0.008005850948393345 2023-01-22 12:37:08.811717: step: 826/463, loss: 0.0597454309463501 2023-01-22 12:37:09.401206: step: 828/463, loss: 0.028225086629390717 2023-01-22 12:37:10.026789: step: 830/463, loss: 0.0008724422659724951 2023-01-22 12:37:10.657202: step: 832/463, loss: 0.05529513582587242 2023-01-22 12:37:11.325355: step: 834/463, loss: 0.05840693414211273 2023-01-22 12:37:11.893047: step: 836/463, loss: 0.014748592860996723 2023-01-22 12:37:12.517319: step: 838/463, loss: 0.026627134531736374 2023-01-22 12:37:13.147684: step: 840/463, loss: 0.06271857023239136 2023-01-22 12:37:13.797208: step: 842/463, loss: 0.02222890965640545 2023-01-22 12:37:14.361012: step: 844/463, loss: 0.0014154906384646893 2023-01-22 12:37:15.008044: step: 846/463, loss: 0.003977329470217228 2023-01-22 12:37:15.706260: step: 848/463, loss: 0.032589104026556015 2023-01-22 12:37:16.270982: step: 850/463, loss: 0.001300859497860074 2023-01-22 12:37:16.855703: step: 852/463, loss: 0.006473367568105459 2023-01-22 12:37:17.428070: step: 854/463, loss: 0.007052626460790634 2023-01-22 12:37:18.024241: step: 856/463, loss: 0.25654274225234985 2023-01-22 12:37:18.581921: step: 858/463, loss: 0.0019829077646136284 2023-01-22 12:37:19.201710: step: 860/463, loss: 0.044510066509246826 2023-01-22 12:37:19.782128: step: 862/463, loss: 0.001918377005495131 2023-01-22 12:37:20.386201: step: 864/463, loss: 0.0053252531215548515 2023-01-22 12:37:20.990389: step: 866/463, loss: 0.014237024821341038 2023-01-22 12:37:21.638956: step: 868/463, loss: 0.08253347128629684 2023-01-22 12:37:22.231717: step: 870/463, loss: 0.01173979789018631 2023-01-22 12:37:22.859285: step: 872/463, loss: 0.09331776201725006 2023-01-22 12:37:23.496509: step: 874/463, loss: 0.009608241729438305 2023-01-22 12:37:24.064493: step: 876/463, loss: 0.0012645330280065536 2023-01-22 12:37:24.693803: step: 878/463, loss: 0.007763568311929703 2023-01-22 12:37:25.329401: step: 880/463, loss: 0.03193031623959541 2023-01-22 12:37:25.957474: step: 882/463, loss: 0.004222292453050613 2023-01-22 12:37:26.548870: step: 884/463, loss: 0.03477559611201286 2023-01-22 12:37:27.170053: step: 886/463, loss: 0.01894582062959671 2023-01-22 12:37:27.819980: step: 888/463, loss: 0.7207598686218262 2023-01-22 12:37:28.417323: step: 890/463, loss: 0.09832815825939178 2023-01-22 12:37:28.955567: step: 892/463, loss: 0.0027617882005870342 2023-01-22 12:37:29.541857: step: 894/463, loss: 0.01734338328242302 2023-01-22 12:37:30.240646: step: 896/463, loss: 0.3503348231315613 2023-01-22 12:37:30.966793: step: 898/463, loss: 0.038323625922203064 2023-01-22 12:37:31.566521: step: 900/463, loss: 0.30789369344711304 2023-01-22 12:37:32.190339: step: 902/463, loss: 0.11568040400743484 2023-01-22 12:37:32.769435: step: 904/463, loss: 0.002566554583609104 2023-01-22 12:37:33.306851: step: 906/463, loss: 0.053460948169231415 2023-01-22 12:37:33.885689: step: 908/463, loss: 0.016339968889951706 2023-01-22 12:37:34.477738: step: 910/463, loss: 0.027080947533249855 2023-01-22 12:37:35.103886: step: 912/463, loss: 0.026154812425374985 2023-01-22 12:37:35.722380: step: 914/463, loss: 0.058754950761795044 2023-01-22 12:37:36.350343: step: 916/463, loss: 0.14254778623580933 2023-01-22 12:37:36.975611: step: 918/463, loss: 0.010557337664067745 2023-01-22 12:37:37.599984: step: 920/463, loss: 0.05688267946243286 2023-01-22 12:37:38.281052: step: 922/463, loss: 0.06220068410038948 2023-01-22 12:37:38.838391: step: 924/463, loss: 0.09934665262699127 2023-01-22 12:37:39.415104: step: 926/463, loss: 0.047787394374608994 ================================================== Loss: 0.069 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3303146258503401, 'r': 0.34222350230414744, 'f1': 0.336163626680868}, 'combined': 0.24769951439642907, 'epoch': 28} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3477248250453037, 'r': 0.4052528291888282, 'f1': 0.3742912378246563}, 'combined': 0.29012048099327425, 'epoch': 28} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28906040080609047, 'r': 0.34994409053944164, 'f1': 0.3166017780502759}, 'combined': 0.23328552066862432, 'epoch': 28} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3317540496335986, 'r': 0.41011874701947626, 'f1': 0.36679753124306624}, 'combined': 0.2843119620161566, 'epoch': 28} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28887836844176557, 'r': 0.3316345595963343, 'f1': 0.30878341503050916}, 'combined': 0.22752462160142778, 'epoch': 28} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3323804924234255, 'r': 0.3986732928424359, 'f1': 0.3625211388320687}, 'combined': 0.28099724636744083, 'epoch': 28} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24278115501519754, 'r': 0.3260204081632653, 'f1': 0.2783101045296167}, 'combined': 0.18554006968641112, 'epoch': 28} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.30405405405405406, 'r': 0.4891304347826087, 'f1': 0.375}, 'combined': 0.1875, 'epoch': 28} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3611111111111111, 'r': 0.22413793103448276, 'f1': 0.2765957446808511}, 'combined': 0.18439716312056736, 'epoch': 28} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3279711174242424, 'r': 0.3285934535104364, 'f1': 0.32828199052132695}, 'combined': 0.24189199301571457, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33699381251830834, 'r': 0.40575541764612494, 'f1': 0.3681917384478598}, 'combined': 0.2853926393710684, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32926829268292684, 'r': 0.38571428571428573, 'f1': 0.35526315789473684}, 'combined': 0.23684210526315788, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29021588054776093, 'r': 0.325460313859064, 'f1': 0.30682931199235547}, 'combined': 0.2260847562048935, 'epoch': 13} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32837198696491027, 'r': 0.4053341714098111, 'f1': 0.3628166009822085}, 'combined': 0.28122626487616165, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3333333333333333, 'r': 0.4782608695652174, 'f1': 0.3928571428571428}, 'combined': 0.1964285714285714, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30241528588023536, 'r': 0.33799355480732185, 'f1': 0.319216135095804}, 'combined': 0.2352118890179608, 'epoch': 18} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3270213595028262, 'r': 0.3847310111797955, 'f1': 0.3535366048679202}, 'combined': 0.2740331578402061, 'epoch': 18} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 18} ****************************** Epoch: 29 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 12:40:15.977177: step: 2/463, loss: 0.1550900638103485 2023-01-22 12:40:16.635658: step: 4/463, loss: 0.01312115229666233 2023-01-22 12:40:17.195876: step: 6/463, loss: 0.04941786453127861 2023-01-22 12:40:17.850533: step: 8/463, loss: 0.008064012974500656 2023-01-22 12:40:18.450372: step: 10/463, loss: 0.0057556587271392345 2023-01-22 12:40:19.077618: step: 12/463, loss: 0.08062035590410233 2023-01-22 12:40:19.680587: step: 14/463, loss: 0.018098358064889908 2023-01-22 12:40:20.278239: step: 16/463, loss: 0.018659701570868492 2023-01-22 12:40:20.888901: step: 18/463, loss: 4.444612979888916 2023-01-22 12:40:21.448173: step: 20/463, loss: 0.005970236379653215 2023-01-22 12:40:22.095377: step: 22/463, loss: 0.011970184743404388 2023-01-22 12:40:22.757972: step: 24/463, loss: 0.004882423672825098 2023-01-22 12:40:23.365165: step: 26/463, loss: 0.10401653498411179 2023-01-22 12:40:23.949772: step: 28/463, loss: 0.10145227611064911 2023-01-22 12:40:24.549962: step: 30/463, loss: 0.0015595429576933384 2023-01-22 12:40:25.146382: step: 32/463, loss: 0.03525329753756523 2023-01-22 12:40:25.703840: step: 34/463, loss: 0.1612314134836197 2023-01-22 12:40:26.317116: step: 36/463, loss: 0.010630753822624683 2023-01-22 12:40:26.961367: step: 38/463, loss: 0.01335727795958519 2023-01-22 12:40:27.543272: step: 40/463, loss: 0.00010043101792689413 2023-01-22 12:40:28.165436: step: 42/463, loss: 0.01780151203274727 2023-01-22 12:40:28.787407: step: 44/463, loss: 0.024259736761450768 2023-01-22 12:40:29.315175: step: 46/463, loss: 0.03141922876238823 2023-01-22 12:40:29.948200: step: 48/463, loss: 0.03314659744501114 2023-01-22 12:40:30.585516: step: 50/463, loss: 0.012423048727214336 2023-01-22 12:40:31.149098: step: 52/463, loss: 0.016100626438856125 2023-01-22 12:40:31.717325: step: 54/463, loss: 0.07544917613267899 2023-01-22 12:40:32.319707: step: 56/463, loss: 0.026606986299157143 2023-01-22 12:40:33.029532: step: 58/463, loss: 0.0028888958040624857 2023-01-22 12:40:33.635737: step: 60/463, loss: 0.061041779816150665 2023-01-22 12:40:34.275861: step: 62/463, loss: 0.00510053988546133 2023-01-22 12:40:34.846539: step: 64/463, loss: 0.030810287222266197 2023-01-22 12:40:35.483326: step: 66/463, loss: 0.003582959994673729 2023-01-22 12:40:36.039411: step: 68/463, loss: 0.0012058792635798454 2023-01-22 12:40:36.700038: step: 70/463, loss: 0.028774576261639595 2023-01-22 12:40:37.285500: step: 72/463, loss: 0.03866603225469589 2023-01-22 12:40:37.864145: step: 74/463, loss: 0.7688112258911133 2023-01-22 12:40:38.530257: step: 76/463, loss: 0.018393585458397865 2023-01-22 12:40:39.134641: step: 78/463, loss: 0.009106939658522606 2023-01-22 12:40:39.758450: step: 80/463, loss: 0.05523703992366791 2023-01-22 12:40:40.338659: step: 82/463, loss: 0.612266480922699 2023-01-22 12:40:40.926837: step: 84/463, loss: 0.0021509667858481407 2023-01-22 12:40:41.563855: step: 86/463, loss: 0.040496937930583954 2023-01-22 12:40:42.112880: step: 88/463, loss: 0.052959710359573364 2023-01-22 12:40:42.679941: step: 90/463, loss: 0.060240283608436584 2023-01-22 12:40:43.330459: step: 92/463, loss: 0.008552342653274536 2023-01-22 12:40:43.950082: step: 94/463, loss: 0.0068692597560584545 2023-01-22 12:40:44.499438: step: 96/463, loss: 0.030129818245768547 2023-01-22 12:40:45.093200: step: 98/463, loss: 0.004517947323620319 2023-01-22 12:40:45.716743: step: 100/463, loss: 0.32829606533050537 2023-01-22 12:40:46.281804: step: 102/463, loss: 0.03064805269241333 2023-01-22 12:40:46.868031: step: 104/463, loss: 0.0008148181368596852 2023-01-22 12:40:47.508457: step: 106/463, loss: 0.03350914642214775 2023-01-22 12:40:48.085862: step: 108/463, loss: 0.038696784526109695 2023-01-22 12:40:48.697268: step: 110/463, loss: 0.20961901545524597 2023-01-22 12:40:49.258640: step: 112/463, loss: 0.007900345139205456 2023-01-22 12:40:49.880507: step: 114/463, loss: 0.024062421172857285 2023-01-22 12:40:50.490089: step: 116/463, loss: 8.284940849989653e-05 2023-01-22 12:40:51.087076: step: 118/463, loss: 0.0025021443143486977 2023-01-22 12:40:51.671648: step: 120/463, loss: 0.5800390243530273 2023-01-22 12:40:52.257532: step: 122/463, loss: 0.10917577147483826 2023-01-22 12:40:52.900566: step: 124/463, loss: 0.02627609856426716 2023-01-22 12:40:53.471123: step: 126/463, loss: 0.0077028400264680386 2023-01-22 12:40:54.079883: step: 128/463, loss: 0.0035240240395069122 2023-01-22 12:40:54.622650: step: 130/463, loss: 0.008725050836801529 2023-01-22 12:40:55.215219: step: 132/463, loss: 0.0005206424393691123 2023-01-22 12:40:55.803193: step: 134/463, loss: 0.012773975729942322 2023-01-22 12:40:56.399968: step: 136/463, loss: 0.069736048579216 2023-01-22 12:40:56.969802: step: 138/463, loss: 0.008612376637756824 2023-01-22 12:40:57.540862: step: 140/463, loss: 0.017149263992905617 2023-01-22 12:40:58.146029: step: 142/463, loss: 0.0033292314037680626 2023-01-22 12:40:58.749290: step: 144/463, loss: 0.032853949815034866 2023-01-22 12:40:59.341939: step: 146/463, loss: 0.00012632431753445417 2023-01-22 12:40:59.913910: step: 148/463, loss: 0.06823589652776718 2023-01-22 12:41:00.486264: step: 150/463, loss: 0.009149501100182533 2023-01-22 12:41:01.084440: step: 152/463, loss: 0.04792388528585434 2023-01-22 12:41:01.651510: step: 154/463, loss: 0.034675247967243195 2023-01-22 12:41:02.341351: step: 156/463, loss: 0.019604550674557686 2023-01-22 12:41:02.951660: step: 158/463, loss: 0.02220567688345909 2023-01-22 12:41:03.575999: step: 160/463, loss: 0.05470237880945206 2023-01-22 12:41:04.198874: step: 162/463, loss: 0.027703681960701942 2023-01-22 12:41:04.766936: step: 164/463, loss: 0.0797567367553711 2023-01-22 12:41:05.367518: step: 166/463, loss: 0.5837057828903198 2023-01-22 12:41:05.928535: step: 168/463, loss: 0.00628327764570713 2023-01-22 12:41:06.551320: step: 170/463, loss: 0.004783878568559885 2023-01-22 12:41:07.137593: step: 172/463, loss: 0.003920532763004303 2023-01-22 12:41:07.946442: step: 174/463, loss: 0.0012897155247628689 2023-01-22 12:41:08.477307: step: 176/463, loss: 0.0040885829366743565 2023-01-22 12:41:09.066923: step: 178/463, loss: 0.03434412181377411 2023-01-22 12:41:09.727918: step: 180/463, loss: 0.018764395266771317 2023-01-22 12:41:10.353403: step: 182/463, loss: 0.03970245271921158 2023-01-22 12:41:11.024315: step: 184/463, loss: 0.024454912170767784 2023-01-22 12:41:11.616371: step: 186/463, loss: 0.04383327439427376 2023-01-22 12:41:12.246300: step: 188/463, loss: 0.024958115071058273 2023-01-22 12:41:12.914777: step: 190/463, loss: 0.045921605080366135 2023-01-22 12:41:13.512505: step: 192/463, loss: 0.015539822168648243 2023-01-22 12:41:14.118805: step: 194/463, loss: 0.039451587945222855 2023-01-22 12:41:14.730544: step: 196/463, loss: 0.007012828718870878 2023-01-22 12:41:15.319257: step: 198/463, loss: 0.0016368953511118889 2023-01-22 12:41:15.999342: step: 200/463, loss: 0.027939356863498688 2023-01-22 12:41:16.615855: step: 202/463, loss: 0.02487771026790142 2023-01-22 12:41:17.225534: step: 204/463, loss: 0.02883930876851082 2023-01-22 12:41:17.817693: step: 206/463, loss: 0.035411231219768524 2023-01-22 12:41:18.475895: step: 208/463, loss: 0.001753342105075717 2023-01-22 12:41:19.072752: step: 210/463, loss: 0.034231409430503845 2023-01-22 12:41:19.652978: step: 212/463, loss: 0.013961263000965118 2023-01-22 12:41:20.230249: step: 214/463, loss: 0.022274188697338104 2023-01-22 12:41:20.790586: step: 216/463, loss: 0.0030902153812348843 2023-01-22 12:41:21.372487: step: 218/463, loss: 0.002822666894644499 2023-01-22 12:41:21.988444: step: 220/463, loss: 0.04084483161568642 2023-01-22 12:41:22.508441: step: 222/463, loss: 0.1427854746580124 2023-01-22 12:41:23.174193: step: 224/463, loss: 0.028082339093089104 2023-01-22 12:41:23.792731: step: 226/463, loss: 0.008980338461697102 2023-01-22 12:41:24.367034: step: 228/463, loss: 0.005404004827141762 2023-01-22 12:41:24.912949: step: 230/463, loss: 0.004749061074107885 2023-01-22 12:41:25.566906: step: 232/463, loss: 0.021711250767111778 2023-01-22 12:41:26.196128: step: 234/463, loss: 0.19020888209342957 2023-01-22 12:41:26.790083: step: 236/463, loss: 0.7447059750556946 2023-01-22 12:41:27.350917: step: 238/463, loss: 0.0014882652321830392 2023-01-22 12:41:27.942350: step: 240/463, loss: 0.013305017724633217 2023-01-22 12:41:28.521078: step: 242/463, loss: 0.016160206869244576 2023-01-22 12:41:29.135935: step: 244/463, loss: 0.015276189893484116 2023-01-22 12:41:29.759228: step: 246/463, loss: 0.004023570567369461 2023-01-22 12:41:30.342998: step: 248/463, loss: 0.014682869426906109 2023-01-22 12:41:30.933173: step: 250/463, loss: 0.0011476946529000998 2023-01-22 12:41:31.548136: step: 252/463, loss: 0.08572657406330109 2023-01-22 12:41:32.189027: step: 254/463, loss: 0.13576018810272217 2023-01-22 12:41:32.767950: step: 256/463, loss: 0.00667849974706769 2023-01-22 12:41:33.435270: step: 258/463, loss: 0.0002772319712676108 2023-01-22 12:41:34.139216: step: 260/463, loss: 0.05342680960893631 2023-01-22 12:41:34.747911: step: 262/463, loss: 4.617326736450195 2023-01-22 12:41:35.290177: step: 264/463, loss: 0.0006627269322052598 2023-01-22 12:41:35.854179: step: 266/463, loss: 0.001987112918868661 2023-01-22 12:41:36.428208: step: 268/463, loss: 0.008364694193005562 2023-01-22 12:41:37.006051: step: 270/463, loss: 0.0023677332792431116 2023-01-22 12:41:37.631275: step: 272/463, loss: 0.002349935006350279 2023-01-22 12:41:38.266103: step: 274/463, loss: 0.03869195654988289 2023-01-22 12:41:38.882153: step: 276/463, loss: 0.06180592626333237 2023-01-22 12:41:39.658837: step: 278/463, loss: 0.01338766235858202 2023-01-22 12:41:40.310750: step: 280/463, loss: 0.009493292309343815 2023-01-22 12:41:40.870487: step: 282/463, loss: 0.002042797626927495 2023-01-22 12:41:41.532619: step: 284/463, loss: 0.016817327588796616 2023-01-22 12:41:42.119822: step: 286/463, loss: 0.0006671757437288761 2023-01-22 12:41:42.668336: step: 288/463, loss: 0.08631105720996857 2023-01-22 12:41:43.220624: step: 290/463, loss: 0.057567253708839417 2023-01-22 12:41:43.854465: step: 292/463, loss: 0.05429114028811455 2023-01-22 12:41:44.455982: step: 294/463, loss: 0.055227115750312805 2023-01-22 12:41:45.067873: step: 296/463, loss: 0.01286325789988041 2023-01-22 12:41:45.696276: step: 298/463, loss: 0.03145575523376465 2023-01-22 12:41:46.309531: step: 300/463, loss: 0.02212996408343315 2023-01-22 12:41:46.901721: step: 302/463, loss: 0.05742984637618065 2023-01-22 12:41:47.500676: step: 304/463, loss: 0.034636858850717545 2023-01-22 12:41:48.059421: step: 306/463, loss: 0.0009654642199166119 2023-01-22 12:41:48.669752: step: 308/463, loss: 0.0017407169798389077 2023-01-22 12:41:49.254804: step: 310/463, loss: 0.019746888428926468 2023-01-22 12:41:49.815022: step: 312/463, loss: 0.0014645576011389494 2023-01-22 12:41:50.402125: step: 314/463, loss: 0.0012500026496127248 2023-01-22 12:41:51.065882: step: 316/463, loss: 0.028996281325817108 2023-01-22 12:41:51.772343: step: 318/463, loss: 0.004588013049215078 2023-01-22 12:41:52.363316: step: 320/463, loss: 0.006899316795170307 2023-01-22 12:41:52.974893: step: 322/463, loss: 0.04672044515609741 2023-01-22 12:41:53.651539: step: 324/463, loss: 0.039173007011413574 2023-01-22 12:41:54.292760: step: 326/463, loss: 0.011062702164053917 2023-01-22 12:41:54.906181: step: 328/463, loss: 0.00412320950999856 2023-01-22 12:41:55.472463: step: 330/463, loss: 0.06949309259653091 2023-01-22 12:41:56.000964: step: 332/463, loss: 0.019062811508774757 2023-01-22 12:41:56.612641: step: 334/463, loss: 0.008088046684861183 2023-01-22 12:41:57.267077: step: 336/463, loss: 0.02030049078166485 2023-01-22 12:41:57.915540: step: 338/463, loss: 0.015328459441661835 2023-01-22 12:41:58.388395: step: 340/463, loss: 0.01732882484793663 2023-01-22 12:41:59.039965: step: 342/463, loss: 0.018980398774147034 2023-01-22 12:41:59.623098: step: 344/463, loss: 0.01901767961680889 2023-01-22 12:42:00.204071: step: 346/463, loss: 0.028678152710199356 2023-01-22 12:42:00.803035: step: 348/463, loss: 0.009891662746667862 2023-01-22 12:42:01.502113: step: 350/463, loss: 0.030378203839063644 2023-01-22 12:42:02.148288: step: 352/463, loss: 0.011372922919690609 2023-01-22 12:42:02.728576: step: 354/463, loss: 0.002102686557918787 2023-01-22 12:42:03.350112: step: 356/463, loss: 0.00036071351496502757 2023-01-22 12:42:03.914651: step: 358/463, loss: 0.00859138835221529 2023-01-22 12:42:04.481246: step: 360/463, loss: 0.006079711951315403 2023-01-22 12:42:05.066977: step: 362/463, loss: 0.0011085874866694212 2023-01-22 12:42:05.697845: step: 364/463, loss: 0.007413227576762438 2023-01-22 12:42:06.301930: step: 366/463, loss: 0.004759788513183594 2023-01-22 12:42:06.870999: step: 368/463, loss: 0.20699399709701538 2023-01-22 12:42:07.475167: step: 370/463, loss: 0.272332102060318 2023-01-22 12:42:08.118374: step: 372/463, loss: 0.0013226158916950226 2023-01-22 12:42:08.792206: step: 374/463, loss: 0.008595649152994156 2023-01-22 12:42:09.438133: step: 376/463, loss: 0.009411418810486794 2023-01-22 12:42:10.027358: step: 378/463, loss: 0.03951879218220711 2023-01-22 12:42:10.627181: step: 380/463, loss: 0.06510946154594421 2023-01-22 12:42:11.274782: step: 382/463, loss: 0.23156484961509705 2023-01-22 12:42:11.906539: step: 384/463, loss: 0.000822294969111681 2023-01-22 12:42:12.512398: step: 386/463, loss: 0.00846689473837614 2023-01-22 12:42:13.059442: step: 388/463, loss: 1.6453962326049805 2023-01-22 12:42:13.745355: step: 390/463, loss: 0.021790912374854088 2023-01-22 12:42:14.331944: step: 392/463, loss: 0.13698720932006836 2023-01-22 12:42:14.882195: step: 394/463, loss: 0.008768951520323753 2023-01-22 12:42:15.464258: step: 396/463, loss: 0.028732653707265854 2023-01-22 12:42:16.043370: step: 398/463, loss: 0.012918967753648758 2023-01-22 12:42:16.711132: step: 400/463, loss: 0.008159270510077477 2023-01-22 12:42:17.417881: step: 402/463, loss: 0.004104298539459705 2023-01-22 12:42:17.991684: step: 404/463, loss: 0.3714562952518463 2023-01-22 12:42:18.637157: step: 406/463, loss: 0.0265846885740757 2023-01-22 12:42:19.189371: step: 408/463, loss: 0.05175428092479706 2023-01-22 12:42:19.769335: step: 410/463, loss: 0.5105317831039429 2023-01-22 12:42:20.393591: step: 412/463, loss: 0.0030236411839723587 2023-01-22 12:42:20.993603: step: 414/463, loss: 0.016782555729150772 2023-01-22 12:42:21.642467: step: 416/463, loss: 0.016550594940781593 2023-01-22 12:42:22.270135: step: 418/463, loss: 0.07024864107370377 2023-01-22 12:42:22.933690: step: 420/463, loss: 0.05589844658970833 2023-01-22 12:42:23.464625: step: 422/463, loss: 0.005209112074226141 2023-01-22 12:42:24.078650: step: 424/463, loss: 0.002344045089557767 2023-01-22 12:42:24.684697: step: 426/463, loss: 0.02918173559010029 2023-01-22 12:42:25.272289: step: 428/463, loss: 0.006644498091191053 2023-01-22 12:42:25.836487: step: 430/463, loss: 0.011588048189878464 2023-01-22 12:42:26.447973: step: 432/463, loss: 0.04694873467087746 2023-01-22 12:42:26.981046: step: 434/463, loss: 0.10165604948997498 2023-01-22 12:42:27.561446: step: 436/463, loss: 0.12336350977420807 2023-01-22 12:42:28.156566: step: 438/463, loss: 0.0033318425994366407 2023-01-22 12:42:28.737038: step: 440/463, loss: 0.016334472224116325 2023-01-22 12:42:29.371437: step: 442/463, loss: 0.0026598761323839426 2023-01-22 12:42:29.933364: step: 444/463, loss: 0.0016191492322832346 2023-01-22 12:42:30.604626: step: 446/463, loss: 0.01846923679113388 2023-01-22 12:42:31.160618: step: 448/463, loss: 0.027185291051864624 2023-01-22 12:42:31.811437: step: 450/463, loss: 0.004145144019275904 2023-01-22 12:42:32.387376: step: 452/463, loss: 0.0006502980249933898 2023-01-22 12:42:32.983146: step: 454/463, loss: 0.0021698183845728636 2023-01-22 12:42:33.591629: step: 456/463, loss: 0.051979027688503265 2023-01-22 12:42:34.177729: step: 458/463, loss: 0.006482423283159733 2023-01-22 12:42:34.780941: step: 460/463, loss: 0.012627340853214264 2023-01-22 12:42:35.444261: step: 462/463, loss: 0.0426507294178009 2023-01-22 12:42:36.089427: step: 464/463, loss: 0.032700929790735245 2023-01-22 12:42:36.663814: step: 466/463, loss: 0.01059667207300663 2023-01-22 12:42:37.243893: step: 468/463, loss: 0.0033148638904094696 2023-01-22 12:42:37.800697: step: 470/463, loss: 0.010824110358953476 2023-01-22 12:42:38.405205: step: 472/463, loss: 0.007498008664697409 2023-01-22 12:42:39.015238: step: 474/463, loss: 0.007538353092968464 2023-01-22 12:42:39.607255: step: 476/463, loss: 0.056987062096595764 2023-01-22 12:42:40.181244: step: 478/463, loss: 0.07131024450063705 2023-01-22 12:42:40.773582: step: 480/463, loss: 0.045813802629709244 2023-01-22 12:42:41.373028: step: 482/463, loss: 0.003239908954128623 2023-01-22 12:42:41.895011: step: 484/463, loss: 0.0014419176150113344 2023-01-22 12:42:42.492822: step: 486/463, loss: 0.018764080479741096 2023-01-22 12:42:43.102606: step: 488/463, loss: 0.006720618344843388 2023-01-22 12:42:43.653450: step: 490/463, loss: 0.008544464595615864 2023-01-22 12:42:44.303651: step: 492/463, loss: 0.00428227661177516 2023-01-22 12:42:44.882895: step: 494/463, loss: 0.007183075416833162 2023-01-22 12:42:45.486451: step: 496/463, loss: 0.012095246464014053 2023-01-22 12:42:46.060012: step: 498/463, loss: 0.01244945079088211 2023-01-22 12:42:46.560870: step: 500/463, loss: 0.0004592906916514039 2023-01-22 12:42:47.207386: step: 502/463, loss: 0.00928113330155611 2023-01-22 12:42:47.826278: step: 504/463, loss: 0.007732453290373087 2023-01-22 12:42:48.398762: step: 506/463, loss: 0.03434942662715912 2023-01-22 12:42:49.048504: step: 508/463, loss: 0.00024358944210689515 2023-01-22 12:42:49.684432: step: 510/463, loss: 0.03508301079273224 2023-01-22 12:42:50.337244: step: 512/463, loss: 0.0077947345562279224 2023-01-22 12:42:50.984777: step: 514/463, loss: 0.025396818295121193 2023-01-22 12:42:51.605222: step: 516/463, loss: 0.029606034979224205 2023-01-22 12:42:52.194781: step: 518/463, loss: 0.002479645423591137 2023-01-22 12:42:52.768706: step: 520/463, loss: 0.0024734127800911665 2023-01-22 12:42:53.398175: step: 522/463, loss: 0.07300721853971481 2023-01-22 12:42:53.959738: step: 524/463, loss: 0.013177592307329178 2023-01-22 12:42:54.555590: step: 526/463, loss: 0.7319537997245789 2023-01-22 12:42:55.153601: step: 528/463, loss: 0.0023846549447625875 2023-01-22 12:42:55.735574: step: 530/463, loss: 0.015529816038906574 2023-01-22 12:42:56.324714: step: 532/463, loss: 0.062036339193582535 2023-01-22 12:42:56.942672: step: 534/463, loss: 0.000389710912713781 2023-01-22 12:42:57.506525: step: 536/463, loss: 0.0019996482878923416 2023-01-22 12:42:58.128827: step: 538/463, loss: 0.009069595485925674 2023-01-22 12:42:58.712694: step: 540/463, loss: 0.001913677086122334 2023-01-22 12:42:59.265488: step: 542/463, loss: 0.014776908792555332 2023-01-22 12:42:59.878192: step: 544/463, loss: 0.02986527979373932 2023-01-22 12:43:00.486096: step: 546/463, loss: 0.010200761258602142 2023-01-22 12:43:01.111691: step: 548/463, loss: 2.508096218109131 2023-01-22 12:43:01.750090: step: 550/463, loss: 0.011398224160075188 2023-01-22 12:43:02.351571: step: 552/463, loss: 0.00230087386444211 2023-01-22 12:43:02.983257: step: 554/463, loss: 0.07720857113599777 2023-01-22 12:43:03.613800: step: 556/463, loss: 0.010546802543103695 2023-01-22 12:43:04.309184: step: 558/463, loss: 0.012195480056107044 2023-01-22 12:43:04.862734: step: 560/463, loss: 0.02231784351170063 2023-01-22 12:43:05.618174: step: 562/463, loss: 0.061512671411037445 2023-01-22 12:43:06.227442: step: 564/463, loss: 0.03533143177628517 2023-01-22 12:43:06.874733: step: 566/463, loss: 0.06323465704917908 2023-01-22 12:43:07.510720: step: 568/463, loss: 0.0459849089384079 2023-01-22 12:43:08.143320: step: 570/463, loss: 0.01714775711297989 2023-01-22 12:43:08.795543: step: 572/463, loss: 0.013263450004160404 2023-01-22 12:43:09.419858: step: 574/463, loss: 0.4613925814628601 2023-01-22 12:43:10.007091: step: 576/463, loss: 0.003999463748186827 2023-01-22 12:43:10.655040: step: 578/463, loss: 0.026372535154223442 2023-01-22 12:43:11.247346: step: 580/463, loss: 0.004054812714457512 2023-01-22 12:43:11.819257: step: 582/463, loss: 0.03685910999774933 2023-01-22 12:43:12.475199: step: 584/463, loss: 0.005764555651694536 2023-01-22 12:43:13.108102: step: 586/463, loss: 0.01615178771317005 2023-01-22 12:43:13.740549: step: 588/463, loss: 0.0005658544250763953 2023-01-22 12:43:14.309930: step: 590/463, loss: 0.06997364014387131 2023-01-22 12:43:14.889276: step: 592/463, loss: 0.014365759678184986 2023-01-22 12:43:15.538092: step: 594/463, loss: 0.033954013139009476 2023-01-22 12:43:16.105577: step: 596/463, loss: 0.04654618725180626 2023-01-22 12:43:16.709834: step: 598/463, loss: 0.007194723468273878 2023-01-22 12:43:17.322360: step: 600/463, loss: 0.025961844250559807 2023-01-22 12:43:17.991084: step: 602/463, loss: 0.020418139174580574 2023-01-22 12:43:18.523068: step: 604/463, loss: 0.07165495306253433 2023-01-22 12:43:19.197807: step: 606/463, loss: 0.0035638357512652874 2023-01-22 12:43:19.751685: step: 608/463, loss: 0.005681503098458052 2023-01-22 12:43:20.319719: step: 610/463, loss: 0.01035415381193161 2023-01-22 12:43:20.896975: step: 612/463, loss: 0.14534080028533936 2023-01-22 12:43:21.537644: step: 614/463, loss: 0.007295891176909208 2023-01-22 12:43:22.142463: step: 616/463, loss: 0.011274606920778751 2023-01-22 12:43:22.801946: step: 618/463, loss: 0.13457202911376953 2023-01-22 12:43:23.371694: step: 620/463, loss: 0.015402178280055523 2023-01-22 12:43:24.013699: step: 622/463, loss: 0.03315766900777817 2023-01-22 12:43:24.598052: step: 624/463, loss: 0.024579806253314018 2023-01-22 12:43:25.324020: step: 626/463, loss: 0.022329552099108696 2023-01-22 12:43:25.934205: step: 628/463, loss: 0.14533448219299316 2023-01-22 12:43:26.497796: step: 630/463, loss: 0.009475641883909702 2023-01-22 12:43:27.165158: step: 632/463, loss: 0.029815059155225754 2023-01-22 12:43:27.761923: step: 634/463, loss: 0.008774718269705772 2023-01-22 12:43:28.416747: step: 636/463, loss: 0.01674095168709755 2023-01-22 12:43:29.007183: step: 638/463, loss: 0.09528885781764984 2023-01-22 12:43:29.673035: step: 640/463, loss: 0.013047860935330391 2023-01-22 12:43:30.284258: step: 642/463, loss: 0.05572964996099472 2023-01-22 12:43:30.892945: step: 644/463, loss: 0.016993409022688866 2023-01-22 12:43:31.515315: step: 646/463, loss: 0.012161463499069214 2023-01-22 12:43:32.135170: step: 648/463, loss: 0.02520204707980156 2023-01-22 12:43:32.750499: step: 650/463, loss: 0.02108667604625225 2023-01-22 12:43:33.347543: step: 652/463, loss: 0.001307966187596321 2023-01-22 12:43:34.019939: step: 654/463, loss: 0.00036701816134154797 2023-01-22 12:43:34.607022: step: 656/463, loss: 0.37352892756462097 2023-01-22 12:43:35.242186: step: 658/463, loss: 0.006581631489098072 2023-01-22 12:43:35.886432: step: 660/463, loss: 0.12856628000736237 2023-01-22 12:43:36.511113: step: 662/463, loss: 0.0007741264998912811 2023-01-22 12:43:37.108037: step: 664/463, loss: 0.0006795075605623424 2023-01-22 12:43:37.672367: step: 666/463, loss: 0.030199136584997177 2023-01-22 12:43:38.207221: step: 668/463, loss: 0.00982176885008812 2023-01-22 12:43:38.817121: step: 670/463, loss: 0.4164646863937378 2023-01-22 12:43:39.464800: step: 672/463, loss: 0.016944773495197296 2023-01-22 12:43:40.084555: step: 674/463, loss: 0.022225484251976013 2023-01-22 12:43:40.681445: step: 676/463, loss: 0.24771860241889954 2023-01-22 12:43:41.295058: step: 678/463, loss: 0.015846656635403633 2023-01-22 12:43:41.919378: step: 680/463, loss: 0.03963213413953781 2023-01-22 12:43:42.548189: step: 682/463, loss: 0.04629233852028847 2023-01-22 12:43:43.197419: step: 684/463, loss: 0.032785721123218536 2023-01-22 12:43:43.744332: step: 686/463, loss: 0.010960950516164303 2023-01-22 12:43:44.376783: step: 688/463, loss: 0.008271569386124611 2023-01-22 12:43:44.994855: step: 690/463, loss: 0.09276773035526276 2023-01-22 12:43:45.576289: step: 692/463, loss: 0.002968868240714073 2023-01-22 12:43:46.183547: step: 694/463, loss: 0.003816205309703946 2023-01-22 12:43:46.762418: step: 696/463, loss: 0.00024717082851566374 2023-01-22 12:43:47.366207: step: 698/463, loss: 0.024036243557929993 2023-01-22 12:43:47.998510: step: 700/463, loss: 0.007068546023219824 2023-01-22 12:43:48.581130: step: 702/463, loss: 0.06692715734243393 2023-01-22 12:43:49.207455: step: 704/463, loss: 0.028790920972824097 2023-01-22 12:43:49.826089: step: 706/463, loss: 0.0812610536813736 2023-01-22 12:43:50.358098: step: 708/463, loss: 0.011899742297828197 2023-01-22 12:43:50.962975: step: 710/463, loss: 0.02381330542266369 2023-01-22 12:43:51.542303: step: 712/463, loss: 0.0275272186845541 2023-01-22 12:43:52.096637: step: 714/463, loss: 0.006281886249780655 2023-01-22 12:43:52.699434: step: 716/463, loss: 0.029312219470739365 2023-01-22 12:43:53.344294: step: 718/463, loss: 0.1676856428384781 2023-01-22 12:43:53.963664: step: 720/463, loss: 0.008076364174485207 2023-01-22 12:43:54.605383: step: 722/463, loss: 0.03715268522500992 2023-01-22 12:43:55.250151: step: 724/463, loss: 0.02680104970932007 2023-01-22 12:43:55.842332: step: 726/463, loss: 0.0076037440448999405 2023-01-22 12:43:56.540483: step: 728/463, loss: 0.0314769484102726 2023-01-22 12:43:57.162545: step: 730/463, loss: 0.007067324593663216 2023-01-22 12:43:57.784957: step: 732/463, loss: 0.28651463985443115 2023-01-22 12:43:58.320566: step: 734/463, loss: 0.05160336568951607 2023-01-22 12:43:58.999581: step: 736/463, loss: 0.09703442454338074 2023-01-22 12:43:59.607116: step: 738/463, loss: 0.0030085614416748285 2023-01-22 12:44:00.190135: step: 740/463, loss: 0.05477362871170044 2023-01-22 12:44:00.826358: step: 742/463, loss: 0.040129974484443665 2023-01-22 12:44:01.417919: step: 744/463, loss: 0.003152980701997876 2023-01-22 12:44:02.066439: step: 746/463, loss: 0.0012924366164952517 2023-01-22 12:44:02.684071: step: 748/463, loss: 0.019745944067835808 2023-01-22 12:44:03.266967: step: 750/463, loss: 0.006218594033271074 2023-01-22 12:44:03.894609: step: 752/463, loss: 0.0668272152543068 2023-01-22 12:44:04.466512: step: 754/463, loss: 0.016391780227422714 2023-01-22 12:44:05.118956: step: 756/463, loss: 0.012321342714130878 2023-01-22 12:44:05.735253: step: 758/463, loss: 0.15765640139579773 2023-01-22 12:44:06.340270: step: 760/463, loss: 0.05147123709321022 2023-01-22 12:44:06.974076: step: 762/463, loss: 0.001761647523380816 2023-01-22 12:44:07.653072: step: 764/463, loss: 0.017148515209555626 2023-01-22 12:44:08.357254: step: 766/463, loss: 0.04036860913038254 2023-01-22 12:44:08.927226: step: 768/463, loss: 0.01573687233030796 2023-01-22 12:44:09.508586: step: 770/463, loss: 0.010533246211707592 2023-01-22 12:44:10.129789: step: 772/463, loss: 0.0003301176184322685 2023-01-22 12:44:10.809567: step: 774/463, loss: 0.0724468007683754 2023-01-22 12:44:11.410205: step: 776/463, loss: 0.021009590476751328 2023-01-22 12:44:12.003972: step: 778/463, loss: 0.007623524870723486 2023-01-22 12:44:12.613638: step: 780/463, loss: 0.0017408073181286454 2023-01-22 12:44:13.171831: step: 782/463, loss: 0.0005291840643621981 2023-01-22 12:44:13.764167: step: 784/463, loss: 0.032468684017658234 2023-01-22 12:44:14.353342: step: 786/463, loss: 0.005144279915839434 2023-01-22 12:44:14.915066: step: 788/463, loss: 0.03454132750630379 2023-01-22 12:44:15.498456: step: 790/463, loss: 0.03987123444676399 2023-01-22 12:44:16.055379: step: 792/463, loss: 0.03480389714241028 2023-01-22 12:44:16.646600: step: 794/463, loss: 0.0020555350929498672 2023-01-22 12:44:17.263545: step: 796/463, loss: 0.07491395622491837 2023-01-22 12:44:17.840934: step: 798/463, loss: 6.228529673535377e-05 2023-01-22 12:44:18.497368: step: 800/463, loss: 0.00911690853536129 2023-01-22 12:44:19.112516: step: 802/463, loss: 0.1268131285905838 2023-01-22 12:44:19.706313: step: 804/463, loss: 0.018176700919866562 2023-01-22 12:44:20.315103: step: 806/463, loss: 0.013455672189593315 2023-01-22 12:44:20.888210: step: 808/463, loss: 0.014742767438292503 2023-01-22 12:44:21.557754: step: 810/463, loss: 0.006980289705097675 2023-01-22 12:44:22.193822: step: 812/463, loss: 0.10349062085151672 2023-01-22 12:44:22.817305: step: 814/463, loss: 0.02058652602136135 2023-01-22 12:44:23.436496: step: 816/463, loss: 0.07814081758260727 2023-01-22 12:44:24.007044: step: 818/463, loss: 0.0005949281039647758 2023-01-22 12:44:24.604568: step: 820/463, loss: 0.010980096645653248 2023-01-22 12:44:25.264297: step: 822/463, loss: 0.15577933192253113 2023-01-22 12:44:25.824210: step: 824/463, loss: 0.004505002871155739 2023-01-22 12:44:26.480377: step: 826/463, loss: 0.0313376858830452 2023-01-22 12:44:27.138641: step: 828/463, loss: 0.08885578066110611 2023-01-22 12:44:27.765879: step: 830/463, loss: 0.0018728503491729498 2023-01-22 12:44:28.380845: step: 832/463, loss: 0.33356672525405884 2023-01-22 12:44:29.007409: step: 834/463, loss: 0.012734198942780495 2023-01-22 12:44:29.604189: step: 836/463, loss: 0.005018147639930248 2023-01-22 12:44:30.210565: step: 838/463, loss: 0.07090814411640167 2023-01-22 12:44:30.786293: step: 840/463, loss: 0.004562025424093008 2023-01-22 12:44:31.372834: step: 842/463, loss: 0.03343694657087326 2023-01-22 12:44:31.941491: step: 844/463, loss: 0.013465807773172855 2023-01-22 12:44:32.578430: step: 846/463, loss: 0.03526826575398445 2023-01-22 12:44:33.280333: step: 848/463, loss: 0.02908763289451599 2023-01-22 12:44:33.917527: step: 850/463, loss: 0.01684403233230114 2023-01-22 12:44:34.496127: step: 852/463, loss: 0.007814344018697739 2023-01-22 12:44:35.140316: step: 854/463, loss: 0.009764028713107109 2023-01-22 12:44:35.779451: step: 856/463, loss: 0.057583849877119064 2023-01-22 12:44:36.326241: step: 858/463, loss: 0.006579817272722721 2023-01-22 12:44:36.946022: step: 860/463, loss: 0.030978145077824593 2023-01-22 12:44:37.633429: step: 862/463, loss: 0.012535871006548405 2023-01-22 12:44:38.230717: step: 864/463, loss: 0.002370886504650116 2023-01-22 12:44:38.845982: step: 866/463, loss: 0.021539144217967987 2023-01-22 12:44:39.486822: step: 868/463, loss: 0.047405119985342026 2023-01-22 12:44:40.076284: step: 870/463, loss: 0.029941538348793983 2023-01-22 12:44:40.668998: step: 872/463, loss: 0.010385973379015923 2023-01-22 12:44:41.289746: step: 874/463, loss: 0.25071820616722107 2023-01-22 12:44:41.890371: step: 876/463, loss: 0.05144646018743515 2023-01-22 12:44:42.464776: step: 878/463, loss: 0.00984541792422533 2023-01-22 12:44:43.090339: step: 880/463, loss: 0.0525452122092247 2023-01-22 12:44:43.728046: step: 882/463, loss: 0.032475486397743225 2023-01-22 12:44:44.322329: step: 884/463, loss: 0.0006611873395740986 2023-01-22 12:44:44.920800: step: 886/463, loss: 0.006387923378497362 2023-01-22 12:44:45.502508: step: 888/463, loss: 0.01817561313509941 2023-01-22 12:44:46.100922: step: 890/463, loss: 0.014133309945464134 2023-01-22 12:44:46.718088: step: 892/463, loss: 0.060384996235370636 2023-01-22 12:44:47.313299: step: 894/463, loss: 0.015683840960264206 2023-01-22 12:44:47.926821: step: 896/463, loss: 0.013177204877138138 2023-01-22 12:44:48.603629: step: 898/463, loss: 0.026812294498085976 2023-01-22 12:44:49.253470: step: 900/463, loss: 0.13442784547805786 2023-01-22 12:44:49.878303: step: 902/463, loss: 0.03376253321766853 2023-01-22 12:44:50.457646: step: 904/463, loss: 0.017914265394210815 2023-01-22 12:44:51.103298: step: 906/463, loss: 0.7043275833129883 2023-01-22 12:44:51.683860: step: 908/463, loss: 0.16737906634807587 2023-01-22 12:44:52.367498: step: 910/463, loss: 0.0629877895116806 2023-01-22 12:44:52.992970: step: 912/463, loss: 0.03859868273139 2023-01-22 12:44:53.594526: step: 914/463, loss: 0.03236795961856842 2023-01-22 12:44:54.212374: step: 916/463, loss: 0.0013772927923128009 2023-01-22 12:44:54.839988: step: 918/463, loss: 0.055881962180137634 2023-01-22 12:44:55.447851: step: 920/463, loss: 0.006493183318525553 2023-01-22 12:44:56.055980: step: 922/463, loss: 0.024610666558146477 2023-01-22 12:44:56.638463: step: 924/463, loss: 0.019501943141222 2023-01-22 12:44:57.200596: step: 926/463, loss: 0.026174984872341156 ================================================== Loss: 0.075 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32947916666666666, 'r': 0.33760673624288423, 'f1': 0.3334934395501406}, 'combined': 0.24573200808957726, 'epoch': 29} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33736200679577244, 'r': 0.38697406661868017, 'f1': 0.36046899356260614}, 'combined': 0.2794065883116852, 'epoch': 29} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2956442857142857, 'r': 0.3506217809704527, 'f1': 0.3207945808531746}, 'combined': 0.23637495431286548, 'epoch': 29} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32913012140808195, 'r': 0.40294239128268855, 'f1': 0.3623151419136902}, 'combined': 0.2808375741149178, 'epoch': 29} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3007871762740184, 'r': 0.3424521931013492, 'f1': 0.32027028529620416}, 'combined': 0.23598863127088726, 'epoch': 29} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32547415959296444, 'r': 0.3805175836417746, 'f1': 0.35085011101885655}, 'combined': 0.27195080375624286, 'epoch': 29} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2564102564102564, 'r': 0.2857142857142857, 'f1': 0.27027027027027023}, 'combined': 0.18018018018018014, 'epoch': 29} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.27205882352941174, 'r': 0.40217391304347827, 'f1': 0.32456140350877194}, 'combined': 0.16228070175438597, 'epoch': 29} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3888888888888889, 'r': 0.2413793103448276, 'f1': 0.2978723404255319}, 'combined': 0.19858156028368792, 'epoch': 29} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3279711174242424, 'r': 0.3285934535104364, 'f1': 0.32828199052132695}, 'combined': 0.24189199301571457, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33699381251830834, 'r': 0.40575541764612494, 'f1': 0.3681917384478598}, 'combined': 0.2853926393710684, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32926829268292684, 'r': 0.38571428571428573, 'f1': 0.35526315789473684}, 'combined': 0.23684210526315788, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29021588054776093, 'r': 0.325460313859064, 'f1': 0.30682931199235547}, 'combined': 0.2260847562048935, 'epoch': 13} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32837198696491027, 'r': 0.4053341714098111, 'f1': 0.3628166009822085}, 'combined': 0.28122626487616165, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3333333333333333, 'r': 0.4782608695652174, 'f1': 0.3928571428571428}, 'combined': 0.1964285714285714, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30241528588023536, 'r': 0.33799355480732185, 'f1': 0.319216135095804}, 'combined': 0.2352118890179608, 'epoch': 18} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3270213595028262, 'r': 0.3847310111797955, 'f1': 0.3535366048679202}, 'combined': 0.2740331578402061, 'epoch': 18} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 18} ****************************** Epoch: 30 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 12:47:34.164995: step: 2/463, loss: 0.003348055062815547 2023-01-22 12:47:34.841786: step: 4/463, loss: 0.046765830367803574 2023-01-22 12:47:35.522260: step: 6/463, loss: 0.031149689108133316 2023-01-22 12:47:36.127160: step: 8/463, loss: 0.012425611726939678 2023-01-22 12:47:36.795099: step: 10/463, loss: 0.0006921354215592146 2023-01-22 12:47:37.439060: step: 12/463, loss: 0.06397475302219391 2023-01-22 12:47:38.126626: step: 14/463, loss: 0.08447199314832687 2023-01-22 12:47:38.760148: step: 16/463, loss: 0.006886500399559736 2023-01-22 12:47:39.427757: step: 18/463, loss: 0.010859581641852856 2023-01-22 12:47:40.035424: step: 20/463, loss: 0.06130557134747505 2023-01-22 12:47:40.609284: step: 22/463, loss: 0.0675460547208786 2023-01-22 12:47:41.278819: step: 24/463, loss: 0.007639497518539429 2023-01-22 12:47:41.862758: step: 26/463, loss: 0.001132871606387198 2023-01-22 12:47:42.519720: step: 28/463, loss: 0.00405964395031333 2023-01-22 12:47:43.140752: step: 30/463, loss: 0.04965794086456299 2023-01-22 12:47:43.736297: step: 32/463, loss: 0.011324339546263218 2023-01-22 12:47:44.368939: step: 34/463, loss: 0.048247307538986206 2023-01-22 12:47:44.943437: step: 36/463, loss: 0.008319674991071224 2023-01-22 12:47:45.524190: step: 38/463, loss: 0.015220372937619686 2023-01-22 12:47:46.094725: step: 40/463, loss: 0.08465173840522766 2023-01-22 12:47:46.720257: step: 42/463, loss: 0.01284420769661665 2023-01-22 12:47:47.358440: step: 44/463, loss: 0.07054094225168228 2023-01-22 12:47:48.036873: step: 46/463, loss: 0.04635622352361679 2023-01-22 12:47:48.591842: step: 48/463, loss: 0.007711169775575399 2023-01-22 12:47:49.222172: step: 50/463, loss: 0.009285827167332172 2023-01-22 12:47:49.781916: step: 52/463, loss: 0.012208811938762665 2023-01-22 12:47:50.386829: step: 54/463, loss: 0.0004118823853787035 2023-01-22 12:47:50.968678: step: 56/463, loss: 0.24255159497261047 2023-01-22 12:47:51.575201: step: 58/463, loss: 0.028750255703926086 2023-01-22 12:47:52.156635: step: 60/463, loss: 0.021993907168507576 2023-01-22 12:47:52.869675: step: 62/463, loss: 0.014323953539133072 2023-01-22 12:47:53.454016: step: 64/463, loss: 0.003018668619915843 2023-01-22 12:47:54.064015: step: 66/463, loss: 0.02776459790766239 2023-01-22 12:47:54.680556: step: 68/463, loss: 0.0007424212526530027 2023-01-22 12:47:55.282833: step: 70/463, loss: 0.009312381967902184 2023-01-22 12:47:55.898254: step: 72/463, loss: 0.00380902411416173 2023-01-22 12:47:56.565105: step: 74/463, loss: 0.011261909268796444 2023-01-22 12:47:57.241858: step: 76/463, loss: 0.029278485104441643 2023-01-22 12:47:57.856014: step: 78/463, loss: 0.018031824380159378 2023-01-22 12:47:58.529893: step: 80/463, loss: 0.023070020601153374 2023-01-22 12:47:59.087108: step: 82/463, loss: 0.0025981683284044266 2023-01-22 12:47:59.763690: step: 84/463, loss: 0.030428972095251083 2023-01-22 12:48:00.367582: step: 86/463, loss: 0.002118075033649802 2023-01-22 12:48:00.988032: step: 88/463, loss: 0.13838376104831696 2023-01-22 12:48:01.620227: step: 90/463, loss: 0.5376948714256287 2023-01-22 12:48:02.287950: step: 92/463, loss: 0.007030895445495844 2023-01-22 12:48:02.850427: step: 94/463, loss: 0.0059221284464001656 2023-01-22 12:48:03.485974: step: 96/463, loss: 0.02351825311779976 2023-01-22 12:48:04.059348: step: 98/463, loss: 0.0036061201244592667 2023-01-22 12:48:04.696227: step: 100/463, loss: 0.018816350027918816 2023-01-22 12:48:05.334794: step: 102/463, loss: 0.014140629209578037 2023-01-22 12:48:05.941108: step: 104/463, loss: 0.008114437572658062 2023-01-22 12:48:06.519426: step: 106/463, loss: 0.05228383466601372 2023-01-22 12:48:07.151085: step: 108/463, loss: 0.004296346101909876 2023-01-22 12:48:07.714082: step: 110/463, loss: 0.017928671091794968 2023-01-22 12:48:08.268456: step: 112/463, loss: 0.03456459566950798 2023-01-22 12:48:08.913944: step: 114/463, loss: 0.3317072093486786 2023-01-22 12:48:09.563988: step: 116/463, loss: 0.0032351817935705185 2023-01-22 12:48:10.237845: step: 118/463, loss: 0.005768848583102226 2023-01-22 12:48:10.855220: step: 120/463, loss: 0.00055794877698645 2023-01-22 12:48:11.464425: step: 122/463, loss: 0.022919733077287674 2023-01-22 12:48:12.052064: step: 124/463, loss: 0.020538704469799995 2023-01-22 12:48:12.597145: step: 126/463, loss: 0.003932601306587458 2023-01-22 12:48:13.215973: step: 128/463, loss: 0.028072243556380272 2023-01-22 12:48:13.785814: step: 130/463, loss: 0.0019172925967723131 2023-01-22 12:48:14.404849: step: 132/463, loss: 0.020364809781312943 2023-01-22 12:48:14.975207: step: 134/463, loss: 0.00419685710221529 2023-01-22 12:48:15.558362: step: 136/463, loss: 0.017741378396749496 2023-01-22 12:48:16.251361: step: 138/463, loss: 0.005836400203406811 2023-01-22 12:48:16.832580: step: 140/463, loss: 0.003543427214026451 2023-01-22 12:48:17.426477: step: 142/463, loss: 0.012756900861859322 2023-01-22 12:48:18.027297: step: 144/463, loss: 0.39636000990867615 2023-01-22 12:48:18.615049: step: 146/463, loss: 0.017988160252571106 2023-01-22 12:48:19.267141: step: 148/463, loss: 0.02115839160978794 2023-01-22 12:48:19.864683: step: 150/463, loss: 0.005877521354705095 2023-01-22 12:48:20.490489: step: 152/463, loss: 0.3393757939338684 2023-01-22 12:48:21.057897: step: 154/463, loss: 0.01985025405883789 2023-01-22 12:48:21.653692: step: 156/463, loss: 0.0031463720370084047 2023-01-22 12:48:22.231974: step: 158/463, loss: 0.017887214198708534 2023-01-22 12:48:22.822140: step: 160/463, loss: 0.003854408860206604 2023-01-22 12:48:23.425710: step: 162/463, loss: 0.0056409770622849464 2023-01-22 12:48:23.990954: step: 164/463, loss: 0.08588533103466034 2023-01-22 12:48:24.591120: step: 166/463, loss: 0.014338796958327293 2023-01-22 12:48:25.241847: step: 168/463, loss: 0.040356457233428955 2023-01-22 12:48:25.834159: step: 170/463, loss: 0.022640036419034004 2023-01-22 12:48:26.442978: step: 172/463, loss: 0.02104882337152958 2023-01-22 12:48:27.026679: step: 174/463, loss: 0.0011047740699723363 2023-01-22 12:48:27.632165: step: 176/463, loss: 0.011188049800693989 2023-01-22 12:48:28.252930: step: 178/463, loss: 0.0005825217231176794 2023-01-22 12:48:28.911448: step: 180/463, loss: 0.012243734672665596 2023-01-22 12:48:29.534855: step: 182/463, loss: 0.04197511076927185 2023-01-22 12:48:30.130569: step: 184/463, loss: 0.035589613020420074 2023-01-22 12:48:30.686698: step: 186/463, loss: 0.01662350259721279 2023-01-22 12:48:31.323057: step: 188/463, loss: 0.0038081782404333353 2023-01-22 12:48:31.883912: step: 190/463, loss: 0.011021867394447327 2023-01-22 12:48:32.424461: step: 192/463, loss: 8.040751708904281e-05 2023-01-22 12:48:33.029369: step: 194/463, loss: 0.036171261221170425 2023-01-22 12:48:33.629767: step: 196/463, loss: 0.05074598640203476 2023-01-22 12:48:34.239266: step: 198/463, loss: 0.004794722888618708 2023-01-22 12:48:34.801110: step: 200/463, loss: 0.005619929172098637 2023-01-22 12:48:35.363310: step: 202/463, loss: 0.02185971662402153 2023-01-22 12:48:35.939743: step: 204/463, loss: 0.006833468563854694 2023-01-22 12:48:36.501259: step: 206/463, loss: 0.003926296252757311 2023-01-22 12:48:37.120464: step: 208/463, loss: 0.008927463553845882 2023-01-22 12:48:37.708427: step: 210/463, loss: 0.01127215102314949 2023-01-22 12:48:38.368505: step: 212/463, loss: 1.1639524698257446 2023-01-22 12:48:39.002987: step: 214/463, loss: 0.04122161865234375 2023-01-22 12:48:39.594035: step: 216/463, loss: 0.07138665020465851 2023-01-22 12:48:40.198288: step: 218/463, loss: 0.00013343404862098396 2023-01-22 12:48:40.784321: step: 220/463, loss: 0.0028880152385681868 2023-01-22 12:48:41.404930: step: 222/463, loss: 0.04937922582030296 2023-01-22 12:48:42.008534: step: 224/463, loss: 0.029749706387519836 2023-01-22 12:48:42.594826: step: 226/463, loss: 0.08724698424339294 2023-01-22 12:48:43.244375: step: 228/463, loss: 0.32674795389175415 2023-01-22 12:48:43.900412: step: 230/463, loss: 0.0015274988254532218 2023-01-22 12:48:44.485926: step: 232/463, loss: 0.004527280107140541 2023-01-22 12:48:45.074399: step: 234/463, loss: 0.1155090406537056 2023-01-22 12:48:45.768919: step: 236/463, loss: 0.025656316429376602 2023-01-22 12:48:46.347588: step: 238/463, loss: 0.03855903819203377 2023-01-22 12:48:46.959009: step: 240/463, loss: 0.00015012556104920805 2023-01-22 12:48:47.585997: step: 242/463, loss: 0.009756055660545826 2023-01-22 12:48:48.280282: step: 244/463, loss: 0.2989869713783264 2023-01-22 12:48:48.902050: step: 246/463, loss: 0.02781127206981182 2023-01-22 12:48:49.505800: step: 248/463, loss: 0.029746340587735176 2023-01-22 12:48:50.224163: step: 250/463, loss: 0.015172197483479977 2023-01-22 12:48:50.907464: step: 252/463, loss: 0.003679451998323202 2023-01-22 12:48:51.524437: step: 254/463, loss: 0.000615073717199266 2023-01-22 12:48:52.127346: step: 256/463, loss: 0.014317273162305355 2023-01-22 12:48:52.774043: step: 258/463, loss: 0.0034707149025052786 2023-01-22 12:48:53.350438: step: 260/463, loss: 0.06441783159971237 2023-01-22 12:48:53.950464: step: 262/463, loss: 0.0009400406270287931 2023-01-22 12:48:54.491798: step: 264/463, loss: 0.020266558974981308 2023-01-22 12:48:55.056490: step: 266/463, loss: 0.0024563022889196873 2023-01-22 12:48:55.651271: step: 268/463, loss: 0.010296055115759373 2023-01-22 12:48:56.282955: step: 270/463, loss: 0.011542663909494877 2023-01-22 12:48:56.888539: step: 272/463, loss: 0.10954266041517258 2023-01-22 12:48:57.457235: step: 274/463, loss: 0.005849192384630442 2023-01-22 12:48:58.117872: step: 276/463, loss: 0.010167590342462063 2023-01-22 12:48:58.728332: step: 278/463, loss: 0.003906392026692629 2023-01-22 12:48:59.333680: step: 280/463, loss: 0.02463553659617901 2023-01-22 12:48:59.997941: step: 282/463, loss: 0.020623067393898964 2023-01-22 12:49:00.615036: step: 284/463, loss: 0.0025998291093856096 2023-01-22 12:49:01.141021: step: 286/463, loss: 0.004346669185906649 2023-01-22 12:49:01.709488: step: 288/463, loss: 0.03985128179192543 2023-01-22 12:49:02.332995: step: 290/463, loss: 0.02054804004728794 2023-01-22 12:49:02.977025: step: 292/463, loss: 0.02744104154407978 2023-01-22 12:49:03.569364: step: 294/463, loss: 0.0021334406919777393 2023-01-22 12:49:04.116980: step: 296/463, loss: 0.005076550878584385 2023-01-22 12:49:04.833087: step: 298/463, loss: 0.013938345946371555 2023-01-22 12:49:05.430027: step: 300/463, loss: 0.032830264419317245 2023-01-22 12:49:06.039644: step: 302/463, loss: 0.008370975032448769 2023-01-22 12:49:06.574528: step: 304/463, loss: 0.0020224181935191154 2023-01-22 12:49:07.225057: step: 306/463, loss: 0.04459505155682564 2023-01-22 12:49:07.877247: step: 308/463, loss: 0.013771029189229012 2023-01-22 12:49:08.504729: step: 310/463, loss: 0.015615686774253845 2023-01-22 12:49:09.110755: step: 312/463, loss: 0.03839581832289696 2023-01-22 12:49:09.717405: step: 314/463, loss: 0.01638411357998848 2023-01-22 12:49:10.282472: step: 316/463, loss: 0.021316595375537872 2023-01-22 12:49:10.852830: step: 318/463, loss: 0.15528523921966553 2023-01-22 12:49:11.461123: step: 320/463, loss: 0.00015696136688347906 2023-01-22 12:49:12.038547: step: 322/463, loss: 0.004515354055911303 2023-01-22 12:49:12.732628: step: 324/463, loss: 0.00995574425905943 2023-01-22 12:49:13.341324: step: 326/463, loss: 0.021659819409251213 2023-01-22 12:49:13.972002: step: 328/463, loss: 0.007264286279678345 2023-01-22 12:49:14.543150: step: 330/463, loss: 0.01879897527396679 2023-01-22 12:49:15.125402: step: 332/463, loss: 0.019653448835015297 2023-01-22 12:49:15.711257: step: 334/463, loss: 0.021654561161994934 2023-01-22 12:49:16.325381: step: 336/463, loss: 0.004446979146450758 2023-01-22 12:49:16.925279: step: 338/463, loss: 0.010496840812265873 2023-01-22 12:49:17.588185: step: 340/463, loss: 0.007603057660162449 2023-01-22 12:49:18.264407: step: 342/463, loss: 0.005591457709670067 2023-01-22 12:49:18.845234: step: 344/463, loss: 0.005775043275207281 2023-01-22 12:49:19.495477: step: 346/463, loss: 0.022768402472138405 2023-01-22 12:49:20.102506: step: 348/463, loss: 0.00039576523704454303 2023-01-22 12:49:20.712073: step: 350/463, loss: 0.018127653747797012 2023-01-22 12:49:21.319932: step: 352/463, loss: 0.001632579485885799 2023-01-22 12:49:21.883823: step: 354/463, loss: 0.011940563097596169 2023-01-22 12:49:22.462298: step: 356/463, loss: 0.030893150717020035 2023-01-22 12:49:23.100010: step: 358/463, loss: 0.01903459057211876 2023-01-22 12:49:23.770372: step: 360/463, loss: 0.015471003018319607 2023-01-22 12:49:24.333194: step: 362/463, loss: 0.05312786251306534 2023-01-22 12:49:24.918096: step: 364/463, loss: 0.004520105198025703 2023-01-22 12:49:25.555331: step: 366/463, loss: 0.00021048002236057073 2023-01-22 12:49:26.173714: step: 368/463, loss: 0.06298676878213882 2023-01-22 12:49:26.919038: step: 370/463, loss: 0.05269232019782066 2023-01-22 12:49:27.485837: step: 372/463, loss: 0.03298819810152054 2023-01-22 12:49:28.012696: step: 374/463, loss: 0.0004433818394318223 2023-01-22 12:49:28.583559: step: 376/463, loss: 0.016590416431427002 2023-01-22 12:49:29.181405: step: 378/463, loss: 0.08264383673667908 2023-01-22 12:49:29.792065: step: 380/463, loss: 0.02032715082168579 2023-01-22 12:49:30.458305: step: 382/463, loss: 0.0247122123837471 2023-01-22 12:49:31.068105: step: 384/463, loss: 0.00149907183367759 2023-01-22 12:49:31.686311: step: 386/463, loss: 0.053618915379047394 2023-01-22 12:49:32.313952: step: 388/463, loss: 0.014058658853173256 2023-01-22 12:49:32.955266: step: 390/463, loss: 0.015364853665232658 2023-01-22 12:49:33.557605: step: 392/463, loss: 0.013582360930740833 2023-01-22 12:49:34.188069: step: 394/463, loss: 0.0031859998125582933 2023-01-22 12:49:34.823726: step: 396/463, loss: 0.021418336778879166 2023-01-22 12:49:35.401643: step: 398/463, loss: 0.005219413433223963 2023-01-22 12:49:36.006544: step: 400/463, loss: 0.09055787324905396 2023-01-22 12:49:36.647798: step: 402/463, loss: 0.0020374557934701443 2023-01-22 12:49:37.259325: step: 404/463, loss: 0.003488131333142519 2023-01-22 12:49:37.889418: step: 406/463, loss: 0.015424426645040512 2023-01-22 12:49:38.631702: step: 408/463, loss: 0.006992554757744074 2023-01-22 12:49:39.225807: step: 410/463, loss: 0.001312085660174489 2023-01-22 12:49:39.901965: step: 412/463, loss: 0.1532529592514038 2023-01-22 12:49:40.512155: step: 414/463, loss: 0.03673427551984787 2023-01-22 12:49:41.171054: step: 416/463, loss: 0.10569197684526443 2023-01-22 12:49:41.795462: step: 418/463, loss: 0.03797304630279541 2023-01-22 12:49:42.380646: step: 420/463, loss: 0.002807617885991931 2023-01-22 12:49:42.964835: step: 422/463, loss: 0.1057528704404831 2023-01-22 12:49:43.532867: step: 424/463, loss: 0.014239491894841194 2023-01-22 12:49:44.172176: step: 426/463, loss: 0.01293495949357748 2023-01-22 12:49:44.831869: step: 428/463, loss: 0.023818783462047577 2023-01-22 12:49:45.446122: step: 430/463, loss: 0.14168071746826172 2023-01-22 12:49:46.037350: step: 432/463, loss: 0.04936476796865463 2023-01-22 12:49:46.667758: step: 434/463, loss: 0.618794858455658 2023-01-22 12:49:47.274955: step: 436/463, loss: 0.003481074469164014 2023-01-22 12:49:47.918638: step: 438/463, loss: 1.2794694900512695 2023-01-22 12:49:48.511681: step: 440/463, loss: 0.013472139835357666 2023-01-22 12:49:49.168399: step: 442/463, loss: 0.0053153629414737225 2023-01-22 12:49:49.750318: step: 444/463, loss: 0.009222879074513912 2023-01-22 12:49:50.371332: step: 446/463, loss: 0.004356713034212589 2023-01-22 12:49:51.182905: step: 448/463, loss: 0.034813858568668365 2023-01-22 12:49:51.776285: step: 450/463, loss: 0.002519553294405341 2023-01-22 12:49:52.430965: step: 452/463, loss: 0.01727062091231346 2023-01-22 12:49:53.059519: step: 454/463, loss: 0.008006599731743336 2023-01-22 12:49:53.643366: step: 456/463, loss: 0.37737908959388733 2023-01-22 12:49:54.244584: step: 458/463, loss: 0.010549294762313366 2023-01-22 12:49:54.841084: step: 460/463, loss: 0.007140059489756823 2023-01-22 12:49:55.539908: step: 462/463, loss: 0.004069739021360874 2023-01-22 12:49:56.127016: step: 464/463, loss: 0.002762184012681246 2023-01-22 12:49:56.763147: step: 466/463, loss: 0.03549811616539955 2023-01-22 12:49:57.365711: step: 468/463, loss: 0.0002543667796999216 2023-01-22 12:49:57.960373: step: 470/463, loss: 0.592303991317749 2023-01-22 12:49:58.538459: step: 472/463, loss: 0.013420642353594303 2023-01-22 12:49:59.160041: step: 474/463, loss: 0.12643814086914062 2023-01-22 12:49:59.733441: step: 476/463, loss: 0.0009326456347480416 2023-01-22 12:50:00.360443: step: 478/463, loss: 0.2759227752685547 2023-01-22 12:50:00.903682: step: 480/463, loss: 0.06930850446224213 2023-01-22 12:50:01.503205: step: 482/463, loss: 0.0386422723531723 2023-01-22 12:50:02.074319: step: 484/463, loss: 14.120134353637695 2023-01-22 12:50:02.651916: step: 486/463, loss: 0.05389359965920448 2023-01-22 12:50:03.303626: step: 488/463, loss: 0.07896668463945389 2023-01-22 12:50:03.899983: step: 490/463, loss: 0.04704621061682701 2023-01-22 12:50:04.464958: step: 492/463, loss: 0.012464272789657116 2023-01-22 12:50:05.046941: step: 494/463, loss: 0.00142408418469131 2023-01-22 12:50:05.652233: step: 496/463, loss: 0.07210279256105423 2023-01-22 12:50:06.253535: step: 498/463, loss: 0.009681577794253826 2023-01-22 12:50:06.930938: step: 500/463, loss: 0.08222053200006485 2023-01-22 12:50:07.558407: step: 502/463, loss: 0.0025594925973564386 2023-01-22 12:50:08.118181: step: 504/463, loss: 0.012416169978678226 2023-01-22 12:50:08.666017: step: 506/463, loss: 0.027304846793413162 2023-01-22 12:50:09.248737: step: 508/463, loss: 0.056054968386888504 2023-01-22 12:50:09.869195: step: 510/463, loss: 0.03493053838610649 2023-01-22 12:50:10.519544: step: 512/463, loss: 0.011331530287861824 2023-01-22 12:50:11.124238: step: 514/463, loss: 0.017196929082274437 2023-01-22 12:50:11.759840: step: 516/463, loss: 0.1067686527967453 2023-01-22 12:50:12.352809: step: 518/463, loss: 0.0014026375720277429 2023-01-22 12:50:12.970526: step: 520/463, loss: 0.9607723951339722 2023-01-22 12:50:13.658987: step: 522/463, loss: 0.01363272126764059 2023-01-22 12:50:14.251499: step: 524/463, loss: 0.015428583137691021 2023-01-22 12:50:14.868416: step: 526/463, loss: 0.012432626448571682 2023-01-22 12:50:15.531083: step: 528/463, loss: 0.05134475976228714 2023-01-22 12:50:16.167379: step: 530/463, loss: 0.0033621059264987707 2023-01-22 12:50:16.757735: step: 532/463, loss: 0.0013458256144076586 2023-01-22 12:50:17.325756: step: 534/463, loss: 0.015177948400378227 2023-01-22 12:50:17.915618: step: 536/463, loss: 0.11847135424613953 2023-01-22 12:50:18.464833: step: 538/463, loss: 1.146825889009051e-05 2023-01-22 12:50:19.091133: step: 540/463, loss: 0.01634560152888298 2023-01-22 12:50:19.707068: step: 542/463, loss: 0.01712670549750328 2023-01-22 12:50:20.307433: step: 544/463, loss: 0.0015381629345938563 2023-01-22 12:50:20.934520: step: 546/463, loss: 1.8892041444778442 2023-01-22 12:50:21.532671: step: 548/463, loss: 0.00919247604906559 2023-01-22 12:50:22.078680: step: 550/463, loss: 0.004723436664789915 2023-01-22 12:50:22.629993: step: 552/463, loss: 0.00408986397087574 2023-01-22 12:50:23.191631: step: 554/463, loss: 0.00014379460480995476 2023-01-22 12:50:23.777336: step: 556/463, loss: 0.01637520268559456 2023-01-22 12:50:24.390071: step: 558/463, loss: 0.011019648984074593 2023-01-22 12:50:24.990118: step: 560/463, loss: 0.0492401085793972 2023-01-22 12:50:25.593327: step: 562/463, loss: 0.09095236659049988 2023-01-22 12:50:26.191715: step: 564/463, loss: 0.008499075658619404 2023-01-22 12:50:26.879707: step: 566/463, loss: 0.03640542924404144 2023-01-22 12:50:27.518187: step: 568/463, loss: 0.0003638894122559577 2023-01-22 12:50:28.137729: step: 570/463, loss: 0.0014382230583578348 2023-01-22 12:50:28.733779: step: 572/463, loss: 0.05100620165467262 2023-01-22 12:50:29.397147: step: 574/463, loss: 0.007858376018702984 2023-01-22 12:50:30.047311: step: 576/463, loss: 0.055623859167099 2023-01-22 12:50:30.701455: step: 578/463, loss: 0.0013281474821269512 2023-01-22 12:50:31.301456: step: 580/463, loss: 0.019869204610586166 2023-01-22 12:50:31.893936: step: 582/463, loss: 0.0028137112967669964 2023-01-22 12:50:32.549654: step: 584/463, loss: 0.2470996379852295 2023-01-22 12:50:33.220728: step: 586/463, loss: 0.5812039375305176 2023-01-22 12:50:33.820995: step: 588/463, loss: 0.03611539304256439 2023-01-22 12:50:34.480758: step: 590/463, loss: 0.011885772459208965 2023-01-22 12:50:35.067285: step: 592/463, loss: 0.01397947408258915 2023-01-22 12:50:35.813045: step: 594/463, loss: 0.12583327293395996 2023-01-22 12:50:36.402970: step: 596/463, loss: 0.0013787749921903014 2023-01-22 12:50:37.044303: step: 598/463, loss: 4.441771507263184 2023-01-22 12:50:37.666159: step: 600/463, loss: 0.07939635217189789 2023-01-22 12:50:38.246942: step: 602/463, loss: 0.04025167599320412 2023-01-22 12:50:38.822396: step: 604/463, loss: 0.061051249504089355 2023-01-22 12:50:39.451757: step: 606/463, loss: 0.7362342476844788 2023-01-22 12:50:40.000659: step: 608/463, loss: 0.06517346203327179 2023-01-22 12:50:40.645479: step: 610/463, loss: 0.046322111040353775 2023-01-22 12:50:41.206524: step: 612/463, loss: 0.031612757593393326 2023-01-22 12:50:41.800434: step: 614/463, loss: 0.02285258285701275 2023-01-22 12:50:42.351065: step: 616/463, loss: 0.0009958171285688877 2023-01-22 12:50:42.944535: step: 618/463, loss: 0.45432621240615845 2023-01-22 12:50:43.528349: step: 620/463, loss: 0.00349188013933599 2023-01-22 12:50:44.114749: step: 622/463, loss: 0.0024540708400309086 2023-01-22 12:50:44.724127: step: 624/463, loss: 0.008386512286961079 2023-01-22 12:50:45.386538: step: 626/463, loss: 0.015294750221073627 2023-01-22 12:50:45.993023: step: 628/463, loss: 0.0017503045964986086 2023-01-22 12:50:46.582205: step: 630/463, loss: 0.08070531487464905 2023-01-22 12:50:47.171751: step: 632/463, loss: 0.0030635695438832045 2023-01-22 12:50:47.762021: step: 634/463, loss: 0.08945896476507187 2023-01-22 12:50:48.363561: step: 636/463, loss: 0.022937500849366188 2023-01-22 12:50:48.957307: step: 638/463, loss: 0.052769217640161514 2023-01-22 12:50:49.592146: step: 640/463, loss: 0.042901817709207535 2023-01-22 12:50:50.149250: step: 642/463, loss: 0.07390157133340836 2023-01-22 12:50:50.755168: step: 644/463, loss: 0.047893062233924866 2023-01-22 12:50:51.330719: step: 646/463, loss: 0.05383450910449028 2023-01-22 12:50:51.891602: step: 648/463, loss: 0.008620602078735828 2023-01-22 12:50:52.615059: step: 650/463, loss: 0.18091782927513123 2023-01-22 12:50:53.262926: step: 652/463, loss: 0.021911179646849632 2023-01-22 12:50:53.885478: step: 654/463, loss: 0.000647700042463839 2023-01-22 12:50:54.434513: step: 656/463, loss: 0.001276754424907267 2023-01-22 12:50:55.007762: step: 658/463, loss: 0.03981545567512512 2023-01-22 12:50:55.593839: step: 660/463, loss: 0.005832474213093519 2023-01-22 12:50:56.250905: step: 662/463, loss: 0.011179996654391289 2023-01-22 12:50:56.847534: step: 664/463, loss: 0.03047151304781437 2023-01-22 12:50:57.438350: step: 666/463, loss: 0.0040741669945418835 2023-01-22 12:50:57.981819: step: 668/463, loss: 0.00031125021632760763 2023-01-22 12:50:58.613480: step: 670/463, loss: 0.06128876283764839 2023-01-22 12:50:59.187685: step: 672/463, loss: 0.03009014017879963 2023-01-22 12:50:59.755278: step: 674/463, loss: 0.0019778709392994642 2023-01-22 12:51:00.386676: step: 676/463, loss: 0.03799960017204285 2023-01-22 12:51:01.003327: step: 678/463, loss: 0.008525410667061806 2023-01-22 12:51:01.542721: step: 680/463, loss: 0.013243120163679123 2023-01-22 12:51:02.160492: step: 682/463, loss: 0.05975394323468208 2023-01-22 12:51:02.791411: step: 684/463, loss: 0.036906905472278595 2023-01-22 12:51:03.482522: step: 686/463, loss: 0.0022359774447977543 2023-01-22 12:51:04.077290: step: 688/463, loss: 0.023090695962309837 2023-01-22 12:51:04.638251: step: 690/463, loss: 0.022579804062843323 2023-01-22 12:51:05.188804: step: 692/463, loss: 0.03391774743795395 2023-01-22 12:51:05.790058: step: 694/463, loss: 0.03190687671303749 2023-01-22 12:51:06.392751: step: 696/463, loss: 0.0039575244300067425 2023-01-22 12:51:06.980219: step: 698/463, loss: 0.005288866814225912 2023-01-22 12:51:07.592932: step: 700/463, loss: 0.03183300420641899 2023-01-22 12:51:08.200871: step: 702/463, loss: 0.002325284993276 2023-01-22 12:51:08.860643: step: 704/463, loss: 0.010124661028385162 2023-01-22 12:51:09.480279: step: 706/463, loss: 0.027480168268084526 2023-01-22 12:51:10.143582: step: 708/463, loss: 0.01256866380572319 2023-01-22 12:51:10.809197: step: 710/463, loss: 0.03388833627104759 2023-01-22 12:51:11.438064: step: 712/463, loss: 0.07260380685329437 2023-01-22 12:51:12.045778: step: 714/463, loss: 0.005551299545913935 2023-01-22 12:51:12.681485: step: 716/463, loss: 0.020944247022271156 2023-01-22 12:51:13.251367: step: 718/463, loss: 0.01053232979029417 2023-01-22 12:51:13.848501: step: 720/463, loss: 0.0008032767800614238 2023-01-22 12:51:14.471910: step: 722/463, loss: 0.14156877994537354 2023-01-22 12:51:15.068768: step: 724/463, loss: 0.004391709342598915 2023-01-22 12:51:15.668386: step: 726/463, loss: 0.08217251300811768 2023-01-22 12:51:16.359689: step: 728/463, loss: 0.005677036941051483 2023-01-22 12:51:16.994157: step: 730/463, loss: 0.002708585700020194 2023-01-22 12:51:17.585590: step: 732/463, loss: 0.002403578255325556 2023-01-22 12:51:18.213105: step: 734/463, loss: 0.01208692230284214 2023-01-22 12:51:18.753520: step: 736/463, loss: 0.060735706239938736 2023-01-22 12:51:19.355900: step: 738/463, loss: 0.012099682353436947 2023-01-22 12:51:19.929119: step: 740/463, loss: 0.001410757889971137 2023-01-22 12:51:20.581245: step: 742/463, loss: 0.012133477255702019 2023-01-22 12:51:21.199748: step: 744/463, loss: 0.01876804232597351 2023-01-22 12:51:21.841558: step: 746/463, loss: 0.02310679480433464 2023-01-22 12:51:22.401995: step: 748/463, loss: 0.029150746762752533 2023-01-22 12:51:23.035850: step: 750/463, loss: 0.008996223099529743 2023-01-22 12:51:23.680440: step: 752/463, loss: 0.01346584502607584 2023-01-22 12:51:24.289170: step: 754/463, loss: 0.0063035194762051105 2023-01-22 12:51:24.903767: step: 756/463, loss: 0.07119446247816086 2023-01-22 12:51:25.483744: step: 758/463, loss: 0.007020119111984968 2023-01-22 12:51:26.134471: step: 760/463, loss: 0.04800300672650337 2023-01-22 12:51:26.708038: step: 762/463, loss: 0.06737812608480453 2023-01-22 12:51:27.377512: step: 764/463, loss: 0.020925460383296013 2023-01-22 12:51:27.946083: step: 766/463, loss: 0.012368453666567802 2023-01-22 12:51:28.537015: step: 768/463, loss: 0.006261227186769247 2023-01-22 12:51:29.114306: step: 770/463, loss: 0.0013962461380288005 2023-01-22 12:51:29.703763: step: 772/463, loss: 0.045866724103689194 2023-01-22 12:51:30.326012: step: 774/463, loss: 0.00868605449795723 2023-01-22 12:51:30.936379: step: 776/463, loss: 0.04266854003071785 2023-01-22 12:51:31.557676: step: 778/463, loss: 0.012076734565198421 2023-01-22 12:51:32.139066: step: 780/463, loss: 0.016845114529132843 2023-01-22 12:51:32.769476: step: 782/463, loss: 0.022558672353625298 2023-01-22 12:51:33.427454: step: 784/463, loss: 0.004800360184162855 2023-01-22 12:51:34.043577: step: 786/463, loss: 0.04612700641155243 2023-01-22 12:51:34.643058: step: 788/463, loss: 0.008846086449921131 2023-01-22 12:51:35.254672: step: 790/463, loss: 0.11185453832149506 2023-01-22 12:51:35.868474: step: 792/463, loss: 0.02231520414352417 2023-01-22 12:51:36.458738: step: 794/463, loss: 0.2917740046977997 2023-01-22 12:51:37.131653: step: 796/463, loss: 0.21534432470798492 2023-01-22 12:51:37.754430: step: 798/463, loss: 0.07309827208518982 2023-01-22 12:51:38.334742: step: 800/463, loss: 0.0015493407845497131 2023-01-22 12:51:39.043372: step: 802/463, loss: 0.006521427072584629 2023-01-22 12:51:39.665205: step: 804/463, loss: 0.015714384615421295 2023-01-22 12:51:40.300821: step: 806/463, loss: 0.008010848425328732 2023-01-22 12:51:40.905712: step: 808/463, loss: 0.006601187400519848 2023-01-22 12:51:41.462233: step: 810/463, loss: 0.035502608865499496 2023-01-22 12:51:42.049621: step: 812/463, loss: 0.03248879685997963 2023-01-22 12:51:42.662351: step: 814/463, loss: 0.025749389082193375 2023-01-22 12:51:43.318834: step: 816/463, loss: 0.04278761148452759 2023-01-22 12:51:43.872273: step: 818/463, loss: 0.008416598662734032 2023-01-22 12:51:44.473997: step: 820/463, loss: 0.13345743715763092 2023-01-22 12:51:45.059025: step: 822/463, loss: 0.014655270613729954 2023-01-22 12:51:45.627445: step: 824/463, loss: 0.0012383426073938608 2023-01-22 12:51:46.216103: step: 826/463, loss: 0.09054864197969437 2023-01-22 12:51:46.863807: step: 828/463, loss: 0.05785893276333809 2023-01-22 12:51:47.421808: step: 830/463, loss: 0.002178038004785776 2023-01-22 12:51:48.025508: step: 832/463, loss: 0.1174110695719719 2023-01-22 12:51:48.660099: step: 834/463, loss: 0.11237432062625885 2023-01-22 12:51:49.290366: step: 836/463, loss: 0.033386338502168655 2023-01-22 12:51:49.894098: step: 838/463, loss: 0.016350561752915382 2023-01-22 12:51:50.502606: step: 840/463, loss: 0.006214508321136236 2023-01-22 12:51:51.076175: step: 842/463, loss: 0.16392134130001068 2023-01-22 12:51:51.701854: step: 844/463, loss: 0.01186341792345047 2023-01-22 12:51:52.295406: step: 846/463, loss: 0.0068565248511731625 2023-01-22 12:51:52.879993: step: 848/463, loss: 0.11053220182657242 2023-01-22 12:51:53.489876: step: 850/463, loss: 0.05240786820650101 2023-01-22 12:51:54.041794: step: 852/463, loss: 0.035946860909461975 2023-01-22 12:51:54.651005: step: 854/463, loss: 0.13795296847820282 2023-01-22 12:51:55.268284: step: 856/463, loss: 0.029736429452896118 2023-01-22 12:51:55.984041: step: 858/463, loss: 0.028719386085867882 2023-01-22 12:51:56.522776: step: 860/463, loss: 0.0007364210323430598 2023-01-22 12:51:57.178187: step: 862/463, loss: 0.018374288454651833 2023-01-22 12:51:57.766415: step: 864/463, loss: 0.02100134640932083 2023-01-22 12:51:58.400812: step: 866/463, loss: 0.044303975999355316 2023-01-22 12:51:58.977611: step: 868/463, loss: 0.5984532833099365 2023-01-22 12:51:59.578048: step: 870/463, loss: 0.0006451236549764872 2023-01-22 12:52:00.238969: step: 872/463, loss: 0.0909319743514061 2023-01-22 12:52:00.907200: step: 874/463, loss: 0.0290652085095644 2023-01-22 12:52:01.553372: step: 876/463, loss: 0.00032575963996350765 2023-01-22 12:52:02.165377: step: 878/463, loss: 0.015450532548129559 2023-01-22 12:52:02.797967: step: 880/463, loss: 0.038711994886398315 2023-01-22 12:52:03.497653: step: 882/463, loss: 0.011028628796339035 2023-01-22 12:52:04.073629: step: 884/463, loss: 0.031230472028255463 2023-01-22 12:52:04.756732: step: 886/463, loss: 0.005912075750529766 2023-01-22 12:52:05.326171: step: 888/463, loss: 0.4749009311199188 2023-01-22 12:52:05.990288: step: 890/463, loss: 0.0033143230248242617 2023-01-22 12:52:06.491394: step: 892/463, loss: 0.011290629394352436 2023-01-22 12:52:07.062394: step: 894/463, loss: 0.017845887690782547 2023-01-22 12:52:07.630969: step: 896/463, loss: 0.040140196681022644 2023-01-22 12:52:08.212512: step: 898/463, loss: 0.4968484044075012 2023-01-22 12:52:08.784296: step: 900/463, loss: 0.022448042407631874 2023-01-22 12:52:09.416051: step: 902/463, loss: 0.00402583135291934 2023-01-22 12:52:10.047356: step: 904/463, loss: 0.07864254713058472 2023-01-22 12:52:10.644048: step: 906/463, loss: 0.004549562931060791 2023-01-22 12:52:11.227948: step: 908/463, loss: 0.003151519689708948 2023-01-22 12:52:11.811835: step: 910/463, loss: 0.015122473239898682 2023-01-22 12:52:12.411049: step: 912/463, loss: 0.013166156597435474 2023-01-22 12:52:13.095429: step: 914/463, loss: 0.07530917227268219 2023-01-22 12:52:13.703813: step: 916/463, loss: 0.013531996868550777 2023-01-22 12:52:14.253331: step: 918/463, loss: 0.03069126419723034 2023-01-22 12:52:14.874163: step: 920/463, loss: 0.0023237536661326885 2023-01-22 12:52:15.509275: step: 922/463, loss: 0.04988205060362816 2023-01-22 12:52:16.171187: step: 924/463, loss: 0.0030852099880576134 2023-01-22 12:52:16.840859: step: 926/463, loss: 0.02364422008395195 ================================================== Loss: 0.095 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33592524109186006, 'r': 0.3391123875917828, 'f1': 0.33751129038879996}, 'combined': 0.24869252976016837, 'epoch': 30} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34232527066521967, 'r': 0.40682589611225095, 'f1': 0.3717988869971684}, 'combined': 0.28818861097388176, 'epoch': 30} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2927311953049035, 'r': 0.34772244451777906, 'f1': 0.31786596402579287}, 'combined': 0.23421702612426842, 'epoch': 30} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32647614669291075, 'r': 0.4152968630725997, 'f1': 0.36556875972733704}, 'combined': 0.2833595171092278, 'epoch': 30} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.28828989441579156, 'r': 0.3309589869479201, 'f1': 0.3081543924409079}, 'combined': 0.22706113127224792, 'epoch': 30} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3242496569541448, 'r': 0.3969674109034199, 'f1': 0.35694259757266195}, 'combined': 0.27667320960177627, 'epoch': 30} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2755681818181818, 'r': 0.3464285714285714, 'f1': 0.3069620253164557}, 'combined': 0.20464135021097046, 'epoch': 30} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.26515151515151514, 'r': 0.3804347826086957, 'f1': 0.3125}, 'combined': 0.15625, 'epoch': 30} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.425, 'r': 0.29310344827586204, 'f1': 0.3469387755102041}, 'combined': 0.2312925170068027, 'epoch': 30} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3279711174242424, 'r': 0.3285934535104364, 'f1': 0.32828199052132695}, 'combined': 0.24189199301571457, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33699381251830834, 'r': 0.40575541764612494, 'f1': 0.3681917384478598}, 'combined': 0.2853926393710684, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32926829268292684, 'r': 0.38571428571428573, 'f1': 0.35526315789473684}, 'combined': 0.23684210526315788, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29021588054776093, 'r': 0.325460313859064, 'f1': 0.30682931199235547}, 'combined': 0.2260847562048935, 'epoch': 13} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32837198696491027, 'r': 0.4053341714098111, 'f1': 0.3628166009822085}, 'combined': 0.28122626487616165, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3333333333333333, 'r': 0.4782608695652174, 'f1': 0.3928571428571428}, 'combined': 0.1964285714285714, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30241528588023536, 'r': 0.33799355480732185, 'f1': 0.319216135095804}, 'combined': 0.2352118890179608, 'epoch': 18} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3270213595028262, 'r': 0.3847310111797955, 'f1': 0.3535366048679202}, 'combined': 0.2740331578402061, 'epoch': 18} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 18} ****************************** Epoch: 31 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 12:54:54.116757: step: 2/463, loss: 0.004422870930284262 2023-01-22 12:54:54.683498: step: 4/463, loss: 0.0476013720035553 2023-01-22 12:54:55.238549: step: 6/463, loss: 0.03670978173613548 2023-01-22 12:54:55.869704: step: 8/463, loss: 0.01566462032496929 2023-01-22 12:54:56.449866: step: 10/463, loss: 0.013408592902123928 2023-01-22 12:54:57.101775: step: 12/463, loss: 0.025252968072891235 2023-01-22 12:54:57.663627: step: 14/463, loss: 0.006519390270113945 2023-01-22 12:54:58.204350: step: 16/463, loss: 0.007219681050628424 2023-01-22 12:54:58.852528: step: 18/463, loss: 0.03251561149954796 2023-01-22 12:54:59.492528: step: 20/463, loss: 0.03944979980587959 2023-01-22 12:55:00.082690: step: 22/463, loss: 0.0031611749436706305 2023-01-22 12:55:00.669015: step: 24/463, loss: 0.014545641839504242 2023-01-22 12:55:01.246399: step: 26/463, loss: 0.011168266646564007 2023-01-22 12:55:01.873438: step: 28/463, loss: 0.0022661034017801285 2023-01-22 12:55:02.528685: step: 30/463, loss: 0.032148342579603195 2023-01-22 12:55:03.105662: step: 32/463, loss: 0.001172112999483943 2023-01-22 12:55:03.707573: step: 34/463, loss: 0.012198339216411114 2023-01-22 12:55:04.415590: step: 36/463, loss: 0.002454516477882862 2023-01-22 12:55:05.003147: step: 38/463, loss: 0.051385894417762756 2023-01-22 12:55:05.658595: step: 40/463, loss: 0.02402893267571926 2023-01-22 12:55:06.267430: step: 42/463, loss: 0.13218927383422852 2023-01-22 12:55:06.994618: step: 44/463, loss: 0.0474386140704155 2023-01-22 12:55:07.677595: step: 46/463, loss: 0.002726589096710086 2023-01-22 12:55:08.249602: step: 48/463, loss: 0.005372941493988037 2023-01-22 12:55:08.840991: step: 50/463, loss: 0.04420856386423111 2023-01-22 12:55:09.464731: step: 52/463, loss: 0.01096365787088871 2023-01-22 12:55:10.069256: step: 54/463, loss: 0.016338463872671127 2023-01-22 12:55:10.696706: step: 56/463, loss: 0.013118507340550423 2023-01-22 12:55:11.331118: step: 58/463, loss: 0.0031445943750441074 2023-01-22 12:55:11.898128: step: 60/463, loss: 0.004941543098539114 2023-01-22 12:55:12.521506: step: 62/463, loss: 0.022115424275398254 2023-01-22 12:55:13.188326: step: 64/463, loss: 0.03489331528544426 2023-01-22 12:55:13.795372: step: 66/463, loss: 0.0438426248729229 2023-01-22 12:55:14.359732: step: 68/463, loss: 0.3694595396518707 2023-01-22 12:55:14.968121: step: 70/463, loss: 0.008282233029603958 2023-01-22 12:55:15.548255: step: 72/463, loss: 0.009963375516235828 2023-01-22 12:55:16.119879: step: 74/463, loss: 0.047709763050079346 2023-01-22 12:55:16.683711: step: 76/463, loss: 0.34061020612716675 2023-01-22 12:55:17.247099: step: 78/463, loss: 0.0006715833442285657 2023-01-22 12:55:17.833947: step: 80/463, loss: 0.0015674851601943374 2023-01-22 12:55:18.474004: step: 82/463, loss: 0.07726532965898514 2023-01-22 12:55:19.045996: step: 84/463, loss: 0.03366328775882721 2023-01-22 12:55:19.654535: step: 86/463, loss: 0.004537722561508417 2023-01-22 12:55:20.314839: step: 88/463, loss: 0.0054290262050926685 2023-01-22 12:55:20.900679: step: 90/463, loss: 0.029591098427772522 2023-01-22 12:55:21.464912: step: 92/463, loss: 0.0003665934782475233 2023-01-22 12:55:22.097369: step: 94/463, loss: 0.009631955996155739 2023-01-22 12:55:22.664051: step: 96/463, loss: 0.027263300493359566 2023-01-22 12:55:23.276812: step: 98/463, loss: 0.007477788254618645 2023-01-22 12:55:23.887067: step: 100/463, loss: 0.012094361707568169 2023-01-22 12:55:24.565675: step: 102/463, loss: 0.0550723671913147 2023-01-22 12:55:25.210503: step: 104/463, loss: 0.10447590053081512 2023-01-22 12:55:25.774333: step: 106/463, loss: 0.008263484574854374 2023-01-22 12:55:26.377197: step: 108/463, loss: 0.2188422679901123 2023-01-22 12:55:27.004228: step: 110/463, loss: 0.005995317827910185 2023-01-22 12:55:27.560503: step: 112/463, loss: 0.06964045763015747 2023-01-22 12:55:28.150634: step: 114/463, loss: 7.80285699875094e-05 2023-01-22 12:55:28.791163: step: 116/463, loss: 0.005388882476836443 2023-01-22 12:55:29.377945: step: 118/463, loss: 0.030760588124394417 2023-01-22 12:55:30.013892: step: 120/463, loss: 0.00666368193924427 2023-01-22 12:55:30.549991: step: 122/463, loss: 0.03842487558722496 2023-01-22 12:55:31.144876: step: 124/463, loss: 0.007111882790923119 2023-01-22 12:55:31.748604: step: 126/463, loss: 0.003897252958267927 2023-01-22 12:55:32.324973: step: 128/463, loss: 0.0006882630405016243 2023-01-22 12:55:32.940118: step: 130/463, loss: 0.007725379429757595 2023-01-22 12:55:33.516291: step: 132/463, loss: 0.0006098590674810112 2023-01-22 12:55:34.112760: step: 134/463, loss: 0.013623587787151337 2023-01-22 12:55:34.702674: step: 136/463, loss: 0.0449017770588398 2023-01-22 12:55:35.299476: step: 138/463, loss: 0.017932362854480743 2023-01-22 12:55:35.859903: step: 140/463, loss: 0.027936631813645363 2023-01-22 12:55:36.444758: step: 142/463, loss: 0.011372219771146774 2023-01-22 12:55:37.125790: step: 144/463, loss: 0.0019866484217345715 2023-01-22 12:55:37.784464: step: 146/463, loss: 0.023730548098683357 2023-01-22 12:55:38.365986: step: 148/463, loss: 0.11961495876312256 2023-01-22 12:55:39.011693: step: 150/463, loss: 0.07656265795230865 2023-01-22 12:55:39.556235: step: 152/463, loss: 0.31471946835517883 2023-01-22 12:55:40.172391: step: 154/463, loss: 0.035964835435152054 2023-01-22 12:55:40.769363: step: 156/463, loss: 0.019084136933088303 2023-01-22 12:55:41.381985: step: 158/463, loss: 0.039367157965898514 2023-01-22 12:55:42.011285: step: 160/463, loss: 0.07852606475353241 2023-01-22 12:55:42.667853: step: 162/463, loss: 0.024555031210184097 2023-01-22 12:55:43.285766: step: 164/463, loss: 0.0023580596316605806 2023-01-22 12:55:43.913890: step: 166/463, loss: 0.05921948328614235 2023-01-22 12:55:44.517777: step: 168/463, loss: 0.06778226792812347 2023-01-22 12:55:45.144763: step: 170/463, loss: 0.0036057790275663137 2023-01-22 12:55:45.723941: step: 172/463, loss: 0.0017839828506112099 2023-01-22 12:55:46.267403: step: 174/463, loss: 0.0013491861755028367 2023-01-22 12:55:46.909575: step: 176/463, loss: 0.08490173518657684 2023-01-22 12:55:47.457420: step: 178/463, loss: 0.0032500780653208494 2023-01-22 12:55:48.074581: step: 180/463, loss: 0.014830099418759346 2023-01-22 12:55:48.657049: step: 182/463, loss: 0.02372978813946247 2023-01-22 12:55:49.264587: step: 184/463, loss: 0.2949571907520294 2023-01-22 12:55:49.927907: step: 186/463, loss: 0.019025932997465134 2023-01-22 12:55:50.484557: step: 188/463, loss: 0.04316798970103264 2023-01-22 12:55:51.029156: step: 190/463, loss: 0.0016627575969323516 2023-01-22 12:55:51.582431: step: 192/463, loss: 0.0056515843607485294 2023-01-22 12:55:52.252871: step: 194/463, loss: 0.0047281705774366856 2023-01-22 12:55:52.937693: step: 196/463, loss: 0.003192935371771455 2023-01-22 12:55:53.585722: step: 198/463, loss: 0.20383906364440918 2023-01-22 12:55:54.198994: step: 200/463, loss: 0.03514109551906586 2023-01-22 12:55:54.838016: step: 202/463, loss: 0.010624373331665993 2023-01-22 12:55:55.409731: step: 204/463, loss: 0.03294515237212181 2023-01-22 12:55:55.995197: step: 206/463, loss: 0.017936188727617264 2023-01-22 12:55:56.527125: step: 208/463, loss: 0.0686739832162857 2023-01-22 12:55:57.101891: step: 210/463, loss: 0.33181145787239075 2023-01-22 12:55:57.629002: step: 212/463, loss: 0.010371187701821327 2023-01-22 12:55:58.304068: step: 214/463, loss: 0.036026500165462494 2023-01-22 12:55:58.877037: step: 216/463, loss: 0.03120010532438755 2023-01-22 12:55:59.498104: step: 218/463, loss: 0.03980020806193352 2023-01-22 12:56:00.140416: step: 220/463, loss: 0.01664537750184536 2023-01-22 12:56:00.760662: step: 222/463, loss: 0.006049699615687132 2023-01-22 12:56:01.364518: step: 224/463, loss: 0.040885474532842636 2023-01-22 12:56:02.050745: step: 226/463, loss: 0.003250849200412631 2023-01-22 12:56:02.635747: step: 228/463, loss: 0.013019710779190063 2023-01-22 12:56:03.193622: step: 230/463, loss: 0.014770979061722755 2023-01-22 12:56:03.855259: step: 232/463, loss: 0.008929251693189144 2023-01-22 12:56:04.536126: step: 234/463, loss: 0.00699266605079174 2023-01-22 12:56:05.143408: step: 236/463, loss: 0.0014869315782561898 2023-01-22 12:56:05.744695: step: 238/463, loss: 0.02677600085735321 2023-01-22 12:56:06.361575: step: 240/463, loss: 0.004532995633780956 2023-01-22 12:56:06.965174: step: 242/463, loss: 0.014747112058103085 2023-01-22 12:56:07.581928: step: 244/463, loss: 0.0004476590547710657 2023-01-22 12:56:08.198046: step: 246/463, loss: 0.008005515672266483 2023-01-22 12:56:08.777594: step: 248/463, loss: 0.029232745990157127 2023-01-22 12:56:09.341700: step: 250/463, loss: 0.006342190317809582 2023-01-22 12:56:09.963413: step: 252/463, loss: 0.007136001251637936 2023-01-22 12:56:10.540257: step: 254/463, loss: 0.026874519884586334 2023-01-22 12:56:11.122584: step: 256/463, loss: 0.01919802464544773 2023-01-22 12:56:11.657412: step: 258/463, loss: 0.0003610389831010252 2023-01-22 12:56:12.344087: step: 260/463, loss: 0.037108078598976135 2023-01-22 12:56:12.979642: step: 262/463, loss: 0.0019849385134875774 2023-01-22 12:56:13.592257: step: 264/463, loss: 0.029811395332217216 2023-01-22 12:56:14.249670: step: 266/463, loss: 0.0082953330129385 2023-01-22 12:56:14.826375: step: 268/463, loss: 0.005881347693502903 2023-01-22 12:56:15.422719: step: 270/463, loss: 0.0014427980640903115 2023-01-22 12:56:16.028236: step: 272/463, loss: 0.023032061755657196 2023-01-22 12:56:16.650749: step: 274/463, loss: 0.004619150422513485 2023-01-22 12:56:17.283208: step: 276/463, loss: 0.0014236380811780691 2023-01-22 12:56:18.010816: step: 278/463, loss: 0.09615522623062134 2023-01-22 12:56:18.696104: step: 280/463, loss: 0.04531494528055191 2023-01-22 12:56:19.258752: step: 282/463, loss: 0.014902645722031593 2023-01-22 12:56:19.887610: step: 284/463, loss: 0.013980317860841751 2023-01-22 12:56:20.497633: step: 286/463, loss: 0.015187329612672329 2023-01-22 12:56:21.083629: step: 288/463, loss: 0.0742793008685112 2023-01-22 12:56:21.675644: step: 290/463, loss: 0.006699662655591965 2023-01-22 12:56:22.277342: step: 292/463, loss: 0.00824943371117115 2023-01-22 12:56:22.918898: step: 294/463, loss: 0.09274782985448837 2023-01-22 12:56:23.614102: step: 296/463, loss: 0.013319441117346287 2023-01-22 12:56:24.331646: step: 298/463, loss: 0.005755189340561628 2023-01-22 12:56:24.923397: step: 300/463, loss: 0.007902318611741066 2023-01-22 12:56:25.501274: step: 302/463, loss: 8.287282253149897e-05 2023-01-22 12:56:26.246962: step: 304/463, loss: 0.002167182043194771 2023-01-22 12:56:26.829341: step: 306/463, loss: 0.001773059950210154 2023-01-22 12:56:27.455704: step: 308/463, loss: 0.008001173846423626 2023-01-22 12:56:28.132689: step: 310/463, loss: 0.18523503839969635 2023-01-22 12:56:28.740656: step: 312/463, loss: 0.009070301428437233 2023-01-22 12:56:29.285401: step: 314/463, loss: 0.07790570706129074 2023-01-22 12:56:29.960230: step: 316/463, loss: 0.01883280836045742 2023-01-22 12:56:30.601973: step: 318/463, loss: 0.05135779827833176 2023-01-22 12:56:31.230470: step: 320/463, loss: 0.023418454453349113 2023-01-22 12:56:31.811432: step: 322/463, loss: 0.0054235984571278095 2023-01-22 12:56:32.365996: step: 324/463, loss: 0.0016468078829348087 2023-01-22 12:56:32.939923: step: 326/463, loss: 0.0018365428550168872 2023-01-22 12:56:33.554611: step: 328/463, loss: 0.028370002284646034 2023-01-22 12:56:34.128442: step: 330/463, loss: 0.06369420886039734 2023-01-22 12:56:34.708109: step: 332/463, loss: 0.0007014954462647438 2023-01-22 12:56:35.345127: step: 334/463, loss: 0.0654231384396553 2023-01-22 12:56:35.914913: step: 336/463, loss: 0.041736625134944916 2023-01-22 12:56:36.469762: step: 338/463, loss: 0.24929679930210114 2023-01-22 12:56:37.109572: step: 340/463, loss: 0.04755621403455734 2023-01-22 12:56:37.655200: step: 342/463, loss: 0.005220194347202778 2023-01-22 12:56:38.341977: step: 344/463, loss: 0.012860847637057304 2023-01-22 12:56:38.945735: step: 346/463, loss: 0.0004501325893215835 2023-01-22 12:56:39.543059: step: 348/463, loss: 0.004493432585150003 2023-01-22 12:56:40.163958: step: 350/463, loss: 0.018364235758781433 2023-01-22 12:56:40.746299: step: 352/463, loss: 0.00035102470428682864 2023-01-22 12:56:41.338487: step: 354/463, loss: 0.10908031463623047 2023-01-22 12:56:41.883306: step: 356/463, loss: 0.0040270364843308926 2023-01-22 12:56:42.460464: step: 358/463, loss: 0.04088394716382027 2023-01-22 12:56:43.061004: step: 360/463, loss: 0.003437537234276533 2023-01-22 12:56:43.669748: step: 362/463, loss: 0.006508524529635906 2023-01-22 12:56:44.231532: step: 364/463, loss: 0.49140796065330505 2023-01-22 12:56:44.913784: step: 366/463, loss: 0.014118066988885403 2023-01-22 12:56:45.521277: step: 368/463, loss: 0.35202130675315857 2023-01-22 12:56:46.133169: step: 370/463, loss: 0.1096949651837349 2023-01-22 12:56:46.719152: step: 372/463, loss: 0.007475042250007391 2023-01-22 12:56:47.342436: step: 374/463, loss: 0.008727510459721088 2023-01-22 12:56:47.911353: step: 376/463, loss: 0.014625866897404194 2023-01-22 12:56:48.472125: step: 378/463, loss: 0.01204013917595148 2023-01-22 12:56:49.076545: step: 380/463, loss: 0.03378046303987503 2023-01-22 12:56:49.644536: step: 382/463, loss: 0.002222589449957013 2023-01-22 12:56:50.212348: step: 384/463, loss: 0.005115623585879803 2023-01-22 12:56:50.844565: step: 386/463, loss: 0.006413538008928299 2023-01-22 12:56:51.458132: step: 388/463, loss: 0.008121415972709656 2023-01-22 12:56:52.068069: step: 390/463, loss: 0.015697646886110306 2023-01-22 12:56:52.652325: step: 392/463, loss: 0.03854619711637497 2023-01-22 12:56:53.232102: step: 394/463, loss: 0.010431385599076748 2023-01-22 12:56:53.824298: step: 396/463, loss: 0.004541287198662758 2023-01-22 12:56:54.443901: step: 398/463, loss: 0.0363098606467247 2023-01-22 12:56:54.990121: step: 400/463, loss: 0.00254978914745152 2023-01-22 12:56:55.620324: step: 402/463, loss: 0.04078345745801926 2023-01-22 12:56:56.185817: step: 404/463, loss: 0.03281532973051071 2023-01-22 12:56:56.814494: step: 406/463, loss: 0.013361765071749687 2023-01-22 12:56:57.423227: step: 408/463, loss: 0.012684313580393791 2023-01-22 12:56:58.118014: step: 410/463, loss: 0.022267932072281837 2023-01-22 12:56:58.721340: step: 412/463, loss: 0.0008494913927279413 2023-01-22 12:56:59.313687: step: 414/463, loss: 0.003827937413007021 2023-01-22 12:56:59.894328: step: 416/463, loss: 0.00808730162680149 2023-01-22 12:57:00.515477: step: 418/463, loss: 0.080779068171978 2023-01-22 12:57:01.166387: step: 420/463, loss: 0.0078439861536026 2023-01-22 12:57:01.899348: step: 422/463, loss: 0.06151185929775238 2023-01-22 12:57:02.472412: step: 424/463, loss: 0.0015386121813207865 2023-01-22 12:57:03.114764: step: 426/463, loss: 0.07064806669950485 2023-01-22 12:57:03.777807: step: 428/463, loss: 0.023667573928833008 2023-01-22 12:57:04.414294: step: 430/463, loss: 0.049251630902290344 2023-01-22 12:57:05.041724: step: 432/463, loss: 0.2999231219291687 2023-01-22 12:57:05.581361: step: 434/463, loss: 0.004364494699984789 2023-01-22 12:57:06.156638: step: 436/463, loss: 0.08532516658306122 2023-01-22 12:57:06.750920: step: 438/463, loss: 0.020749501883983612 2023-01-22 12:57:07.354864: step: 440/463, loss: 0.02663409151136875 2023-01-22 12:57:07.941513: step: 442/463, loss: 0.003548748092725873 2023-01-22 12:57:08.541923: step: 444/463, loss: 0.0015679626958444715 2023-01-22 12:57:09.149918: step: 446/463, loss: 0.0156058045104146 2023-01-22 12:57:09.773909: step: 448/463, loss: 0.041782036423683167 2023-01-22 12:57:10.425056: step: 450/463, loss: 0.0058049894869327545 2023-01-22 12:57:11.059854: step: 452/463, loss: 0.0071624526754021645 2023-01-22 12:57:11.725280: step: 454/463, loss: 0.038687292486429214 2023-01-22 12:57:12.311557: step: 456/463, loss: 0.0023365935776382685 2023-01-22 12:57:12.849921: step: 458/463, loss: 0.020690549165010452 2023-01-22 12:57:13.451766: step: 460/463, loss: 2.011099338531494 2023-01-22 12:57:14.054037: step: 462/463, loss: 0.0026375912129878998 2023-01-22 12:57:14.823535: step: 464/463, loss: 0.04222738370299339 2023-01-22 12:57:15.422571: step: 466/463, loss: 0.011043574661016464 2023-01-22 12:57:16.010163: step: 468/463, loss: 0.008948219940066338 2023-01-22 12:57:16.639053: step: 470/463, loss: 0.0036710118874907494 2023-01-22 12:57:17.249121: step: 472/463, loss: 0.027389252558350563 2023-01-22 12:57:17.873310: step: 474/463, loss: 0.007042410783469677 2023-01-22 12:57:18.522469: step: 476/463, loss: 0.054590508341789246 2023-01-22 12:57:19.121292: step: 478/463, loss: 0.01330722589045763 2023-01-22 12:57:19.689784: step: 480/463, loss: 0.01824328489601612 2023-01-22 12:57:20.286149: step: 482/463, loss: 0.010179009288549423 2023-01-22 12:57:20.919109: step: 484/463, loss: 0.0037826818879693747 2023-01-22 12:57:21.483065: step: 486/463, loss: 0.00548544293269515 2023-01-22 12:57:22.105008: step: 488/463, loss: 0.03571353852748871 2023-01-22 12:57:22.728001: step: 490/463, loss: 0.030300496146082878 2023-01-22 12:57:23.434775: step: 492/463, loss: 0.0065094707533717155 2023-01-22 12:57:24.066800: step: 494/463, loss: 0.004390700254589319 2023-01-22 12:57:24.604561: step: 496/463, loss: 0.001701120170764625 2023-01-22 12:57:25.243948: step: 498/463, loss: 0.001852832967415452 2023-01-22 12:57:25.910485: step: 500/463, loss: 0.011258398182690144 2023-01-22 12:57:26.486680: step: 502/463, loss: 0.004814359825104475 2023-01-22 12:57:27.093753: step: 504/463, loss: 0.0061867572367191315 2023-01-22 12:57:27.686836: step: 506/463, loss: 0.04522324353456497 2023-01-22 12:57:28.336428: step: 508/463, loss: 0.006622238550335169 2023-01-22 12:57:28.908571: step: 510/463, loss: 0.02634294144809246 2023-01-22 12:57:29.564000: step: 512/463, loss: 0.0019054700387641788 2023-01-22 12:57:30.147648: step: 514/463, loss: 0.006115755066275597 2023-01-22 12:57:30.803815: step: 516/463, loss: 0.042282894253730774 2023-01-22 12:57:31.419328: step: 518/463, loss: 0.008722420781850815 2023-01-22 12:57:31.993934: step: 520/463, loss: 0.11657079309225082 2023-01-22 12:57:32.606924: step: 522/463, loss: 0.13663256168365479 2023-01-22 12:57:33.256516: step: 524/463, loss: 0.028521951287984848 2023-01-22 12:57:33.939635: step: 526/463, loss: 0.03692437335848808 2023-01-22 12:57:34.556335: step: 528/463, loss: 0.008721224963665009 2023-01-22 12:57:35.171650: step: 530/463, loss: 0.05696066468954086 2023-01-22 12:57:35.769656: step: 532/463, loss: 1.4876073598861694 2023-01-22 12:57:36.356473: step: 534/463, loss: 0.007431971374899149 2023-01-22 12:57:37.006781: step: 536/463, loss: 0.02266770415008068 2023-01-22 12:57:37.668245: step: 538/463, loss: 0.1841118484735489 2023-01-22 12:57:38.272421: step: 540/463, loss: 0.016357596963644028 2023-01-22 12:57:38.899844: step: 542/463, loss: 0.006929911207407713 2023-01-22 12:57:39.497418: step: 544/463, loss: 0.022365828976035118 2023-01-22 12:57:40.085393: step: 546/463, loss: 0.0352771133184433 2023-01-22 12:57:40.743847: step: 548/463, loss: 0.0016185512067750096 2023-01-22 12:57:41.411051: step: 550/463, loss: 0.0004608993185684085 2023-01-22 12:57:41.999369: step: 552/463, loss: 0.012112148106098175 2023-01-22 12:57:42.580643: step: 554/463, loss: 0.01999003067612648 2023-01-22 12:57:43.225466: step: 556/463, loss: 0.03824234753847122 2023-01-22 12:57:43.866533: step: 558/463, loss: 0.001486281049437821 2023-01-22 12:57:44.541872: step: 560/463, loss: 0.0161641426384449 2023-01-22 12:57:45.107926: step: 562/463, loss: 0.013917661271989346 2023-01-22 12:57:45.695089: step: 564/463, loss: 0.015233626589179039 2023-01-22 12:57:46.311532: step: 566/463, loss: 0.009183204732835293 2023-01-22 12:57:46.936433: step: 568/463, loss: 0.011217552237212658 2023-01-22 12:57:47.570233: step: 570/463, loss: 0.0045104562304914 2023-01-22 12:57:48.181041: step: 572/463, loss: 0.0038712089881300926 2023-01-22 12:57:48.857618: step: 574/463, loss: 0.00613539619371295 2023-01-22 12:57:49.458467: step: 576/463, loss: 0.021922443062067032 2023-01-22 12:57:50.068380: step: 578/463, loss: 0.030069440603256226 2023-01-22 12:57:50.740450: step: 580/463, loss: 0.01946062408387661 2023-01-22 12:57:51.342800: step: 582/463, loss: 0.015328926965594292 2023-01-22 12:57:51.928029: step: 584/463, loss: 0.00012223079102113843 2023-01-22 12:57:52.526496: step: 586/463, loss: 0.033645614981651306 2023-01-22 12:57:53.129739: step: 588/463, loss: 0.03122386895120144 2023-01-22 12:57:53.797124: step: 590/463, loss: 0.03320504352450371 2023-01-22 12:57:54.430494: step: 592/463, loss: 0.0273686982691288 2023-01-22 12:57:55.122273: step: 594/463, loss: 0.05744955688714981 2023-01-22 12:57:55.754597: step: 596/463, loss: 0.022972291335463524 2023-01-22 12:57:56.413913: step: 598/463, loss: 0.02768358401954174 2023-01-22 12:57:56.991885: step: 600/463, loss: 0.006396604236215353 2023-01-22 12:57:57.599282: step: 602/463, loss: 0.046035610139369965 2023-01-22 12:57:58.209696: step: 604/463, loss: 0.0030810264870524406 2023-01-22 12:57:58.834397: step: 606/463, loss: 0.010916823521256447 2023-01-22 12:57:59.414453: step: 608/463, loss: 0.04350392892956734 2023-01-22 12:58:00.060572: step: 610/463, loss: 0.0021778522059321404 2023-01-22 12:58:00.592684: step: 612/463, loss: 0.002617267891764641 2023-01-22 12:58:01.262371: step: 614/463, loss: 0.009265408851206303 2023-01-22 12:58:01.845894: step: 616/463, loss: 0.005866225343197584 2023-01-22 12:58:02.542061: step: 618/463, loss: 0.015919432044029236 2023-01-22 12:58:03.191486: step: 620/463, loss: 0.011642727069556713 2023-01-22 12:58:03.866008: step: 622/463, loss: 0.005333344917744398 2023-01-22 12:58:04.465516: step: 624/463, loss: 0.016595320776104927 2023-01-22 12:58:05.183751: step: 626/463, loss: 0.01663575880229473 2023-01-22 12:58:05.788941: step: 628/463, loss: 0.003091169521212578 2023-01-22 12:58:06.369817: step: 630/463, loss: 0.0015520612942054868 2023-01-22 12:58:06.984526: step: 632/463, loss: 0.02732008509337902 2023-01-22 12:58:07.627449: step: 634/463, loss: 0.006296942010521889 2023-01-22 12:58:08.246123: step: 636/463, loss: 0.02850566804409027 2023-01-22 12:58:08.927896: step: 638/463, loss: 0.008473414927721024 2023-01-22 12:58:09.566994: step: 640/463, loss: 0.016142599284648895 2023-01-22 12:58:10.182293: step: 642/463, loss: 0.06030401214957237 2023-01-22 12:58:10.830731: step: 644/463, loss: 0.05786483362317085 2023-01-22 12:58:11.432112: step: 646/463, loss: 0.0005453546764329076 2023-01-22 12:58:12.052864: step: 648/463, loss: 0.07799121737480164 2023-01-22 12:58:12.643813: step: 650/463, loss: 0.13648895919322968 2023-01-22 12:58:13.239259: step: 652/463, loss: 0.006396754644811153 2023-01-22 12:58:13.878985: step: 654/463, loss: 0.04120725393295288 2023-01-22 12:58:14.468876: step: 656/463, loss: 0.001183871878311038 2023-01-22 12:58:15.102078: step: 658/463, loss: 0.0656622126698494 2023-01-22 12:58:15.714464: step: 660/463, loss: 0.0038908233400434256 2023-01-22 12:58:16.400394: step: 662/463, loss: 0.04772219806909561 2023-01-22 12:58:17.076107: step: 664/463, loss: 0.01838582754135132 2023-01-22 12:58:17.668215: step: 666/463, loss: 0.004813347943127155 2023-01-22 12:58:18.205717: step: 668/463, loss: 0.008449632674455643 2023-01-22 12:58:18.801576: step: 670/463, loss: 0.002942825900390744 2023-01-22 12:58:19.393430: step: 672/463, loss: 0.00027331389719620347 2023-01-22 12:58:20.004053: step: 674/463, loss: 0.03727958723902702 2023-01-22 12:58:20.658966: step: 676/463, loss: 0.0025262413546442986 2023-01-22 12:58:21.212397: step: 678/463, loss: 0.0007347504724748433 2023-01-22 12:58:21.776513: step: 680/463, loss: 0.00021508800273295492 2023-01-22 12:58:22.357263: step: 682/463, loss: 0.020790673792362213 2023-01-22 12:58:23.018500: step: 684/463, loss: 0.012854392640292645 2023-01-22 12:58:23.645719: step: 686/463, loss: 0.0023949958849698305 2023-01-22 12:58:24.369128: step: 688/463, loss: 0.015197242610156536 2023-01-22 12:58:24.978208: step: 690/463, loss: 0.02898489311337471 2023-01-22 12:58:25.605384: step: 692/463, loss: 0.46489542722702026 2023-01-22 12:58:26.210357: step: 694/463, loss: 0.006251441780477762 2023-01-22 12:58:26.711119: step: 696/463, loss: 0.00031785733881406486 2023-01-22 12:58:27.300311: step: 698/463, loss: 0.03094170242547989 2023-01-22 12:58:27.974350: step: 700/463, loss: 2.71785044670105 2023-01-22 12:58:28.568914: step: 702/463, loss: 0.02963750995695591 2023-01-22 12:58:29.151816: step: 704/463, loss: 0.028155282139778137 2023-01-22 12:58:29.755210: step: 706/463, loss: 0.054499782621860504 2023-01-22 12:58:30.378126: step: 708/463, loss: 0.12207955121994019 2023-01-22 12:58:30.983102: step: 710/463, loss: 0.062202781438827515 2023-01-22 12:58:31.583630: step: 712/463, loss: 0.003921740688383579 2023-01-22 12:58:32.146331: step: 714/463, loss: 0.009068318642675877 2023-01-22 12:58:32.785501: step: 716/463, loss: 0.001717057777568698 2023-01-22 12:58:33.489243: step: 718/463, loss: 0.2314022332429886 2023-01-22 12:58:34.092354: step: 720/463, loss: 0.003480197163298726 2023-01-22 12:58:34.688954: step: 722/463, loss: 0.010539277456700802 2023-01-22 12:58:35.257138: step: 724/463, loss: 0.0001856078888522461 2023-01-22 12:58:35.999785: step: 726/463, loss: 0.1042601689696312 2023-01-22 12:58:36.624631: step: 728/463, loss: 0.20359423756599426 2023-01-22 12:58:37.223152: step: 730/463, loss: 0.06622110307216644 2023-01-22 12:58:37.803651: step: 732/463, loss: 0.021832749247550964 2023-01-22 12:58:38.386146: step: 734/463, loss: 0.028036857023835182 2023-01-22 12:58:39.066399: step: 736/463, loss: 0.00857492070645094 2023-01-22 12:58:39.629315: step: 738/463, loss: 0.024369752034544945 2023-01-22 12:58:40.246705: step: 740/463, loss: 0.9981862902641296 2023-01-22 12:58:40.829066: step: 742/463, loss: 0.26254791021347046 2023-01-22 12:58:41.386957: step: 744/463, loss: 0.008379360660910606 2023-01-22 12:58:41.992877: step: 746/463, loss: 0.006708393804728985 2023-01-22 12:58:42.592129: step: 748/463, loss: 0.027649573981761932 2023-01-22 12:58:43.194423: step: 750/463, loss: 0.021381065249443054 2023-01-22 12:58:43.801951: step: 752/463, loss: 0.010988865047693253 2023-01-22 12:58:44.438560: step: 754/463, loss: 0.11998961120843887 2023-01-22 12:58:44.986387: step: 756/463, loss: 0.004171561915427446 2023-01-22 12:58:45.592353: step: 758/463, loss: 0.014124194160103798 2023-01-22 12:58:46.276455: step: 760/463, loss: 0.036059629172086716 2023-01-22 12:58:46.841530: step: 762/463, loss: 0.010031620971858501 2023-01-22 12:58:47.483133: step: 764/463, loss: 0.02956785447895527 2023-01-22 12:58:48.058115: step: 766/463, loss: 0.014752618037164211 2023-01-22 12:58:48.675955: step: 768/463, loss: 0.009555048309266567 2023-01-22 12:58:49.313039: step: 770/463, loss: 0.027097180485725403 2023-01-22 12:58:49.892290: step: 772/463, loss: 0.0016095516039058566 2023-01-22 12:58:50.479495: step: 774/463, loss: 0.32477274537086487 2023-01-22 12:58:51.076018: step: 776/463, loss: 0.03107057884335518 2023-01-22 12:58:51.682387: step: 778/463, loss: 0.0038266393821686506 2023-01-22 12:58:52.273357: step: 780/463, loss: 0.0018915997352451086 2023-01-22 12:58:52.848079: step: 782/463, loss: 0.025805462151765823 2023-01-22 12:58:53.398488: step: 784/463, loss: 0.009442784823477268 2023-01-22 12:58:54.002229: step: 786/463, loss: 0.3690575361251831 2023-01-22 12:58:54.563056: step: 788/463, loss: 0.002227095188573003 2023-01-22 12:58:55.183740: step: 790/463, loss: 0.007028195075690746 2023-01-22 12:58:55.786889: step: 792/463, loss: 0.02309034764766693 2023-01-22 12:58:56.403998: step: 794/463, loss: 0.021725161001086235 2023-01-22 12:58:56.949488: step: 796/463, loss: 0.02373715490102768 2023-01-22 12:58:57.503006: step: 798/463, loss: 0.0081197964027524 2023-01-22 12:58:58.093901: step: 800/463, loss: 0.019046910107135773 2023-01-22 12:58:58.597384: step: 802/463, loss: 0.010289816185832024 2023-01-22 12:58:59.178460: step: 804/463, loss: 0.010623391717672348 2023-01-22 12:58:59.800367: step: 806/463, loss: 0.19675587117671967 2023-01-22 12:59:00.377515: step: 808/463, loss: 0.004776483401656151 2023-01-22 12:59:00.970856: step: 810/463, loss: 0.039210911840200424 2023-01-22 12:59:01.553964: step: 812/463, loss: 0.17494267225265503 2023-01-22 12:59:02.169814: step: 814/463, loss: 0.004393057897686958 2023-01-22 12:59:02.809763: step: 816/463, loss: 0.05353159457445145 2023-01-22 12:59:03.456024: step: 818/463, loss: 0.018934201449155807 2023-01-22 12:59:04.124147: step: 820/463, loss: 0.003956741653382778 2023-01-22 12:59:04.750918: step: 822/463, loss: 0.002721026772633195 2023-01-22 12:59:05.358558: step: 824/463, loss: 0.12289454787969589 2023-01-22 12:59:05.962698: step: 826/463, loss: 0.009979347698390484 2023-01-22 12:59:06.605841: step: 828/463, loss: 0.031114686280488968 2023-01-22 12:59:07.258794: step: 830/463, loss: 0.002735384739935398 2023-01-22 12:59:07.863894: step: 832/463, loss: 0.033943697810173035 2023-01-22 12:59:08.405166: step: 834/463, loss: 0.05121907591819763 2023-01-22 12:59:09.009922: step: 836/463, loss: 0.014786218293011189 2023-01-22 12:59:09.600054: step: 838/463, loss: 0.044448621571063995 2023-01-22 12:59:10.130128: step: 840/463, loss: 6.978730380069464e-05 2023-01-22 12:59:10.782559: step: 842/463, loss: 0.022355465218424797 2023-01-22 12:59:11.426867: step: 844/463, loss: 0.03591838479042053 2023-01-22 12:59:12.072599: step: 846/463, loss: 3.351704799570143e-05 2023-01-22 12:59:12.673639: step: 848/463, loss: 0.022055551409721375 2023-01-22 12:59:13.240433: step: 850/463, loss: 0.07801473140716553 2023-01-22 12:59:13.832808: step: 852/463, loss: 0.6784647703170776 2023-01-22 12:59:14.447670: step: 854/463, loss: 0.02263505384325981 2023-01-22 12:59:15.095239: step: 856/463, loss: 0.045020654797554016 2023-01-22 12:59:15.660393: step: 858/463, loss: 0.05555715411901474 2023-01-22 12:59:16.309092: step: 860/463, loss: 0.03509535640478134 2023-01-22 12:59:16.861174: step: 862/463, loss: 0.0019300265703350306 2023-01-22 12:59:17.473055: step: 864/463, loss: 0.03359898179769516 2023-01-22 12:59:18.161809: step: 866/463, loss: 0.03566916286945343 2023-01-22 12:59:18.772255: step: 868/463, loss: 0.048562973737716675 2023-01-22 12:59:19.379132: step: 870/463, loss: 0.017183849588036537 2023-01-22 12:59:19.986967: step: 872/463, loss: 0.09051021933555603 2023-01-22 12:59:20.701520: step: 874/463, loss: 0.037447940558195114 2023-01-22 12:59:21.333807: step: 876/463, loss: 0.09816146641969681 2023-01-22 12:59:21.957104: step: 878/463, loss: 0.21727986633777618 2023-01-22 12:59:22.597194: step: 880/463, loss: 0.03683961182832718 2023-01-22 12:59:23.144302: step: 882/463, loss: 0.0132490498945117 2023-01-22 12:59:23.803366: step: 884/463, loss: 0.06462587416172028 2023-01-22 12:59:24.403082: step: 886/463, loss: 0.019473182037472725 2023-01-22 12:59:24.981776: step: 888/463, loss: 0.012223903089761734 2023-01-22 12:59:25.656404: step: 890/463, loss: 0.0008236998110078275 2023-01-22 12:59:26.231810: step: 892/463, loss: 0.904689371585846 2023-01-22 12:59:26.773464: step: 894/463, loss: 0.06158997491002083 2023-01-22 12:59:27.367058: step: 896/463, loss: 0.12429679930210114 2023-01-22 12:59:28.091854: step: 898/463, loss: 0.04664970189332962 2023-01-22 12:59:28.660834: step: 900/463, loss: 0.0627560168504715 2023-01-22 12:59:29.251769: step: 902/463, loss: 0.0368766151368618 2023-01-22 12:59:29.799740: step: 904/463, loss: 0.006379998754709959 2023-01-22 12:59:30.517513: step: 906/463, loss: 1.0913007259368896 2023-01-22 12:59:31.131630: step: 908/463, loss: 0.009168403223156929 2023-01-22 12:59:31.777356: step: 910/463, loss: 0.03069346956908703 2023-01-22 12:59:32.367377: step: 912/463, loss: 0.0005233348929323256 2023-01-22 12:59:32.962112: step: 914/463, loss: 0.009611212648451328 2023-01-22 12:59:33.543638: step: 916/463, loss: 0.01006375439465046 2023-01-22 12:59:34.149515: step: 918/463, loss: 0.0023787294048815966 2023-01-22 12:59:34.707384: step: 920/463, loss: 0.005271725356578827 2023-01-22 12:59:35.329973: step: 922/463, loss: 0.0032884960528463125 2023-01-22 12:59:35.877287: step: 924/463, loss: 0.01920892857015133 2023-01-22 12:59:36.478355: step: 926/463, loss: 0.1331748217344284 ================================================== Loss: 0.057 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33799664443346505, 'r': 0.33863800429007507, 'f1': 0.3383170203997527}, 'combined': 0.24928622555771252, 'epoch': 31} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33411367894821076, 'r': 0.39614581419411016, 'f1': 0.36249507640302087}, 'combined': 0.2809770448674133, 'epoch': 31} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2925356793914439, 'r': 0.34582491131094795, 'f1': 0.31695604914933834}, 'combined': 0.2335465625310914, 'epoch': 31} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3214254018638475, 'r': 0.40798573527019616, 'f1': 0.35956944509839883}, 'combined': 0.2787093306504336, 'epoch': 31} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3005610256638217, 'r': 0.3376321199107826, 'f1': 0.3180198877443833}, 'combined': 0.2343304436011245, 'epoch': 31} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3209367220881324, 'r': 0.3929115016740739, 'f1': 0.35329563125734903}, 'combined': 0.2738463744674189, 'epoch': 31} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24431818181818182, 'r': 0.30714285714285716, 'f1': 0.2721518987341772}, 'combined': 0.18143459915611815, 'epoch': 31} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2578125, 'r': 0.358695652173913, 'f1': 0.29999999999999993}, 'combined': 0.14999999999999997, 'epoch': 31} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3684210526315789, 'r': 0.2413793103448276, 'f1': 0.2916666666666667}, 'combined': 0.19444444444444445, 'epoch': 31} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3279711174242424, 'r': 0.3285934535104364, 'f1': 0.32828199052132695}, 'combined': 0.24189199301571457, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33699381251830834, 'r': 0.40575541764612494, 'f1': 0.3681917384478598}, 'combined': 0.2853926393710684, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32926829268292684, 'r': 0.38571428571428573, 'f1': 0.35526315789473684}, 'combined': 0.23684210526315788, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29021588054776093, 'r': 0.325460313859064, 'f1': 0.30682931199235547}, 'combined': 0.2260847562048935, 'epoch': 13} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32837198696491027, 'r': 0.4053341714098111, 'f1': 0.3628166009822085}, 'combined': 0.28122626487616165, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3333333333333333, 'r': 0.4782608695652174, 'f1': 0.3928571428571428}, 'combined': 0.1964285714285714, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30241528588023536, 'r': 0.33799355480732185, 'f1': 0.319216135095804}, 'combined': 0.2352118890179608, 'epoch': 18} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3270213595028262, 'r': 0.3847310111797955, 'f1': 0.3535366048679202}, 'combined': 0.2740331578402061, 'epoch': 18} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 18} ****************************** Epoch: 32 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 13:02:12.974692: step: 2/463, loss: 0.007539997808635235 2023-01-22 13:02:13.562919: step: 4/463, loss: 0.0285597313195467 2023-01-22 13:02:14.140373: step: 6/463, loss: 0.016809236258268356 2023-01-22 13:02:14.767526: step: 8/463, loss: 0.015142922289669514 2023-01-22 13:02:15.343282: step: 10/463, loss: 0.034306954592466354 2023-01-22 13:02:15.956238: step: 12/463, loss: 0.012067035771906376 2023-01-22 13:02:16.612963: step: 14/463, loss: 0.0035218135453760624 2023-01-22 13:02:17.164767: step: 16/463, loss: 0.04193190485239029 2023-01-22 13:02:17.783139: step: 18/463, loss: 0.1488228589296341 2023-01-22 13:02:18.408253: step: 20/463, loss: 0.08529949933290482 2023-01-22 13:02:18.998918: step: 22/463, loss: 0.000689376553054899 2023-01-22 13:02:19.613926: step: 24/463, loss: 0.008331475779414177 2023-01-22 13:02:20.223683: step: 26/463, loss: 0.019964156672358513 2023-01-22 13:02:20.833144: step: 28/463, loss: 0.009994626976549625 2023-01-22 13:02:21.475310: step: 30/463, loss: 0.049466222524642944 2023-01-22 13:02:22.133985: step: 32/463, loss: 0.004443508572876453 2023-01-22 13:02:22.718901: step: 34/463, loss: 0.02194046787917614 2023-01-22 13:02:23.395349: step: 36/463, loss: 0.03573227301239967 2023-01-22 13:02:23.973555: step: 38/463, loss: 0.022319650277495384 2023-01-22 13:02:24.590360: step: 40/463, loss: 0.0014002065872773528 2023-01-22 13:02:25.170042: step: 42/463, loss: 0.0004496430919971317 2023-01-22 13:02:25.747913: step: 44/463, loss: 0.015456403605639935 2023-01-22 13:02:26.330884: step: 46/463, loss: 0.028172114863991737 2023-01-22 13:02:26.915003: step: 48/463, loss: 0.011922353878617287 2023-01-22 13:02:27.488477: step: 50/463, loss: 0.003793282201513648 2023-01-22 13:02:28.175591: step: 52/463, loss: 0.10658351331949234 2023-01-22 13:02:28.804639: step: 54/463, loss: 0.03181418031454086 2023-01-22 13:02:29.354094: step: 56/463, loss: 0.002492751693353057 2023-01-22 13:02:29.933533: step: 58/463, loss: 0.02101484127342701 2023-01-22 13:02:30.514166: step: 60/463, loss: 0.0030175780411809683 2023-01-22 13:02:31.112382: step: 62/463, loss: 0.05303362384438515 2023-01-22 13:02:31.743703: step: 64/463, loss: 0.0007619561511091888 2023-01-22 13:02:32.359887: step: 66/463, loss: 0.011478719301521778 2023-01-22 13:02:32.954292: step: 68/463, loss: 0.016095656901597977 2023-01-22 13:02:33.509623: step: 70/463, loss: 0.14240337908267975 2023-01-22 13:02:34.153587: step: 72/463, loss: 0.00042851909529417753 2023-01-22 13:02:34.759697: step: 74/463, loss: 0.04007928818464279 2023-01-22 13:02:35.405828: step: 76/463, loss: 0.0029200424905866385 2023-01-22 13:02:36.010426: step: 78/463, loss: 0.002364977030083537 2023-01-22 13:02:36.636421: step: 80/463, loss: 0.0036983764730393887 2023-01-22 13:02:37.275174: step: 82/463, loss: 0.02411513216793537 2023-01-22 13:02:37.891417: step: 84/463, loss: 0.0014074050122871995 2023-01-22 13:02:38.640229: step: 86/463, loss: 0.0342196524143219 2023-01-22 13:02:39.307648: step: 88/463, loss: 0.0018354646163061261 2023-01-22 13:02:39.891100: step: 90/463, loss: 0.02250245213508606 2023-01-22 13:02:40.579189: step: 92/463, loss: 0.033345721662044525 2023-01-22 13:02:41.114484: step: 94/463, loss: 0.0336601585149765 2023-01-22 13:02:41.734775: step: 96/463, loss: 0.04659218713641167 2023-01-22 13:02:42.393252: step: 98/463, loss: 0.0011301238555461168 2023-01-22 13:02:43.047215: step: 100/463, loss: 0.004545257892459631 2023-01-22 13:02:43.644906: step: 102/463, loss: 0.009769748896360397 2023-01-22 13:02:44.293196: step: 104/463, loss: 0.003323189914226532 2023-01-22 13:02:44.908124: step: 106/463, loss: 0.08884399384260178 2023-01-22 13:02:45.522334: step: 108/463, loss: 0.024232499301433563 2023-01-22 13:02:46.150964: step: 110/463, loss: 0.006647381000220776 2023-01-22 13:02:46.833878: step: 112/463, loss: 0.005637145601212978 2023-01-22 13:02:47.420493: step: 114/463, loss: 0.011465215124189854 2023-01-22 13:02:47.975078: step: 116/463, loss: 0.20188608765602112 2023-01-22 13:02:48.607884: step: 118/463, loss: 0.016808904707431793 2023-01-22 13:02:49.180288: step: 120/463, loss: 0.000454298424301669 2023-01-22 13:02:49.827078: step: 122/463, loss: 0.017502423375844955 2023-01-22 13:02:50.393455: step: 124/463, loss: 0.005748111288994551 2023-01-22 13:02:50.963559: step: 126/463, loss: 0.035578783601522446 2023-01-22 13:02:51.599674: step: 128/463, loss: 0.2723863124847412 2023-01-22 13:02:52.219511: step: 130/463, loss: 0.03454357758164406 2023-01-22 13:02:52.830392: step: 132/463, loss: 4.4352535041980445e-05 2023-01-22 13:02:53.446731: step: 134/463, loss: 0.4914063811302185 2023-01-22 13:02:54.006122: step: 136/463, loss: 0.035584814846515656 2023-01-22 13:02:54.607944: step: 138/463, loss: 0.16930851340293884 2023-01-22 13:02:55.261205: step: 140/463, loss: 0.03407140076160431 2023-01-22 13:02:55.825289: step: 142/463, loss: 0.0005401013768278062 2023-01-22 13:02:56.462573: step: 144/463, loss: 0.0006070762756280601 2023-01-22 13:02:57.078558: step: 146/463, loss: 0.029754312708973885 2023-01-22 13:02:57.740939: step: 148/463, loss: 0.010098290629684925 2023-01-22 13:02:58.270444: step: 150/463, loss: 0.023425571620464325 2023-01-22 13:02:58.836778: step: 152/463, loss: 0.0023472870234400034 2023-01-22 13:02:59.428423: step: 154/463, loss: 0.007180437911301851 2023-01-22 13:03:00.000470: step: 156/463, loss: 0.000614872551523149 2023-01-22 13:03:00.542920: step: 158/463, loss: 0.0002431828179396689 2023-01-22 13:03:01.117857: step: 160/463, loss: 0.5747070908546448 2023-01-22 13:03:01.700242: step: 162/463, loss: 0.003977107349783182 2023-01-22 13:03:02.278712: step: 164/463, loss: 0.006228742189705372 2023-01-22 13:03:02.897935: step: 166/463, loss: 0.02033989131450653 2023-01-22 13:03:03.626205: step: 168/463, loss: 0.026744529604911804 2023-01-22 13:03:04.225248: step: 170/463, loss: 0.00286267907358706 2023-01-22 13:03:04.836324: step: 172/463, loss: 0.009174594655632973 2023-01-22 13:03:05.412658: step: 174/463, loss: 0.001380595494993031 2023-01-22 13:03:05.997023: step: 176/463, loss: 0.015228061936795712 2023-01-22 13:03:06.625357: step: 178/463, loss: 0.009595110081136227 2023-01-22 13:03:07.246947: step: 180/463, loss: 0.0075636752881109715 2023-01-22 13:03:07.896197: step: 182/463, loss: 0.01381948497146368 2023-01-22 13:03:08.471135: step: 184/463, loss: 0.006805033423006535 2023-01-22 13:03:09.148122: step: 186/463, loss: 0.014521433971822262 2023-01-22 13:03:09.664725: step: 188/463, loss: 0.0005430255550891161 2023-01-22 13:03:10.291926: step: 190/463, loss: 0.11562582850456238 2023-01-22 13:03:10.913179: step: 192/463, loss: 0.006288902834057808 2023-01-22 13:03:11.547221: step: 194/463, loss: 0.024572215974330902 2023-01-22 13:03:12.224031: step: 196/463, loss: 0.42376944422721863 2023-01-22 13:03:12.835188: step: 198/463, loss: 0.028134725987911224 2023-01-22 13:03:13.444608: step: 200/463, loss: 0.06974295526742935 2023-01-22 13:03:14.027750: step: 202/463, loss: 0.028500178828835487 2023-01-22 13:03:14.636752: step: 204/463, loss: 0.033555179834365845 2023-01-22 13:03:15.233361: step: 206/463, loss: 0.017418064177036285 2023-01-22 13:03:15.814733: step: 208/463, loss: 0.005509042646735907 2023-01-22 13:03:16.400368: step: 210/463, loss: 0.00871426984667778 2023-01-22 13:03:17.023420: step: 212/463, loss: 0.009560792706906796 2023-01-22 13:03:17.634463: step: 214/463, loss: 0.08527348935604095 2023-01-22 13:03:18.247080: step: 216/463, loss: 0.0003343084827065468 2023-01-22 13:03:18.826356: step: 218/463, loss: 0.007383414078503847 2023-01-22 13:03:19.427123: step: 220/463, loss: 0.006665619555860758 2023-01-22 13:03:20.029145: step: 222/463, loss: 0.020200714468955994 2023-01-22 13:03:20.601921: step: 224/463, loss: 0.0008119925041683018 2023-01-22 13:03:21.172307: step: 226/463, loss: 0.005870304070413113 2023-01-22 13:03:21.790927: step: 228/463, loss: 0.0017156031681224704 2023-01-22 13:03:22.410676: step: 230/463, loss: 0.016523635014891624 2023-01-22 13:03:23.013056: step: 232/463, loss: 0.20136921107769012 2023-01-22 13:03:23.609187: step: 234/463, loss: 0.020561877638101578 2023-01-22 13:03:24.296074: step: 236/463, loss: 0.008616182021796703 2023-01-22 13:03:24.953169: step: 238/463, loss: 0.0021359645761549473 2023-01-22 13:03:25.550018: step: 240/463, loss: 0.03449198231101036 2023-01-22 13:03:26.146336: step: 242/463, loss: 0.006085831671953201 2023-01-22 13:03:26.796247: step: 244/463, loss: 0.00014921504771336913 2023-01-22 13:03:27.405344: step: 246/463, loss: 0.005383210722357035 2023-01-22 13:03:28.005918: step: 248/463, loss: 3.426999092102051 2023-01-22 13:03:28.575786: step: 250/463, loss: 0.0036993036046624184 2023-01-22 13:03:29.185212: step: 252/463, loss: 0.0005066048470325768 2023-01-22 13:03:29.701559: step: 254/463, loss: 0.008969089947640896 2023-01-22 13:03:30.341979: step: 256/463, loss: 0.006283185910433531 2023-01-22 13:03:30.963659: step: 258/463, loss: 0.003469745861366391 2023-01-22 13:03:31.540953: step: 260/463, loss: 0.004944317974150181 2023-01-22 13:03:32.148745: step: 262/463, loss: 0.012042064219713211 2023-01-22 13:03:32.719225: step: 264/463, loss: 0.001941087655723095 2023-01-22 13:03:33.281390: step: 266/463, loss: 0.0346544124186039 2023-01-22 13:03:33.861072: step: 268/463, loss: 0.13292662799358368 2023-01-22 13:03:34.448697: step: 270/463, loss: 0.2711741626262665 2023-01-22 13:03:35.052858: step: 272/463, loss: 0.10146824270486832 2023-01-22 13:03:35.746616: step: 274/463, loss: 0.035470522940158844 2023-01-22 13:03:36.319443: step: 276/463, loss: 0.023216169327497482 2023-01-22 13:03:36.907957: step: 278/463, loss: 0.00018035976972896606 2023-01-22 13:03:37.425981: step: 280/463, loss: 0.0031747580505907536 2023-01-22 13:03:37.968274: step: 282/463, loss: 0.020259961485862732 2023-01-22 13:03:38.588938: step: 284/463, loss: 0.047636549919843674 2023-01-22 13:03:39.108427: step: 286/463, loss: 0.0012870192294940352 2023-01-22 13:03:39.686683: step: 288/463, loss: 0.028955060988664627 2023-01-22 13:03:40.285040: step: 290/463, loss: 0.004549533594399691 2023-01-22 13:03:40.889892: step: 292/463, loss: 0.0027782809920608997 2023-01-22 13:03:41.594962: step: 294/463, loss: 0.05999428778886795 2023-01-22 13:03:42.157518: step: 296/463, loss: 0.09269710630178452 2023-01-22 13:03:42.766008: step: 298/463, loss: 0.0128501420840621 2023-01-22 13:03:43.408064: step: 300/463, loss: 0.008558955043554306 2023-01-22 13:03:43.966331: step: 302/463, loss: 0.030647819861769676 2023-01-22 13:03:44.551966: step: 304/463, loss: 0.0019651339389383793 2023-01-22 13:03:45.171706: step: 306/463, loss: 0.026631858199834824 2023-01-22 13:03:45.738031: step: 308/463, loss: 0.0030862498097121716 2023-01-22 13:03:46.392970: step: 310/463, loss: 0.026625145226716995 2023-01-22 13:03:47.075341: step: 312/463, loss: 0.0040268730372190475 2023-01-22 13:03:47.579821: step: 314/463, loss: 0.013235168531537056 2023-01-22 13:03:48.198274: step: 316/463, loss: 0.0004697859985753894 2023-01-22 13:03:48.827251: step: 318/463, loss: 0.0053980364464223385 2023-01-22 13:03:49.422026: step: 320/463, loss: 0.007824977859854698 2023-01-22 13:03:50.111682: step: 322/463, loss: 0.014681877568364143 2023-01-22 13:03:50.711933: step: 324/463, loss: 0.003200782462954521 2023-01-22 13:03:51.319854: step: 326/463, loss: 0.01203584298491478 2023-01-22 13:03:51.945650: step: 328/463, loss: 0.0701024979352951 2023-01-22 13:03:52.565371: step: 330/463, loss: 0.017137421295046806 2023-01-22 13:03:53.175389: step: 332/463, loss: 0.01446082629263401 2023-01-22 13:03:53.731449: step: 334/463, loss: 0.006402396131306887 2023-01-22 13:03:54.367120: step: 336/463, loss: 0.016582287847995758 2023-01-22 13:03:54.968309: step: 338/463, loss: 0.05337226018309593 2023-01-22 13:03:55.623529: step: 340/463, loss: 0.004474237561225891 2023-01-22 13:03:56.160033: step: 342/463, loss: 0.0021448079496622086 2023-01-22 13:03:56.762196: step: 344/463, loss: 0.00017759088950697333 2023-01-22 13:03:57.373722: step: 346/463, loss: 0.004703349433839321 2023-01-22 13:03:57.963443: step: 348/463, loss: 0.005637302063405514 2023-01-22 13:03:58.575599: step: 350/463, loss: 0.01383545808494091 2023-01-22 13:03:59.202844: step: 352/463, loss: 0.051064323633909225 2023-01-22 13:03:59.778027: step: 354/463, loss: 0.0009390169871039689 2023-01-22 13:04:00.551092: step: 356/463, loss: 0.2315608263015747 2023-01-22 13:04:01.142143: step: 358/463, loss: 0.0003287666477262974 2023-01-22 13:04:01.731464: step: 360/463, loss: 0.03099549002945423 2023-01-22 13:04:02.284718: step: 362/463, loss: 0.7973887920379639 2023-01-22 13:04:02.946399: step: 364/463, loss: 0.01659395545721054 2023-01-22 13:04:03.605672: step: 366/463, loss: 0.005600487347692251 2023-01-22 13:04:04.179826: step: 368/463, loss: 0.0022889862302690744 2023-01-22 13:04:04.803345: step: 370/463, loss: 0.0028396525885909796 2023-01-22 13:04:05.449510: step: 372/463, loss: 0.2733951807022095 2023-01-22 13:04:06.058680: step: 374/463, loss: 0.0029625236056745052 2023-01-22 13:04:06.649120: step: 376/463, loss: 0.04764530435204506 2023-01-22 13:04:07.258906: step: 378/463, loss: 0.0473158173263073 2023-01-22 13:04:07.880816: step: 380/463, loss: 0.021352553740143776 2023-01-22 13:04:08.465690: step: 382/463, loss: 0.003747256938368082 2023-01-22 13:04:09.037318: step: 384/463, loss: 7.466435636160895e-05 2023-01-22 13:04:09.707725: step: 386/463, loss: 0.007939601317048073 2023-01-22 13:04:10.271301: step: 388/463, loss: 0.07944590598344803 2023-01-22 13:04:10.861233: step: 390/463, loss: 0.000556295330170542 2023-01-22 13:04:11.480391: step: 392/463, loss: 0.03293980285525322 2023-01-22 13:04:12.052812: step: 394/463, loss: 0.01663028448820114 2023-01-22 13:04:12.614149: step: 396/463, loss: 0.02180313877761364 2023-01-22 13:04:13.177310: step: 398/463, loss: 0.00047571311006322503 2023-01-22 13:04:13.815700: step: 400/463, loss: 0.018062826246023178 2023-01-22 13:04:14.377221: step: 402/463, loss: 0.001344798132777214 2023-01-22 13:04:14.968846: step: 404/463, loss: 0.005027064122259617 2023-01-22 13:04:15.589678: step: 406/463, loss: 0.7167328596115112 2023-01-22 13:04:16.215218: step: 408/463, loss: 0.08638081699609756 2023-01-22 13:04:16.833060: step: 410/463, loss: 0.002496791537851095 2023-01-22 13:04:17.437057: step: 412/463, loss: 6.884579488541931e-05 2023-01-22 13:04:18.048276: step: 414/463, loss: 0.02883158251643181 2023-01-22 13:04:18.608104: step: 416/463, loss: 0.01060231402516365 2023-01-22 13:04:19.169103: step: 418/463, loss: 0.008587300777435303 2023-01-22 13:04:19.704509: step: 420/463, loss: 0.0019782360177487135 2023-01-22 13:04:20.332843: step: 422/463, loss: 0.1591232419013977 2023-01-22 13:04:20.854970: step: 424/463, loss: 0.010507761500775814 2023-01-22 13:04:21.439388: step: 426/463, loss: 0.011206735856831074 2023-01-22 13:04:22.110819: step: 428/463, loss: 0.04298040270805359 2023-01-22 13:04:22.769114: step: 430/463, loss: 0.000632758135907352 2023-01-22 13:04:23.452228: step: 432/463, loss: 0.012523945420980453 2023-01-22 13:04:24.067501: step: 434/463, loss: 0.36266952753067017 2023-01-22 13:04:24.659839: step: 436/463, loss: 0.011708064936101437 2023-01-22 13:04:25.266916: step: 438/463, loss: 0.001885719713754952 2023-01-22 13:04:25.885403: step: 440/463, loss: 0.014458074234426022 2023-01-22 13:04:26.521468: step: 442/463, loss: 0.026317961513996124 2023-01-22 13:04:27.189057: step: 444/463, loss: 0.004992858041077852 2023-01-22 13:04:27.781777: step: 446/463, loss: 0.0016601731767877936 2023-01-22 13:04:28.386489: step: 448/463, loss: 0.05608716979622841 2023-01-22 13:04:29.014274: step: 450/463, loss: 0.012247040867805481 2023-01-22 13:04:29.625867: step: 452/463, loss: 0.039441097527742386 2023-01-22 13:04:30.193818: step: 454/463, loss: 0.02453218773007393 2023-01-22 13:04:30.735187: step: 456/463, loss: 0.0062887161038815975 2023-01-22 13:04:31.409632: step: 458/463, loss: 0.01657373271882534 2023-01-22 13:04:31.977284: step: 460/463, loss: 0.012005730532109737 2023-01-22 13:04:32.630927: step: 462/463, loss: 0.028833623975515366 2023-01-22 13:04:33.246750: step: 464/463, loss: 0.03986469656229019 2023-01-22 13:04:33.809365: step: 466/463, loss: 0.0006596514140255749 2023-01-22 13:04:34.401294: step: 468/463, loss: 0.009200339205563068 2023-01-22 13:04:34.984693: step: 470/463, loss: 0.15731744468212128 2023-01-22 13:04:35.604415: step: 472/463, loss: 0.0037326219025999308 2023-01-22 13:04:36.197218: step: 474/463, loss: 0.0013432613341137767 2023-01-22 13:04:36.809043: step: 476/463, loss: 0.004083434119820595 2023-01-22 13:04:37.419104: step: 478/463, loss: 0.02273726649582386 2023-01-22 13:04:38.073634: step: 480/463, loss: 0.011660085991024971 2023-01-22 13:04:38.615542: step: 482/463, loss: 0.02291260100901127 2023-01-22 13:04:39.268566: step: 484/463, loss: 0.016993245109915733 2023-01-22 13:04:39.862029: step: 486/463, loss: 0.0034471338149160147 2023-01-22 13:04:40.528350: step: 488/463, loss: 0.02825026400387287 2023-01-22 13:04:41.151042: step: 490/463, loss: 0.0016568704741075635 2023-01-22 13:04:41.738595: step: 492/463, loss: 0.35882633924484253 2023-01-22 13:04:42.353274: step: 494/463, loss: 0.039255015552043915 2023-01-22 13:04:42.987706: step: 496/463, loss: 0.012035921216011047 2023-01-22 13:04:43.594614: step: 498/463, loss: 0.05582023039460182 2023-01-22 13:04:44.273989: step: 500/463, loss: 0.005246494431048632 2023-01-22 13:04:44.858057: step: 502/463, loss: 0.008546480908989906 2023-01-22 13:04:45.431241: step: 504/463, loss: 0.001535041257739067 2023-01-22 13:04:46.021009: step: 506/463, loss: 0.026231754571199417 2023-01-22 13:04:46.674293: step: 508/463, loss: 0.00016216209041886032 2023-01-22 13:04:47.287865: step: 510/463, loss: 0.0019158277427777648 2023-01-22 13:04:47.886528: step: 512/463, loss: 0.020430097356438637 2023-01-22 13:04:48.514119: step: 514/463, loss: 0.0023506670258939266 2023-01-22 13:04:49.058455: step: 516/463, loss: 0.0008170974906533957 2023-01-22 13:04:49.642212: step: 518/463, loss: 0.016291765496134758 2023-01-22 13:04:50.200921: step: 520/463, loss: 6.906566704856232e-05 2023-01-22 13:04:50.860132: step: 522/463, loss: 0.005249998532235622 2023-01-22 13:04:51.476922: step: 524/463, loss: 0.01802987977862358 2023-01-22 13:04:52.001540: step: 526/463, loss: 0.02856077067553997 2023-01-22 13:04:52.580847: step: 528/463, loss: 0.007953857071697712 2023-01-22 13:04:53.170602: step: 530/463, loss: 0.0005640205927193165 2023-01-22 13:04:53.832940: step: 532/463, loss: 0.03563994541764259 2023-01-22 13:04:54.521928: step: 534/463, loss: 0.0012998809106647968 2023-01-22 13:04:55.140270: step: 536/463, loss: 0.04176727682352066 2023-01-22 13:04:55.724123: step: 538/463, loss: 0.0002324201923329383 2023-01-22 13:04:56.346120: step: 540/463, loss: 0.0004154304333496839 2023-01-22 13:04:56.940761: step: 542/463, loss: 0.023231608793139458 2023-01-22 13:04:57.581740: step: 544/463, loss: 0.03646976128220558 2023-01-22 13:04:58.196858: step: 546/463, loss: 0.05267562344670296 2023-01-22 13:04:58.807514: step: 548/463, loss: 0.011600497178733349 2023-01-22 13:04:59.433605: step: 550/463, loss: 0.015198652632534504 2023-01-22 13:05:00.047342: step: 552/463, loss: 0.6468573808670044 2023-01-22 13:05:00.569070: step: 554/463, loss: 0.006744857877492905 2023-01-22 13:05:01.170282: step: 556/463, loss: 0.005127535201609135 2023-01-22 13:05:01.752001: step: 558/463, loss: 0.0048785884864628315 2023-01-22 13:05:02.331741: step: 560/463, loss: 0.003410136792808771 2023-01-22 13:05:02.986432: step: 562/463, loss: 0.005176797974854708 2023-01-22 13:05:03.609086: step: 564/463, loss: 0.001908926758915186 2023-01-22 13:05:04.210688: step: 566/463, loss: 1.9047214664169587e-05 2023-01-22 13:05:04.805134: step: 568/463, loss: 0.06040994077920914 2023-01-22 13:05:05.370650: step: 570/463, loss: 0.0005264796782284975 2023-01-22 13:05:05.991268: step: 572/463, loss: 0.00658460333943367 2023-01-22 13:05:06.559610: step: 574/463, loss: 0.0541391484439373 2023-01-22 13:05:07.220289: step: 576/463, loss: 0.03383643925189972 2023-01-22 13:05:07.878247: step: 578/463, loss: 0.035475488752126694 2023-01-22 13:05:08.472652: step: 580/463, loss: 0.00944242812693119 2023-01-22 13:05:09.068487: step: 582/463, loss: 0.005249816458672285 2023-01-22 13:05:09.644669: step: 584/463, loss: 0.010450259782373905 2023-01-22 13:05:10.229394: step: 586/463, loss: 0.0034519657492637634 2023-01-22 13:05:10.749371: step: 588/463, loss: 0.00016075785970315337 2023-01-22 13:05:11.423852: step: 590/463, loss: 0.06734348833560944 2023-01-22 13:05:12.008774: step: 592/463, loss: 0.005445543210953474 2023-01-22 13:05:12.563442: step: 594/463, loss: 0.00023987282474990934 2023-01-22 13:05:13.170534: step: 596/463, loss: 0.07066363096237183 2023-01-22 13:05:13.743335: step: 598/463, loss: 0.009559571743011475 2023-01-22 13:05:14.325632: step: 600/463, loss: 0.019260212779045105 2023-01-22 13:05:14.906689: step: 602/463, loss: 0.054603107273578644 2023-01-22 13:05:15.504928: step: 604/463, loss: 0.0011515539372339845 2023-01-22 13:05:16.045581: step: 606/463, loss: 0.005755016580224037 2023-01-22 13:05:16.648188: step: 608/463, loss: 0.012383447960019112 2023-01-22 13:05:17.256914: step: 610/463, loss: 0.0011383414966985583 2023-01-22 13:05:17.843796: step: 612/463, loss: 0.031573131680488586 2023-01-22 13:05:18.362317: step: 614/463, loss: 0.000995947397314012 2023-01-22 13:05:19.038643: step: 616/463, loss: 0.7395897507667542 2023-01-22 13:05:19.667467: step: 618/463, loss: 0.048117369413375854 2023-01-22 13:05:20.190411: step: 620/463, loss: 0.03181912750005722 2023-01-22 13:05:20.776875: step: 622/463, loss: 0.0028155629988759756 2023-01-22 13:05:21.380908: step: 624/463, loss: 0.16174601018428802 2023-01-22 13:05:21.979178: step: 626/463, loss: 0.04744167998433113 2023-01-22 13:05:22.616761: step: 628/463, loss: 0.013348712585866451 2023-01-22 13:05:23.216509: step: 630/463, loss: 0.027135038748383522 2023-01-22 13:05:23.898989: step: 632/463, loss: 0.03360970318317413 2023-01-22 13:05:24.586218: step: 634/463, loss: 0.03294328972697258 2023-01-22 13:05:25.249195: step: 636/463, loss: 0.03222019597887993 2023-01-22 13:05:25.843807: step: 638/463, loss: 0.0018262427765876055 2023-01-22 13:05:26.422367: step: 640/463, loss: 0.02675807476043701 2023-01-22 13:05:27.039186: step: 642/463, loss: 0.025562116876244545 2023-01-22 13:05:27.647944: step: 644/463, loss: 0.06224594637751579 2023-01-22 13:05:28.241004: step: 646/463, loss: 0.04108177497982979 2023-01-22 13:05:28.820098: step: 648/463, loss: 0.0028384001925587654 2023-01-22 13:05:29.466324: step: 650/463, loss: 0.0018289118306711316 2023-01-22 13:05:30.137450: step: 652/463, loss: 0.014852159656584263 2023-01-22 13:05:30.757818: step: 654/463, loss: 0.001475636730901897 2023-01-22 13:05:31.318224: step: 656/463, loss: 0.02210184745490551 2023-01-22 13:05:31.893178: step: 658/463, loss: 0.01853053830564022 2023-01-22 13:05:32.490253: step: 660/463, loss: 0.09320429712533951 2023-01-22 13:05:33.042869: step: 662/463, loss: 0.015808576717972755 2023-01-22 13:05:33.674771: step: 664/463, loss: 0.00013743633462581784 2023-01-22 13:05:34.231505: step: 666/463, loss: 0.0068207248114049435 2023-01-22 13:05:34.849060: step: 668/463, loss: 0.029963258653879166 2023-01-22 13:05:35.422011: step: 670/463, loss: 0.008672508411109447 2023-01-22 13:05:36.050689: step: 672/463, loss: 0.0331050269305706 2023-01-22 13:05:36.665408: step: 674/463, loss: 0.018555283546447754 2023-01-22 13:05:37.343767: step: 676/463, loss: 0.0025362588930875063 2023-01-22 13:05:38.058493: step: 678/463, loss: 0.01964394934475422 2023-01-22 13:05:38.620247: step: 680/463, loss: 0.009995434433221817 2023-01-22 13:05:39.220204: step: 682/463, loss: 0.0027733054012060165 2023-01-22 13:05:39.825608: step: 684/463, loss: 0.022071074694395065 2023-01-22 13:05:40.436806: step: 686/463, loss: 0.025674713775515556 2023-01-22 13:05:41.098778: step: 688/463, loss: 0.007691400125622749 2023-01-22 13:05:41.852107: step: 690/463, loss: 0.07100996375083923 2023-01-22 13:05:42.529966: step: 692/463, loss: 0.026908760890364647 2023-01-22 13:05:43.109591: step: 694/463, loss: 0.004746427293866873 2023-01-22 13:05:43.770988: step: 696/463, loss: 0.030488725751638412 2023-01-22 13:05:44.387711: step: 698/463, loss: 0.0661323070526123 2023-01-22 13:05:45.026681: step: 700/463, loss: 0.016116736456751823 2023-01-22 13:05:45.625033: step: 702/463, loss: 0.03909580036997795 2023-01-22 13:05:46.164537: step: 704/463, loss: 0.016833040863275528 2023-01-22 13:05:46.786896: step: 706/463, loss: 0.005295882932841778 2023-01-22 13:05:47.405636: step: 708/463, loss: 0.00467087235301733 2023-01-22 13:05:48.039510: step: 710/463, loss: 0.025982731953263283 2023-01-22 13:05:48.625714: step: 712/463, loss: 0.02441459521651268 2023-01-22 13:05:49.186650: step: 714/463, loss: 0.007864666171371937 2023-01-22 13:05:49.807388: step: 716/463, loss: 0.10547102987766266 2023-01-22 13:05:50.390225: step: 718/463, loss: 0.004200815688818693 2023-01-22 13:05:50.982698: step: 720/463, loss: 0.0004409697721712291 2023-01-22 13:05:51.551759: step: 722/463, loss: 0.08174954354763031 2023-01-22 13:05:52.234335: step: 724/463, loss: 0.07840759307146072 2023-01-22 13:05:52.769890: step: 726/463, loss: 0.0043959952890872955 2023-01-22 13:05:53.413394: step: 728/463, loss: 0.18816347420215607 2023-01-22 13:05:54.032558: step: 730/463, loss: 0.03758756443858147 2023-01-22 13:05:54.615849: step: 732/463, loss: 0.01197047159075737 2023-01-22 13:05:55.217751: step: 734/463, loss: 0.009181518107652664 2023-01-22 13:05:55.845060: step: 736/463, loss: 0.03412233665585518 2023-01-22 13:05:56.424763: step: 738/463, loss: 0.02147078327834606 2023-01-22 13:05:57.001688: step: 740/463, loss: 0.00813200417906046 2023-01-22 13:05:57.696645: step: 742/463, loss: 0.0014224787009879947 2023-01-22 13:05:58.373821: step: 744/463, loss: 0.015566750429570675 2023-01-22 13:05:58.943410: step: 746/463, loss: 0.0007399892783723772 2023-01-22 13:05:59.521252: step: 748/463, loss: 0.009280719794332981 2023-01-22 13:06:00.144084: step: 750/463, loss: 0.013851113617420197 2023-01-22 13:06:00.725733: step: 752/463, loss: 0.016852153465151787 2023-01-22 13:06:01.324987: step: 754/463, loss: 0.00749478954821825 2023-01-22 13:06:02.022300: step: 756/463, loss: 0.0030878072138875723 2023-01-22 13:06:02.649723: step: 758/463, loss: 0.04691128432750702 2023-01-22 13:06:03.303040: step: 760/463, loss: 0.009341364726424217 2023-01-22 13:06:03.955292: step: 762/463, loss: 0.05508425459265709 2023-01-22 13:06:04.724684: step: 764/463, loss: 0.03854438289999962 2023-01-22 13:06:05.293687: step: 766/463, loss: 0.01581127941608429 2023-01-22 13:06:06.018132: step: 768/463, loss: 0.005890332628041506 2023-01-22 13:06:06.614041: step: 770/463, loss: 0.05794880911707878 2023-01-22 13:06:07.223563: step: 772/463, loss: 0.009432896040380001 2023-01-22 13:06:07.863367: step: 774/463, loss: 0.021078072488307953 2023-01-22 13:06:08.477213: step: 776/463, loss: 0.007599270902574062 2023-01-22 13:06:09.086475: step: 778/463, loss: 0.0005113847437314689 2023-01-22 13:06:09.678878: step: 780/463, loss: 0.006401469931006432 2023-01-22 13:06:10.285011: step: 782/463, loss: 0.014508014544844627 2023-01-22 13:06:10.889623: step: 784/463, loss: 0.002345010172575712 2023-01-22 13:06:11.662567: step: 786/463, loss: 0.007189466618001461 2023-01-22 13:06:12.304103: step: 788/463, loss: 0.009035010822117329 2023-01-22 13:06:12.855444: step: 790/463, loss: 0.005614686757326126 2023-01-22 13:06:13.500225: step: 792/463, loss: 0.07986856997013092 2023-01-22 13:06:14.087068: step: 794/463, loss: 0.12394262850284576 2023-01-22 13:06:14.711633: step: 796/463, loss: 0.005549500696361065 2023-01-22 13:06:15.281542: step: 798/463, loss: 0.04039314389228821 2023-01-22 13:06:15.891509: step: 800/463, loss: 0.001079953508451581 2023-01-22 13:06:16.509603: step: 802/463, loss: 0.03734450787305832 2023-01-22 13:06:17.135167: step: 804/463, loss: 0.0039762272499501705 2023-01-22 13:06:17.732462: step: 806/463, loss: 0.010790260508656502 2023-01-22 13:06:18.396028: step: 808/463, loss: 0.01745694875717163 2023-01-22 13:06:19.015521: step: 810/463, loss: 0.5384841561317444 2023-01-22 13:06:19.630874: step: 812/463, loss: 0.0416187047958374 2023-01-22 13:06:20.306702: step: 814/463, loss: 0.06309138238430023 2023-01-22 13:06:20.827782: step: 816/463, loss: 0.000431559921707958 2023-01-22 13:06:21.443985: step: 818/463, loss: 0.012816688977181911 2023-01-22 13:06:22.101860: step: 820/463, loss: 0.05416051298379898 2023-01-22 13:06:22.717118: step: 822/463, loss: 0.0023913108743727207 2023-01-22 13:06:23.344690: step: 824/463, loss: 0.3894469439983368 2023-01-22 13:06:23.946977: step: 826/463, loss: 0.012212458997964859 2023-01-22 13:06:24.664821: step: 828/463, loss: 0.01997094601392746 2023-01-22 13:06:25.325417: step: 830/463, loss: 0.00045535824028775096 2023-01-22 13:06:25.878183: step: 832/463, loss: 0.02898307703435421 2023-01-22 13:06:26.498068: step: 834/463, loss: 0.22472518682479858 2023-01-22 13:06:27.139954: step: 836/463, loss: 0.18519264459609985 2023-01-22 13:06:27.761666: step: 838/463, loss: 0.005664360243827105 2023-01-22 13:06:28.406576: step: 840/463, loss: 0.012907752767205238 2023-01-22 13:06:28.992110: step: 842/463, loss: 0.00020149107149336487 2023-01-22 13:06:29.622405: step: 844/463, loss: 0.017091726884245872 2023-01-22 13:06:30.252505: step: 846/463, loss: 0.0016688171308487654 2023-01-22 13:06:30.878187: step: 848/463, loss: 0.004266593139618635 2023-01-22 13:06:31.540101: step: 850/463, loss: 0.012012452818453312 2023-01-22 13:06:32.170994: step: 852/463, loss: 0.06989787518978119 2023-01-22 13:06:32.791054: step: 854/463, loss: 0.0033829433377832174 2023-01-22 13:06:33.435511: step: 856/463, loss: 0.0267151091247797 2023-01-22 13:06:34.032695: step: 858/463, loss: 0.008524909615516663 2023-01-22 13:06:34.576956: step: 860/463, loss: 0.03437764570116997 2023-01-22 13:06:35.129576: step: 862/463, loss: 0.009047313593327999 2023-01-22 13:06:35.864546: step: 864/463, loss: 0.01516711339354515 2023-01-22 13:06:36.438056: step: 866/463, loss: 0.0020795678719878197 2023-01-22 13:06:37.041863: step: 868/463, loss: 0.034122999757528305 2023-01-22 13:06:37.596945: step: 870/463, loss: 0.1948269158601761 2023-01-22 13:06:38.210747: step: 872/463, loss: 0.31432220339775085 2023-01-22 13:06:38.808926: step: 874/463, loss: 0.010344895534217358 2023-01-22 13:06:39.385773: step: 876/463, loss: 0.001328377635218203 2023-01-22 13:06:40.006112: step: 878/463, loss: 0.0018691495060920715 2023-01-22 13:06:40.633242: step: 880/463, loss: 0.0036268578842282295 2023-01-22 13:06:41.180709: step: 882/463, loss: 0.06314323842525482 2023-01-22 13:06:41.761856: step: 884/463, loss: 0.0009191217832267284 2023-01-22 13:06:42.425672: step: 886/463, loss: 0.010080519132316113 2023-01-22 13:06:43.053045: step: 888/463, loss: 0.019825082272291183 2023-01-22 13:06:43.614843: step: 890/463, loss: 0.001594318775460124 2023-01-22 13:06:44.255368: step: 892/463, loss: 0.008323898538947105 2023-01-22 13:06:44.888913: step: 894/463, loss: 0.13878990709781647 2023-01-22 13:06:45.506863: step: 896/463, loss: 0.008268610574305058 2023-01-22 13:06:46.120268: step: 898/463, loss: 0.06482666730880737 2023-01-22 13:06:46.700054: step: 900/463, loss: 0.016729217022657394 2023-01-22 13:06:47.295559: step: 902/463, loss: 0.02992217428982258 2023-01-22 13:06:47.917646: step: 904/463, loss: 0.07099413871765137 2023-01-22 13:06:48.499642: step: 906/463, loss: 0.01644017919898033 2023-01-22 13:06:49.100005: step: 908/463, loss: 0.03739346191287041 2023-01-22 13:06:49.743240: step: 910/463, loss: 0.04760941490530968 2023-01-22 13:06:50.338614: step: 912/463, loss: 0.0301815215498209 2023-01-22 13:06:50.916257: step: 914/463, loss: 0.00023451166634913534 2023-01-22 13:06:51.512038: step: 916/463, loss: 0.0017771677812561393 2023-01-22 13:06:52.196284: step: 918/463, loss: 0.4552406668663025 2023-01-22 13:06:52.886576: step: 920/463, loss: 0.03320131450891495 2023-01-22 13:06:53.526054: step: 922/463, loss: 0.00185881566721946 2023-01-22 13:06:54.242544: step: 924/463, loss: 0.33790406584739685 2023-01-22 13:06:54.844614: step: 926/463, loss: 0.016135603189468384 ================================================== Loss: 0.048 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32785351201478746, 'r': 0.3365630929791271, 'f1': 0.3321512172284644}, 'combined': 0.2447430021683422, 'epoch': 32} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3427920909762122, 'r': 0.3947780238908032, 'f1': 0.36695300298436045}, 'combined': 0.2844324712127579, 'epoch': 32} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29293082524271846, 'r': 0.3435128083491461, 'f1': 0.3162117903930131}, 'combined': 0.23299816134222018, 'epoch': 32} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32715102495572634, 'r': 0.4014215241874032, 'f1': 0.36050071672793615}, 'combined': 0.2794311775594529, 'epoch': 32} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2916386248460764, 'r': 0.33923050669951577, 'f1': 0.31363943338709616}, 'combined': 0.2311027403904919, 'epoch': 32} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32885595443889554, 'r': 0.38598258623021103, 'f1': 0.355136620565302}, 'combined': 0.2752733613951145, 'epoch': 32} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24444444444444444, 'r': 0.3142857142857143, 'f1': 0.27499999999999997}, 'combined': 0.1833333333333333, 'epoch': 32} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2857142857142857, 'r': 0.43478260869565216, 'f1': 0.3448275862068965}, 'combined': 0.17241379310344826, 'epoch': 32} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3977272727272727, 'r': 0.3017241379310345, 'f1': 0.34313725490196073}, 'combined': 0.2287581699346405, 'epoch': 32} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3279711174242424, 'r': 0.3285934535104364, 'f1': 0.32828199052132695}, 'combined': 0.24189199301571457, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33699381251830834, 'r': 0.40575541764612494, 'f1': 0.3681917384478598}, 'combined': 0.2853926393710684, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32926829268292684, 'r': 0.38571428571428573, 'f1': 0.35526315789473684}, 'combined': 0.23684210526315788, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29021588054776093, 'r': 0.325460313859064, 'f1': 0.30682931199235547}, 'combined': 0.2260847562048935, 'epoch': 13} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32837198696491027, 'r': 0.4053341714098111, 'f1': 0.3628166009822085}, 'combined': 0.28122626487616165, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3333333333333333, 'r': 0.4782608695652174, 'f1': 0.3928571428571428}, 'combined': 0.1964285714285714, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30241528588023536, 'r': 0.33799355480732185, 'f1': 0.319216135095804}, 'combined': 0.2352118890179608, 'epoch': 18} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3270213595028262, 'r': 0.3847310111797955, 'f1': 0.3535366048679202}, 'combined': 0.2740331578402061, 'epoch': 18} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 18} ****************************** Epoch: 33 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 13:09:31.325612: step: 2/463, loss: 0.013177735731005669 2023-01-22 13:09:31.962077: step: 4/463, loss: 0.029422033578157425 2023-01-22 13:09:32.559719: step: 6/463, loss: 0.0521036721765995 2023-01-22 13:09:33.263633: step: 8/463, loss: 0.7831087112426758 2023-01-22 13:09:33.842377: step: 10/463, loss: 0.0006630992284044623 2023-01-22 13:09:34.547011: step: 12/463, loss: 0.0023785794619470835 2023-01-22 13:09:35.145335: step: 14/463, loss: 0.0017757429741322994 2023-01-22 13:09:35.764795: step: 16/463, loss: 0.0009827989852055907 2023-01-22 13:09:36.467604: step: 18/463, loss: 0.0023622713051736355 2023-01-22 13:09:37.038860: step: 20/463, loss: 0.01516252662986517 2023-01-22 13:09:37.588575: step: 22/463, loss: 0.003957367967814207 2023-01-22 13:09:38.226037: step: 24/463, loss: 0.027441132813692093 2023-01-22 13:09:38.776861: step: 26/463, loss: 0.004595894832164049 2023-01-22 13:09:39.433336: step: 28/463, loss: 0.009913764894008636 2023-01-22 13:09:39.998295: step: 30/463, loss: 0.0001937171764438972 2023-01-22 13:09:40.612215: step: 32/463, loss: 0.0073243496008217335 2023-01-22 13:09:41.266591: step: 34/463, loss: 0.06665915250778198 2023-01-22 13:09:41.829106: step: 36/463, loss: 0.21806226670742035 2023-01-22 13:09:42.440429: step: 38/463, loss: 0.019458046182990074 2023-01-22 13:09:43.097490: step: 40/463, loss: 0.04964243993163109 2023-01-22 13:09:43.735770: step: 42/463, loss: 0.05178474262356758 2023-01-22 13:09:44.310210: step: 44/463, loss: 0.012613000348210335 2023-01-22 13:09:44.933921: step: 46/463, loss: 0.0006787462625652552 2023-01-22 13:09:45.536192: step: 48/463, loss: 0.03951142728328705 2023-01-22 13:09:46.180253: step: 50/463, loss: 0.06550993770360947 2023-01-22 13:09:46.759101: step: 52/463, loss: 0.0031946974340826273 2023-01-22 13:09:47.374295: step: 54/463, loss: 0.0009893204551190138 2023-01-22 13:09:47.982631: step: 56/463, loss: 0.030004192143678665 2023-01-22 13:09:48.571582: step: 58/463, loss: 0.07743649929761887 2023-01-22 13:09:49.182561: step: 60/463, loss: 0.004632330033928156 2023-01-22 13:09:49.819394: step: 62/463, loss: 0.005031968932598829 2023-01-22 13:09:50.399871: step: 64/463, loss: 0.012747881934046745 2023-01-22 13:09:50.936004: step: 66/463, loss: 0.002852066420018673 2023-01-22 13:09:51.508494: step: 68/463, loss: 0.03280583769083023 2023-01-22 13:09:52.119977: step: 70/463, loss: 0.02015034481883049 2023-01-22 13:09:52.755341: step: 72/463, loss: 0.004480517003685236 2023-01-22 13:09:53.353757: step: 74/463, loss: 0.020288977771997452 2023-01-22 13:09:53.949428: step: 76/463, loss: 0.0014101208653301 2023-01-22 13:09:54.526735: step: 78/463, loss: 0.013872583396732807 2023-01-22 13:09:55.148153: step: 80/463, loss: 0.023355277255177498 2023-01-22 13:09:55.706699: step: 82/463, loss: 0.009263689629733562 2023-01-22 13:09:56.350520: step: 84/463, loss: 0.012693917378783226 2023-01-22 13:09:56.949862: step: 86/463, loss: 0.011706424877047539 2023-01-22 13:09:57.573873: step: 88/463, loss: 0.051203593611717224 2023-01-22 13:09:58.177821: step: 90/463, loss: 0.005326269660145044 2023-01-22 13:09:58.807749: step: 92/463, loss: 0.006566681899130344 2023-01-22 13:09:59.416210: step: 94/463, loss: 0.010242292657494545 2023-01-22 13:09:59.995350: step: 96/463, loss: 0.02074410952627659 2023-01-22 13:10:00.676103: step: 98/463, loss: 0.0004857122548855841 2023-01-22 13:10:01.383122: step: 100/463, loss: 0.023802557960152626 2023-01-22 13:10:01.975361: step: 102/463, loss: 0.027592146769165993 2023-01-22 13:10:02.578001: step: 104/463, loss: 0.005277467425912619 2023-01-22 13:10:03.250806: step: 106/463, loss: 0.09126009047031403 2023-01-22 13:10:03.854399: step: 108/463, loss: 0.05373068153858185 2023-01-22 13:10:04.423473: step: 110/463, loss: 0.002197599271312356 2023-01-22 13:10:05.032976: step: 112/463, loss: 0.004238425754010677 2023-01-22 13:10:05.645082: step: 114/463, loss: 0.02249746024608612 2023-01-22 13:10:06.211333: step: 116/463, loss: 0.0004886161768808961 2023-01-22 13:10:06.827363: step: 118/463, loss: 0.14667724072933197 2023-01-22 13:10:07.396482: step: 120/463, loss: 0.0007660912815481424 2023-01-22 13:10:07.985848: step: 122/463, loss: 0.001688626827672124 2023-01-22 13:10:08.514992: step: 124/463, loss: 0.12359911948442459 2023-01-22 13:10:09.193097: step: 126/463, loss: 0.0017821916844695807 2023-01-22 13:10:09.756358: step: 128/463, loss: 0.0020403775852173567 2023-01-22 13:10:10.424781: step: 130/463, loss: 0.0015391093911603093 2023-01-22 13:10:11.037842: step: 132/463, loss: 0.02669435925781727 2023-01-22 13:10:11.635047: step: 134/463, loss: 0.0019884684588760138 2023-01-22 13:10:12.311502: step: 136/463, loss: 0.006303318776190281 2023-01-22 13:10:12.852214: step: 138/463, loss: 0.01015305146574974 2023-01-22 13:10:13.441150: step: 140/463, loss: 0.027442781254649162 2023-01-22 13:10:14.024188: step: 142/463, loss: 0.07229649275541306 2023-01-22 13:10:14.540922: step: 144/463, loss: 0.0019841294270008802 2023-01-22 13:10:15.140185: step: 146/463, loss: 0.045641813427209854 2023-01-22 13:10:15.721595: step: 148/463, loss: 0.004566526971757412 2023-01-22 13:10:16.334148: step: 150/463, loss: 0.030943425372242928 2023-01-22 13:10:16.924881: step: 152/463, loss: 0.0009443388553336263 2023-01-22 13:10:17.582260: step: 154/463, loss: 0.004768942482769489 2023-01-22 13:10:18.206835: step: 156/463, loss: 0.06695771962404251 2023-01-22 13:10:18.853592: step: 158/463, loss: 0.1569249927997589 2023-01-22 13:10:19.438012: step: 160/463, loss: 0.12606506049633026 2023-01-22 13:10:20.055773: step: 162/463, loss: 0.03515082970261574 2023-01-22 13:10:20.726105: step: 164/463, loss: 0.0030049022752791643 2023-01-22 13:10:21.282018: step: 166/463, loss: 0.00029705179622396827 2023-01-22 13:10:21.846051: step: 168/463, loss: 0.06618739664554596 2023-01-22 13:10:22.433344: step: 170/463, loss: 0.006797961890697479 2023-01-22 13:10:23.054447: step: 172/463, loss: 0.0024814188946038485 2023-01-22 13:10:23.669125: step: 174/463, loss: 0.0435347855091095 2023-01-22 13:10:24.396400: step: 176/463, loss: 0.025983229279518127 2023-01-22 13:10:25.004671: step: 178/463, loss: 0.01597868651151657 2023-01-22 13:10:25.637232: step: 180/463, loss: 0.005939070601016283 2023-01-22 13:10:26.258639: step: 182/463, loss: 0.03352585807442665 2023-01-22 13:10:26.882057: step: 184/463, loss: 0.038901034742593765 2023-01-22 13:10:27.516708: step: 186/463, loss: 0.01623605191707611 2023-01-22 13:10:28.093170: step: 188/463, loss: 0.012403161264955997 2023-01-22 13:10:28.695536: step: 190/463, loss: 0.0018404530128464103 2023-01-22 13:10:29.192047: step: 192/463, loss: 0.009095106273889542 2023-01-22 13:10:29.861913: step: 194/463, loss: 0.011725467629730701 2023-01-22 13:10:30.489983: step: 196/463, loss: 0.03635198622941971 2023-01-22 13:10:31.126954: step: 198/463, loss: 0.03714846819639206 2023-01-22 13:10:31.805191: step: 200/463, loss: 0.05019973963499069 2023-01-22 13:10:32.421041: step: 202/463, loss: 0.028682127594947815 2023-01-22 13:10:33.033971: step: 204/463, loss: 0.0020879965741187334 2023-01-22 13:10:33.632276: step: 206/463, loss: 0.007942724972963333 2023-01-22 13:10:34.229020: step: 208/463, loss: 0.01245945319533348 2023-01-22 13:10:34.814660: step: 210/463, loss: 0.04298003390431404 2023-01-22 13:10:35.401919: step: 212/463, loss: 0.02037498727440834 2023-01-22 13:10:36.075043: step: 214/463, loss: 0.09421339631080627 2023-01-22 13:10:36.645168: step: 216/463, loss: 0.4100435674190521 2023-01-22 13:10:37.230697: step: 218/463, loss: 0.0009400646667927504 2023-01-22 13:10:37.803231: step: 220/463, loss: 0.025177719071507454 2023-01-22 13:10:38.409625: step: 222/463, loss: 0.012517690658569336 2023-01-22 13:10:39.028583: step: 224/463, loss: 0.2530026137828827 2023-01-22 13:10:39.585135: step: 226/463, loss: 0.012043198570609093 2023-01-22 13:10:40.203130: step: 228/463, loss: 0.007266404572874308 2023-01-22 13:10:40.839324: step: 230/463, loss: 0.09906034171581268 2023-01-22 13:10:41.456982: step: 232/463, loss: 0.02197418175637722 2023-01-22 13:10:42.127581: step: 234/463, loss: 0.008904719725251198 2023-01-22 13:10:42.713646: step: 236/463, loss: 0.05396690219640732 2023-01-22 13:10:43.319072: step: 238/463, loss: 0.002286656992509961 2023-01-22 13:10:43.833379: step: 240/463, loss: 0.008480419404804707 2023-01-22 13:10:44.488578: step: 242/463, loss: 0.010825510136783123 2023-01-22 13:10:45.123347: step: 244/463, loss: 0.03129807114601135 2023-01-22 13:10:45.646394: step: 246/463, loss: 0.0008733254508115351 2023-01-22 13:10:46.262309: step: 248/463, loss: 0.02321523427963257 2023-01-22 13:10:46.788758: step: 250/463, loss: 0.001570650259964168 2023-01-22 13:10:47.433650: step: 252/463, loss: 0.03918491676449776 2023-01-22 13:10:47.981588: step: 254/463, loss: 0.16565479338169098 2023-01-22 13:10:48.595994: step: 256/463, loss: 1.4607250690460205 2023-01-22 13:10:49.194620: step: 258/463, loss: 0.044743604958057404 2023-01-22 13:10:49.765384: step: 260/463, loss: 0.007310402113944292 2023-01-22 13:10:50.380507: step: 262/463, loss: 0.01301309373229742 2023-01-22 13:10:50.968271: step: 264/463, loss: 0.019325660541653633 2023-01-22 13:10:51.625087: step: 266/463, loss: 0.047689154744148254 2023-01-22 13:10:52.214190: step: 268/463, loss: 0.009946894831955433 2023-01-22 13:10:52.839233: step: 270/463, loss: 0.005585205275565386 2023-01-22 13:10:53.378119: step: 272/463, loss: 0.0007765499758534133 2023-01-22 13:10:54.033614: step: 274/463, loss: 0.01617380604147911 2023-01-22 13:10:54.686473: step: 276/463, loss: 0.038956161588430405 2023-01-22 13:10:55.249987: step: 278/463, loss: 0.015469358302652836 2023-01-22 13:10:55.816482: step: 280/463, loss: 0.007868226617574692 2023-01-22 13:10:56.490271: step: 282/463, loss: 0.016742585226893425 2023-01-22 13:10:57.053262: step: 284/463, loss: 0.004956222604960203 2023-01-22 13:10:57.608398: step: 286/463, loss: 1.3458346984407399e-05 2023-01-22 13:10:58.208847: step: 288/463, loss: 0.0016780121950432658 2023-01-22 13:10:58.811361: step: 290/463, loss: 0.003546732710674405 2023-01-22 13:10:59.433076: step: 292/463, loss: 0.12040197104215622 2023-01-22 13:11:00.186774: step: 294/463, loss: 0.08213011175394058 2023-01-22 13:11:00.756236: step: 296/463, loss: 0.0020931046456098557 2023-01-22 13:11:01.371650: step: 298/463, loss: 0.0006639169296249747 2023-01-22 13:11:02.034883: step: 300/463, loss: 0.0018088988726958632 2023-01-22 13:11:02.672947: step: 302/463, loss: 0.06725714355707169 2023-01-22 13:11:03.301918: step: 304/463, loss: 0.0026506278663873672 2023-01-22 13:11:03.968327: step: 306/463, loss: 0.0436609648168087 2023-01-22 13:11:04.527003: step: 308/463, loss: 0.02149411477148533 2023-01-22 13:11:05.008573: step: 310/463, loss: 0.005827387794852257 2023-01-22 13:11:05.605272: step: 312/463, loss: 0.016611739993095398 2023-01-22 13:11:06.155722: step: 314/463, loss: 0.003579403506591916 2023-01-22 13:11:06.738260: step: 316/463, loss: 0.004192912019789219 2023-01-22 13:11:07.319121: step: 318/463, loss: 0.013725695200264454 2023-01-22 13:11:07.962523: step: 320/463, loss: 0.03245076164603233 2023-01-22 13:11:08.689563: step: 322/463, loss: 0.11209096759557724 2023-01-22 13:11:09.313063: step: 324/463, loss: 0.0012780773686245084 2023-01-22 13:11:09.942990: step: 326/463, loss: 0.04920756816864014 2023-01-22 13:11:10.669630: step: 328/463, loss: 0.00143470021430403 2023-01-22 13:11:11.245724: step: 330/463, loss: 1.839549258875195e-05 2023-01-22 13:11:11.915270: step: 332/463, loss: 0.032383084297180176 2023-01-22 13:11:12.557131: step: 334/463, loss: 0.020892543718218803 2023-01-22 13:11:13.142457: step: 336/463, loss: 0.004676688928157091 2023-01-22 13:11:13.765801: step: 338/463, loss: 0.012989516369998455 2023-01-22 13:11:14.420505: step: 340/463, loss: 0.011859335005283356 2023-01-22 13:11:14.981107: step: 342/463, loss: 0.019382981583476067 2023-01-22 13:11:15.626787: step: 344/463, loss: 0.020520128309726715 2023-01-22 13:11:16.208848: step: 346/463, loss: 0.009408780373632908 2023-01-22 13:11:16.742480: step: 348/463, loss: 0.0027982189785689116 2023-01-22 13:11:17.416162: step: 350/463, loss: 0.015885649248957634 2023-01-22 13:11:17.985410: step: 352/463, loss: 0.016582250595092773 2023-01-22 13:11:18.626354: step: 354/463, loss: 0.010317642241716385 2023-01-22 13:11:19.282431: step: 356/463, loss: 0.033750079572200775 2023-01-22 13:11:19.887519: step: 358/463, loss: 0.491613507270813 2023-01-22 13:11:20.567712: step: 360/463, loss: 0.009647327475249767 2023-01-22 13:11:21.215254: step: 362/463, loss: 0.045069120824337006 2023-01-22 13:11:21.837168: step: 364/463, loss: 0.0022171225864440203 2023-01-22 13:11:22.438168: step: 366/463, loss: 0.0004882703360635787 2023-01-22 13:11:23.009096: step: 368/463, loss: 0.007154977414757013 2023-01-22 13:11:23.633114: step: 370/463, loss: 0.010284720920026302 2023-01-22 13:11:24.205722: step: 372/463, loss: 9.433650120627135e-05 2023-01-22 13:11:24.880842: step: 374/463, loss: 0.024721626192331314 2023-01-22 13:11:25.468177: step: 376/463, loss: 0.002616564277559519 2023-01-22 13:11:26.090258: step: 378/463, loss: 0.008903161622583866 2023-01-22 13:11:26.699931: step: 380/463, loss: 0.0018138960003852844 2023-01-22 13:11:27.283247: step: 382/463, loss: 0.0036056144163012505 2023-01-22 13:11:27.826973: step: 384/463, loss: 0.020847296342253685 2023-01-22 13:11:28.429627: step: 386/463, loss: 0.0006662294617854059 2023-01-22 13:11:29.000249: step: 388/463, loss: 0.009713188745081425 2023-01-22 13:11:29.618078: step: 390/463, loss: 0.0009053823887370527 2023-01-22 13:11:30.228781: step: 392/463, loss: 0.023109719157218933 2023-01-22 13:11:30.827781: step: 394/463, loss: 0.001229922752827406 2023-01-22 13:11:31.466813: step: 396/463, loss: 0.004341547377407551 2023-01-22 13:11:32.012318: step: 398/463, loss: 0.0014307390665635467 2023-01-22 13:11:32.642337: step: 400/463, loss: 0.002218321431428194 2023-01-22 13:11:33.266949: step: 402/463, loss: 0.0005069616017863154 2023-01-22 13:11:33.915825: step: 404/463, loss: 2.783818483352661 2023-01-22 13:11:34.505286: step: 406/463, loss: 0.012074127793312073 2023-01-22 13:11:35.128872: step: 408/463, loss: 0.0005894795758649707 2023-01-22 13:11:35.746672: step: 410/463, loss: 0.00010849148384295404 2023-01-22 13:11:36.321158: step: 412/463, loss: 0.05550036579370499 2023-01-22 13:11:36.924548: step: 414/463, loss: 0.0010107363341376185 2023-01-22 13:11:37.534232: step: 416/463, loss: 0.0031266873702406883 2023-01-22 13:11:38.112982: step: 418/463, loss: 0.0002638222649693489 2023-01-22 13:11:38.786323: step: 420/463, loss: 0.016629505902528763 2023-01-22 13:11:39.415871: step: 422/463, loss: 0.06914753466844559 2023-01-22 13:11:39.975696: step: 424/463, loss: 0.001353125786408782 2023-01-22 13:11:40.534930: step: 426/463, loss: 0.10459199547767639 2023-01-22 13:11:41.118452: step: 428/463, loss: 0.004527949262410402 2023-01-22 13:11:41.685197: step: 430/463, loss: 0.00016903478535823524 2023-01-22 13:11:42.250427: step: 432/463, loss: 0.0052298265509307384 2023-01-22 13:11:42.883961: step: 434/463, loss: 1.5333068859035848e-06 2023-01-22 13:11:43.481686: step: 436/463, loss: 0.013857504352927208 2023-01-22 13:11:44.094556: step: 438/463, loss: 0.041202887892723083 2023-01-22 13:11:44.676298: step: 440/463, loss: 0.002106701722368598 2023-01-22 13:11:45.230361: step: 442/463, loss: 0.017782218754291534 2023-01-22 13:11:45.795829: step: 444/463, loss: 0.00529123842716217 2023-01-22 13:11:46.451597: step: 446/463, loss: 0.00023902471002656966 2023-01-22 13:11:47.074822: step: 448/463, loss: 0.012297023087739944 2023-01-22 13:11:47.711143: step: 450/463, loss: 0.9898707270622253 2023-01-22 13:11:48.243750: step: 452/463, loss: 0.0028778291307389736 2023-01-22 13:11:48.932503: step: 454/463, loss: 0.0004048977862112224 2023-01-22 13:11:49.488875: step: 456/463, loss: 0.010129190981388092 2023-01-22 13:11:50.109855: step: 458/463, loss: 0.0011601498117670417 2023-01-22 13:11:50.735215: step: 460/463, loss: 0.05444043502211571 2023-01-22 13:11:51.270895: step: 462/463, loss: 0.8219467401504517 2023-01-22 13:11:51.831732: step: 464/463, loss: 0.021931078284978867 2023-01-22 13:11:52.448694: step: 466/463, loss: 0.0034460548777133226 2023-01-22 13:11:53.106123: step: 468/463, loss: 0.004666443448513746 2023-01-22 13:11:53.781032: step: 470/463, loss: 0.005805822089314461 2023-01-22 13:11:54.384653: step: 472/463, loss: 0.007446239236742258 2023-01-22 13:11:54.957492: step: 474/463, loss: 0.006875197868794203 2023-01-22 13:11:55.497988: step: 476/463, loss: 0.10706232488155365 2023-01-22 13:11:56.078341: step: 478/463, loss: 0.03519926592707634 2023-01-22 13:11:56.629434: step: 480/463, loss: 0.0004966319538652897 2023-01-22 13:11:57.228048: step: 482/463, loss: 0.033550385385751724 2023-01-22 13:11:57.780803: step: 484/463, loss: 0.011099251918494701 2023-01-22 13:11:58.384291: step: 486/463, loss: 0.006425037980079651 2023-01-22 13:11:59.002348: step: 488/463, loss: 0.00516635412350297 2023-01-22 13:11:59.685374: step: 490/463, loss: 0.020119614899158478 2023-01-22 13:12:00.307427: step: 492/463, loss: 0.03843187540769577 2023-01-22 13:12:00.914319: step: 494/463, loss: 0.02273445948958397 2023-01-22 13:12:01.517221: step: 496/463, loss: 0.004197875503450632 2023-01-22 13:12:02.167113: step: 498/463, loss: 0.007444774731993675 2023-01-22 13:12:02.780340: step: 500/463, loss: 0.007977718487381935 2023-01-22 13:12:03.360276: step: 502/463, loss: 1.159813642501831 2023-01-22 13:12:03.942416: step: 504/463, loss: 0.12344076484441757 2023-01-22 13:12:04.548395: step: 506/463, loss: 0.010689151473343372 2023-01-22 13:12:05.202820: step: 508/463, loss: 0.02402939647436142 2023-01-22 13:12:05.823821: step: 510/463, loss: 0.000801203481387347 2023-01-22 13:12:06.391216: step: 512/463, loss: 0.0002643251500558108 2023-01-22 13:12:07.009833: step: 514/463, loss: 0.007294789422303438 2023-01-22 13:12:07.591736: step: 516/463, loss: 0.001394484774209559 2023-01-22 13:12:08.154054: step: 518/463, loss: 0.01273674238473177 2023-01-22 13:12:08.760206: step: 520/463, loss: 0.024614671245217323 2023-01-22 13:12:09.352746: step: 522/463, loss: 0.03714652359485626 2023-01-22 13:12:09.890794: step: 524/463, loss: 0.0001383195776725188 2023-01-22 13:12:10.507713: step: 526/463, loss: 0.022074243053793907 2023-01-22 13:12:11.053995: step: 528/463, loss: 0.03157607093453407 2023-01-22 13:12:11.598982: step: 530/463, loss: 0.012684871442615986 2023-01-22 13:12:12.179281: step: 532/463, loss: 0.029379574581980705 2023-01-22 13:12:12.740864: step: 534/463, loss: 0.0035063489340245724 2023-01-22 13:12:13.391126: step: 536/463, loss: 0.0034502989146858454 2023-01-22 13:12:13.954246: step: 538/463, loss: 0.018242251127958298 2023-01-22 13:12:14.568527: step: 540/463, loss: 0.004189246334135532 2023-01-22 13:12:15.161650: step: 542/463, loss: 0.02560761198401451 2023-01-22 13:12:15.764764: step: 544/463, loss: 0.025203125551342964 2023-01-22 13:12:16.410274: step: 546/463, loss: 0.02217089757323265 2023-01-22 13:12:16.986118: step: 548/463, loss: 0.011151999235153198 2023-01-22 13:12:17.566922: step: 550/463, loss: 0.010360317304730415 2023-01-22 13:12:18.164708: step: 552/463, loss: 0.001353154773823917 2023-01-22 13:12:18.765013: step: 554/463, loss: 0.0024906164035201073 2023-01-22 13:12:19.339007: step: 556/463, loss: 0.021636279299855232 2023-01-22 13:12:19.957125: step: 558/463, loss: 0.09633377939462662 2023-01-22 13:12:20.538615: step: 560/463, loss: 0.0014069689204916358 2023-01-22 13:12:21.195981: step: 562/463, loss: 0.0036347832065075636 2023-01-22 13:12:21.846674: step: 564/463, loss: 0.018188487738370895 2023-01-22 13:12:22.419683: step: 566/463, loss: 0.05478993430733681 2023-01-22 13:12:23.006692: step: 568/463, loss: 0.005565541796386242 2023-01-22 13:12:23.631157: step: 570/463, loss: 0.3302651643753052 2023-01-22 13:12:24.228053: step: 572/463, loss: 0.0002009569579968229 2023-01-22 13:12:24.920422: step: 574/463, loss: 0.5271614789962769 2023-01-22 13:12:25.518458: step: 576/463, loss: 0.009887102991342545 2023-01-22 13:12:26.135155: step: 578/463, loss: 0.09758669137954712 2023-01-22 13:12:26.701795: step: 580/463, loss: 0.015284329652786255 2023-01-22 13:12:27.219802: step: 582/463, loss: 0.0014495070790871978 2023-01-22 13:12:27.862438: step: 584/463, loss: 0.018039032816886902 2023-01-22 13:12:28.458394: step: 586/463, loss: 0.04768936708569527 2023-01-22 13:12:29.066560: step: 588/463, loss: 0.01392241008579731 2023-01-22 13:12:29.613723: step: 590/463, loss: 0.004238658584654331 2023-01-22 13:12:30.199606: step: 592/463, loss: 0.0014836931368336082 2023-01-22 13:12:30.801723: step: 594/463, loss: 0.0010989391012117267 2023-01-22 13:12:31.393859: step: 596/463, loss: 0.0038590440526604652 2023-01-22 13:12:32.050796: step: 598/463, loss: 0.010744186118245125 2023-01-22 13:12:32.640115: step: 600/463, loss: 0.0113844508305192 2023-01-22 13:12:33.299396: step: 602/463, loss: 0.017788993194699287 2023-01-22 13:12:33.952800: step: 604/463, loss: 0.0002349230198888108 2023-01-22 13:12:34.563331: step: 606/463, loss: 0.010457243770360947 2023-01-22 13:12:35.147239: step: 608/463, loss: 0.0058177197352051735 2023-01-22 13:12:35.786724: step: 610/463, loss: 0.08693305402994156 2023-01-22 13:12:36.346742: step: 612/463, loss: 0.0022323522716760635 2023-01-22 13:12:36.949371: step: 614/463, loss: 0.002958253724500537 2023-01-22 13:12:37.557367: step: 616/463, loss: 5.9136058553121984e-05 2023-01-22 13:12:38.094623: step: 618/463, loss: 0.003142335917800665 2023-01-22 13:12:38.719360: step: 620/463, loss: 0.02074419893324375 2023-01-22 13:12:39.278992: step: 622/463, loss: 0.030147427693009377 2023-01-22 13:12:39.902013: step: 624/463, loss: 0.2404526174068451 2023-01-22 13:12:40.549286: step: 626/463, loss: 0.01802827976644039 2023-01-22 13:12:41.198575: step: 628/463, loss: 0.0025942681822925806 2023-01-22 13:12:41.740560: step: 630/463, loss: 0.006616254802793264 2023-01-22 13:12:42.390226: step: 632/463, loss: 0.07031940668821335 2023-01-22 13:12:42.969420: step: 634/463, loss: 0.023251071572303772 2023-01-22 13:12:43.546047: step: 636/463, loss: 0.008291074074804783 2023-01-22 13:12:44.182503: step: 638/463, loss: 0.04736214131116867 2023-01-22 13:12:44.746712: step: 640/463, loss: 0.005038055125623941 2023-01-22 13:12:45.323102: step: 642/463, loss: 0.12091568857431412 2023-01-22 13:12:45.846068: step: 644/463, loss: 0.005785045213997364 2023-01-22 13:12:46.476654: step: 646/463, loss: 0.0006044826004654169 2023-01-22 13:12:47.166759: step: 648/463, loss: 0.2230464369058609 2023-01-22 13:12:47.805530: step: 650/463, loss: 0.00014197845302987844 2023-01-22 13:12:48.396616: step: 652/463, loss: 0.014181282371282578 2023-01-22 13:12:49.118521: step: 654/463, loss: 0.009210537187755108 2023-01-22 13:12:49.718184: step: 656/463, loss: 0.06659893691539764 2023-01-22 13:12:50.433049: step: 658/463, loss: 0.0014637090498581529 2023-01-22 13:12:51.069043: step: 660/463, loss: 0.015872491523623466 2023-01-22 13:12:51.715657: step: 662/463, loss: 0.02204708941280842 2023-01-22 13:12:52.328427: step: 664/463, loss: 0.00498984893783927 2023-01-22 13:12:52.933843: step: 666/463, loss: 0.0013688835315406322 2023-01-22 13:12:53.556797: step: 668/463, loss: 0.0034113817382603884 2023-01-22 13:12:54.196054: step: 670/463, loss: 0.02099032513797283 2023-01-22 13:12:54.765166: step: 672/463, loss: 0.016249027103185654 2023-01-22 13:12:55.384446: step: 674/463, loss: 0.014061027206480503 2023-01-22 13:12:56.025014: step: 676/463, loss: 0.00711113540455699 2023-01-22 13:12:56.647758: step: 678/463, loss: 0.0922115370631218 2023-01-22 13:12:57.222058: step: 680/463, loss: 0.015617083758115768 2023-01-22 13:12:57.823804: step: 682/463, loss: 0.01940792053937912 2023-01-22 13:12:58.436868: step: 684/463, loss: 0.002145026344805956 2023-01-22 13:12:59.077035: step: 686/463, loss: 0.025647325441241264 2023-01-22 13:12:59.658328: step: 688/463, loss: 0.0068283346481621265 2023-01-22 13:13:00.221243: step: 690/463, loss: 0.04125862568616867 2023-01-22 13:13:00.876630: step: 692/463, loss: 0.07212526351213455 2023-01-22 13:13:01.402634: step: 694/463, loss: 0.04558970034122467 2023-01-22 13:13:02.019636: step: 696/463, loss: 0.03964276611804962 2023-01-22 13:13:02.646217: step: 698/463, loss: 0.05120779573917389 2023-01-22 13:13:03.215382: step: 700/463, loss: 0.003935744054615498 2023-01-22 13:13:03.805702: step: 702/463, loss: 0.0038848421536386013 2023-01-22 13:13:04.481250: step: 704/463, loss: 0.00033737451303750277 2023-01-22 13:13:05.096216: step: 706/463, loss: 0.007344974670559168 2023-01-22 13:13:05.793959: step: 708/463, loss: 0.04298728331923485 2023-01-22 13:13:06.391193: step: 710/463, loss: 0.002970857312902808 2023-01-22 13:13:07.001286: step: 712/463, loss: 0.003636470763012767 2023-01-22 13:13:07.578710: step: 714/463, loss: 0.001263256766833365 2023-01-22 13:13:08.169190: step: 716/463, loss: 0.0005735816084779799 2023-01-22 13:13:08.818484: step: 718/463, loss: 0.002978523960337043 2023-01-22 13:13:09.375991: step: 720/463, loss: 0.003500348189845681 2023-01-22 13:13:09.993566: step: 722/463, loss: 0.02365230955183506 2023-01-22 13:13:10.641273: step: 724/463, loss: 8.246102515840903e-05 2023-01-22 13:13:11.336298: step: 726/463, loss: 0.005365605466067791 2023-01-22 13:13:11.948368: step: 728/463, loss: 0.0237798560410738 2023-01-22 13:13:12.578285: step: 730/463, loss: 0.008263921365141869 2023-01-22 13:13:13.219458: step: 732/463, loss: 0.005609151907265186 2023-01-22 13:13:13.784121: step: 734/463, loss: 0.01481957919895649 2023-01-22 13:13:14.423491: step: 736/463, loss: 0.0956425666809082 2023-01-22 13:13:15.038332: step: 738/463, loss: 0.013071027584373951 2023-01-22 13:13:15.610056: step: 740/463, loss: 0.019585823640227318 2023-01-22 13:13:16.248155: step: 742/463, loss: 0.0014239312149584293 2023-01-22 13:13:16.842405: step: 744/463, loss: 0.006012816447764635 2023-01-22 13:13:17.446538: step: 746/463, loss: 0.02948002889752388 2023-01-22 13:13:18.008205: step: 748/463, loss: 0.006546752993017435 2023-01-22 13:13:18.620120: step: 750/463, loss: 0.012195901945233345 2023-01-22 13:13:19.224360: step: 752/463, loss: 0.015468014404177666 2023-01-22 13:13:19.847805: step: 754/463, loss: 0.006080780643969774 2023-01-22 13:13:20.485365: step: 756/463, loss: 0.0003518761077430099 2023-01-22 13:13:21.112087: step: 758/463, loss: 0.009662225842475891 2023-01-22 13:13:21.645414: step: 760/463, loss: 2.3864381546445657e-06 2023-01-22 13:13:22.249984: step: 762/463, loss: 0.060901302844285965 2023-01-22 13:13:22.919788: step: 764/463, loss: 0.009654792957007885 2023-01-22 13:13:23.508900: step: 766/463, loss: 0.037627194076776505 2023-01-22 13:13:24.080084: step: 768/463, loss: 0.026824548840522766 2023-01-22 13:13:24.619945: step: 770/463, loss: 0.03738008067011833 2023-01-22 13:13:25.165731: step: 772/463, loss: 0.0005081394920125604 2023-01-22 13:13:25.807061: step: 774/463, loss: 0.04401644691824913 2023-01-22 13:13:26.487082: step: 776/463, loss: 0.02724134363234043 2023-01-22 13:13:27.115290: step: 778/463, loss: 0.010808688588440418 2023-01-22 13:13:27.724100: step: 780/463, loss: 0.0009761190740391612 2023-01-22 13:13:28.290472: step: 782/463, loss: 0.00019191816681995988 2023-01-22 13:13:28.968807: step: 784/463, loss: 0.01264650747179985 2023-01-22 13:13:29.542153: step: 786/463, loss: 0.009386629797518253 2023-01-22 13:13:30.168958: step: 788/463, loss: 0.03513404726982117 2023-01-22 13:13:30.833718: step: 790/463, loss: 0.012704877182841301 2023-01-22 13:13:31.580304: step: 792/463, loss: 0.004062521271407604 2023-01-22 13:13:32.226769: step: 794/463, loss: 0.010036393068730831 2023-01-22 13:13:32.851765: step: 796/463, loss: 0.01922842487692833 2023-01-22 13:13:33.477361: step: 798/463, loss: 0.000656516058370471 2023-01-22 13:13:34.143766: step: 800/463, loss: 0.04428530111908913 2023-01-22 13:13:34.766639: step: 802/463, loss: 0.00021473036031238735 2023-01-22 13:13:35.351287: step: 804/463, loss: 0.00013105737161822617 2023-01-22 13:13:35.981325: step: 806/463, loss: 0.040083661675453186 2023-01-22 13:13:36.583571: step: 808/463, loss: 0.058017369359731674 2023-01-22 13:13:37.183028: step: 810/463, loss: 0.010729657486081123 2023-01-22 13:13:37.839804: step: 812/463, loss: 0.02432204596698284 2023-01-22 13:13:38.432787: step: 814/463, loss: 0.002297374652698636 2023-01-22 13:13:39.034477: step: 816/463, loss: 0.012257283553481102 2023-01-22 13:13:39.666866: step: 818/463, loss: 0.003023503115400672 2023-01-22 13:13:40.311477: step: 820/463, loss: 0.023690316826105118 2023-01-22 13:13:40.986889: step: 822/463, loss: 0.00591715844348073 2023-01-22 13:13:41.574828: step: 824/463, loss: 0.041522618383169174 2023-01-22 13:13:42.185540: step: 826/463, loss: 1.5101227760314941 2023-01-22 13:13:42.777062: step: 828/463, loss: 0.005976432468742132 2023-01-22 13:13:43.436923: step: 830/463, loss: 0.025869490578770638 2023-01-22 13:13:44.021487: step: 832/463, loss: 0.023571673780679703 2023-01-22 13:13:44.680443: step: 834/463, loss: 0.014984600245952606 2023-01-22 13:13:45.277968: step: 836/463, loss: 0.005688313394784927 2023-01-22 13:13:45.860907: step: 838/463, loss: 0.0009162913192994893 2023-01-22 13:13:46.446389: step: 840/463, loss: 0.0007104834076017141 2023-01-22 13:13:47.039572: step: 842/463, loss: 0.027130533009767532 2023-01-22 13:13:47.718895: step: 844/463, loss: 0.00908284168690443 2023-01-22 13:13:48.317735: step: 846/463, loss: 0.006472413428127766 2023-01-22 13:13:48.932275: step: 848/463, loss: 0.0055686431005597115 2023-01-22 13:13:49.504229: step: 850/463, loss: 0.0002357563644181937 2023-01-22 13:13:50.126674: step: 852/463, loss: 0.009973691776394844 2023-01-22 13:13:50.770050: step: 854/463, loss: 0.06778901815414429 2023-01-22 13:13:51.401827: step: 856/463, loss: 0.01029406487941742 2023-01-22 13:13:51.961305: step: 858/463, loss: 0.002801164984703064 2023-01-22 13:13:52.613528: step: 860/463, loss: 0.8698673844337463 2023-01-22 13:13:53.169680: step: 862/463, loss: 0.05457659065723419 2023-01-22 13:13:53.812409: step: 864/463, loss: 0.0004166988655924797 2023-01-22 13:13:54.384182: step: 866/463, loss: 0.003854450536891818 2023-01-22 13:13:55.066622: step: 868/463, loss: 1.1777788400650024 2023-01-22 13:13:55.636820: step: 870/463, loss: 0.005094530060887337 2023-01-22 13:13:56.214942: step: 872/463, loss: 0.0005814541364088655 2023-01-22 13:13:56.823126: step: 874/463, loss: 0.01394410990178585 2023-01-22 13:13:57.438274: step: 876/463, loss: 0.0012392376083880663 2023-01-22 13:13:58.017465: step: 878/463, loss: 13.929421424865723 2023-01-22 13:13:58.593437: step: 880/463, loss: 0.014130426570773125 2023-01-22 13:13:59.157898: step: 882/463, loss: 0.011859637685120106 2023-01-22 13:13:59.759530: step: 884/463, loss: 0.0065087019465863705 2023-01-22 13:14:00.365488: step: 886/463, loss: 0.02387521229684353 2023-01-22 13:14:01.004118: step: 888/463, loss: 0.0002580659056548029 2023-01-22 13:14:01.607084: step: 890/463, loss: 0.003160901367664337 2023-01-22 13:14:02.162033: step: 892/463, loss: 0.0016409298405051231 2023-01-22 13:14:02.706231: step: 894/463, loss: 0.012187356129288673 2023-01-22 13:14:03.338017: step: 896/463, loss: 0.004704783670604229 2023-01-22 13:14:03.919409: step: 898/463, loss: 0.031524937599897385 2023-01-22 13:14:04.534450: step: 900/463, loss: 0.05365930497646332 2023-01-22 13:14:05.164983: step: 902/463, loss: 0.001847764360718429 2023-01-22 13:14:05.733866: step: 904/463, loss: 0.015729987993836403 2023-01-22 13:14:06.358559: step: 906/463, loss: 0.08213606476783752 2023-01-22 13:14:06.884261: step: 908/463, loss: 0.3502179980278015 2023-01-22 13:14:07.419022: step: 910/463, loss: 0.02215544693171978 2023-01-22 13:14:08.027304: step: 912/463, loss: 0.05643599107861519 2023-01-22 13:14:08.678809: step: 914/463, loss: 0.04383004456758499 2023-01-22 13:14:09.246873: step: 916/463, loss: 0.01738683693110943 2023-01-22 13:14:09.893572: step: 918/463, loss: 0.037004098296165466 2023-01-22 13:14:10.546640: step: 920/463, loss: 0.03255252540111542 2023-01-22 13:14:11.183532: step: 922/463, loss: 0.05243566259741783 2023-01-22 13:14:11.802813: step: 924/463, loss: 0.00585405807942152 2023-01-22 13:14:12.385264: step: 926/463, loss: 0.003993358928710222 ================================================== Loss: 0.081 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3333406546256946, 'r': 0.33650327943239006, 'f1': 0.334914500964815}, 'combined': 0.2467791059740742, 'epoch': 33} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3500026079834208, 'r': 0.40662067692191534, 'f1': 0.37619327932911895}, 'combined': 0.2915947906761592, 'epoch': 33} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2947424223602485, 'r': 0.3495522086815091, 'f1': 0.3198159964846446}, 'combined': 0.23565389214658022, 'epoch': 33} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3328459978871645, 'r': 0.41728119588059964, 'f1': 0.3703115343540721}, 'combined': 0.2870357347624865, 'epoch': 33} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3008217361773462, 'r': 0.34648727487599457, 'f1': 0.3220437281475293}, 'combined': 0.23729537863502156, 'epoch': 33} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33339968837666273, 'r': 0.4002021994668396, 'f1': 0.36375939266493024}, 'combined': 0.2819570412043957, 'epoch': 33} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24479166666666666, 'r': 0.3357142857142857, 'f1': 0.2831325301204819}, 'combined': 0.18875502008032125, 'epoch': 33} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.24404761904761904, 'r': 0.44565217391304346, 'f1': 0.3153846153846154}, 'combined': 0.1576923076923077, 'epoch': 33} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.375, 'r': 0.23275862068965517, 'f1': 0.2872340425531915}, 'combined': 0.19148936170212766, 'epoch': 33} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3279711174242424, 'r': 0.3285934535104364, 'f1': 0.32828199052132695}, 'combined': 0.24189199301571457, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33699381251830834, 'r': 0.40575541764612494, 'f1': 0.3681917384478598}, 'combined': 0.2853926393710684, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32926829268292684, 'r': 0.38571428571428573, 'f1': 0.35526315789473684}, 'combined': 0.23684210526315788, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29021588054776093, 'r': 0.325460313859064, 'f1': 0.30682931199235547}, 'combined': 0.2260847562048935, 'epoch': 13} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32837198696491027, 'r': 0.4053341714098111, 'f1': 0.3628166009822085}, 'combined': 0.28122626487616165, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3333333333333333, 'r': 0.4782608695652174, 'f1': 0.3928571428571428}, 'combined': 0.1964285714285714, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30241528588023536, 'r': 0.33799355480732185, 'f1': 0.319216135095804}, 'combined': 0.2352118890179608, 'epoch': 18} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3270213595028262, 'r': 0.3847310111797955, 'f1': 0.3535366048679202}, 'combined': 0.2740331578402061, 'epoch': 18} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 18} ****************************** Epoch: 34 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 13:16:49.276395: step: 2/463, loss: 0.009739723987877369 2023-01-22 13:16:49.904967: step: 4/463, loss: 0.007610445376485586 2023-01-22 13:16:50.474819: step: 6/463, loss: 0.0008657005964778364 2023-01-22 13:16:51.123429: step: 8/463, loss: 0.02146882191300392 2023-01-22 13:16:51.799313: step: 10/463, loss: 0.0009552372503094375 2023-01-22 13:16:52.354252: step: 12/463, loss: 0.005236999597400427 2023-01-22 13:16:52.959735: step: 14/463, loss: 0.007812581956386566 2023-01-22 13:16:53.587040: step: 16/463, loss: 0.06353195011615753 2023-01-22 13:16:54.239557: step: 18/463, loss: 0.01044398732483387 2023-01-22 13:16:54.853978: step: 20/463, loss: 0.026223251596093178 2023-01-22 13:16:55.485967: step: 22/463, loss: 1.2370123863220215 2023-01-22 13:16:56.107214: step: 24/463, loss: 0.3983534276485443 2023-01-22 13:16:56.753300: step: 26/463, loss: 0.005513183772563934 2023-01-22 13:16:57.318698: step: 28/463, loss: 0.04135580733418465 2023-01-22 13:16:57.849041: step: 30/463, loss: 0.010369330644607544 2023-01-22 13:16:58.445058: step: 32/463, loss: 0.057855088263750076 2023-01-22 13:16:59.022280: step: 34/463, loss: 0.010785293765366077 2023-01-22 13:16:59.658761: step: 36/463, loss: 0.01930118165910244 2023-01-22 13:17:00.341065: step: 38/463, loss: 0.0038408352993428707 2023-01-22 13:17:00.935549: step: 40/463, loss: 0.008800194598734379 2023-01-22 13:17:01.502157: step: 42/463, loss: 0.0015701227821409702 2023-01-22 13:17:02.235090: step: 44/463, loss: 0.05451464280486107 2023-01-22 13:17:02.784679: step: 46/463, loss: 0.0008218620205298066 2023-01-22 13:17:03.319387: step: 48/463, loss: 0.002668748376891017 2023-01-22 13:17:03.887630: step: 50/463, loss: 0.0015461058355867863 2023-01-22 13:17:04.536308: step: 52/463, loss: 0.0013470182893797755 2023-01-22 13:17:05.141162: step: 54/463, loss: 0.00822441466152668 2023-01-22 13:17:05.781255: step: 56/463, loss: 0.0028365219477564096 2023-01-22 13:17:06.447030: step: 58/463, loss: 0.00044708087807521224 2023-01-22 13:17:07.069183: step: 60/463, loss: 0.010490639135241508 2023-01-22 13:17:07.649180: step: 62/463, loss: 0.0020631244406104088 2023-01-22 13:17:08.295801: step: 64/463, loss: 0.022615423426032066 2023-01-22 13:17:08.868897: step: 66/463, loss: 0.015612557530403137 2023-01-22 13:17:09.454642: step: 68/463, loss: 0.0027821483090519905 2023-01-22 13:17:10.009327: step: 70/463, loss: 0.013376089744269848 2023-01-22 13:17:10.598599: step: 72/463, loss: 0.021571798250079155 2023-01-22 13:17:11.148250: step: 74/463, loss: 0.0012288036523386836 2023-01-22 13:17:11.714460: step: 76/463, loss: 0.006507739424705505 2023-01-22 13:17:12.384540: step: 78/463, loss: 0.008792250417172909 2023-01-22 13:17:12.920930: step: 80/463, loss: 0.13713964819908142 2023-01-22 13:17:13.537721: step: 82/463, loss: 0.010746333748102188 2023-01-22 13:17:14.152725: step: 84/463, loss: 0.025266578420996666 2023-01-22 13:17:14.653886: step: 86/463, loss: 0.005215545650571585 2023-01-22 13:17:15.315932: step: 88/463, loss: 0.2075408399105072 2023-01-22 13:17:15.911528: step: 90/463, loss: 0.0064244866371154785 2023-01-22 13:17:16.606243: step: 92/463, loss: 0.035964082926511765 2023-01-22 13:17:17.214442: step: 94/463, loss: 0.002193576656281948 2023-01-22 13:17:17.782363: step: 96/463, loss: 0.03399713337421417 2023-01-22 13:17:18.413244: step: 98/463, loss: 0.06357075273990631 2023-01-22 13:17:18.954891: step: 100/463, loss: 0.00032459720387123525 2023-01-22 13:17:19.569442: step: 102/463, loss: 0.017216861248016357 2023-01-22 13:17:20.197978: step: 104/463, loss: 0.0002555028477218002 2023-01-22 13:17:20.812238: step: 106/463, loss: 0.0009637364419177175 2023-01-22 13:17:21.439971: step: 108/463, loss: 0.030441882088780403 2023-01-22 13:17:22.032628: step: 110/463, loss: 9.589681576471776e-05 2023-01-22 13:17:22.836599: step: 112/463, loss: 0.011581837199628353 2023-01-22 13:17:23.425181: step: 114/463, loss: 0.004575759172439575 2023-01-22 13:17:24.051084: step: 116/463, loss: 0.0029651059303432703 2023-01-22 13:17:24.620484: step: 118/463, loss: 0.003387508448213339 2023-01-22 13:17:25.304673: step: 120/463, loss: 0.013092845678329468 2023-01-22 13:17:25.953263: step: 122/463, loss: 0.03617576137185097 2023-01-22 13:17:26.590632: step: 124/463, loss: 0.0005085245938971639 2023-01-22 13:17:27.283128: step: 126/463, loss: 0.03192896023392677 2023-01-22 13:17:27.887894: step: 128/463, loss: 0.058753203600645065 2023-01-22 13:17:28.485135: step: 130/463, loss: 0.00109693908598274 2023-01-22 13:17:29.046937: step: 132/463, loss: 0.0025991476140916348 2023-01-22 13:17:29.666305: step: 134/463, loss: 0.006514101754873991 2023-01-22 13:17:30.333303: step: 136/463, loss: 0.014389179646968842 2023-01-22 13:17:30.948015: step: 138/463, loss: 0.029964737594127655 2023-01-22 13:17:31.568713: step: 140/463, loss: 0.010847127996385098 2023-01-22 13:17:32.161041: step: 142/463, loss: 4.811858889297582e-05 2023-01-22 13:17:32.745005: step: 144/463, loss: 0.0007950214785523713 2023-01-22 13:17:33.414623: step: 146/463, loss: 0.0016385347116738558 2023-01-22 13:17:34.001185: step: 148/463, loss: 0.00019960342615377158 2023-01-22 13:17:34.626289: step: 150/463, loss: 0.013654787093400955 2023-01-22 13:17:35.225427: step: 152/463, loss: 0.25668656826019287 2023-01-22 13:17:35.862941: step: 154/463, loss: 0.008343420922756195 2023-01-22 13:17:36.524702: step: 156/463, loss: 0.006150003056973219 2023-01-22 13:17:37.114922: step: 158/463, loss: 0.01569363847374916 2023-01-22 13:17:37.745374: step: 160/463, loss: 0.0003534180286806077 2023-01-22 13:17:38.340402: step: 162/463, loss: 0.01329941488802433 2023-01-22 13:17:38.972537: step: 164/463, loss: 0.04652642458677292 2023-01-22 13:17:39.527794: step: 166/463, loss: 0.0017869491130113602 2023-01-22 13:17:40.086926: step: 168/463, loss: 0.0011533185606822371 2023-01-22 13:17:40.680355: step: 170/463, loss: 0.001133265090174973 2023-01-22 13:17:41.287705: step: 172/463, loss: 0.0019344929605722427 2023-01-22 13:17:41.857842: step: 174/463, loss: 0.0038246966432780027 2023-01-22 13:17:42.525248: step: 176/463, loss: 0.00142365088686347 2023-01-22 13:17:43.142696: step: 178/463, loss: 0.006401105783879757 2023-01-22 13:17:43.736945: step: 180/463, loss: 0.04276920482516289 2023-01-22 13:17:44.347154: step: 182/463, loss: 0.008806237019598484 2023-01-22 13:17:44.998881: step: 184/463, loss: 0.0005914178327657282 2023-01-22 13:17:45.584901: step: 186/463, loss: 0.0068437280133366585 2023-01-22 13:17:46.190955: step: 188/463, loss: 0.0007700577261857688 2023-01-22 13:17:46.828292: step: 190/463, loss: 0.0026511407922953367 2023-01-22 13:17:47.457005: step: 192/463, loss: 0.007346590049564838 2023-01-22 13:17:48.067124: step: 194/463, loss: 0.01857823133468628 2023-01-22 13:17:48.647504: step: 196/463, loss: 0.004577023442834616 2023-01-22 13:17:49.288344: step: 198/463, loss: 0.0009512401884421706 2023-01-22 13:17:49.999054: step: 200/463, loss: 0.01460027415305376 2023-01-22 13:17:50.576025: step: 202/463, loss: 0.014259354211390018 2023-01-22 13:17:51.130121: step: 204/463, loss: 0.006603836547583342 2023-01-22 13:17:51.729571: step: 206/463, loss: 0.0009632774745114148 2023-01-22 13:17:52.332052: step: 208/463, loss: 0.20120222866535187 2023-01-22 13:17:52.938042: step: 210/463, loss: 0.053990691900253296 2023-01-22 13:17:53.570195: step: 212/463, loss: 0.0005672593251802027 2023-01-22 13:17:54.205639: step: 214/463, loss: 0.06538698822259903 2023-01-22 13:17:54.861813: step: 216/463, loss: 0.006212085485458374 2023-01-22 13:17:55.456532: step: 218/463, loss: 0.013693749904632568 2023-01-22 13:17:56.070360: step: 220/463, loss: 0.5488004088401794 2023-01-22 13:17:56.658968: step: 222/463, loss: 0.0010441800113767385 2023-01-22 13:17:57.250074: step: 224/463, loss: 0.0062617091462016106 2023-01-22 13:17:57.849792: step: 226/463, loss: 0.003725644899532199 2023-01-22 13:17:58.445310: step: 228/463, loss: 0.0015154535649344325 2023-01-22 13:17:59.004622: step: 230/463, loss: 0.006395978387445211 2023-01-22 13:17:59.630318: step: 232/463, loss: 0.0021662330254912376 2023-01-22 13:18:00.333592: step: 234/463, loss: 0.002501111011952162 2023-01-22 13:18:00.947614: step: 236/463, loss: 0.013906202279031277 2023-01-22 13:18:01.511896: step: 238/463, loss: 0.05204048380255699 2023-01-22 13:18:02.125830: step: 240/463, loss: 4.068772614118643e-05 2023-01-22 13:18:02.719444: step: 242/463, loss: 0.005519125144928694 2023-01-22 13:18:03.359077: step: 244/463, loss: 0.316930890083313 2023-01-22 13:18:03.915160: step: 246/463, loss: 0.004956568591296673 2023-01-22 13:18:04.523978: step: 248/463, loss: 0.0038417638279497623 2023-01-22 13:18:05.142461: step: 250/463, loss: 0.006842820439487696 2023-01-22 13:18:05.833319: step: 252/463, loss: 0.07526186853647232 2023-01-22 13:18:06.444807: step: 254/463, loss: 0.3961915969848633 2023-01-22 13:18:07.099619: step: 256/463, loss: 0.12554974853992462 2023-01-22 13:18:07.697831: step: 258/463, loss: 0.0163657795637846 2023-01-22 13:18:08.371291: step: 260/463, loss: 0.02212616056203842 2023-01-22 13:18:09.020115: step: 262/463, loss: 0.0003563922073226422 2023-01-22 13:18:09.614733: step: 264/463, loss: 0.09829600155353546 2023-01-22 13:18:10.234960: step: 266/463, loss: 0.08193520456552505 2023-01-22 13:18:10.835876: step: 268/463, loss: 0.018553458154201508 2023-01-22 13:18:11.447146: step: 270/463, loss: 0.006240359507501125 2023-01-22 13:18:11.991922: step: 272/463, loss: 0.006647813133895397 2023-01-22 13:18:12.510041: step: 274/463, loss: 0.0011024614796042442 2023-01-22 13:18:13.107048: step: 276/463, loss: 0.003351131919771433 2023-01-22 13:18:13.636742: step: 278/463, loss: 0.01782275177538395 2023-01-22 13:18:14.207766: step: 280/463, loss: 0.008393857628107071 2023-01-22 13:18:14.831469: step: 282/463, loss: 0.00037038218579255044 2023-01-22 13:18:15.497784: step: 284/463, loss: 0.008358017541468143 2023-01-22 13:18:16.103358: step: 286/463, loss: 0.08788734674453735 2023-01-22 13:18:16.705131: step: 288/463, loss: 0.006910989992320538 2023-01-22 13:18:17.345405: step: 290/463, loss: 0.022601911798119545 2023-01-22 13:18:17.943019: step: 292/463, loss: 0.003383188508450985 2023-01-22 13:18:18.538650: step: 294/463, loss: 0.005388925317674875 2023-01-22 13:18:19.112039: step: 296/463, loss: 0.010944236069917679 2023-01-22 13:18:19.771627: step: 298/463, loss: 0.0010097564663738012 2023-01-22 13:18:20.398157: step: 300/463, loss: 0.1455574780702591 2023-01-22 13:18:20.995098: step: 302/463, loss: 0.011903516948223114 2023-01-22 13:18:21.597910: step: 304/463, loss: 0.006360805593430996 2023-01-22 13:18:22.139741: step: 306/463, loss: 0.001116794184781611 2023-01-22 13:18:22.815024: step: 308/463, loss: 0.041386678814888 2023-01-22 13:18:23.437051: step: 310/463, loss: 0.002185546327382326 2023-01-22 13:18:24.094738: step: 312/463, loss: 0.032803475856781006 2023-01-22 13:18:24.663535: step: 314/463, loss: 0.00047534273471683264 2023-01-22 13:18:25.263375: step: 316/463, loss: 0.0035968010779470205 2023-01-22 13:18:25.928519: step: 318/463, loss: 0.0023719794116914272 2023-01-22 13:18:26.592490: step: 320/463, loss: 0.017731424421072006 2023-01-22 13:18:27.157828: step: 322/463, loss: 0.001555209862999618 2023-01-22 13:18:27.776032: step: 324/463, loss: 0.19333000481128693 2023-01-22 13:18:28.402009: step: 326/463, loss: 0.0023012254387140274 2023-01-22 13:18:29.018359: step: 328/463, loss: 0.8035910129547119 2023-01-22 13:18:29.586334: step: 330/463, loss: 0.020183803513646126 2023-01-22 13:18:30.225994: step: 332/463, loss: 0.0036327107809484005 2023-01-22 13:18:30.764831: step: 334/463, loss: 0.00017435017798561603 2023-01-22 13:18:31.426150: step: 336/463, loss: 0.020087696611881256 2023-01-22 13:18:32.005967: step: 338/463, loss: 0.004522798117250204 2023-01-22 13:18:32.564880: step: 340/463, loss: 0.0332925021648407 2023-01-22 13:18:33.181754: step: 342/463, loss: 0.0007199611281976104 2023-01-22 13:18:33.787978: step: 344/463, loss: 0.01485811360180378 2023-01-22 13:18:34.422068: step: 346/463, loss: 0.008959410712122917 2023-01-22 13:18:35.042246: step: 348/463, loss: 0.0005478111561387777 2023-01-22 13:18:35.634562: step: 350/463, loss: 0.0010155013296753168 2023-01-22 13:18:36.272026: step: 352/463, loss: 0.1405760496854782 2023-01-22 13:18:36.899778: step: 354/463, loss: 0.0029731285758316517 2023-01-22 13:18:37.497215: step: 356/463, loss: 0.0006213237647898495 2023-01-22 13:18:38.133998: step: 358/463, loss: 0.032526131719350815 2023-01-22 13:18:38.781604: step: 360/463, loss: 0.002632910618558526 2023-01-22 13:18:39.409601: step: 362/463, loss: 0.7473871111869812 2023-01-22 13:18:39.992438: step: 364/463, loss: 0.0013244155561551452 2023-01-22 13:18:40.641402: step: 366/463, loss: 0.011469478718936443 2023-01-22 13:18:41.339391: step: 368/463, loss: 0.014940064400434494 2023-01-22 13:18:41.857972: step: 370/463, loss: 0.012037776410579681 2023-01-22 13:18:42.479970: step: 372/463, loss: 0.006952646188437939 2023-01-22 13:18:43.059075: step: 374/463, loss: 0.00018836453091353178 2023-01-22 13:18:43.728602: step: 376/463, loss: 0.013855385594069958 2023-01-22 13:18:44.295813: step: 378/463, loss: 0.0077673266641795635 2023-01-22 13:18:44.955796: step: 380/463, loss: 0.004257932770997286 2023-01-22 13:18:45.594443: step: 382/463, loss: 0.0026532199699431658 2023-01-22 13:18:46.228696: step: 384/463, loss: 0.021064486354589462 2023-01-22 13:18:46.841773: step: 386/463, loss: 0.022865070030093193 2023-01-22 13:18:47.426413: step: 388/463, loss: 0.00334391207434237 2023-01-22 13:18:48.076308: step: 390/463, loss: 0.002264923881739378 2023-01-22 13:18:48.666337: step: 392/463, loss: 0.036474183201789856 2023-01-22 13:18:49.273376: step: 394/463, loss: 0.0005822824314236641 2023-01-22 13:18:49.834464: step: 396/463, loss: 0.008036978542804718 2023-01-22 13:18:50.408161: step: 398/463, loss: 0.0007134778425097466 2023-01-22 13:18:51.009411: step: 400/463, loss: 0.0007486168760806322 2023-01-22 13:18:51.600405: step: 402/463, loss: 0.15983781218528748 2023-01-22 13:18:52.180406: step: 404/463, loss: 0.0007222112035378814 2023-01-22 13:18:52.772313: step: 406/463, loss: 0.0030092766974121332 2023-01-22 13:18:53.442718: step: 408/463, loss: 0.0007156338542699814 2023-01-22 13:18:54.075005: step: 410/463, loss: 0.001254756934940815 2023-01-22 13:18:54.652009: step: 412/463, loss: 0.004104102496057749 2023-01-22 13:18:55.254315: step: 414/463, loss: 0.0383087657392025 2023-01-22 13:18:55.825628: step: 416/463, loss: 0.00833128485828638 2023-01-22 13:18:56.437451: step: 418/463, loss: 0.0017676475690677762 2023-01-22 13:18:57.074068: step: 420/463, loss: 0.0016550406580790877 2023-01-22 13:18:57.692478: step: 422/463, loss: 0.0021914620883762836 2023-01-22 13:18:58.359848: step: 424/463, loss: 0.0013504276284947991 2023-01-22 13:18:59.088820: step: 426/463, loss: 0.0062805647030472755 2023-01-22 13:18:59.640960: step: 428/463, loss: 0.03294497728347778 2023-01-22 13:19:00.289871: step: 430/463, loss: 0.03719580918550491 2023-01-22 13:19:00.936192: step: 432/463, loss: 0.07113736867904663 2023-01-22 13:19:01.490068: step: 434/463, loss: 0.006984752602875233 2023-01-22 13:19:02.114343: step: 436/463, loss: 0.009843391366302967 2023-01-22 13:19:02.731864: step: 438/463, loss: 0.00597524456679821 2023-01-22 13:19:03.375061: step: 440/463, loss: 0.07690596580505371 2023-01-22 13:19:04.055634: step: 442/463, loss: 0.005051633808761835 2023-01-22 13:19:04.621126: step: 444/463, loss: 0.001145934802480042 2023-01-22 13:19:05.244084: step: 446/463, loss: 0.018852099776268005 2023-01-22 13:19:05.805540: step: 448/463, loss: 0.00020774270524270833 2023-01-22 13:19:06.430592: step: 450/463, loss: 0.0006223213276825845 2023-01-22 13:19:06.975350: step: 452/463, loss: 0.01759417913854122 2023-01-22 13:19:07.601991: step: 454/463, loss: 0.013798830099403858 2023-01-22 13:19:08.255135: step: 456/463, loss: 0.046021368354558945 2023-01-22 13:19:08.910932: step: 458/463, loss: 0.03674422949552536 2023-01-22 13:19:09.523031: step: 460/463, loss: 0.009432390332221985 2023-01-22 13:19:10.075189: step: 462/463, loss: 0.012854049913585186 2023-01-22 13:19:10.708007: step: 464/463, loss: 0.0790523886680603 2023-01-22 13:19:11.328483: step: 466/463, loss: 0.023337973281741142 2023-01-22 13:19:11.954008: step: 468/463, loss: 0.021093090996146202 2023-01-22 13:19:12.550547: step: 470/463, loss: 0.026403164491057396 2023-01-22 13:19:13.095225: step: 472/463, loss: 0.0011704692151397467 2023-01-22 13:19:13.748016: step: 474/463, loss: 0.06086888164281845 2023-01-22 13:19:14.313252: step: 476/463, loss: 0.0010551728773862123 2023-01-22 13:19:14.915695: step: 478/463, loss: 0.0008148958440870047 2023-01-22 13:19:15.502237: step: 480/463, loss: 0.009969083592295647 2023-01-22 13:19:16.058487: step: 482/463, loss: 0.01283843070268631 2023-01-22 13:19:16.739528: step: 484/463, loss: 0.0034997870679944754 2023-01-22 13:19:17.332332: step: 486/463, loss: 0.00028669112361967564 2023-01-22 13:19:17.941420: step: 488/463, loss: 0.09110712260007858 2023-01-22 13:19:18.571999: step: 490/463, loss: 0.009743470698595047 2023-01-22 13:19:19.261896: step: 492/463, loss: 0.003664525458589196 2023-01-22 13:19:19.848711: step: 494/463, loss: 0.004117617383599281 2023-01-22 13:19:20.494439: step: 496/463, loss: 0.019161734730005264 2023-01-22 13:19:21.111027: step: 498/463, loss: 0.0022390701342374086 2023-01-22 13:19:21.702767: step: 500/463, loss: 0.03654737398028374 2023-01-22 13:19:22.342771: step: 502/463, loss: 0.04710760712623596 2023-01-22 13:19:22.909590: step: 504/463, loss: 0.00016607699217274785 2023-01-22 13:19:23.525353: step: 506/463, loss: 0.26908519864082336 2023-01-22 13:19:24.128543: step: 508/463, loss: 0.06100417673587799 2023-01-22 13:19:24.706130: step: 510/463, loss: 0.06180068850517273 2023-01-22 13:19:25.322723: step: 512/463, loss: 0.01284338440746069 2023-01-22 13:19:25.876768: step: 514/463, loss: 0.004898529965430498 2023-01-22 13:19:26.379897: step: 516/463, loss: 0.004370218142867088 2023-01-22 13:19:27.028354: step: 518/463, loss: 0.011034929193556309 2023-01-22 13:19:27.652405: step: 520/463, loss: 0.007362432312220335 2023-01-22 13:19:28.261090: step: 522/463, loss: 0.025243492797017097 2023-01-22 13:19:28.819187: step: 524/463, loss: 0.029760627076029778 2023-01-22 13:19:29.369973: step: 526/463, loss: 0.013672089204192162 2023-01-22 13:19:29.933467: step: 528/463, loss: 0.026861751452088356 2023-01-22 13:19:30.541514: step: 530/463, loss: 0.0014564846642315388 2023-01-22 13:19:31.192151: step: 532/463, loss: 0.0023695307318121195 2023-01-22 13:19:31.776967: step: 534/463, loss: 0.010567440651357174 2023-01-22 13:19:32.376064: step: 536/463, loss: 0.03477469086647034 2023-01-22 13:19:32.972313: step: 538/463, loss: 0.014735058881342411 2023-01-22 13:19:33.658722: step: 540/463, loss: 0.0010662655113264918 2023-01-22 13:19:34.279257: step: 542/463, loss: 0.023383846506476402 2023-01-22 13:19:34.944391: step: 544/463, loss: 0.02319318614900112 2023-01-22 13:19:35.553598: step: 546/463, loss: 0.07221105694770813 2023-01-22 13:19:36.136433: step: 548/463, loss: 0.08004879206418991 2023-01-22 13:19:36.756693: step: 550/463, loss: 0.03057803213596344 2023-01-22 13:19:37.430609: step: 552/463, loss: 0.005712749902158976 2023-01-22 13:19:38.044646: step: 554/463, loss: 0.008013799786567688 2023-01-22 13:19:38.688323: step: 556/463, loss: 0.013723742216825485 2023-01-22 13:19:39.282119: step: 558/463, loss: 0.004704783204942942 2023-01-22 13:19:39.850700: step: 560/463, loss: 0.012912776321172714 2023-01-22 13:19:40.373973: step: 562/463, loss: 0.005185188725590706 2023-01-22 13:19:40.963777: step: 564/463, loss: 0.0004636362427845597 2023-01-22 13:19:41.554944: step: 566/463, loss: 0.028054652735590935 2023-01-22 13:19:42.099101: step: 568/463, loss: 0.030527614057064056 2023-01-22 13:19:42.732040: step: 570/463, loss: 0.002444436540827155 2023-01-22 13:19:43.280143: step: 572/463, loss: 0.004083929117769003 2023-01-22 13:19:43.857481: step: 574/463, loss: 0.01851549558341503 2023-01-22 13:19:44.446506: step: 576/463, loss: 0.005885877180844545 2023-01-22 13:19:45.114418: step: 578/463, loss: 0.03820318728685379 2023-01-22 13:19:45.743720: step: 580/463, loss: 0.0353037491440773 2023-01-22 13:19:46.344101: step: 582/463, loss: 0.0684768557548523 2023-01-22 13:19:46.884966: step: 584/463, loss: 0.006563975475728512 2023-01-22 13:19:47.515375: step: 586/463, loss: 0.04160820692777634 2023-01-22 13:19:48.030062: step: 588/463, loss: 0.007843821309506893 2023-01-22 13:19:48.552145: step: 590/463, loss: 0.0005108764162287116 2023-01-22 13:19:49.175175: step: 592/463, loss: 0.15797743201255798 2023-01-22 13:19:49.757809: step: 594/463, loss: 0.006693197879940271 2023-01-22 13:19:50.343187: step: 596/463, loss: 0.062018558382987976 2023-01-22 13:19:50.947556: step: 598/463, loss: 0.010066436603665352 2023-01-22 13:19:51.600283: step: 600/463, loss: 0.048210203647613525 2023-01-22 13:19:52.185704: step: 602/463, loss: 0.0025097534526139498 2023-01-22 13:19:52.800877: step: 604/463, loss: 0.02489396557211876 2023-01-22 13:19:53.396465: step: 606/463, loss: 0.0004875862505286932 2023-01-22 13:19:53.973597: step: 608/463, loss: 0.012104896828532219 2023-01-22 13:19:54.602246: step: 610/463, loss: 0.03129550814628601 2023-01-22 13:19:55.153185: step: 612/463, loss: 0.0008676177822053432 2023-01-22 13:19:55.685414: step: 614/463, loss: 0.0003353909414727241 2023-01-22 13:19:56.353212: step: 616/463, loss: 0.025637736544013023 2023-01-22 13:19:56.893771: step: 618/463, loss: 0.0005053762579336762 2023-01-22 13:19:57.491513: step: 620/463, loss: 0.01377609558403492 2023-01-22 13:19:58.110061: step: 622/463, loss: 0.0078073893673717976 2023-01-22 13:19:58.760310: step: 624/463, loss: 0.008490349166095257 2023-01-22 13:19:59.308804: step: 626/463, loss: 0.0005791570292785764 2023-01-22 13:19:59.891801: step: 628/463, loss: 0.009469408541917801 2023-01-22 13:20:00.498988: step: 630/463, loss: 0.005361108109354973 2023-01-22 13:20:01.078739: step: 632/463, loss: 0.0005024028359912336 2023-01-22 13:20:01.695734: step: 634/463, loss: 0.016048002988100052 2023-01-22 13:20:02.228512: step: 636/463, loss: 0.0012524072080850601 2023-01-22 13:20:02.791702: step: 638/463, loss: 0.0008066810551099479 2023-01-22 13:20:03.392169: step: 640/463, loss: 0.0015481916489079595 2023-01-22 13:20:03.988044: step: 642/463, loss: 0.021330559626221657 2023-01-22 13:20:04.594235: step: 644/463, loss: 0.010048109106719494 2023-01-22 13:20:05.198619: step: 646/463, loss: 0.4652419090270996 2023-01-22 13:20:05.855413: step: 648/463, loss: 0.0006253863102756441 2023-01-22 13:20:06.436141: step: 650/463, loss: 0.009285411797463894 2023-01-22 13:20:07.102118: step: 652/463, loss: 0.03028927370905876 2023-01-22 13:20:07.759880: step: 654/463, loss: 0.014444654807448387 2023-01-22 13:20:08.386545: step: 656/463, loss: 0.0228570569306612 2023-01-22 13:20:09.046636: step: 658/463, loss: 0.021325791254639626 2023-01-22 13:20:09.652616: step: 660/463, loss: 0.005649307277053595 2023-01-22 13:20:10.309600: step: 662/463, loss: 0.1185615211725235 2023-01-22 13:20:10.886156: step: 664/463, loss: 0.001825980027206242 2023-01-22 13:20:11.466324: step: 666/463, loss: 0.0521569699048996 2023-01-22 13:20:12.003343: step: 668/463, loss: 0.005966990254819393 2023-01-22 13:20:12.584502: step: 670/463, loss: 0.006369201000779867 2023-01-22 13:20:13.192311: step: 672/463, loss: 0.010017817839980125 2023-01-22 13:20:13.854630: step: 674/463, loss: 0.029045892879366875 2023-01-22 13:20:14.443932: step: 676/463, loss: 0.00931037962436676 2023-01-22 13:20:15.067407: step: 678/463, loss: 0.004967473912984133 2023-01-22 13:20:15.647722: step: 680/463, loss: 0.01018843986093998 2023-01-22 13:20:16.193428: step: 682/463, loss: 0.0005618171999230981 2023-01-22 13:20:16.879182: step: 684/463, loss: 0.02629307471215725 2023-01-22 13:20:17.432984: step: 686/463, loss: 0.0011716255685314536 2023-01-22 13:20:18.061642: step: 688/463, loss: 4.1036284528672695e-05 2023-01-22 13:20:18.779179: step: 690/463, loss: 0.05460178479552269 2023-01-22 13:20:19.341834: step: 692/463, loss: 0.0008286880329251289 2023-01-22 13:20:19.967980: step: 694/463, loss: 0.007452541496604681 2023-01-22 13:20:20.634524: step: 696/463, loss: 0.019342167302966118 2023-01-22 13:20:21.226993: step: 698/463, loss: 0.0008071481715887785 2023-01-22 13:20:21.826952: step: 700/463, loss: 0.000634913332760334 2023-01-22 13:20:22.401855: step: 702/463, loss: 0.0025818785652518272 2023-01-22 13:20:23.003159: step: 704/463, loss: 0.0005449080490507185 2023-01-22 13:20:23.602529: step: 706/463, loss: 0.0052023292519152164 2023-01-22 13:20:24.210485: step: 708/463, loss: 0.0008114945376291871 2023-01-22 13:20:24.876740: step: 710/463, loss: 0.007314126007258892 2023-01-22 13:20:25.483875: step: 712/463, loss: 0.0020018103532493114 2023-01-22 13:20:26.057814: step: 714/463, loss: 0.02496105618774891 2023-01-22 13:20:26.705809: step: 716/463, loss: 0.06659258902072906 2023-01-22 13:20:27.332425: step: 718/463, loss: 0.04558748006820679 2023-01-22 13:20:27.936114: step: 720/463, loss: 0.009435946121811867 2023-01-22 13:20:28.509747: step: 722/463, loss: 0.009029042907059193 2023-01-22 13:20:29.142922: step: 724/463, loss: 0.0017209950601682067 2023-01-22 13:20:29.776361: step: 726/463, loss: 0.021001620218157768 2023-01-22 13:20:30.390413: step: 728/463, loss: 0.008627827279269695 2023-01-22 13:20:30.994374: step: 730/463, loss: 0.006505152210593224 2023-01-22 13:20:31.603187: step: 732/463, loss: 0.005757859442383051 2023-01-22 13:20:32.206712: step: 734/463, loss: 0.04773740842938423 2023-01-22 13:20:32.819883: step: 736/463, loss: 0.011440160684287548 2023-01-22 13:20:33.459096: step: 738/463, loss: 0.004779912065714598 2023-01-22 13:20:34.024305: step: 740/463, loss: 0.045252975076436996 2023-01-22 13:20:34.645983: step: 742/463, loss: 0.07623853534460068 2023-01-22 13:20:35.207343: step: 744/463, loss: 0.008411399088799953 2023-01-22 13:20:35.820815: step: 746/463, loss: 0.0043028355576097965 2023-01-22 13:20:36.408897: step: 748/463, loss: 0.005836684722453356 2023-01-22 13:20:37.048546: step: 750/463, loss: 0.005340093281120062 2023-01-22 13:20:37.679570: step: 752/463, loss: 0.0005660206661559641 2023-01-22 13:20:38.257061: step: 754/463, loss: 0.008960151113569736 2023-01-22 13:20:38.861900: step: 756/463, loss: 0.0411030612885952 2023-01-22 13:20:39.437849: step: 758/463, loss: 0.0014925183495506644 2023-01-22 13:20:39.982687: step: 760/463, loss: 0.0026543105486780405 2023-01-22 13:20:40.609182: step: 762/463, loss: 0.0003035521076526493 2023-01-22 13:20:41.216680: step: 764/463, loss: 0.021044567227363586 2023-01-22 13:20:41.864483: step: 766/463, loss: 0.011688812635838985 2023-01-22 13:20:42.509723: step: 768/463, loss: 0.10110897570848465 2023-01-22 13:20:43.113750: step: 770/463, loss: 0.04459425061941147 2023-01-22 13:20:43.673741: step: 772/463, loss: 7.603244011988863e-05 2023-01-22 13:20:44.281612: step: 774/463, loss: 0.019078094512224197 2023-01-22 13:20:44.890114: step: 776/463, loss: 0.002768618753179908 2023-01-22 13:20:45.515873: step: 778/463, loss: 0.004745963960886002 2023-01-22 13:20:46.075119: step: 780/463, loss: 0.00015188485849648714 2023-01-22 13:20:46.707485: step: 782/463, loss: 0.012416007928550243 2023-01-22 13:20:47.299952: step: 784/463, loss: 0.0016538889613002539 2023-01-22 13:20:47.846292: step: 786/463, loss: 0.36856821179389954 2023-01-22 13:20:48.446740: step: 788/463, loss: 0.00014135270612314343 2023-01-22 13:20:49.041662: step: 790/463, loss: 0.019952071830630302 2023-01-22 13:20:49.663936: step: 792/463, loss: 0.0041428254917263985 2023-01-22 13:20:50.220920: step: 794/463, loss: 0.031157664954662323 2023-01-22 13:20:50.823130: step: 796/463, loss: 0.0029931424651294947 2023-01-22 13:20:51.458923: step: 798/463, loss: 0.060080792754888535 2023-01-22 13:20:52.075832: step: 800/463, loss: 0.00038428761763498187 2023-01-22 13:20:52.678972: step: 802/463, loss: 0.0035635759122669697 2023-01-22 13:20:53.395015: step: 804/463, loss: 0.0027083682361990213 2023-01-22 13:20:54.016052: step: 806/463, loss: 0.002922641346231103 2023-01-22 13:20:54.605000: step: 808/463, loss: 0.0036797085776925087 2023-01-22 13:20:55.216603: step: 810/463, loss: 0.0494624488055706 2023-01-22 13:20:55.830776: step: 812/463, loss: 0.02796754613518715 2023-01-22 13:20:56.375843: step: 814/463, loss: 0.1730279177427292 2023-01-22 13:20:56.966520: step: 816/463, loss: 0.010082573629915714 2023-01-22 13:20:57.572526: step: 818/463, loss: 0.09070724248886108 2023-01-22 13:20:58.175070: step: 820/463, loss: 0.00033572097891010344 2023-01-22 13:20:58.846094: step: 822/463, loss: 0.013764219358563423 2023-01-22 13:20:59.455217: step: 824/463, loss: 0.01487722061574459 2023-01-22 13:21:00.026194: step: 826/463, loss: 0.0014253434492275119 2023-01-22 13:21:00.575216: step: 828/463, loss: 0.006183678284287453 2023-01-22 13:21:01.204691: step: 830/463, loss: 0.033008165657520294 2023-01-22 13:21:01.866689: step: 832/463, loss: 0.0062687755562365055 2023-01-22 13:21:02.482557: step: 834/463, loss: 0.0012203037040308118 2023-01-22 13:21:03.029967: step: 836/463, loss: 0.0011219758307561278 2023-01-22 13:21:03.612717: step: 838/463, loss: 0.0019976417534053326 2023-01-22 13:21:04.223623: step: 840/463, loss: 0.01087773498147726 2023-01-22 13:21:04.830258: step: 842/463, loss: 0.0008014105260372162 2023-01-22 13:21:05.375972: step: 844/463, loss: 0.009464616887271404 2023-01-22 13:21:05.930858: step: 846/463, loss: 0.10054533183574677 2023-01-22 13:21:06.527640: step: 848/463, loss: 0.007601532619446516 2023-01-22 13:21:07.129168: step: 850/463, loss: 0.010330067947506905 2023-01-22 13:21:07.784003: step: 852/463, loss: 0.03854396939277649 2023-01-22 13:21:08.394107: step: 854/463, loss: 0.005770996678620577 2023-01-22 13:21:09.011985: step: 856/463, loss: 0.005867260042577982 2023-01-22 13:21:09.602358: step: 858/463, loss: 6.179288175189868e-05 2023-01-22 13:21:10.207218: step: 860/463, loss: 0.10213794559240341 2023-01-22 13:21:10.815202: step: 862/463, loss: 0.5764427185058594 2023-01-22 13:21:11.435529: step: 864/463, loss: 0.0631270632147789 2023-01-22 13:21:12.019624: step: 866/463, loss: 0.00036338967038318515 2023-01-22 13:21:12.597827: step: 868/463, loss: 0.00947169866412878 2023-01-22 13:21:13.266032: step: 870/463, loss: 0.0015089331427589059 2023-01-22 13:21:13.861309: step: 872/463, loss: 0.07484561204910278 2023-01-22 13:21:14.419283: step: 874/463, loss: 0.2011224776506424 2023-01-22 13:21:15.011762: step: 876/463, loss: 0.026650454849004745 2023-01-22 13:21:15.611267: step: 878/463, loss: 0.01840878464281559 2023-01-22 13:21:16.264676: step: 880/463, loss: 0.00026987181627191603 2023-01-22 13:21:16.883678: step: 882/463, loss: 0.011966696009039879 2023-01-22 13:21:17.546866: step: 884/463, loss: 0.0032956497743725777 2023-01-22 13:21:18.124256: step: 886/463, loss: 1.5728992366348393e-05 2023-01-22 13:21:18.737204: step: 888/463, loss: 0.043862234801054 2023-01-22 13:21:19.372540: step: 890/463, loss: 0.009606434032320976 2023-01-22 13:21:20.002295: step: 892/463, loss: 0.004375309217721224 2023-01-22 13:21:20.597917: step: 894/463, loss: 0.00282529485411942 2023-01-22 13:21:21.260906: step: 896/463, loss: 0.009064910933375359 2023-01-22 13:21:21.888715: step: 898/463, loss: 0.007233513984829187 2023-01-22 13:21:22.542297: step: 900/463, loss: 0.017115803435444832 2023-01-22 13:21:23.152212: step: 902/463, loss: 0.008231788873672485 2023-01-22 13:21:23.738894: step: 904/463, loss: 0.027875155210494995 2023-01-22 13:21:24.445271: step: 906/463, loss: 0.00670216279104352 2023-01-22 13:21:25.022525: step: 908/463, loss: 0.8182143568992615 2023-01-22 13:21:25.667965: step: 910/463, loss: 0.021769888699054718 2023-01-22 13:21:26.393307: step: 912/463, loss: 0.021472467109560966 2023-01-22 13:21:27.083567: step: 914/463, loss: 0.01789284497499466 2023-01-22 13:21:27.703703: step: 916/463, loss: 0.014903604052960873 2023-01-22 13:21:28.272428: step: 918/463, loss: 0.019243378192186356 2023-01-22 13:21:28.919199: step: 920/463, loss: 0.0014249717351049185 2023-01-22 13:21:29.538144: step: 922/463, loss: 0.021726321429014206 2023-01-22 13:21:30.113010: step: 924/463, loss: 0.08095753192901611 2023-01-22 13:21:30.773389: step: 926/463, loss: 0.006877733860164881 ================================================== Loss: 0.035 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33639990328820113, 'r': 0.3300166034155598, 'f1': 0.33317768199233716}, 'combined': 0.24549934462593262, 'epoch': 34} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.353726020636348, 'r': 0.4005426998382176, 'f1': 0.37568142881377653}, 'combined': 0.29119804530063065, 'epoch': 34} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2964682866556837, 'r': 0.3414729601518027, 'f1': 0.31738315696649033}, 'combined': 0.233861273554256, 'epoch': 34} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33589199922651947, 'r': 0.4078247527373458, 'f1': 0.36837968532854487}, 'combined': 0.2855383206852836, 'epoch': 34} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2974446585740445, 'r': 0.3352602034022437, 'f1': 0.3152223500320828}, 'combined': 0.23226910002363996, 'epoch': 34} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3372252173418279, 'r': 0.3951858015724546, 'f1': 0.36391210504513805}, 'combined': 0.2820754115660879, 'epoch': 34} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2558139534883721, 'r': 0.3142857142857143, 'f1': 0.28205128205128205}, 'combined': 0.18803418803418803, 'epoch': 34} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.30405405405405406, 'r': 0.4891304347826087, 'f1': 0.375}, 'combined': 0.1875, 'epoch': 34} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3815789473684211, 'r': 0.25, 'f1': 0.30208333333333337}, 'combined': 0.2013888888888889, 'epoch': 34} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3279711174242424, 'r': 0.3285934535104364, 'f1': 0.32828199052132695}, 'combined': 0.24189199301571457, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33699381251830834, 'r': 0.40575541764612494, 'f1': 0.3681917384478598}, 'combined': 0.2853926393710684, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32926829268292684, 'r': 0.38571428571428573, 'f1': 0.35526315789473684}, 'combined': 0.23684210526315788, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29021588054776093, 'r': 0.325460313859064, 'f1': 0.30682931199235547}, 'combined': 0.2260847562048935, 'epoch': 13} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32837198696491027, 'r': 0.4053341714098111, 'f1': 0.3628166009822085}, 'combined': 0.28122626487616165, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3333333333333333, 'r': 0.4782608695652174, 'f1': 0.3928571428571428}, 'combined': 0.1964285714285714, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30241528588023536, 'r': 0.33799355480732185, 'f1': 0.319216135095804}, 'combined': 0.2352118890179608, 'epoch': 18} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3270213595028262, 'r': 0.3847310111797955, 'f1': 0.3535366048679202}, 'combined': 0.2740331578402061, 'epoch': 18} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 18} ****************************** Epoch: 35 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 13:24:07.537147: step: 2/463, loss: 0.024554675444960594 2023-01-22 13:24:08.108668: step: 4/463, loss: 0.000228421893552877 2023-01-22 13:24:08.771790: step: 6/463, loss: 0.006230027414858341 2023-01-22 13:24:09.389447: step: 8/463, loss: 0.0010267156176269054 2023-01-22 13:24:09.976906: step: 10/463, loss: 0.006440572906285524 2023-01-22 13:24:10.547815: step: 12/463, loss: 0.00042961008148267865 2023-01-22 13:24:11.201696: step: 14/463, loss: 0.006021533627063036 2023-01-22 13:24:11.820099: step: 16/463, loss: 0.00038502324605360627 2023-01-22 13:24:12.459222: step: 18/463, loss: 0.0016179465455934405 2023-01-22 13:24:13.016917: step: 20/463, loss: 0.012197596952319145 2023-01-22 13:24:13.609835: step: 22/463, loss: 0.0105760982260108 2023-01-22 13:24:14.319337: step: 24/463, loss: 0.031051844358444214 2023-01-22 13:24:14.929402: step: 26/463, loss: 0.04035607725381851 2023-01-22 13:24:15.500850: step: 28/463, loss: 0.0010999601799994707 2023-01-22 13:24:16.152614: step: 30/463, loss: 0.022077837958931923 2023-01-22 13:24:16.841128: step: 32/463, loss: 1.580573320388794 2023-01-22 13:24:17.435417: step: 34/463, loss: 0.0020611651707440615 2023-01-22 13:24:18.070818: step: 36/463, loss: 0.1768808364868164 2023-01-22 13:24:18.734372: step: 38/463, loss: 0.004586328286677599 2023-01-22 13:24:19.377207: step: 40/463, loss: 0.01226328406482935 2023-01-22 13:24:19.980705: step: 42/463, loss: 0.014630187302827835 2023-01-22 13:24:20.578446: step: 44/463, loss: 0.005935985129326582 2023-01-22 13:24:21.157558: step: 46/463, loss: 3.65638334187679e-05 2023-01-22 13:24:21.809018: step: 48/463, loss: 0.0007069005514495075 2023-01-22 13:24:22.434449: step: 50/463, loss: 0.006781348492950201 2023-01-22 13:24:23.029633: step: 52/463, loss: 0.03647290915250778 2023-01-22 13:24:23.662807: step: 54/463, loss: 0.08398403227329254 2023-01-22 13:24:24.168672: step: 56/463, loss: 0.003984568174928427 2023-01-22 13:24:24.702620: step: 58/463, loss: 0.06392472982406616 2023-01-22 13:24:25.294668: step: 60/463, loss: 0.04403117671608925 2023-01-22 13:24:25.899722: step: 62/463, loss: 0.022905055433511734 2023-01-22 13:24:26.493499: step: 64/463, loss: 0.02600308135151863 2023-01-22 13:24:27.027044: step: 66/463, loss: 0.00486516859382391 2023-01-22 13:24:27.638423: step: 68/463, loss: 0.005585775710642338 2023-01-22 13:24:28.160679: step: 70/463, loss: 0.014749941416084766 2023-01-22 13:24:28.786969: step: 72/463, loss: 0.006160380784422159 2023-01-22 13:24:29.347829: step: 74/463, loss: 0.11902791261672974 2023-01-22 13:24:29.957327: step: 76/463, loss: 0.0756763368844986 2023-01-22 13:24:30.610946: step: 78/463, loss: 0.00040072196861729026 2023-01-22 13:24:31.242242: step: 80/463, loss: 0.011726079508662224 2023-01-22 13:24:31.952082: step: 82/463, loss: 0.00554349971935153 2023-01-22 13:24:32.539728: step: 84/463, loss: 0.0021053783129900694 2023-01-22 13:24:33.180017: step: 86/463, loss: 0.02026268094778061 2023-01-22 13:24:33.838578: step: 88/463, loss: 0.04527315869927406 2023-01-22 13:24:34.420164: step: 90/463, loss: 0.0014820119831711054 2023-01-22 13:24:35.028111: step: 92/463, loss: 0.013923609629273415 2023-01-22 13:24:35.674843: step: 94/463, loss: 0.0008256182773038745 2023-01-22 13:24:36.336493: step: 96/463, loss: 0.03240247815847397 2023-01-22 13:24:36.942607: step: 98/463, loss: 0.009753512218594551 2023-01-22 13:24:37.543815: step: 100/463, loss: 0.0011146693723276258 2023-01-22 13:24:38.118913: step: 102/463, loss: 0.009076748043298721 2023-01-22 13:24:38.772407: step: 104/463, loss: 0.005524345673620701 2023-01-22 13:24:39.354980: step: 106/463, loss: 0.002493618056178093 2023-01-22 13:24:39.934500: step: 108/463, loss: 0.019862966611981392 2023-01-22 13:24:40.559273: step: 110/463, loss: 0.014839885756373405 2023-01-22 13:24:41.107745: step: 112/463, loss: 0.007156794890761375 2023-01-22 13:24:41.746207: step: 114/463, loss: 0.004859243985265493 2023-01-22 13:24:42.357173: step: 116/463, loss: 0.0010114931501448154 2023-01-22 13:24:42.909178: step: 118/463, loss: 0.00018777334480546415 2023-01-22 13:24:43.523718: step: 120/463, loss: 0.00024580300669185817 2023-01-22 13:24:44.161569: step: 122/463, loss: 0.0009292989270761609 2023-01-22 13:24:44.751965: step: 124/463, loss: 0.00553923798725009 2023-01-22 13:24:45.345156: step: 126/463, loss: 0.018627328798174858 2023-01-22 13:24:45.944673: step: 128/463, loss: 0.02359108440577984 2023-01-22 13:24:46.477924: step: 130/463, loss: 0.0004572428879328072 2023-01-22 13:24:47.095881: step: 132/463, loss: 0.00232084677554667 2023-01-22 13:24:47.660289: step: 134/463, loss: 0.01168471947312355 2023-01-22 13:24:48.236878: step: 136/463, loss: 0.03794407844543457 2023-01-22 13:24:48.890944: step: 138/463, loss: 0.005098619032651186 2023-01-22 13:24:49.481490: step: 140/463, loss: 0.03334835544228554 2023-01-22 13:24:50.073445: step: 142/463, loss: 0.01427743211388588 2023-01-22 13:24:50.663752: step: 144/463, loss: 0.031748294830322266 2023-01-22 13:24:51.287806: step: 146/463, loss: 0.0023708692751824856 2023-01-22 13:24:51.926097: step: 148/463, loss: 0.03166327625513077 2023-01-22 13:24:52.538162: step: 150/463, loss: 0.0005548236658796668 2023-01-22 13:24:53.174403: step: 152/463, loss: 6.9699868618045e-05 2023-01-22 13:24:53.760100: step: 154/463, loss: 0.05512380972504616 2023-01-22 13:24:54.316928: step: 156/463, loss: 0.00031485213548876345 2023-01-22 13:24:54.923274: step: 158/463, loss: 0.9138661623001099 2023-01-22 13:24:55.531674: step: 160/463, loss: 0.02591072954237461 2023-01-22 13:24:56.234065: step: 162/463, loss: 0.0016724716406315565 2023-01-22 13:24:56.794944: step: 164/463, loss: 0.005577743519097567 2023-01-22 13:24:57.386869: step: 166/463, loss: 0.010732623748481274 2023-01-22 13:24:58.000706: step: 168/463, loss: 0.39056822657585144 2023-01-22 13:24:58.632063: step: 170/463, loss: 0.006436503026634455 2023-01-22 13:24:59.217978: step: 172/463, loss: 0.025078184902668 2023-01-22 13:24:59.781478: step: 174/463, loss: 0.005097771529108286 2023-01-22 13:25:00.375471: step: 176/463, loss: 0.00323680997826159 2023-01-22 13:25:00.968199: step: 178/463, loss: 0.005976242944598198 2023-01-22 13:25:01.567710: step: 180/463, loss: 0.003433573991060257 2023-01-22 13:25:02.152335: step: 182/463, loss: 0.008844452910125256 2023-01-22 13:25:02.731254: step: 184/463, loss: 0.03760220482945442 2023-01-22 13:25:03.301863: step: 186/463, loss: 0.008496008813381195 2023-01-22 13:25:03.973845: step: 188/463, loss: 0.049681950360536575 2023-01-22 13:25:04.543204: step: 190/463, loss: 0.12096449732780457 2023-01-22 13:25:05.104672: step: 192/463, loss: 0.003335256827995181 2023-01-22 13:25:05.740967: step: 194/463, loss: 0.004286882467567921 2023-01-22 13:25:06.373154: step: 196/463, loss: 0.009320305660367012 2023-01-22 13:25:07.079587: step: 198/463, loss: 0.01455577090382576 2023-01-22 13:25:07.645713: step: 200/463, loss: 0.026991579681634903 2023-01-22 13:25:08.324040: step: 202/463, loss: 0.05739283189177513 2023-01-22 13:25:08.898835: step: 204/463, loss: 0.0010014408035203815 2023-01-22 13:25:09.434449: step: 206/463, loss: 6.723425030941144e-05 2023-01-22 13:25:10.016250: step: 208/463, loss: 0.0018492756644263864 2023-01-22 13:25:10.639673: step: 210/463, loss: 0.054891977459192276 2023-01-22 13:25:11.271340: step: 212/463, loss: 0.024136517196893692 2023-01-22 13:25:11.810012: step: 214/463, loss: 0.00943446159362793 2023-01-22 13:25:12.449498: step: 216/463, loss: 0.0013909173430874944 2023-01-22 13:25:13.032079: step: 218/463, loss: 0.0015011630021035671 2023-01-22 13:25:13.637635: step: 220/463, loss: 0.006773713510483503 2023-01-22 13:25:14.211085: step: 222/463, loss: 0.007433029823005199 2023-01-22 13:25:14.786849: step: 224/463, loss: 0.016462130472064018 2023-01-22 13:25:15.371875: step: 226/463, loss: 0.021360553801059723 2023-01-22 13:25:16.012549: step: 228/463, loss: 0.02648688107728958 2023-01-22 13:25:16.648549: step: 230/463, loss: 0.007667865138500929 2023-01-22 13:25:17.278688: step: 232/463, loss: 0.008582384325563908 2023-01-22 13:25:17.891715: step: 234/463, loss: 0.0007704205927439034 2023-01-22 13:25:18.463997: step: 236/463, loss: 0.018951408565044403 2023-01-22 13:25:19.079723: step: 238/463, loss: 0.012252001091837883 2023-01-22 13:25:19.661950: step: 240/463, loss: 0.008671257644891739 2023-01-22 13:25:20.213796: step: 242/463, loss: 0.01242708321660757 2023-01-22 13:25:20.798180: step: 244/463, loss: 0.062470290809869766 2023-01-22 13:25:21.374502: step: 246/463, loss: 9.429272722627502e-06 2023-01-22 13:25:22.003072: step: 248/463, loss: 0.05481576547026634 2023-01-22 13:25:22.602315: step: 250/463, loss: 0.001671632518991828 2023-01-22 13:25:23.280372: step: 252/463, loss: 0.015048501081764698 2023-01-22 13:25:23.862508: step: 254/463, loss: 0.015687121078372 2023-01-22 13:25:24.444092: step: 256/463, loss: 0.009678141213953495 2023-01-22 13:25:25.094071: step: 258/463, loss: 0.03176980838179588 2023-01-22 13:25:25.732693: step: 260/463, loss: 0.0023526581935584545 2023-01-22 13:25:26.345273: step: 262/463, loss: 0.0009741898975335062 2023-01-22 13:25:26.975189: step: 264/463, loss: 0.001743873581290245 2023-01-22 13:25:27.601195: step: 266/463, loss: 0.016783457249403 2023-01-22 13:25:28.228104: step: 268/463, loss: 0.01479868683964014 2023-01-22 13:25:28.789640: step: 270/463, loss: 0.011904990300536156 2023-01-22 13:25:29.463474: step: 272/463, loss: 0.016465744003653526 2023-01-22 13:25:30.109089: step: 274/463, loss: 0.0030503838788717985 2023-01-22 13:25:30.755142: step: 276/463, loss: 0.0041530681774020195 2023-01-22 13:25:31.355385: step: 278/463, loss: 0.030611081048846245 2023-01-22 13:25:31.957862: step: 280/463, loss: 0.004559692461043596 2023-01-22 13:25:32.583131: step: 282/463, loss: 0.0004207395249977708 2023-01-22 13:25:33.160187: step: 284/463, loss: 0.014515679329633713 2023-01-22 13:25:33.768154: step: 286/463, loss: 0.1311478465795517 2023-01-22 13:25:34.384383: step: 288/463, loss: 0.0012332580517977476 2023-01-22 13:25:34.999125: step: 290/463, loss: 0.008443433791399002 2023-01-22 13:25:35.678858: step: 292/463, loss: 0.0008047773153521121 2023-01-22 13:25:36.295930: step: 294/463, loss: 0.006280484143644571 2023-01-22 13:25:36.868387: step: 296/463, loss: 0.0015236276667565107 2023-01-22 13:25:37.461637: step: 298/463, loss: 0.0010327985510230064 2023-01-22 13:25:38.040308: step: 300/463, loss: 0.01540662907063961 2023-01-22 13:25:38.625121: step: 302/463, loss: 0.01582682505249977 2023-01-22 13:25:39.228188: step: 304/463, loss: 0.008314600214362144 2023-01-22 13:25:39.812555: step: 306/463, loss: 0.00023417093325406313 2023-01-22 13:25:40.391843: step: 308/463, loss: 0.009640461765229702 2023-01-22 13:25:41.009978: step: 310/463, loss: 0.02170552685856819 2023-01-22 13:25:41.615082: step: 312/463, loss: 9.597272872924805 2023-01-22 13:25:42.246472: step: 314/463, loss: 0.0086945416405797 2023-01-22 13:25:42.904112: step: 316/463, loss: 0.018993010744452477 2023-01-22 13:25:43.496157: step: 318/463, loss: 0.04802653193473816 2023-01-22 13:25:44.077145: step: 320/463, loss: 0.0020790782291442156 2023-01-22 13:25:44.621349: step: 322/463, loss: 0.003995911683887243 2023-01-22 13:25:45.201943: step: 324/463, loss: 0.001561065437272191 2023-01-22 13:25:45.846312: step: 326/463, loss: 0.017352307215332985 2023-01-22 13:25:46.448164: step: 328/463, loss: 0.019160201773047447 2023-01-22 13:25:47.108280: step: 330/463, loss: 0.0292675644159317 2023-01-22 13:25:47.775955: step: 332/463, loss: 0.002015606965869665 2023-01-22 13:25:48.367257: step: 334/463, loss: 0.008558913134038448 2023-01-22 13:25:48.913388: step: 336/463, loss: 0.45045584440231323 2023-01-22 13:25:49.472066: step: 338/463, loss: 0.002873100806027651 2023-01-22 13:25:50.100585: step: 340/463, loss: 0.006066231522709131 2023-01-22 13:25:50.679910: step: 342/463, loss: 0.004225987941026688 2023-01-22 13:25:51.505446: step: 344/463, loss: 3.483574982965365e-05 2023-01-22 13:25:52.084804: step: 346/463, loss: 0.002040430437773466 2023-01-22 13:25:52.722529: step: 348/463, loss: 0.009336970746517181 2023-01-22 13:25:53.359035: step: 350/463, loss: 0.026241572573781013 2023-01-22 13:25:53.948546: step: 352/463, loss: 0.007362271659076214 2023-01-22 13:25:54.577059: step: 354/463, loss: 0.027371464297175407 2023-01-22 13:25:55.155518: step: 356/463, loss: 0.00485308887436986 2023-01-22 13:25:55.760734: step: 358/463, loss: 0.01370034459978342 2023-01-22 13:25:56.375398: step: 360/463, loss: 0.004669187590479851 2023-01-22 13:25:56.923730: step: 362/463, loss: 7.050875865388662e-05 2023-01-22 13:25:57.569274: step: 364/463, loss: 0.017405999824404716 2023-01-22 13:25:58.215083: step: 366/463, loss: 5.289023101795465e-05 2023-01-22 13:25:58.779214: step: 368/463, loss: 0.04082733020186424 2023-01-22 13:25:59.446355: step: 370/463, loss: 0.0022130284924060106 2023-01-22 13:26:00.072902: step: 372/463, loss: 0.019310006871819496 2023-01-22 13:26:00.641274: step: 374/463, loss: 0.017915597185492516 2023-01-22 13:26:01.220308: step: 376/463, loss: 0.32642772793769836 2023-01-22 13:26:01.872064: step: 378/463, loss: 0.044260960072278976 2023-01-22 13:26:02.509388: step: 380/463, loss: 0.05304685980081558 2023-01-22 13:26:03.201605: step: 382/463, loss: 0.005499101709574461 2023-01-22 13:26:03.781057: step: 384/463, loss: 0.00102675158996135 2023-01-22 13:26:04.361522: step: 386/463, loss: 0.006113287061452866 2023-01-22 13:26:04.895693: step: 388/463, loss: 0.012271015904843807 2023-01-22 13:26:05.495068: step: 390/463, loss: 0.39034149050712585 2023-01-22 13:26:06.184127: step: 392/463, loss: 0.2743619978427887 2023-01-22 13:26:06.752833: step: 394/463, loss: 0.0016630636528134346 2023-01-22 13:26:07.378849: step: 396/463, loss: 0.005295955576002598 2023-01-22 13:26:07.971375: step: 398/463, loss: 0.0025687047746032476 2023-01-22 13:26:08.519713: step: 400/463, loss: 0.00913708470761776 2023-01-22 13:26:09.115473: step: 402/463, loss: 0.048686228692531586 2023-01-22 13:26:09.702937: step: 404/463, loss: 0.00530043663457036 2023-01-22 13:26:10.360529: step: 406/463, loss: 0.0024528519716113806 2023-01-22 13:26:10.952993: step: 408/463, loss: 0.019931841641664505 2023-01-22 13:26:11.615549: step: 410/463, loss: 0.004248816054314375 2023-01-22 13:26:12.179213: step: 412/463, loss: 0.017514139413833618 2023-01-22 13:26:12.776105: step: 414/463, loss: 0.004406094551086426 2023-01-22 13:26:13.352240: step: 416/463, loss: 0.062271326780319214 2023-01-22 13:26:13.947756: step: 418/463, loss: 0.040909670293331146 2023-01-22 13:26:14.593534: step: 420/463, loss: 0.055211763828992844 2023-01-22 13:26:15.176723: step: 422/463, loss: 0.015441077761352062 2023-01-22 13:26:15.857569: step: 424/463, loss: 0.00951111875474453 2023-01-22 13:26:16.463817: step: 426/463, loss: 0.055415406823158264 2023-01-22 13:26:17.041274: step: 428/463, loss: 0.0016636957880109549 2023-01-22 13:26:17.651976: step: 430/463, loss: 0.010604572482407093 2023-01-22 13:26:18.246077: step: 432/463, loss: 0.0010946291731670499 2023-01-22 13:26:18.938448: step: 434/463, loss: 0.0017120030242949724 2023-01-22 13:26:19.528611: step: 436/463, loss: 0.0030200621113181114 2023-01-22 13:26:20.124427: step: 438/463, loss: 0.03694145008921623 2023-01-22 13:26:20.749354: step: 440/463, loss: 0.028448956087231636 2023-01-22 13:26:21.357900: step: 442/463, loss: 0.001407848671078682 2023-01-22 13:26:22.004488: step: 444/463, loss: 0.022016361355781555 2023-01-22 13:26:22.617600: step: 446/463, loss: 0.04089091345667839 2023-01-22 13:26:23.225217: step: 448/463, loss: 0.02491391822695732 2023-01-22 13:26:23.820250: step: 450/463, loss: 0.0032255356200039387 2023-01-22 13:26:24.383296: step: 452/463, loss: 0.5869053602218628 2023-01-22 13:26:25.042188: step: 454/463, loss: 0.029924217611551285 2023-01-22 13:26:25.615339: step: 456/463, loss: 0.002704784506931901 2023-01-22 13:26:26.188621: step: 458/463, loss: 0.0017000801162794232 2023-01-22 13:26:26.821880: step: 460/463, loss: 0.12426872551441193 2023-01-22 13:26:27.521000: step: 462/463, loss: 0.018536772578954697 2023-01-22 13:26:28.169526: step: 464/463, loss: 0.0425092987716198 2023-01-22 13:26:28.769897: step: 466/463, loss: 0.006782358046621084 2023-01-22 13:26:29.319532: step: 468/463, loss: 0.0001791364193195477 2023-01-22 13:26:29.922843: step: 470/463, loss: 0.0036689655389636755 2023-01-22 13:26:30.537036: step: 472/463, loss: 0.0036370190791785717 2023-01-22 13:26:31.171975: step: 474/463, loss: 0.0001329735532635823 2023-01-22 13:26:31.781317: step: 476/463, loss: 0.005705771967768669 2023-01-22 13:26:32.343114: step: 478/463, loss: 0.012633300386369228 2023-01-22 13:26:32.956138: step: 480/463, loss: 0.011397972702980042 2023-01-22 13:26:33.627565: step: 482/463, loss: 0.031048130244016647 2023-01-22 13:26:34.251236: step: 484/463, loss: 0.01875011995434761 2023-01-22 13:26:34.860003: step: 486/463, loss: 0.020077552646398544 2023-01-22 13:26:35.450423: step: 488/463, loss: 0.014815251342952251 2023-01-22 13:26:36.041736: step: 490/463, loss: 0.025888385251164436 2023-01-22 13:26:36.659128: step: 492/463, loss: 0.0296202190220356 2023-01-22 13:26:37.212380: step: 494/463, loss: 0.003721346380189061 2023-01-22 13:26:37.811185: step: 496/463, loss: 7.28084851289168e-05 2023-01-22 13:26:38.444870: step: 498/463, loss: 0.005277659278362989 2023-01-22 13:26:39.076809: step: 500/463, loss: 0.01000724732875824 2023-01-22 13:26:39.651436: step: 502/463, loss: 0.012316937558352947 2023-01-22 13:26:40.235708: step: 504/463, loss: 0.00889474805444479 2023-01-22 13:26:40.850134: step: 506/463, loss: 0.021152684465050697 2023-01-22 13:26:41.408087: step: 508/463, loss: 0.04041510075330734 2023-01-22 13:26:42.042472: step: 510/463, loss: 0.02835632488131523 2023-01-22 13:26:42.569939: step: 512/463, loss: 0.0006655848119407892 2023-01-22 13:26:43.201309: step: 514/463, loss: 0.03245805203914642 2023-01-22 13:26:43.798698: step: 516/463, loss: 0.008442052640020847 2023-01-22 13:26:44.444325: step: 518/463, loss: 0.1609584540128708 2023-01-22 13:26:45.017980: step: 520/463, loss: 0.00019230511679779738 2023-01-22 13:26:45.599577: step: 522/463, loss: 0.006102635990828276 2023-01-22 13:26:46.216242: step: 524/463, loss: 0.0005319634801708162 2023-01-22 13:26:46.807845: step: 526/463, loss: 0.004204627126455307 2023-01-22 13:26:47.397162: step: 528/463, loss: 0.03489713370800018 2023-01-22 13:26:47.923898: step: 530/463, loss: 9.540874452795833e-05 2023-01-22 13:26:48.544228: step: 532/463, loss: 0.013699028640985489 2023-01-22 13:26:49.183433: step: 534/463, loss: 0.029252339154481888 2023-01-22 13:26:49.893838: step: 536/463, loss: 1.0969972610473633 2023-01-22 13:26:50.583981: step: 538/463, loss: 0.009437579661607742 2023-01-22 13:26:51.245813: step: 540/463, loss: 0.021492689847946167 2023-01-22 13:26:51.805456: step: 542/463, loss: 0.027512123808264732 2023-01-22 13:26:52.360094: step: 544/463, loss: 0.001970319775864482 2023-01-22 13:26:53.051268: step: 546/463, loss: 0.02360204979777336 2023-01-22 13:26:53.642119: step: 548/463, loss: 0.0014719793107360601 2023-01-22 13:26:54.287238: step: 550/463, loss: 0.0048825484700500965 2023-01-22 13:26:54.885844: step: 552/463, loss: 0.0253410954028368 2023-01-22 13:26:55.551175: step: 554/463, loss: 0.10798918455839157 2023-01-22 13:26:56.128955: step: 556/463, loss: 0.0022672193590551615 2023-01-22 13:26:56.703057: step: 558/463, loss: 0.0017448312137275934 2023-01-22 13:26:57.286013: step: 560/463, loss: 0.009754628874361515 2023-01-22 13:26:57.863151: step: 562/463, loss: 0.0013372161192819476 2023-01-22 13:26:58.478599: step: 564/463, loss: 0.003857825417071581 2023-01-22 13:26:59.103846: step: 566/463, loss: 0.004110759124159813 2023-01-22 13:26:59.783934: step: 568/463, loss: 0.008388431742787361 2023-01-22 13:27:00.355817: step: 570/463, loss: 0.0011858759680762887 2023-01-22 13:27:01.007920: step: 572/463, loss: 0.001762226689606905 2023-01-22 13:27:01.631050: step: 574/463, loss: 0.000316695892252028 2023-01-22 13:27:02.258655: step: 576/463, loss: 0.0530208945274353 2023-01-22 13:27:02.922911: step: 578/463, loss: 0.1530473232269287 2023-01-22 13:27:03.553732: step: 580/463, loss: 0.005145453382283449 2023-01-22 13:27:04.196499: step: 582/463, loss: 0.007890643551945686 2023-01-22 13:27:04.770033: step: 584/463, loss: 0.007970706559717655 2023-01-22 13:27:05.325611: step: 586/463, loss: 0.05107571929693222 2023-01-22 13:27:05.998848: step: 588/463, loss: 0.0023640599101781845 2023-01-22 13:27:06.601814: step: 590/463, loss: 0.004151259083300829 2023-01-22 13:27:07.224740: step: 592/463, loss: 0.0077116540633141994 2023-01-22 13:27:07.807652: step: 594/463, loss: 0.0021214198786765337 2023-01-22 13:27:08.412254: step: 596/463, loss: 0.0044120000675320625 2023-01-22 13:27:09.001498: step: 598/463, loss: 0.004525544587522745 2023-01-22 13:27:09.609376: step: 600/463, loss: 0.19436657428741455 2023-01-22 13:27:10.181322: step: 602/463, loss: 0.0015599527396261692 2023-01-22 13:27:10.864197: step: 604/463, loss: 0.008009737357497215 2023-01-22 13:27:11.461303: step: 606/463, loss: 0.0013858468737453222 2023-01-22 13:27:12.084581: step: 608/463, loss: 0.3488721251487732 2023-01-22 13:27:12.738343: step: 610/463, loss: 0.02543606422841549 2023-01-22 13:27:13.334679: step: 612/463, loss: 0.005030633416026831 2023-01-22 13:27:13.924943: step: 614/463, loss: 0.030824998393654823 2023-01-22 13:27:14.520485: step: 616/463, loss: 0.004081486724317074 2023-01-22 13:27:15.030748: step: 618/463, loss: 0.0006215223693288863 2023-01-22 13:27:15.597877: step: 620/463, loss: 0.061252009123563766 2023-01-22 13:27:16.235235: step: 622/463, loss: 0.01989113725721836 2023-01-22 13:27:16.948704: step: 624/463, loss: 0.0015687687555328012 2023-01-22 13:27:17.526806: step: 626/463, loss: 0.0009675811161287129 2023-01-22 13:27:18.144758: step: 628/463, loss: 0.00019023318600375205 2023-01-22 13:27:18.758194: step: 630/463, loss: 0.06610078364610672 2023-01-22 13:27:19.358863: step: 632/463, loss: 0.014715391211211681 2023-01-22 13:27:19.996994: step: 634/463, loss: 0.009596790187060833 2023-01-22 13:27:20.537344: step: 636/463, loss: 0.006795715540647507 2023-01-22 13:27:21.139434: step: 638/463, loss: 0.0027659893967211246 2023-01-22 13:27:21.727133: step: 640/463, loss: 0.022830478847026825 2023-01-22 13:27:22.418330: step: 642/463, loss: 0.02685198374092579 2023-01-22 13:27:23.003727: step: 644/463, loss: 0.001776121906004846 2023-01-22 13:27:23.593736: step: 646/463, loss: 0.03276059031486511 2023-01-22 13:27:24.252045: step: 648/463, loss: 0.013307644985616207 2023-01-22 13:27:24.837687: step: 650/463, loss: 0.013781944289803505 2023-01-22 13:27:25.419106: step: 652/463, loss: 0.3509994149208069 2023-01-22 13:27:26.010149: step: 654/463, loss: 0.0005310252890922129 2023-01-22 13:27:26.647871: step: 656/463, loss: 0.006202084012329578 2023-01-22 13:27:27.222730: step: 658/463, loss: 0.0015261704102158546 2023-01-22 13:27:27.811742: step: 660/463, loss: 0.007692432031035423 2023-01-22 13:27:28.422952: step: 662/463, loss: 0.011984733864665031 2023-01-22 13:27:29.070224: step: 664/463, loss: 0.08859200030565262 2023-01-22 13:27:29.752488: step: 666/463, loss: 0.001600740011781454 2023-01-22 13:27:30.374732: step: 668/463, loss: 3.259199365857057e-05 2023-01-22 13:27:30.994133: step: 670/463, loss: 0.043061695992946625 2023-01-22 13:27:31.549887: step: 672/463, loss: 0.0011611173395067453 2023-01-22 13:27:32.152706: step: 674/463, loss: 0.0028423587791621685 2023-01-22 13:27:32.817776: step: 676/463, loss: 0.003807240864261985 2023-01-22 13:27:33.461145: step: 678/463, loss: 0.00036372512113302946 2023-01-22 13:27:34.165638: step: 680/463, loss: 0.0069422065280377865 2023-01-22 13:27:34.776358: step: 682/463, loss: 0.8122259378433228 2023-01-22 13:27:35.390606: step: 684/463, loss: 0.007210102863609791 2023-01-22 13:27:36.018651: step: 686/463, loss: 0.0004664790176320821 2023-01-22 13:27:36.650220: step: 688/463, loss: 0.013120567426085472 2023-01-22 13:27:37.246353: step: 690/463, loss: 0.0008050451870076358 2023-01-22 13:27:37.887715: step: 692/463, loss: 0.010334783233702183 2023-01-22 13:27:38.463629: step: 694/463, loss: 0.001780427061021328 2023-01-22 13:27:39.104852: step: 696/463, loss: 0.002615198725834489 2023-01-22 13:27:39.764491: step: 698/463, loss: 0.02158784121274948 2023-01-22 13:27:40.383557: step: 700/463, loss: 0.025469280779361725 2023-01-22 13:27:40.910470: step: 702/463, loss: 0.007899214513599873 2023-01-22 13:27:41.523760: step: 704/463, loss: 0.06707986444234848 2023-01-22 13:27:42.097111: step: 706/463, loss: 0.0001986706192838028 2023-01-22 13:27:42.666471: step: 708/463, loss: 0.011409505270421505 2023-01-22 13:27:43.236900: step: 710/463, loss: 0.0065339095890522 2023-01-22 13:27:43.814151: step: 712/463, loss: 0.0018706261180341244 2023-01-22 13:27:44.429828: step: 714/463, loss: 0.00028417882276698947 2023-01-22 13:27:45.030857: step: 716/463, loss: 0.0025505241937935352 2023-01-22 13:27:45.608568: step: 718/463, loss: 0.006199778523296118 2023-01-22 13:27:46.116906: step: 720/463, loss: 0.00019804327166639268 2023-01-22 13:27:46.723493: step: 722/463, loss: 0.04341735318303108 2023-01-22 13:27:47.322910: step: 724/463, loss: 0.032529257237911224 2023-01-22 13:27:47.965747: step: 726/463, loss: 0.04912516102194786 2023-01-22 13:27:48.576263: step: 728/463, loss: 0.09744817018508911 2023-01-22 13:27:49.220036: step: 730/463, loss: 0.0012814155779778957 2023-01-22 13:27:49.804015: step: 732/463, loss: 0.009129542857408524 2023-01-22 13:27:50.400060: step: 734/463, loss: 0.038396455347537994 2023-01-22 13:27:51.042214: step: 736/463, loss: 0.010526176542043686 2023-01-22 13:27:51.642584: step: 738/463, loss: 0.003573357593268156 2023-01-22 13:27:52.304589: step: 740/463, loss: 0.032019611448049545 2023-01-22 13:27:52.956021: step: 742/463, loss: 0.003927348181605339 2023-01-22 13:27:53.555885: step: 744/463, loss: 0.0004113336035516113 2023-01-22 13:27:54.211643: step: 746/463, loss: 0.019907468929886818 2023-01-22 13:27:54.796625: step: 748/463, loss: 0.02164171077311039 2023-01-22 13:27:55.460283: step: 750/463, loss: 0.03540413826704025 2023-01-22 13:27:56.132999: step: 752/463, loss: 0.005953527521342039 2023-01-22 13:27:56.745745: step: 754/463, loss: 0.0008240683237090707 2023-01-22 13:27:57.315854: step: 756/463, loss: 0.0007584497798234224 2023-01-22 13:27:57.960716: step: 758/463, loss: 0.0404651053249836 2023-01-22 13:27:58.561754: step: 760/463, loss: 0.0017691616667434573 2023-01-22 13:27:59.146295: step: 762/463, loss: 0.01616172306239605 2023-01-22 13:27:59.698498: step: 764/463, loss: 0.0012893083039671183 2023-01-22 13:28:00.304764: step: 766/463, loss: 9.886065527098253e-05 2023-01-22 13:28:00.864632: step: 768/463, loss: 0.0026200832799077034 2023-01-22 13:28:01.421850: step: 770/463, loss: 0.007900582626461983 2023-01-22 13:28:02.043948: step: 772/463, loss: 0.0001425204100087285 2023-01-22 13:28:02.608631: step: 774/463, loss: 0.0838509052991867 2023-01-22 13:28:03.169755: step: 776/463, loss: 0.0026854455936700106 2023-01-22 13:28:03.767643: step: 778/463, loss: 0.005901542026549578 2023-01-22 13:28:04.385967: step: 780/463, loss: 0.015036815777420998 2023-01-22 13:28:04.938502: step: 782/463, loss: 0.005546136759221554 2023-01-22 13:28:05.546032: step: 784/463, loss: 0.0008756810566410422 2023-01-22 13:28:06.127440: step: 786/463, loss: 0.015073849819600582 2023-01-22 13:28:06.735446: step: 788/463, loss: 0.019593510776758194 2023-01-22 13:28:07.344837: step: 790/463, loss: 0.0057046180590987206 2023-01-22 13:28:07.846201: step: 792/463, loss: 0.001117024919949472 2023-01-22 13:28:08.430791: step: 794/463, loss: 0.05120617896318436 2023-01-22 13:28:09.008107: step: 796/463, loss: 0.0006611481658183038 2023-01-22 13:28:09.682189: step: 798/463, loss: 0.01597796007990837 2023-01-22 13:28:10.252450: step: 800/463, loss: 0.009253596886992455 2023-01-22 13:28:10.815109: step: 802/463, loss: 0.04280107095837593 2023-01-22 13:28:11.367302: step: 804/463, loss: 0.0026175014209002256 2023-01-22 13:28:11.993047: step: 806/463, loss: 0.0369688905775547 2023-01-22 13:28:12.564353: step: 808/463, loss: 0.05045194551348686 2023-01-22 13:28:13.177851: step: 810/463, loss: 0.1270289272069931 2023-01-22 13:28:13.768712: step: 812/463, loss: 0.022772789001464844 2023-01-22 13:28:14.328191: step: 814/463, loss: 0.07281602919101715 2023-01-22 13:28:14.917446: step: 816/463, loss: 0.1199316680431366 2023-01-22 13:28:15.536500: step: 818/463, loss: 0.020382236689329147 2023-01-22 13:28:16.196913: step: 820/463, loss: 0.03233255445957184 2023-01-22 13:28:16.826316: step: 822/463, loss: 0.003871083492413163 2023-01-22 13:28:17.444036: step: 824/463, loss: 0.006677394267171621 2023-01-22 13:28:18.105881: step: 826/463, loss: 0.09403805434703827 2023-01-22 13:28:18.668863: step: 828/463, loss: 0.003594765905290842 2023-01-22 13:28:19.273346: step: 830/463, loss: 0.010870123282074928 2023-01-22 13:28:19.873293: step: 832/463, loss: 0.004279666114598513 2023-01-22 13:28:20.510576: step: 834/463, loss: 0.033330705016851425 2023-01-22 13:28:21.047205: step: 836/463, loss: 0.03654314950108528 2023-01-22 13:28:21.717988: step: 838/463, loss: 0.0034194951876997948 2023-01-22 13:28:22.297715: step: 840/463, loss: 0.10320030897855759 2023-01-22 13:28:22.881758: step: 842/463, loss: 0.006930488161742687 2023-01-22 13:28:23.483828: step: 844/463, loss: 0.007995801977813244 2023-01-22 13:28:24.130909: step: 846/463, loss: 0.08015644550323486 2023-01-22 13:28:24.708355: step: 848/463, loss: 0.09332186728715897 2023-01-22 13:28:25.284653: step: 850/463, loss: 0.007438365835696459 2023-01-22 13:28:25.959441: step: 852/463, loss: 0.010123154148459435 2023-01-22 13:28:26.537933: step: 854/463, loss: 0.08119158446788788 2023-01-22 13:28:27.116130: step: 856/463, loss: 0.0019944319501519203 2023-01-22 13:28:27.715641: step: 858/463, loss: 0.01843361184000969 2023-01-22 13:28:28.355976: step: 860/463, loss: 0.012350602075457573 2023-01-22 13:28:28.894189: step: 862/463, loss: 0.05584613233804703 2023-01-22 13:28:29.496162: step: 864/463, loss: 0.014386721886694431 2023-01-22 13:28:30.148355: step: 866/463, loss: 0.0353621244430542 2023-01-22 13:28:30.684958: step: 868/463, loss: 0.020597638562321663 2023-01-22 13:28:31.261191: step: 870/463, loss: 0.0006872104131616652 2023-01-22 13:28:31.904968: step: 872/463, loss: 0.00979156419634819 2023-01-22 13:28:32.560485: step: 874/463, loss: 0.023115357384085655 2023-01-22 13:28:33.236999: step: 876/463, loss: 0.003294885391369462 2023-01-22 13:28:33.823871: step: 878/463, loss: 0.027601156383752823 2023-01-22 13:28:34.470064: step: 880/463, loss: 0.0022946984972804785 2023-01-22 13:28:35.122711: step: 882/463, loss: 0.01858082413673401 2023-01-22 13:28:35.765443: step: 884/463, loss: 0.28362688422203064 2023-01-22 13:28:36.367155: step: 886/463, loss: 0.001347129000350833 2023-01-22 13:28:37.000612: step: 888/463, loss: 0.004604569170624018 2023-01-22 13:28:37.635930: step: 890/463, loss: 0.0011813611490651965 2023-01-22 13:28:38.246822: step: 892/463, loss: 0.024046581238508224 2023-01-22 13:28:38.934798: step: 894/463, loss: 0.0010008730459958315 2023-01-22 13:28:39.514471: step: 896/463, loss: 0.029678529128432274 2023-01-22 13:28:40.064852: step: 898/463, loss: 0.0014649323420599103 2023-01-22 13:28:40.642211: step: 900/463, loss: 0.009135115891695023 2023-01-22 13:28:41.306191: step: 902/463, loss: 0.004966255277395248 2023-01-22 13:28:41.880676: step: 904/463, loss: 0.013460664078593254 2023-01-22 13:28:42.526722: step: 906/463, loss: 0.0005449182935990393 2023-01-22 13:28:43.105481: step: 908/463, loss: 0.00665718549862504 2023-01-22 13:28:43.685081: step: 910/463, loss: 0.0077726589515805244 2023-01-22 13:28:44.338427: step: 912/463, loss: 0.021509969606995583 2023-01-22 13:28:45.006542: step: 914/463, loss: 0.0011775298044085503 2023-01-22 13:28:45.596197: step: 916/463, loss: 0.030353013426065445 2023-01-22 13:28:46.166738: step: 918/463, loss: 0.0007901111384853721 2023-01-22 13:28:46.790180: step: 920/463, loss: 0.0056722210720181465 2023-01-22 13:28:47.413686: step: 922/463, loss: 0.04050293564796448 2023-01-22 13:28:48.083107: step: 924/463, loss: 0.09436770528554916 2023-01-22 13:28:48.706377: step: 926/463, loss: 0.006017202511429787 ================================================== Loss: 0.056 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3298492318048626, 'r': 0.3436190289580067, 'f1': 0.33659336107968324}, 'combined': 0.24801616079555605, 'epoch': 35} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3452658696704263, 'r': 0.41095524009485485, 'f1': 0.3752574915847269}, 'combined': 0.2908694432379223, 'epoch': 35} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2888667629076087, 'r': 0.35080593597887966, 'f1': 0.316837580567043}, 'combined': 0.23345926989150534, 'epoch': 35} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3308713813764696, 'r': 0.4214960795843629, 'f1': 0.3707257353175318}, 'combined': 0.2873567900547376, 'epoch': 35} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29003356461894286, 'r': 0.34231665501514696, 'f1': 0.3140137113890034}, 'combined': 0.23137852418137092, 'epoch': 35} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3295499233398524, 'r': 0.4119374041748155, 'f1': 0.36616658148872494}, 'combined': 0.2838229004840835, 'epoch': 35} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24444444444444444, 'r': 0.3142857142857143, 'f1': 0.27499999999999997}, 'combined': 0.1833333333333333, 'epoch': 35} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2564102564102564, 'r': 0.43478260869565216, 'f1': 0.3225806451612903}, 'combined': 0.16129032258064516, 'epoch': 35} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3522727272727273, 'r': 0.2672413793103448, 'f1': 0.303921568627451}, 'combined': 0.20261437908496732, 'epoch': 35} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3279711174242424, 'r': 0.3285934535104364, 'f1': 0.32828199052132695}, 'combined': 0.24189199301571457, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33699381251830834, 'r': 0.40575541764612494, 'f1': 0.3681917384478598}, 'combined': 0.2853926393710684, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32926829268292684, 'r': 0.38571428571428573, 'f1': 0.35526315789473684}, 'combined': 0.23684210526315788, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29021588054776093, 'r': 0.325460313859064, 'f1': 0.30682931199235547}, 'combined': 0.2260847562048935, 'epoch': 13} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32837198696491027, 'r': 0.4053341714098111, 'f1': 0.3628166009822085}, 'combined': 0.28122626487616165, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3333333333333333, 'r': 0.4782608695652174, 'f1': 0.3928571428571428}, 'combined': 0.1964285714285714, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30241528588023536, 'r': 0.33799355480732185, 'f1': 0.319216135095804}, 'combined': 0.2352118890179608, 'epoch': 18} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3270213595028262, 'r': 0.3847310111797955, 'f1': 0.3535366048679202}, 'combined': 0.2740331578402061, 'epoch': 18} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 18} ****************************** Epoch: 36 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 13:31:25.925136: step: 2/463, loss: 0.0004926755791530013 2023-01-22 13:31:26.589299: step: 4/463, loss: 0.02238519862294197 2023-01-22 13:31:27.244021: step: 6/463, loss: 0.0616052970290184 2023-01-22 13:31:27.813040: step: 8/463, loss: 0.004799255169928074 2023-01-22 13:31:28.447087: step: 10/463, loss: 0.012655009515583515 2023-01-22 13:31:29.057969: step: 12/463, loss: 0.00014118333638180047 2023-01-22 13:31:29.654847: step: 14/463, loss: 0.15786932408809662 2023-01-22 13:31:30.243969: step: 16/463, loss: 0.001410311902873218 2023-01-22 13:31:30.920419: step: 18/463, loss: 0.018634648993611336 2023-01-22 13:31:31.556888: step: 20/463, loss: 0.000459634029539302 2023-01-22 13:31:32.179331: step: 22/463, loss: 0.002159084426239133 2023-01-22 13:31:32.785157: step: 24/463, loss: 0.002403414808213711 2023-01-22 13:31:33.450966: step: 26/463, loss: 0.014240997843444347 2023-01-22 13:31:34.095737: step: 28/463, loss: 0.0010254951193928719 2023-01-22 13:31:34.723456: step: 30/463, loss: 0.017695574089884758 2023-01-22 13:31:35.293641: step: 32/463, loss: 0.027020089328289032 2023-01-22 13:31:35.905716: step: 34/463, loss: 0.0011526475427672267 2023-01-22 13:31:36.550475: step: 36/463, loss: 0.04943987354636192 2023-01-22 13:31:37.087263: step: 38/463, loss: 5.3604599088430405e-06 2023-01-22 13:31:37.848689: step: 40/463, loss: 0.0004678784462157637 2023-01-22 13:31:38.467860: step: 42/463, loss: 0.14743058383464813 2023-01-22 13:31:39.111869: step: 44/463, loss: 0.0006350097246468067 2023-01-22 13:31:39.713486: step: 46/463, loss: 0.004167316481471062 2023-01-22 13:31:40.358785: step: 48/463, loss: 0.024480260908603668 2023-01-22 13:31:40.979225: step: 50/463, loss: 0.07773642987012863 2023-01-22 13:31:41.635344: step: 52/463, loss: 0.031239869073033333 2023-01-22 13:31:42.288011: step: 54/463, loss: 0.0041245208121836185 2023-01-22 13:31:42.879937: step: 56/463, loss: 0.005031470209360123 2023-01-22 13:31:43.496343: step: 58/463, loss: 0.0033055332023650408 2023-01-22 13:31:44.070250: step: 60/463, loss: 0.11986285448074341 2023-01-22 13:31:44.652751: step: 62/463, loss: 0.0007648723549209535 2023-01-22 13:31:45.303320: step: 64/463, loss: 0.12618069350719452 2023-01-22 13:31:45.873864: step: 66/463, loss: 0.004225063603371382 2023-01-22 13:31:46.473544: step: 68/463, loss: 0.0026896544732153416 2023-01-22 13:31:47.065877: step: 70/463, loss: 0.0016927430406212807 2023-01-22 13:31:47.684705: step: 72/463, loss: 0.004847423173487186 2023-01-22 13:31:48.290437: step: 74/463, loss: 0.001417087041772902 2023-01-22 13:31:48.804444: step: 76/463, loss: 0.0014156046090647578 2023-01-22 13:31:49.393725: step: 78/463, loss: 0.005870248191058636 2023-01-22 13:31:49.961055: step: 80/463, loss: 0.028519045561552048 2023-01-22 13:31:50.540189: step: 82/463, loss: 0.05495013669133186 2023-01-22 13:31:51.271197: step: 84/463, loss: 0.01506493054330349 2023-01-22 13:31:51.897424: step: 86/463, loss: 0.0004517593188211322 2023-01-22 13:31:52.498492: step: 88/463, loss: 0.002268270356580615 2023-01-22 13:31:53.080786: step: 90/463, loss: 0.013584124855697155 2023-01-22 13:31:53.685250: step: 92/463, loss: 0.012236799113452435 2023-01-22 13:31:54.302478: step: 94/463, loss: 0.004698202945291996 2023-01-22 13:31:54.894404: step: 96/463, loss: 0.027181118726730347 2023-01-22 13:31:55.566316: step: 98/463, loss: 0.025009119883179665 2023-01-22 13:31:56.171950: step: 100/463, loss: 0.010331111028790474 2023-01-22 13:31:56.843771: step: 102/463, loss: 0.0032453066669404507 2023-01-22 13:31:57.436918: step: 104/463, loss: 0.006861776579171419 2023-01-22 13:31:58.074921: step: 106/463, loss: 0.0005807341076433659 2023-01-22 13:31:58.693095: step: 108/463, loss: 0.013131448067724705 2023-01-22 13:31:59.209111: step: 110/463, loss: 0.0013315534451976418 2023-01-22 13:31:59.801826: step: 112/463, loss: 0.0020186949986964464 2023-01-22 13:32:00.334892: step: 114/463, loss: 0.015919573605060577 2023-01-22 13:32:00.916271: step: 116/463, loss: 0.003444872098043561 2023-01-22 13:32:01.523414: step: 118/463, loss: 0.03311912715435028 2023-01-22 13:32:02.109312: step: 120/463, loss: 0.02634321339428425 2023-01-22 13:32:02.669740: step: 122/463, loss: 0.030533963814377785 2023-01-22 13:32:03.243579: step: 124/463, loss: 0.0035745142959058285 2023-01-22 13:32:03.786909: step: 126/463, loss: 0.03102034144103527 2023-01-22 13:32:04.355242: step: 128/463, loss: 0.055629272013902664 2023-01-22 13:32:04.960976: step: 130/463, loss: 0.02219184674322605 2023-01-22 13:32:05.587005: step: 132/463, loss: 0.009436080232262611 2023-01-22 13:32:06.185868: step: 134/463, loss: 0.004411205183714628 2023-01-22 13:32:06.817803: step: 136/463, loss: 1.7568196199135855e-05 2023-01-22 13:32:07.396811: step: 138/463, loss: 0.00033781048841774464 2023-01-22 13:32:08.014093: step: 140/463, loss: 0.021601134911179543 2023-01-22 13:32:08.612426: step: 142/463, loss: 0.0011771252611652017 2023-01-22 13:32:09.217313: step: 144/463, loss: 0.008932785131037235 2023-01-22 13:32:09.856176: step: 146/463, loss: 0.0005776201141998172 2023-01-22 13:32:10.435086: step: 148/463, loss: 0.005300901364535093 2023-01-22 13:32:11.032572: step: 150/463, loss: 0.01612846367061138 2023-01-22 13:32:11.616589: step: 152/463, loss: 0.00011750024714274332 2023-01-22 13:32:12.255301: step: 154/463, loss: 0.007638935465365648 2023-01-22 13:32:12.796068: step: 156/463, loss: 0.021703720092773438 2023-01-22 13:32:13.367025: step: 158/463, loss: 0.003742585424333811 2023-01-22 13:32:13.998955: step: 160/463, loss: 8.992474613478407e-05 2023-01-22 13:32:14.606570: step: 162/463, loss: 0.006666215136647224 2023-01-22 13:32:15.153209: step: 164/463, loss: 0.0013682059943675995 2023-01-22 13:32:15.677882: step: 166/463, loss: 0.0017774467123672366 2023-01-22 13:32:16.301529: step: 168/463, loss: 0.0019421938341110945 2023-01-22 13:32:16.846964: step: 170/463, loss: 0.009636595845222473 2023-01-22 13:32:17.392151: step: 172/463, loss: 0.006382985971868038 2023-01-22 13:32:17.928761: step: 174/463, loss: 0.00425747549161315 2023-01-22 13:32:18.538147: step: 176/463, loss: 0.006000712979584932 2023-01-22 13:32:19.196229: step: 178/463, loss: 0.008318754844367504 2023-01-22 13:32:19.786719: step: 180/463, loss: 0.023962531238794327 2023-01-22 13:32:20.454096: step: 182/463, loss: 0.14740021526813507 2023-01-22 13:32:21.094783: step: 184/463, loss: 0.9892958402633667 2023-01-22 13:32:21.697015: step: 186/463, loss: 0.11170303076505661 2023-01-22 13:32:22.256691: step: 188/463, loss: 0.0009004765306599438 2023-01-22 13:32:22.826591: step: 190/463, loss: 0.0004100656951777637 2023-01-22 13:32:23.435518: step: 192/463, loss: 0.0006549927638843656 2023-01-22 13:32:24.043757: step: 194/463, loss: 0.00014262759941630065 2023-01-22 13:32:24.630677: step: 196/463, loss: 0.0029615741223096848 2023-01-22 13:32:25.194961: step: 198/463, loss: 0.001007150742225349 2023-01-22 13:32:25.837991: step: 200/463, loss: 0.1437002718448639 2023-01-22 13:32:26.511139: step: 202/463, loss: 0.014947419986128807 2023-01-22 13:32:27.124701: step: 204/463, loss: 0.018611252307891846 2023-01-22 13:32:27.673725: step: 206/463, loss: 0.0018871185602620244 2023-01-22 13:32:28.265631: step: 208/463, loss: 0.008279492147266865 2023-01-22 13:32:28.815119: step: 210/463, loss: 0.0003480594896245748 2023-01-22 13:32:29.451017: step: 212/463, loss: 0.032954078167676926 2023-01-22 13:32:30.052549: step: 214/463, loss: 0.001809043693356216 2023-01-22 13:32:30.635756: step: 216/463, loss: 0.0032473087776452303 2023-01-22 13:32:31.197299: step: 218/463, loss: 0.003574102884158492 2023-01-22 13:32:31.787936: step: 220/463, loss: 0.009183285757899284 2023-01-22 13:32:32.355465: step: 222/463, loss: 0.006684563122689724 2023-01-22 13:32:33.016645: step: 224/463, loss: 0.016727399080991745 2023-01-22 13:32:33.619431: step: 226/463, loss: 0.0002858467632904649 2023-01-22 13:32:34.182432: step: 228/463, loss: 0.005393872503191233 2023-01-22 13:32:34.763029: step: 230/463, loss: 0.0008613124373368919 2023-01-22 13:32:35.355893: step: 232/463, loss: 0.05925869196653366 2023-01-22 13:32:35.902124: step: 234/463, loss: 0.0002998412528540939 2023-01-22 13:32:36.515954: step: 236/463, loss: 0.03975815698504448 2023-01-22 13:32:37.135295: step: 238/463, loss: 0.002198026515543461 2023-01-22 13:32:37.778879: step: 240/463, loss: 0.03136700391769409 2023-01-22 13:32:38.392946: step: 242/463, loss: 0.00011733981955330819 2023-01-22 13:32:39.022095: step: 244/463, loss: 0.00014876711065880954 2023-01-22 13:32:39.703756: step: 246/463, loss: 0.0011152435326948762 2023-01-22 13:32:40.261862: step: 248/463, loss: 0.002581898355856538 2023-01-22 13:32:40.856069: step: 250/463, loss: 0.0030039451085031033 2023-01-22 13:32:41.494169: step: 252/463, loss: 0.013062836602330208 2023-01-22 13:32:42.139067: step: 254/463, loss: 0.0015480725560337305 2023-01-22 13:32:42.811252: step: 256/463, loss: 0.009470746852457523 2023-01-22 13:32:43.520315: step: 258/463, loss: 0.03372591361403465 2023-01-22 13:32:44.099528: step: 260/463, loss: 0.005516430363059044 2023-01-22 13:32:44.682710: step: 262/463, loss: 0.030937649309635162 2023-01-22 13:32:45.258296: step: 264/463, loss: 0.016965948045253754 2023-01-22 13:32:45.864965: step: 266/463, loss: 0.0012964120833203197 2023-01-22 13:32:46.421048: step: 268/463, loss: 0.014559167437255383 2023-01-22 13:32:47.019600: step: 270/463, loss: 0.0017193866660818458 2023-01-22 13:32:47.637130: step: 272/463, loss: 0.0016711689531803131 2023-01-22 13:32:48.164447: step: 274/463, loss: 0.004963552579283714 2023-01-22 13:32:48.794026: step: 276/463, loss: 1.2774089574813843 2023-01-22 13:32:49.367076: step: 278/463, loss: 0.0025270506739616394 2023-01-22 13:32:50.093210: step: 280/463, loss: 0.02895297482609749 2023-01-22 13:32:50.764299: step: 282/463, loss: 0.06715033948421478 2023-01-22 13:32:51.333091: step: 284/463, loss: 0.0023797901812940836 2023-01-22 13:32:51.958604: step: 286/463, loss: 0.006618114188313484 2023-01-22 13:32:52.537262: step: 288/463, loss: 0.013377774506807327 2023-01-22 13:32:53.223385: step: 290/463, loss: 0.0013190142344683409 2023-01-22 13:32:53.814058: step: 292/463, loss: 0.021279960870742798 2023-01-22 13:32:54.473631: step: 294/463, loss: 0.009338678792119026 2023-01-22 13:32:55.053010: step: 296/463, loss: 0.005434241611510515 2023-01-22 13:32:55.618020: step: 298/463, loss: 0.08452336490154266 2023-01-22 13:32:56.281903: step: 300/463, loss: 0.019597329199314117 2023-01-22 13:32:56.871222: step: 302/463, loss: 0.007165477145463228 2023-01-22 13:32:57.468116: step: 304/463, loss: 0.014686892740428448 2023-01-22 13:32:58.075983: step: 306/463, loss: 0.0041551715694367886 2023-01-22 13:32:58.669818: step: 308/463, loss: 0.08586680889129639 2023-01-22 13:32:59.264691: step: 310/463, loss: 0.10356751829385757 2023-01-22 13:32:59.861264: step: 312/463, loss: 0.00019130560394842178 2023-01-22 13:33:00.565549: step: 314/463, loss: 0.003673784201964736 2023-01-22 13:33:01.146499: step: 316/463, loss: 0.028642889112234116 2023-01-22 13:33:01.763896: step: 318/463, loss: 0.3337525725364685 2023-01-22 13:33:02.319185: step: 320/463, loss: 0.009891267865896225 2023-01-22 13:33:03.034353: step: 322/463, loss: 0.00046149801346473396 2023-01-22 13:33:03.608500: step: 324/463, loss: 0.009151432663202286 2023-01-22 13:33:04.250986: step: 326/463, loss: 0.015597046352922916 2023-01-22 13:33:04.846248: step: 328/463, loss: 0.0012257576454430819 2023-01-22 13:33:05.454333: step: 330/463, loss: 0.0031902380287647247 2023-01-22 13:33:06.084900: step: 332/463, loss: 0.01941579394042492 2023-01-22 13:33:06.659916: step: 334/463, loss: 0.004373494535684586 2023-01-22 13:33:07.240039: step: 336/463, loss: 0.010517958551645279 2023-01-22 13:33:07.855921: step: 338/463, loss: 0.08891010284423828 2023-01-22 13:33:08.466856: step: 340/463, loss: 0.11529798805713654 2023-01-22 13:33:09.029315: step: 342/463, loss: 0.0021454598754644394 2023-01-22 13:33:09.636901: step: 344/463, loss: 0.008525926619768143 2023-01-22 13:33:10.178616: step: 346/463, loss: 0.0010154180927202106 2023-01-22 13:33:10.809982: step: 348/463, loss: 0.003982314839959145 2023-01-22 13:33:11.402695: step: 350/463, loss: 0.0034363928716629744 2023-01-22 13:33:12.050008: step: 352/463, loss: 0.03635721653699875 2023-01-22 13:33:12.760933: step: 354/463, loss: 0.003859040793031454 2023-01-22 13:33:13.346998: step: 356/463, loss: 0.010492019355297089 2023-01-22 13:33:13.948664: step: 358/463, loss: 0.04434703662991524 2023-01-22 13:33:14.637728: step: 360/463, loss: 0.020779302343726158 2023-01-22 13:33:15.283188: step: 362/463, loss: 0.051597096025943756 2023-01-22 13:33:15.870769: step: 364/463, loss: 0.0030227352399379015 2023-01-22 13:33:16.502831: step: 366/463, loss: 0.021089598536491394 2023-01-22 13:33:17.132844: step: 368/463, loss: 0.005464594345539808 2023-01-22 13:33:17.720093: step: 370/463, loss: 0.00038491084706038237 2023-01-22 13:33:18.329668: step: 372/463, loss: 0.010451785288751125 2023-01-22 13:33:18.900842: step: 374/463, loss: 0.003971833735704422 2023-01-22 13:33:19.538667: step: 376/463, loss: 0.4399808347225189 2023-01-22 13:33:20.191929: step: 378/463, loss: 0.050659067928791046 2023-01-22 13:33:20.816323: step: 380/463, loss: 0.002487249905243516 2023-01-22 13:33:21.490450: step: 382/463, loss: 0.009695771150290966 2023-01-22 13:33:22.109086: step: 384/463, loss: 0.0027448353357613087 2023-01-22 13:33:22.713451: step: 386/463, loss: 0.00022534250456374139 2023-01-22 13:33:23.303522: step: 388/463, loss: 0.012742413207888603 2023-01-22 13:33:23.903265: step: 390/463, loss: 0.008741947822272778 2023-01-22 13:33:24.505159: step: 392/463, loss: 0.015133303590118885 2023-01-22 13:33:25.161768: step: 394/463, loss: 0.008081257343292236 2023-01-22 13:33:25.727456: step: 396/463, loss: 0.003655626205727458 2023-01-22 13:33:26.348788: step: 398/463, loss: 0.026013191789388657 2023-01-22 13:33:26.972619: step: 400/463, loss: 0.004687536507844925 2023-01-22 13:33:27.502010: step: 402/463, loss: 5.243177656666376e-05 2023-01-22 13:33:28.127664: step: 404/463, loss: 0.02930634096264839 2023-01-22 13:33:28.696879: step: 406/463, loss: 0.0014166105538606644 2023-01-22 13:33:29.264470: step: 408/463, loss: 0.00021431539789773524 2023-01-22 13:33:29.903116: step: 410/463, loss: 0.020718924701213837 2023-01-22 13:33:30.500071: step: 412/463, loss: 0.008741088211536407 2023-01-22 13:33:31.063622: step: 414/463, loss: 0.006040649488568306 2023-01-22 13:33:31.666645: step: 416/463, loss: 0.019894791767001152 2023-01-22 13:33:32.258323: step: 418/463, loss: 0.02136288210749626 2023-01-22 13:33:32.832911: step: 420/463, loss: 0.0019151787273585796 2023-01-22 13:33:33.444889: step: 422/463, loss: 0.8710062503814697 2023-01-22 13:33:34.067821: step: 424/463, loss: 0.001510797068476677 2023-01-22 13:33:34.668348: step: 426/463, loss: 0.01244441233575344 2023-01-22 13:33:35.221121: step: 428/463, loss: 0.014736899174749851 2023-01-22 13:33:35.836088: step: 430/463, loss: 0.4316057860851288 2023-01-22 13:33:36.495513: step: 432/463, loss: 0.017364177852869034 2023-01-22 13:33:37.156086: step: 434/463, loss: 0.01485108956694603 2023-01-22 13:33:37.786959: step: 436/463, loss: 0.024831417948007584 2023-01-22 13:33:38.401435: step: 438/463, loss: 0.016303060576319695 2023-01-22 13:33:39.040222: step: 440/463, loss: 0.008644654415547848 2023-01-22 13:33:39.599214: step: 442/463, loss: 0.0037164457608014345 2023-01-22 13:33:40.258305: step: 444/463, loss: 0.09094803780317307 2023-01-22 13:33:40.850984: step: 446/463, loss: 0.00975746102631092 2023-01-22 13:33:41.504250: step: 448/463, loss: 0.4025951027870178 2023-01-22 13:33:42.160071: step: 450/463, loss: 0.030930671840906143 2023-01-22 13:33:42.675673: step: 452/463, loss: 0.01561846025288105 2023-01-22 13:33:43.248407: step: 454/463, loss: 0.0025900397449731827 2023-01-22 13:33:43.890860: step: 456/463, loss: 0.0030286521650850773 2023-01-22 13:33:44.516592: step: 458/463, loss: 0.00490938127040863 2023-01-22 13:33:45.118753: step: 460/463, loss: 0.01058991625905037 2023-01-22 13:33:45.747432: step: 462/463, loss: 0.01330417487770319 2023-01-22 13:33:46.348636: step: 464/463, loss: 0.0003906160709448159 2023-01-22 13:33:46.957837: step: 466/463, loss: 0.009563478641211987 2023-01-22 13:33:47.564362: step: 468/463, loss: 0.0009717753855511546 2023-01-22 13:33:48.129731: step: 470/463, loss: 0.03675663471221924 2023-01-22 13:33:48.679563: step: 472/463, loss: 0.00033287954283878207 2023-01-22 13:33:49.304817: step: 474/463, loss: 0.05320652946829796 2023-01-22 13:33:49.921926: step: 476/463, loss: 0.0005783308879472315 2023-01-22 13:33:50.638251: step: 478/463, loss: 0.04280523583292961 2023-01-22 13:33:51.200350: step: 480/463, loss: 0.03138202428817749 2023-01-22 13:33:51.793749: step: 482/463, loss: 0.016117680817842484 2023-01-22 13:33:52.396709: step: 484/463, loss: 0.006584015674889088 2023-01-22 13:33:52.958872: step: 486/463, loss: 0.010424034669995308 2023-01-22 13:33:53.587761: step: 488/463, loss: 0.025882141664624214 2023-01-22 13:33:54.278530: step: 490/463, loss: 0.0014137310208752751 2023-01-22 13:33:54.843044: step: 492/463, loss: 0.0005551911890506744 2023-01-22 13:33:55.486471: step: 494/463, loss: 0.2818014323711395 2023-01-22 13:33:56.108321: step: 496/463, loss: 0.017470164224505424 2023-01-22 13:33:56.675360: step: 498/463, loss: 0.00011574823292903602 2023-01-22 13:33:57.348130: step: 500/463, loss: 0.02045602723956108 2023-01-22 13:33:57.969198: step: 502/463, loss: 0.10712159425020218 2023-01-22 13:33:58.571883: step: 504/463, loss: 0.0013147150166332722 2023-01-22 13:33:59.163415: step: 506/463, loss: 0.00317944772541523 2023-01-22 13:33:59.804312: step: 508/463, loss: 0.5646162033081055 2023-01-22 13:34:00.436988: step: 510/463, loss: 0.0005679419846273959 2023-01-22 13:34:01.038274: step: 512/463, loss: 0.030714282765984535 2023-01-22 13:34:01.638089: step: 514/463, loss: 0.0008262402261607349 2023-01-22 13:34:02.255063: step: 516/463, loss: 0.014902494847774506 2023-01-22 13:34:02.894833: step: 518/463, loss: 0.02746141515672207 2023-01-22 13:34:03.606179: step: 520/463, loss: 0.016667431220412254 2023-01-22 13:34:04.165148: step: 522/463, loss: 0.022922292351722717 2023-01-22 13:34:04.792922: step: 524/463, loss: 0.00801930297166109 2023-01-22 13:34:05.389822: step: 526/463, loss: 0.01766378991305828 2023-01-22 13:34:05.976599: step: 528/463, loss: 0.0006419854471459985 2023-01-22 13:34:06.601656: step: 530/463, loss: 0.01546458899974823 2023-01-22 13:34:07.183314: step: 532/463, loss: 0.055628638714551926 2023-01-22 13:34:07.816581: step: 534/463, loss: 0.005957188084721565 2023-01-22 13:34:08.523223: step: 536/463, loss: 0.016784073784947395 2023-01-22 13:34:09.101534: step: 538/463, loss: 0.01718474179506302 2023-01-22 13:34:09.653113: step: 540/463, loss: 0.017915578559041023 2023-01-22 13:34:10.210109: step: 542/463, loss: 0.01347360759973526 2023-01-22 13:34:10.775671: step: 544/463, loss: 0.0026090466417372227 2023-01-22 13:34:11.346211: step: 546/463, loss: 0.0026845773681998253 2023-01-22 13:34:11.932705: step: 548/463, loss: 0.0263515617698431 2023-01-22 13:34:12.578760: step: 550/463, loss: 0.01133830938488245 2023-01-22 13:34:13.179107: step: 552/463, loss: 0.1092524453997612 2023-01-22 13:34:13.812099: step: 554/463, loss: 0.008198747411370277 2023-01-22 13:34:14.468228: step: 556/463, loss: 0.021788250654935837 2023-01-22 13:34:15.141372: step: 558/463, loss: 0.005576969124376774 2023-01-22 13:34:15.840729: step: 560/463, loss: 0.059285759925842285 2023-01-22 13:34:16.409647: step: 562/463, loss: 0.0007793564000166953 2023-01-22 13:34:16.973095: step: 564/463, loss: 0.0005645605851896107 2023-01-22 13:34:17.559320: step: 566/463, loss: 0.000992911751382053 2023-01-22 13:34:18.171630: step: 568/463, loss: 0.029286060482263565 2023-01-22 13:34:18.784377: step: 570/463, loss: 0.7687966823577881 2023-01-22 13:34:19.353144: step: 572/463, loss: 0.18781490623950958 2023-01-22 13:34:19.920252: step: 574/463, loss: 0.015316360630095005 2023-01-22 13:34:20.506802: step: 576/463, loss: 0.011777011677622795 2023-01-22 13:34:21.112925: step: 578/463, loss: 0.0014651017263531685 2023-01-22 13:34:21.727160: step: 580/463, loss: 0.013675212860107422 2023-01-22 13:34:22.430008: step: 582/463, loss: 0.007511957548558712 2023-01-22 13:34:23.038035: step: 584/463, loss: 0.001818681601434946 2023-01-22 13:34:23.653669: step: 586/463, loss: 0.008130278438329697 2023-01-22 13:34:24.297603: step: 588/463, loss: 0.0009175124578177929 2023-01-22 13:34:24.893784: step: 590/463, loss: 0.0013268294278532267 2023-01-22 13:34:25.581266: step: 592/463, loss: 0.005795582197606564 2023-01-22 13:34:26.201342: step: 594/463, loss: 0.2278042882680893 2023-01-22 13:34:26.800030: step: 596/463, loss: 0.3387654423713684 2023-01-22 13:34:27.325925: step: 598/463, loss: 0.012030359357595444 2023-01-22 13:34:27.908793: step: 600/463, loss: 0.04630205035209656 2023-01-22 13:34:28.579727: step: 602/463, loss: 0.004996767267584801 2023-01-22 13:34:29.159588: step: 604/463, loss: 0.023959588259458542 2023-01-22 13:34:29.748419: step: 606/463, loss: 0.020490165799856186 2023-01-22 13:34:30.345203: step: 608/463, loss: 0.05195914953947067 2023-01-22 13:34:30.975168: step: 610/463, loss: 0.0014299642061814666 2023-01-22 13:34:31.510328: step: 612/463, loss: 0.05177592486143112 2023-01-22 13:34:32.184794: step: 614/463, loss: 0.03818044066429138 2023-01-22 13:34:32.778301: step: 616/463, loss: 0.006384481210261583 2023-01-22 13:34:33.436151: step: 618/463, loss: 0.0002505085722077638 2023-01-22 13:34:34.000503: step: 620/463, loss: 0.0005267433007247746 2023-01-22 13:34:34.589629: step: 622/463, loss: 0.0007392748375423253 2023-01-22 13:34:35.203283: step: 624/463, loss: 0.01500691007822752 2023-01-22 13:34:35.802028: step: 626/463, loss: 0.0047962237149477005 2023-01-22 13:34:36.390378: step: 628/463, loss: 0.3398101031780243 2023-01-22 13:34:37.020014: step: 630/463, loss: 0.3707369863986969 2023-01-22 13:34:37.735495: step: 632/463, loss: 0.019323181360960007 2023-01-22 13:34:38.361079: step: 634/463, loss: 0.007944508455693722 2023-01-22 13:34:38.971420: step: 636/463, loss: 0.04026941582560539 2023-01-22 13:34:39.545230: step: 638/463, loss: 0.03118385188281536 2023-01-22 13:34:40.209243: step: 640/463, loss: 6.613253935938701e-05 2023-01-22 13:34:40.896831: step: 642/463, loss: 0.057899050414562225 2023-01-22 13:34:41.486431: step: 644/463, loss: 0.0043227397836744785 2023-01-22 13:34:42.019905: step: 646/463, loss: 0.023480139672756195 2023-01-22 13:34:42.586032: step: 648/463, loss: 0.00424191216006875 2023-01-22 13:34:43.189417: step: 650/463, loss: 0.005802365019917488 2023-01-22 13:34:43.964615: step: 652/463, loss: 0.0011234697885811329 2023-01-22 13:34:44.578186: step: 654/463, loss: 0.007755516562610865 2023-01-22 13:34:45.143850: step: 656/463, loss: 0.001991518074646592 2023-01-22 13:34:45.709819: step: 658/463, loss: 0.002021184889599681 2023-01-22 13:34:46.265273: step: 660/463, loss: 0.04739458113908768 2023-01-22 13:34:46.862865: step: 662/463, loss: 0.050317615270614624 2023-01-22 13:34:47.473139: step: 664/463, loss: 0.006748190149664879 2023-01-22 13:34:48.120181: step: 666/463, loss: 0.059826381504535675 2023-01-22 13:34:48.699088: step: 668/463, loss: 0.010084355250000954 2023-01-22 13:34:49.301423: step: 670/463, loss: 0.09050492942333221 2023-01-22 13:34:49.875386: step: 672/463, loss: 0.009593677707016468 2023-01-22 13:34:50.476427: step: 674/463, loss: 0.08869484066963196 2023-01-22 13:34:51.043215: step: 676/463, loss: 0.0049742525443434715 2023-01-22 13:34:51.661371: step: 678/463, loss: 0.0006836153916083276 2023-01-22 13:34:52.292797: step: 680/463, loss: 0.012122646905481815 2023-01-22 13:34:53.030286: step: 682/463, loss: 0.1786395162343979 2023-01-22 13:34:53.607054: step: 684/463, loss: 0.0017495764186605811 2023-01-22 13:34:54.194989: step: 686/463, loss: 0.004210320767015219 2023-01-22 13:34:54.766339: step: 688/463, loss: 0.00992894358932972 2023-01-22 13:34:55.314839: step: 690/463, loss: 0.004616623744368553 2023-01-22 13:34:55.966458: step: 692/463, loss: 0.005472216289490461 2023-01-22 13:34:56.579262: step: 694/463, loss: 0.004750858526676893 2023-01-22 13:34:57.114651: step: 696/463, loss: 0.0007360982708632946 2023-01-22 13:34:57.703667: step: 698/463, loss: 0.03710366040468216 2023-01-22 13:34:58.318903: step: 700/463, loss: 0.0401921384036541 2023-01-22 13:34:58.935635: step: 702/463, loss: 0.02024674043059349 2023-01-22 13:34:59.559194: step: 704/463, loss: 0.192637637257576 2023-01-22 13:35:00.133549: step: 706/463, loss: 0.0004425140214152634 2023-01-22 13:35:00.767761: step: 708/463, loss: 0.028471769765019417 2023-01-22 13:35:01.370697: step: 710/463, loss: 0.00462668901309371 2023-01-22 13:35:02.037326: step: 712/463, loss: 0.00042205440695397556 2023-01-22 13:35:02.648295: step: 714/463, loss: 0.01462631393224001 2023-01-22 13:35:03.277230: step: 716/463, loss: 0.09215617924928665 2023-01-22 13:35:03.834760: step: 718/463, loss: 0.1404639035463333 2023-01-22 13:35:04.431820: step: 720/463, loss: 0.0009139894973486662 2023-01-22 13:35:05.048960: step: 722/463, loss: 0.004680534824728966 2023-01-22 13:35:05.620480: step: 724/463, loss: 8.959847764344886e-05 2023-01-22 13:35:06.215303: step: 726/463, loss: 0.0074323187582194805 2023-01-22 13:35:06.783525: step: 728/463, loss: 0.005084065720438957 2023-01-22 13:35:07.329093: step: 730/463, loss: 0.00336533784866333 2023-01-22 13:35:07.955121: step: 732/463, loss: 0.018698927015066147 2023-01-22 13:35:08.537838: step: 734/463, loss: 0.007453972939401865 2023-01-22 13:35:09.147334: step: 736/463, loss: 8.966495443019085e-06 2023-01-22 13:35:09.757806: step: 738/463, loss: 0.1251787543296814 2023-01-22 13:35:10.406808: step: 740/463, loss: 0.0010850889375433326 2023-01-22 13:35:11.117627: step: 742/463, loss: 0.007543194107711315 2023-01-22 13:35:11.696634: step: 744/463, loss: 0.014797679148614407 2023-01-22 13:35:12.266386: step: 746/463, loss: 0.004885831847786903 2023-01-22 13:35:12.892570: step: 748/463, loss: 0.00171879050321877 2023-01-22 13:35:13.548620: step: 750/463, loss: 0.003688403172418475 2023-01-22 13:35:14.139419: step: 752/463, loss: 0.0009126869263127446 2023-01-22 13:35:14.731381: step: 754/463, loss: 0.0011168696219101548 2023-01-22 13:35:15.384978: step: 756/463, loss: 0.0016336004482582211 2023-01-22 13:35:16.001307: step: 758/463, loss: 0.014039484784007072 2023-01-22 13:35:16.601752: step: 760/463, loss: 0.012102670036256313 2023-01-22 13:35:17.219803: step: 762/463, loss: 0.0779075026512146 2023-01-22 13:35:17.847909: step: 764/463, loss: 0.014404848217964172 2023-01-22 13:35:18.430183: step: 766/463, loss: 0.013028395362198353 2023-01-22 13:35:18.989911: step: 768/463, loss: 0.003530989633873105 2023-01-22 13:35:19.649838: step: 770/463, loss: 0.02890629507601261 2023-01-22 13:35:20.230195: step: 772/463, loss: 0.010100237093865871 2023-01-22 13:35:20.877067: step: 774/463, loss: 0.04835496470332146 2023-01-22 13:35:21.501324: step: 776/463, loss: 0.1938502937555313 2023-01-22 13:35:22.130973: step: 778/463, loss: 0.017065370455384254 2023-01-22 13:35:22.796213: step: 780/463, loss: 0.052604153752326965 2023-01-22 13:35:23.398972: step: 782/463, loss: 0.0004957179771736264 2023-01-22 13:35:23.969751: step: 784/463, loss: 0.004018211271613836 2023-01-22 13:35:24.677870: step: 786/463, loss: 0.020835421979427338 2023-01-22 13:35:25.240192: step: 788/463, loss: 0.6701751947402954 2023-01-22 13:35:25.881999: step: 790/463, loss: 0.033327676355838776 2023-01-22 13:35:26.537464: step: 792/463, loss: 0.12329889088869095 2023-01-22 13:35:27.110374: step: 794/463, loss: 0.00904530193656683 2023-01-22 13:35:27.694332: step: 796/463, loss: 0.0008595373365096748 2023-01-22 13:35:28.377593: step: 798/463, loss: 0.049598634243011475 2023-01-22 13:35:29.048559: step: 800/463, loss: 0.012237580493092537 2023-01-22 13:35:29.680994: step: 802/463, loss: 0.04790181666612625 2023-01-22 13:35:30.378976: step: 804/463, loss: 0.013909575529396534 2023-01-22 13:35:30.952807: step: 806/463, loss: 0.03652876615524292 2023-01-22 13:35:31.533760: step: 808/463, loss: 0.0009854212403297424 2023-01-22 13:35:32.211008: step: 810/463, loss: 0.4963323175907135 2023-01-22 13:35:32.843903: step: 812/463, loss: 0.01650214195251465 2023-01-22 13:35:33.554628: step: 814/463, loss: 0.00038486087578348815 2023-01-22 13:35:34.096839: step: 816/463, loss: 0.023917051032185555 2023-01-22 13:35:34.691372: step: 818/463, loss: 0.0021937338169664145 2023-01-22 13:35:35.293362: step: 820/463, loss: 0.000810742552857846 2023-01-22 13:35:35.939412: step: 822/463, loss: 0.00015931669622659683 2023-01-22 13:35:36.546629: step: 824/463, loss: 0.019404729828238487 2023-01-22 13:35:37.119343: step: 826/463, loss: 0.009138391353189945 2023-01-22 13:35:37.789186: step: 828/463, loss: 6.217642658157274e-05 2023-01-22 13:35:38.375469: step: 830/463, loss: 13.489502906799316 2023-01-22 13:35:38.996783: step: 832/463, loss: 0.003037678077816963 2023-01-22 13:35:39.602039: step: 834/463, loss: 0.0023070438764989376 2023-01-22 13:35:40.197090: step: 836/463, loss: 0.02351263165473938 2023-01-22 13:35:40.804450: step: 838/463, loss: 0.00047117617214098573 2023-01-22 13:35:41.424694: step: 840/463, loss: 0.0038112776819616556 2023-01-22 13:35:42.035336: step: 842/463, loss: 0.008953064680099487 2023-01-22 13:35:42.696118: step: 844/463, loss: 0.006670886650681496 2023-01-22 13:35:43.270145: step: 846/463, loss: 0.00036838703090325 2023-01-22 13:35:43.872424: step: 848/463, loss: 0.007583236321806908 2023-01-22 13:35:44.538558: step: 850/463, loss: 0.00285124103538692 2023-01-22 13:35:45.169460: step: 852/463, loss: 0.08847746253013611 2023-01-22 13:35:45.819585: step: 854/463, loss: 0.00040643694228492677 2023-01-22 13:35:46.468666: step: 856/463, loss: 0.004814634099602699 2023-01-22 13:35:47.077201: step: 858/463, loss: 0.006215792149305344 2023-01-22 13:35:47.718423: step: 860/463, loss: 0.0004852747078984976 2023-01-22 13:35:48.296847: step: 862/463, loss: 0.007043166551738977 2023-01-22 13:35:48.929063: step: 864/463, loss: 0.009274091571569443 2023-01-22 13:35:49.551288: step: 866/463, loss: 0.0008911711047403514 2023-01-22 13:35:50.170293: step: 868/463, loss: 0.013141664676368237 2023-01-22 13:35:50.826202: step: 870/463, loss: 0.0004966504639014602 2023-01-22 13:35:51.460810: step: 872/463, loss: 0.0002911067276727408 2023-01-22 13:35:52.132296: step: 874/463, loss: 0.00825702678412199 2023-01-22 13:35:52.718085: step: 876/463, loss: 0.010459580458700657 2023-01-22 13:35:53.329535: step: 878/463, loss: 0.00437451247125864 2023-01-22 13:35:53.915111: step: 880/463, loss: 0.016428804025053978 2023-01-22 13:35:54.509325: step: 882/463, loss: 0.04766729101538658 2023-01-22 13:35:55.083840: step: 884/463, loss: 0.00655767135322094 2023-01-22 13:35:55.712905: step: 886/463, loss: 0.0003531308611854911 2023-01-22 13:35:56.280996: step: 888/463, loss: 0.008266502059996128 2023-01-22 13:35:56.915668: step: 890/463, loss: 0.015137778595089912 2023-01-22 13:35:57.497009: step: 892/463, loss: 0.005476124584674835 2023-01-22 13:35:58.048714: step: 894/463, loss: 0.00510898744687438 2023-01-22 13:35:58.624997: step: 896/463, loss: 0.049214065074920654 2023-01-22 13:35:59.194311: step: 898/463, loss: 0.0038208586629480124 2023-01-22 13:35:59.760367: step: 900/463, loss: 0.0021358139347285032 2023-01-22 13:36:00.311227: step: 902/463, loss: 0.005033013876527548 2023-01-22 13:36:00.931700: step: 904/463, loss: 0.016478123143315315 2023-01-22 13:36:01.558753: step: 906/463, loss: 0.00983423925936222 2023-01-22 13:36:02.115652: step: 908/463, loss: 0.0019199280068278313 2023-01-22 13:36:02.697024: step: 910/463, loss: 0.0015439112903550267 2023-01-22 13:36:03.250278: step: 912/463, loss: 0.0016192031325772405 2023-01-22 13:36:03.818875: step: 914/463, loss: 0.005190224852412939 2023-01-22 13:36:04.433080: step: 916/463, loss: 0.005277537740767002 2023-01-22 13:36:04.999893: step: 918/463, loss: 0.0022299408446997404 2023-01-22 13:36:05.604721: step: 920/463, loss: 0.0019721633289009333 2023-01-22 13:36:06.189679: step: 922/463, loss: 0.012343078851699829 2023-01-22 13:36:06.850087: step: 924/463, loss: 0.002723332028836012 2023-01-22 13:36:07.427015: step: 926/463, loss: 0.0010699160629883409 ================================================== Loss: 0.066 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32718503600598725, 'r': 0.32718503600598725, 'f1': 0.32718503600598725}, 'combined': 0.24108371074125376, 'epoch': 36} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.35072597303362946, 'r': 0.4097175659243962, 'f1': 0.37793362588024}, 'combined': 0.29294376742870276, 'epoch': 36} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2901201525570703, 'r': 0.33911577604393794, 'f1': 0.31271043565206524}, 'combined': 0.230418215743627, 'epoch': 36} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3365927518078763, 'r': 0.4170285197031408, 'f1': 0.3725180865656956}, 'combined': 0.2887460766681469, 'epoch': 36} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2895573462558734, 'r': 0.3291173632016474, 'f1': 0.30807255844985465}, 'combined': 0.22700083254199815, 'epoch': 36} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33589301185238807, 'r': 0.4038125546901871, 'f1': 0.366734607264544}, 'combined': 0.28426318840601017, 'epoch': 36} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.22448979591836735, 'r': 0.3142857142857143, 'f1': 0.26190476190476186}, 'combined': 0.17460317460317457, 'epoch': 36} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3055555555555556, 'r': 0.4782608695652174, 'f1': 0.3728813559322034}, 'combined': 0.1864406779661017, 'epoch': 36} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.40789473684210525, 'r': 0.2672413793103448, 'f1': 0.3229166666666667}, 'combined': 0.2152777777777778, 'epoch': 36} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3279711174242424, 'r': 0.3285934535104364, 'f1': 0.32828199052132695}, 'combined': 0.24189199301571457, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33699381251830834, 'r': 0.40575541764612494, 'f1': 0.3681917384478598}, 'combined': 0.2853926393710684, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32926829268292684, 'r': 0.38571428571428573, 'f1': 0.35526315789473684}, 'combined': 0.23684210526315788, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29021588054776093, 'r': 0.325460313859064, 'f1': 0.30682931199235547}, 'combined': 0.2260847562048935, 'epoch': 13} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32837198696491027, 'r': 0.4053341714098111, 'f1': 0.3628166009822085}, 'combined': 0.28122626487616165, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3333333333333333, 'r': 0.4782608695652174, 'f1': 0.3928571428571428}, 'combined': 0.1964285714285714, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30241528588023536, 'r': 0.33799355480732185, 'f1': 0.319216135095804}, 'combined': 0.2352118890179608, 'epoch': 18} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3270213595028262, 'r': 0.3847310111797955, 'f1': 0.3535366048679202}, 'combined': 0.2740331578402061, 'epoch': 18} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 18} ****************************** Epoch: 37 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 13:38:43.610016: step: 2/463, loss: 0.6587218046188354 2023-01-22 13:38:44.311291: step: 4/463, loss: 0.010776903480291367 2023-01-22 13:38:44.954172: step: 6/463, loss: 0.0005929653998464346 2023-01-22 13:38:45.509004: step: 8/463, loss: 0.0003558184253051877 2023-01-22 13:38:46.107654: step: 10/463, loss: 0.0017356309108436108 2023-01-22 13:38:46.728434: step: 12/463, loss: 0.0006220794748514891 2023-01-22 13:38:47.378974: step: 14/463, loss: 0.28144556283950806 2023-01-22 13:38:47.997435: step: 16/463, loss: 0.0003804799052886665 2023-01-22 13:38:48.627964: step: 18/463, loss: 0.00018494875985197723 2023-01-22 13:38:49.287191: step: 20/463, loss: 0.00023998554388526827 2023-01-22 13:38:49.902367: step: 22/463, loss: 0.01660904474556446 2023-01-22 13:38:50.564785: step: 24/463, loss: 0.05208080634474754 2023-01-22 13:38:51.162032: step: 26/463, loss: 0.008441172540187836 2023-01-22 13:38:51.790185: step: 28/463, loss: 0.012856683693826199 2023-01-22 13:38:52.421157: step: 30/463, loss: 0.06990411132574081 2023-01-22 13:38:53.047168: step: 32/463, loss: 0.0006049636285752058 2023-01-22 13:38:53.604803: step: 34/463, loss: 0.001130929565988481 2023-01-22 13:38:54.282845: step: 36/463, loss: 0.03293420374393463 2023-01-22 13:38:54.870430: step: 38/463, loss: 0.00443666847422719 2023-01-22 13:38:55.470454: step: 40/463, loss: 0.07706819474697113 2023-01-22 13:38:56.017221: step: 42/463, loss: 0.0007607361185364425 2023-01-22 13:38:56.600621: step: 44/463, loss: 0.01823762059211731 2023-01-22 13:38:57.233858: step: 46/463, loss: 0.01940738968551159 2023-01-22 13:38:57.813753: step: 48/463, loss: 0.00553401792421937 2023-01-22 13:38:58.397065: step: 50/463, loss: 0.2633526623249054 2023-01-22 13:38:59.000154: step: 52/463, loss: 0.003037841757759452 2023-01-22 13:38:59.561264: step: 54/463, loss: 0.0010397820733487606 2023-01-22 13:39:00.234969: step: 56/463, loss: 0.018858512863516808 2023-01-22 13:39:01.029204: step: 58/463, loss: 0.001477002282626927 2023-01-22 13:39:01.606389: step: 60/463, loss: 0.030347801744937897 2023-01-22 13:39:02.207120: step: 62/463, loss: 0.7860462665557861 2023-01-22 13:39:02.827725: step: 64/463, loss: 0.0010909750126302242 2023-01-22 13:39:03.397883: step: 66/463, loss: 6.107752415118739e-05 2023-01-22 13:39:04.062438: step: 68/463, loss: 0.2685215175151825 2023-01-22 13:39:04.705363: step: 70/463, loss: 0.009650515392422676 2023-01-22 13:39:05.308647: step: 72/463, loss: 0.002894675126299262 2023-01-22 13:39:05.882203: step: 74/463, loss: 0.017059307545423508 2023-01-22 13:39:06.493505: step: 76/463, loss: 0.0068626548163592815 2023-01-22 13:39:07.088673: step: 78/463, loss: 0.0013277180260047317 2023-01-22 13:39:07.705193: step: 80/463, loss: 0.0013721180148422718 2023-01-22 13:39:08.296927: step: 82/463, loss: 0.008821958675980568 2023-01-22 13:39:08.881160: step: 84/463, loss: 0.020003188401460648 2023-01-22 13:39:09.440116: step: 86/463, loss: 0.0007712449296377599 2023-01-22 13:39:10.045118: step: 88/463, loss: 5.655456334352493e-06 2023-01-22 13:39:10.661439: step: 90/463, loss: 0.018775301054120064 2023-01-22 13:39:11.278583: step: 92/463, loss: 0.0014276603469625115 2023-01-22 13:39:11.826204: step: 94/463, loss: 0.00956269446760416 2023-01-22 13:39:12.480746: step: 96/463, loss: 0.002346278168261051 2023-01-22 13:39:13.108738: step: 98/463, loss: 0.0004997443757019937 2023-01-22 13:39:13.663339: step: 100/463, loss: 2.352398951188661e-05 2023-01-22 13:39:14.252087: step: 102/463, loss: 0.026509350165724754 2023-01-22 13:39:14.879540: step: 104/463, loss: 0.00048460852121934295 2023-01-22 13:39:15.459928: step: 106/463, loss: 0.0020447096321731806 2023-01-22 13:39:16.058293: step: 108/463, loss: 0.042406003922224045 2023-01-22 13:39:16.599668: step: 110/463, loss: 0.0007937455666251481 2023-01-22 13:39:17.213720: step: 112/463, loss: 0.00023135694209486246 2023-01-22 13:39:17.793046: step: 114/463, loss: 0.0032308471854776144 2023-01-22 13:39:18.361159: step: 116/463, loss: 0.013062181882560253 2023-01-22 13:39:18.942406: step: 118/463, loss: 0.003929843660444021 2023-01-22 13:39:19.499274: step: 120/463, loss: 8.747879473958164e-05 2023-01-22 13:39:20.134267: step: 122/463, loss: 0.0021208536345511675 2023-01-22 13:39:20.711224: step: 124/463, loss: 0.004104707855731249 2023-01-22 13:39:21.262430: step: 126/463, loss: 0.000277163548162207 2023-01-22 13:39:21.855022: step: 128/463, loss: 0.05952223017811775 2023-01-22 13:39:22.451398: step: 130/463, loss: 0.008136567659676075 2023-01-22 13:39:23.055203: step: 132/463, loss: 2.4742646928643808e-05 2023-01-22 13:39:23.699339: step: 134/463, loss: 0.02547604590654373 2023-01-22 13:39:24.363264: step: 136/463, loss: 0.018019111827015877 2023-01-22 13:39:25.032395: step: 138/463, loss: 0.0668676421046257 2023-01-22 13:39:25.614728: step: 140/463, loss: 0.00028247645241208375 2023-01-22 13:39:26.226773: step: 142/463, loss: 0.0031307446770370007 2023-01-22 13:39:26.786900: step: 144/463, loss: 0.011265382170677185 2023-01-22 13:39:27.368851: step: 146/463, loss: 0.00040918265585787594 2023-01-22 13:39:28.037756: step: 148/463, loss: 0.09168965369462967 2023-01-22 13:39:28.638412: step: 150/463, loss: 0.015680095180869102 2023-01-22 13:39:29.380534: step: 152/463, loss: 0.00029443143284879625 2023-01-22 13:39:30.022540: step: 154/463, loss: 0.0056647262535989285 2023-01-22 13:39:30.676575: step: 156/463, loss: 0.0007648394675925374 2023-01-22 13:39:31.262210: step: 158/463, loss: 0.011920605786144733 2023-01-22 13:39:31.887550: step: 160/463, loss: 0.173386812210083 2023-01-22 13:39:32.460503: step: 162/463, loss: 3.1526673410553485e-05 2023-01-22 13:39:33.038915: step: 164/463, loss: 0.014528918080031872 2023-01-22 13:39:33.616914: step: 166/463, loss: 0.011221883818507195 2023-01-22 13:39:34.198416: step: 168/463, loss: 0.00778588792309165 2023-01-22 13:39:34.772467: step: 170/463, loss: 0.002198287984356284 2023-01-22 13:39:35.313494: step: 172/463, loss: 0.024640100076794624 2023-01-22 13:39:35.981766: step: 174/463, loss: 0.002938736928626895 2023-01-22 13:39:36.562154: step: 176/463, loss: 0.0010286591714248061 2023-01-22 13:39:37.219912: step: 178/463, loss: 0.03232716768980026 2023-01-22 13:39:37.771492: step: 180/463, loss: 0.0015578961465507746 2023-01-22 13:39:38.429110: step: 182/463, loss: 0.0005568155902437866 2023-01-22 13:39:39.015928: step: 184/463, loss: 0.00010768469655886292 2023-01-22 13:39:39.619834: step: 186/463, loss: 0.014435186982154846 2023-01-22 13:39:40.186794: step: 188/463, loss: 0.013997500762343407 2023-01-22 13:39:40.855894: step: 190/463, loss: 0.01962263137102127 2023-01-22 13:39:41.479971: step: 192/463, loss: 0.0076688127592206 2023-01-22 13:39:42.054227: step: 194/463, loss: 0.00011455887579359114 2023-01-22 13:39:42.638543: step: 196/463, loss: 0.00025081579224206507 2023-01-22 13:39:43.234641: step: 198/463, loss: 0.0312761627137661 2023-01-22 13:39:43.792712: step: 200/463, loss: 0.0003015534020960331 2023-01-22 13:39:44.407025: step: 202/463, loss: 0.0012189560802653432 2023-01-22 13:39:44.982379: step: 204/463, loss: 0.00765312509611249 2023-01-22 13:39:45.619525: step: 206/463, loss: 0.015591097064316273 2023-01-22 13:39:46.222341: step: 208/463, loss: 0.0079575777053833 2023-01-22 13:39:46.796753: step: 210/463, loss: 0.0045778690837323666 2023-01-22 13:39:47.320491: step: 212/463, loss: 0.01078322809189558 2023-01-22 13:39:47.915607: step: 214/463, loss: 0.04701203107833862 2023-01-22 13:39:48.530488: step: 216/463, loss: 0.007716403342783451 2023-01-22 13:39:49.151504: step: 218/463, loss: 0.0035424574743956327 2023-01-22 13:39:49.752603: step: 220/463, loss: 3.8686380321451e-06 2023-01-22 13:39:50.363748: step: 222/463, loss: 0.001566283404827118 2023-01-22 13:39:51.024957: step: 224/463, loss: 0.030678650364279747 2023-01-22 13:39:51.633003: step: 226/463, loss: 0.08632715046405792 2023-01-22 13:39:52.281080: step: 228/463, loss: 0.050980955362319946 2023-01-22 13:39:52.894942: step: 230/463, loss: 0.009675303474068642 2023-01-22 13:39:53.476346: step: 232/463, loss: 0.0043572308495640755 2023-01-22 13:39:54.061243: step: 234/463, loss: 0.0005882259574718773 2023-01-22 13:39:54.731067: step: 236/463, loss: 0.002075779251754284 2023-01-22 13:39:55.358467: step: 238/463, loss: 0.0683252215385437 2023-01-22 13:39:55.967212: step: 240/463, loss: 0.0024023137520998716 2023-01-22 13:39:56.542796: step: 242/463, loss: 0.09539209306240082 2023-01-22 13:39:57.096514: step: 244/463, loss: 0.000319745420711115 2023-01-22 13:39:57.775325: step: 246/463, loss: 0.0054846820421516895 2023-01-22 13:39:58.365894: step: 248/463, loss: 0.0013603162951767445 2023-01-22 13:39:59.034878: step: 250/463, loss: 0.027929656207561493 2023-01-22 13:39:59.604083: step: 252/463, loss: 0.000322263891575858 2023-01-22 13:40:00.221361: step: 254/463, loss: 0.014581589959561825 2023-01-22 13:40:00.824154: step: 256/463, loss: 0.03552702069282532 2023-01-22 13:40:01.428351: step: 258/463, loss: 0.040786027908325195 2023-01-22 13:40:02.022603: step: 260/463, loss: 0.0446326918900013 2023-01-22 13:40:02.550782: step: 262/463, loss: 0.0399804525077343 2023-01-22 13:40:03.189932: step: 264/463, loss: 0.004975477699190378 2023-01-22 13:40:03.810618: step: 266/463, loss: 0.012718992307782173 2023-01-22 13:40:04.365265: step: 268/463, loss: 0.004722801968455315 2023-01-22 13:40:04.961317: step: 270/463, loss: 0.0017666302155703306 2023-01-22 13:40:05.497799: step: 272/463, loss: 0.013465424999594688 2023-01-22 13:40:06.083955: step: 274/463, loss: 0.0062596979551017284 2023-01-22 13:40:06.781251: step: 276/463, loss: 0.002826994052156806 2023-01-22 13:40:07.471053: step: 278/463, loss: 0.01050316821783781 2023-01-22 13:40:08.094182: step: 280/463, loss: 0.013287696056067944 2023-01-22 13:40:08.712659: step: 282/463, loss: 6.352769560180604e-05 2023-01-22 13:40:09.280958: step: 284/463, loss: 0.0016838241135701537 2023-01-22 13:40:09.840426: step: 286/463, loss: 0.05246245115995407 2023-01-22 13:40:10.453402: step: 288/463, loss: 0.0007788481889292598 2023-01-22 13:40:11.077746: step: 290/463, loss: 0.0002616256824694574 2023-01-22 13:40:11.594374: step: 292/463, loss: 0.004513297230005264 2023-01-22 13:40:12.211281: step: 294/463, loss: 0.0008384017855860293 2023-01-22 13:40:12.838617: step: 296/463, loss: 0.00442774873226881 2023-01-22 13:40:13.495343: step: 298/463, loss: 0.22174061834812164 2023-01-22 13:40:14.159285: step: 300/463, loss: 0.014089682139456272 2023-01-22 13:40:14.692229: step: 302/463, loss: 0.011865006759762764 2023-01-22 13:40:15.293849: step: 304/463, loss: 0.007160308305174112 2023-01-22 13:40:15.891629: step: 306/463, loss: 2.191199779510498 2023-01-22 13:40:16.500111: step: 308/463, loss: 0.019760850816965103 2023-01-22 13:40:17.109815: step: 310/463, loss: 0.05446013808250427 2023-01-22 13:40:17.716159: step: 312/463, loss: 0.0038349914830178022 2023-01-22 13:40:18.349848: step: 314/463, loss: 0.015616307035088539 2023-01-22 13:40:18.927634: step: 316/463, loss: 0.06799617409706116 2023-01-22 13:40:19.529294: step: 318/463, loss: 0.005231272894889116 2023-01-22 13:40:20.132537: step: 320/463, loss: 0.0014700135216116905 2023-01-22 13:40:20.721789: step: 322/463, loss: 0.004051823168992996 2023-01-22 13:40:21.328022: step: 324/463, loss: 0.0331818126142025 2023-01-22 13:40:21.915725: step: 326/463, loss: 0.0007295148097909987 2023-01-22 13:40:22.422830: step: 328/463, loss: 0.00043950843974016607 2023-01-22 13:40:23.026878: step: 330/463, loss: 0.007079733535647392 2023-01-22 13:40:23.609060: step: 332/463, loss: 0.006048364564776421 2023-01-22 13:40:24.218463: step: 334/463, loss: 0.04785123094916344 2023-01-22 13:40:24.870330: step: 336/463, loss: 0.04843372851610184 2023-01-22 13:40:25.662029: step: 338/463, loss: 0.00014510856999550015 2023-01-22 13:40:26.348914: step: 340/463, loss: 0.0001122959511121735 2023-01-22 13:40:26.942118: step: 342/463, loss: 0.031640488654375076 2023-01-22 13:40:27.562274: step: 344/463, loss: 0.025138873606920242 2023-01-22 13:40:28.105172: step: 346/463, loss: 0.0037102666683495045 2023-01-22 13:40:28.716949: step: 348/463, loss: 0.03473350405693054 2023-01-22 13:40:29.335246: step: 350/463, loss: 0.0023750332184135914 2023-01-22 13:40:29.926996: step: 352/463, loss: 0.0017465592827647924 2023-01-22 13:40:30.533611: step: 354/463, loss: 0.00996798649430275 2023-01-22 13:40:31.233101: step: 356/463, loss: 0.7772992849349976 2023-01-22 13:40:31.812649: step: 358/463, loss: 0.001652126433327794 2023-01-22 13:40:32.547603: step: 360/463, loss: 0.004635266028344631 2023-01-22 13:40:33.106352: step: 362/463, loss: 0.0014862800016999245 2023-01-22 13:40:33.693061: step: 364/463, loss: 0.00030541254091076553 2023-01-22 13:40:34.298358: step: 366/463, loss: 0.001901472918689251 2023-01-22 13:40:34.847683: step: 368/463, loss: 0.004664566367864609 2023-01-22 13:40:35.455265: step: 370/463, loss: 0.0002563406014814973 2023-01-22 13:40:35.996644: step: 372/463, loss: 3.374091102159582e-05 2023-01-22 13:40:36.620098: step: 374/463, loss: 0.007033093832433224 2023-01-22 13:40:37.184745: step: 376/463, loss: 0.0037258719094097614 2023-01-22 13:40:37.816416: step: 378/463, loss: 0.002449818653985858 2023-01-22 13:40:38.418642: step: 380/463, loss: 0.010926024988293648 2023-01-22 13:40:39.048701: step: 382/463, loss: 0.0014149992493912578 2023-01-22 13:40:39.646997: step: 384/463, loss: 0.06429838389158249 2023-01-22 13:40:40.261203: step: 386/463, loss: 0.00046093412674963474 2023-01-22 13:40:40.892535: step: 388/463, loss: 0.0019565694965422153 2023-01-22 13:40:41.372077: step: 390/463, loss: 0.0019051478011533618 2023-01-22 13:40:41.956718: step: 392/463, loss: 0.008902883157134056 2023-01-22 13:40:42.556659: step: 394/463, loss: 0.006219537928700447 2023-01-22 13:40:43.172659: step: 396/463, loss: 0.10888869315385818 2023-01-22 13:40:43.772828: step: 398/463, loss: 0.016297919675707817 2023-01-22 13:40:44.391376: step: 400/463, loss: 0.008376400917768478 2023-01-22 13:40:45.054143: step: 402/463, loss: 0.007794746197760105 2023-01-22 13:40:45.695210: step: 404/463, loss: 0.015515094622969627 2023-01-22 13:40:46.274776: step: 406/463, loss: 0.0009189951815642416 2023-01-22 13:40:46.932694: step: 408/463, loss: 0.021026665344834328 2023-01-22 13:40:47.470328: step: 410/463, loss: 9.61375844781287e-05 2023-01-22 13:40:48.129701: step: 412/463, loss: 0.012185735628008842 2023-01-22 13:40:48.708244: step: 414/463, loss: 0.06111254170536995 2023-01-22 13:40:49.339373: step: 416/463, loss: 0.0015753593761473894 2023-01-22 13:40:49.922143: step: 418/463, loss: 0.00046376415411941707 2023-01-22 13:40:50.508518: step: 420/463, loss: 0.004102764185518026 2023-01-22 13:40:51.102731: step: 422/463, loss: 0.0336264967918396 2023-01-22 13:40:51.759222: step: 424/463, loss: 0.006779925897717476 2023-01-22 13:40:52.380040: step: 426/463, loss: 0.0041001299396157265 2023-01-22 13:40:53.073370: step: 428/463, loss: 0.0030108659993857145 2023-01-22 13:40:53.707160: step: 430/463, loss: 0.08740600198507309 2023-01-22 13:40:54.298864: step: 432/463, loss: 0.014774695038795471 2023-01-22 13:40:54.906364: step: 434/463, loss: 0.006550933234393597 2023-01-22 13:40:55.562990: step: 436/463, loss: 0.05794130265712738 2023-01-22 13:40:56.159905: step: 438/463, loss: 6.887897325213999e-05 2023-01-22 13:40:56.744912: step: 440/463, loss: 0.01628354750573635 2023-01-22 13:40:57.370865: step: 442/463, loss: 0.07845579087734222 2023-01-22 13:40:58.053137: step: 444/463, loss: 1.5991721738828346e-05 2023-01-22 13:40:58.639741: step: 446/463, loss: 1.4149316484690644e-05 2023-01-22 13:40:59.211580: step: 448/463, loss: 0.009916380047798157 2023-01-22 13:40:59.772435: step: 450/463, loss: 0.005335419438779354 2023-01-22 13:41:00.365323: step: 452/463, loss: 0.00026734109269455075 2023-01-22 13:41:00.912609: step: 454/463, loss: 0.016516294330358505 2023-01-22 13:41:01.481505: step: 456/463, loss: 0.0015434387605637312 2023-01-22 13:41:02.110229: step: 458/463, loss: 0.01522710919380188 2023-01-22 13:41:02.737106: step: 460/463, loss: 0.00010683065193006769 2023-01-22 13:41:03.316526: step: 462/463, loss: 0.0008388441638089716 2023-01-22 13:41:03.925719: step: 464/463, loss: 7.75800144765526e-05 2023-01-22 13:41:04.493232: step: 466/463, loss: 0.146086648106575 2023-01-22 13:41:05.051557: step: 468/463, loss: 0.0059030367992818356 2023-01-22 13:41:05.693891: step: 470/463, loss: 0.00972730852663517 2023-01-22 13:41:06.263701: step: 472/463, loss: 0.031643953174352646 2023-01-22 13:41:06.907077: step: 474/463, loss: 0.0016331018414348364 2023-01-22 13:41:07.528298: step: 476/463, loss: 0.0006400145939551294 2023-01-22 13:41:08.152250: step: 478/463, loss: 0.00296087097376585 2023-01-22 13:41:08.769045: step: 480/463, loss: 0.004904331639409065 2023-01-22 13:41:09.406103: step: 482/463, loss: 0.12728621065616608 2023-01-22 13:41:09.979299: step: 484/463, loss: 1.8096621715812944e-05 2023-01-22 13:41:10.675856: step: 486/463, loss: 0.0012359794927760959 2023-01-22 13:41:11.277226: step: 488/463, loss: 0.0007239349070005119 2023-01-22 13:41:11.882665: step: 490/463, loss: 0.006451805587857962 2023-01-22 13:41:12.452137: step: 492/463, loss: 0.00978158786892891 2023-01-22 13:41:13.049334: step: 494/463, loss: 0.049171071499586105 2023-01-22 13:41:13.597088: step: 496/463, loss: 0.01482637319713831 2023-01-22 13:41:14.186872: step: 498/463, loss: 0.014235462993383408 2023-01-22 13:41:14.816623: step: 500/463, loss: 0.03949636220932007 2023-01-22 13:41:15.407391: step: 502/463, loss: 0.00029810681007802486 2023-01-22 13:41:16.049136: step: 504/463, loss: 0.08927298337221146 2023-01-22 13:41:16.642187: step: 506/463, loss: 0.0010077450424432755 2023-01-22 13:41:17.238102: step: 508/463, loss: 0.0011717199813574553 2023-01-22 13:41:17.875634: step: 510/463, loss: 0.002724254271015525 2023-01-22 13:41:18.517209: step: 512/463, loss: 0.00045990533544681966 2023-01-22 13:41:19.173903: step: 514/463, loss: 0.007768549956381321 2023-01-22 13:41:19.804887: step: 516/463, loss: 0.0007777441060170531 2023-01-22 13:41:20.425073: step: 518/463, loss: 0.025105273351073265 2023-01-22 13:41:21.064499: step: 520/463, loss: 2.189960468967911e-05 2023-01-22 13:41:21.655824: step: 522/463, loss: 0.0006834982195869088 2023-01-22 13:41:22.236706: step: 524/463, loss: 0.08271333575248718 2023-01-22 13:41:22.897940: step: 526/463, loss: 0.021871773526072502 2023-01-22 13:41:23.490771: step: 528/463, loss: 0.0001770402886904776 2023-01-22 13:41:24.084449: step: 530/463, loss: 1.3111066436977126e-05 2023-01-22 13:41:24.757027: step: 532/463, loss: 0.3096505105495453 2023-01-22 13:41:25.389002: step: 534/463, loss: 0.0037230215966701508 2023-01-22 13:41:26.053190: step: 536/463, loss: 0.0009449947974644601 2023-01-22 13:41:26.691602: step: 538/463, loss: 0.0023832169827073812 2023-01-22 13:41:27.315598: step: 540/463, loss: 0.008003924041986465 2023-01-22 13:41:27.893437: step: 542/463, loss: 0.00026716661523096263 2023-01-22 13:41:28.529387: step: 544/463, loss: 0.025845911353826523 2023-01-22 13:41:29.109440: step: 546/463, loss: 0.005311736837029457 2023-01-22 13:41:29.695224: step: 548/463, loss: 0.006617727689445019 2023-01-22 13:41:30.281383: step: 550/463, loss: 0.015924856066703796 2023-01-22 13:41:30.885792: step: 552/463, loss: 0.00019956158939749002 2023-01-22 13:41:31.480912: step: 554/463, loss: 0.007889360189437866 2023-01-22 13:41:32.092525: step: 556/463, loss: 0.0014797173207625747 2023-01-22 13:41:32.770404: step: 558/463, loss: 0.0015090981032699347 2023-01-22 13:41:33.425880: step: 560/463, loss: 0.00020871504966635257 2023-01-22 13:41:34.035267: step: 562/463, loss: 0.007421551272273064 2023-01-22 13:41:34.601659: step: 564/463, loss: 0.01180185191333294 2023-01-22 13:41:35.246852: step: 566/463, loss: 0.0035408809781074524 2023-01-22 13:41:35.855793: step: 568/463, loss: 0.00014776161697227508 2023-01-22 13:41:36.410403: step: 570/463, loss: 0.013659101910889149 2023-01-22 13:41:36.958291: step: 572/463, loss: 0.024044468998908997 2023-01-22 13:41:37.593467: step: 574/463, loss: 0.0314486064016819 2023-01-22 13:41:38.146300: step: 576/463, loss: 0.0028053412679582834 2023-01-22 13:41:38.743901: step: 578/463, loss: 0.0020871474407613277 2023-01-22 13:41:39.370617: step: 580/463, loss: 0.005823214538395405 2023-01-22 13:41:39.944611: step: 582/463, loss: 0.011518976651132107 2023-01-22 13:41:40.557004: step: 584/463, loss: 0.016073133796453476 2023-01-22 13:41:41.098573: step: 586/463, loss: 0.0047295973636209965 2023-01-22 13:41:41.691404: step: 588/463, loss: 0.005530184600502253 2023-01-22 13:41:42.261584: step: 590/463, loss: 0.004026345908641815 2023-01-22 13:41:42.871756: step: 592/463, loss: 0.013685299083590508 2023-01-22 13:41:43.481902: step: 594/463, loss: 0.015032644383609295 2023-01-22 13:41:44.075006: step: 596/463, loss: 0.0006871359655633569 2023-01-22 13:41:44.742271: step: 598/463, loss: 0.007380248513072729 2023-01-22 13:41:45.317921: step: 600/463, loss: 0.0011174535611644387 2023-01-22 13:41:45.895790: step: 602/463, loss: 0.001170499948784709 2023-01-22 13:41:46.469569: step: 604/463, loss: 0.014355262741446495 2023-01-22 13:41:47.102351: step: 606/463, loss: 0.02323991246521473 2023-01-22 13:41:47.687346: step: 608/463, loss: 7.946568075567484e-06 2023-01-22 13:41:48.299450: step: 610/463, loss: 0.07203865796327591 2023-01-22 13:41:48.905237: step: 612/463, loss: 0.007512159179896116 2023-01-22 13:41:49.543874: step: 614/463, loss: 0.002797973807901144 2023-01-22 13:41:50.161841: step: 616/463, loss: 0.0006113756680861115 2023-01-22 13:41:50.824604: step: 618/463, loss: 0.02156316675245762 2023-01-22 13:41:51.433716: step: 620/463, loss: 0.02305697463452816 2023-01-22 13:41:52.029236: step: 622/463, loss: 0.015090367756783962 2023-01-22 13:41:52.646300: step: 624/463, loss: 2.848814256140031e-05 2023-01-22 13:41:53.232070: step: 626/463, loss: 0.009909285232424736 2023-01-22 13:41:53.865707: step: 628/463, loss: 0.0017538886750116944 2023-01-22 13:41:54.458630: step: 630/463, loss: 0.04216102510690689 2023-01-22 13:41:55.099741: step: 632/463, loss: 0.008701596409082413 2023-01-22 13:41:55.691381: step: 634/463, loss: 0.023315465077757835 2023-01-22 13:41:56.289919: step: 636/463, loss: 0.0005711687263101339 2023-01-22 13:41:56.876619: step: 638/463, loss: 0.0016999803483486176 2023-01-22 13:41:57.509454: step: 640/463, loss: 0.008287470787763596 2023-01-22 13:41:58.155492: step: 642/463, loss: 0.0002698024036362767 2023-01-22 13:41:58.759166: step: 644/463, loss: 0.010481531731784344 2023-01-22 13:41:59.324417: step: 646/463, loss: 0.03454779461026192 2023-01-22 13:41:59.901441: step: 648/463, loss: 0.016674399375915527 2023-01-22 13:42:00.509922: step: 650/463, loss: 0.000723723613191396 2023-01-22 13:42:01.122167: step: 652/463, loss: 0.008157163858413696 2023-01-22 13:42:01.720636: step: 654/463, loss: 0.014090480282902718 2023-01-22 13:42:02.303147: step: 656/463, loss: 0.019488925114274025 2023-01-22 13:42:02.973025: step: 658/463, loss: 0.019174255430698395 2023-01-22 13:42:03.614386: step: 660/463, loss: 0.010555608198046684 2023-01-22 13:42:04.237394: step: 662/463, loss: 0.0009650069405324757 2023-01-22 13:42:04.828616: step: 664/463, loss: 0.002380374586209655 2023-01-22 13:42:05.408845: step: 666/463, loss: 0.03403887525200844 2023-01-22 13:42:06.039893: step: 668/463, loss: 0.011219196021556854 2023-01-22 13:42:06.655455: step: 670/463, loss: 0.017417466267943382 2023-01-22 13:42:07.310829: step: 672/463, loss: 2.057253550447058e-05 2023-01-22 13:42:07.961017: step: 674/463, loss: 0.06781242787837982 2023-01-22 13:42:08.573942: step: 676/463, loss: 0.028085824102163315 2023-01-22 13:42:09.217625: step: 678/463, loss: 0.002456174697726965 2023-01-22 13:42:09.756616: step: 680/463, loss: 0.0015936418203637004 2023-01-22 13:42:10.366961: step: 682/463, loss: 0.0032691669184714556 2023-01-22 13:42:11.040510: step: 684/463, loss: 0.23696757853031158 2023-01-22 13:42:11.683377: step: 686/463, loss: 0.019505681470036507 2023-01-22 13:42:12.265261: step: 688/463, loss: 0.0034016871359199286 2023-01-22 13:42:12.836894: step: 690/463, loss: 0.024251684546470642 2023-01-22 13:42:13.435732: step: 692/463, loss: 0.08296268433332443 2023-01-22 13:42:14.006173: step: 694/463, loss: 0.0063774497248232365 2023-01-22 13:42:14.637332: step: 696/463, loss: 0.0012224129168316722 2023-01-22 13:42:15.195221: step: 698/463, loss: 0.016882825642824173 2023-01-22 13:42:15.873412: step: 700/463, loss: 0.03692098334431648 2023-01-22 13:42:16.474526: step: 702/463, loss: 0.0530284084379673 2023-01-22 13:42:17.060869: step: 704/463, loss: 0.0035500871017575264 2023-01-22 13:42:17.662581: step: 706/463, loss: 0.0025282411370426416 2023-01-22 13:42:18.212974: step: 708/463, loss: 0.0020579856354743242 2023-01-22 13:42:18.802042: step: 710/463, loss: 0.015920987352728844 2023-01-22 13:42:19.407907: step: 712/463, loss: 0.014441859908401966 2023-01-22 13:42:20.186876: step: 714/463, loss: 0.006769892759621143 2023-01-22 13:42:20.770372: step: 716/463, loss: 0.043161630630493164 2023-01-22 13:42:21.343575: step: 718/463, loss: 0.001978771761059761 2023-01-22 13:42:21.984696: step: 720/463, loss: 0.02008512243628502 2023-01-22 13:42:22.628319: step: 722/463, loss: 0.009537681937217712 2023-01-22 13:42:23.241371: step: 724/463, loss: 0.0003638060879893601 2023-01-22 13:42:23.888485: step: 726/463, loss: 0.005339528433978558 2023-01-22 13:42:24.440314: step: 728/463, loss: 0.003388767596334219 2023-01-22 13:42:25.013313: step: 730/463, loss: 0.0038376867305487394 2023-01-22 13:42:25.608559: step: 732/463, loss: 0.06787408888339996 2023-01-22 13:42:26.199299: step: 734/463, loss: 0.010417459532618523 2023-01-22 13:42:26.808422: step: 736/463, loss: 0.01652175560593605 2023-01-22 13:42:27.312065: step: 738/463, loss: 0.005788207985460758 2023-01-22 13:42:27.881951: step: 740/463, loss: 0.0028158684726804495 2023-01-22 13:42:28.493156: step: 742/463, loss: 0.059458520263433456 2023-01-22 13:42:29.088988: step: 744/463, loss: 5.446530394692672e-06 2023-01-22 13:42:29.699974: step: 746/463, loss: 0.03516753390431404 2023-01-22 13:42:30.322029: step: 748/463, loss: 0.013525101356208324 2023-01-22 13:42:30.977377: step: 750/463, loss: 0.06270499527454376 2023-01-22 13:42:31.617254: step: 752/463, loss: 0.00106957764364779 2023-01-22 13:42:32.163656: step: 754/463, loss: 0.04263201355934143 2023-01-22 13:42:32.770203: step: 756/463, loss: 0.03356302157044411 2023-01-22 13:42:33.413808: step: 758/463, loss: 0.012440657243132591 2023-01-22 13:42:34.044322: step: 760/463, loss: 0.029967335984110832 2023-01-22 13:42:34.705581: step: 762/463, loss: 0.01575998030602932 2023-01-22 13:42:35.324067: step: 764/463, loss: 0.006210016552358866 2023-01-22 13:42:35.938367: step: 766/463, loss: 0.010283583775162697 2023-01-22 13:42:36.559073: step: 768/463, loss: 0.018541868776082993 2023-01-22 13:42:37.189022: step: 770/463, loss: 0.01182506326586008 2023-01-22 13:42:37.827735: step: 772/463, loss: 0.002215802436694503 2023-01-22 13:42:38.448528: step: 774/463, loss: 0.0406578928232193 2023-01-22 13:42:39.058227: step: 776/463, loss: 0.07028999924659729 2023-01-22 13:42:39.644351: step: 778/463, loss: 0.0005066748126409948 2023-01-22 13:42:40.251059: step: 780/463, loss: 0.0019525624811649323 2023-01-22 13:42:40.870059: step: 782/463, loss: 0.01686103083193302 2023-01-22 13:42:41.520251: step: 784/463, loss: 0.17171694338321686 2023-01-22 13:42:42.141508: step: 786/463, loss: 0.004957855213433504 2023-01-22 13:42:42.722839: step: 788/463, loss: 0.004850171972066164 2023-01-22 13:42:43.298028: step: 790/463, loss: 0.0010972366435453296 2023-01-22 13:42:43.864127: step: 792/463, loss: 0.0025384726468473673 2023-01-22 13:42:44.500096: step: 794/463, loss: 0.011010677553713322 2023-01-22 13:42:45.144129: step: 796/463, loss: 0.07036984711885452 2023-01-22 13:42:45.786815: step: 798/463, loss: 0.0030289096757769585 2023-01-22 13:42:46.408012: step: 800/463, loss: 0.003858304815366864 2023-01-22 13:42:46.994753: step: 802/463, loss: 0.0008055354119278491 2023-01-22 13:42:47.582900: step: 804/463, loss: 0.0007819096790626645 2023-01-22 13:42:48.165636: step: 806/463, loss: 0.01665329560637474 2023-01-22 13:42:48.796836: step: 808/463, loss: 0.028631258755922318 2023-01-22 13:42:49.528015: step: 810/463, loss: 0.24964243173599243 2023-01-22 13:42:50.156246: step: 812/463, loss: 0.13243399560451508 2023-01-22 13:42:50.848227: step: 814/463, loss: 0.006908778566867113 2023-01-22 13:42:51.410279: step: 816/463, loss: 0.2623399794101715 2023-01-22 13:42:51.946383: step: 818/463, loss: 0.007665853947401047 2023-01-22 13:42:52.539681: step: 820/463, loss: 0.001188281225040555 2023-01-22 13:42:53.153228: step: 822/463, loss: 0.04388278350234032 2023-01-22 13:42:53.798378: step: 824/463, loss: 0.010815965943038464 2023-01-22 13:42:54.430585: step: 826/463, loss: 0.0022514716256409883 2023-01-22 13:42:55.011469: step: 828/463, loss: 0.010159856639802456 2023-01-22 13:42:55.605442: step: 830/463, loss: 0.000980306533165276 2023-01-22 13:42:56.204597: step: 832/463, loss: 0.025764310732483864 2023-01-22 13:42:56.809133: step: 834/463, loss: 0.0004008069809060544 2023-01-22 13:42:57.395953: step: 836/463, loss: 0.007156014908105135 2023-01-22 13:42:58.010033: step: 838/463, loss: 0.002076355740427971 2023-01-22 13:42:58.567500: step: 840/463, loss: 0.0010596619686111808 2023-01-22 13:42:59.125419: step: 842/463, loss: 0.007150654215365648 2023-01-22 13:42:59.781672: step: 844/463, loss: 0.0032344022765755653 2023-01-22 13:43:00.360649: step: 846/463, loss: 0.00041566186700947583 2023-01-22 13:43:00.928293: step: 848/463, loss: 0.0049224658869206905 2023-01-22 13:43:01.569190: step: 850/463, loss: 0.0395309180021286 2023-01-22 13:43:02.148662: step: 852/463, loss: 0.03558933734893799 2023-01-22 13:43:02.762947: step: 854/463, loss: 0.005729232914745808 2023-01-22 13:43:03.433197: step: 856/463, loss: 0.00038364113424904644 2023-01-22 13:43:04.020616: step: 858/463, loss: 0.0055260444059967995 2023-01-22 13:43:04.661851: step: 860/463, loss: 0.014664572663605213 2023-01-22 13:43:05.259914: step: 862/463, loss: 0.0016391221433877945 2023-01-22 13:43:05.862749: step: 864/463, loss: 0.011106476187705994 2023-01-22 13:43:06.499205: step: 866/463, loss: 0.04988894239068031 2023-01-22 13:43:07.142756: step: 868/463, loss: 0.0012667113915085793 2023-01-22 13:43:07.755029: step: 870/463, loss: 0.014364737085998058 2023-01-22 13:43:08.361711: step: 872/463, loss: 0.0052583059296011925 2023-01-22 13:43:08.972373: step: 874/463, loss: 0.0027395517099648714 2023-01-22 13:43:09.610835: step: 876/463, loss: 1.0095703601837158 2023-01-22 13:43:10.208074: step: 878/463, loss: 0.017347924411296844 2023-01-22 13:43:10.839349: step: 880/463, loss: 0.003989678807556629 2023-01-22 13:43:11.556800: step: 882/463, loss: 0.1823144406080246 2023-01-22 13:43:12.161099: step: 884/463, loss: 0.04542887583374977 2023-01-22 13:43:12.853506: step: 886/463, loss: 0.006405098829418421 2023-01-22 13:43:13.561752: step: 888/463, loss: 0.18624812364578247 2023-01-22 13:43:14.144509: step: 890/463, loss: 0.012357478961348534 2023-01-22 13:43:14.750846: step: 892/463, loss: 0.009346227161586285 2023-01-22 13:43:15.394003: step: 894/463, loss: 0.043834686279296875 2023-01-22 13:43:15.957569: step: 896/463, loss: 0.00037754818913526833 2023-01-22 13:43:16.529622: step: 898/463, loss: 0.0030709875281900167 2023-01-22 13:43:17.119046: step: 900/463, loss: 0.001151008065789938 2023-01-22 13:43:17.693602: step: 902/463, loss: 0.0017272848635911942 2023-01-22 13:43:18.254576: step: 904/463, loss: 0.004733656998723745 2023-01-22 13:43:18.873224: step: 906/463, loss: 0.03464411944150925 2023-01-22 13:43:19.443275: step: 908/463, loss: 0.0004434540751390159 2023-01-22 13:43:20.077131: step: 910/463, loss: 0.02998579852283001 2023-01-22 13:43:20.623238: step: 912/463, loss: 0.0012972571421414614 2023-01-22 13:43:21.216699: step: 914/463, loss: 0.0006090668612159789 2023-01-22 13:43:21.888665: step: 916/463, loss: 0.01941867731511593 2023-01-22 13:43:22.451774: step: 918/463, loss: 0.18417759239673615 2023-01-22 13:43:23.021297: step: 920/463, loss: 0.0005059108952991664 2023-01-22 13:43:23.680975: step: 922/463, loss: 2.426903009414673 2023-01-22 13:43:24.302731: step: 924/463, loss: 0.010100876912474632 2023-01-22 13:43:24.830651: step: 926/463, loss: 0.0012141040060669184 ================================================== Loss: 0.037 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3436934447692121, 'r': 0.3326065594540762, 'f1': 0.3380591260025037}, 'combined': 0.24909619810710798, 'epoch': 37} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.356656135590201, 'r': 0.4012381525389761, 'f1': 0.37763590827197757}, 'combined': 0.29271300067014533, 'epoch': 37} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30336229378156143, 'r': 0.3448083756644313, 'f1': 0.32276023796652803}, 'combined': 0.23782333323849433, 'epoch': 37} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33829673626778184, 'r': 0.4054585883209444, 'f1': 0.368845270980926}, 'combined': 0.28589920525794266, 'epoch': 37} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30526882408289563, 'r': 0.33828651473322774, 'f1': 0.32093068094403426}, 'combined': 0.23647523859034103, 'epoch': 37} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33810565592825775, 'r': 0.395906806665993, 'f1': 0.364730402754107}, 'combined': 0.2827096901730399, 'epoch': 37} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2558139534883721, 'r': 0.3142857142857143, 'f1': 0.28205128205128205}, 'combined': 0.18803418803418803, 'epoch': 37} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.27941176470588236, 'r': 0.41304347826086957, 'f1': 0.33333333333333337}, 'combined': 0.16666666666666669, 'epoch': 37} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.39473684210526316, 'r': 0.25862068965517243, 'f1': 0.3125}, 'combined': 0.20833333333333331, 'epoch': 37} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3279711174242424, 'r': 0.3285934535104364, 'f1': 0.32828199052132695}, 'combined': 0.24189199301571457, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33699381251830834, 'r': 0.40575541764612494, 'f1': 0.3681917384478598}, 'combined': 0.2853926393710684, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32926829268292684, 'r': 0.38571428571428573, 'f1': 0.35526315789473684}, 'combined': 0.23684210526315788, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29021588054776093, 'r': 0.325460313859064, 'f1': 0.30682931199235547}, 'combined': 0.2260847562048935, 'epoch': 13} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32837198696491027, 'r': 0.4053341714098111, 'f1': 0.3628166009822085}, 'combined': 0.28122626487616165, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3333333333333333, 'r': 0.4782608695652174, 'f1': 0.3928571428571428}, 'combined': 0.1964285714285714, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30241528588023536, 'r': 0.33799355480732185, 'f1': 0.319216135095804}, 'combined': 0.2352118890179608, 'epoch': 18} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3270213595028262, 'r': 0.3847310111797955, 'f1': 0.3535366048679202}, 'combined': 0.2740331578402061, 'epoch': 18} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 18} ****************************** Epoch: 38 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 13:46:00.813941: step: 2/463, loss: 0.0003576675371732563 2023-01-22 13:46:01.455532: step: 4/463, loss: 0.6663461923599243 2023-01-22 13:46:02.118290: step: 6/463, loss: 0.005348617676645517 2023-01-22 13:46:02.765162: step: 8/463, loss: 0.0052065420895814896 2023-01-22 13:46:03.378823: step: 10/463, loss: 0.002165149664506316 2023-01-22 13:46:04.045999: step: 12/463, loss: 0.02910236269235611 2023-01-22 13:46:04.579250: step: 14/463, loss: 0.0011701653711497784 2023-01-22 13:46:05.212342: step: 16/463, loss: 0.007246619090437889 2023-01-22 13:46:05.820272: step: 18/463, loss: 0.0319894403219223 2023-01-22 13:46:06.442721: step: 20/463, loss: 0.00046730050235055387 2023-01-22 13:46:07.079046: step: 22/463, loss: 0.0020132004283368587 2023-01-22 13:46:07.726944: step: 24/463, loss: 0.003372333711013198 2023-01-22 13:46:08.301640: step: 26/463, loss: 0.009564385749399662 2023-01-22 13:46:08.890053: step: 28/463, loss: 0.013140233233571053 2023-01-22 13:46:09.432702: step: 30/463, loss: 0.001348411082290113 2023-01-22 13:46:10.026021: step: 32/463, loss: 0.0014122524298727512 2023-01-22 13:46:10.581153: step: 34/463, loss: 0.0008026689174585044 2023-01-22 13:46:11.206182: step: 36/463, loss: 0.0028248915914446115 2023-01-22 13:46:11.802676: step: 38/463, loss: 0.0075436849147081375 2023-01-22 13:46:12.438652: step: 40/463, loss: 0.005117383785545826 2023-01-22 13:46:13.001659: step: 42/463, loss: 0.006541636306792498 2023-01-22 13:46:13.648023: step: 44/463, loss: 0.011812773533165455 2023-01-22 13:46:14.210353: step: 46/463, loss: 0.03619756922125816 2023-01-22 13:46:14.805198: step: 48/463, loss: 0.0041055078618228436 2023-01-22 13:46:15.432484: step: 50/463, loss: 8.513357897754759e-05 2023-01-22 13:46:16.026876: step: 52/463, loss: 0.04997166246175766 2023-01-22 13:46:16.681629: step: 54/463, loss: 0.03975410386919975 2023-01-22 13:46:17.258635: step: 56/463, loss: 0.0003791792260017246 2023-01-22 13:46:17.935536: step: 58/463, loss: 0.03447412699460983 2023-01-22 13:46:18.582378: step: 60/463, loss: 0.010324408300220966 2023-01-22 13:46:19.102237: step: 62/463, loss: 0.013240799307823181 2023-01-22 13:46:19.722708: step: 64/463, loss: 0.006249695084989071 2023-01-22 13:46:20.377568: step: 66/463, loss: 0.025303052738308907 2023-01-22 13:46:20.915098: step: 68/463, loss: 0.0006516952998936176 2023-01-22 13:46:21.483416: step: 70/463, loss: 0.06753762066364288 2023-01-22 13:46:22.130140: step: 72/463, loss: 0.01517146173864603 2023-01-22 13:46:22.743292: step: 74/463, loss: 0.0010519068455323577 2023-01-22 13:46:23.323139: step: 76/463, loss: 0.007192879915237427 2023-01-22 13:46:23.953860: step: 78/463, loss: 0.05849984660744667 2023-01-22 13:46:24.600081: step: 80/463, loss: 0.012203852646052837 2023-01-22 13:46:25.176004: step: 82/463, loss: 0.0004563910770229995 2023-01-22 13:46:25.803589: step: 84/463, loss: 0.006087826564908028 2023-01-22 13:46:26.349751: step: 86/463, loss: 0.01261174213141203 2023-01-22 13:46:26.924152: step: 88/463, loss: 0.0009724133997224271 2023-01-22 13:46:27.525598: step: 90/463, loss: 0.0004132771282456815 2023-01-22 13:46:28.097895: step: 92/463, loss: 0.01645635813474655 2023-01-22 13:46:28.722061: step: 94/463, loss: 0.028350112959742546 2023-01-22 13:46:29.337712: step: 96/463, loss: 0.0012911633821204305 2023-01-22 13:46:29.984163: step: 98/463, loss: 0.0131125058978796 2023-01-22 13:46:30.590560: step: 100/463, loss: 0.00519681116566062 2023-01-22 13:46:31.219143: step: 102/463, loss: 0.03608179837465286 2023-01-22 13:46:31.794162: step: 104/463, loss: 0.011656703427433968 2023-01-22 13:46:32.377343: step: 106/463, loss: 0.0002910309995058924 2023-01-22 13:46:33.030329: step: 108/463, loss: 0.0004622248816303909 2023-01-22 13:46:33.718520: step: 110/463, loss: 0.009065485559403896 2023-01-22 13:46:34.344827: step: 112/463, loss: 0.002514266176149249 2023-01-22 13:46:34.977168: step: 114/463, loss: 0.00034480978501960635 2023-01-22 13:46:35.615303: step: 116/463, loss: 0.003990465309470892 2023-01-22 13:46:36.212874: step: 118/463, loss: 0.0036008465103805065 2023-01-22 13:46:36.803483: step: 120/463, loss: 0.006166631355881691 2023-01-22 13:46:37.416988: step: 122/463, loss: 4.63170072180219e-05 2023-01-22 13:46:38.029813: step: 124/463, loss: 0.007947270758450031 2023-01-22 13:46:38.650139: step: 126/463, loss: 0.010826355777680874 2023-01-22 13:46:39.219477: step: 128/463, loss: 0.016946762800216675 2023-01-22 13:46:39.810291: step: 130/463, loss: 0.004641765728592873 2023-01-22 13:46:40.450871: step: 132/463, loss: 0.005628272425383329 2023-01-22 13:46:41.083335: step: 134/463, loss: 0.0003984539653174579 2023-01-22 13:46:41.747899: step: 136/463, loss: 0.005691876634955406 2023-01-22 13:46:42.340113: step: 138/463, loss: 0.001149071496911347 2023-01-22 13:46:42.968113: step: 140/463, loss: 0.019951999187469482 2023-01-22 13:46:43.550820: step: 142/463, loss: 0.00044942894601263106 2023-01-22 13:46:44.173792: step: 144/463, loss: 0.00596737302839756 2023-01-22 13:46:44.731272: step: 146/463, loss: 0.01373070478439331 2023-01-22 13:46:45.381401: step: 148/463, loss: 0.006822957657277584 2023-01-22 13:46:46.045118: step: 150/463, loss: 6.739966192981228e-05 2023-01-22 13:46:46.652991: step: 152/463, loss: 0.005839567165821791 2023-01-22 13:46:47.168867: step: 154/463, loss: 5.105665331939235e-05 2023-01-22 13:46:47.838605: step: 156/463, loss: 0.017809750512242317 2023-01-22 13:46:48.401663: step: 158/463, loss: 0.013083020225167274 2023-01-22 13:46:48.951058: step: 160/463, loss: 0.0032974849455058575 2023-01-22 13:46:49.552841: step: 162/463, loss: 0.01070482563227415 2023-01-22 13:46:50.082494: step: 164/463, loss: 0.00032536074286326766 2023-01-22 13:46:50.648463: step: 166/463, loss: 0.04124321788549423 2023-01-22 13:46:51.212521: step: 168/463, loss: 0.0017832282464951277 2023-01-22 13:46:51.808680: step: 170/463, loss: 0.0022127775009721518 2023-01-22 13:46:52.414605: step: 172/463, loss: 0.001772384624928236 2023-01-22 13:46:52.926817: step: 174/463, loss: 0.0027756420895457268 2023-01-22 13:46:53.536471: step: 176/463, loss: 0.0034369942732155323 2023-01-22 13:46:54.169951: step: 178/463, loss: 0.01690688170492649 2023-01-22 13:46:54.764000: step: 180/463, loss: 0.027102582156658173 2023-01-22 13:46:55.436759: step: 182/463, loss: 0.013270555064082146 2023-01-22 13:46:56.013557: step: 184/463, loss: 0.002121496247127652 2023-01-22 13:46:56.646783: step: 186/463, loss: 0.00016423201304860413 2023-01-22 13:46:57.394847: step: 188/463, loss: 0.0037703884299844503 2023-01-22 13:46:57.986456: step: 190/463, loss: 0.008827411569654942 2023-01-22 13:46:58.649649: step: 192/463, loss: 0.027613524347543716 2023-01-22 13:46:59.312237: step: 194/463, loss: 0.008662181906402111 2023-01-22 13:46:59.932814: step: 196/463, loss: 0.09055357426404953 2023-01-22 13:47:00.548294: step: 198/463, loss: 0.017377590760588646 2023-01-22 13:47:01.178187: step: 200/463, loss: 7.32247790438123e-05 2023-01-22 13:47:01.782499: step: 202/463, loss: 0.0007087164558470249 2023-01-22 13:47:02.437992: step: 204/463, loss: 0.317324161529541 2023-01-22 13:47:03.022442: step: 206/463, loss: 0.0005053650238551199 2023-01-22 13:47:03.571154: step: 208/463, loss: 0.0010837082518264651 2023-01-22 13:47:04.154111: step: 210/463, loss: 0.0062217446975409985 2023-01-22 13:47:04.782481: step: 212/463, loss: 0.00924688670784235 2023-01-22 13:47:05.387950: step: 214/463, loss: 0.002303719986230135 2023-01-22 13:47:05.966255: step: 216/463, loss: 0.012535372748970985 2023-01-22 13:47:06.532325: step: 218/463, loss: 0.02805422618985176 2023-01-22 13:47:07.098624: step: 220/463, loss: 0.007259115111082792 2023-01-22 13:47:07.695663: step: 222/463, loss: 0.024462362751364708 2023-01-22 13:47:08.215310: step: 224/463, loss: 0.0010564392432570457 2023-01-22 13:47:08.781596: step: 226/463, loss: 0.02273215539753437 2023-01-22 13:47:09.353044: step: 228/463, loss: 0.10344050824642181 2023-01-22 13:47:09.942390: step: 230/463, loss: 0.11630513519048691 2023-01-22 13:47:10.550496: step: 232/463, loss: 0.00038182278512977064 2023-01-22 13:47:11.144476: step: 234/463, loss: 0.0013521601213142276 2023-01-22 13:47:11.789657: step: 236/463, loss: 0.06642608344554901 2023-01-22 13:47:12.431017: step: 238/463, loss: 0.02880418486893177 2023-01-22 13:47:13.068652: step: 240/463, loss: 0.015984902158379555 2023-01-22 13:47:13.676233: step: 242/463, loss: 0.047216299921274185 2023-01-22 13:47:14.367630: step: 244/463, loss: 0.0004027021932415664 2023-01-22 13:47:15.091597: step: 246/463, loss: 0.011578534729778767 2023-01-22 13:47:15.712069: step: 248/463, loss: 0.013928024098277092 2023-01-22 13:47:16.325390: step: 250/463, loss: 0.005497732665389776 2023-01-22 13:47:16.909653: step: 252/463, loss: 0.0004778372240252793 2023-01-22 13:47:17.520975: step: 254/463, loss: 0.0038341470062732697 2023-01-22 13:47:18.117146: step: 256/463, loss: 0.01212337240576744 2023-01-22 13:47:18.786810: step: 258/463, loss: 0.0110453050583601 2023-01-22 13:47:19.381669: step: 260/463, loss: 0.006752063054591417 2023-01-22 13:47:19.934670: step: 262/463, loss: 0.0001130100354203023 2023-01-22 13:47:20.521394: step: 264/463, loss: 0.0005019967793487012 2023-01-22 13:47:21.088002: step: 266/463, loss: 0.01084559503942728 2023-01-22 13:47:21.673939: step: 268/463, loss: 0.00031250284519046545 2023-01-22 13:47:22.305796: step: 270/463, loss: 0.023255877196788788 2023-01-22 13:47:22.921353: step: 272/463, loss: 0.008008042350411415 2023-01-22 13:47:23.436573: step: 274/463, loss: 0.026028001680970192 2023-01-22 13:47:24.051748: step: 276/463, loss: 0.034775178879499435 2023-01-22 13:47:24.637733: step: 278/463, loss: 0.06590401381254196 2023-01-22 13:47:25.286118: step: 280/463, loss: 0.009529476054012775 2023-01-22 13:47:25.910226: step: 282/463, loss: 0.0030940070282667875 2023-01-22 13:47:26.547503: step: 284/463, loss: 0.0076924762688577175 2023-01-22 13:47:27.200937: step: 286/463, loss: 0.0007767279748804867 2023-01-22 13:47:27.762019: step: 288/463, loss: 0.31206226348876953 2023-01-22 13:47:28.353073: step: 290/463, loss: 0.004208110738545656 2023-01-22 13:47:28.933802: step: 292/463, loss: 0.03561747819185257 2023-01-22 13:47:29.612962: step: 294/463, loss: 0.0025247172452509403 2023-01-22 13:47:30.164186: step: 296/463, loss: 0.02131984755396843 2023-01-22 13:47:30.768102: step: 298/463, loss: 0.0734555572271347 2023-01-22 13:47:31.403988: step: 300/463, loss: 0.02882952056825161 2023-01-22 13:47:32.044775: step: 302/463, loss: 0.0009613363072276115 2023-01-22 13:47:32.642885: step: 304/463, loss: 0.0318412110209465 2023-01-22 13:47:33.211210: step: 306/463, loss: 0.0057764495722949505 2023-01-22 13:47:33.771453: step: 308/463, loss: 0.003803574712947011 2023-01-22 13:47:34.332052: step: 310/463, loss: 0.02247965894639492 2023-01-22 13:47:34.926759: step: 312/463, loss: 0.00042657507583498955 2023-01-22 13:47:35.535494: step: 314/463, loss: 0.008545217104256153 2023-01-22 13:47:36.200229: step: 316/463, loss: 0.0020248896908015013 2023-01-22 13:47:36.834748: step: 318/463, loss: 0.1126699298620224 2023-01-22 13:47:37.435375: step: 320/463, loss: 0.0002879079256672412 2023-01-22 13:47:37.996919: step: 322/463, loss: 0.007370691746473312 2023-01-22 13:47:38.601043: step: 324/463, loss: 0.0018412236822769046 2023-01-22 13:47:39.196209: step: 326/463, loss: 0.0022184234112501144 2023-01-22 13:47:39.771741: step: 328/463, loss: 0.0019151547458022833 2023-01-22 13:47:40.376482: step: 330/463, loss: 0.00280355685390532 2023-01-22 13:47:40.938474: step: 332/463, loss: 0.003879902418702841 2023-01-22 13:47:41.599069: step: 334/463, loss: 0.0003928143414668739 2023-01-22 13:47:42.184045: step: 336/463, loss: 0.12195266783237457 2023-01-22 13:47:42.783323: step: 338/463, loss: 0.003956921864300966 2023-01-22 13:47:43.299988: step: 340/463, loss: 0.004294354468584061 2023-01-22 13:47:43.855608: step: 342/463, loss: 2.8229922463651747e-05 2023-01-22 13:47:44.461287: step: 344/463, loss: 0.011857496574521065 2023-01-22 13:47:45.036575: step: 346/463, loss: 0.002544622402638197 2023-01-22 13:47:45.646245: step: 348/463, loss: 0.005111938342452049 2023-01-22 13:47:46.253377: step: 350/463, loss: 0.003586391219869256 2023-01-22 13:47:46.932311: step: 352/463, loss: 0.002086812164634466 2023-01-22 13:47:47.501774: step: 354/463, loss: 0.00022493835422210395 2023-01-22 13:47:48.107221: step: 356/463, loss: 0.0029014514293521643 2023-01-22 13:47:48.702493: step: 358/463, loss: 0.005020488053560257 2023-01-22 13:47:49.313642: step: 360/463, loss: 0.04885225370526314 2023-01-22 13:47:49.884305: step: 362/463, loss: 0.017862966284155846 2023-01-22 13:47:50.479125: step: 364/463, loss: 0.05107182264328003 2023-01-22 13:47:51.068847: step: 366/463, loss: 0.0042709712870419025 2023-01-22 13:47:51.731589: step: 368/463, loss: 0.009424310177564621 2023-01-22 13:47:52.283160: step: 370/463, loss: 0.015868568792939186 2023-01-22 13:47:52.937231: step: 372/463, loss: 0.0004685927997343242 2023-01-22 13:47:53.587908: step: 374/463, loss: 0.01611120067536831 2023-01-22 13:47:54.130273: step: 376/463, loss: 0.016102414578199387 2023-01-22 13:47:54.797598: step: 378/463, loss: 0.013001061975955963 2023-01-22 13:47:55.424208: step: 380/463, loss: 0.006803620140999556 2023-01-22 13:47:56.064475: step: 382/463, loss: 0.0014990817289799452 2023-01-22 13:47:56.618835: step: 384/463, loss: 0.00013608716835733503 2023-01-22 13:47:57.194197: step: 386/463, loss: 0.09866796433925629 2023-01-22 13:47:57.811621: step: 388/463, loss: 0.0032702710013836622 2023-01-22 13:47:58.425332: step: 390/463, loss: 0.083692267537117 2023-01-22 13:47:58.983382: step: 392/463, loss: 0.02510911412537098 2023-01-22 13:47:59.603541: step: 394/463, loss: 0.0005975825479254127 2023-01-22 13:48:00.186294: step: 396/463, loss: 0.005218931473791599 2023-01-22 13:48:00.861014: step: 398/463, loss: 0.025239232927560806 2023-01-22 13:48:01.520270: step: 400/463, loss: 0.03985372185707092 2023-01-22 13:48:02.109454: step: 402/463, loss: 0.0002242095797555521 2023-01-22 13:48:02.775567: step: 404/463, loss: 0.02234315499663353 2023-01-22 13:48:03.466310: step: 406/463, loss: 0.013031018897891045 2023-01-22 13:48:04.096490: step: 408/463, loss: 0.0003241975500714034 2023-01-22 13:48:04.813915: step: 410/463, loss: 0.003737811464816332 2023-01-22 13:48:05.366546: step: 412/463, loss: 0.013983436860144138 2023-01-22 13:48:06.011082: step: 414/463, loss: 0.0006442320300266147 2023-01-22 13:48:06.550438: step: 416/463, loss: 0.0005439840024337173 2023-01-22 13:48:07.209831: step: 418/463, loss: 0.02544771507382393 2023-01-22 13:48:07.823724: step: 420/463, loss: 0.0023602836299687624 2023-01-22 13:48:08.411718: step: 422/463, loss: 0.0005632571410387754 2023-01-22 13:48:08.978595: step: 424/463, loss: 0.000652813003398478 2023-01-22 13:48:09.592428: step: 426/463, loss: 0.053215187042951584 2023-01-22 13:48:10.200801: step: 428/463, loss: 0.0015672557055950165 2023-01-22 13:48:10.835787: step: 430/463, loss: 0.002932344563305378 2023-01-22 13:48:11.410665: step: 432/463, loss: 0.03482050448656082 2023-01-22 13:48:11.965383: step: 434/463, loss: 0.0007440971676260233 2023-01-22 13:48:12.576225: step: 436/463, loss: 0.0005701840855181217 2023-01-22 13:48:13.221307: step: 438/463, loss: 0.02425306662917137 2023-01-22 13:48:13.835651: step: 440/463, loss: 0.015324989333748817 2023-01-22 13:48:14.397555: step: 442/463, loss: 0.0002631238312460482 2023-01-22 13:48:14.942928: step: 444/463, loss: 0.0005782668013125658 2023-01-22 13:48:15.537733: step: 446/463, loss: 0.0018936014967039227 2023-01-22 13:48:16.194095: step: 448/463, loss: 8.828086720313877e-05 2023-01-22 13:48:16.804141: step: 450/463, loss: 0.00518467603251338 2023-01-22 13:48:17.393187: step: 452/463, loss: 0.0032181974966078997 2023-01-22 13:48:18.023463: step: 454/463, loss: 1.5466004610061646 2023-01-22 13:48:18.629840: step: 456/463, loss: 0.03499947115778923 2023-01-22 13:48:19.309494: step: 458/463, loss: 0.012161768972873688 2023-01-22 13:48:19.922934: step: 460/463, loss: 0.011972170323133469 2023-01-22 13:48:20.568647: step: 462/463, loss: 0.0010148603469133377 2023-01-22 13:48:21.131564: step: 464/463, loss: 0.0006081080064177513 2023-01-22 13:48:21.766791: step: 466/463, loss: 0.04654070362448692 2023-01-22 13:48:22.379508: step: 468/463, loss: 0.016150107607245445 2023-01-22 13:48:22.982278: step: 470/463, loss: 0.010591501370072365 2023-01-22 13:48:23.547477: step: 472/463, loss: 0.016223080456256866 2023-01-22 13:48:24.154591: step: 474/463, loss: 0.0034826139453798532 2023-01-22 13:48:24.718919: step: 476/463, loss: 0.0015128606464713812 2023-01-22 13:48:25.335618: step: 478/463, loss: 0.017107337713241577 2023-01-22 13:48:26.032840: step: 480/463, loss: 0.0011645930353552103 2023-01-22 13:48:26.640605: step: 482/463, loss: 2.951589340227656e-05 2023-01-22 13:48:27.255546: step: 484/463, loss: 0.0028347501065582037 2023-01-22 13:48:27.890541: step: 486/463, loss: 0.0008458561496809125 2023-01-22 13:48:28.548603: step: 488/463, loss: 0.0025803851895034313 2023-01-22 13:48:29.151381: step: 490/463, loss: 0.001344438991509378 2023-01-22 13:48:29.754808: step: 492/463, loss: 0.0001398771710228175 2023-01-22 13:48:30.331272: step: 494/463, loss: 0.004892126657068729 2023-01-22 13:48:30.909447: step: 496/463, loss: 0.00012866096221841872 2023-01-22 13:48:31.478738: step: 498/463, loss: 0.011026565916836262 2023-01-22 13:48:32.078267: step: 500/463, loss: 4.267854819772765e-05 2023-01-22 13:48:32.646238: step: 502/463, loss: 0.00037181138759478927 2023-01-22 13:48:33.242987: step: 504/463, loss: 0.0010123489191755652 2023-01-22 13:48:33.850835: step: 506/463, loss: 0.6113887429237366 2023-01-22 13:48:34.461896: step: 508/463, loss: 0.031441714614629745 2023-01-22 13:48:35.099934: step: 510/463, loss: 0.027309393510222435 2023-01-22 13:48:35.687220: step: 512/463, loss: 0.0027584049385041 2023-01-22 13:48:36.248463: step: 514/463, loss: 0.0006248729769140482 2023-01-22 13:48:36.966655: step: 516/463, loss: 0.000973543559666723 2023-01-22 13:48:37.555417: step: 518/463, loss: 0.24825112521648407 2023-01-22 13:48:38.163046: step: 520/463, loss: 0.012699211947619915 2023-01-22 13:48:38.740151: step: 522/463, loss: 0.0012883590534329414 2023-01-22 13:48:39.329111: step: 524/463, loss: 0.0017056922661140561 2023-01-22 13:48:39.871011: step: 526/463, loss: 0.00032777892192825675 2023-01-22 13:48:40.477148: step: 528/463, loss: 0.015193992294371128 2023-01-22 13:48:41.030222: step: 530/463, loss: 0.0009753488702699542 2023-01-22 13:48:41.700041: step: 532/463, loss: 0.0037017534486949444 2023-01-22 13:48:42.249381: step: 534/463, loss: 0.00311911478638649 2023-01-22 13:48:42.837264: step: 536/463, loss: 2.0947974917362444e-05 2023-01-22 13:48:43.431811: step: 538/463, loss: 0.01927190274000168 2023-01-22 13:48:44.015288: step: 540/463, loss: 0.013577724806964397 2023-01-22 13:48:44.642527: step: 542/463, loss: 0.008778207935392857 2023-01-22 13:48:45.264344: step: 544/463, loss: 0.11551780998706818 2023-01-22 13:48:45.820467: step: 546/463, loss: 0.0034004158806055784 2023-01-22 13:48:46.440850: step: 548/463, loss: 0.007283760700374842 2023-01-22 13:48:47.054472: step: 550/463, loss: 0.005372778512537479 2023-01-22 13:48:47.625632: step: 552/463, loss: 0.008491241373121738 2023-01-22 13:48:48.157798: step: 554/463, loss: 0.00023776550369802862 2023-01-22 13:48:48.773714: step: 556/463, loss: 0.0029520662501454353 2023-01-22 13:48:49.335101: step: 558/463, loss: 2.8681654930114746 2023-01-22 13:48:49.957302: step: 560/463, loss: 0.051297131925821304 2023-01-22 13:48:50.615243: step: 562/463, loss: 0.0030340056400746107 2023-01-22 13:48:51.291244: step: 564/463, loss: 0.008575016632676125 2023-01-22 13:48:51.962183: step: 566/463, loss: 0.0009005602332763374 2023-01-22 13:48:52.606095: step: 568/463, loss: 0.0002541911671869457 2023-01-22 13:48:53.187865: step: 570/463, loss: 0.00010980103979818523 2023-01-22 13:48:53.822224: step: 572/463, loss: 0.03710321709513664 2023-01-22 13:48:54.419218: step: 574/463, loss: 0.02576286718249321 2023-01-22 13:48:55.030446: step: 576/463, loss: 0.00144080794416368 2023-01-22 13:48:55.604427: step: 578/463, loss: 0.000108132982859388 2023-01-22 13:48:56.199248: step: 580/463, loss: 0.00042414720519445837 2023-01-22 13:48:56.789187: step: 582/463, loss: 0.0016385602066293359 2023-01-22 13:48:57.380797: step: 584/463, loss: 0.011878685094416142 2023-01-22 13:48:57.980233: step: 586/463, loss: 8.690333925187588e-05 2023-01-22 13:48:58.658162: step: 588/463, loss: 0.02630392089486122 2023-01-22 13:48:59.208245: step: 590/463, loss: 0.0029558863025158644 2023-01-22 13:48:59.809904: step: 592/463, loss: 0.014071577228605747 2023-01-22 13:49:00.485437: step: 594/463, loss: 0.24573202431201935 2023-01-22 13:49:01.149731: step: 596/463, loss: 0.014122777618467808 2023-01-22 13:49:01.791887: step: 598/463, loss: 0.002087947679683566 2023-01-22 13:49:02.443956: step: 600/463, loss: 0.002814420498907566 2023-01-22 13:49:03.018738: step: 602/463, loss: 0.07893769443035126 2023-01-22 13:49:03.612721: step: 604/463, loss: 0.020510639995336533 2023-01-22 13:49:04.203173: step: 606/463, loss: 0.0025151835288852453 2023-01-22 13:49:04.801506: step: 608/463, loss: 0.0011185641633346677 2023-01-22 13:49:05.473361: step: 610/463, loss: 0.0019370777299627662 2023-01-22 13:49:06.077827: step: 612/463, loss: 0.038371533155441284 2023-01-22 13:49:06.654634: step: 614/463, loss: 0.002873349003493786 2023-01-22 13:49:07.261441: step: 616/463, loss: 0.7107179760932922 2023-01-22 13:49:07.822149: step: 618/463, loss: 0.0262912567704916 2023-01-22 13:49:08.390623: step: 620/463, loss: 0.01624593883752823 2023-01-22 13:49:09.005697: step: 622/463, loss: 0.0006126550724729896 2023-01-22 13:49:09.542146: step: 624/463, loss: 0.002259156433865428 2023-01-22 13:49:10.112800: step: 626/463, loss: 0.001167376758530736 2023-01-22 13:49:10.694277: step: 628/463, loss: 0.009499099105596542 2023-01-22 13:49:11.227669: step: 630/463, loss: 0.10232248902320862 2023-01-22 13:49:11.856635: step: 632/463, loss: 0.0006010098732076585 2023-01-22 13:49:12.401450: step: 634/463, loss: 0.0680655837059021 2023-01-22 13:49:13.064403: step: 636/463, loss: 0.011620243079960346 2023-01-22 13:49:13.675212: step: 638/463, loss: 0.018363557755947113 2023-01-22 13:49:14.267556: step: 640/463, loss: 0.016795363277196884 2023-01-22 13:49:14.884575: step: 642/463, loss: 0.005221130792051554 2023-01-22 13:49:15.547847: step: 644/463, loss: 2.1192850908846594e-05 2023-01-22 13:49:16.182542: step: 646/463, loss: 0.0009030998917296529 2023-01-22 13:49:16.764160: step: 648/463, loss: 0.7196435928344727 2023-01-22 13:49:17.344305: step: 650/463, loss: 0.02872166410088539 2023-01-22 13:49:17.887941: step: 652/463, loss: 0.006281400099396706 2023-01-22 13:49:18.452302: step: 654/463, loss: 0.00614789966493845 2023-01-22 13:49:19.067837: step: 656/463, loss: 0.005407969933003187 2023-01-22 13:49:19.653523: step: 658/463, loss: 0.00039257208118215203 2023-01-22 13:49:20.332350: step: 660/463, loss: 0.0011086283484473825 2023-01-22 13:49:20.975145: step: 662/463, loss: 0.0097615085542202 2023-01-22 13:49:21.606650: step: 664/463, loss: 0.2791215777397156 2023-01-22 13:49:22.136429: step: 666/463, loss: 0.011203419417142868 2023-01-22 13:49:22.830153: step: 668/463, loss: 0.00036510106292553246 2023-01-22 13:49:23.422062: step: 670/463, loss: 0.00014853785978630185 2023-01-22 13:49:23.986826: step: 672/463, loss: 0.00434811320155859 2023-01-22 13:49:24.656527: step: 674/463, loss: 0.014435181394219398 2023-01-22 13:49:25.301234: step: 676/463, loss: 0.0337931290268898 2023-01-22 13:49:25.906151: step: 678/463, loss: 0.001187111483886838 2023-01-22 13:49:26.501792: step: 680/463, loss: 0.0030091821681708097 2023-01-22 13:49:27.125120: step: 682/463, loss: 0.003528317203745246 2023-01-22 13:49:27.869478: step: 684/463, loss: 0.00029646451002918184 2023-01-22 13:49:28.393676: step: 686/463, loss: 0.020188894122838974 2023-01-22 13:49:28.999465: step: 688/463, loss: 0.038598254323005676 2023-01-22 13:49:29.552814: step: 690/463, loss: 0.04736355319619179 2023-01-22 13:49:30.167569: step: 692/463, loss: 0.034489214420318604 2023-01-22 13:49:30.745805: step: 694/463, loss: 0.35866814851760864 2023-01-22 13:49:31.385485: step: 696/463, loss: 0.02614535205066204 2023-01-22 13:49:31.970078: step: 698/463, loss: 0.0015560268657281995 2023-01-22 13:49:32.514625: step: 700/463, loss: 0.004201863892376423 2023-01-22 13:49:33.117406: step: 702/463, loss: 0.0010461887577548623 2023-01-22 13:49:33.719922: step: 704/463, loss: 0.004900551866739988 2023-01-22 13:49:34.268718: step: 706/463, loss: 0.008886730298399925 2023-01-22 13:49:34.830720: step: 708/463, loss: 0.0005063335411250591 2023-01-22 13:49:35.431954: step: 710/463, loss: 0.000594239856582135 2023-01-22 13:49:35.987524: step: 712/463, loss: 0.00010440604819450527 2023-01-22 13:49:36.573970: step: 714/463, loss: 0.0028269460890442133 2023-01-22 13:49:37.149332: step: 716/463, loss: 0.14797835052013397 2023-01-22 13:49:37.767051: step: 718/463, loss: 0.008436452597379684 2023-01-22 13:49:38.283668: step: 720/463, loss: 0.0006557427695952356 2023-01-22 13:49:38.881237: step: 722/463, loss: 0.10913539677858353 2023-01-22 13:49:39.461087: step: 724/463, loss: 0.5314435362815857 2023-01-22 13:49:40.111270: step: 726/463, loss: 0.0010888563701882958 2023-01-22 13:49:40.784707: step: 728/463, loss: 0.012055275030434132 2023-01-22 13:49:41.422229: step: 730/463, loss: 0.08905165642499924 2023-01-22 13:49:42.025659: step: 732/463, loss: 0.0008891245233826339 2023-01-22 13:49:42.704861: step: 734/463, loss: 0.2571966350078583 2023-01-22 13:49:43.379570: step: 736/463, loss: 0.00947421696037054 2023-01-22 13:49:44.024105: step: 738/463, loss: 0.006358742248266935 2023-01-22 13:49:44.661235: step: 740/463, loss: 0.002794235711917281 2023-01-22 13:49:45.311429: step: 742/463, loss: 0.017871318385004997 2023-01-22 13:49:45.984539: step: 744/463, loss: 0.010377487167716026 2023-01-22 13:49:46.544861: step: 746/463, loss: 0.0003550343681126833 2023-01-22 13:49:47.128104: step: 748/463, loss: 0.007270701229572296 2023-01-22 13:49:47.723171: step: 750/463, loss: 0.0003694660263136029 2023-01-22 13:49:48.301628: step: 752/463, loss: 0.00420248182490468 2023-01-22 13:49:48.981567: step: 754/463, loss: 0.0010248766047880054 2023-01-22 13:49:49.543967: step: 756/463, loss: 0.0033011864870786667 2023-01-22 13:49:50.153330: step: 758/463, loss: 0.0031189294531941414 2023-01-22 13:49:50.718123: step: 760/463, loss: 0.00022750585048925132 2023-01-22 13:49:51.374734: step: 762/463, loss: 0.004734429996460676 2023-01-22 13:49:52.053673: step: 764/463, loss: 0.0003108963428530842 2023-01-22 13:49:52.683438: step: 766/463, loss: 0.005663448013365269 2023-01-22 13:49:53.263963: step: 768/463, loss: 0.012136506848037243 2023-01-22 13:49:53.888281: step: 770/463, loss: 0.00012647359108086675 2023-01-22 13:49:54.451275: step: 772/463, loss: 0.043156947940588 2023-01-22 13:49:55.090819: step: 774/463, loss: 0.0008583275484852493 2023-01-22 13:49:55.725076: step: 776/463, loss: 0.0014312269631773233 2023-01-22 13:49:56.366431: step: 778/463, loss: 0.0029045837000012398 2023-01-22 13:49:56.958016: step: 780/463, loss: 0.043769583106040955 2023-01-22 13:49:57.598545: step: 782/463, loss: 0.012938517145812511 2023-01-22 13:49:58.187486: step: 784/463, loss: 0.005907172802835703 2023-01-22 13:49:58.832085: step: 786/463, loss: 0.09016815572977066 2023-01-22 13:49:59.406710: step: 788/463, loss: 0.00027480642893351614 2023-01-22 13:50:00.127153: step: 790/463, loss: 0.0014474766794592142 2023-01-22 13:50:00.743607: step: 792/463, loss: 0.0002280456101289019 2023-01-22 13:50:01.334715: step: 794/463, loss: 2.5823312171269208e-05 2023-01-22 13:50:02.015352: step: 796/463, loss: 0.0030285813845694065 2023-01-22 13:50:02.584721: step: 798/463, loss: 0.4696293771266937 2023-01-22 13:50:03.222960: step: 800/463, loss: 2.0237541320966557e-05 2023-01-22 13:50:03.796287: step: 802/463, loss: 0.0018452114891260862 2023-01-22 13:50:04.340994: step: 804/463, loss: 0.002650710754096508 2023-01-22 13:50:05.050425: step: 806/463, loss: 0.016896134242415428 2023-01-22 13:50:05.670756: step: 808/463, loss: 0.012639055028557777 2023-01-22 13:50:06.226297: step: 810/463, loss: 0.0026958251837641 2023-01-22 13:50:06.858002: step: 812/463, loss: 0.06391260772943497 2023-01-22 13:50:07.407905: step: 814/463, loss: 0.10942236334085464 2023-01-22 13:50:08.051945: step: 816/463, loss: 0.023639511317014694 2023-01-22 13:50:08.597124: step: 818/463, loss: 0.018793359398841858 2023-01-22 13:50:09.227773: step: 820/463, loss: 0.0004436885938048363 2023-01-22 13:50:09.828445: step: 822/463, loss: 0.0007158114458434284 2023-01-22 13:50:10.436549: step: 824/463, loss: 0.030479643493890762 2023-01-22 13:50:11.065343: step: 826/463, loss: 0.18333019316196442 2023-01-22 13:50:11.705251: step: 828/463, loss: 0.045274633914232254 2023-01-22 13:50:12.359304: step: 830/463, loss: 0.0015979836462065578 2023-01-22 13:50:12.986855: step: 832/463, loss: 0.048418302088975906 2023-01-22 13:50:13.603567: step: 834/463, loss: 0.0003083001065533608 2023-01-22 13:50:14.266091: step: 836/463, loss: 0.03679577261209488 2023-01-22 13:50:14.874083: step: 838/463, loss: 0.02976054698228836 2023-01-22 13:50:15.494718: step: 840/463, loss: 0.0027237161993980408 2023-01-22 13:50:16.146224: step: 842/463, loss: 0.014188104309141636 2023-01-22 13:50:16.711018: step: 844/463, loss: 0.000988544663414359 2023-01-22 13:50:17.277211: step: 846/463, loss: 0.0013473142171278596 2023-01-22 13:50:17.848479: step: 848/463, loss: 0.0008627126226201653 2023-01-22 13:50:18.466249: step: 850/463, loss: 0.02962706796824932 2023-01-22 13:50:19.059997: step: 852/463, loss: 0.0004974248586222529 2023-01-22 13:50:19.671122: step: 854/463, loss: 0.01137123815715313 2023-01-22 13:50:20.287711: step: 856/463, loss: 0.014730734750628471 2023-01-22 13:50:20.856936: step: 858/463, loss: 3.7103702652530046e-06 2023-01-22 13:50:21.426179: step: 860/463, loss: 0.0060698846355080605 2023-01-22 13:50:22.070877: step: 862/463, loss: 0.0019864977803081274 2023-01-22 13:50:22.643683: step: 864/463, loss: 0.0007902037468738854 2023-01-22 13:50:23.259489: step: 866/463, loss: 0.012988809496164322 2023-01-22 13:50:23.860335: step: 868/463, loss: 0.035771533846855164 2023-01-22 13:50:24.482122: step: 870/463, loss: 0.06955956667661667 2023-01-22 13:50:25.055190: step: 872/463, loss: 0.0034183140378445387 2023-01-22 13:50:25.679393: step: 874/463, loss: 0.015360042452812195 2023-01-22 13:50:26.359431: step: 876/463, loss: 0.00014103636203799397 2023-01-22 13:50:27.017535: step: 878/463, loss: 0.16709063947200775 2023-01-22 13:50:27.557744: step: 880/463, loss: 0.19758981466293335 2023-01-22 13:50:28.304364: step: 882/463, loss: 0.01201779767870903 2023-01-22 13:50:28.881208: step: 884/463, loss: 0.018532108515501022 2023-01-22 13:50:29.503013: step: 886/463, loss: 0.0005302152712829411 2023-01-22 13:50:30.070478: step: 888/463, loss: 7.821289727871772e-06 2023-01-22 13:50:30.679250: step: 890/463, loss: 0.06921089440584183 2023-01-22 13:50:31.315035: step: 892/463, loss: 0.0748114362359047 2023-01-22 13:50:31.897342: step: 894/463, loss: 0.037598468363285065 2023-01-22 13:50:32.484375: step: 896/463, loss: 0.09422103315591812 2023-01-22 13:50:33.119568: step: 898/463, loss: 0.012228304520249367 2023-01-22 13:50:33.721478: step: 900/463, loss: 0.01457449421286583 2023-01-22 13:50:34.333060: step: 902/463, loss: 0.0030833331402391195 2023-01-22 13:50:34.882269: step: 904/463, loss: 1.3750407695770264 2023-01-22 13:50:35.483474: step: 906/463, loss: 0.001494941534474492 2023-01-22 13:50:36.283635: step: 908/463, loss: 6.817867415520595e-06 2023-01-22 13:50:36.890924: step: 910/463, loss: 0.0016072022262960672 2023-01-22 13:50:37.488033: step: 912/463, loss: 0.002142509911209345 2023-01-22 13:50:38.083199: step: 914/463, loss: 0.0002531880745664239 2023-01-22 13:50:38.715590: step: 916/463, loss: 0.008350801654160023 2023-01-22 13:50:39.339251: step: 918/463, loss: 0.0008177039562724531 2023-01-22 13:50:39.876476: step: 920/463, loss: 0.006838065572082996 2023-01-22 13:50:40.430091: step: 922/463, loss: 0.0008794894674792886 2023-01-22 13:50:41.082202: step: 924/463, loss: 0.0006061706808395684 2023-01-22 13:50:41.725507: step: 926/463, loss: 0.04009054973721504 ================================================== Loss: 0.040 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3419110576923077, 'r': 0.33736954459203033, 'f1': 0.3396251193887297}, 'combined': 0.2502500879706429, 'epoch': 38} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3373381995082452, 'r': 0.3897372396892134, 'f1': 0.36164956655169656}, 'combined': 0.28032167359509497, 'epoch': 38} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3075723140495868, 'r': 0.3530953510436433, 'f1': 0.3287654593639576}, 'combined': 0.2422482332155477, 'epoch': 38} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3205289307427465, 'r': 0.3977151254620476, 'f1': 0.35497461567080213}, 'combined': 0.2751477882233012, 'epoch': 38} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3074786477586106, 'r': 0.3489036648190686, 'f1': 0.3268839668615985}, 'combined': 0.24086187031907255, 'epoch': 38} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3175228414020586, 'r': 0.37705837416494464, 'f1': 0.3447390849508065}, 'combined': 0.26721402756952467, 'epoch': 38} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.26666666666666666, 'r': 0.34285714285714286, 'f1': 0.3}, 'combined': 0.19999999999999998, 'epoch': 38} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.265625, 'r': 0.3695652173913043, 'f1': 0.3090909090909091}, 'combined': 0.15454545454545454, 'epoch': 38} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3875, 'r': 0.2672413793103448, 'f1': 0.3163265306122449}, 'combined': 0.2108843537414966, 'epoch': 38} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3279711174242424, 'r': 0.3285934535104364, 'f1': 0.32828199052132695}, 'combined': 0.24189199301571457, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33699381251830834, 'r': 0.40575541764612494, 'f1': 0.3681917384478598}, 'combined': 0.2853926393710684, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32926829268292684, 'r': 0.38571428571428573, 'f1': 0.35526315789473684}, 'combined': 0.23684210526315788, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29021588054776093, 'r': 0.325460313859064, 'f1': 0.30682931199235547}, 'combined': 0.2260847562048935, 'epoch': 13} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32837198696491027, 'r': 0.4053341714098111, 'f1': 0.3628166009822085}, 'combined': 0.28122626487616165, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3333333333333333, 'r': 0.4782608695652174, 'f1': 0.3928571428571428}, 'combined': 0.1964285714285714, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30241528588023536, 'r': 0.33799355480732185, 'f1': 0.319216135095804}, 'combined': 0.2352118890179608, 'epoch': 18} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3270213595028262, 'r': 0.3847310111797955, 'f1': 0.3535366048679202}, 'combined': 0.2740331578402061, 'epoch': 18} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 18} ****************************** Epoch: 39 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-22 13:53:17.800421: step: 2/463, loss: 0.005101046059280634 2023-01-22 13:53:18.425814: step: 4/463, loss: 0.0063080801628530025 2023-01-22 13:53:19.079488: step: 6/463, loss: 0.019881948828697205 2023-01-22 13:53:19.737923: step: 8/463, loss: 7.387231016764417e-05 2023-01-22 13:53:20.386779: step: 10/463, loss: 0.045004162937402725 2023-01-22 13:53:21.013991: step: 12/463, loss: 0.004906332585960627 2023-01-22 13:53:21.590319: step: 14/463, loss: 0.0028790051583200693 2023-01-22 13:53:22.163993: step: 16/463, loss: 0.0012290325248613954 2023-01-22 13:53:22.768954: step: 18/463, loss: 0.0006157810566946864 2023-01-22 13:53:23.350015: step: 20/463, loss: 0.003034910187125206 2023-01-22 13:53:23.961872: step: 22/463, loss: 2.634338306961581e-05 2023-01-22 13:53:24.594029: step: 24/463, loss: 0.025913435965776443 2023-01-22 13:53:25.185253: step: 26/463, loss: 0.0034398380666971207 2023-01-22 13:53:25.721368: step: 28/463, loss: 0.0011896591167896986 2023-01-22 13:53:26.295926: step: 30/463, loss: 0.0163203626871109 2023-01-22 13:53:26.914800: step: 32/463, loss: 0.015592370182275772 2023-01-22 13:53:27.594502: step: 34/463, loss: 0.25456076860427856 2023-01-22 13:53:28.166605: step: 36/463, loss: 2.3247070203069597e-05 2023-01-22 13:53:28.750233: step: 38/463, loss: 0.00016227616288233548 2023-01-22 13:53:29.371174: step: 40/463, loss: 0.00020118938118685037 2023-01-22 13:53:29.935077: step: 42/463, loss: 0.0014288818929344416 2023-01-22 13:53:30.588567: step: 44/463, loss: 2.4023011064855382e-05 2023-01-22 13:53:31.262746: step: 46/463, loss: 0.002262156456708908 2023-01-22 13:53:31.846299: step: 48/463, loss: 0.0002539228298701346 2023-01-22 13:53:32.469230: step: 50/463, loss: 0.060603659600019455 2023-01-22 13:53:33.069177: step: 52/463, loss: 0.06319565325975418 2023-01-22 13:53:33.686181: step: 54/463, loss: 0.06426774710416794 2023-01-22 13:53:34.225697: step: 56/463, loss: 0.008915627375245094 2023-01-22 13:53:34.836251: step: 58/463, loss: 0.006581749767065048 2023-01-22 13:53:35.501526: step: 60/463, loss: 0.0013842361513525248 2023-01-22 13:53:36.114058: step: 62/463, loss: 0.011555763892829418 2023-01-22 13:53:36.726034: step: 64/463, loss: 0.00033713303855620325 2023-01-22 13:53:37.355185: step: 66/463, loss: 0.6245585083961487 2023-01-22 13:53:37.945298: step: 68/463, loss: 0.00883561559021473 2023-01-22 13:53:38.516034: step: 70/463, loss: 0.0012230186257511377 2023-01-22 13:53:39.157180: step: 72/463, loss: 0.0014932213816791773 2023-01-22 13:53:39.774089: step: 74/463, loss: 0.004410460125654936 2023-01-22 13:53:40.352623: step: 76/463, loss: 6.290165401878767e-06 2023-01-22 13:53:41.021107: step: 78/463, loss: 0.0002952778886537999 2023-01-22 13:53:41.668276: step: 80/463, loss: 0.02845289371907711 2023-01-22 13:53:42.273236: step: 82/463, loss: 0.002594040473923087 2023-01-22 13:53:42.831617: step: 84/463, loss: 0.0006576331797987223 2023-01-22 13:53:43.439240: step: 86/463, loss: 0.0026975558139383793 2023-01-22 13:53:43.997299: step: 88/463, loss: 0.002611092058941722 2023-01-22 13:53:44.601511: step: 90/463, loss: 0.0002484899014234543 2023-01-22 13:53:45.264399: step: 92/463, loss: 0.0277217086404562 2023-01-22 13:53:45.797638: step: 94/463, loss: 0.00152283301576972 2023-01-22 13:53:46.416207: step: 96/463, loss: 0.03214195370674133 2023-01-22 13:53:46.979502: step: 98/463, loss: 0.006914425175637007 2023-01-22 13:53:47.577687: step: 100/463, loss: 0.012915435247123241 2023-01-22 13:53:48.234559: step: 102/463, loss: 0.0020883933175355196 2023-01-22 13:53:48.844484: step: 104/463, loss: 0.00017096732335630804 2023-01-22 13:53:49.476561: step: 106/463, loss: 0.0005624577170237899 2023-01-22 13:53:50.112778: step: 108/463, loss: 0.003950077574700117 2023-01-22 13:53:50.708023: step: 110/463, loss: 0.0038696820847690105 2023-01-22 13:53:51.269335: step: 112/463, loss: 0.09254588931798935 2023-01-22 13:53:51.915017: step: 114/463, loss: 0.003738517640158534 2023-01-22 13:53:52.459232: step: 116/463, loss: 0.018618999049067497 2023-01-22 13:53:53.056326: step: 118/463, loss: 0.00016887504898477346 2023-01-22 13:53:53.723465: step: 120/463, loss: 0.011418359354138374 2023-01-22 13:53:54.315142: step: 122/463, loss: 0.0008552664658054709 2023-01-22 13:53:54.947647: step: 124/463, loss: 0.004260038956999779 2023-01-22 13:53:55.518148: step: 126/463, loss: 0.0004883751971647143 2023-01-22 13:53:56.141369: step: 128/463, loss: 0.006286273244768381 2023-01-22 13:53:56.687606: step: 130/463, loss: 0.43046244978904724 2023-01-22 13:53:57.303208: step: 132/463, loss: 0.001455778838135302 2023-01-22 13:53:57.990243: step: 134/463, loss: 0.00635158084332943 2023-01-22 13:53:58.643156: step: 136/463, loss: 0.08300653845071793 2023-01-22 13:53:59.354818: step: 138/463, loss: 0.0044763521291315556 2023-01-22 13:53:59.953338: step: 140/463, loss: 0.002413678914308548 2023-01-22 13:54:00.529296: step: 142/463, loss: 0.12263081222772598 2023-01-22 13:54:01.127032: step: 144/463, loss: 0.007996129803359509 2023-01-22 13:54:01.779169: step: 146/463, loss: 0.3734673857688904 2023-01-22 13:54:02.307613: step: 148/463, loss: 0.1497054547071457 2023-01-22 13:54:02.855951: step: 150/463, loss: 0.016379481181502342 2023-01-22 13:54:03.450619: step: 152/463, loss: 0.011143434792757034 2023-01-22 13:54:04.020692: step: 154/463, loss: 0.0004206180165056139 2023-01-22 13:54:04.711851: step: 156/463, loss: 0.031053245067596436 2023-01-22 13:54:05.270952: step: 158/463, loss: 0.004198350012302399 2023-01-22 13:54:05.852030: step: 160/463, loss: 0.030927203595638275 2023-01-22 13:54:06.460269: step: 162/463, loss: 0.0027811983600258827 2023-01-22 13:54:07.058585: step: 164/463, loss: 0.0003592850116547197 2023-01-22 13:54:07.691443: step: 166/463, loss: 0.00947804469615221 2023-01-22 13:54:08.318966: step: 168/463, loss: 0.00030571690876968205 2023-01-22 13:54:08.976950: step: 170/463, loss: 0.11764440685510635 2023-01-22 13:54:09.496669: step: 172/463, loss: 0.009552194736897945 2023-01-22 13:54:10.143207: step: 174/463, loss: 0.008524104952812195 2023-01-22 13:54:10.697992: step: 176/463, loss: 0.0008647734648548067 2023-01-22 13:54:11.278460: step: 178/463, loss: 0.0031043209601193666 2023-01-22 13:54:11.862800: step: 180/463, loss: 0.006270288489758968 2023-01-22 13:54:12.541828: step: 182/463, loss: 0.002518759109079838 2023-01-22 13:54:13.134754: step: 184/463, loss: 0.0005668033845722675 2023-01-22 13:54:13.769319: step: 186/463, loss: 0.02828381024301052 2023-01-22 13:54:14.536679: step: 188/463, loss: 0.0041167642921209335 2023-01-22 13:54:15.154364: step: 190/463, loss: 0.00010263576405122876 2023-01-22 13:54:15.753064: step: 192/463, loss: 0.000519321474712342 2023-01-22 13:54:16.359803: step: 194/463, loss: 0.46234777569770813 2023-01-22 13:54:16.969807: step: 196/463, loss: 0.008984023705124855 2023-01-22 13:54:17.529894: step: 198/463, loss: 0.03553246334195137 2023-01-22 13:54:18.149854: step: 200/463, loss: 0.004233300685882568 2023-01-22 13:54:18.728352: step: 202/463, loss: 0.0008449263987131417 2023-01-22 13:54:19.364664: step: 204/463, loss: 0.06882200390100479 2023-01-22 13:54:19.912275: step: 206/463, loss: 0.00012937060091644526 2023-01-22 13:54:20.501975: step: 208/463, loss: 0.04999740794301033 2023-01-22 13:54:21.075786: step: 210/463, loss: 0.00308515434153378 2023-01-22 13:54:21.687871: step: 212/463, loss: 0.04466313123703003 2023-01-22 13:54:22.290828: step: 214/463, loss: 0.25284236669540405 2023-01-22 13:54:22.863056: step: 216/463, loss: 0.0021615626756101847 2023-01-22 13:54:23.565001: step: 218/463, loss: 0.005363750737160444 2023-01-22 13:54:24.254782: step: 220/463, loss: 0.1568789780139923 2023-01-22 13:54:24.920516: step: 222/463, loss: 0.006635377649217844 2023-01-22 13:54:25.471196: step: 224/463, loss: 0.0016466293018311262 2023-01-22 13:54:26.097932: step: 226/463, loss: 0.0015907459892332554 2023-01-22 13:54:26.755737: step: 228/463, loss: 0.002491228748112917 2023-01-22 13:54:27.389925: step: 230/463, loss: 0.0019399513257667422 2023-01-22 13:54:28.008344: step: 232/463, loss: 0.0002647745131980628 2023-01-22 13:54:28.605542: step: 234/463, loss: 0.0329493023455143 2023-01-22 13:54:29.287585: step: 236/463, loss: 0.04008388891816139 2023-01-22 13:54:29.865109: step: 238/463, loss: 0.00517432764172554 2023-01-22 13:54:30.422141: step: 240/463, loss: 0.0018173099961131811 2023-01-22 13:54:31.018429: step: 242/463, loss: 0.0060622477903962135 2023-01-22 13:54:31.646236: step: 244/463, loss: 0.000108723746961914 2023-01-22 13:54:32.240403: step: 246/463, loss: 0.014138683676719666 2023-01-22 13:54:32.858115: step: 248/463, loss: 0.03220289945602417 2023-01-22 13:54:33.476589: step: 250/463, loss: 0.0005985701573081315 2023-01-22 13:54:34.046145: step: 252/463, loss: 0.0008431488531641662 2023-01-22 13:54:34.734519: step: 254/463, loss: 0.0031692287884652615 2023-01-22 13:54:35.301376: step: 256/463, loss: 0.0014526075683534145 2023-01-22 13:54:35.990196: step: 258/463, loss: 0.00011805987014668062 2023-01-22 13:54:36.513567: step: 260/463, loss: 0.0007070398423820734 2023-01-22 13:54:37.072765: step: 262/463, loss: 5.11353173351381e-05 2023-01-22 13:54:37.700118: step: 264/463, loss: 0.002086434280499816 2023-01-22 13:54:38.352726: step: 266/463, loss: 0.018929775804281235 2023-01-22 13:54:38.968821: step: 268/463, loss: 0.002038001548498869 2023-01-22 13:54:39.564217: step: 270/463, loss: 0.003594369860365987 2023-01-22 13:54:40.212817: step: 272/463, loss: 0.0012561352923512459 2023-01-22 13:54:40.795386: step: 274/463, loss: 0.026628682389855385 2023-01-22 13:54:41.428911: step: 276/463, loss: 0.021509597077965736 2023-01-22 13:54:42.082221: step: 278/463, loss: 0.001450798474252224 2023-01-22 13:54:42.699643: step: 280/463, loss: 0.005348288919776678 2023-01-22 13:54:43.311741: step: 282/463, loss: 0.005450593773275614 2023-01-22 13:54:44.006133: step: 284/463, loss: 0.035292964428663254 2023-01-22 13:54:44.607451: step: 286/463, loss: 2.2482222448161338e-06 2023-01-22 13:54:45.187140: step: 288/463, loss: 0.012215464375913143 2023-01-22 13:54:45.782789: step: 290/463, loss: 8.917354716686532e-05 2023-01-22 13:54:46.349209: step: 292/463, loss: 0.003010797780007124 2023-01-22 13:54:46.923426: step: 294/463, loss: 0.011076126247644424 2023-01-22 13:54:47.524779: step: 296/463, loss: 0.0001839690812630579 2023-01-22 13:54:48.105938: step: 298/463, loss: 0.0001617338857613504 2023-01-22 13:54:48.681831: step: 300/463, loss: 0.0016445706132799387 2023-01-22 13:54:49.235336: step: 302/463, loss: 0.0030276840552687645 2023-01-22 13:54:49.845687: step: 304/463, loss: 0.005086181219667196 2023-01-22 13:54:50.493040: step: 306/463, loss: 0.02982032112777233 2023-01-22 13:54:51.129010: step: 308/463, loss: 0.10039351880550385 2023-01-22 13:54:51.790469: step: 310/463, loss: 0.0018271517474204302 2023-01-22 13:54:52.397932: step: 312/463, loss: 0.00021448054758366197 2023-01-22 13:54:53.084452: step: 314/463, loss: 0.01972249150276184 2023-01-22 13:54:53.649404: step: 316/463, loss: 0.00026626032195053995 2023-01-22 13:54:54.326707: step: 318/463, loss: 0.040258318185806274 2023-01-22 13:54:54.943851: step: 320/463, loss: 0.0057281991466879845 2023-01-22 13:54:55.570800: step: 322/463, loss: 0.014666832983493805 2023-01-22 13:54:56.141721: step: 324/463, loss: 0.013578989543020725 2023-01-22 13:54:56.730283: step: 326/463, loss: 0.000523874128703028 2023-01-22 13:54:57.415958: step: 328/463, loss: 0.0010622312547639012 2023-01-22 13:54:58.002998: step: 330/463, loss: 1.2540181160147768e-06 2023-01-22 13:54:58.608063: step: 332/463, loss: 0.0012956437421962619 2023-01-22 13:54:59.188677: step: 334/463, loss: 0.0032489588484168053 2023-01-22 13:54:59.866547: step: 336/463, loss: 0.003476150333881378 2023-01-22 13:55:00.427962: step: 338/463, loss: 0.0002736754540819675 2023-01-22 13:55:01.003955: step: 340/463, loss: 0.002102866303175688 2023-01-22 13:55:01.628802: step: 342/463, loss: 0.5588254928588867 2023-01-22 13:55:02.192963: step: 344/463, loss: 0.004840799607336521 2023-01-22 13:55:02.939976: step: 346/463, loss: 0.00010948073031613603 2023-01-22 13:55:03.571871: step: 348/463, loss: 0.0037592104636132717 2023-01-22 13:55:04.135759: step: 350/463, loss: 0.0006970184622332454 2023-01-22 13:55:04.711227: step: 352/463, loss: 0.0017699286108836532 2023-01-22 13:55:05.307934: step: 354/463, loss: 0.04139326140284538 2023-01-22 13:55:05.917147: step: 356/463, loss: 0.0009388115722686052 2023-01-22 13:55:06.519207: step: 358/463, loss: 0.001461970154196024 2023-01-22 13:55:07.174744: step: 360/463, loss: 0.007859587669372559 2023-01-22 13:55:07.799981: step: 362/463, loss: 0.00931469164788723 2023-01-22 13:55:08.399753: step: 364/463, loss: 0.000484753109049052 2023-01-22 13:55:09.037860: step: 366/463, loss: 0.011816229671239853 2023-01-22 13:55:09.601005: step: 368/463, loss: 0.0037920218892395496 2023-01-22 13:55:10.267248: step: 370/463, loss: 0.0001582533586770296 2023-01-22 13:55:10.894318: step: 372/463, loss: 0.0309405829757452 2023-01-22 13:55:11.494981: step: 374/463, loss: 0.15409953892230988 2023-01-22 13:55:12.148168: step: 376/463, loss: 0.022477364167571068 2023-01-22 13:55:12.757471: step: 378/463, loss: 3.470348383416422e-05 2023-01-22 13:55:13.346997: step: 380/463, loss: 1.8464037566445768e-05 2023-01-22 13:55:14.072512: step: 382/463, loss: 0.0025057110469788313 2023-01-22 13:55:14.650379: step: 384/463, loss: 0.0003260721277911216 2023-01-22 13:55:15.189999: step: 386/463, loss: 0.0013037144672125578 2023-01-22 13:55:15.781243: step: 388/463, loss: 0.006715648341923952 2023-01-22 13:55:16.387875: step: 390/463, loss: 0.015780387446284294 2023-01-22 13:55:16.970566: step: 392/463, loss: 0.00019978114869445562 2023-01-22 13:55:17.603983: step: 394/463, loss: 0.6672088503837585 2023-01-22 13:55:18.193447: step: 396/463, loss: 0.002492237603291869 2023-01-22 13:55:18.788132: step: 398/463, loss: 0.03764458745718002 2023-01-22 13:55:19.487855: step: 400/463, loss: 0.00914923008531332 2023-01-22 13:55:20.093791: step: 402/463, loss: 0.006509678903967142 2023-01-22 13:55:20.663701: step: 404/463, loss: 0.0030041737481951714 2023-01-22 13:55:21.269736: step: 406/463, loss: 0.00275383610278368 2023-01-22 13:55:21.884909: step: 408/463, loss: 0.0032367929816246033 2023-01-22 13:55:22.433476: step: 410/463, loss: 4.163006451562978e-05 2023-01-22 13:55:23.061817: step: 412/463, loss: 0.0028238550294190645 2023-01-22 13:55:23.675708: step: 414/463, loss: 0.016968419775366783 2023-01-22 13:55:24.276063: step: 416/463, loss: 0.07657045871019363 2023-01-22 13:55:24.816146: step: 418/463, loss: 7.713102968409657e-05 2023-01-22 13:55:25.408628: step: 420/463, loss: 0.00978376716375351 2023-01-22 13:55:25.988814: step: 422/463, loss: 0.0027286261320114136 2023-01-22 13:55:26.608161: step: 424/463, loss: 0.0009378529503010213 2023-01-22 13:55:27.231396: step: 426/463, loss: 0.00018820025434251875 2023-01-22 13:55:27.907123: step: 428/463, loss: 0.005454068537801504 2023-01-22 13:55:28.569705: step: 430/463, loss: 0.017453545704483986 2023-01-22 13:55:29.152034: step: 432/463, loss: 0.0009796028025448322 2023-01-22 13:55:29.730942: step: 434/463, loss: 0.008941635489463806 2023-01-22 13:55:30.378148: step: 436/463, loss: 0.03115403838455677 2023-01-22 13:55:31.028890: step: 438/463, loss: 0.013458590023219585 2023-01-22 13:55:31.647014: step: 440/463, loss: 0.0001702435256447643 2023-01-22 13:55:32.278826: step: 442/463, loss: 0.0003959675959777087 2023-01-22 13:55:32.887984: step: 444/463, loss: 0.006209314800798893 2023-01-22 13:55:33.438042: step: 446/463, loss: 0.0018842765130102634 2023-01-22 13:55:34.049918: step: 448/463, loss: 0.0028514459263533354 2023-01-22 13:55:34.652669: step: 450/463, loss: 0.00018538547737989575 2023-01-22 13:55:35.257629: step: 452/463, loss: 0.09835506975650787 2023-01-22 13:55:35.865100: step: 454/463, loss: 0.040322139859199524 2023-01-22 13:55:36.471413: step: 456/463, loss: 0.0021380505058914423 2023-01-22 13:55:37.091775: step: 458/463, loss: 0.0032994237262755632 2023-01-22 13:55:37.719566: step: 460/463, loss: 0.012302545830607414 2023-01-22 13:55:38.350823: step: 462/463, loss: 0.0030976543202996254 2023-01-22 13:55:38.881390: step: 464/463, loss: 0.0023362203501164913 2023-01-22 13:55:39.473746: step: 466/463, loss: 0.0015525113558396697 2023-01-22 13:55:40.107560: step: 468/463, loss: 0.001852512708865106 2023-01-22 13:55:40.780276: step: 470/463, loss: 0.013955218717455864 2023-01-22 13:55:41.464911: step: 472/463, loss: 0.017245154827833176 2023-01-22 13:55:42.131615: step: 474/463, loss: 2.5008670490933582e-05 2023-01-22 13:55:42.812343: step: 476/463, loss: 0.007278566248714924 2023-01-22 13:55:43.427569: step: 478/463, loss: 0.000951079826336354 2023-01-22 13:55:44.118422: step: 480/463, loss: 0.00031163645326159894 2023-01-22 13:55:44.703852: step: 482/463, loss: 0.010875837877392769 2023-01-22 13:55:45.267076: step: 484/463, loss: 0.0008127672481350601 2023-01-22 13:55:45.849506: step: 486/463, loss: 0.0005339454510249197 2023-01-22 13:55:46.445249: step: 488/463, loss: 0.0013327541528269649 2023-01-22 13:55:47.043568: step: 490/463, loss: 0.013201793655753136 2023-01-22 13:55:47.645749: step: 492/463, loss: 0.0029789437539875507 2023-01-22 13:55:48.210601: step: 494/463, loss: 0.011383281089365482 2023-01-22 13:55:48.785283: step: 496/463, loss: 0.0029643219895660877 2023-01-22 13:55:49.370959: step: 498/463, loss: 0.023574139922857285 2023-01-22 13:55:49.944744: step: 500/463, loss: 0.00040819059358909726 2023-01-22 13:55:50.563510: step: 502/463, loss: 0.00013156521890778095 2023-01-22 13:55:51.154183: step: 504/463, loss: 0.551557183265686 2023-01-22 13:55:51.757510: step: 506/463, loss: 0.0020468926522880793 2023-01-22 13:55:52.351690: step: 508/463, loss: 0.3316692113876343 2023-01-22 13:55:52.953934: step: 510/463, loss: 0.008236047811806202 2023-01-22 13:55:53.555345: step: 512/463, loss: 0.00558823999017477 2023-01-22 13:55:54.181886: step: 514/463, loss: 0.004384888801723719 2023-01-22 13:55:54.768730: step: 516/463, loss: 0.01471670065075159 2023-01-22 13:55:55.449241: step: 518/463, loss: 0.010182141326367855 2023-01-22 13:55:56.064783: step: 520/463, loss: 0.0035986772272735834 2023-01-22 13:55:56.663997: step: 522/463, loss: 0.005598139949142933 2023-01-22 13:55:57.240680: step: 524/463, loss: 0.000648642482701689 2023-01-22 13:55:57.827904: step: 526/463, loss: 0.01973499171435833 2023-01-22 13:55:58.434180: step: 528/463, loss: 0.005143820773810148 2023-01-22 13:55:59.022027: step: 530/463, loss: 0.00038234624662436545 2023-01-22 13:55:59.595853: step: 532/463, loss: 0.030666884034872055 2023-01-22 13:56:00.236179: step: 534/463, loss: 0.005132536869496107 2023-01-22 13:56:00.837440: step: 536/463, loss: 0.006605064030736685 2023-01-22 13:56:01.438396: step: 538/463, loss: 0.0005883269477635622 2023-01-22 13:56:01.997482: step: 540/463, loss: 0.0002428966254228726 2023-01-22 13:56:02.580908: step: 542/463, loss: 0.002347770147025585 2023-01-22 13:56:03.197693: step: 544/463, loss: 0.000972317939158529 2023-01-22 13:56:03.818548: step: 546/463, loss: 0.0012101618340238929 2023-01-22 13:56:04.392716: step: 548/463, loss: 0.00036239458131603897 2023-01-22 13:56:05.097028: step: 550/463, loss: 0.0024849427863955498 2023-01-22 13:56:05.728959: step: 552/463, loss: 0.08889742940664291 2023-01-22 13:56:06.315240: step: 554/463, loss: 0.012291314080357552 2023-01-22 13:56:06.917147: step: 556/463, loss: 0.003750688163563609 2023-01-22 13:56:07.490406: step: 558/463, loss: 0.08504805713891983 2023-01-22 13:56:08.108964: step: 560/463, loss: 0.01901933364570141 2023-01-22 13:56:08.752904: step: 562/463, loss: 0.0319950170814991 2023-01-22 13:56:09.362043: step: 564/463, loss: 0.0008004764677025378 2023-01-22 13:56:09.912013: step: 566/463, loss: 0.000420475349528715 2023-01-22 13:56:10.523201: step: 568/463, loss: 0.030587129294872284 2023-01-22 13:56:11.183842: step: 570/463, loss: 0.0009074404370039701 2023-01-22 13:56:11.757871: step: 572/463, loss: 0.00037804763996973634 2023-01-22 13:56:12.329323: step: 574/463, loss: 0.005028039216995239 2023-01-22 13:56:12.971638: step: 576/463, loss: 0.002985528204590082 2023-01-22 13:56:13.510639: step: 578/463, loss: 0.00027284491807222366 2023-01-22 13:56:14.156541: step: 580/463, loss: 0.02392571046948433 2023-01-22 13:56:14.766538: step: 582/463, loss: 0.0006588668911717832 2023-01-22 13:56:15.363727: step: 584/463, loss: 0.0015033355448395014 2023-01-22 13:56:15.940376: step: 586/463, loss: 0.001714364974759519 2023-01-22 13:56:16.499014: step: 588/463, loss: 0.001540832919999957 2023-01-22 13:56:17.059060: step: 590/463, loss: 0.0008473931229673326 2023-01-22 13:56:17.664432: step: 592/463, loss: 0.02150999940931797 2023-01-22 13:56:18.267423: step: 594/463, loss: 0.0005528069450519979 2023-01-22 13:56:18.864501: step: 596/463, loss: 1.9969973436673172e-05 2023-01-22 13:56:19.501786: step: 598/463, loss: 0.01979796588420868 2023-01-22 13:56:20.112785: step: 600/463, loss: 0.007987127639353275 2023-01-22 13:56:20.737645: step: 602/463, loss: 0.00027877395041286945 2023-01-22 13:56:21.342526: step: 604/463, loss: 0.01493868324905634 2023-01-22 13:56:21.950896: step: 606/463, loss: 0.0010983727406710386 2023-01-22 13:56:22.526134: step: 608/463, loss: 0.08684752136468887 2023-01-22 13:56:23.122608: step: 610/463, loss: 0.0024158363230526447 2023-01-22 13:56:23.776220: step: 612/463, loss: 0.022967644035816193 2023-01-22 13:56:24.370407: step: 614/463, loss: 0.026730766519904137 2023-01-22 13:56:24.965125: step: 616/463, loss: 0.0008778470801189542 2023-01-22 13:56:25.589104: step: 618/463, loss: 0.006617836654186249 2023-01-22 13:56:26.183444: step: 620/463, loss: 0.0012552656698971987 2023-01-22 13:56:26.798939: step: 622/463, loss: 0.0016567009733989835 2023-01-22 13:56:27.339661: step: 624/463, loss: 0.00783392135053873 2023-01-22 13:56:27.957205: step: 626/463, loss: 0.10046045482158661 2023-01-22 13:56:28.571492: step: 628/463, loss: 0.015135063789784908 2023-01-22 13:56:29.109241: step: 630/463, loss: 0.008125129155814648 2023-01-22 13:56:29.651772: step: 632/463, loss: 5.0117076170863584e-05 2023-01-22 13:56:30.318079: step: 634/463, loss: 0.003074069507420063 2023-01-22 13:56:30.934674: step: 636/463, loss: 0.0026245855260640383 2023-01-22 13:56:31.567929: step: 638/463, loss: 0.04754061996936798 2023-01-22 13:56:32.129526: step: 640/463, loss: 0.002656852826476097 2023-01-22 13:56:32.706484: step: 642/463, loss: 0.0008477208903059363 2023-01-22 13:56:33.361077: step: 644/463, loss: 0.0167354978621006 2023-01-22 13:56:33.967515: step: 646/463, loss: 0.003730935510247946 2023-01-22 13:56:34.524448: step: 648/463, loss: 0.011582210659980774 2023-01-22 13:56:35.177073: step: 650/463, loss: 0.0007358565926551819 2023-01-22 13:56:35.812605: step: 652/463, loss: 0.0030736280605196953 2023-01-22 13:56:36.368255: step: 654/463, loss: 0.01061483845114708 2023-01-22 13:56:36.951417: step: 656/463, loss: 0.09793661534786224 2023-01-22 13:56:37.538996: step: 658/463, loss: 0.0012629505945369601 2023-01-22 13:56:38.163579: step: 660/463, loss: 0.0011275908909738064 2023-01-22 13:56:38.776745: step: 662/463, loss: 0.004502336960285902 2023-01-22 13:56:39.394876: step: 664/463, loss: 0.0013200596440583467 2023-01-22 13:56:39.974466: step: 666/463, loss: 0.0008304209331981838 2023-01-22 13:56:40.574881: step: 668/463, loss: 0.009631169028580189 2023-01-22 13:56:41.204117: step: 670/463, loss: 0.028128191828727722 2023-01-22 13:56:41.806824: step: 672/463, loss: 0.0015040450962260365 2023-01-22 13:56:42.374139: step: 674/463, loss: 0.0417172834277153 2023-01-22 13:56:42.960572: step: 676/463, loss: 0.005432909354567528 2023-01-22 13:56:43.580032: step: 678/463, loss: 0.003239082172513008 2023-01-22 13:56:44.202373: step: 680/463, loss: 0.0013843229971826077 2023-01-22 13:56:44.810198: step: 682/463, loss: 0.028053050860762596 2023-01-22 13:56:45.342670: step: 684/463, loss: 3.207140980521217e-05 2023-01-22 13:56:46.088555: step: 686/463, loss: 0.006003293674439192 2023-01-22 13:56:46.675264: step: 688/463, loss: 0.0002574764075689018 2023-01-22 13:56:47.282289: step: 690/463, loss: 0.5024890899658203 2023-01-22 13:56:47.933069: step: 692/463, loss: 0.031492963433265686 2023-01-22 13:56:48.450347: step: 694/463, loss: 0.0004643872089218348 2023-01-22 13:56:49.011213: step: 696/463, loss: 0.009366431273519993 2023-01-22 13:56:49.571669: step: 698/463, loss: 0.0012687998823821545 2023-01-22 13:56:50.160024: step: 700/463, loss: 0.004887820687144995 2023-01-22 13:56:50.760347: step: 702/463, loss: 5.0249684136360884e-05 2023-01-22 13:56:51.317881: step: 704/463, loss: 0.07279365509748459 2023-01-22 13:56:51.926852: step: 706/463, loss: 0.00300635676831007 2023-01-22 13:56:52.484655: step: 708/463, loss: 0.0017109294421970844 2023-01-22 13:56:53.182317: step: 710/463, loss: 0.020548371598124504 2023-01-22 13:56:53.801315: step: 712/463, loss: 0.01779833994805813 2023-01-22 13:56:54.433130: step: 714/463, loss: 0.002069491660222411 2023-01-22 13:56:55.135544: step: 716/463, loss: 0.0058089084923267365 2023-01-22 13:56:55.755712: step: 718/463, loss: 0.0032654430251568556 2023-01-22 13:56:56.345414: step: 720/463, loss: 0.001063687726855278 2023-01-22 13:56:56.969578: step: 722/463, loss: 0.0035961433313786983 2023-01-22 13:56:57.535083: step: 724/463, loss: 0.000984791200608015 2023-01-22 13:56:58.190729: step: 726/463, loss: 0.0021394516807049513 2023-01-22 13:56:58.790433: step: 728/463, loss: 0.0663040280342102 2023-01-22 13:56:59.354151: step: 730/463, loss: 8.17628315417096e-05 2023-01-22 13:56:59.990135: step: 732/463, loss: 0.009121349081397057 2023-01-22 13:57:00.624079: step: 734/463, loss: 0.10119572281837463 2023-01-22 13:57:01.196682: step: 736/463, loss: 0.0002363017265452072 2023-01-22 13:57:01.767663: step: 738/463, loss: 0.05009730905294418 2023-01-22 13:57:02.328487: step: 740/463, loss: 0.01026653777807951 2023-01-22 13:57:02.945642: step: 742/463, loss: 0.005966320168226957 2023-01-22 13:57:03.543304: step: 744/463, loss: 0.0044987816363573074 2023-01-22 13:57:04.052201: step: 746/463, loss: 0.008317560888826847 2023-01-22 13:57:04.634367: step: 748/463, loss: 0.000586635316722095 2023-01-22 13:57:05.318193: step: 750/463, loss: 0.08782698214054108 2023-01-22 13:57:05.878541: step: 752/463, loss: 0.07010345160961151 2023-01-22 13:57:06.492436: step: 754/463, loss: 0.058717429637908936 2023-01-22 13:57:07.112203: step: 756/463, loss: 0.010663501918315887 2023-01-22 13:57:07.727836: step: 758/463, loss: 0.05304395407438278 2023-01-22 13:57:08.377496: step: 760/463, loss: 0.09776268899440765 2023-01-22 13:57:08.924062: step: 762/463, loss: 0.005428258329629898 2023-01-22 13:57:09.542952: step: 764/463, loss: 0.033863089978694916 2023-01-22 13:57:10.142259: step: 766/463, loss: 0.008410153910517693 2023-01-22 13:57:10.844016: step: 768/463, loss: 0.09233640134334564 2023-01-22 13:57:11.450961: step: 770/463, loss: 0.00014279631432145834 2023-01-22 13:57:12.037022: step: 772/463, loss: 0.013316868804395199 2023-01-22 13:57:12.644852: step: 774/463, loss: 0.004622491076588631 2023-01-22 13:57:13.228442: step: 776/463, loss: 0.00931533332914114 2023-01-22 13:57:13.929047: step: 778/463, loss: 0.0290085282176733 2023-01-22 13:57:14.588761: step: 780/463, loss: 0.19074031710624695 2023-01-22 13:57:15.185714: step: 782/463, loss: 0.002779945731163025 2023-01-22 13:57:15.772042: step: 784/463, loss: 0.002603244734928012 2023-01-22 13:57:16.357144: step: 786/463, loss: 0.00027012574719265103 2023-01-22 13:57:16.929485: step: 788/463, loss: 0.0022005753125995398 2023-01-22 13:57:17.530455: step: 790/463, loss: 0.0023240766022354364 2023-01-22 13:57:18.078836: step: 792/463, loss: 0.01554103009402752 2023-01-22 13:57:18.677965: step: 794/463, loss: 0.07578250020742416 2023-01-22 13:57:19.241648: step: 796/463, loss: 0.000614823482464999 2023-01-22 13:57:19.815369: step: 798/463, loss: 0.014563743025064468 2023-01-22 13:57:20.464960: step: 800/463, loss: 0.001362152281217277 2023-01-22 13:57:21.091149: step: 802/463, loss: 0.03800148516893387 2023-01-22 13:57:21.658951: step: 804/463, loss: 0.00412390660494566 2023-01-22 13:57:22.294199: step: 806/463, loss: 0.03198016807436943 2023-01-22 13:57:22.908608: step: 808/463, loss: 0.03758576884865761 2023-01-22 13:57:23.536079: step: 810/463, loss: 0.02593049965798855 2023-01-22 13:57:24.294772: step: 812/463, loss: 0.0037283925339579582 2023-01-22 13:57:24.869775: step: 814/463, loss: 0.06398437172174454 2023-01-22 13:57:25.511934: step: 816/463, loss: 0.042970314621925354 2023-01-22 13:57:26.109074: step: 818/463, loss: 0.004306245129555464 2023-01-22 13:57:26.661193: step: 820/463, loss: 0.02571534737944603 2023-01-22 13:57:27.224929: step: 822/463, loss: 0.00019989970314782113 2023-01-22 13:57:27.803975: step: 824/463, loss: 0.06832410395145416 2023-01-22 13:57:28.384440: step: 826/463, loss: 0.004086012486368418 2023-01-22 13:57:28.957544: step: 828/463, loss: 0.014487771317362785 2023-01-22 13:57:29.752540: step: 830/463, loss: 0.005764051340520382 2023-01-22 13:57:30.376730: step: 832/463, loss: 5.8278452343074605e-05 2023-01-22 13:57:30.946720: step: 834/463, loss: 0.029955696314573288 2023-01-22 13:57:31.557959: step: 836/463, loss: 0.009231744334101677 2023-01-22 13:57:32.171598: step: 838/463, loss: 0.013798731379210949 2023-01-22 13:57:32.734709: step: 840/463, loss: 0.16709890961647034 2023-01-22 13:57:33.349526: step: 842/463, loss: 0.005619935691356659 2023-01-22 13:57:34.000486: step: 844/463, loss: 0.0003501700994092971 2023-01-22 13:57:34.681705: step: 846/463, loss: 0.0010360708693042397 2023-01-22 13:57:35.320108: step: 848/463, loss: 0.010365192778408527 2023-01-22 13:57:35.921819: step: 850/463, loss: 0.017780913040041924 2023-01-22 13:57:36.558809: step: 852/463, loss: 0.0027114583645015955 2023-01-22 13:57:37.202451: step: 854/463, loss: 0.02568534016609192 2023-01-22 13:57:37.784735: step: 856/463, loss: 0.0015489222714677453 2023-01-22 13:57:38.377331: step: 858/463, loss: 0.012909810058772564 2023-01-22 13:57:38.932213: step: 860/463, loss: 0.005378380883485079 2023-01-22 13:57:39.545830: step: 862/463, loss: 0.000740766292437911 2023-01-22 13:57:40.202222: step: 864/463, loss: 0.0013974225148558617 2023-01-22 13:57:40.818774: step: 866/463, loss: 0.04189683869481087 2023-01-22 13:57:41.379341: step: 868/463, loss: 0.10087142884731293 2023-01-22 13:57:41.996966: step: 870/463, loss: 0.048038266599178314 2023-01-22 13:57:42.572142: step: 872/463, loss: 0.00904986634850502 2023-01-22 13:57:43.193673: step: 874/463, loss: 0.0081681739538908 2023-01-22 13:57:43.728422: step: 876/463, loss: 0.001242789556272328 2023-01-22 13:57:44.276250: step: 878/463, loss: 0.002331588650122285 2023-01-22 13:57:44.977287: step: 880/463, loss: 0.028196750208735466 2023-01-22 13:57:45.600833: step: 882/463, loss: 0.030575983226299286 2023-01-22 13:57:46.171431: step: 884/463, loss: 0.0054059275425970554 2023-01-22 13:57:46.723075: step: 886/463, loss: 0.0030291141010820866 2023-01-22 13:57:47.352830: step: 888/463, loss: 0.16524618864059448 2023-01-22 13:57:47.973796: step: 890/463, loss: 0.005934928078204393 2023-01-22 13:57:48.566859: step: 892/463, loss: 0.026059653609991074 2023-01-22 13:57:49.176917: step: 894/463, loss: 0.23381909728050232 2023-01-22 13:57:49.775895: step: 896/463, loss: 0.0011872815666720271 2023-01-22 13:57:50.350618: step: 898/463, loss: 0.00025052239652723074 2023-01-22 13:57:50.925190: step: 900/463, loss: 0.00011480140528874472 2023-01-22 13:57:51.472340: step: 902/463, loss: 0.015487512573599815 2023-01-22 13:57:52.039963: step: 904/463, loss: 0.01088606845587492 2023-01-22 13:57:52.604683: step: 906/463, loss: 0.0017687305808067322 2023-01-22 13:57:53.233221: step: 908/463, loss: 0.0036358400247991085 2023-01-22 13:57:53.783810: step: 910/463, loss: 0.009502926841378212 2023-01-22 13:57:54.386332: step: 912/463, loss: 0.0005604664911516011 2023-01-22 13:57:55.064864: step: 914/463, loss: 0.03314416855573654 2023-01-22 13:57:55.654892: step: 916/463, loss: 0.005248896777629852 2023-01-22 13:57:56.297676: step: 918/463, loss: 0.01236130204051733 2023-01-22 13:57:56.972160: step: 920/463, loss: 0.0001980936503969133 2023-01-22 13:57:57.610295: step: 922/463, loss: 0.008808234706521034 2023-01-22 13:57:58.191615: step: 924/463, loss: 0.0009770625038072467 2023-01-22 13:57:58.837059: step: 926/463, loss: 0.006946452893316746 ================================================== Loss: 0.026 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3406179869202521, 'r': 0.35419099968178014, 'f1': 0.3472719196879966}, 'combined': 0.25588457240168166, 'epoch': 39} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.34783580515039425, 'r': 0.4050624679462771, 'f1': 0.3742742803614008}, 'combined': 0.29010733693084656, 'epoch': 39} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.2942639824682859, 'r': 0.3556853070821596, 'f1': 0.3220724344197562}, 'combined': 0.2373165306250835, 'epoch': 39} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3317726335570571, 'r': 0.41105653495855976, 'f1': 0.3671835057757907}, 'combined': 0.28461113844822056, 'epoch': 39} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29262960395571597, 'r': 0.3459359454732657, 'f1': 0.31705781437288877}, 'combined': 0.23362154743265487, 'epoch': 39} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3304500315850938, 'r': 0.40121736371682803, 'f1': 0.3624113671431373}, 'combined': 0.2809121601779342, 'epoch': 39} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25, 'r': 0.34285714285714286, 'f1': 0.2891566265060241}, 'combined': 0.19277108433734938, 'epoch': 39} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2571428571428571, 'r': 0.391304347826087, 'f1': 0.3103448275862069}, 'combined': 0.15517241379310345, 'epoch': 39} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36904761904761907, 'r': 0.2672413793103448, 'f1': 0.31}, 'combined': 0.20666666666666667, 'epoch': 39} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3279711174242424, 'r': 0.3285934535104364, 'f1': 0.32828199052132695}, 'combined': 0.24189199301571457, 'epoch': 17} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.33699381251830834, 'r': 0.40575541764612494, 'f1': 0.3681917384478598}, 'combined': 0.2853926393710684, 'epoch': 17} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32926829268292684, 'r': 0.38571428571428573, 'f1': 0.35526315789473684}, 'combined': 0.23684210526315788, 'epoch': 17} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.29021588054776093, 'r': 0.325460313859064, 'f1': 0.30682931199235547}, 'combined': 0.2260847562048935, 'epoch': 13} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.32837198696491027, 'r': 0.4053341714098111, 'f1': 0.3628166009822085}, 'combined': 0.28122626487616165, 'epoch': 13} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3333333333333333, 'r': 0.4782608695652174, 'f1': 0.3928571428571428}, 'combined': 0.1964285714285714, 'epoch': 13} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30241528588023536, 'r': 0.33799355480732185, 'f1': 0.319216135095804}, 'combined': 0.2352118890179608, 'epoch': 18} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.3270213595028262, 'r': 0.3847310111797955, 'f1': 0.3535366048679202}, 'combined': 0.2740331578402061, 'epoch': 18} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4875, 'r': 0.33620689655172414, 'f1': 0.3979591836734694}, 'combined': 0.26530612244897955, 'epoch': 18}