Command that produces this log: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4
----------------------------------------------------------------------------------------------------
> trainable params:
>>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024]) >>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024]) >>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024]) >>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024]) >>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.1.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.1.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.1.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.dense.weight: 
torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.2.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.2.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.2.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.3.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.3.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.3.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.4.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.4.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.4.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.5.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.5.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.5.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.5.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.6.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.6.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.6.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.7.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.7.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.7.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.8.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.8.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.8.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.8.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.9.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.9.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.9.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.10.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.10.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.10.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.11.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.11.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.11.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.11.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.12.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.12.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.12.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.13.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.13.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.13.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.dense.weight: torch.Size([1024, 
1024]) >>> xlmr.encoder.layer.14.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.14.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.14.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.14.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.15.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.15.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.15.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.16.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.16.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.16.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.dense.weight: 
torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.17.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.17.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.17.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.18.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.18.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.18.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.19.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.19.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.19.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.value.bias: torch.Size([1024]) >>> 
xlmr.encoder.layer.20.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.20.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.20.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.20.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.21.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.21.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.21.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.22.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.22.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.22.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.value.bias: 
torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.23.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.23.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.23.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.pooler.dense.weight: torch.Size([1024, 1024]) >>> xlmr.pooler.dense.bias: torch.Size([1024]) >>> basic_gcn.T_T.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.0.bias: torch.Size([1024]) >>> basic_gcn.T_T.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.1.bias: torch.Size([1024]) >>> basic_gcn.T_T.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_T.2.bias: torch.Size([1024]) >>> basic_gcn.T_E.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.0.bias: torch.Size([1024]) >>> basic_gcn.T_E.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.1.bias: torch.Size([1024]) >>> basic_gcn.T_E.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.T_E.2.bias: torch.Size([1024]) >>> basic_gcn.E_T.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.0.bias: torch.Size([1024]) >>> basic_gcn.E_T.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.1.bias: torch.Size([1024]) >>> basic_gcn.E_T.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_T.2.bias: torch.Size([1024]) >>> basic_gcn.E_E.0.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.0.bias: torch.Size([1024]) >>> basic_gcn.E_E.1.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.1.bias: torch.Size([1024]) >>> basic_gcn.E_E.2.weight: torch.Size([1024, 1024]) >>> basic_gcn.E_E.2.bias: torch.Size([1024]) >>> basic_gcn.f_t.0.weight: torch.Size([1024, 2048]) >>> basic_gcn.f_t.0.bias: torch.Size([1024]) >>> basic_gcn.f_e.0.weight: torch.Size([1024, 2048]) >>> basic_gcn.f_e.0.bias: torch.Size([1024]) >>> name2classifier.occupy-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.occupy-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.occupy-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.occupy-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.outcome-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.outcome-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.outcome-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.outcome-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.when-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.when-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.when-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.when-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.where-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.where-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.where-ffn.layers.1.weight: torch.Size([2, 350]) >>> 
name2classifier.where-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.who-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.who-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.who-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.who-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-against-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-against-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-against-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-against-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.organizer-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.organizer-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.organizer-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.organizer-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.protest-for-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.protest-for-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.protest-for-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.protest-for-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.wounded-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.wounded-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.wounded-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.wounded-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.arrested-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.arrested-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.arrested-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.arrested-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.imprisoned-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.imprisoned-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.imprisoned-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.imprisoned-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.corrupt-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.corrupt-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.corrupt-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.corrupt-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.judicial-actions-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.judicial-actions-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.judicial-actions-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.judicial-actions-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.charged-with-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.charged-with-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.charged-with-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.charged-with-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.prison-term-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.prison-term-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.prison-term-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.prison-term-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.fine-ffn.layers.0.weight: torch.Size([350, 1024]) >>> 
name2classifier.fine-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.fine-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.fine-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.npi-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.npi-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.npi-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.npi-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.disease-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.disease-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.disease-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.disease-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.outbreak-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.outbreak-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.outbreak-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.outbreak-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.killed-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.killed-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.killed-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.killed-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.hospitalized-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.infected-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.infected-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.infected-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.infected-individuals-ffn.layers.1.bias: torch.Size([2]) >>> 
name2classifier.tested-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.tested-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.tested-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.tested-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.tested-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.vaccinated-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.vaccinated-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.vaccinated-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.vaccinated-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.exposed-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.exposed-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.exposed-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.exposed-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.hospitalized-cumulative-ffn.layers.0.weight: torch.Size([350, 1024]) >>> 
name2classifier.hospitalized-cumulative-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.hospitalized-cumulative-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.hospitalized-cumulative-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.recovered-individuals-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.recovered-individuals-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.recovered-individuals-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.recovered-individuals-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.blamed-by-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.blamed-by-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.blamed-by-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.blamed-by-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.claimed-by-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.claimed-by-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.claimed-by-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.claimed-by-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.terror-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.terror-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.terror-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.terror-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.kidnapped-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.kidnapped-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.kidnapped-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.kidnapped-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-perp-org-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-perp-org-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-perp-org-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-perp-org-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.target-physical-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.target-physical-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.target-physical-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.target-physical-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-perp-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-perp-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-perp-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-perp-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-killed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-killed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-killed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-killed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.target-human-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.target-human-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.target-human-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.target-human-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.weapon-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.weapon-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.weapon-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.weapon-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-objective-ffn.layers.0.weight: torch.Size([350, 1024]) >>> 
name2classifier.perp-objective-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-objective-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-objective-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.named-organizer-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.named-organizer-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.named-organizer-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.named-organizer-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perp-captured-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perp-captured-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perp-captured-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perp-captured-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.assistance-provided-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.assistance-provided-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.assistance-provided-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.assistance-provided-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.current-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.current-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.current-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.current-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.event-or-soa-at-origin-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.group-identity-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.group-identity-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.group-identity-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.group-identity-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.human-displacement-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.human-displacement-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.human-displacement-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.human-displacement-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.origin-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.origin-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.origin-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.origin-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.total-displaced-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.total-displaced-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.total-displaced-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.total-displaced-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.transitory-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.transitory-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.transitory-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.transitory-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.destination-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.destination-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.destination-ffn.layers.1.weight: torch.Size([2, 350]) >>> 
name2classifier.destination-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.transiting-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.transiting-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.transiting-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.transiting-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.settlement-status-event-or-soa-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.assistance-needed-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.assistance-needed-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.assistance-needed-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.assistance-needed-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.detained-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.detained-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.detained-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.detained-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.missing-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.missing-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.missing-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.missing-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.blocked-migration-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.blocked-migration-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.blocked-migration-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.blocked-migration-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.injured-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.injured-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.injured-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.injured-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.major-disaster-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.major-disaster-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.major-disaster-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.major-disaster-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.related-natural-phenomena-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.related-natural-phenomena-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.related-natural-phenomena-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.related-natural-phenomena-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.rescue-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.rescue-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.rescue-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.rescue-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.responders-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.responders-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.responders-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.responders-ffn.layers.1.bias: torch.Size([2]) >>> 
name2classifier.human-displacement-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.human-displacement-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.human-displacement-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.human-displacement-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.damage-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.damage-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.damage-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.damage-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.individuals-affected-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.individuals-affected-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.individuals-affected-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.individuals-affected-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.affected-cumulative-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.affected-cumulative-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.affected-cumulative-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.affected-cumulative-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.rescued-count-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.rescued-count-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.rescued-count-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.rescued-count-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.repair-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.repair-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.repair-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.repair-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.declare-emergency-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.declare-emergency-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.declare-emergency-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.declare-emergency-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.announce-disaster-warnings-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.announce-disaster-warnings-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.announce-disaster-warnings-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.announce-disaster-warnings-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.disease-outbreak-events-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.disease-outbreak-events-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.disease-outbreak-events-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.disease-outbreak-events-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.cybercrime-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.cybercrime-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.cybercrime-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.cybercrime-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.perpetrator-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.perpetrator-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.perpetrator-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.perpetrator-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.victim-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.victim-ffn.layers.0.bias: 
torch.Size([350]) >>> name2classifier.victim-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.victim-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.response-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.response-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.response-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.response-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.information-stolen-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.information-stolen-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.information-stolen-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.information-stolen-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.related-crimes-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.related-crimes-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.related-crimes-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.related-crimes-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.victim-impact-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.victim-impact-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.victim-impact-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.victim-impact-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-amount-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-amount-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-amount-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-amount-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.etip-event-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.etip-event-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.etip-event-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.etip-event-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.project-location-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.project-location-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.project-location-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.project-location-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.project-name-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.project-name-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.project-name-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.project-name-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.signatories-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.signatories-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.signatories-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.signatories-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-awardee-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-awardee-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-awardee-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-awardee-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.overall-project-value-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.overall-project-value-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.overall-project-value-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.overall-project-value-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-amount-ffn.layers.0.weight: torch.Size([350, 1024]) >>> 
name2classifier.funding-amount-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-amount-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-amount-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-recipient-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-recipient-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-recipient-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-recipient-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.funding-source-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.funding-source-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.funding-source-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.funding-source-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.contract-awarder-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.contract-awarder-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.contract-awarder-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.contract-awarder-ffn.layers.1.bias: torch.Size([2]) >>> name2classifier.agreement-length-ffn.layers.0.weight: torch.Size([350, 1024]) >>> name2classifier.agreement-length-ffn.layers.0.bias: torch.Size([350]) >>> name2classifier.agreement-length-ffn.layers.1.weight: torch.Size([2, 350]) >>> name2classifier.agreement-length-ffn.layers.1.bias: torch.Size([2]) >>> irrealis_classifier.layers.0.weight: torch.Size([350, 1128]) >>> irrealis_classifier.layers.0.bias: torch.Size([350]) >>> irrealis_classifier.layers.1.weight: torch.Size([7, 350]) >>> irrealis_classifier.layers.1.bias: torch.Size([7]) n_trainable_params: 614103147, n_nontrainable_params: 0 ---------------------------------------------------------------------------------------------------- ****************************** Epoch: 0 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:02:21.570761: step: 2/459, loss: 14.049833297729492 2023-01-24 00:02:22.171883: step: 4/459, loss: 17.774765014648438 2023-01-24 00:02:22.847561: step: 6/459, loss: 16.859039306640625 2023-01-24 00:02:23.485498: step: 8/459, loss: 20.317859649658203 2023-01-24 00:02:24.078641: step: 10/459, loss: 8.557064056396484 2023-01-24 00:02:24.752395: step: 12/459, loss: 20.67351531982422 2023-01-24 00:02:25.347956: step: 14/459, loss: 17.55199432373047 2023-01-24 00:02:26.037808: step: 16/459, loss: 5.982392311096191 2023-01-24 00:02:26.630611: step: 18/459, loss: 15.262879371643066 2023-01-24 00:02:27.268790: step: 20/459, loss: 30.485349655151367 2023-01-24 00:02:27.994259: step: 22/459, loss: 7.178618431091309 2023-01-24 00:02:28.578738: step: 24/459, loss: 13.14234733581543 2023-01-24 00:02:29.254169: step: 26/459, loss: 22.931774139404297 2023-01-24 00:02:29.868329: step: 28/459, loss: 10.32826042175293 2023-01-24 00:02:30.503419: step: 30/459, loss: 12.72064208984375 2023-01-24 00:02:31.164785: step: 32/459, loss: 9.240194320678711 2023-01-24 00:02:31.759318: step: 34/459, loss: 21.66123390197754 2023-01-24 00:02:32.365435: step: 36/459, loss: 16.754291534423828 2023-01-24 00:02:33.053423: step: 38/459, loss: 28.944082260131836 2023-01-24 00:02:33.709482: step: 40/459, loss: 17.978517532348633 2023-01-24 00:02:34.318839: step: 42/459, loss: 24.404743194580078 2023-01-24 00:02:34.891318: step: 44/459, loss: 
13.819939613342285 2023-01-24 00:02:35.480891: step: 46/459, loss: 15.330714225769043 2023-01-24 00:02:36.085802: step: 48/459, loss: 18.362356185913086 2023-01-24 00:02:36.822889: step: 50/459, loss: 29.44424819946289 2023-01-24 00:02:37.436305: step: 52/459, loss: 15.208847045898438 2023-01-24 00:02:38.081048: step: 54/459, loss: 8.515247344970703 2023-01-24 00:02:38.735628: step: 56/459, loss: 11.613227844238281 2023-01-24 00:02:39.348288: step: 58/459, loss: 14.10073471069336 2023-01-24 00:02:39.946442: step: 60/459, loss: 9.606376647949219 2023-01-24 00:02:40.542556: step: 62/459, loss: 11.92740249633789 2023-01-24 00:02:41.265888: step: 64/459, loss: 21.11025047302246 2023-01-24 00:02:41.851064: step: 66/459, loss: 11.513785362243652 2023-01-24 00:02:42.495533: step: 68/459, loss: 16.062780380249023 2023-01-24 00:02:43.131552: step: 70/459, loss: 14.198490142822266 2023-01-24 00:02:43.685576: step: 72/459, loss: 6.017186164855957 2023-01-24 00:02:44.252024: step: 74/459, loss: 18.971782684326172 2023-01-24 00:02:44.831127: step: 76/459, loss: 5.959894180297852 2023-01-24 00:02:45.434074: step: 78/459, loss: 10.210354804992676 2023-01-24 00:02:46.071303: step: 80/459, loss: 19.873146057128906 2023-01-24 00:02:46.722766: step: 82/459, loss: 14.653738975524902 2023-01-24 00:02:47.342372: step: 84/459, loss: 9.199347496032715 2023-01-24 00:02:47.986850: step: 86/459, loss: 8.30517578125 2023-01-24 00:02:48.661486: step: 88/459, loss: 15.473654747009277 2023-01-24 00:02:49.294721: step: 90/459, loss: 12.912657737731934 2023-01-24 00:02:49.894805: step: 92/459, loss: 8.185126304626465 2023-01-24 00:02:50.541987: step: 94/459, loss: 17.299720764160156 2023-01-24 00:02:51.186565: step: 96/459, loss: 8.055187225341797 2023-01-24 00:02:51.828723: step: 98/459, loss: 12.347447395324707 2023-01-24 00:02:52.387194: step: 100/459, loss: 13.508932113647461 2023-01-24 00:02:52.994261: step: 102/459, loss: 10.32866096496582 2023-01-24 00:02:53.599447: step: 104/459, loss: 25.23952865600586 2023-01-24 00:02:54.248267: step: 106/459, loss: 6.047138214111328 2023-01-24 00:02:54.835658: step: 108/459, loss: 14.872207641601562 2023-01-24 00:02:55.490759: step: 110/459, loss: 29.021575927734375 2023-01-24 00:02:56.086370: step: 112/459, loss: 11.317483901977539 2023-01-24 00:02:56.683613: step: 114/459, loss: 3.816718578338623 2023-01-24 00:02:57.296263: step: 116/459, loss: 12.127930641174316 2023-01-24 00:02:57.909297: step: 118/459, loss: 15.408921241760254 2023-01-24 00:02:58.521575: step: 120/459, loss: 12.370786666870117 2023-01-24 00:02:59.071153: step: 122/459, loss: 8.968170166015625 2023-01-24 00:02:59.738752: step: 124/459, loss: 8.109829902648926 2023-01-24 00:03:00.435233: step: 126/459, loss: 13.359649658203125 2023-01-24 00:03:01.082189: step: 128/459, loss: 8.144328117370605 2023-01-24 00:03:01.672139: step: 130/459, loss: 6.914320945739746 2023-01-24 00:03:02.372244: step: 132/459, loss: 6.6228814125061035 2023-01-24 00:03:02.991581: step: 134/459, loss: 7.395743370056152 2023-01-24 00:03:03.742046: step: 136/459, loss: 16.499544143676758 2023-01-24 00:03:04.382136: step: 138/459, loss: 5.532669544219971 2023-01-24 00:03:04.943864: step: 140/459, loss: 9.262593269348145 2023-01-24 00:03:05.531689: step: 142/459, loss: 13.140060424804688 2023-01-24 00:03:06.168850: step: 144/459, loss: 3.6053354740142822 2023-01-24 00:03:06.745823: step: 146/459, loss: 9.736199378967285 2023-01-24 00:03:07.349357: step: 148/459, loss: 14.64077377319336 2023-01-24 00:03:07.985189: step: 150/459, loss: 
23.801912307739258 2023-01-24 00:03:08.677626: step: 152/459, loss: 10.181270599365234 2023-01-24 00:03:09.295528: step: 154/459, loss: 13.814695358276367 2023-01-24 00:03:09.906830: step: 156/459, loss: 15.1998872756958 2023-01-24 00:03:10.530845: step: 158/459, loss: 9.689101219177246 2023-01-24 00:03:11.151514: step: 160/459, loss: 7.613445281982422 2023-01-24 00:03:11.828260: step: 162/459, loss: 11.04550552368164 2023-01-24 00:03:12.436376: step: 164/459, loss: 3.699120044708252 2023-01-24 00:03:13.093552: step: 166/459, loss: 11.99321460723877 2023-01-24 00:03:13.735130: step: 168/459, loss: 7.094427585601807 2023-01-24 00:03:14.310001: step: 170/459, loss: 20.894962310791016 2023-01-24 00:03:14.943788: step: 172/459, loss: 12.740489959716797 2023-01-24 00:03:15.527866: step: 174/459, loss: 13.265972137451172 2023-01-24 00:03:16.154913: step: 176/459, loss: 6.440618515014648 2023-01-24 00:03:16.762339: step: 178/459, loss: 34.30327224731445 2023-01-24 00:03:17.354180: step: 180/459, loss: 11.039562225341797 2023-01-24 00:03:17.942601: step: 182/459, loss: 7.95001745223999 2023-01-24 00:03:18.598038: step: 184/459, loss: 6.520364284515381 2023-01-24 00:03:19.238163: step: 186/459, loss: 9.522093772888184 2023-01-24 00:03:19.842717: step: 188/459, loss: 10.04672622680664 2023-01-24 00:03:20.523909: step: 190/459, loss: 10.061422348022461 2023-01-24 00:03:21.112998: step: 192/459, loss: 3.0026211738586426 2023-01-24 00:03:21.703833: step: 194/459, loss: 7.44968843460083 2023-01-24 00:03:22.358904: step: 196/459, loss: 13.391910552978516 2023-01-24 00:03:22.953639: step: 198/459, loss: 7.679272174835205 2023-01-24 00:03:23.560516: step: 200/459, loss: 14.759941101074219 2023-01-24 00:03:24.212867: step: 202/459, loss: 9.208215713500977 2023-01-24 00:03:24.812512: step: 204/459, loss: 13.746330261230469 2023-01-24 00:03:25.524344: step: 206/459, loss: 3.165417194366455 2023-01-24 00:03:26.127258: step: 208/459, loss: 4.352890968322754 2023-01-24 00:03:26.712082: step: 210/459, loss: 3.2674190998077393 2023-01-24 00:03:27.337968: step: 212/459, loss: 6.778233528137207 2023-01-24 00:03:27.992851: step: 214/459, loss: 15.897944450378418 2023-01-24 00:03:28.643793: step: 216/459, loss: 7.730437755584717 2023-01-24 00:03:29.255085: step: 218/459, loss: 18.781871795654297 2023-01-24 00:03:29.931854: step: 220/459, loss: 3.936953544616699 2023-01-24 00:03:30.517489: step: 222/459, loss: 2.0897090435028076 2023-01-24 00:03:31.105683: step: 224/459, loss: 10.987658500671387 2023-01-24 00:03:31.695116: step: 226/459, loss: 10.064005851745605 2023-01-24 00:03:32.320715: step: 228/459, loss: 2.6713743209838867 2023-01-24 00:03:33.003161: step: 230/459, loss: 4.29444694519043 2023-01-24 00:03:33.599093: step: 232/459, loss: 2.83393931388855 2023-01-24 00:03:34.205033: step: 234/459, loss: 5.40538215637207 2023-01-24 00:03:34.894401: step: 236/459, loss: 7.385439872741699 2023-01-24 00:03:35.495732: step: 238/459, loss: 7.964359760284424 2023-01-24 00:03:36.203051: step: 240/459, loss: 12.106081008911133 2023-01-24 00:03:36.809893: step: 242/459, loss: 3.2255091667175293 2023-01-24 00:03:37.427585: step: 244/459, loss: 11.900279998779297 2023-01-24 00:03:38.077167: step: 246/459, loss: 2.3038675785064697 2023-01-24 00:03:38.727501: step: 248/459, loss: 8.8099365234375 2023-01-24 00:03:39.298755: step: 250/459, loss: 17.795555114746094 2023-01-24 00:03:39.884749: step: 252/459, loss: 10.444770812988281 2023-01-24 00:03:40.483207: step: 254/459, loss: 6.827157974243164 2023-01-24 00:03:41.052792: step: 
256/459, loss: 8.857442855834961 2023-01-24 00:03:41.616414: step: 258/459, loss: 8.461585998535156 2023-01-24 00:03:42.227085: step: 260/459, loss: 11.538174629211426 2023-01-24 00:03:42.805779: step: 262/459, loss: 3.841248035430908 2023-01-24 00:03:43.470337: step: 264/459, loss: 3.182095766067505 2023-01-24 00:03:44.059782: step: 266/459, loss: 3.557096004486084 2023-01-24 00:03:44.670574: step: 268/459, loss: 3.7419488430023193 2023-01-24 00:03:45.303629: step: 270/459, loss: 4.652825832366943 2023-01-24 00:03:45.851868: step: 272/459, loss: 4.313234329223633 2023-01-24 00:03:46.506553: step: 274/459, loss: 20.807275772094727 2023-01-24 00:03:47.150062: step: 276/459, loss: 10.299155235290527 2023-01-24 00:03:47.770990: step: 278/459, loss: 21.2875919342041 2023-01-24 00:03:48.330911: step: 280/459, loss: 12.335000991821289 2023-01-24 00:03:49.101374: step: 282/459, loss: 7.068063735961914 2023-01-24 00:03:49.757364: step: 284/459, loss: 4.634182929992676 2023-01-24 00:03:50.386648: step: 286/459, loss: 12.190961837768555 2023-01-24 00:03:50.981362: step: 288/459, loss: 3.0654044151306152 2023-01-24 00:03:51.596062: step: 290/459, loss: 12.033029556274414 2023-01-24 00:03:52.321988: step: 292/459, loss: 10.509269714355469 2023-01-24 00:03:52.906576: step: 294/459, loss: 5.414367198944092 2023-01-24 00:03:53.499727: step: 296/459, loss: 6.620211601257324 2023-01-24 00:03:54.191963: step: 298/459, loss: 6.504822731018066 2023-01-24 00:03:54.830141: step: 300/459, loss: 4.126156806945801 2023-01-24 00:03:55.496960: step: 302/459, loss: 5.323126792907715 2023-01-24 00:03:56.132038: step: 304/459, loss: 5.855346202850342 2023-01-24 00:03:56.723096: step: 306/459, loss: 10.120737075805664 2023-01-24 00:03:57.388953: step: 308/459, loss: 2.957974672317505 2023-01-24 00:03:58.112206: step: 310/459, loss: 9.98240852355957 2023-01-24 00:03:58.669044: step: 312/459, loss: 6.068939208984375 2023-01-24 00:03:59.246910: step: 314/459, loss: 1.9338345527648926 2023-01-24 00:03:59.842922: step: 316/459, loss: 5.066551208496094 2023-01-24 00:04:00.466418: step: 318/459, loss: 11.34574031829834 2023-01-24 00:04:01.088813: step: 320/459, loss: 4.961349010467529 2023-01-24 00:04:01.769409: step: 322/459, loss: 3.425729751586914 2023-01-24 00:04:02.386002: step: 324/459, loss: 6.984195709228516 2023-01-24 00:04:03.023326: step: 326/459, loss: 5.106949806213379 2023-01-24 00:04:03.600597: step: 328/459, loss: 2.6663053035736084 2023-01-24 00:04:04.197260: step: 330/459, loss: 4.648544788360596 2023-01-24 00:04:04.900978: step: 332/459, loss: 11.824711799621582 2023-01-24 00:04:05.543746: step: 334/459, loss: 7.088462829589844 2023-01-24 00:04:06.169421: step: 336/459, loss: 4.00225305557251 2023-01-24 00:04:06.901583: step: 338/459, loss: 2.7322869300842285 2023-01-24 00:04:07.533572: step: 340/459, loss: 2.800448417663574 2023-01-24 00:04:08.165562: step: 342/459, loss: 5.846116065979004 2023-01-24 00:04:08.788974: step: 344/459, loss: 2.215721845626831 2023-01-24 00:04:09.388360: step: 346/459, loss: 3.550058603286743 2023-01-24 00:04:09.984640: step: 348/459, loss: 1.706078290939331 2023-01-24 00:04:10.630728: step: 350/459, loss: 3.4147753715515137 2023-01-24 00:04:11.227998: step: 352/459, loss: 9.859456062316895 2023-01-24 00:04:11.920176: step: 354/459, loss: 3.8476600646972656 2023-01-24 00:04:12.604534: step: 356/459, loss: 9.845941543579102 2023-01-24 00:04:13.217979: step: 358/459, loss: 9.930298805236816 2023-01-24 00:04:13.824551: step: 360/459, loss: 1.2417389154434204 2023-01-24 
00:04:14.457697: step: 362/459, loss: 11.006293296813965 2023-01-24 00:04:15.052100: step: 364/459, loss: 2.7952611446380615 2023-01-24 00:04:15.648768: step: 366/459, loss: 4.34937047958374 2023-01-24 00:04:16.259150: step: 368/459, loss: 2.3026463985443115 2023-01-24 00:04:16.990257: step: 370/459, loss: 4.964760780334473 2023-01-24 00:04:17.605734: step: 372/459, loss: 5.015931129455566 2023-01-24 00:04:18.210342: step: 374/459, loss: 4.467179775238037 2023-01-24 00:04:18.820228: step: 376/459, loss: 7.4562788009643555 2023-01-24 00:04:19.458718: step: 378/459, loss: 1.7763283252716064 2023-01-24 00:04:20.077304: step: 380/459, loss: 5.123967170715332 2023-01-24 00:04:20.727102: step: 382/459, loss: 7.560960292816162 2023-01-24 00:04:21.396176: step: 384/459, loss: 6.437835693359375 2023-01-24 00:04:21.999542: step: 386/459, loss: 7.3481974601745605 2023-01-24 00:04:22.677311: step: 388/459, loss: 2.850940465927124 2023-01-24 00:04:23.327753: step: 390/459, loss: 2.3807804584503174 2023-01-24 00:04:23.930839: step: 392/459, loss: 5.940893173217773 2023-01-24 00:04:24.516490: step: 394/459, loss: 3.8245091438293457 2023-01-24 00:04:25.155904: step: 396/459, loss: 7.057912826538086 2023-01-24 00:04:25.828720: step: 398/459, loss: 2.8817131519317627 2023-01-24 00:04:26.378633: step: 400/459, loss: 3.7465884685516357 2023-01-24 00:04:26.918961: step: 402/459, loss: 1.4190165996551514 2023-01-24 00:04:27.566304: step: 404/459, loss: 6.647316932678223 2023-01-24 00:04:28.160695: step: 406/459, loss: 6.477262496948242 2023-01-24 00:04:28.787207: step: 408/459, loss: 3.666471481323242 2023-01-24 00:04:29.422266: step: 410/459, loss: 4.147994041442871 2023-01-24 00:04:30.046733: step: 412/459, loss: 3.3879103660583496 2023-01-24 00:04:30.668407: step: 414/459, loss: 1.0003509521484375 2023-01-24 00:04:31.295490: step: 416/459, loss: 1.939015507698059 2023-01-24 00:04:31.886483: step: 418/459, loss: 8.570428848266602 2023-01-24 00:04:32.530175: step: 420/459, loss: 7.446415424346924 2023-01-24 00:04:33.155123: step: 422/459, loss: 5.479066848754883 2023-01-24 00:04:33.828357: step: 424/459, loss: 2.8515257835388184 2023-01-24 00:04:34.440249: step: 426/459, loss: 1.4343518018722534 2023-01-24 00:04:35.140680: step: 428/459, loss: 2.3492484092712402 2023-01-24 00:04:35.780006: step: 430/459, loss: 3.1789894104003906 2023-01-24 00:04:36.524843: step: 432/459, loss: 1.6059504747390747 2023-01-24 00:04:37.146672: step: 434/459, loss: 0.962929368019104 2023-01-24 00:04:37.825648: step: 436/459, loss: 0.9774048924446106 2023-01-24 00:04:38.469333: step: 438/459, loss: 1.1988327503204346 2023-01-24 00:04:39.085254: step: 440/459, loss: 1.7399349212646484 2023-01-24 00:04:39.672873: step: 442/459, loss: 0.9562798142433167 2023-01-24 00:04:40.304211: step: 444/459, loss: 0.6122471690177917 2023-01-24 00:04:40.903833: step: 446/459, loss: 3.945359706878662 2023-01-24 00:04:41.527397: step: 448/459, loss: 1.867626428604126 2023-01-24 00:04:42.288665: step: 450/459, loss: 2.5628771781921387 2023-01-24 00:04:42.973697: step: 452/459, loss: 0.9594701528549194 2023-01-24 00:04:43.563515: step: 454/459, loss: 0.6527707576751709 2023-01-24 00:04:44.221811: step: 456/459, loss: 3.513340473175049 2023-01-24 00:04:44.892422: step: 458/459, loss: 1.1604524850845337 2023-01-24 00:04:45.531513: step: 460/459, loss: 1.693572998046875 2023-01-24 00:04:46.152070: step: 462/459, loss: 1.6023356914520264 2023-01-24 00:04:46.784093: step: 464/459, loss: 3.306795120239258 2023-01-24 00:04:47.401165: step: 466/459, loss: 
1.853796362876892 2023-01-24 00:04:47.958181: step: 468/459, loss: 1.4889540672302246 2023-01-24 00:04:48.555678: step: 470/459, loss: 0.8856974244117737 2023-01-24 00:04:49.159750: step: 472/459, loss: 3.020167112350464 2023-01-24 00:04:49.843571: step: 474/459, loss: 10.38552188873291 2023-01-24 00:04:50.463495: step: 476/459, loss: 1.2592674493789673 2023-01-24 00:04:51.059866: step: 478/459, loss: 1.5852487087249756 2023-01-24 00:04:51.735439: step: 480/459, loss: 1.2281150817871094 2023-01-24 00:04:52.354580: step: 482/459, loss: 1.078405499458313 2023-01-24 00:04:52.999526: step: 484/459, loss: 2.4872076511383057 2023-01-24 00:04:53.571571: step: 486/459, loss: 1.0538036823272705 2023-01-24 00:04:54.230457: step: 488/459, loss: 1.996608018875122 2023-01-24 00:04:54.768238: step: 490/459, loss: 1.1917378902435303 2023-01-24 00:04:55.445535: step: 492/459, loss: 7.327281951904297 2023-01-24 00:04:56.040283: step: 494/459, loss: 1.0538625717163086 2023-01-24 00:04:56.684416: step: 496/459, loss: 0.7180293202400208 2023-01-24 00:04:57.350326: step: 498/459, loss: 1.9422094821929932 2023-01-24 00:04:57.992280: step: 500/459, loss: 1.104684591293335 2023-01-24 00:04:58.659060: step: 502/459, loss: 10.05981159210205 2023-01-24 00:04:59.275967: step: 504/459, loss: 0.9946376085281372 2023-01-24 00:04:59.914189: step: 506/459, loss: 2.2679660320281982 2023-01-24 00:05:00.492350: step: 508/459, loss: 3.0403308868408203 2023-01-24 00:05:01.122349: step: 510/459, loss: 1.2907724380493164 2023-01-24 00:05:01.776482: step: 512/459, loss: 2.9976680278778076 2023-01-24 00:05:02.401792: step: 514/459, loss: 1.200697422027588 2023-01-24 00:05:03.014586: step: 516/459, loss: 0.8511447906494141 2023-01-24 00:05:03.669641: step: 518/459, loss: 2.3092918395996094 2023-01-24 00:05:04.213476: step: 520/459, loss: 2.0297532081604004 2023-01-24 00:05:04.826184: step: 522/459, loss: 1.7896136045455933 2023-01-24 00:05:05.595402: step: 524/459, loss: 1.2158305644989014 2023-01-24 00:05:06.244820: step: 526/459, loss: 1.4429209232330322 2023-01-24 00:05:06.868620: step: 528/459, loss: 2.417314052581787 2023-01-24 00:05:07.479709: step: 530/459, loss: 0.5821518898010254 2023-01-24 00:05:08.013603: step: 532/459, loss: 1.2754409313201904 2023-01-24 00:05:08.667068: step: 534/459, loss: 0.5956588983535767 2023-01-24 00:05:09.346165: step: 536/459, loss: 8.233503341674805 2023-01-24 00:05:10.000706: step: 538/459, loss: 1.2843663692474365 2023-01-24 00:05:10.560897: step: 540/459, loss: 1.2130643129348755 2023-01-24 00:05:11.192490: step: 542/459, loss: 5.684965133666992 2023-01-24 00:05:11.858849: step: 544/459, loss: 2.6298716068267822 2023-01-24 00:05:12.525316: step: 546/459, loss: 1.4601067304611206 2023-01-24 00:05:13.233119: step: 548/459, loss: 2.8849904537200928 2023-01-24 00:05:13.856795: step: 550/459, loss: 2.2295303344726562 2023-01-24 00:05:14.507051: step: 552/459, loss: 8.016440391540527 2023-01-24 00:05:15.116104: step: 554/459, loss: 1.7778139114379883 2023-01-24 00:05:15.769746: step: 556/459, loss: 1.7467834949493408 2023-01-24 00:05:16.404797: step: 558/459, loss: 1.2355602979660034 2023-01-24 00:05:17.088208: step: 560/459, loss: 2.8242101669311523 2023-01-24 00:05:17.710133: step: 562/459, loss: 1.911413550376892 2023-01-24 00:05:18.294667: step: 564/459, loss: 0.9085494875907898 2023-01-24 00:05:18.969098: step: 566/459, loss: 2.130169630050659 2023-01-24 00:05:19.581391: step: 568/459, loss: 2.410320997238159 2023-01-24 00:05:20.124320: step: 570/459, loss: 1.7563560009002686 2023-01-24 
00:05:20.709532: step: 572/459, loss: 1.8459999561309814 2023-01-24 00:05:21.286776: step: 574/459, loss: 1.8837614059448242 2023-01-24 00:05:21.936202: step: 576/459, loss: 1.817295789718628 2023-01-24 00:05:22.714329: step: 578/459, loss: 1.1555655002593994 2023-01-24 00:05:23.361615: step: 580/459, loss: 2.993751287460327 2023-01-24 00:05:24.015949: step: 582/459, loss: 1.5902153253555298 2023-01-24 00:05:24.662016: step: 584/459, loss: 5.065632343292236 2023-01-24 00:05:25.305928: step: 586/459, loss: 3.7449240684509277 2023-01-24 00:05:25.919856: step: 588/459, loss: 1.9331941604614258 2023-01-24 00:05:26.582155: step: 590/459, loss: 0.5161389708518982 2023-01-24 00:05:27.187931: step: 592/459, loss: 2.015943765640259 2023-01-24 00:05:27.787674: step: 594/459, loss: 2.4697861671447754 2023-01-24 00:05:28.380387: step: 596/459, loss: 1.23321533203125 2023-01-24 00:05:28.970621: step: 598/459, loss: 1.3152406215667725 2023-01-24 00:05:29.658119: step: 600/459, loss: 2.625206470489502 2023-01-24 00:05:30.269239: step: 602/459, loss: 2.8521273136138916 2023-01-24 00:05:30.938671: step: 604/459, loss: 1.6326117515563965 2023-01-24 00:05:31.544870: step: 606/459, loss: 0.7159778475761414 2023-01-24 00:05:32.200522: step: 608/459, loss: 1.6573705673217773 2023-01-24 00:05:32.858996: step: 610/459, loss: 1.633286476135254 2023-01-24 00:05:33.507716: step: 612/459, loss: 1.8268694877624512 2023-01-24 00:05:34.113976: step: 614/459, loss: 1.5282402038574219 2023-01-24 00:05:34.702055: step: 616/459, loss: 0.8027502298355103 2023-01-24 00:05:35.390144: step: 618/459, loss: 1.0158425569534302 2023-01-24 00:05:35.922983: step: 620/459, loss: 1.0592502355575562 2023-01-24 00:05:36.474915: step: 622/459, loss: 7.1123948097229 2023-01-24 00:05:37.104252: step: 624/459, loss: 1.7493960857391357 2023-01-24 00:05:37.733636: step: 626/459, loss: 2.765866756439209 2023-01-24 00:05:38.321787: step: 628/459, loss: 2.8445513248443604 2023-01-24 00:05:38.920615: step: 630/459, loss: 1.487991213798523 2023-01-24 00:05:39.613371: step: 632/459, loss: 4.446500778198242 2023-01-24 00:05:40.255164: step: 634/459, loss: 0.5802804231643677 2023-01-24 00:05:40.947258: step: 636/459, loss: 1.8463175296783447 2023-01-24 00:05:41.651638: step: 638/459, loss: 3.2226672172546387 2023-01-24 00:05:42.341198: step: 640/459, loss: 3.3258891105651855 2023-01-24 00:05:42.993848: step: 642/459, loss: 0.825851321220398 2023-01-24 00:05:43.498697: step: 644/459, loss: 0.7911403179168701 2023-01-24 00:05:44.078887: step: 646/459, loss: 1.240240454673767 2023-01-24 00:05:44.709034: step: 648/459, loss: 1.2444136142730713 2023-01-24 00:05:45.383597: step: 650/459, loss: 1.205026388168335 2023-01-24 00:05:45.974159: step: 652/459, loss: 1.421164870262146 2023-01-24 00:05:46.642055: step: 654/459, loss: 1.9134454727172852 2023-01-24 00:05:47.262471: step: 656/459, loss: 1.6534503698349 2023-01-24 00:05:47.870709: step: 658/459, loss: 3.757086992263794 2023-01-24 00:05:48.464615: step: 660/459, loss: 0.7896360754966736 2023-01-24 00:05:49.067749: step: 662/459, loss: 7.279130458831787 2023-01-24 00:05:49.727430: step: 664/459, loss: 2.542903184890747 2023-01-24 00:05:50.322094: step: 666/459, loss: 1.0977072715759277 2023-01-24 00:05:50.976927: step: 668/459, loss: 3.141584634780884 2023-01-24 00:05:51.653925: step: 670/459, loss: 2.3182854652404785 2023-01-24 00:05:52.324965: step: 672/459, loss: 8.373307228088379 2023-01-24 00:05:52.916154: step: 674/459, loss: 5.62956428527832 2023-01-24 00:05:53.566463: step: 676/459, loss: 
1.3932788372039795 2023-01-24 00:05:54.247454: step: 678/459, loss: 1.7619014978408813 2023-01-24 00:05:54.898422: step: 680/459, loss: 0.8271897435188293 2023-01-24 00:05:55.515602: step: 682/459, loss: 1.8508051633834839 2023-01-24 00:05:56.153953: step: 684/459, loss: 2.839737892150879 2023-01-24 00:05:56.785911: step: 686/459, loss: 2.25300931930542 2023-01-24 00:05:57.536228: step: 688/459, loss: 8.152230262756348 2023-01-24 00:05:58.106963: step: 690/459, loss: 6.577670097351074 2023-01-24 00:05:58.703578: step: 692/459, loss: 6.163491249084473 2023-01-24 00:05:59.435224: step: 694/459, loss: 2.8803763389587402 2023-01-24 00:06:00.043638: step: 696/459, loss: 9.184295654296875 2023-01-24 00:06:00.695106: step: 698/459, loss: 10.515149116516113 2023-01-24 00:06:01.282701: step: 700/459, loss: 12.220897674560547 2023-01-24 00:06:01.857004: step: 702/459, loss: 4.4580583572387695 2023-01-24 00:06:02.449892: step: 704/459, loss: 1.7495527267456055 2023-01-24 00:06:03.121888: step: 706/459, loss: 2.9243907928466797 2023-01-24 00:06:03.663505: step: 708/459, loss: 2.22869610786438 2023-01-24 00:06:04.242493: step: 710/459, loss: 2.263824701309204 2023-01-24 00:06:04.918575: step: 712/459, loss: 0.7323858141899109 2023-01-24 00:06:05.528896: step: 714/459, loss: 5.230510711669922 2023-01-24 00:06:06.137257: step: 716/459, loss: 1.7443934679031372 2023-01-24 00:06:06.765319: step: 718/459, loss: 0.8379201292991638 2023-01-24 00:06:07.369468: step: 720/459, loss: 1.172990322113037 2023-01-24 00:06:08.092731: step: 722/459, loss: 2.610232353210449 2023-01-24 00:06:08.677673: step: 724/459, loss: 7.333810806274414 2023-01-24 00:06:09.318772: step: 726/459, loss: 1.199715256690979 2023-01-24 00:06:09.853519: step: 728/459, loss: 1.1303703784942627 2023-01-24 00:06:10.457976: step: 730/459, loss: 1.5919160842895508 2023-01-24 00:06:11.105775: step: 732/459, loss: 0.5422648191452026 2023-01-24 00:06:11.745006: step: 734/459, loss: 1.1659080982208252 2023-01-24 00:06:12.403753: step: 736/459, loss: 1.5974665880203247 2023-01-24 00:06:13.099221: step: 738/459, loss: 2.5842349529266357 2023-01-24 00:06:13.705490: step: 740/459, loss: 0.37224525213241577 2023-01-24 00:06:14.283464: step: 742/459, loss: 1.3185676336288452 2023-01-24 00:06:14.906730: step: 744/459, loss: 1.2888380289077759 2023-01-24 00:06:15.484387: step: 746/459, loss: 0.41490620374679565 2023-01-24 00:06:16.217441: step: 748/459, loss: 0.7181096076965332 2023-01-24 00:06:16.810559: step: 750/459, loss: 8.314245223999023 2023-01-24 00:06:17.436841: step: 752/459, loss: 5.123539924621582 2023-01-24 00:06:18.082022: step: 754/459, loss: 4.123293876647949 2023-01-24 00:06:18.717130: step: 756/459, loss: 0.867313027381897 2023-01-24 00:06:19.373680: step: 758/459, loss: 9.282245635986328 2023-01-24 00:06:20.016920: step: 760/459, loss: 0.7609901428222656 2023-01-24 00:06:20.620318: step: 762/459, loss: 1.682234287261963 2023-01-24 00:06:21.221835: step: 764/459, loss: 3.8469438552856445 2023-01-24 00:06:21.866867: step: 766/459, loss: 0.3340407907962799 2023-01-24 00:06:22.470469: step: 768/459, loss: 0.7898977994918823 2023-01-24 00:06:23.022753: step: 770/459, loss: 1.555338978767395 2023-01-24 00:06:23.638258: step: 772/459, loss: 2.9984540939331055 2023-01-24 00:06:24.298300: step: 774/459, loss: 0.9551335573196411 2023-01-24 00:06:24.883131: step: 776/459, loss: 1.3182694911956787 2023-01-24 00:06:25.574720: step: 778/459, loss: 4.938896179199219 2023-01-24 00:06:26.169852: step: 780/459, loss: 2.1922333240509033 2023-01-24 
00:06:26.765272: step: 782/459, loss: 2.030712127685547 2023-01-24 00:06:27.313113: step: 784/459, loss: 1.81718111038208 2023-01-24 00:06:27.940568: step: 786/459, loss: 2.2332797050476074 2023-01-24 00:06:28.554331: step: 788/459, loss: 0.7470541596412659 2023-01-24 00:06:29.120070: step: 790/459, loss: 0.4073042571544647 2023-01-24 00:06:29.788999: step: 792/459, loss: 2.523461103439331 2023-01-24 00:06:30.383364: step: 794/459, loss: 4.579337120056152 2023-01-24 00:06:30.996901: step: 796/459, loss: 4.310708999633789 2023-01-24 00:06:31.614315: step: 798/459, loss: 1.1223485469818115 2023-01-24 00:06:32.264535: step: 800/459, loss: 1.285414695739746 2023-01-24 00:06:32.865546: step: 802/459, loss: 2.096447467803955 2023-01-24 00:06:33.481385: step: 804/459, loss: 2.009270429611206 2023-01-24 00:06:34.107000: step: 806/459, loss: 1.2016229629516602 2023-01-24 00:06:34.724040: step: 808/459, loss: 0.9633241295814514 2023-01-24 00:06:35.387914: step: 810/459, loss: 1.7993857860565186 2023-01-24 00:06:36.018167: step: 812/459, loss: 0.9830517768859863 2023-01-24 00:06:36.650913: step: 814/459, loss: 2.157952070236206 2023-01-24 00:06:37.278719: step: 816/459, loss: 2.19089412689209 2023-01-24 00:06:37.970563: step: 818/459, loss: 2.943434238433838 2023-01-24 00:06:38.598563: step: 820/459, loss: 0.7568618059158325 2023-01-24 00:06:39.221815: step: 822/459, loss: 2.5608725547790527 2023-01-24 00:06:39.753225: step: 824/459, loss: 1.686025619506836 2023-01-24 00:06:40.360598: step: 826/459, loss: 0.907474160194397 2023-01-24 00:06:40.978449: step: 828/459, loss: 2.6207945346832275 2023-01-24 00:06:41.626028: step: 830/459, loss: 1.3045647144317627 2023-01-24 00:06:42.233585: step: 832/459, loss: 0.9820588827133179 2023-01-24 00:06:42.828397: step: 834/459, loss: 1.046439290046692 2023-01-24 00:06:43.580477: step: 836/459, loss: 6.870802402496338 2023-01-24 00:06:44.169996: step: 838/459, loss: 1.3599413633346558 2023-01-24 00:06:44.796796: step: 840/459, loss: 3.879260540008545 2023-01-24 00:06:45.411079: step: 842/459, loss: 0.7421108484268188 2023-01-24 00:06:46.128656: step: 844/459, loss: 1.6360570192337036 2023-01-24 00:06:46.722221: step: 846/459, loss: 0.7264425158500671 2023-01-24 00:06:47.364234: step: 848/459, loss: 1.2006781101226807 2023-01-24 00:06:47.942865: step: 850/459, loss: 0.9591929316520691 2023-01-24 00:06:48.605891: step: 852/459, loss: 1.2571929693222046 2023-01-24 00:06:49.272373: step: 854/459, loss: 1.8805749416351318 2023-01-24 00:06:49.836487: step: 856/459, loss: 1.0307695865631104 2023-01-24 00:06:50.431912: step: 858/459, loss: 0.969419002532959 2023-01-24 00:06:51.049443: step: 860/459, loss: 1.5867550373077393 2023-01-24 00:06:51.637627: step: 862/459, loss: 2.485734701156616 2023-01-24 00:06:52.261391: step: 864/459, loss: 1.9169365167617798 2023-01-24 00:06:52.887620: step: 866/459, loss: 1.3727264404296875 2023-01-24 00:06:53.554684: step: 868/459, loss: 2.7275168895721436 2023-01-24 00:06:54.160645: step: 870/459, loss: 4.012635707855225 2023-01-24 00:06:54.778130: step: 872/459, loss: 0.6647301316261292 2023-01-24 00:06:55.385975: step: 874/459, loss: 4.549221515655518 2023-01-24 00:06:56.029644: step: 876/459, loss: 0.4633423686027527 2023-01-24 00:06:56.565269: step: 878/459, loss: 1.6449989080429077 2023-01-24 00:06:57.154239: step: 880/459, loss: 1.2468535900115967 2023-01-24 00:06:57.791729: step: 882/459, loss: 1.8318140506744385 2023-01-24 00:06:58.426158: step: 884/459, loss: 1.636282205581665 2023-01-24 00:06:59.052116: step: 886/459, loss: 
2.0163981914520264 2023-01-24 00:06:59.700566: step: 888/459, loss: 1.8912115097045898 2023-01-24 00:07:00.317743: step: 890/459, loss: 1.380719780921936 2023-01-24 00:07:00.979967: step: 892/459, loss: 1.8959118127822876 2023-01-24 00:07:01.572806: step: 894/459, loss: 1.586099624633789 2023-01-24 00:07:02.150589: step: 896/459, loss: 0.7997898459434509 2023-01-24 00:07:02.795566: step: 898/459, loss: 2.9919159412384033 2023-01-24 00:07:03.416970: step: 900/459, loss: 3.088918447494507 2023-01-24 00:07:03.989082: step: 902/459, loss: 3.247626543045044 2023-01-24 00:07:04.629370: step: 904/459, loss: 2.355701446533203 2023-01-24 00:07:05.238733: step: 906/459, loss: 1.5104095935821533 2023-01-24 00:07:05.901882: step: 908/459, loss: 0.5081392526626587 2023-01-24 00:07:06.564983: step: 910/459, loss: 0.5294934511184692 2023-01-24 00:07:07.199621: step: 912/459, loss: 1.2250251770019531 2023-01-24 00:07:07.820133: step: 914/459, loss: 1.2173593044281006 2023-01-24 00:07:08.450094: step: 916/459, loss: 1.9191032648086548 2023-01-24 00:07:09.090647: step: 918/459, loss: 5.370596408843994 2023-01-24 00:07:09.558970: step: 920/459, loss: 0.08031457662582397 ================================================== Loss: 5.701 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3411550223476829, 'r': 0.07029470214725414, 'f1': 0.11657021139779762}, 'combined': 0.08589383997732455, 'epoch': 0} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.37377090321839673, 'r': 0.06482747372893602, 'f1': 0.11049116769504569}, 'combined': 0.07071434732482923, 'epoch': 0} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3384742320385885, 'r': 0.06462362464252824, 'f1': 0.10852665852665853}, 'combined': 0.07996701154595892, 'epoch': 0} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3718897109893502, 'r': 0.062485534095771574, 'f1': 0.10699379156074112}, 'combined': 0.0684760265988743, 'epoch': 0} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35066006600660066, 'r': 0.06695022054190296, 'f1': 0.11243386243386241}, 'combined': 0.08284600389863546, 'epoch': 0} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.41327085643015515, 'r': 0.06122531206372669, 'f1': 0.10665054359487874}, 'combined': 0.07646642748312062, 'epoch': 0} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6666666666666666, 'r': 0.05714285714285714, 'f1': 0.10526315789473684}, 'combined': 0.07017543859649122, 'epoch': 0} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 0} New best chinese model... New best korean model... New best russian model... 
================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3411550223476829, 'r': 0.07029470214725414, 'f1': 0.11657021139779762}, 'combined': 0.08589383997732455, 'epoch': 0} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.37377090321839673, 'r': 0.06482747372893602, 'f1': 0.11049116769504569}, 'combined': 0.07071434732482923, 'epoch': 0} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6666666666666666, 'r': 0.05714285714285714, 'f1': 0.10526315789473684}, 'combined': 0.07017543859649122, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3384742320385885, 'r': 0.06462362464252824, 'f1': 0.10852665852665853}, 'combined': 0.07996701154595892, 'epoch': 0} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3718897109893502, 'r': 0.062485534095771574, 'f1': 0.10699379156074112}, 'combined': 0.0684760265988743, 'epoch': 0} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35066006600660066, 'r': 0.06695022054190296, 'f1': 0.11243386243386241}, 'combined': 0.08284600389863546, 'epoch': 0} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.41327085643015515, 'r': 0.06122531206372669, 'f1': 0.10665054359487874}, 'combined': 0.07646642748312062, 'epoch': 0} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 0} ****************************** Epoch: 1 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:09:58.580645: step: 2/459, loss: 1.4371005296707153 2023-01-24 00:09:59.170874: step: 4/459, loss: 1.7341578006744385 2023-01-24 00:09:59.787374: step: 6/459, loss: 1.0525729656219482 2023-01-24 00:10:00.381868: step: 8/459, loss: 1.9615821838378906 2023-01-24 00:10:01.031117: step: 10/459, loss: 0.6987857222557068 2023-01-24 00:10:01.688683: step: 12/459, loss: 2.210301399230957 2023-01-24 00:10:02.296823: step: 14/459, loss: 0.6639207601547241 2023-01-24 00:10:02.931981: step: 16/459, loss: 2.8534467220306396 2023-01-24 00:10:03.535652: step: 18/459, loss: 0.818335771560669 2023-01-24 00:10:04.139986: step: 20/459, loss: 1.1568975448608398 2023-01-24 00:10:04.796195: step: 22/459, loss: 0.25868406891822815 2023-01-24 00:10:05.464884: step: 24/459, loss: 4.45516300201416 2023-01-24 00:10:06.059145: step: 26/459, loss: 0.6843295097351074 2023-01-24 00:10:06.715487: step: 28/459, loss: 6.876794338226318 2023-01-24 00:10:07.308135: step: 30/459, loss: 2.1603033542633057 2023-01-24 00:10:07.921881: step: 32/459, loss: 0.9450627565383911 2023-01-24 00:10:08.544243: step: 34/459, loss: 0.5561680197715759 2023-01-24 00:10:09.194191: step: 36/459, loss: 3.261331558227539 2023-01-24 00:10:09.760848: step: 
38/459, loss: 2.753970146179199 2023-01-24 00:10:10.364982: step: 40/459, loss: 3.41957426071167 2023-01-24 00:10:11.010123: step: 42/459, loss: 1.2748403549194336 2023-01-24 00:10:11.597100: step: 44/459, loss: 0.7733592987060547 2023-01-24 00:10:12.276040: step: 46/459, loss: 8.655805587768555 2023-01-24 00:10:12.874415: step: 48/459, loss: 5.0869622230529785 2023-01-24 00:10:13.527828: step: 50/459, loss: 4.405174732208252 2023-01-24 00:10:14.226591: step: 52/459, loss: 5.179235458374023 2023-01-24 00:10:14.817064: step: 54/459, loss: 1.4325791597366333 2023-01-24 00:10:15.422561: step: 56/459, loss: 0.48165392875671387 2023-01-24 00:10:15.993838: step: 58/459, loss: 1.840761661529541 2023-01-24 00:10:16.596336: step: 60/459, loss: 1.379122257232666 2023-01-24 00:10:17.202794: step: 62/459, loss: 2.749972343444824 2023-01-24 00:10:17.797222: step: 64/459, loss: 1.278607726097107 2023-01-24 00:10:18.384592: step: 66/459, loss: 0.9901564121246338 2023-01-24 00:10:18.987993: step: 68/459, loss: 0.8344129920005798 2023-01-24 00:10:19.677471: step: 70/459, loss: 1.6095564365386963 2023-01-24 00:10:20.218661: step: 72/459, loss: 0.3637648820877075 2023-01-24 00:10:20.880286: step: 74/459, loss: 2.1309947967529297 2023-01-24 00:10:21.472044: step: 76/459, loss: 1.113835096359253 2023-01-24 00:10:22.113970: step: 78/459, loss: 0.8665818572044373 2023-01-24 00:10:22.831729: step: 80/459, loss: 1.0665297508239746 2023-01-24 00:10:23.501613: step: 82/459, loss: 3.9567887783050537 2023-01-24 00:10:24.139553: step: 84/459, loss: 3.4178197383880615 2023-01-24 00:10:24.801460: step: 86/459, loss: 1.2463572025299072 2023-01-24 00:10:25.383636: step: 88/459, loss: 0.5774142742156982 2023-01-24 00:10:25.978883: step: 90/459, loss: 6.412754058837891 2023-01-24 00:10:26.608102: step: 92/459, loss: 5.583096027374268 2023-01-24 00:10:27.209266: step: 94/459, loss: 3.2569689750671387 2023-01-24 00:10:27.813022: step: 96/459, loss: 2.0342719554901123 2023-01-24 00:10:28.371740: step: 98/459, loss: 0.6345742344856262 2023-01-24 00:10:28.978655: step: 100/459, loss: 0.9270249009132385 2023-01-24 00:10:29.616754: step: 102/459, loss: 0.4273676872253418 2023-01-24 00:10:30.149035: step: 104/459, loss: 1.696847915649414 2023-01-24 00:10:30.751869: step: 106/459, loss: 6.435511589050293 2023-01-24 00:10:31.391868: step: 108/459, loss: 2.097060441970825 2023-01-24 00:10:32.075651: step: 110/459, loss: 1.270726203918457 2023-01-24 00:10:32.661481: step: 112/459, loss: 2.135930299758911 2023-01-24 00:10:33.344934: step: 114/459, loss: 1.918535828590393 2023-01-24 00:10:33.981187: step: 116/459, loss: 4.2157816886901855 2023-01-24 00:10:34.600940: step: 118/459, loss: 1.130672812461853 2023-01-24 00:10:35.149442: step: 120/459, loss: 1.1553517580032349 2023-01-24 00:10:35.802467: step: 122/459, loss: 0.3678295314311981 2023-01-24 00:10:36.411490: step: 124/459, loss: 8.19862174987793 2023-01-24 00:10:37.061868: step: 126/459, loss: 1.6315306425094604 2023-01-24 00:10:37.704477: step: 128/459, loss: 1.0676915645599365 2023-01-24 00:10:38.276863: step: 130/459, loss: 0.7785354256629944 2023-01-24 00:10:38.891018: step: 132/459, loss: 0.7970293760299683 2023-01-24 00:10:39.531695: step: 134/459, loss: 1.265354871749878 2023-01-24 00:10:40.117761: step: 136/459, loss: 6.480751991271973 2023-01-24 00:10:40.734353: step: 138/459, loss: 1.360116720199585 2023-01-24 00:10:41.372874: step: 140/459, loss: 2.3613176345825195 2023-01-24 00:10:42.008630: step: 142/459, loss: 0.473491907119751 2023-01-24 00:10:42.616758: step: 
144/459, loss: 0.8332571387290955 2023-01-24 00:10:43.234079: step: 146/459, loss: 1.5375356674194336 2023-01-24 00:10:43.849616: step: 148/459, loss: 0.42203348875045776 2023-01-24 00:10:44.442460: step: 150/459, loss: 7.984592437744141 2023-01-24 00:10:45.051938: step: 152/459, loss: 1.677099347114563 2023-01-24 00:10:45.685568: step: 154/459, loss: 1.0970335006713867 2023-01-24 00:10:46.296781: step: 156/459, loss: 2.2806015014648438 2023-01-24 00:10:46.945779: step: 158/459, loss: 1.6714164018630981 2023-01-24 00:10:47.572077: step: 160/459, loss: 0.5445456504821777 2023-01-24 00:10:48.251585: step: 162/459, loss: 1.7711219787597656 2023-01-24 00:10:48.871232: step: 164/459, loss: 2.984172821044922 2023-01-24 00:10:49.659358: step: 166/459, loss: 0.6869045495986938 2023-01-24 00:10:50.333825: step: 168/459, loss: 0.9034554958343506 2023-01-24 00:10:50.929903: step: 170/459, loss: 0.6673029065132141 2023-01-24 00:10:51.553401: step: 172/459, loss: 1.4765095710754395 2023-01-24 00:10:52.171037: step: 174/459, loss: 0.4945083260536194 2023-01-24 00:10:52.869874: step: 176/459, loss: 4.550059795379639 2023-01-24 00:10:53.514658: step: 178/459, loss: 0.4917007386684418 2023-01-24 00:10:54.072350: step: 180/459, loss: 0.42058977484703064 2023-01-24 00:10:54.722329: step: 182/459, loss: 1.369899034500122 2023-01-24 00:10:55.370245: step: 184/459, loss: 1.7413026094436646 2023-01-24 00:10:56.023418: step: 186/459, loss: 1.886083722114563 2023-01-24 00:10:56.645623: step: 188/459, loss: 0.6664714813232422 2023-01-24 00:10:57.254533: step: 190/459, loss: 3.3447628021240234 2023-01-24 00:10:57.935311: step: 192/459, loss: 0.6963915228843689 2023-01-24 00:10:58.552626: step: 194/459, loss: 0.9126009941101074 2023-01-24 00:10:59.181506: step: 196/459, loss: 0.5749098062515259 2023-01-24 00:10:59.760467: step: 198/459, loss: 0.6172277927398682 2023-01-24 00:11:00.377924: step: 200/459, loss: 2.8823728561401367 2023-01-24 00:11:00.973482: step: 202/459, loss: 0.7219178676605225 2023-01-24 00:11:01.630019: step: 204/459, loss: 1.6279923915863037 2023-01-24 00:11:02.204837: step: 206/459, loss: 1.968108892440796 2023-01-24 00:11:02.824610: step: 208/459, loss: 3.344264030456543 2023-01-24 00:11:03.474593: step: 210/459, loss: 1.5255463123321533 2023-01-24 00:11:04.089714: step: 212/459, loss: 1.7239658832550049 2023-01-24 00:11:04.668106: step: 214/459, loss: 2.6687283515930176 2023-01-24 00:11:05.213943: step: 216/459, loss: 0.42011818289756775 2023-01-24 00:11:05.865397: step: 218/459, loss: 2.3849520683288574 2023-01-24 00:11:06.436339: step: 220/459, loss: 0.634800136089325 2023-01-24 00:11:06.955740: step: 222/459, loss: 2.116934299468994 2023-01-24 00:11:07.607733: step: 224/459, loss: 1.1157808303833008 2023-01-24 00:11:08.202365: step: 226/459, loss: 1.1967705488204956 2023-01-24 00:11:08.820395: step: 228/459, loss: 1.9662386178970337 2023-01-24 00:11:09.367278: step: 230/459, loss: 0.8312451243400574 2023-01-24 00:11:10.006992: step: 232/459, loss: 1.5747036933898926 2023-01-24 00:11:10.648339: step: 234/459, loss: 1.1217353343963623 2023-01-24 00:11:11.257411: step: 236/459, loss: 5.991803169250488 2023-01-24 00:11:11.891234: step: 238/459, loss: 2.0294628143310547 2023-01-24 00:11:12.560916: step: 240/459, loss: 0.7557103633880615 2023-01-24 00:11:13.137088: step: 242/459, loss: 0.8770052194595337 2023-01-24 00:11:13.750319: step: 244/459, loss: 1.2464927434921265 2023-01-24 00:11:14.337943: step: 246/459, loss: 3.7773585319519043 2023-01-24 00:11:14.988825: step: 248/459, loss: 
2.423583745956421 2023-01-24 00:11:15.557497: step: 250/459, loss: 1.7296801805496216 2023-01-24 00:11:16.141219: step: 252/459, loss: 10.322151184082031 2023-01-24 00:11:16.818718: step: 254/459, loss: 1.1194157600402832 2023-01-24 00:11:17.458488: step: 256/459, loss: 1.621954083442688 2023-01-24 00:11:18.059507: step: 258/459, loss: 0.4411066770553589 2023-01-24 00:11:18.750987: step: 260/459, loss: 0.9652734994888306 2023-01-24 00:11:19.417851: step: 262/459, loss: 4.066675186157227 2023-01-24 00:11:20.044392: step: 264/459, loss: 0.9046793580055237 2023-01-24 00:11:20.674682: step: 266/459, loss: 5.04667854309082 2023-01-24 00:11:21.269410: step: 268/459, loss: 1.3100019693374634 2023-01-24 00:11:21.861759: step: 270/459, loss: 0.8223793506622314 2023-01-24 00:11:22.478034: step: 272/459, loss: 4.343293190002441 2023-01-24 00:11:23.103848: step: 274/459, loss: 1.4502910375595093 2023-01-24 00:11:23.660123: step: 276/459, loss: 0.5540711283683777 2023-01-24 00:11:24.267852: step: 278/459, loss: 2.056628942489624 2023-01-24 00:11:24.870045: step: 280/459, loss: 0.6832935810089111 2023-01-24 00:11:25.419703: step: 282/459, loss: 1.010049819946289 2023-01-24 00:11:26.002304: step: 284/459, loss: 1.5147418975830078 2023-01-24 00:11:26.642405: step: 286/459, loss: 4.7909932136535645 2023-01-24 00:11:27.210844: step: 288/459, loss: 0.9886355400085449 2023-01-24 00:11:27.752814: step: 290/459, loss: 2.8008627891540527 2023-01-24 00:11:28.421451: step: 292/459, loss: 8.830796241760254 2023-01-24 00:11:29.017774: step: 294/459, loss: 0.486957848072052 2023-01-24 00:11:29.609524: step: 296/459, loss: 0.30305343866348267 2023-01-24 00:11:30.253225: step: 298/459, loss: 1.7560030221939087 2023-01-24 00:11:30.866236: step: 300/459, loss: 2.743844509124756 2023-01-24 00:11:31.509239: step: 302/459, loss: 2.2508602142333984 2023-01-24 00:11:32.149829: step: 304/459, loss: 0.9462167024612427 2023-01-24 00:11:32.759303: step: 306/459, loss: 1.1679271459579468 2023-01-24 00:11:33.463335: step: 308/459, loss: 1.075474500656128 2023-01-24 00:11:34.030713: step: 310/459, loss: 3.4738173484802246 2023-01-24 00:11:34.638930: step: 312/459, loss: 1.8190054893493652 2023-01-24 00:11:35.233885: step: 314/459, loss: 2.1524579524993896 2023-01-24 00:11:35.875234: step: 316/459, loss: 1.599608302116394 2023-01-24 00:11:36.410405: step: 318/459, loss: 1.5024293661117554 2023-01-24 00:11:37.045474: step: 320/459, loss: 0.5831025838851929 2023-01-24 00:11:37.602326: step: 322/459, loss: 0.9331634640693665 2023-01-24 00:11:38.215440: step: 324/459, loss: 2.0419631004333496 2023-01-24 00:11:38.800410: step: 326/459, loss: 2.060904026031494 2023-01-24 00:11:39.361018: step: 328/459, loss: 0.8853999376296997 2023-01-24 00:11:39.970026: step: 330/459, loss: 5.087151527404785 2023-01-24 00:11:40.662481: step: 332/459, loss: 0.9994280338287354 2023-01-24 00:11:41.407553: step: 334/459, loss: 2.2580862045288086 2023-01-24 00:11:42.011496: step: 336/459, loss: 1.8988158702850342 2023-01-24 00:11:42.642212: step: 338/459, loss: 1.874299168586731 2023-01-24 00:11:43.246757: step: 340/459, loss: 3.0760974884033203 2023-01-24 00:11:43.854554: step: 342/459, loss: 1.0796383619308472 2023-01-24 00:11:44.534292: step: 344/459, loss: 1.1567338705062866 2023-01-24 00:11:45.175530: step: 346/459, loss: 0.7332533597946167 2023-01-24 00:11:45.776453: step: 348/459, loss: 1.9514131546020508 2023-01-24 00:11:46.398618: step: 350/459, loss: 0.7421085238456726 2023-01-24 00:11:47.029830: step: 352/459, loss: 1.1617892980575562 2023-01-24 
00:11:47.722381: step: 354/459, loss: 1.185189127922058 2023-01-24 00:11:48.364032: step: 356/459, loss: 2.280662775039673 2023-01-24 00:11:48.976424: step: 358/459, loss: 1.2015223503112793 2023-01-24 00:11:49.621651: step: 360/459, loss: 0.7789280414581299 2023-01-24 00:11:50.280513: step: 362/459, loss: 1.0828458070755005 2023-01-24 00:11:50.828971: step: 364/459, loss: 3.140049934387207 2023-01-24 00:11:51.471585: step: 366/459, loss: 0.8582673072814941 2023-01-24 00:11:52.068932: step: 368/459, loss: 0.7380606532096863 2023-01-24 00:11:52.824980: step: 370/459, loss: 1.6459691524505615 2023-01-24 00:11:53.428225: step: 372/459, loss: 0.3439019024372101 2023-01-24 00:11:54.053487: step: 374/459, loss: 0.7866339087486267 2023-01-24 00:11:54.737758: step: 376/459, loss: 0.29395750164985657 2023-01-24 00:11:55.324776: step: 378/459, loss: 8.577985763549805 2023-01-24 00:11:56.014157: step: 380/459, loss: 1.5580620765686035 2023-01-24 00:11:56.604133: step: 382/459, loss: 0.3481694459915161 2023-01-24 00:11:57.255146: step: 384/459, loss: 0.8744121789932251 2023-01-24 00:11:57.860007: step: 386/459, loss: 4.14309549331665 2023-01-24 00:11:58.536665: step: 388/459, loss: 0.7741884589195251 2023-01-24 00:11:59.207567: step: 390/459, loss: 2.0971415042877197 2023-01-24 00:11:59.807895: step: 392/459, loss: 0.3385602831840515 2023-01-24 00:12:00.467258: step: 394/459, loss: 0.5485925674438477 2023-01-24 00:12:01.104540: step: 396/459, loss: 1.2396987676620483 2023-01-24 00:12:01.697260: step: 398/459, loss: 1.3241991996765137 2023-01-24 00:12:02.381069: step: 400/459, loss: 2.892056465148926 2023-01-24 00:12:03.004666: step: 402/459, loss: 0.7483226656913757 2023-01-24 00:12:03.603936: step: 404/459, loss: 2.6788487434387207 2023-01-24 00:12:04.204244: step: 406/459, loss: 0.7998065948486328 2023-01-24 00:12:04.821326: step: 408/459, loss: 1.514395833015442 2023-01-24 00:12:05.420033: step: 410/459, loss: 1.3482205867767334 2023-01-24 00:12:06.089654: step: 412/459, loss: 0.48005056381225586 2023-01-24 00:12:06.729806: step: 414/459, loss: 3.7682156562805176 2023-01-24 00:12:07.369036: step: 416/459, loss: 1.518691062927246 2023-01-24 00:12:08.070005: step: 418/459, loss: 1.357672929763794 2023-01-24 00:12:08.670588: step: 420/459, loss: 0.8726103901863098 2023-01-24 00:12:09.256482: step: 422/459, loss: 1.6112236976623535 2023-01-24 00:12:09.924056: step: 424/459, loss: 1.428727626800537 2023-01-24 00:12:10.651520: step: 426/459, loss: 1.2277071475982666 2023-01-24 00:12:11.198229: step: 428/459, loss: 1.4066407680511475 2023-01-24 00:12:11.772286: step: 430/459, loss: 1.7030949592590332 2023-01-24 00:12:12.347154: step: 432/459, loss: 0.40212443470954895 2023-01-24 00:12:12.977895: step: 434/459, loss: 4.2897233963012695 2023-01-24 00:12:13.609679: step: 436/459, loss: 1.075543761253357 2023-01-24 00:12:14.212289: step: 438/459, loss: 2.4940266609191895 2023-01-24 00:12:14.816755: step: 440/459, loss: 0.30974051356315613 2023-01-24 00:12:15.423159: step: 442/459, loss: 1.1410452127456665 2023-01-24 00:12:16.008558: step: 444/459, loss: 1.105715274810791 2023-01-24 00:12:16.622981: step: 446/459, loss: 0.38048556447029114 2023-01-24 00:12:17.247019: step: 448/459, loss: 1.4916739463806152 2023-01-24 00:12:17.960989: step: 450/459, loss: 1.0962646007537842 2023-01-24 00:12:18.606902: step: 452/459, loss: 7.929347038269043 2023-01-24 00:12:19.269231: step: 454/459, loss: 3.323611259460449 2023-01-24 00:12:19.871561: step: 456/459, loss: 2.756432294845581 2023-01-24 00:12:20.470982: step: 
458/459, loss: 1.2801079750061035 2023-01-24 00:12:21.133509: step: 460/459, loss: 2.9039793014526367 2023-01-24 00:12:21.695694: step: 462/459, loss: 0.9478271007537842 2023-01-24 00:12:22.355364: step: 464/459, loss: 0.7801481485366821 2023-01-24 00:12:22.973588: step: 466/459, loss: 1.9684977531433105 2023-01-24 00:12:23.547424: step: 468/459, loss: 1.02853524684906 2023-01-24 00:12:24.168471: step: 470/459, loss: 0.9955044984817505 2023-01-24 00:12:24.743345: step: 472/459, loss: 0.622715413570404 2023-01-24 00:12:25.402881: step: 474/459, loss: 0.6934436559677124 2023-01-24 00:12:25.963508: step: 476/459, loss: 1.926073431968689 2023-01-24 00:12:26.605935: step: 478/459, loss: 3.4433839321136475 2023-01-24 00:12:27.189870: step: 480/459, loss: 2.146625280380249 2023-01-24 00:12:27.825064: step: 482/459, loss: 0.5956369638442993 2023-01-24 00:12:28.474192: step: 484/459, loss: 2.145817279815674 2023-01-24 00:12:29.072029: step: 486/459, loss: 2.3201704025268555 2023-01-24 00:12:29.775620: step: 488/459, loss: 2.6764204502105713 2023-01-24 00:12:30.439975: step: 490/459, loss: 3.484480857849121 2023-01-24 00:12:31.086656: step: 492/459, loss: 6.866931438446045 2023-01-24 00:12:31.701567: step: 494/459, loss: 1.6879788637161255 2023-01-24 00:12:32.295875: step: 496/459, loss: 1.7114297151565552 2023-01-24 00:12:32.911864: step: 498/459, loss: 0.8747518658638 2023-01-24 00:12:33.492856: step: 500/459, loss: 0.6954765319824219 2023-01-24 00:12:34.107586: step: 502/459, loss: 1.2622300386428833 2023-01-24 00:12:34.686827: step: 504/459, loss: 0.43292269110679626 2023-01-24 00:12:35.298982: step: 506/459, loss: 0.5250182151794434 2023-01-24 00:12:35.893246: step: 508/459, loss: 0.466840922832489 2023-01-24 00:12:36.488474: step: 510/459, loss: 1.4372602701187134 2023-01-24 00:12:37.120964: step: 512/459, loss: 4.802717685699463 2023-01-24 00:12:37.716224: step: 514/459, loss: 1.754112958908081 2023-01-24 00:12:38.309607: step: 516/459, loss: 1.1707080602645874 2023-01-24 00:12:38.962322: step: 518/459, loss: 4.3704752922058105 2023-01-24 00:12:39.657754: step: 520/459, loss: 1.3667216300964355 2023-01-24 00:12:40.311759: step: 522/459, loss: 1.8180477619171143 2023-01-24 00:12:40.929212: step: 524/459, loss: 3.0925230979919434 2023-01-24 00:12:41.599328: step: 526/459, loss: 0.6146737933158875 2023-01-24 00:12:42.210556: step: 528/459, loss: 0.9668198823928833 2023-01-24 00:12:42.830503: step: 530/459, loss: 0.18275050818920135 2023-01-24 00:12:43.433313: step: 532/459, loss: 0.9133509397506714 2023-01-24 00:12:44.090485: step: 534/459, loss: 3.936896324157715 2023-01-24 00:12:44.682380: step: 536/459, loss: 0.6911158561706543 2023-01-24 00:12:45.281211: step: 538/459, loss: 1.5471265316009521 2023-01-24 00:12:45.902095: step: 540/459, loss: 1.001186728477478 2023-01-24 00:12:46.513571: step: 542/459, loss: 0.5195126533508301 2023-01-24 00:12:47.098098: step: 544/459, loss: 5.252586841583252 2023-01-24 00:12:47.649513: step: 546/459, loss: 1.2656114101409912 2023-01-24 00:12:48.271151: step: 548/459, loss: 2.7198095321655273 2023-01-24 00:12:48.906477: step: 550/459, loss: 1.5847090482711792 2023-01-24 00:12:49.552530: step: 552/459, loss: 1.31501042842865 2023-01-24 00:12:50.158604: step: 554/459, loss: 0.38828060030937195 2023-01-24 00:12:50.757168: step: 556/459, loss: 0.6545200347900391 2023-01-24 00:12:51.395397: step: 558/459, loss: 1.0520541667938232 2023-01-24 00:12:51.963154: step: 560/459, loss: 1.1451678276062012 2023-01-24 00:12:52.573957: step: 562/459, loss: 1.8490960597991943 
2023-01-24 00:12:53.145422: step: 564/459, loss: 1.5789721012115479 2023-01-24 00:12:53.877309: step: 566/459, loss: 4.846915245056152 2023-01-24 00:12:54.450230: step: 568/459, loss: 2.7101569175720215 2023-01-24 00:12:55.173115: step: 570/459, loss: 4.424274444580078 2023-01-24 00:12:55.824255: step: 572/459, loss: 4.240233421325684 2023-01-24 00:12:56.485181: step: 574/459, loss: 2.5171926021575928 2023-01-24 00:12:57.066649: step: 576/459, loss: 1.9181804656982422 2023-01-24 00:12:57.672442: step: 578/459, loss: 2.0375030040740967 2023-01-24 00:12:58.241687: step: 580/459, loss: 1.604880690574646 2023-01-24 00:12:58.898915: step: 582/459, loss: 0.7266522645950317 2023-01-24 00:12:59.484625: step: 584/459, loss: 0.8022943735122681 2023-01-24 00:13:00.124118: step: 586/459, loss: 0.6031952500343323 2023-01-24 00:13:00.761931: step: 588/459, loss: 1.6514990329742432 2023-01-24 00:13:01.435983: step: 590/459, loss: 1.0368783473968506 2023-01-24 00:13:02.040407: step: 592/459, loss: 1.6634620428085327 2023-01-24 00:13:02.607695: step: 594/459, loss: 3.229857921600342 2023-01-24 00:13:03.249338: step: 596/459, loss: 0.70522540807724 2023-01-24 00:13:03.861206: step: 598/459, loss: 1.4609432220458984 2023-01-24 00:13:04.511729: step: 600/459, loss: 1.2257959842681885 2023-01-24 00:13:05.247220: step: 602/459, loss: 0.9444800615310669 2023-01-24 00:13:05.816301: step: 604/459, loss: 2.034120798110962 2023-01-24 00:13:06.384786: step: 606/459, loss: 2.633071184158325 2023-01-24 00:13:06.991156: step: 608/459, loss: 0.7241140007972717 2023-01-24 00:13:07.587677: step: 610/459, loss: 2.491039752960205 2023-01-24 00:13:08.189139: step: 612/459, loss: 0.5786416530609131 2023-01-24 00:13:08.805062: step: 614/459, loss: 0.6675806045532227 2023-01-24 00:13:09.537673: step: 616/459, loss: 0.7144543528556824 2023-01-24 00:13:10.230743: step: 618/459, loss: 3.149385452270508 2023-01-24 00:13:11.004686: step: 620/459, loss: 1.2661981582641602 2023-01-24 00:13:11.597085: step: 622/459, loss: 0.2766946256160736 2023-01-24 00:13:12.213394: step: 624/459, loss: 0.760688066482544 2023-01-24 00:13:12.773318: step: 626/459, loss: 1.3481698036193848 2023-01-24 00:13:13.363276: step: 628/459, loss: 0.8819654583930969 2023-01-24 00:13:14.066867: step: 630/459, loss: 0.6101068258285522 2023-01-24 00:13:14.627992: step: 632/459, loss: 1.6909291744232178 2023-01-24 00:13:15.285641: step: 634/459, loss: 0.5122995376586914 2023-01-24 00:13:15.970009: step: 636/459, loss: 1.4349700212478638 2023-01-24 00:13:16.662788: step: 638/459, loss: 0.7249438166618347 2023-01-24 00:13:17.264821: step: 640/459, loss: 1.1276358366012573 2023-01-24 00:13:17.929980: step: 642/459, loss: 1.2147623300552368 2023-01-24 00:13:18.565686: step: 644/459, loss: 0.7561400532722473 2023-01-24 00:13:19.181773: step: 646/459, loss: 0.8366003632545471 2023-01-24 00:13:19.875205: step: 648/459, loss: 2.106358528137207 2023-01-24 00:13:20.481772: step: 650/459, loss: 4.596017837524414 2023-01-24 00:13:21.140706: step: 652/459, loss: 4.607790946960449 2023-01-24 00:13:21.715277: step: 654/459, loss: 3.2632460594177246 2023-01-24 00:13:22.373056: step: 656/459, loss: 0.4384366571903229 2023-01-24 00:13:23.008452: step: 658/459, loss: 1.3590916395187378 2023-01-24 00:13:23.585100: step: 660/459, loss: 1.2518911361694336 2023-01-24 00:13:24.183678: step: 662/459, loss: 1.6108684539794922 2023-01-24 00:13:24.733004: step: 664/459, loss: 0.7277984619140625 2023-01-24 00:13:25.373387: step: 666/459, loss: 2.811052083969116 2023-01-24 00:13:26.018373: step: 
668/459, loss: 2.607978582382202 2023-01-24 00:13:26.625440: step: 670/459, loss: 1.3136045932769775 2023-01-24 00:13:27.262682: step: 672/459, loss: 2.3524956703186035 2023-01-24 00:13:27.886829: step: 674/459, loss: 1.0250489711761475 2023-01-24 00:13:28.465603: step: 676/459, loss: 1.8694411516189575 2023-01-24 00:13:29.127726: step: 678/459, loss: 2.3206093311309814 2023-01-24 00:13:29.736215: step: 680/459, loss: 3.035940170288086 2023-01-24 00:13:30.312285: step: 682/459, loss: 0.7468891143798828 2023-01-24 00:13:30.964720: step: 684/459, loss: 0.4595390260219574 2023-01-24 00:13:31.560432: step: 686/459, loss: 0.44685348868370056 2023-01-24 00:13:32.163729: step: 688/459, loss: 1.3384919166564941 2023-01-24 00:13:32.791441: step: 690/459, loss: 1.1757570505142212 2023-01-24 00:13:33.381305: step: 692/459, loss: 2.847175359725952 2023-01-24 00:13:34.013728: step: 694/459, loss: 0.797674834728241 2023-01-24 00:13:34.602876: step: 696/459, loss: 0.7881062626838684 2023-01-24 00:13:35.188522: step: 698/459, loss: 1.939846396446228 2023-01-24 00:13:35.816571: step: 700/459, loss: 0.9349437952041626 2023-01-24 00:13:36.478627: step: 702/459, loss: 1.354567050933838 2023-01-24 00:13:37.142633: step: 704/459, loss: 2.35331654548645 2023-01-24 00:13:37.801496: step: 706/459, loss: 2.150862693786621 2023-01-24 00:13:38.409111: step: 708/459, loss: 4.014100074768066 2023-01-24 00:13:39.014753: step: 710/459, loss: 1.4505374431610107 2023-01-24 00:13:39.638871: step: 712/459, loss: 2.6891000270843506 2023-01-24 00:13:40.246606: step: 714/459, loss: 0.2900802195072174 2023-01-24 00:13:40.871586: step: 716/459, loss: 1.679220199584961 2023-01-24 00:13:41.534325: step: 718/459, loss: 1.3851189613342285 2023-01-24 00:13:42.114523: step: 720/459, loss: 2.8023760318756104 2023-01-24 00:13:42.767984: step: 722/459, loss: 1.5972695350646973 2023-01-24 00:13:43.381053: step: 724/459, loss: 0.5987861752510071 2023-01-24 00:13:43.965397: step: 726/459, loss: 0.5535874962806702 2023-01-24 00:13:44.646721: step: 728/459, loss: 1.2826502323150635 2023-01-24 00:13:45.245308: step: 730/459, loss: 0.6945141553878784 2023-01-24 00:13:45.816271: step: 732/459, loss: 3.770942211151123 2023-01-24 00:13:46.430613: step: 734/459, loss: 1.1257808208465576 2023-01-24 00:13:46.993339: step: 736/459, loss: 0.8114346861839294 2023-01-24 00:13:47.570004: step: 738/459, loss: 2.9906349182128906 2023-01-24 00:13:48.218835: step: 740/459, loss: 0.7856050729751587 2023-01-24 00:13:48.872797: step: 742/459, loss: 0.6122981905937195 2023-01-24 00:13:49.533329: step: 744/459, loss: 4.5051984786987305 2023-01-24 00:13:50.159744: step: 746/459, loss: 0.5489301681518555 2023-01-24 00:13:50.855701: step: 748/459, loss: 4.706042289733887 2023-01-24 00:13:51.530731: step: 750/459, loss: 0.9763305187225342 2023-01-24 00:13:52.130585: step: 752/459, loss: 4.557886123657227 2023-01-24 00:13:52.731596: step: 754/459, loss: 1.396458387374878 2023-01-24 00:13:53.435857: step: 756/459, loss: 1.6567044258117676 2023-01-24 00:13:54.106659: step: 758/459, loss: 2.279005527496338 2023-01-24 00:13:54.717559: step: 760/459, loss: 1.0885093212127686 2023-01-24 00:13:55.346468: step: 762/459, loss: 1.3707302808761597 2023-01-24 00:13:55.939563: step: 764/459, loss: 4.190794944763184 2023-01-24 00:13:56.524255: step: 766/459, loss: 1.7109222412109375 2023-01-24 00:13:57.133281: step: 768/459, loss: 0.5966875553131104 2023-01-24 00:13:57.738763: step: 770/459, loss: 7.31533670425415 2023-01-24 00:13:58.332908: step: 772/459, loss: 3.5234689712524414 
2023-01-24 00:13:58.932339: step: 774/459, loss: 0.5130599737167358 2023-01-24 00:13:59.525691: step: 776/459, loss: 3.3166801929473877 2023-01-24 00:14:00.178602: step: 778/459, loss: 0.49467605352401733 2023-01-24 00:14:00.777877: step: 780/459, loss: 0.773528516292572 2023-01-24 00:14:01.381489: step: 782/459, loss: 0.7945117354393005 2023-01-24 00:14:02.020120: step: 784/459, loss: 0.5091480016708374 2023-01-24 00:14:02.634220: step: 786/459, loss: 0.3267606794834137 2023-01-24 00:14:03.225459: step: 788/459, loss: 0.1241484209895134 2023-01-24 00:14:03.817401: step: 790/459, loss: 0.8298280835151672 2023-01-24 00:14:04.482710: step: 792/459, loss: 0.6026463508605957 2023-01-24 00:14:05.161786: step: 794/459, loss: 4.921339988708496 2023-01-24 00:14:05.810807: step: 796/459, loss: 0.7484053373336792 2023-01-24 00:14:06.453757: step: 798/459, loss: 7.8285322189331055 2023-01-24 00:14:07.050247: step: 800/459, loss: 0.9472916126251221 2023-01-24 00:14:07.661943: step: 802/459, loss: 0.4082900881767273 2023-01-24 00:14:08.280846: step: 804/459, loss: 0.44069916009902954 2023-01-24 00:14:08.843604: step: 806/459, loss: 0.39255931973457336 2023-01-24 00:14:09.467263: step: 808/459, loss: 2.260658025741577 2023-01-24 00:14:10.063535: step: 810/459, loss: 1.5396331548690796 2023-01-24 00:14:10.696432: step: 812/459, loss: 2.2354464530944824 2023-01-24 00:14:11.282279: step: 814/459, loss: 1.4015529155731201 2023-01-24 00:14:11.922711: step: 816/459, loss: 0.7333983778953552 2023-01-24 00:14:12.463107: step: 818/459, loss: 0.6273173093795776 2023-01-24 00:14:13.122451: step: 820/459, loss: 1.7879142761230469 2023-01-24 00:14:13.780252: step: 822/459, loss: 0.9862490892410278 2023-01-24 00:14:14.470320: step: 824/459, loss: 1.7285690307617188 2023-01-24 00:14:15.084686: step: 826/459, loss: 0.3541461229324341 2023-01-24 00:14:15.692118: step: 828/459, loss: 1.1012656688690186 2023-01-24 00:14:16.437976: step: 830/459, loss: 1.7890552282333374 2023-01-24 00:14:17.067258: step: 832/459, loss: 5.600885391235352 2023-01-24 00:14:17.862333: step: 834/459, loss: 1.1526391506195068 2023-01-24 00:14:18.499073: step: 836/459, loss: 1.7324172258377075 2023-01-24 00:14:19.096577: step: 838/459, loss: 1.1430737972259521 2023-01-24 00:14:19.802286: step: 840/459, loss: 0.49146774411201477 2023-01-24 00:14:20.356275: step: 842/459, loss: 4.176309585571289 2023-01-24 00:14:20.986561: step: 844/459, loss: 3.5221033096313477 2023-01-24 00:14:21.558680: step: 846/459, loss: 0.8362484574317932 2023-01-24 00:14:22.184438: step: 848/459, loss: 1.107537865638733 2023-01-24 00:14:22.754845: step: 850/459, loss: 0.5651856064796448 2023-01-24 00:14:23.482833: step: 852/459, loss: 0.5344273447990417 2023-01-24 00:14:24.129625: step: 854/459, loss: 0.5821496844291687 2023-01-24 00:14:24.793335: step: 856/459, loss: 1.270309329032898 2023-01-24 00:14:25.439354: step: 858/459, loss: 2.642348527908325 2023-01-24 00:14:26.121290: step: 860/459, loss: 4.137936592102051 2023-01-24 00:14:26.697400: step: 862/459, loss: 1.1007333993911743 2023-01-24 00:14:27.304568: step: 864/459, loss: 1.557206392288208 2023-01-24 00:14:27.963924: step: 866/459, loss: 1.4417884349822998 2023-01-24 00:14:28.540661: step: 868/459, loss: 2.808262348175049 2023-01-24 00:14:29.203882: step: 870/459, loss: 1.726438283920288 2023-01-24 00:14:29.825458: step: 872/459, loss: 2.826653480529785 2023-01-24 00:14:30.543330: step: 874/459, loss: 1.0331168174743652 2023-01-24 00:14:31.178945: step: 876/459, loss: 2.876239776611328 2023-01-24 00:14:31.772421: 
step: 878/459, loss: 0.5352421998977661 2023-01-24 00:14:32.326051: step: 880/459, loss: 2.7669625282287598 2023-01-24 00:14:32.972390: step: 882/459, loss: 1.2966234683990479 2023-01-24 00:14:33.552041: step: 884/459, loss: 4.247247695922852 2023-01-24 00:14:34.205894: step: 886/459, loss: 0.4928673803806305 2023-01-24 00:14:34.806717: step: 888/459, loss: 0.6477999091148376 2023-01-24 00:14:35.446192: step: 890/459, loss: 1.530838131904602 2023-01-24 00:14:36.081614: step: 892/459, loss: 0.5277753472328186 2023-01-24 00:14:36.675908: step: 894/459, loss: 2.536600112915039 2023-01-24 00:14:37.304534: step: 896/459, loss: 1.7760241031646729 2023-01-24 00:14:37.888686: step: 898/459, loss: 1.740977168083191 2023-01-24 00:14:38.591707: step: 900/459, loss: 0.9304183721542358 2023-01-24 00:14:39.202312: step: 902/459, loss: 2.645181894302368 2023-01-24 00:14:39.910124: step: 904/459, loss: 1.493164300918579 2023-01-24 00:14:40.511734: step: 906/459, loss: 3.0619094371795654 2023-01-24 00:14:41.220007: step: 908/459, loss: 2.3159165382385254 2023-01-24 00:14:41.879058: step: 910/459, loss: 0.7331509590148926 2023-01-24 00:14:42.505517: step: 912/459, loss: 1.4959956407546997 2023-01-24 00:14:43.067404: step: 914/459, loss: 1.4105982780456543 2023-01-24 00:14:43.700976: step: 916/459, loss: 2.886962413787842 2023-01-24 00:14:44.328563: step: 918/459, loss: 1.6082098484039307 2023-01-24 00:14:44.767448: step: 920/459, loss: 0.04700871556997299 ================================================== Loss: 1.850 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3491727817734325, 'r': 0.20065883867509166, 'f1': 0.2548584049438739}, 'combined': 0.18779040364285446, 'epoch': 1} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3892399194349998, 'r': 0.1591543097599138, 'f1': 0.22592947704657004}, 'combined': 0.1445948653098048, 'epoch': 1} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3508175186155378, 'r': 0.20160401825921262, 'f1': 0.2560588851359507}, 'combined': 0.18867496799491104, 'epoch': 1} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3783908438078241, 'r': 0.14991550203446885, 'f1': 0.21474908927794695}, 'combined': 0.13743941713788602, 'epoch': 1} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35683978582201553, 'r': 0.20506482965953254, 'f1': 0.2604544895315551}, 'combined': 0.19191383439167217, 'epoch': 1} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3885350794461351, 'r': 0.1574570992859677, 'f1': 0.22409700711280306}, 'combined': 0.1606733258544626, 'epoch': 1} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.38949275362318836, 'r': 0.25595238095238093, 'f1': 0.3089080459770115}, 'combined': 0.20593869731800765, 'epoch': 1} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.4166666666666667, 'r': 0.16304347826086957, 'f1': 0.23437500000000003}, 'combined': 0.11718750000000001, 'epoch': 1} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4583333333333333, 'r': 0.09482758620689655, 'f1': 0.15714285714285717}, 'combined': 0.10476190476190478, 'epoch': 1} New best chinese 
model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3491727817734325, 'r': 0.20065883867509166, 'f1': 0.2548584049438739}, 'combined': 0.18779040364285446, 'epoch': 1} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3892399194349998, 'r': 0.1591543097599138, 'f1': 0.22592947704657004}, 'combined': 0.1445948653098048, 'epoch': 1} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.38949275362318836, 'r': 0.25595238095238093, 'f1': 0.3089080459770115}, 'combined': 0.20593869731800765, 'epoch': 1} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3508175186155378, 'r': 0.20160401825921262, 'f1': 0.2560588851359507}, 'combined': 0.18867496799491104, 'epoch': 1} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3783908438078241, 'r': 0.14991550203446885, 'f1': 0.21474908927794695}, 'combined': 0.13743941713788602, 'epoch': 1} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.4166666666666667, 'r': 0.16304347826086957, 'f1': 0.23437500000000003}, 'combined': 0.11718750000000001, 'epoch': 1} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35683978582201553, 'r': 0.20506482965953254, 'f1': 0.2604544895315551}, 'combined': 0.19191383439167217, 'epoch': 1} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3885350794461351, 'r': 0.1574570992859677, 'f1': 0.22409700711280306}, 'combined': 0.1606733258544626, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4583333333333333, 'r': 0.09482758620689655, 'f1': 0.15714285714285717}, 'combined': 0.10476190476190478, 'epoch': 1} ****************************** Epoch: 2 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:17:44.660598: step: 2/459, loss: 0.45717936754226685 2023-01-24 00:17:45.273029: step: 4/459, loss: 1.4427688121795654 2023-01-24 00:17:45.861304: step: 6/459, loss: 3.5838398933410645 2023-01-24 00:17:46.567824: step: 8/459, loss: 3.4458775520324707 2023-01-24 00:17:47.211613: step: 10/459, loss: 0.9520620107650757 2023-01-24 00:17:47.918747: step: 12/459, loss: 0.3425907790660858 2023-01-24 00:17:48.612215: step: 14/459, loss: 1.4372316598892212 2023-01-24 00:17:49.179812: step: 16/459, loss: 0.6982144117355347 2023-01-24 00:17:49.875887: step: 18/459, loss: 2.260547637939453 2023-01-24 00:17:50.466819: step: 20/459, loss: 3.055114984512329 2023-01-24 00:17:51.032593: step: 22/459, loss: 1.0574822425842285 2023-01-24 00:17:51.752840: step: 24/459, loss: 2.720066547393799 2023-01-24 00:17:52.404395: step: 26/459, loss: 0.3601929247379303 2023-01-24 00:17:53.041503: step: 28/459, loss: 0.5986139178276062 2023-01-24 00:17:53.704133: step: 30/459, loss: 3.3106954097747803 2023-01-24 00:17:54.365503: step: 32/459, loss: 3.505411148071289 2023-01-24 00:17:54.968205: step: 34/459, 
loss: 1.0489981174468994 2023-01-24 00:17:55.611369: step: 36/459, loss: 0.985190749168396 2023-01-24 00:17:56.282784: step: 38/459, loss: 7.053339958190918 2023-01-24 00:17:56.926247: step: 40/459, loss: 0.34661781787872314 2023-01-24 00:17:57.559400: step: 42/459, loss: 1.4588831663131714 2023-01-24 00:17:58.172515: step: 44/459, loss: 0.5674182176589966 2023-01-24 00:17:58.766727: step: 46/459, loss: 1.3515422344207764 2023-01-24 00:17:59.346279: step: 48/459, loss: 3.0648441314697266 2023-01-24 00:18:00.059306: step: 50/459, loss: 0.8960445523262024 2023-01-24 00:18:00.627871: step: 52/459, loss: 0.3563443720340729 2023-01-24 00:18:01.268825: step: 54/459, loss: 2.538529872894287 2023-01-24 00:18:01.908202: step: 56/459, loss: 8.691038131713867 2023-01-24 00:18:02.514251: step: 58/459, loss: 0.6539033055305481 2023-01-24 00:18:03.132754: step: 60/459, loss: 0.7089080214500427 2023-01-24 00:18:03.828306: step: 62/459, loss: 0.5438236594200134 2023-01-24 00:18:04.441922: step: 64/459, loss: 0.7990478873252869 2023-01-24 00:18:05.056519: step: 66/459, loss: 3.2393651008605957 2023-01-24 00:18:05.698646: step: 68/459, loss: 0.9060479402542114 2023-01-24 00:18:06.362437: step: 70/459, loss: 1.270484209060669 2023-01-24 00:18:06.949453: step: 72/459, loss: 1.159286379814148 2023-01-24 00:18:07.563234: step: 74/459, loss: 0.8148633241653442 2023-01-24 00:18:08.155759: step: 76/459, loss: 2.643871784210205 2023-01-24 00:18:08.781468: step: 78/459, loss: 1.0134001970291138 2023-01-24 00:18:09.445105: step: 80/459, loss: 1.6023664474487305 2023-01-24 00:18:10.070619: step: 82/459, loss: 0.9758840203285217 2023-01-24 00:18:10.640251: step: 84/459, loss: 0.18935070931911469 2023-01-24 00:18:11.258224: step: 86/459, loss: 0.42378613352775574 2023-01-24 00:18:11.859250: step: 88/459, loss: 0.8720873594284058 2023-01-24 00:18:12.452515: step: 90/459, loss: 0.40785151720046997 2023-01-24 00:18:13.060404: step: 92/459, loss: 0.4833860397338867 2023-01-24 00:18:13.678178: step: 94/459, loss: 2.2302584648132324 2023-01-24 00:18:14.326502: step: 96/459, loss: 0.92604660987854 2023-01-24 00:18:14.973060: step: 98/459, loss: 1.4461567401885986 2023-01-24 00:18:15.636747: step: 100/459, loss: 2.2264413833618164 2023-01-24 00:18:16.223412: step: 102/459, loss: 1.7457997798919678 2023-01-24 00:18:16.818578: step: 104/459, loss: 2.0622732639312744 2023-01-24 00:18:17.489475: step: 106/459, loss: 0.5698686838150024 2023-01-24 00:18:18.143497: step: 108/459, loss: 0.9569977521896362 2023-01-24 00:18:18.726944: step: 110/459, loss: 2.827911138534546 2023-01-24 00:18:19.363900: step: 112/459, loss: 2.2801501750946045 2023-01-24 00:18:20.013272: step: 114/459, loss: 0.41653090715408325 2023-01-24 00:18:20.603924: step: 116/459, loss: 0.6147544980049133 2023-01-24 00:18:21.243768: step: 118/459, loss: 0.3590528666973114 2023-01-24 00:18:21.930900: step: 120/459, loss: 1.04329252243042 2023-01-24 00:18:22.557178: step: 122/459, loss: 1.4602960348129272 2023-01-24 00:18:23.137261: step: 124/459, loss: 1.4798613786697388 2023-01-24 00:18:23.756682: step: 126/459, loss: 0.9874852299690247 2023-01-24 00:18:24.448874: step: 128/459, loss: 0.7542616128921509 2023-01-24 00:18:25.083652: step: 130/459, loss: 2.6203956604003906 2023-01-24 00:18:25.681281: step: 132/459, loss: 1.4582090377807617 2023-01-24 00:18:26.312940: step: 134/459, loss: 1.7166615724563599 2023-01-24 00:18:26.943680: step: 136/459, loss: 0.939001202583313 2023-01-24 00:18:27.552475: step: 138/459, loss: 1.8330106735229492 2023-01-24 00:18:28.114892: step: 
140/459, loss: 1.107508897781372 2023-01-24 00:18:28.794621: step: 142/459, loss: 0.6877260804176331 2023-01-24 00:18:29.377886: step: 144/459, loss: 1.3378502130508423 2023-01-24 00:18:30.016226: step: 146/459, loss: 1.56905198097229 2023-01-24 00:18:30.645635: step: 148/459, loss: 1.0642061233520508 2023-01-24 00:18:31.254681: step: 150/459, loss: 0.6688764095306396 2023-01-24 00:18:31.839690: step: 152/459, loss: 2.929208755493164 2023-01-24 00:18:32.443126: step: 154/459, loss: 5.062226295471191 2023-01-24 00:18:33.088292: step: 156/459, loss: 0.3685398995876312 2023-01-24 00:18:33.694019: step: 158/459, loss: 1.8093228340148926 2023-01-24 00:18:34.320042: step: 160/459, loss: 0.8154586553573608 2023-01-24 00:18:34.931146: step: 162/459, loss: 6.684952735900879 2023-01-24 00:18:35.568784: step: 164/459, loss: 1.2105188369750977 2023-01-24 00:18:36.185547: step: 166/459, loss: 0.8681111335754395 2023-01-24 00:18:36.750450: step: 168/459, loss: 1.2550456523895264 2023-01-24 00:18:37.348728: step: 170/459, loss: 0.40556803345680237 2023-01-24 00:18:37.970168: step: 172/459, loss: 0.6481894254684448 2023-01-24 00:18:38.582897: step: 174/459, loss: 0.2537441849708557 2023-01-24 00:18:39.275754: step: 176/459, loss: 1.2547168731689453 2023-01-24 00:18:39.873116: step: 178/459, loss: 0.427835613489151 2023-01-24 00:18:40.457124: step: 180/459, loss: 0.7216542959213257 2023-01-24 00:18:41.062124: step: 182/459, loss: 9.351926803588867 2023-01-24 00:18:41.681274: step: 184/459, loss: 1.414599895477295 2023-01-24 00:18:42.352665: step: 186/459, loss: 7.326249122619629 2023-01-24 00:18:43.046650: step: 188/459, loss: 2.1490461826324463 2023-01-24 00:18:43.761694: step: 190/459, loss: 0.9087146520614624 2023-01-24 00:18:44.355027: step: 192/459, loss: 1.3119406700134277 2023-01-24 00:18:44.965583: step: 194/459, loss: 1.088611364364624 2023-01-24 00:18:45.586081: step: 196/459, loss: 0.4172945022583008 2023-01-24 00:18:46.181927: step: 198/459, loss: 0.47233378887176514 2023-01-24 00:18:46.761283: step: 200/459, loss: 1.294242024421692 2023-01-24 00:18:47.328897: step: 202/459, loss: 2.241133213043213 2023-01-24 00:18:47.935887: step: 204/459, loss: 2.2128143310546875 2023-01-24 00:18:48.563712: step: 206/459, loss: 1.3474177122116089 2023-01-24 00:18:49.196262: step: 208/459, loss: 0.8956626653671265 2023-01-24 00:18:49.863598: step: 210/459, loss: 1.0405977964401245 2023-01-24 00:18:50.496418: step: 212/459, loss: 1.076477289199829 2023-01-24 00:18:51.229332: step: 214/459, loss: 0.8332972526550293 2023-01-24 00:18:51.824677: step: 216/459, loss: 1.022378921508789 2023-01-24 00:18:52.377475: step: 218/459, loss: 0.6697079539299011 2023-01-24 00:18:53.004955: step: 220/459, loss: 0.6322205066680908 2023-01-24 00:18:53.641262: step: 222/459, loss: 3.7500298023223877 2023-01-24 00:18:54.254837: step: 224/459, loss: 2.798859119415283 2023-01-24 00:18:54.827545: step: 226/459, loss: 2.331723213195801 2023-01-24 00:18:55.400839: step: 228/459, loss: 0.4060479998588562 2023-01-24 00:18:56.006618: step: 230/459, loss: 1.3508962392807007 2023-01-24 00:18:56.659145: step: 232/459, loss: 0.28972741961479187 2023-01-24 00:18:57.256241: step: 234/459, loss: 0.4142490029335022 2023-01-24 00:18:57.866018: step: 236/459, loss: 1.5129990577697754 2023-01-24 00:18:58.496167: step: 238/459, loss: 1.2872895002365112 2023-01-24 00:18:59.081819: step: 240/459, loss: 0.2817654311656952 2023-01-24 00:18:59.698889: step: 242/459, loss: 0.5115005970001221 2023-01-24 00:19:00.263823: step: 244/459, loss: 
0.5218278169631958 2023-01-24 00:19:00.960838: step: 246/459, loss: 4.3469929695129395 2023-01-24 00:19:01.531790: step: 248/459, loss: 2.009382724761963 2023-01-24 00:19:02.151749: step: 250/459, loss: 0.4062691628932953 2023-01-24 00:19:02.878441: step: 252/459, loss: 0.8576982021331787 2023-01-24 00:19:03.416435: step: 254/459, loss: 0.3867456316947937 2023-01-24 00:19:04.069326: step: 256/459, loss: 1.6648547649383545 2023-01-24 00:19:04.772679: step: 258/459, loss: 0.7702038288116455 2023-01-24 00:19:05.341876: step: 260/459, loss: 0.3635956645011902 2023-01-24 00:19:05.955069: step: 262/459, loss: 0.7775037288665771 2023-01-24 00:19:06.486220: step: 264/459, loss: 0.5447624921798706 2023-01-24 00:19:07.104160: step: 266/459, loss: 1.4929540157318115 2023-01-24 00:19:07.686082: step: 268/459, loss: 0.7429506778717041 2023-01-24 00:19:08.319246: step: 270/459, loss: 0.6128582954406738 2023-01-24 00:19:08.941595: step: 272/459, loss: 0.2768704891204834 2023-01-24 00:19:09.541735: step: 274/459, loss: 0.241205632686615 2023-01-24 00:19:10.123897: step: 276/459, loss: 0.4464900493621826 2023-01-24 00:19:10.697226: step: 278/459, loss: 0.6371052265167236 2023-01-24 00:19:11.346303: step: 280/459, loss: 0.8202690482139587 2023-01-24 00:19:11.981239: step: 282/459, loss: 0.2504928708076477 2023-01-24 00:19:12.540164: step: 284/459, loss: 1.9484589099884033 2023-01-24 00:19:13.175156: step: 286/459, loss: 2.0060923099517822 2023-01-24 00:19:13.801652: step: 288/459, loss: 0.39883309602737427 2023-01-24 00:19:14.442560: step: 290/459, loss: 0.2153918296098709 2023-01-24 00:19:15.066333: step: 292/459, loss: 2.140862464904785 2023-01-24 00:19:15.729053: step: 294/459, loss: 0.9308165907859802 2023-01-24 00:19:16.356631: step: 296/459, loss: 0.8665428757667542 2023-01-24 00:19:16.978960: step: 298/459, loss: 1.1154383420944214 2023-01-24 00:19:17.616282: step: 300/459, loss: 1.1416797637939453 2023-01-24 00:19:18.203665: step: 302/459, loss: 3.1174733638763428 2023-01-24 00:19:18.838175: step: 304/459, loss: 1.2103869915008545 2023-01-24 00:19:19.542685: step: 306/459, loss: 0.8407613635063171 2023-01-24 00:19:20.126235: step: 308/459, loss: 0.41470959782600403 2023-01-24 00:19:20.761422: step: 310/459, loss: 0.4297115206718445 2023-01-24 00:19:21.499563: step: 312/459, loss: 0.8946555256843567 2023-01-24 00:19:22.132666: step: 314/459, loss: 1.3314210176467896 2023-01-24 00:19:22.689326: step: 316/459, loss: 0.23496276140213013 2023-01-24 00:19:23.289622: step: 318/459, loss: 1.4612510204315186 2023-01-24 00:19:24.019070: step: 320/459, loss: 0.9700450301170349 2023-01-24 00:19:24.609247: step: 322/459, loss: 1.2102749347686768 2023-01-24 00:19:25.241691: step: 324/459, loss: 0.7008458971977234 2023-01-24 00:19:25.841432: step: 326/459, loss: 2.465160369873047 2023-01-24 00:19:26.431595: step: 328/459, loss: 3.15855073928833 2023-01-24 00:19:27.092162: step: 330/459, loss: 1.645024299621582 2023-01-24 00:19:27.772196: step: 332/459, loss: 1.3811469078063965 2023-01-24 00:19:28.426761: step: 334/459, loss: 3.570324182510376 2023-01-24 00:19:29.052848: step: 336/459, loss: 0.40774476528167725 2023-01-24 00:19:29.693275: step: 338/459, loss: 1.4992353916168213 2023-01-24 00:19:30.399050: step: 340/459, loss: 2.114243507385254 2023-01-24 00:19:31.044460: step: 342/459, loss: 0.898658275604248 2023-01-24 00:19:31.649597: step: 344/459, loss: 0.9311527013778687 2023-01-24 00:19:32.220717: step: 346/459, loss: 2.606498956680298 2023-01-24 00:19:32.841905: step: 348/459, loss: 0.3105418384075165 
2023-01-24 00:19:33.466463: step: 350/459, loss: 0.6262545585632324 2023-01-24 00:19:34.035057: step: 352/459, loss: 0.9147328734397888 2023-01-24 00:19:34.648067: step: 354/459, loss: 0.5341132283210754 2023-01-24 00:19:35.196032: step: 356/459, loss: 1.2862645387649536 2023-01-24 00:19:35.827692: step: 358/459, loss: 0.4657110869884491 2023-01-24 00:19:36.460660: step: 360/459, loss: 1.495011806488037 2023-01-24 00:19:37.042751: step: 362/459, loss: 1.3086183071136475 2023-01-24 00:19:37.682191: step: 364/459, loss: 0.7985295653343201 2023-01-24 00:19:38.369038: step: 366/459, loss: 0.7328735589981079 2023-01-24 00:19:39.010619: step: 368/459, loss: 1.1086664199829102 2023-01-24 00:19:39.633207: step: 370/459, loss: 1.211386799812317 2023-01-24 00:19:40.198733: step: 372/459, loss: 1.3314112424850464 2023-01-24 00:19:40.817883: step: 374/459, loss: 1.3773584365844727 2023-01-24 00:19:41.442785: step: 376/459, loss: 1.4435207843780518 2023-01-24 00:19:42.014727: step: 378/459, loss: 2.3584022521972656 2023-01-24 00:19:42.651096: step: 380/459, loss: 1.843122959136963 2023-01-24 00:19:43.264727: step: 382/459, loss: 1.1345126628875732 2023-01-24 00:19:43.933232: step: 384/459, loss: 1.8083263635635376 2023-01-24 00:19:44.545314: step: 386/459, loss: 1.116138219833374 2023-01-24 00:19:45.149653: step: 388/459, loss: 0.8406505584716797 2023-01-24 00:19:45.752966: step: 390/459, loss: 2.033388614654541 2023-01-24 00:19:46.397581: step: 392/459, loss: 1.6891392469406128 2023-01-24 00:19:47.070374: step: 394/459, loss: 0.37889498472213745 2023-01-24 00:19:47.639021: step: 396/459, loss: 2.579160451889038 2023-01-24 00:19:48.336961: step: 398/459, loss: 0.6683961153030396 2023-01-24 00:19:48.992125: step: 400/459, loss: 1.0229767560958862 2023-01-24 00:19:49.580584: step: 402/459, loss: 2.449284076690674 2023-01-24 00:19:50.234057: step: 404/459, loss: 1.150241494178772 2023-01-24 00:19:50.890783: step: 406/459, loss: 0.8791859149932861 2023-01-24 00:19:51.579602: step: 408/459, loss: 0.7983344197273254 2023-01-24 00:19:52.177300: step: 410/459, loss: 6.512253761291504 2023-01-24 00:19:52.742713: step: 412/459, loss: 1.5900623798370361 2023-01-24 00:19:53.309006: step: 414/459, loss: 1.808212161064148 2023-01-24 00:19:53.904367: step: 416/459, loss: 1.6025710105895996 2023-01-24 00:19:54.490970: step: 418/459, loss: 0.6149626970291138 2023-01-24 00:19:55.122659: step: 420/459, loss: 0.5725603699684143 2023-01-24 00:19:55.769602: step: 422/459, loss: 1.3531734943389893 2023-01-24 00:19:56.407948: step: 424/459, loss: 1.6273462772369385 2023-01-24 00:19:57.021044: step: 426/459, loss: 2.208026885986328 2023-01-24 00:19:57.626815: step: 428/459, loss: 0.23514553904533386 2023-01-24 00:19:58.236186: step: 430/459, loss: 1.8255817890167236 2023-01-24 00:19:58.869263: step: 432/459, loss: 2.1909265518188477 2023-01-24 00:19:59.496170: step: 434/459, loss: 0.5634608864784241 2023-01-24 00:20:00.140403: step: 436/459, loss: 1.2695668935775757 2023-01-24 00:20:00.822200: step: 438/459, loss: 7.966315269470215 2023-01-24 00:20:01.450966: step: 440/459, loss: 0.9048678874969482 2023-01-24 00:20:02.058729: step: 442/459, loss: 2.2302334308624268 2023-01-24 00:20:02.649228: step: 444/459, loss: 0.7656605243682861 2023-01-24 00:20:03.292970: step: 446/459, loss: 0.2044113576412201 2023-01-24 00:20:03.987448: step: 448/459, loss: 0.8598530292510986 2023-01-24 00:20:04.508212: step: 450/459, loss: 0.36326515674591064 2023-01-24 00:20:05.133685: step: 452/459, loss: 1.2591376304626465 2023-01-24 00:20:05.734074: 
step: 454/459, loss: 0.2757798731327057 2023-01-24 00:20:06.350130: step: 456/459, loss: 1.3831878900527954 2023-01-24 00:20:06.932453: step: 458/459, loss: 1.9021728038787842 2023-01-24 00:20:07.538403: step: 460/459, loss: 0.7154315114021301 2023-01-24 00:20:08.169544: step: 462/459, loss: 1.4934648275375366 2023-01-24 00:20:08.772048: step: 464/459, loss: 0.8725304007530212 2023-01-24 00:20:09.426300: step: 466/459, loss: 6.689171314239502 2023-01-24 00:20:10.097152: step: 468/459, loss: 0.39742332696914673 2023-01-24 00:20:10.718559: step: 470/459, loss: 0.8239437341690063 2023-01-24 00:20:11.346821: step: 472/459, loss: 0.5605970025062561 2023-01-24 00:20:12.067645: step: 474/459, loss: 1.0267046689987183 2023-01-24 00:20:12.709630: step: 476/459, loss: 0.9729382395744324 2023-01-24 00:20:13.325761: step: 478/459, loss: 1.175020694732666 2023-01-24 00:20:13.970064: step: 480/459, loss: 3.2487869262695312 2023-01-24 00:20:14.559119: step: 482/459, loss: 1.751295566558838 2023-01-24 00:20:15.253313: step: 484/459, loss: 1.9713549613952637 2023-01-24 00:20:15.913732: step: 486/459, loss: 0.2764562666416168 2023-01-24 00:20:16.499284: step: 488/459, loss: 1.5184890031814575 2023-01-24 00:20:17.122164: step: 490/459, loss: 0.7414169311523438 2023-01-24 00:20:17.755749: step: 492/459, loss: 1.5642399787902832 2023-01-24 00:20:18.447277: step: 494/459, loss: 0.31552788615226746 2023-01-24 00:20:19.011299: step: 496/459, loss: 2.2540338039398193 2023-01-24 00:20:19.661818: step: 498/459, loss: 1.0063918828964233 2023-01-24 00:20:20.259584: step: 500/459, loss: 2.8592214584350586 2023-01-24 00:20:20.880632: step: 502/459, loss: 0.3235587477684021 2023-01-24 00:20:21.523952: step: 504/459, loss: 2.1618406772613525 2023-01-24 00:20:22.100267: step: 506/459, loss: 1.2369869947433472 2023-01-24 00:20:22.741345: step: 508/459, loss: 1.2596739530563354 2023-01-24 00:20:23.389527: step: 510/459, loss: 0.7877716422080994 2023-01-24 00:20:24.006972: step: 512/459, loss: 1.1300952434539795 2023-01-24 00:20:24.631663: step: 514/459, loss: 1.9011547565460205 2023-01-24 00:20:25.193739: step: 516/459, loss: 0.9479770660400391 2023-01-24 00:20:25.803465: step: 518/459, loss: 0.6830927729606628 2023-01-24 00:20:26.496553: step: 520/459, loss: 0.5260631442070007 2023-01-24 00:20:27.269934: step: 522/459, loss: 0.6620058417320251 2023-01-24 00:20:27.902902: step: 524/459, loss: 1.6642413139343262 2023-01-24 00:20:28.487741: step: 526/459, loss: 4.668638706207275 2023-01-24 00:20:29.092655: step: 528/459, loss: 0.8126309514045715 2023-01-24 00:20:29.677199: step: 530/459, loss: 3.11431884765625 2023-01-24 00:20:30.275533: step: 532/459, loss: 2.589367389678955 2023-01-24 00:20:30.878747: step: 534/459, loss: 4.974491119384766 2023-01-24 00:20:31.493597: step: 536/459, loss: 1.1601351499557495 2023-01-24 00:20:32.075939: step: 538/459, loss: 0.6934624910354614 2023-01-24 00:20:32.765263: step: 540/459, loss: 1.620200276374817 2023-01-24 00:20:33.436941: step: 542/459, loss: 0.5715560913085938 2023-01-24 00:20:34.056883: step: 544/459, loss: 0.8018465042114258 2023-01-24 00:20:34.668812: step: 546/459, loss: 1.6887102127075195 2023-01-24 00:20:35.290107: step: 548/459, loss: 2.2594659328460693 2023-01-24 00:20:35.928928: step: 550/459, loss: 2.016165018081665 2023-01-24 00:20:36.609771: step: 552/459, loss: 0.5770622491836548 2023-01-24 00:20:37.220542: step: 554/459, loss: 2.9587888717651367 2023-01-24 00:20:37.915252: step: 556/459, loss: 0.5121584534645081 2023-01-24 00:20:38.539002: step: 558/459, loss: 
0.4698749780654907 2023-01-24 00:20:39.183872: step: 560/459, loss: 1.7131949663162231 2023-01-24 00:20:39.806930: step: 562/459, loss: 2.8559985160827637 2023-01-24 00:20:40.463519: step: 564/459, loss: 0.26727646589279175 2023-01-24 00:20:41.111244: step: 566/459, loss: 0.2851499617099762 2023-01-24 00:20:41.722024: step: 568/459, loss: 0.569366991519928 2023-01-24 00:20:42.305138: step: 570/459, loss: 1.1260986328125 2023-01-24 00:20:42.845487: step: 572/459, loss: 1.0763148069381714 2023-01-24 00:20:43.477716: step: 574/459, loss: 0.8301693797111511 2023-01-24 00:20:44.085737: step: 576/459, loss: 0.5375299453735352 2023-01-24 00:20:44.663887: step: 578/459, loss: 0.321865051984787 2023-01-24 00:20:45.287233: step: 580/459, loss: 1.157642126083374 2023-01-24 00:20:45.893060: step: 582/459, loss: 0.1503811478614807 2023-01-24 00:20:46.522763: step: 584/459, loss: 0.6757757067680359 2023-01-24 00:20:47.154440: step: 586/459, loss: 1.749570608139038 2023-01-24 00:20:47.797098: step: 588/459, loss: 1.1451761722564697 2023-01-24 00:20:48.436595: step: 590/459, loss: 0.6024152040481567 2023-01-24 00:20:49.064816: step: 592/459, loss: 0.39958953857421875 2023-01-24 00:20:49.688950: step: 594/459, loss: 0.48186808824539185 2023-01-24 00:20:50.367193: step: 596/459, loss: 0.8975344896316528 2023-01-24 00:20:51.077422: step: 598/459, loss: 2.5631000995635986 2023-01-24 00:20:51.698660: step: 600/459, loss: 0.8968180418014526 2023-01-24 00:20:52.295452: step: 602/459, loss: 0.489022821187973 2023-01-24 00:20:52.895556: step: 604/459, loss: 2.1018385887145996 2023-01-24 00:20:53.544418: step: 606/459, loss: 0.6685724258422852 2023-01-24 00:20:54.114022: step: 608/459, loss: 0.5579981207847595 2023-01-24 00:20:54.730489: step: 610/459, loss: 0.8991461992263794 2023-01-24 00:20:55.392545: step: 612/459, loss: 0.6839714646339417 2023-01-24 00:20:56.042306: step: 614/459, loss: 0.40097758173942566 2023-01-24 00:20:56.684223: step: 616/459, loss: 2.264106273651123 2023-01-24 00:20:57.278817: step: 618/459, loss: 1.1046397686004639 2023-01-24 00:20:57.964755: step: 620/459, loss: 2.3681182861328125 2023-01-24 00:20:58.551424: step: 622/459, loss: 0.733434796333313 2023-01-24 00:20:59.166479: step: 624/459, loss: 0.3395853042602539 2023-01-24 00:20:59.788525: step: 626/459, loss: 0.29763370752334595 2023-01-24 00:21:00.405807: step: 628/459, loss: 1.2565821409225464 2023-01-24 00:21:01.074871: step: 630/459, loss: 1.978063702583313 2023-01-24 00:21:01.682524: step: 632/459, loss: 1.713039755821228 2023-01-24 00:21:02.324731: step: 634/459, loss: 0.6272771954536438 2023-01-24 00:21:02.915725: step: 636/459, loss: 1.151561975479126 2023-01-24 00:21:03.494498: step: 638/459, loss: 1.2067441940307617 2023-01-24 00:21:04.158495: step: 640/459, loss: 1.1344982385635376 2023-01-24 00:21:04.777573: step: 642/459, loss: 0.5906795263290405 2023-01-24 00:21:05.419936: step: 644/459, loss: 0.8747051358222961 2023-01-24 00:21:06.040443: step: 646/459, loss: 0.8176027536392212 2023-01-24 00:21:06.676683: step: 648/459, loss: 0.41824182868003845 2023-01-24 00:21:07.269266: step: 650/459, loss: 0.4552389979362488 2023-01-24 00:21:07.860165: step: 652/459, loss: 0.7902160882949829 2023-01-24 00:21:08.476812: step: 654/459, loss: 0.8012642860412598 2023-01-24 00:21:09.085993: step: 656/459, loss: 5.261416912078857 2023-01-24 00:21:09.784388: step: 658/459, loss: 0.6756318807601929 2023-01-24 00:21:10.473564: step: 660/459, loss: 1.5055062770843506 2023-01-24 00:21:11.189543: step: 662/459, loss: 0.7684602737426758 
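The bulk of this log is per-step entries of the form "TIMESTAMP: step: N/459, loss: X", closed out once per epoch by a "Loss:" summary line (e.g. "Loss: 1.383" for epoch 2). A minimal parsing sketch follows; the file name train.log is hypothetical (the output above was captured from stdout), and the assumption that the per-epoch "Loss:" value is the mean of the step losses is not confirmed by the log itself.

import re
from statistics import mean

# Hypothetical file name; adjust to wherever this console output was saved.
with open("train.log", encoding="utf-8") as f:
    text = f.read()

# Each entry looks like: 2023-01-24 00:20:39.183872: step: 560/459, loss: 1.7131949663162231
entry = re.compile(r"step: (\d+)/459, loss: ([0-9.]+)")
losses = [float(loss) for _, loss in entry.findall(text)]

# Mean over every parsed step (all epochs together, not split per epoch).
print(f"parsed {len(losses)} steps, mean loss: {mean(losses):.3f}")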
2023-01-24 00:21:11.788878: step: 664/459, loss: 1.8932502269744873 2023-01-24 00:21:12.415895: step: 666/459, loss: 1.082308292388916 2023-01-24 00:21:13.059677: step: 668/459, loss: 1.1482504606246948 2023-01-24 00:21:13.736271: step: 670/459, loss: 1.718530297279358 2023-01-24 00:21:14.369641: step: 672/459, loss: 0.6806081533432007 2023-01-24 00:21:15.001147: step: 674/459, loss: 1.0159285068511963 2023-01-24 00:21:15.672300: step: 676/459, loss: 1.7938168048858643 2023-01-24 00:21:16.255695: step: 678/459, loss: 1.444655179977417 2023-01-24 00:21:16.869311: step: 680/459, loss: 0.5219036340713501 2023-01-24 00:21:17.473171: step: 682/459, loss: 0.4650307893753052 2023-01-24 00:21:18.030227: step: 684/459, loss: 0.5667250752449036 2023-01-24 00:21:18.692239: step: 686/459, loss: 0.6184502243995667 2023-01-24 00:21:19.293601: step: 688/459, loss: 1.6561170816421509 2023-01-24 00:21:19.892467: step: 690/459, loss: 1.884896159172058 2023-01-24 00:21:20.530770: step: 692/459, loss: 0.14301057159900665 2023-01-24 00:21:21.144632: step: 694/459, loss: 4.930254936218262 2023-01-24 00:21:21.810469: step: 696/459, loss: 0.8584591746330261 2023-01-24 00:21:22.454410: step: 698/459, loss: 1.471092939376831 2023-01-24 00:21:23.085513: step: 700/459, loss: 1.0642640590667725 2023-01-24 00:21:23.735866: step: 702/459, loss: 0.4274587035179138 2023-01-24 00:21:24.289368: step: 704/459, loss: 0.8022940158843994 2023-01-24 00:21:24.911346: step: 706/459, loss: 0.9192591309547424 2023-01-24 00:21:25.466394: step: 708/459, loss: 0.26049286127090454 2023-01-24 00:21:26.097473: step: 710/459, loss: 1.1097264289855957 2023-01-24 00:21:26.683613: step: 712/459, loss: 1.9330253601074219 2023-01-24 00:21:27.328668: step: 714/459, loss: 0.835494339466095 2023-01-24 00:21:28.046303: step: 716/459, loss: 1.1661986112594604 2023-01-24 00:21:28.612436: step: 718/459, loss: 0.3806879222393036 2023-01-24 00:21:29.255426: step: 720/459, loss: 3.3801047801971436 2023-01-24 00:21:29.841817: step: 722/459, loss: 0.1956629604101181 2023-01-24 00:21:30.550674: step: 724/459, loss: 0.7176102995872498 2023-01-24 00:21:31.198588: step: 726/459, loss: 0.8482877612113953 2023-01-24 00:21:31.792295: step: 728/459, loss: 0.4864981472492218 2023-01-24 00:21:32.371337: step: 730/459, loss: 0.7632269263267517 2023-01-24 00:21:32.990405: step: 732/459, loss: 6.718725204467773 2023-01-24 00:21:33.615136: step: 734/459, loss: 1.4955039024353027 2023-01-24 00:21:34.238031: step: 736/459, loss: 1.4946331977844238 2023-01-24 00:21:34.949365: step: 738/459, loss: 0.8571246862411499 2023-01-24 00:21:35.549476: step: 740/459, loss: 0.4617764949798584 2023-01-24 00:21:36.150784: step: 742/459, loss: 1.2032207250595093 2023-01-24 00:21:36.762956: step: 744/459, loss: 0.29747799038887024 2023-01-24 00:21:37.363932: step: 746/459, loss: 0.7276172041893005 2023-01-24 00:21:38.001954: step: 748/459, loss: 0.12189194560050964 2023-01-24 00:21:38.668266: step: 750/459, loss: 1.312324047088623 2023-01-24 00:21:39.272822: step: 752/459, loss: 1.4352073669433594 2023-01-24 00:21:39.888073: step: 754/459, loss: 0.9039232134819031 2023-01-24 00:21:40.481487: step: 756/459, loss: 1.161679983139038 2023-01-24 00:21:41.088601: step: 758/459, loss: 0.275340735912323 2023-01-24 00:21:41.683859: step: 760/459, loss: 0.30794164538383484 2023-01-24 00:21:42.336181: step: 762/459, loss: 0.6207448840141296 2023-01-24 00:21:42.983315: step: 764/459, loss: 1.1599900722503662 2023-01-24 00:21:43.632980: step: 766/459, loss: 0.5538971424102783 2023-01-24 
00:21:44.248997: step: 768/459, loss: 0.36681893467903137 2023-01-24 00:21:44.868394: step: 770/459, loss: 0.36017903685569763 2023-01-24 00:21:45.426926: step: 772/459, loss: 2.3629817962646484 2023-01-24 00:21:46.014705: step: 774/459, loss: 0.35112231969833374 2023-01-24 00:21:46.661308: step: 776/459, loss: 1.8234193325042725 2023-01-24 00:21:47.341695: step: 778/459, loss: 7.793619155883789 2023-01-24 00:21:47.922143: step: 780/459, loss: 0.31389522552490234 2023-01-24 00:21:48.497373: step: 782/459, loss: 1.062631368637085 2023-01-24 00:21:49.119065: step: 784/459, loss: 1.724086046218872 2023-01-24 00:21:49.869604: step: 786/459, loss: 2.1751914024353027 2023-01-24 00:21:50.538476: step: 788/459, loss: 0.4454914331436157 2023-01-24 00:21:51.187084: step: 790/459, loss: 0.4647483825683594 2023-01-24 00:21:51.808388: step: 792/459, loss: 0.9157456159591675 2023-01-24 00:21:52.409995: step: 794/459, loss: 0.7139451503753662 2023-01-24 00:21:53.016710: step: 796/459, loss: 4.144582748413086 2023-01-24 00:21:53.589952: step: 798/459, loss: 0.20290669798851013 2023-01-24 00:21:54.242229: step: 800/459, loss: 0.41203388571739197 2023-01-24 00:21:54.836052: step: 802/459, loss: 1.3885111808776855 2023-01-24 00:21:55.473474: step: 804/459, loss: 0.4456954002380371 2023-01-24 00:21:56.095190: step: 806/459, loss: 1.7348687648773193 2023-01-24 00:21:56.686544: step: 808/459, loss: 1.1554114818572998 2023-01-24 00:21:57.308758: step: 810/459, loss: 2.0871546268463135 2023-01-24 00:21:57.949310: step: 812/459, loss: 0.5756745338439941 2023-01-24 00:21:58.581397: step: 814/459, loss: 0.8266251087188721 2023-01-24 00:21:59.170840: step: 816/459, loss: 1.100521445274353 2023-01-24 00:21:59.784096: step: 818/459, loss: 0.9244635105133057 2023-01-24 00:22:00.413004: step: 820/459, loss: 1.700126051902771 2023-01-24 00:22:00.968206: step: 822/459, loss: 0.9516096711158752 2023-01-24 00:22:01.567184: step: 824/459, loss: 0.6526795625686646 2023-01-24 00:22:02.145850: step: 826/459, loss: 0.3548718988895416 2023-01-24 00:22:02.760202: step: 828/459, loss: 1.567807674407959 2023-01-24 00:22:03.372505: step: 830/459, loss: 0.5296584367752075 2023-01-24 00:22:03.979163: step: 832/459, loss: 1.3750442266464233 2023-01-24 00:22:04.686927: step: 834/459, loss: 0.5094032883644104 2023-01-24 00:22:05.360461: step: 836/459, loss: 0.3739999532699585 2023-01-24 00:22:05.988228: step: 838/459, loss: 0.8038213849067688 2023-01-24 00:22:06.632097: step: 840/459, loss: 0.6276651620864868 2023-01-24 00:22:07.305843: step: 842/459, loss: 1.1992069482803345 2023-01-24 00:22:07.876136: step: 844/459, loss: 5.369886875152588 2023-01-24 00:22:08.520314: step: 846/459, loss: 0.5727553367614746 2023-01-24 00:22:09.201715: step: 848/459, loss: 1.6751593351364136 2023-01-24 00:22:09.829828: step: 850/459, loss: 0.5283336043357849 2023-01-24 00:22:10.478755: step: 852/459, loss: 0.8286944627761841 2023-01-24 00:22:11.121132: step: 854/459, loss: 1.5021897554397583 2023-01-24 00:22:11.756244: step: 856/459, loss: 5.800475120544434 2023-01-24 00:22:12.426942: step: 858/459, loss: 0.8275816440582275 2023-01-24 00:22:13.023515: step: 860/459, loss: 0.8015170693397522 2023-01-24 00:22:13.649767: step: 862/459, loss: 0.5710155367851257 2023-01-24 00:22:14.236517: step: 864/459, loss: 0.4288949966430664 2023-01-24 00:22:14.871270: step: 866/459, loss: 1.0196902751922607 2023-01-24 00:22:15.509575: step: 868/459, loss: 0.2568678557872772 2023-01-24 00:22:16.116211: step: 870/459, loss: 0.65896075963974 2023-01-24 00:22:16.797354: step: 
872/459, loss: 0.601308286190033 2023-01-24 00:22:17.449784: step: 874/459, loss: 1.761462926864624 2023-01-24 00:22:18.137585: step: 876/459, loss: 2.284954071044922 2023-01-24 00:22:18.765868: step: 878/459, loss: 0.7036120295524597 2023-01-24 00:22:19.504381: step: 880/459, loss: 3.5566787719726562 2023-01-24 00:22:20.103272: step: 882/459, loss: 0.456418514251709 2023-01-24 00:22:20.779717: step: 884/459, loss: 4.946844577789307 2023-01-24 00:22:21.437306: step: 886/459, loss: 1.0678330659866333 2023-01-24 00:22:22.033736: step: 888/459, loss: 0.34567582607269287 2023-01-24 00:22:22.690585: step: 890/459, loss: 0.3227412700653076 2023-01-24 00:22:23.287583: step: 892/459, loss: 1.5360851287841797 2023-01-24 00:22:23.889038: step: 894/459, loss: 0.9249432682991028 2023-01-24 00:22:24.482697: step: 896/459, loss: 0.8862950205802917 2023-01-24 00:22:25.139204: step: 898/459, loss: 3.4133644104003906 2023-01-24 00:22:25.812666: step: 900/459, loss: 0.5547088980674744 2023-01-24 00:22:26.446782: step: 902/459, loss: 2.154693603515625 2023-01-24 00:22:27.089761: step: 904/459, loss: 1.2307137250900269 2023-01-24 00:22:27.741516: step: 906/459, loss: 1.0516743659973145 2023-01-24 00:22:28.325921: step: 908/459, loss: 0.339028537273407 2023-01-24 00:22:28.924504: step: 910/459, loss: 0.3682626187801361 2023-01-24 00:22:29.557962: step: 912/459, loss: 6.846502304077148 2023-01-24 00:22:30.103567: step: 914/459, loss: 1.9951707124710083 2023-01-24 00:22:30.726694: step: 916/459, loss: 2.289910316467285 2023-01-24 00:22:31.346132: step: 918/459, loss: 0.448527455329895 2023-01-24 00:22:31.806902: step: 920/459, loss: 1.1217601299285889 ================================================== Loss: 1.383 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3226771034143062, 'r': 0.24887005329496587, 'f1': 0.2810080217567491}, 'combined': 0.20705854234707827, 'epoch': 2} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31765742528569446, 'r': 0.21878583102707994, 'f1': 0.25911014056081905}, 'combined': 0.16583048995892416, 'epoch': 2} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3242104984322021, 'r': 0.2537299552947669, 'f1': 0.2846726327697385}, 'combined': 0.20975878204085993, 'epoch': 2} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33032768839104926, 'r': 0.22151738813156568, 'f1': 0.26519517840411233}, 'combined': 0.16972491417863186, 'epoch': 2} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33310133912201095, 'r': 0.2562802363377286, 'f1': 0.28968428423644965}, 'combined': 0.21345157785843658, 'epoch': 2} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3161130992067871, 'r': 0.22472767598155227, 'f1': 0.26269972750234594}, 'combined': 0.18835074802054994, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3098290598290598, 'r': 0.3452380952380952, 'f1': 0.3265765765765765}, 'combined': 0.21771771771771767, 'epoch': 2} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.10526315789473684, 'r': 0.08695652173913043, 'f1': 0.09523809523809525}, 'combined': 0.04761904761904762, 'epoch': 2} Sample Russian: {'template': 
{'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2857142857142857, 'r': 0.06896551724137931, 'f1': 0.1111111111111111}, 'combined': 0.07407407407407407, 'epoch': 2} New best chinese model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3226771034143062, 'r': 0.24887005329496587, 'f1': 0.2810080217567491}, 'combined': 0.20705854234707827, 'epoch': 2} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31765742528569446, 'r': 0.21878583102707994, 'f1': 0.25911014056081905}, 'combined': 0.16583048995892416, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3098290598290598, 'r': 0.3452380952380952, 'f1': 0.3265765765765765}, 'combined': 0.21771771771771767, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3508175186155378, 'r': 0.20160401825921262, 'f1': 0.2560588851359507}, 'combined': 0.18867496799491104, 'epoch': 1} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3783908438078241, 'r': 0.14991550203446885, 'f1': 0.21474908927794695}, 'combined': 0.13743941713788602, 'epoch': 1} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.4166666666666667, 'r': 0.16304347826086957, 'f1': 0.23437500000000003}, 'combined': 0.11718750000000001, 'epoch': 1} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35683978582201553, 'r': 0.20506482965953254, 'f1': 0.2604544895315551}, 'combined': 0.19191383439167217, 'epoch': 1} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3885350794461351, 'r': 0.1574570992859677, 'f1': 0.22409700711280306}, 'combined': 0.1606733258544626, 'epoch': 1} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4583333333333333, 'r': 0.09482758620689655, 'f1': 0.15714285714285717}, 'combined': 0.10476190476190478, 'epoch': 1} ****************************** Epoch: 3 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:25:18.053079: step: 2/459, loss: 1.34539794921875 2023-01-24 00:25:18.660279: step: 4/459, loss: 0.6843711137771606 2023-01-24 00:25:19.336910: step: 6/459, loss: 1.5538629293441772 2023-01-24 00:25:19.945901: step: 8/459, loss: 0.2671789824962616 2023-01-24 00:25:20.568192: step: 10/459, loss: 1.7384154796600342 2023-01-24 00:25:21.195284: step: 12/459, loss: 0.5930419564247131 2023-01-24 00:25:21.817330: step: 14/459, loss: 3.6631686687469482 2023-01-24 00:25:22.481155: step: 16/459, loss: 0.38985246419906616 2023-01-24 00:25:23.078579: step: 18/459, loss: 0.4386817514896393 2023-01-24 00:25:23.720277: step: 20/459, loss: 0.4432528018951416 2023-01-24 00:25:24.331987: step: 22/459, loss: 2.2205755710601807 2023-01-24 00:25:24.963401: step: 24/459, loss: 0.5100850462913513 2023-01-24 00:25:25.601411: step: 26/459, loss: 0.8105925917625427 2023-01-24 00:25:26.263051: step: 28/459, loss: 0.25590530037879944 2023-01-24 
00:25:26.848401: step: 30/459, loss: 1.8658726215362549 2023-01-24 00:25:27.448521: step: 32/459, loss: 0.4916907548904419 2023-01-24 00:25:28.061353: step: 34/459, loss: 0.5590140223503113 2023-01-24 00:25:28.649186: step: 36/459, loss: 1.1583956480026245 2023-01-24 00:25:29.247215: step: 38/459, loss: 0.6143373250961304 2023-01-24 00:25:29.832296: step: 40/459, loss: 0.5093900561332703 2023-01-24 00:25:30.480281: step: 42/459, loss: 0.5299316048622131 2023-01-24 00:25:31.071182: step: 44/459, loss: 0.8451926708221436 2023-01-24 00:25:31.669504: step: 46/459, loss: 0.2538551688194275 2023-01-24 00:25:32.305935: step: 48/459, loss: 0.5046674609184265 2023-01-24 00:25:32.920429: step: 50/459, loss: 0.4313194751739502 2023-01-24 00:25:33.492734: step: 52/459, loss: 0.37370485067367554 2023-01-24 00:25:34.069796: step: 54/459, loss: 0.26764440536499023 2023-01-24 00:25:34.691132: step: 56/459, loss: 0.42928260564804077 2023-01-24 00:25:35.312700: step: 58/459, loss: 0.7123456001281738 2023-01-24 00:25:35.925691: step: 60/459, loss: 0.559531569480896 2023-01-24 00:25:36.494287: step: 62/459, loss: 0.6096663475036621 2023-01-24 00:25:37.078743: step: 64/459, loss: 1.050963044166565 2023-01-24 00:25:37.669544: step: 66/459, loss: 1.4271619319915771 2023-01-24 00:25:38.296240: step: 68/459, loss: 1.0978344678878784 2023-01-24 00:25:38.876079: step: 70/459, loss: 1.4786887168884277 2023-01-24 00:25:39.468081: step: 72/459, loss: 0.6983580589294434 2023-01-24 00:25:40.046878: step: 74/459, loss: 2.163374423980713 2023-01-24 00:25:40.712778: step: 76/459, loss: 1.5105042457580566 2023-01-24 00:25:41.320744: step: 78/459, loss: 0.934279203414917 2023-01-24 00:25:41.924364: step: 80/459, loss: 3.444152355194092 2023-01-24 00:25:42.503049: step: 82/459, loss: 2.326418161392212 2023-01-24 00:25:43.202800: step: 84/459, loss: 1.7608872652053833 2023-01-24 00:25:43.830203: step: 86/459, loss: 1.0154283046722412 2023-01-24 00:25:44.418545: step: 88/459, loss: 0.7649128437042236 2023-01-24 00:25:45.041310: step: 90/459, loss: 0.4794178605079651 2023-01-24 00:25:45.628241: step: 92/459, loss: 2.03389573097229 2023-01-24 00:25:46.172758: step: 94/459, loss: 0.5906251668930054 2023-01-24 00:25:46.851434: step: 96/459, loss: 1.4894298315048218 2023-01-24 00:25:47.430524: step: 98/459, loss: 0.5264460444450378 2023-01-24 00:25:48.057558: step: 100/459, loss: 1.7541446685791016 2023-01-24 00:25:48.629589: step: 102/459, loss: 0.7523951530456543 2023-01-24 00:25:49.290722: step: 104/459, loss: 0.7883264422416687 2023-01-24 00:25:49.912028: step: 106/459, loss: 0.8281095027923584 2023-01-24 00:25:50.504184: step: 108/459, loss: 0.23071280121803284 2023-01-24 00:25:51.151043: step: 110/459, loss: 0.8264546990394592 2023-01-24 00:25:51.768387: step: 112/459, loss: 1.1084377765655518 2023-01-24 00:25:52.426802: step: 114/459, loss: 1.1930248737335205 2023-01-24 00:25:53.031617: step: 116/459, loss: 3.85786771774292 2023-01-24 00:25:53.677548: step: 118/459, loss: 0.6250128149986267 2023-01-24 00:25:54.312808: step: 120/459, loss: 0.3715479075908661 2023-01-24 00:25:54.932589: step: 122/459, loss: 0.7280361652374268 2023-01-24 00:25:55.561200: step: 124/459, loss: 3.4200425148010254 2023-01-24 00:25:56.245408: step: 126/459, loss: 3.2592058181762695 2023-01-24 00:25:56.851249: step: 128/459, loss: 1.651176929473877 2023-01-24 00:25:57.476904: step: 130/459, loss: 0.8011223077774048 2023-01-24 00:25:58.142214: step: 132/459, loss: 1.4701284170150757 2023-01-24 00:25:58.741373: step: 134/459, loss: 3.4956114292144775 
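Each evaluation block in this log reports p, r and f1 at the template and slot level plus a single "combined" number. The logged values are consistent with f1 being the usual harmonic mean of p and r, and with "combined" being the product of the template f1 and the slot f1: for the epoch-2 Dev Chinese entry, 2*0.32268*0.24887/(0.32268+0.24887) is about 0.28101 and 0.73684*0.28101 is about 0.20706, both matching the log. Whether train.py computes the scores exactly this way is not shown here, but the arithmetic checks out; a short sketch of that check, using only numbers copied from the log:

def f1(p: float, r: float) -> float:
    """Harmonic mean of precision and recall."""
    return 2 * p * r / (p + r)

# Values copied from the epoch-2 "Dev Chinese" block above.
template_f1 = f1(1.0, 0.5833333333333334)               # ~0.7368421052631579
slot_f1 = f1(0.3226771034143062, 0.24887005329496587)   # ~0.2810080217567491
combined = template_f1 * slot_f1                         # ~0.20705854234707827
print(template_f1, slot_f1, combined)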
2023-01-24 00:25:59.339637: step: 136/459, loss: 3.3017916679382324 2023-01-24 00:25:59.927710: step: 138/459, loss: 0.9289053082466125 2023-01-24 00:26:00.524516: step: 140/459, loss: 0.271335244178772 2023-01-24 00:26:01.182750: step: 142/459, loss: 0.7466456294059753 2023-01-24 00:26:01.752608: step: 144/459, loss: 1.5248656272888184 2023-01-24 00:26:02.337473: step: 146/459, loss: 1.850051760673523 2023-01-24 00:26:03.041484: step: 148/459, loss: 0.7854592204093933 2023-01-24 00:26:03.643919: step: 150/459, loss: 1.4592225551605225 2023-01-24 00:26:04.277573: step: 152/459, loss: 0.5339467525482178 2023-01-24 00:26:04.894057: step: 154/459, loss: 0.14101693034172058 2023-01-24 00:26:05.464658: step: 156/459, loss: 0.5295711159706116 2023-01-24 00:26:06.047180: step: 158/459, loss: 0.6650018095970154 2023-01-24 00:26:06.641519: step: 160/459, loss: 0.20120881497859955 2023-01-24 00:26:07.348060: step: 162/459, loss: 1.823500633239746 2023-01-24 00:26:07.942413: step: 164/459, loss: 0.6668996810913086 2023-01-24 00:26:08.555698: step: 166/459, loss: 0.32545697689056396 2023-01-24 00:26:09.183825: step: 168/459, loss: 0.21772123873233795 2023-01-24 00:26:09.843412: step: 170/459, loss: 1.6775672435760498 2023-01-24 00:26:10.418270: step: 172/459, loss: 1.7580591440200806 2023-01-24 00:26:11.046513: step: 174/459, loss: 0.4702569246292114 2023-01-24 00:26:11.670922: step: 176/459, loss: 0.17894011735916138 2023-01-24 00:26:12.352854: step: 178/459, loss: 0.7385835647583008 2023-01-24 00:26:12.986661: step: 180/459, loss: 1.665440320968628 2023-01-24 00:26:13.635949: step: 182/459, loss: 0.4918750524520874 2023-01-24 00:26:14.281073: step: 184/459, loss: 1.4709984064102173 2023-01-24 00:26:14.893140: step: 186/459, loss: 0.7562016844749451 2023-01-24 00:26:15.553707: step: 188/459, loss: 1.0279141664505005 2023-01-24 00:26:16.198620: step: 190/459, loss: 1.195880651473999 2023-01-24 00:26:16.855446: step: 192/459, loss: 0.694482684135437 2023-01-24 00:26:17.423892: step: 194/459, loss: 0.5465425848960876 2023-01-24 00:26:18.042895: step: 196/459, loss: 0.4278269112110138 2023-01-24 00:26:18.721819: step: 198/459, loss: 0.5577354431152344 2023-01-24 00:26:19.294311: step: 200/459, loss: 0.3260462284088135 2023-01-24 00:26:19.979763: step: 202/459, loss: 1.0445945262908936 2023-01-24 00:26:20.551840: step: 204/459, loss: 0.9469627141952515 2023-01-24 00:26:21.182060: step: 206/459, loss: 2.2768847942352295 2023-01-24 00:26:21.866250: step: 208/459, loss: 0.2896121144294739 2023-01-24 00:26:22.469652: step: 210/459, loss: 0.9198966026306152 2023-01-24 00:26:23.067818: step: 212/459, loss: 3.0267534255981445 2023-01-24 00:26:23.686935: step: 214/459, loss: 0.5123363733291626 2023-01-24 00:26:24.273082: step: 216/459, loss: 0.1880437433719635 2023-01-24 00:26:24.919305: step: 218/459, loss: 1.0389924049377441 2023-01-24 00:26:25.500833: step: 220/459, loss: 0.15956716239452362 2023-01-24 00:26:26.168786: step: 222/459, loss: 0.17872288823127747 2023-01-24 00:26:26.779159: step: 224/459, loss: 2.641927480697632 2023-01-24 00:26:27.366146: step: 226/459, loss: 0.8192496299743652 2023-01-24 00:26:27.988455: step: 228/459, loss: 0.6978987455368042 2023-01-24 00:26:28.553396: step: 230/459, loss: 1.476753830909729 2023-01-24 00:26:29.184066: step: 232/459, loss: 3.344637870788574 2023-01-24 00:26:29.779477: step: 234/459, loss: 0.2808527648448944 2023-01-24 00:26:30.414049: step: 236/459, loss: 0.6435661315917969 2023-01-24 00:26:31.062425: step: 238/459, loss: 1.4761948585510254 2023-01-24 
00:26:31.659951: step: 240/459, loss: 3.7901718616485596 2023-01-24 00:26:32.292916: step: 242/459, loss: 1.3222386837005615 2023-01-24 00:26:32.897104: step: 244/459, loss: 0.3945029079914093 2023-01-24 00:26:33.532398: step: 246/459, loss: 1.8541406393051147 2023-01-24 00:26:34.195241: step: 248/459, loss: 1.3616605997085571 2023-01-24 00:26:34.891249: step: 250/459, loss: 0.7738597989082336 2023-01-24 00:26:35.548617: step: 252/459, loss: 1.5646867752075195 2023-01-24 00:26:36.215082: step: 254/459, loss: 0.7252985239028931 2023-01-24 00:26:36.821345: step: 256/459, loss: 0.3712087571620941 2023-01-24 00:26:37.424615: step: 258/459, loss: 0.422787070274353 2023-01-24 00:26:38.020860: step: 260/459, loss: 0.599609375 2023-01-24 00:26:38.570792: step: 262/459, loss: 0.7793545722961426 2023-01-24 00:26:39.235506: step: 264/459, loss: 1.8713089227676392 2023-01-24 00:26:39.854635: step: 266/459, loss: 1.4687752723693848 2023-01-24 00:26:40.511404: step: 268/459, loss: 0.6543996334075928 2023-01-24 00:26:41.129215: step: 270/459, loss: 0.8025096654891968 2023-01-24 00:26:41.769759: step: 272/459, loss: 1.8735666275024414 2023-01-24 00:26:42.390940: step: 274/459, loss: 0.9109737873077393 2023-01-24 00:26:43.027517: step: 276/459, loss: 1.2559928894042969 2023-01-24 00:26:43.610998: step: 278/459, loss: 0.22121627628803253 2023-01-24 00:26:44.205390: step: 280/459, loss: 0.6442551016807556 2023-01-24 00:26:44.838362: step: 282/459, loss: 0.8543239831924438 2023-01-24 00:26:45.523827: step: 284/459, loss: 2.0047237873077393 2023-01-24 00:26:46.205735: step: 286/459, loss: 1.5604041814804077 2023-01-24 00:26:46.854862: step: 288/459, loss: 1.8659075498580933 2023-01-24 00:26:47.518324: step: 290/459, loss: 1.5779672861099243 2023-01-24 00:26:48.118481: step: 292/459, loss: 0.25934991240501404 2023-01-24 00:26:48.740622: step: 294/459, loss: 0.6417565941810608 2023-01-24 00:26:49.342013: step: 296/459, loss: 0.425530344247818 2023-01-24 00:26:49.961599: step: 298/459, loss: 0.4093102514743805 2023-01-24 00:26:50.537679: step: 300/459, loss: 0.602676510810852 2023-01-24 00:26:51.191924: step: 302/459, loss: 2.729304313659668 2023-01-24 00:26:51.783099: step: 304/459, loss: 0.8125128746032715 2023-01-24 00:26:52.370181: step: 306/459, loss: 1.6581532955169678 2023-01-24 00:26:52.959217: step: 308/459, loss: 0.4438613951206207 2023-01-24 00:26:53.566779: step: 310/459, loss: 0.8310831785202026 2023-01-24 00:26:54.206538: step: 312/459, loss: 0.6865506172180176 2023-01-24 00:26:54.790934: step: 314/459, loss: 1.0549520254135132 2023-01-24 00:26:55.409785: step: 316/459, loss: 1.1775434017181396 2023-01-24 00:26:56.024437: step: 318/459, loss: 0.8357910513877869 2023-01-24 00:26:56.665759: step: 320/459, loss: 1.3560353517532349 2023-01-24 00:26:57.288274: step: 322/459, loss: 0.9045813083648682 2023-01-24 00:26:57.801495: step: 324/459, loss: 0.48862871527671814 2023-01-24 00:26:58.388456: step: 326/459, loss: 1.629237413406372 2023-01-24 00:26:58.982920: step: 328/459, loss: 0.5659946799278259 2023-01-24 00:26:59.573809: step: 330/459, loss: 1.6094141006469727 2023-01-24 00:27:00.250179: step: 332/459, loss: 1.2978315353393555 2023-01-24 00:27:00.819869: step: 334/459, loss: 1.3305429220199585 2023-01-24 00:27:01.435060: step: 336/459, loss: 0.29740822315216064 2023-01-24 00:27:02.034239: step: 338/459, loss: 0.8841338157653809 2023-01-24 00:27:02.627889: step: 340/459, loss: 0.2911653518676758 2023-01-24 00:27:03.303171: step: 342/459, loss: 0.46778586506843567 2023-01-24 00:27:03.878668: step: 
344/459, loss: 1.3095018863677979 2023-01-24 00:27:04.547376: step: 346/459, loss: 0.6579856872558594 2023-01-24 00:27:05.151389: step: 348/459, loss: 0.6794909238815308 2023-01-24 00:27:05.784081: step: 350/459, loss: 0.20533934235572815 2023-01-24 00:27:06.384604: step: 352/459, loss: 3.8806116580963135 2023-01-24 00:27:06.959589: step: 354/459, loss: 0.6522988080978394 2023-01-24 00:27:07.605293: step: 356/459, loss: 1.4950547218322754 2023-01-24 00:27:08.255982: step: 358/459, loss: 1.7710545063018799 2023-01-24 00:27:08.870975: step: 360/459, loss: 4.145994186401367 2023-01-24 00:27:09.503157: step: 362/459, loss: 1.6224780082702637 2023-01-24 00:27:10.186511: step: 364/459, loss: 0.5022554993629456 2023-01-24 00:27:10.720869: step: 366/459, loss: 0.4681563973426819 2023-01-24 00:27:11.334272: step: 368/459, loss: 0.6489335894584656 2023-01-24 00:27:11.982052: step: 370/459, loss: 0.7894772291183472 2023-01-24 00:27:12.608434: step: 372/459, loss: 1.2394025325775146 2023-01-24 00:27:13.191657: step: 374/459, loss: 0.6670598387718201 2023-01-24 00:27:13.821123: step: 376/459, loss: 4.135623931884766 2023-01-24 00:27:14.516043: step: 378/459, loss: 0.7367656230926514 2023-01-24 00:27:15.182434: step: 380/459, loss: 1.6367018222808838 2023-01-24 00:27:15.789897: step: 382/459, loss: 0.26106640696525574 2023-01-24 00:27:16.386551: step: 384/459, loss: 1.1741548776626587 2023-01-24 00:27:16.975788: step: 386/459, loss: 0.7174557447433472 2023-01-24 00:27:17.594153: step: 388/459, loss: 0.9143613576889038 2023-01-24 00:27:18.238642: step: 390/459, loss: 0.6529849171638489 2023-01-24 00:27:18.882761: step: 392/459, loss: 3.2839932441711426 2023-01-24 00:27:19.506138: step: 394/459, loss: 1.314681053161621 2023-01-24 00:27:20.121453: step: 396/459, loss: 0.4911077320575714 2023-01-24 00:27:20.670845: step: 398/459, loss: 0.9325218200683594 2023-01-24 00:27:21.318267: step: 400/459, loss: 0.6106122732162476 2023-01-24 00:27:21.993120: step: 402/459, loss: 2.575706958770752 2023-01-24 00:27:22.611296: step: 404/459, loss: 0.5952858328819275 2023-01-24 00:27:23.264442: step: 406/459, loss: 0.5675975680351257 2023-01-24 00:27:23.907508: step: 408/459, loss: 0.4560191035270691 2023-01-24 00:27:24.552998: step: 410/459, loss: 0.7848914861679077 2023-01-24 00:27:25.154864: step: 412/459, loss: 0.3955594301223755 2023-01-24 00:27:25.790559: step: 414/459, loss: 1.509645938873291 2023-01-24 00:27:26.384916: step: 416/459, loss: 0.9425466060638428 2023-01-24 00:27:27.034611: step: 418/459, loss: 1.6929399967193604 2023-01-24 00:27:27.674070: step: 420/459, loss: 0.5200347900390625 2023-01-24 00:27:28.258914: step: 422/459, loss: 1.138954520225525 2023-01-24 00:27:28.861657: step: 424/459, loss: 1.1447030305862427 2023-01-24 00:27:29.469246: step: 426/459, loss: 1.0808771848678589 2023-01-24 00:27:30.048112: step: 428/459, loss: 0.5504215955734253 2023-01-24 00:27:30.774368: step: 430/459, loss: 0.9079512357711792 2023-01-24 00:27:31.391760: step: 432/459, loss: 1.242840051651001 2023-01-24 00:27:32.053033: step: 434/459, loss: 0.2510308623313904 2023-01-24 00:27:32.641728: step: 436/459, loss: 0.48212215304374695 2023-01-24 00:27:33.270412: step: 438/459, loss: 0.926152765750885 2023-01-24 00:27:33.899687: step: 440/459, loss: 1.69420325756073 2023-01-24 00:27:34.488403: step: 442/459, loss: 0.5266532897949219 2023-01-24 00:27:35.089923: step: 444/459, loss: 0.35694989562034607 2023-01-24 00:27:35.589639: step: 446/459, loss: 0.35843679308891296 2023-01-24 00:27:36.235908: step: 448/459, loss: 
2.5587031841278076 2023-01-24 00:27:36.820612: step: 450/459, loss: 0.8052863478660583 2023-01-24 00:27:37.455738: step: 452/459, loss: 0.4123072922229767 2023-01-24 00:27:38.064253: step: 454/459, loss: 6.178079605102539 2023-01-24 00:27:38.650718: step: 456/459, loss: 0.40033483505249023 2023-01-24 00:27:39.263049: step: 458/459, loss: 1.2009406089782715 2023-01-24 00:27:39.885899: step: 460/459, loss: 0.2893584966659546 2023-01-24 00:27:40.514353: step: 462/459, loss: 0.8043610453605652 2023-01-24 00:27:41.121193: step: 464/459, loss: 0.6275294423103333 2023-01-24 00:27:41.793427: step: 466/459, loss: 0.8714760541915894 2023-01-24 00:27:42.466639: step: 468/459, loss: 1.6893222332000732 2023-01-24 00:27:43.049990: step: 470/459, loss: 0.9037781357765198 2023-01-24 00:27:43.734358: step: 472/459, loss: 0.3825099468231201 2023-01-24 00:27:44.392395: step: 474/459, loss: 0.6384893655776978 2023-01-24 00:27:44.984747: step: 476/459, loss: 2.444580316543579 2023-01-24 00:27:45.611834: step: 478/459, loss: 0.7209630608558655 2023-01-24 00:27:46.285973: step: 480/459, loss: 1.981106162071228 2023-01-24 00:27:46.944499: step: 482/459, loss: 0.9963701367378235 2023-01-24 00:27:47.574297: step: 484/459, loss: 0.27986183762550354 2023-01-24 00:27:48.166503: step: 486/459, loss: 0.3144344687461853 2023-01-24 00:27:48.887106: step: 488/459, loss: 3.5815420150756836 2023-01-24 00:27:49.515241: step: 490/459, loss: 0.6360688805580139 2023-01-24 00:27:50.162277: step: 492/459, loss: 1.5995702743530273 2023-01-24 00:27:50.811475: step: 494/459, loss: 0.7736324667930603 2023-01-24 00:27:51.447770: step: 496/459, loss: 0.2937704920768738 2023-01-24 00:27:52.096513: step: 498/459, loss: 18.115875244140625 2023-01-24 00:27:52.696317: step: 500/459, loss: 0.48584499955177307 2023-01-24 00:27:53.309335: step: 502/459, loss: 0.5950547456741333 2023-01-24 00:27:53.915227: step: 504/459, loss: 0.9921157360076904 2023-01-24 00:27:54.531927: step: 506/459, loss: 2.205368995666504 2023-01-24 00:27:55.111765: step: 508/459, loss: 0.2506319284439087 2023-01-24 00:27:55.826578: step: 510/459, loss: 1.219390869140625 2023-01-24 00:27:56.443662: step: 512/459, loss: 1.6226660013198853 2023-01-24 00:27:57.122309: step: 514/459, loss: 0.8613193035125732 2023-01-24 00:27:57.768960: step: 516/459, loss: 5.430402755737305 2023-01-24 00:27:58.439441: step: 518/459, loss: 15.198381423950195 2023-01-24 00:27:59.037366: step: 520/459, loss: 1.239107370376587 2023-01-24 00:27:59.702196: step: 522/459, loss: 0.9206216335296631 2023-01-24 00:28:00.296630: step: 524/459, loss: 1.0033116340637207 2023-01-24 00:28:00.969967: step: 526/459, loss: 0.5496668219566345 2023-01-24 00:28:01.585327: step: 528/459, loss: 0.4695923924446106 2023-01-24 00:28:02.169874: step: 530/459, loss: 0.8775062561035156 2023-01-24 00:28:02.796922: step: 532/459, loss: 0.2736675441265106 2023-01-24 00:28:03.418640: step: 534/459, loss: 0.7926996350288391 2023-01-24 00:28:04.056894: step: 536/459, loss: 0.9945271015167236 2023-01-24 00:28:04.726657: step: 538/459, loss: 1.0895674228668213 2023-01-24 00:28:05.375584: step: 540/459, loss: 0.5121865272521973 2023-01-24 00:28:06.007354: step: 542/459, loss: 1.4690622091293335 2023-01-24 00:28:06.607450: step: 544/459, loss: 0.8774546384811401 2023-01-24 00:28:07.253609: step: 546/459, loss: 0.9585673213005066 2023-01-24 00:28:07.856599: step: 548/459, loss: 0.7917250394821167 2023-01-24 00:28:08.479341: step: 550/459, loss: 0.8697558641433716 2023-01-24 00:28:09.086383: step: 552/459, loss: 0.2688852846622467 
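The per-step entries above share one format: a timestamp, a step counter (which advances by 2 and overruns the printed /459 denominator), and that step's loss, with occasional spikes such as 18.12 at step 498 and 15.20 at step 518 just above. The sketch below is one minimal way to sanity-check an epoch from this output, assuming the console log has been saved to a file (the name train.log is hypothetical) and that the "Loss:" figure printed at each epoch's end is the arithmetic mean of these per-step losses; feed it a single epoch's slice of the log to compare against that figure and to list the largest spikes.

import re
from statistics import mean

LOG_PATH = "train.log"  # hypothetical: this console output redirected to disk

# Matches entries like "... step: 498/459, loss: 18.115875244140625"
STEP_RE = re.compile(r"step: (\d+)/\d+, loss: ([0-9.]+)")

entries = []  # (step, loss) pairs in file order
with open(LOG_PATH) as f:
    for line in f:
        for step, loss in STEP_RE.findall(line):
            entries.append((int(step), float(loss)))

if entries:
    print(f"parsed {len(entries)} step entries")
    print(f"mean loss: {mean(l for _, l in entries):.3f}")  # compare with the epoch's "Loss:" line
    # Largest losses first, to spot spikes like the ones noted above.
    for step, loss in sorted(entries, key=lambda e: e[1], reverse=True)[:5]:
        print(f"  step {step}: loss {loss:.3f}")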
2023-01-24 00:28:09.776638: step: 554/459, loss: 1.7264035940170288 2023-01-24 00:28:10.374053: step: 556/459, loss: 1.5549944639205933 2023-01-24 00:28:11.096160: step: 558/459, loss: 0.3411080539226532 2023-01-24 00:28:11.699403: step: 560/459, loss: 0.2932255268096924 2023-01-24 00:28:12.320120: step: 562/459, loss: 0.8048446774482727 2023-01-24 00:28:12.945133: step: 564/459, loss: 0.4697575271129608 2023-01-24 00:28:13.613508: step: 566/459, loss: 0.6506484746932983 2023-01-24 00:28:14.211845: step: 568/459, loss: 1.5920615196228027 2023-01-24 00:28:14.805497: step: 570/459, loss: 0.23091834783554077 2023-01-24 00:28:15.386623: step: 572/459, loss: 0.5932387709617615 2023-01-24 00:28:16.114227: step: 574/459, loss: 1.6571087837219238 2023-01-24 00:28:16.772268: step: 576/459, loss: 1.0936177968978882 2023-01-24 00:28:17.373778: step: 578/459, loss: 0.33066534996032715 2023-01-24 00:28:18.011343: step: 580/459, loss: 0.36229029297828674 2023-01-24 00:28:18.605727: step: 582/459, loss: 0.43046316504478455 2023-01-24 00:28:19.262079: step: 584/459, loss: 2.7253103256225586 2023-01-24 00:28:19.848803: step: 586/459, loss: 1.226413607597351 2023-01-24 00:28:20.399386: step: 588/459, loss: 0.8200510740280151 2023-01-24 00:28:21.017871: step: 590/459, loss: 1.0346331596374512 2023-01-24 00:28:21.626206: step: 592/459, loss: 0.991756796836853 2023-01-24 00:28:22.243543: step: 594/459, loss: 1.4009437561035156 2023-01-24 00:28:22.843869: step: 596/459, loss: 0.6716774702072144 2023-01-24 00:28:23.485067: step: 598/459, loss: 0.5368046164512634 2023-01-24 00:28:24.122819: step: 600/459, loss: 1.1797130107879639 2023-01-24 00:28:24.714650: step: 602/459, loss: 0.7081166505813599 2023-01-24 00:28:25.336508: step: 604/459, loss: 2.255000591278076 2023-01-24 00:28:25.984925: step: 606/459, loss: 4.606207847595215 2023-01-24 00:28:26.588448: step: 608/459, loss: 0.3206142783164978 2023-01-24 00:28:27.212077: step: 610/459, loss: 2.5076637268066406 2023-01-24 00:28:27.840633: step: 612/459, loss: 0.185363307595253 2023-01-24 00:28:28.478813: step: 614/459, loss: 1.0422735214233398 2023-01-24 00:28:29.091726: step: 616/459, loss: 0.5050920248031616 2023-01-24 00:28:29.725545: step: 618/459, loss: 0.420908659696579 2023-01-24 00:28:30.359276: step: 620/459, loss: 1.8876844644546509 2023-01-24 00:28:30.989179: step: 622/459, loss: 0.4792257845401764 2023-01-24 00:28:31.613812: step: 624/459, loss: 1.2788257598876953 2023-01-24 00:28:32.227812: step: 626/459, loss: 0.2524569630622864 2023-01-24 00:28:32.808362: step: 628/459, loss: 0.7084643840789795 2023-01-24 00:28:33.402272: step: 630/459, loss: 6.153811454772949 2023-01-24 00:28:33.995422: step: 632/459, loss: 0.4269680976867676 2023-01-24 00:28:34.594489: step: 634/459, loss: 1.1386045217514038 2023-01-24 00:28:35.179391: step: 636/459, loss: 4.410979270935059 2023-01-24 00:28:35.831951: step: 638/459, loss: 0.9385034441947937 2023-01-24 00:28:36.494451: step: 640/459, loss: 1.019546627998352 2023-01-24 00:28:37.131329: step: 642/459, loss: 0.25148919224739075 2023-01-24 00:28:37.809943: step: 644/459, loss: 0.8925319910049438 2023-01-24 00:28:38.392723: step: 646/459, loss: 0.35423922538757324 2023-01-24 00:28:39.038039: step: 648/459, loss: 3.75087308883667 2023-01-24 00:28:39.728599: step: 650/459, loss: 2.211488723754883 2023-01-24 00:28:40.325105: step: 652/459, loss: 1.1153678894042969 2023-01-24 00:28:40.931103: step: 654/459, loss: 1.58473539352417 2023-01-24 00:28:41.499508: step: 656/459, loss: 0.4009247124195099 2023-01-24 
00:28:42.098909: step: 658/459, loss: 0.18195658922195435 2023-01-24 00:28:42.736194: step: 660/459, loss: 0.3752962052822113 2023-01-24 00:28:43.439053: step: 662/459, loss: 1.5630545616149902 2023-01-24 00:28:44.063429: step: 664/459, loss: 0.503370463848114 2023-01-24 00:28:44.657892: step: 666/459, loss: 3.5021767616271973 2023-01-24 00:28:45.256678: step: 668/459, loss: 0.7952044010162354 2023-01-24 00:28:45.911483: step: 670/459, loss: 0.4048486351966858 2023-01-24 00:28:46.582471: step: 672/459, loss: 1.678531527519226 2023-01-24 00:28:47.216327: step: 674/459, loss: 0.5111985206604004 2023-01-24 00:28:47.888862: step: 676/459, loss: 1.1700975894927979 2023-01-24 00:28:48.464687: step: 678/459, loss: 0.8400508165359497 2023-01-24 00:28:49.100371: step: 680/459, loss: 0.549185037612915 2023-01-24 00:28:49.787636: step: 682/459, loss: 0.6185118556022644 2023-01-24 00:28:50.379349: step: 684/459, loss: 0.352055162191391 2023-01-24 00:28:51.045227: step: 686/459, loss: 0.5401342511177063 2023-01-24 00:28:51.607345: step: 688/459, loss: 0.6598278284072876 2023-01-24 00:28:52.249548: step: 690/459, loss: 0.903911828994751 2023-01-24 00:28:52.873556: step: 692/459, loss: 0.25315579771995544 2023-01-24 00:28:53.544486: step: 694/459, loss: 0.29895687103271484 2023-01-24 00:28:54.109735: step: 696/459, loss: 1.5049293041229248 2023-01-24 00:28:54.752201: step: 698/459, loss: 0.26014265418052673 2023-01-24 00:28:55.394111: step: 700/459, loss: 2.0663931369781494 2023-01-24 00:28:55.990830: step: 702/459, loss: 0.3371526300907135 2023-01-24 00:28:56.628125: step: 704/459, loss: 0.3592396676540375 2023-01-24 00:28:57.226978: step: 706/459, loss: 0.1669059544801712 2023-01-24 00:28:57.783931: step: 708/459, loss: 0.24367360770702362 2023-01-24 00:28:58.399374: step: 710/459, loss: 5.038712024688721 2023-01-24 00:28:59.027631: step: 712/459, loss: 0.5214122533798218 2023-01-24 00:28:59.727761: step: 714/459, loss: 1.2785910367965698 2023-01-24 00:29:00.384270: step: 716/459, loss: 0.2114851176738739 2023-01-24 00:29:00.942690: step: 718/459, loss: 0.23581266403198242 2023-01-24 00:29:01.517151: step: 720/459, loss: 1.0103063583374023 2023-01-24 00:29:02.171606: step: 722/459, loss: 1.5832810401916504 2023-01-24 00:29:02.837132: step: 724/459, loss: 1.3579044342041016 2023-01-24 00:29:03.411377: step: 726/459, loss: 1.7432422637939453 2023-01-24 00:29:04.119042: step: 728/459, loss: 8.680514335632324 2023-01-24 00:29:04.781759: step: 730/459, loss: 0.5144017934799194 2023-01-24 00:29:05.397299: step: 732/459, loss: 1.456050157546997 2023-01-24 00:29:05.987780: step: 734/459, loss: 0.39881739020347595 2023-01-24 00:29:06.583993: step: 736/459, loss: 0.38955754041671753 2023-01-24 00:29:07.184411: step: 738/459, loss: 0.35743361711502075 2023-01-24 00:29:07.810788: step: 740/459, loss: 1.6736419200897217 2023-01-24 00:29:08.458248: step: 742/459, loss: 0.50506192445755 2023-01-24 00:29:09.048987: step: 744/459, loss: 0.6748970150947571 2023-01-24 00:29:09.700868: step: 746/459, loss: 1.1583383083343506 2023-01-24 00:29:10.328660: step: 748/459, loss: 0.7531169652938843 2023-01-24 00:29:10.932933: step: 750/459, loss: 1.0487263202667236 2023-01-24 00:29:11.587269: step: 752/459, loss: 2.8782758712768555 2023-01-24 00:29:12.222932: step: 754/459, loss: 2.86689829826355 2023-01-24 00:29:12.817032: step: 756/459, loss: 0.7552492022514343 2023-01-24 00:29:13.471670: step: 758/459, loss: 0.9563307166099548 2023-01-24 00:29:14.062230: step: 760/459, loss: 2.033949136734009 2023-01-24 00:29:14.761865: step: 
762/459, loss: 0.9269843101501465 2023-01-24 00:29:15.409433: step: 764/459, loss: 0.455827534198761 2023-01-24 00:29:15.961313: step: 766/459, loss: 0.42344018816947937 2023-01-24 00:29:16.577594: step: 768/459, loss: 1.0880793333053589 2023-01-24 00:29:17.151798: step: 770/459, loss: 1.096629023551941 2023-01-24 00:29:17.757510: step: 772/459, loss: 2.1301465034484863 2023-01-24 00:29:18.381822: step: 774/459, loss: 2.3265280723571777 2023-01-24 00:29:18.996553: step: 776/459, loss: 0.8673344254493713 2023-01-24 00:29:19.581282: step: 778/459, loss: 0.4379597008228302 2023-01-24 00:29:20.199105: step: 780/459, loss: 3.591761589050293 2023-01-24 00:29:20.830105: step: 782/459, loss: 1.3116577863693237 2023-01-24 00:29:21.434951: step: 784/459, loss: 0.7462408542633057 2023-01-24 00:29:22.041203: step: 786/459, loss: 7.972297191619873 2023-01-24 00:29:22.690665: step: 788/459, loss: 0.6689608097076416 2023-01-24 00:29:23.335865: step: 790/459, loss: 1.3054170608520508 2023-01-24 00:29:23.939310: step: 792/459, loss: 0.3681839108467102 2023-01-24 00:29:24.557473: step: 794/459, loss: 5.478631019592285 2023-01-24 00:29:25.243493: step: 796/459, loss: 0.5610888004302979 2023-01-24 00:29:25.881255: step: 798/459, loss: 1.14380943775177 2023-01-24 00:29:26.561281: step: 800/459, loss: 1.0850558280944824 2023-01-24 00:29:27.198984: step: 802/459, loss: 0.24230411648750305 2023-01-24 00:29:27.838380: step: 804/459, loss: 0.4777524471282959 2023-01-24 00:29:28.401561: step: 806/459, loss: 0.7631310820579529 2023-01-24 00:29:29.016721: step: 808/459, loss: 0.9241105914115906 2023-01-24 00:29:29.668603: step: 810/459, loss: 0.49542316794395447 2023-01-24 00:29:30.282763: step: 812/459, loss: 2.5931291580200195 2023-01-24 00:29:30.911009: step: 814/459, loss: 0.33722183108329773 2023-01-24 00:29:31.645784: step: 816/459, loss: 0.8948217630386353 2023-01-24 00:29:32.286702: step: 818/459, loss: 1.9462041854858398 2023-01-24 00:29:32.969227: step: 820/459, loss: 0.7682085037231445 2023-01-24 00:29:33.597563: step: 822/459, loss: 3.148587703704834 2023-01-24 00:29:34.219115: step: 824/459, loss: 0.9581572413444519 2023-01-24 00:29:34.865799: step: 826/459, loss: 1.7917827367782593 2023-01-24 00:29:35.402642: step: 828/459, loss: 0.21316814422607422 2023-01-24 00:29:36.047158: step: 830/459, loss: 0.7263556122779846 2023-01-24 00:29:36.731237: step: 832/459, loss: 1.2183486223220825 2023-01-24 00:29:37.459796: step: 834/459, loss: 0.5836576223373413 2023-01-24 00:29:38.066229: step: 836/459, loss: 2.732125759124756 2023-01-24 00:29:38.726405: step: 838/459, loss: 0.990696132183075 2023-01-24 00:29:39.360738: step: 840/459, loss: 5.378780364990234 2023-01-24 00:29:39.969788: step: 842/459, loss: 0.8090795278549194 2023-01-24 00:29:40.594021: step: 844/459, loss: 0.9860786199569702 2023-01-24 00:29:41.156950: step: 846/459, loss: 0.7927578687667847 2023-01-24 00:29:41.857665: step: 848/459, loss: 2.916944742202759 2023-01-24 00:29:42.469586: step: 850/459, loss: 0.47658807039260864 2023-01-24 00:29:43.125384: step: 852/459, loss: 1.3781492710113525 2023-01-24 00:29:43.768362: step: 854/459, loss: 0.6634725332260132 2023-01-24 00:29:44.464219: step: 856/459, loss: 0.33790236711502075 2023-01-24 00:29:45.052289: step: 858/459, loss: 0.5000613331794739 2023-01-24 00:29:45.642320: step: 860/459, loss: 1.4129551649093628 2023-01-24 00:29:46.281509: step: 862/459, loss: 2.7076830863952637 2023-01-24 00:29:46.887597: step: 864/459, loss: 1.5846668481826782 2023-01-24 00:29:47.513402: step: 866/459, loss: 
0.9985396862030029 2023-01-24 00:29:48.123459: step: 868/459, loss: 1.0293058156967163 2023-01-24 00:29:48.766633: step: 870/459, loss: 2.3921313285827637 2023-01-24 00:29:49.353242: step: 872/459, loss: 1.8686859607696533 2023-01-24 00:29:50.002724: step: 874/459, loss: 0.5690680742263794 2023-01-24 00:29:50.578325: step: 876/459, loss: 0.42654144763946533 2023-01-24 00:29:51.273011: step: 878/459, loss: 0.5455842614173889 2023-01-24 00:29:51.913924: step: 880/459, loss: 0.4807981252670288 2023-01-24 00:29:52.530586: step: 882/459, loss: 0.8978493213653564 2023-01-24 00:29:53.122991: step: 884/459, loss: 0.44253677129745483 2023-01-24 00:29:53.735638: step: 886/459, loss: 0.5350589752197266 2023-01-24 00:29:54.349716: step: 888/459, loss: 0.3211902976036072 2023-01-24 00:29:54.985838: step: 890/459, loss: 1.2304664850234985 2023-01-24 00:29:55.628705: step: 892/459, loss: 0.5763905644416809 2023-01-24 00:29:56.240655: step: 894/459, loss: 0.3273567855358124 2023-01-24 00:29:56.889365: step: 896/459, loss: 0.7828986644744873 2023-01-24 00:29:57.613781: step: 898/459, loss: 0.7189304828643799 2023-01-24 00:29:58.193704: step: 900/459, loss: 1.6578819751739502 2023-01-24 00:29:58.762613: step: 902/459, loss: 0.7026721239089966 2023-01-24 00:29:59.312250: step: 904/459, loss: 0.3877449929714203 2023-01-24 00:30:00.011786: step: 906/459, loss: 1.3212809562683105 2023-01-24 00:30:00.692561: step: 908/459, loss: 0.3244810402393341 2023-01-24 00:30:01.351478: step: 910/459, loss: 0.5824142098426819 2023-01-24 00:30:02.007936: step: 912/459, loss: 1.4346675872802734 2023-01-24 00:30:02.637238: step: 914/459, loss: 1.0737671852111816 2023-01-24 00:30:03.280978: step: 916/459, loss: 0.8315730690956116 2023-01-24 00:30:03.879992: step: 918/459, loss: 0.3181229829788208 2023-01-24 00:30:04.342768: step: 920/459, loss: 0.03601766377687454 ================================================== Loss: 1.216 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35966767371601205, 'r': 0.22547348484848484, 'f1': 0.2771827706635623}, 'combined': 0.2042399362784143, 'epoch': 3} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4152678756734549, 'r': 0.20499612011466375, 'f1': 0.2744905520854915}, 'combined': 0.17567395333471453, 'epoch': 3} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36691616766467067, 'r': 0.23166351606805294, 'f1': 0.2840092699884125}, 'combined': 0.20926998841251448, 'epoch': 3} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.40027336018279874, 'r': 0.20049990455617506, 'f1': 0.2671715777761849}, 'combined': 0.1709898097767583, 'epoch': 3} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3535585891954916, 'r': 0.2218931032380023, 'f1': 0.2726630699486718}, 'combined': 0.200909630488495, 'epoch': 3} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.40436359948022654, 'r': 0.2018148636244325, 'f1': 0.2692493701139523}, 'combined': 0.1930467181949092, 'epoch': 3} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.15238095238095237, 'f1': 0.20915032679738563}, 'combined': 0.13943355119825707, 'epoch': 3} Sample Korean: {'template': {'p': 
0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.32142857142857145, 'r': 0.1956521739130435, 'f1': 0.24324324324324326}, 'combined': 0.12162162162162163, 'epoch': 3} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.10344827586206896, 'f1': 0.17647058823529413}, 'combined': 0.11764705882352941, 'epoch': 3} New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3226771034143062, 'r': 0.24887005329496587, 'f1': 0.2810080217567491}, 'combined': 0.20705854234707827, 'epoch': 2} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31765742528569446, 'r': 0.21878583102707994, 'f1': 0.25911014056081905}, 'combined': 0.16583048995892416, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3098290598290598, 'r': 0.3452380952380952, 'f1': 0.3265765765765765}, 'combined': 0.21771771771771767, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36691616766467067, 'r': 0.23166351606805294, 'f1': 0.2840092699884125}, 'combined': 0.20926998841251448, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.40027336018279874, 'r': 0.20049990455617506, 'f1': 0.2671715777761849}, 'combined': 0.1709898097767583, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.32142857142857145, 'r': 0.1956521739130435, 'f1': 0.24324324324324326}, 'combined': 0.12162162162162163, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3535585891954916, 'r': 0.2218931032380023, 'f1': 0.2726630699486718}, 'combined': 0.200909630488495, 'epoch': 3} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.40436359948022654, 'r': 0.2018148636244325, 'f1': 0.2692493701139523}, 'combined': 0.1930467181949092, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.10344827586206896, 'f1': 0.17647058823529413}, 'combined': 0.11764705882352941, 'epoch': 3} ****************************** Epoch: 4 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:32:57.825561: step: 2/459, loss: 0.2629299461841583 2023-01-24 00:32:58.447176: step: 4/459, loss: 0.31642934679985046 2023-01-24 00:32:59.135778: step: 6/459, loss: 0.5425222516059875 2023-01-24 00:32:59.736543: step: 8/459, loss: 0.8295786380767822 2023-01-24 00:33:00.368343: step: 10/459, loss: 0.8222666382789612 2023-01-24 00:33:00.927143: step: 12/459, loss: 2.658308506011963 2023-01-24 00:33:01.534754: step: 14/459, loss: 1.6308598518371582 2023-01-24 00:33:02.166198: step: 16/459, loss: 0.42774683237075806 2023-01-24 00:33:02.758470: step: 18/459, loss: 5.451874732971191 2023-01-24 00:33:03.380313: step: 20/459, loss: 0.17005379498004913 2023-01-24 00:33:04.022154: step: 22/459, loss: 0.5578955411911011 2023-01-24 00:33:04.673168: step: 24/459, 
loss: 0.5673248171806335 2023-01-24 00:33:05.350183: step: 26/459, loss: 0.47039127349853516 2023-01-24 00:33:05.964208: step: 28/459, loss: 0.4984683096408844 2023-01-24 00:33:06.611411: step: 30/459, loss: 1.340278148651123 2023-01-24 00:33:07.205843: step: 32/459, loss: 0.9560576677322388 2023-01-24 00:33:07.851583: step: 34/459, loss: 0.6741725206375122 2023-01-24 00:33:08.489674: step: 36/459, loss: 0.8153380155563354 2023-01-24 00:33:09.081065: step: 38/459, loss: 0.8904309868812561 2023-01-24 00:33:09.692145: step: 40/459, loss: 0.7837830185890198 2023-01-24 00:33:10.276094: step: 42/459, loss: 0.7278810739517212 2023-01-24 00:33:10.875279: step: 44/459, loss: 0.44239097833633423 2023-01-24 00:33:11.522729: step: 46/459, loss: 0.3355379104614258 2023-01-24 00:33:12.088913: step: 48/459, loss: 1.5425031185150146 2023-01-24 00:33:12.693545: step: 50/459, loss: 0.24296626448631287 2023-01-24 00:33:13.287795: step: 52/459, loss: 0.6176841855049133 2023-01-24 00:33:13.931102: step: 54/459, loss: 1.4245350360870361 2023-01-24 00:33:14.509776: step: 56/459, loss: 0.31595733761787415 2023-01-24 00:33:15.093270: step: 58/459, loss: 0.9301232099533081 2023-01-24 00:33:15.729504: step: 60/459, loss: 1.060267448425293 2023-01-24 00:33:16.327529: step: 62/459, loss: 1.024290919303894 2023-01-24 00:33:16.910634: step: 64/459, loss: 0.41217949986457825 2023-01-24 00:33:17.470832: step: 66/459, loss: 0.2570650577545166 2023-01-24 00:33:18.155231: step: 68/459, loss: 0.14188264310359955 2023-01-24 00:33:18.741378: step: 70/459, loss: 0.4395698606967926 2023-01-24 00:33:19.342344: step: 72/459, loss: 0.8365334272384644 2023-01-24 00:33:19.911530: step: 74/459, loss: 0.7945548892021179 2023-01-24 00:33:20.475402: step: 76/459, loss: 0.7270252108573914 2023-01-24 00:33:21.109291: step: 78/459, loss: 0.21225960552692413 2023-01-24 00:33:21.763860: step: 80/459, loss: 0.5944653153419495 2023-01-24 00:33:22.299684: step: 82/459, loss: 0.45144984126091003 2023-01-24 00:33:22.845293: step: 84/459, loss: 0.791796863079071 2023-01-24 00:33:23.508581: step: 86/459, loss: 0.7819728851318359 2023-01-24 00:33:24.099842: step: 88/459, loss: 3.674091100692749 2023-01-24 00:33:24.686341: step: 90/459, loss: 0.8385416269302368 2023-01-24 00:33:25.302448: step: 92/459, loss: 0.9138137102127075 2023-01-24 00:33:25.927351: step: 94/459, loss: 0.6646292209625244 2023-01-24 00:33:26.504007: step: 96/459, loss: 1.4333899021148682 2023-01-24 00:33:27.097900: step: 98/459, loss: 1.2509890794754028 2023-01-24 00:33:27.749833: step: 100/459, loss: 0.45850056409835815 2023-01-24 00:33:28.340973: step: 102/459, loss: 7.624563694000244 2023-01-24 00:33:28.925588: step: 104/459, loss: 0.433255136013031 2023-01-24 00:33:29.471918: step: 106/459, loss: 1.485954999923706 2023-01-24 00:33:30.015793: step: 108/459, loss: 0.6359782218933105 2023-01-24 00:33:30.621449: step: 110/459, loss: 0.5017732381820679 2023-01-24 00:33:31.221167: step: 112/459, loss: 0.2435244619846344 2023-01-24 00:33:31.909264: step: 114/459, loss: 1.448166012763977 2023-01-24 00:33:32.529411: step: 116/459, loss: 0.5265495777130127 2023-01-24 00:33:33.126946: step: 118/459, loss: 2.794808864593506 2023-01-24 00:33:33.717142: step: 120/459, loss: 1.059443712234497 2023-01-24 00:33:34.352388: step: 122/459, loss: 1.002873182296753 2023-01-24 00:33:34.916785: step: 124/459, loss: 0.569503664970398 2023-01-24 00:33:35.553303: step: 126/459, loss: 0.5838390588760376 2023-01-24 00:33:36.161776: step: 128/459, loss: 0.4809010326862335 2023-01-24 00:33:36.734790: step: 
130/459, loss: 0.38630059361457825 2023-01-24 00:33:37.343265: step: 132/459, loss: 0.8847789764404297 2023-01-24 00:33:38.061735: step: 134/459, loss: 0.5669805407524109 2023-01-24 00:33:38.658672: step: 136/459, loss: 0.4482596814632416 2023-01-24 00:33:39.277121: step: 138/459, loss: 0.6293562054634094 2023-01-24 00:33:39.877184: step: 140/459, loss: 0.9912480711936951 2023-01-24 00:33:40.539154: step: 142/459, loss: 0.5156668424606323 2023-01-24 00:33:41.173034: step: 144/459, loss: 0.21459324657917023 2023-01-24 00:33:41.732707: step: 146/459, loss: 0.6685810089111328 2023-01-24 00:33:42.319060: step: 148/459, loss: 0.2823648750782013 2023-01-24 00:33:42.933135: step: 150/459, loss: 0.17113231122493744 2023-01-24 00:33:43.680198: step: 152/459, loss: 1.4788053035736084 2023-01-24 00:33:44.254082: step: 154/459, loss: 0.5702179670333862 2023-01-24 00:33:44.999214: step: 156/459, loss: 7.1528639793396 2023-01-24 00:33:45.620127: step: 158/459, loss: 0.27827689051628113 2023-01-24 00:33:46.309268: step: 160/459, loss: 0.651410698890686 2023-01-24 00:33:46.896678: step: 162/459, loss: 0.8525462746620178 2023-01-24 00:33:47.642094: step: 164/459, loss: 0.5408939719200134 2023-01-24 00:33:48.310422: step: 166/459, loss: 1.2127506732940674 2023-01-24 00:33:48.938901: step: 168/459, loss: 0.9700721502304077 2023-01-24 00:33:49.513913: step: 170/459, loss: 1.385732889175415 2023-01-24 00:33:50.145236: step: 172/459, loss: 0.7134215235710144 2023-01-24 00:33:50.779875: step: 174/459, loss: 0.5869786739349365 2023-01-24 00:33:51.408185: step: 176/459, loss: 1.5519976615905762 2023-01-24 00:33:52.083356: step: 178/459, loss: 0.27745574712753296 2023-01-24 00:33:52.660109: step: 180/459, loss: 0.8678243160247803 2023-01-24 00:33:53.275057: step: 182/459, loss: 1.3625507354736328 2023-01-24 00:33:53.902040: step: 184/459, loss: 0.3105129897594452 2023-01-24 00:33:54.586842: step: 186/459, loss: 0.2812196910381317 2023-01-24 00:33:55.147999: step: 188/459, loss: 0.3734639883041382 2023-01-24 00:33:55.842563: step: 190/459, loss: 0.4814627468585968 2023-01-24 00:33:56.422456: step: 192/459, loss: 0.4812332093715668 2023-01-24 00:33:57.050671: step: 194/459, loss: 1.7347314357757568 2023-01-24 00:33:57.644862: step: 196/459, loss: 0.8837291598320007 2023-01-24 00:33:58.247400: step: 198/459, loss: 0.30974122881889343 2023-01-24 00:33:58.858504: step: 200/459, loss: 0.7421004772186279 2023-01-24 00:33:59.504369: step: 202/459, loss: 0.8595197200775146 2023-01-24 00:34:00.110362: step: 204/459, loss: 2.2678451538085938 2023-01-24 00:34:00.700096: step: 206/459, loss: 1.0507932901382446 2023-01-24 00:34:01.297277: step: 208/459, loss: 0.48488593101501465 2023-01-24 00:34:01.934261: step: 210/459, loss: 0.8541501760482788 2023-01-24 00:34:02.637362: step: 212/459, loss: 0.622234582901001 2023-01-24 00:34:03.255447: step: 214/459, loss: 2.715867280960083 2023-01-24 00:34:03.881835: step: 216/459, loss: 0.6879290342330933 2023-01-24 00:34:04.464212: step: 218/459, loss: 0.5291939377784729 2023-01-24 00:34:05.059314: step: 220/459, loss: 0.4005625247955322 2023-01-24 00:34:05.642589: step: 222/459, loss: 0.6344862580299377 2023-01-24 00:34:06.239268: step: 224/459, loss: 0.8908586502075195 2023-01-24 00:34:06.879187: step: 226/459, loss: 0.3141728341579437 2023-01-24 00:34:07.451478: step: 228/459, loss: 1.0117344856262207 2023-01-24 00:34:08.074721: step: 230/459, loss: 0.2899724841117859 2023-01-24 00:34:08.728392: step: 232/459, loss: 0.19502530992031097 2023-01-24 00:34:09.370550: step: 234/459, loss: 
0.31841278076171875 2023-01-24 00:34:09.992519: step: 236/459, loss: 0.18412983417510986 2023-01-24 00:34:10.704557: step: 238/459, loss: 1.504150629043579 2023-01-24 00:34:11.350676: step: 240/459, loss: 2.5924854278564453 2023-01-24 00:34:11.996215: step: 242/459, loss: 2.441319227218628 2023-01-24 00:34:12.657172: step: 244/459, loss: 1.8252403736114502 2023-01-24 00:34:13.269608: step: 246/459, loss: 0.5768305063247681 2023-01-24 00:34:13.888189: step: 248/459, loss: 1.487776756286621 2023-01-24 00:34:14.529328: step: 250/459, loss: 5.310724258422852 2023-01-24 00:34:15.199784: step: 252/459, loss: 0.6273289918899536 2023-01-24 00:34:15.794563: step: 254/459, loss: 1.3838484287261963 2023-01-24 00:34:16.394764: step: 256/459, loss: 1.167221188545227 2023-01-24 00:34:17.054381: step: 258/459, loss: 1.655578851699829 2023-01-24 00:34:17.674135: step: 260/459, loss: 0.7035750150680542 2023-01-24 00:34:18.313949: step: 262/459, loss: 0.7619149684906006 2023-01-24 00:34:18.979444: step: 264/459, loss: 0.8286491632461548 2023-01-24 00:34:19.689316: step: 266/459, loss: 1.4651660919189453 2023-01-24 00:34:20.265764: step: 268/459, loss: 0.5467448830604553 2023-01-24 00:34:20.879109: step: 270/459, loss: 0.809415340423584 2023-01-24 00:34:21.517834: step: 272/459, loss: 0.7950923442840576 2023-01-24 00:34:22.194405: step: 274/459, loss: 2.0230140686035156 2023-01-24 00:34:22.793460: step: 276/459, loss: 2.7649893760681152 2023-01-24 00:34:23.384041: step: 278/459, loss: 0.8508033752441406 2023-01-24 00:34:24.008410: step: 280/459, loss: 0.5348819494247437 2023-01-24 00:34:24.577518: step: 282/459, loss: 1.7239048480987549 2023-01-24 00:34:25.207530: step: 284/459, loss: 0.23025506734848022 2023-01-24 00:34:25.821833: step: 286/459, loss: 0.33228710293769836 2023-01-24 00:34:26.438418: step: 288/459, loss: 1.4574183225631714 2023-01-24 00:34:27.083482: step: 290/459, loss: 0.685314416885376 2023-01-24 00:34:27.789374: step: 292/459, loss: 0.4842592179775238 2023-01-24 00:34:28.423276: step: 294/459, loss: 0.2127315253019333 2023-01-24 00:34:29.067358: step: 296/459, loss: 1.1030495166778564 2023-01-24 00:34:29.623906: step: 298/459, loss: 0.4407437741756439 2023-01-24 00:34:30.279909: step: 300/459, loss: 0.3376855254173279 2023-01-24 00:34:30.936477: step: 302/459, loss: 1.117982268333435 2023-01-24 00:34:31.582910: step: 304/459, loss: 1.688399076461792 2023-01-24 00:34:32.219601: step: 306/459, loss: 0.2481587529182434 2023-01-24 00:34:32.868057: step: 308/459, loss: 0.5501042008399963 2023-01-24 00:34:33.487661: step: 310/459, loss: 0.344118595123291 2023-01-24 00:34:34.138397: step: 312/459, loss: 1.2008402347564697 2023-01-24 00:34:34.856099: step: 314/459, loss: 0.29537224769592285 2023-01-24 00:34:35.454978: step: 316/459, loss: 1.9949473142623901 2023-01-24 00:34:36.066887: step: 318/459, loss: 0.2504149079322815 2023-01-24 00:34:36.607157: step: 320/459, loss: 1.205349326133728 2023-01-24 00:34:37.251862: step: 322/459, loss: 1.5540961027145386 2023-01-24 00:34:37.973596: step: 324/459, loss: 0.2621919512748718 2023-01-24 00:34:38.592568: step: 326/459, loss: 0.35430827736854553 2023-01-24 00:34:39.202133: step: 328/459, loss: 0.39267444610595703 2023-01-24 00:34:39.787568: step: 330/459, loss: 0.8582189083099365 2023-01-24 00:34:40.396787: step: 332/459, loss: 0.4264954924583435 2023-01-24 00:34:41.097165: step: 334/459, loss: 1.3696779012680054 2023-01-24 00:34:41.726595: step: 336/459, loss: 1.0171194076538086 2023-01-24 00:34:42.312003: step: 338/459, loss: 0.8029224276542664 
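Each epoch in this log closes with Dev/Test/Sample blocks per language reporting template and slot precision (p), recall (r), and F1, plus a single 'combined' score. The printed values are consistent with F1 being the usual harmonic mean of p and r, and with 'combined' being the product of the template F1 and the slot F1; the check below reproduces the epoch-3 "Dev Chinese" numbers under that assumption (the evaluation code itself is not part of this log, so treat this as an inferred relationship rather than its definition).

def f1(p: float, r: float) -> float:
    """Harmonic mean of precision and recall."""
    return 2 * p * r / (p + r) if (p + r) else 0.0

# Epoch-3 Dev Chinese values as printed above.
template_f1 = f1(1.0, 0.5833333333333334)               # printed as 0.7368421052631579
slot_f1 = f1(0.35966767371601205, 0.22547348484848484)  # printed as 0.2771827706635623
combined = template_f1 * slot_f1                        # printed as 0.2042399362784143
print(template_f1, slot_f1, combined)

Read this way, the "New best korean model..." / "New best russian model..." messages after epoch 3 track the Dev 'combined' scores: Korean (0.2093) and Russian (0.2009) beat their stored bests, while Chinese (0.2042) stayed below its epoch-2 best of 0.2071, which is presumably why the Chinese entry under "Current best result" still points at epoch 2.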
2023-01-24 00:34:42.944112: step: 340/459, loss: 4.020278453826904 2023-01-24 00:34:43.540046: step: 342/459, loss: 0.9196275472640991 2023-01-24 00:34:44.083010: step: 344/459, loss: 0.14582861959934235 2023-01-24 00:34:44.666374: step: 346/459, loss: 0.24664948880672455 2023-01-24 00:34:45.275318: step: 348/459, loss: 0.37189745903015137 2023-01-24 00:34:45.871162: step: 350/459, loss: 0.5621485710144043 2023-01-24 00:34:46.476006: step: 352/459, loss: 0.415779173374176 2023-01-24 00:34:47.075584: step: 354/459, loss: 0.24174237251281738 2023-01-24 00:34:47.700524: step: 356/459, loss: 1.5691418647766113 2023-01-24 00:34:48.276952: step: 358/459, loss: 0.6970980167388916 2023-01-24 00:34:48.959255: step: 360/459, loss: 0.4697466492652893 2023-01-24 00:34:49.705815: step: 362/459, loss: 3.37833571434021 2023-01-24 00:34:50.385073: step: 364/459, loss: 0.33004093170166016 2023-01-24 00:34:51.010231: step: 366/459, loss: 1.1166050434112549 2023-01-24 00:34:51.598585: step: 368/459, loss: 0.8793854117393494 2023-01-24 00:34:52.211726: step: 370/459, loss: 0.5331484079360962 2023-01-24 00:34:52.892990: step: 372/459, loss: 0.4205774962902069 2023-01-24 00:34:53.460307: step: 374/459, loss: 0.30958518385887146 2023-01-24 00:34:54.083875: step: 376/459, loss: 0.9852094054222107 2023-01-24 00:34:54.746585: step: 378/459, loss: 0.31982967257499695 2023-01-24 00:34:55.418500: step: 380/459, loss: 0.37168392539024353 2023-01-24 00:34:56.131900: step: 382/459, loss: 7.91438102722168 2023-01-24 00:34:56.717240: step: 384/459, loss: 0.7952520251274109 2023-01-24 00:34:57.293169: step: 386/459, loss: 0.8939163684844971 2023-01-24 00:34:57.890219: step: 388/459, loss: 0.6344574689865112 2023-01-24 00:34:58.563687: step: 390/459, loss: 1.3020354509353638 2023-01-24 00:34:59.160311: step: 392/459, loss: 1.0403913259506226 2023-01-24 00:34:59.858707: step: 394/459, loss: 0.2625603973865509 2023-01-24 00:35:00.545216: step: 396/459, loss: 0.17759118974208832 2023-01-24 00:35:01.151745: step: 398/459, loss: 0.8725587129592896 2023-01-24 00:35:01.787355: step: 400/459, loss: 0.39279407262802124 2023-01-24 00:35:02.379779: step: 402/459, loss: 0.5230737328529358 2023-01-24 00:35:02.997246: step: 404/459, loss: 0.4087664484977722 2023-01-24 00:35:03.551446: step: 406/459, loss: 0.15622754395008087 2023-01-24 00:35:04.211354: step: 408/459, loss: 0.3424694538116455 2023-01-24 00:35:04.834169: step: 410/459, loss: 0.5976166725158691 2023-01-24 00:35:05.462435: step: 412/459, loss: 2.202026844024658 2023-01-24 00:35:06.029436: step: 414/459, loss: 0.30455872416496277 2023-01-24 00:35:06.625035: step: 416/459, loss: 2.3817834854125977 2023-01-24 00:35:07.223722: step: 418/459, loss: 0.5373977422714233 2023-01-24 00:35:07.835194: step: 420/459, loss: 1.6219290494918823 2023-01-24 00:35:08.472348: step: 422/459, loss: 5.006904602050781 2023-01-24 00:35:09.080019: step: 424/459, loss: 0.9720149040222168 2023-01-24 00:35:09.875477: step: 426/459, loss: 0.4188815653324127 2023-01-24 00:35:10.528557: step: 428/459, loss: 0.5820473432540894 2023-01-24 00:35:11.177360: step: 430/459, loss: 0.6277740001678467 2023-01-24 00:35:11.822308: step: 432/459, loss: 0.5616673827171326 2023-01-24 00:35:12.550590: step: 434/459, loss: 0.4164665639400482 2023-01-24 00:35:13.194536: step: 436/459, loss: 0.90277099609375 2023-01-24 00:35:13.788899: step: 438/459, loss: 0.5416694283485413 2023-01-24 00:35:14.467250: step: 440/459, loss: 0.36404848098754883 2023-01-24 00:35:15.087375: step: 442/459, loss: 0.7054420709609985 2023-01-24 
00:35:15.687841: step: 444/459, loss: 0.39884206652641296 2023-01-24 00:35:16.398564: step: 446/459, loss: 0.9488197565078735 2023-01-24 00:35:16.974535: step: 448/459, loss: 5.420139312744141 2023-01-24 00:35:17.602639: step: 450/459, loss: 3.270015001296997 2023-01-24 00:35:18.202298: step: 452/459, loss: 0.9378747940063477 2023-01-24 00:35:18.823430: step: 454/459, loss: 0.9022320508956909 2023-01-24 00:35:19.446958: step: 456/459, loss: 0.4916647970676422 2023-01-24 00:35:20.007652: step: 458/459, loss: 0.30066800117492676 2023-01-24 00:35:20.600768: step: 460/459, loss: 0.8008766174316406 2023-01-24 00:35:21.241428: step: 462/459, loss: 0.646251916885376 2023-01-24 00:35:21.848033: step: 464/459, loss: 0.5329670310020447 2023-01-24 00:35:22.445846: step: 466/459, loss: 1.9126996994018555 2023-01-24 00:35:23.032497: step: 468/459, loss: 3.222757339477539 2023-01-24 00:35:23.736576: step: 470/459, loss: 1.0228972434997559 2023-01-24 00:35:24.444795: step: 472/459, loss: 1.1214079856872559 2023-01-24 00:35:25.117240: step: 474/459, loss: 0.7026878595352173 2023-01-24 00:35:25.661361: step: 476/459, loss: 0.8214120268821716 2023-01-24 00:35:26.337723: step: 478/459, loss: 0.7580002546310425 2023-01-24 00:35:26.905279: step: 480/459, loss: 0.5894670486450195 2023-01-24 00:35:27.567350: step: 482/459, loss: 0.9182810187339783 2023-01-24 00:35:28.161629: step: 484/459, loss: 0.7076734304428101 2023-01-24 00:35:28.773162: step: 486/459, loss: 0.7584755420684814 2023-01-24 00:35:29.440022: step: 488/459, loss: 0.7704078555107117 2023-01-24 00:35:30.084399: step: 490/459, loss: 0.7114588618278503 2023-01-24 00:35:30.708178: step: 492/459, loss: 0.6675795316696167 2023-01-24 00:35:31.304660: step: 494/459, loss: 0.7988518476486206 2023-01-24 00:35:31.904666: step: 496/459, loss: 2.847660541534424 2023-01-24 00:35:32.561895: step: 498/459, loss: 0.4516908526420593 2023-01-24 00:35:33.182048: step: 500/459, loss: 7.626746654510498 2023-01-24 00:35:33.770146: step: 502/459, loss: 0.4138692021369934 2023-01-24 00:35:34.356034: step: 504/459, loss: 1.113804578781128 2023-01-24 00:35:34.975042: step: 506/459, loss: 2.1544716358184814 2023-01-24 00:35:35.545237: step: 508/459, loss: 1.0850372314453125 2023-01-24 00:35:36.169193: step: 510/459, loss: 0.12641064822673798 2023-01-24 00:35:36.728280: step: 512/459, loss: 0.2904667258262634 2023-01-24 00:35:37.359763: step: 514/459, loss: 0.21930423378944397 2023-01-24 00:35:37.922579: step: 516/459, loss: 1.6924818754196167 2023-01-24 00:35:38.518327: step: 518/459, loss: 1.6196529865264893 2023-01-24 00:35:39.052456: step: 520/459, loss: 0.3819645941257477 2023-01-24 00:35:39.709937: step: 522/459, loss: 0.24404674768447876 2023-01-24 00:35:40.426017: step: 524/459, loss: 0.7582460045814514 2023-01-24 00:35:41.046672: step: 526/459, loss: 0.4783790409564972 2023-01-24 00:35:41.680681: step: 528/459, loss: 1.1428343057632446 2023-01-24 00:35:42.265366: step: 530/459, loss: 0.5223912596702576 2023-01-24 00:35:42.942225: step: 532/459, loss: 0.9502300024032593 2023-01-24 00:35:43.616586: step: 534/459, loss: 0.28498023748397827 2023-01-24 00:35:44.199876: step: 536/459, loss: 1.0084697008132935 2023-01-24 00:35:44.828761: step: 538/459, loss: 0.3607500493526459 2023-01-24 00:35:45.442632: step: 540/459, loss: 1.4302022457122803 2023-01-24 00:35:46.068315: step: 542/459, loss: 0.9296649098396301 2023-01-24 00:35:46.697405: step: 544/459, loss: 0.2426072210073471 2023-01-24 00:35:47.276857: step: 546/459, loss: 1.4258317947387695 2023-01-24 00:35:47.861013: 
step: 548/459, loss: 0.7342610359191895 2023-01-24 00:35:48.501971: step: 550/459, loss: 1.3262778520584106 2023-01-24 00:35:49.121976: step: 552/459, loss: 0.41287708282470703 2023-01-24 00:35:49.732412: step: 554/459, loss: 1.4576377868652344 2023-01-24 00:35:50.344259: step: 556/459, loss: 0.3637332320213318 2023-01-24 00:35:50.888971: step: 558/459, loss: 0.6084101796150208 2023-01-24 00:35:51.494971: step: 560/459, loss: 0.5719351768493652 2023-01-24 00:35:52.092471: step: 562/459, loss: 0.21859420835971832 2023-01-24 00:35:52.708640: step: 564/459, loss: 0.42549556493759155 2023-01-24 00:35:53.357922: step: 566/459, loss: 0.33910927176475525 2023-01-24 00:35:53.972509: step: 568/459, loss: 0.24382515251636505 2023-01-24 00:35:54.550604: step: 570/459, loss: 0.8600502014160156 2023-01-24 00:35:55.161084: step: 572/459, loss: 1.3275903463363647 2023-01-24 00:35:55.767158: step: 574/459, loss: 1.2586464881896973 2023-01-24 00:35:56.334541: step: 576/459, loss: 1.0911566019058228 2023-01-24 00:35:56.942263: step: 578/459, loss: 0.1433534026145935 2023-01-24 00:35:57.566747: step: 580/459, loss: 0.7355127930641174 2023-01-24 00:35:58.213795: step: 582/459, loss: 1.477288842201233 2023-01-24 00:35:58.819572: step: 584/459, loss: 3.776376247406006 2023-01-24 00:35:59.533345: step: 586/459, loss: 6.201087951660156 2023-01-24 00:36:00.167638: step: 588/459, loss: 0.2019227296113968 2023-01-24 00:36:00.764746: step: 590/459, loss: 0.5719796419143677 2023-01-24 00:36:01.450239: step: 592/459, loss: 1.4263535737991333 2023-01-24 00:36:02.101287: step: 594/459, loss: 0.5059751272201538 2023-01-24 00:36:02.732345: step: 596/459, loss: 0.8674497008323669 2023-01-24 00:36:03.396637: step: 598/459, loss: 4.389739036560059 2023-01-24 00:36:04.084655: step: 600/459, loss: 0.480192631483078 2023-01-24 00:36:04.676414: step: 602/459, loss: 0.72983717918396 2023-01-24 00:36:05.266860: step: 604/459, loss: 1.4964569807052612 2023-01-24 00:36:05.893312: step: 606/459, loss: 0.41862425208091736 2023-01-24 00:36:06.464937: step: 608/459, loss: 0.6620845198631287 2023-01-24 00:36:07.053957: step: 610/459, loss: 3.5878705978393555 2023-01-24 00:36:07.660618: step: 612/459, loss: 0.8894243240356445 2023-01-24 00:36:08.243573: step: 614/459, loss: 0.7737754583358765 2023-01-24 00:36:08.852183: step: 616/459, loss: 1.1643366813659668 2023-01-24 00:36:09.473190: step: 618/459, loss: 1.380285620689392 2023-01-24 00:36:10.050227: step: 620/459, loss: 0.28609347343444824 2023-01-24 00:36:10.709465: step: 622/459, loss: 0.361116886138916 2023-01-24 00:36:11.307706: step: 624/459, loss: 0.2995170056819916 2023-01-24 00:36:11.936907: step: 626/459, loss: 3.9355735778808594 2023-01-24 00:36:12.588933: step: 628/459, loss: 1.7488770484924316 2023-01-24 00:36:13.241295: step: 630/459, loss: 0.6895421743392944 2023-01-24 00:36:13.832637: step: 632/459, loss: 0.7896594405174255 2023-01-24 00:36:14.440066: step: 634/459, loss: 1.897881269454956 2023-01-24 00:36:15.106849: step: 636/459, loss: 2.411818027496338 2023-01-24 00:36:15.739172: step: 638/459, loss: 1.0793397426605225 2023-01-24 00:36:16.321784: step: 640/459, loss: 0.5769231915473938 2023-01-24 00:36:16.969185: step: 642/459, loss: 1.4333837032318115 2023-01-24 00:36:17.598681: step: 644/459, loss: 0.6768953204154968 2023-01-24 00:36:18.213969: step: 646/459, loss: 0.29311004281044006 2023-01-24 00:36:18.814754: step: 648/459, loss: 0.793571949005127 2023-01-24 00:36:19.510442: step: 650/459, loss: 1.1592813730239868 2023-01-24 00:36:20.100908: step: 652/459, loss: 
0.20003816485404968 2023-01-24 00:36:20.709060: step: 654/459, loss: 1.0515998601913452 2023-01-24 00:36:21.321767: step: 656/459, loss: 0.576008677482605 2023-01-24 00:36:22.012082: step: 658/459, loss: 0.3215329051017761 2023-01-24 00:36:22.613731: step: 660/459, loss: 1.0391525030136108 2023-01-24 00:36:23.200397: step: 662/459, loss: 0.5539386868476868 2023-01-24 00:36:23.809588: step: 664/459, loss: 0.8613218069076538 2023-01-24 00:36:24.424229: step: 666/459, loss: 0.7865577340126038 2023-01-24 00:36:25.079162: step: 668/459, loss: 0.757237434387207 2023-01-24 00:36:25.688128: step: 670/459, loss: 0.39790454506874084 2023-01-24 00:36:26.338737: step: 672/459, loss: 2.2098617553710938 2023-01-24 00:36:26.962306: step: 674/459, loss: 0.8326190710067749 2023-01-24 00:36:27.572771: step: 676/459, loss: 0.37667784094810486 2023-01-24 00:36:28.198211: step: 678/459, loss: 1.5609573125839233 2023-01-24 00:36:28.821715: step: 680/459, loss: 0.29327574372291565 2023-01-24 00:36:29.447398: step: 682/459, loss: 0.4473440647125244 2023-01-24 00:36:30.066778: step: 684/459, loss: 2.4117305278778076 2023-01-24 00:36:30.742252: step: 686/459, loss: 0.7797106504440308 2023-01-24 00:36:31.337918: step: 688/459, loss: 0.44356173276901245 2023-01-24 00:36:32.011323: step: 690/459, loss: 1.2065207958221436 2023-01-24 00:36:32.672676: step: 692/459, loss: 0.9138731360435486 2023-01-24 00:36:33.242081: step: 694/459, loss: 0.2791317403316498 2023-01-24 00:36:33.876337: step: 696/459, loss: 0.9751816987991333 2023-01-24 00:36:34.501964: step: 698/459, loss: 0.8802112340927124 2023-01-24 00:36:35.120736: step: 700/459, loss: 0.6757548451423645 2023-01-24 00:36:35.729783: step: 702/459, loss: 0.16731055080890656 2023-01-24 00:36:36.311291: step: 704/459, loss: 0.3569290339946747 2023-01-24 00:36:36.930370: step: 706/459, loss: 0.8331394791603088 2023-01-24 00:36:37.509456: step: 708/459, loss: 0.566225528717041 2023-01-24 00:36:38.134387: step: 710/459, loss: 0.6860302686691284 2023-01-24 00:36:38.747970: step: 712/459, loss: 0.3494754731655121 2023-01-24 00:36:39.382545: step: 714/459, loss: 0.6423234939575195 2023-01-24 00:36:40.015196: step: 716/459, loss: 1.213159441947937 2023-01-24 00:36:40.648956: step: 718/459, loss: 1.559997320175171 2023-01-24 00:36:41.266866: step: 720/459, loss: 0.5056448578834534 2023-01-24 00:36:41.910749: step: 722/459, loss: 0.4725208282470703 2023-01-24 00:36:42.533480: step: 724/459, loss: 0.8572290539741516 2023-01-24 00:36:43.106129: step: 726/459, loss: 0.4294005334377289 2023-01-24 00:36:43.721793: step: 728/459, loss: 0.9985460638999939 2023-01-24 00:36:44.360363: step: 730/459, loss: 0.4446983337402344 2023-01-24 00:36:44.956843: step: 732/459, loss: 0.2368692010641098 2023-01-24 00:36:45.595629: step: 734/459, loss: 0.24510011076927185 2023-01-24 00:36:46.254915: step: 736/459, loss: 1.2641977071762085 2023-01-24 00:36:46.837118: step: 738/459, loss: 1.4971938133239746 2023-01-24 00:36:47.477290: step: 740/459, loss: 1.5080828666687012 2023-01-24 00:36:48.103355: step: 742/459, loss: 0.9150080680847168 2023-01-24 00:36:48.764226: step: 744/459, loss: 0.8756124973297119 2023-01-24 00:36:49.350667: step: 746/459, loss: 0.554575502872467 2023-01-24 00:36:49.945857: step: 748/459, loss: 0.9310819506645203 2023-01-24 00:36:50.602074: step: 750/459, loss: 1.5205655097961426 2023-01-24 00:36:51.201713: step: 752/459, loss: 0.8217548727989197 2023-01-24 00:36:51.855010: step: 754/459, loss: 2.1938014030456543 2023-01-24 00:36:52.492108: step: 756/459, loss: 
0.1676204353570938 2023-01-24 00:36:53.055707: step: 758/459, loss: 0.3982953429222107 2023-01-24 00:36:53.665046: step: 760/459, loss: 0.623871922492981 2023-01-24 00:36:54.328498: step: 762/459, loss: 0.15736980736255646 2023-01-24 00:36:54.936826: step: 764/459, loss: 0.5799091458320618 2023-01-24 00:36:55.506427: step: 766/459, loss: 0.730345606803894 2023-01-24 00:36:56.105441: step: 768/459, loss: 2.8818843364715576 2023-01-24 00:36:56.650862: step: 770/459, loss: 0.9129209518432617 2023-01-24 00:36:57.263420: step: 772/459, loss: 1.2512569427490234 2023-01-24 00:36:57.840833: step: 774/459, loss: 0.6847265362739563 2023-01-24 00:36:58.433194: step: 776/459, loss: 2.1599678993225098 2023-01-24 00:36:59.066111: step: 778/459, loss: 0.4730226695537567 2023-01-24 00:36:59.706718: step: 780/459, loss: 0.6727897524833679 2023-01-24 00:37:00.330672: step: 782/459, loss: 0.37919381260871887 2023-01-24 00:37:00.937033: step: 784/459, loss: 0.9756624102592468 2023-01-24 00:37:01.561968: step: 786/459, loss: 0.8476969599723816 2023-01-24 00:37:02.223286: step: 788/459, loss: 0.47736281156539917 2023-01-24 00:37:02.797850: step: 790/459, loss: 1.1214734315872192 2023-01-24 00:37:03.389497: step: 792/459, loss: 0.7948769330978394 2023-01-24 00:37:03.996440: step: 794/459, loss: 0.4573044180870056 2023-01-24 00:37:04.698796: step: 796/459, loss: 0.6083248257637024 2023-01-24 00:37:05.301832: step: 798/459, loss: 14.7095365524292 2023-01-24 00:37:05.912955: step: 800/459, loss: 0.9135960936546326 2023-01-24 00:37:06.639606: step: 802/459, loss: 1.0530340671539307 2023-01-24 00:37:07.237909: step: 804/459, loss: 0.8587310314178467 2023-01-24 00:37:07.886149: step: 806/459, loss: 0.44261816143989563 2023-01-24 00:37:08.624296: step: 808/459, loss: 0.7226831912994385 2023-01-24 00:37:09.224056: step: 810/459, loss: 0.5551444292068481 2023-01-24 00:37:09.794085: step: 812/459, loss: 0.8201499581336975 2023-01-24 00:37:10.413963: step: 814/459, loss: 0.23244555294513702 2023-01-24 00:37:11.055922: step: 816/459, loss: 0.7721466422080994 2023-01-24 00:37:11.652647: step: 818/459, loss: 0.7870833873748779 2023-01-24 00:37:12.297611: step: 820/459, loss: 3.7852625846862793 2023-01-24 00:37:12.954887: step: 822/459, loss: 0.7348635196685791 2023-01-24 00:37:13.622045: step: 824/459, loss: 0.3809548020362854 2023-01-24 00:37:14.246531: step: 826/459, loss: 0.8466887474060059 2023-01-24 00:37:14.867833: step: 828/459, loss: 1.0474181175231934 2023-01-24 00:37:15.442890: step: 830/459, loss: 0.5405825972557068 2023-01-24 00:37:16.040167: step: 832/459, loss: 12.30676555633545 2023-01-24 00:37:16.667412: step: 834/459, loss: 0.7195747494697571 2023-01-24 00:37:17.255910: step: 836/459, loss: 0.4299614429473877 2023-01-24 00:37:17.817946: step: 838/459, loss: 0.6468727588653564 2023-01-24 00:37:18.471217: step: 840/459, loss: 0.3315470814704895 2023-01-24 00:37:19.084171: step: 842/459, loss: 1.2618780136108398 2023-01-24 00:37:19.746799: step: 844/459, loss: 0.28994789719581604 2023-01-24 00:37:20.373059: step: 846/459, loss: 0.8049699068069458 2023-01-24 00:37:20.975599: step: 848/459, loss: 0.30282869935035706 2023-01-24 00:37:21.532263: step: 850/459, loss: 0.46714097261428833 2023-01-24 00:37:22.170359: step: 852/459, loss: 0.34933459758758545 2023-01-24 00:37:22.792569: step: 854/459, loss: 0.47277092933654785 2023-01-24 00:37:23.460817: step: 856/459, loss: 1.0379388332366943 2023-01-24 00:37:24.038495: step: 858/459, loss: 0.4522306025028229 2023-01-24 00:37:24.708659: step: 860/459, loss: 
0.3088574707508087 2023-01-24 00:37:25.371088: step: 862/459, loss: 1.0116889476776123 2023-01-24 00:37:26.059786: step: 864/459, loss: 0.519141674041748 2023-01-24 00:37:26.616082: step: 866/459, loss: 0.6184970140457153 2023-01-24 00:37:27.253199: step: 868/459, loss: 0.6869659423828125 2023-01-24 00:37:27.931889: step: 870/459, loss: 3.3302927017211914 2023-01-24 00:37:28.566589: step: 872/459, loss: 2.4387428760528564 2023-01-24 00:37:29.154212: step: 874/459, loss: 0.4018455147743225 2023-01-24 00:37:29.732518: step: 876/459, loss: 0.5062389969825745 2023-01-24 00:37:30.338729: step: 878/459, loss: 0.2907419502735138 2023-01-24 00:37:31.013731: step: 880/459, loss: 0.4038926064968109 2023-01-24 00:37:31.572770: step: 882/459, loss: 0.4872068762779236 2023-01-24 00:37:32.145557: step: 884/459, loss: 0.36220604181289673 2023-01-24 00:37:32.770812: step: 886/459, loss: 0.7538596391677856 2023-01-24 00:37:33.320226: step: 888/459, loss: 0.6006200313568115 2023-01-24 00:37:33.913040: step: 890/459, loss: 2.504387378692627 2023-01-24 00:37:34.614851: step: 892/459, loss: 0.5879267454147339 2023-01-24 00:37:35.269785: step: 894/459, loss: 0.4408782422542572 2023-01-24 00:37:35.848016: step: 896/459, loss: 1.7672075033187866 2023-01-24 00:37:36.491054: step: 898/459, loss: 3.1880502700805664 2023-01-24 00:37:37.107914: step: 900/459, loss: 0.18132862448692322 2023-01-24 00:37:37.768385: step: 902/459, loss: 0.25227344036102295 2023-01-24 00:37:38.311813: step: 904/459, loss: 0.22001011669635773 2023-01-24 00:37:38.963327: step: 906/459, loss: 0.4160068929195404 2023-01-24 00:37:39.563898: step: 908/459, loss: 1.1492948532104492 2023-01-24 00:37:40.259676: step: 910/459, loss: 0.46529874205589294 2023-01-24 00:37:40.956728: step: 912/459, loss: 1.7102364301681519 2023-01-24 00:37:41.555896: step: 914/459, loss: 0.46843069791793823 2023-01-24 00:37:42.086839: step: 916/459, loss: 0.2924145460128784 2023-01-24 00:37:42.717289: step: 918/459, loss: 0.5436090230941772 2023-01-24 00:37:43.160502: step: 920/459, loss: 0.2239164263010025 ================================================== Loss: 1.038 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.30879753470214, 'r': 0.2889128449675325, 'f1': 0.2985244268940453}, 'combined': 0.219965367185086, 'epoch': 4} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34866077928898365, 'r': 0.2645012808399186, 'f1': 0.3008053782101035}, 'combined': 0.19251544205446622, 'epoch': 4} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3162928713151927, 'r': 0.3019159226190476, 'f1': 0.308937223145072}, 'combined': 0.22763795389636882, 'epoch': 4} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.35337873562195316, 'r': 0.2613463425879236, 'f1': 0.30047331197902116}, 'combined': 0.1923029196665735, 'epoch': 4} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3125609955204797, 'r': 0.29657776279500064, 'f1': 0.30435968660011725}, 'combined': 0.22426503223166533, 'epoch': 4} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34660410768177247, 'r': 0.27822857007545915, 'f1': 0.30867516599395367}, 'combined': 0.22131426995792908, 'epoch': 4} Sample Chinese: {'template': {'p': 1.0, 
'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.22632575757575754, 'r': 0.2845238095238095, 'f1': 0.2521097046413502}, 'combined': 0.16807313642756677, 'epoch': 4} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.33, 'r': 0.358695652173913, 'f1': 0.34375}, 'combined': 0.171875, 'epoch': 4} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3958333333333333, 'r': 0.16379310344827586, 'f1': 0.23170731707317074}, 'combined': 0.15447154471544716, 'epoch': 4} New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3226771034143062, 'r': 0.24887005329496587, 'f1': 0.2810080217567491}, 'combined': 0.20705854234707827, 'epoch': 2} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.31765742528569446, 'r': 0.21878583102707994, 'f1': 0.25911014056081905}, 'combined': 0.16583048995892416, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3098290598290598, 'r': 0.3452380952380952, 'f1': 0.3265765765765765}, 'combined': 0.21771771771771767, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3162928713151927, 'r': 0.3019159226190476, 'f1': 0.308937223145072}, 'combined': 0.22763795389636882, 'epoch': 4} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.35337873562195316, 'r': 0.2613463425879236, 'f1': 0.30047331197902116}, 'combined': 0.1923029196665735, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.33, 'r': 0.358695652173913, 'f1': 0.34375}, 'combined': 0.171875, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3125609955204797, 'r': 0.29657776279500064, 'f1': 0.30435968660011725}, 'combined': 0.22426503223166533, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34660410768177247, 'r': 0.27822857007545915, 'f1': 0.30867516599395367}, 'combined': 0.22131426995792908, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3958333333333333, 'r': 0.16379310344827586, 'f1': 0.23170731707317074}, 'combined': 0.15447154471544716, 'epoch': 4} ****************************** Epoch: 5 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:40:36.993456: step: 2/459, loss: 1.3350651264190674 2023-01-24 00:40:37.606883: step: 4/459, loss: 0.5168742537498474 2023-01-24 00:40:38.228238: step: 6/459, loss: 0.2962683439254761 2023-01-24 00:40:38.883354: step: 8/459, loss: 1.0375233888626099 2023-01-24 00:40:39.494528: step: 10/459, loss: 0.7348809838294983 2023-01-24 00:40:40.099960: step: 12/459, loss: 0.8307719826698303 2023-01-24 00:40:40.707317: step: 14/459, loss: 0.2545337378978729 2023-01-24 00:40:41.466907: step: 16/459, loss: 5.522551536560059 2023-01-24 00:40:42.044740: step: 18/459, loss: 0.5308913588523865 2023-01-24 
00:40:42.596338: step: 20/459, loss: 0.7490206956863403 2023-01-24 00:40:43.199742: step: 22/459, loss: 0.5643554925918579 2023-01-24 00:40:43.814654: step: 24/459, loss: 0.9365861415863037 2023-01-24 00:40:44.418781: step: 26/459, loss: 0.8507208824157715 2023-01-24 00:40:45.013937: step: 28/459, loss: 0.3371944725513458 2023-01-24 00:40:45.641629: step: 30/459, loss: 0.5605995655059814 2023-01-24 00:40:46.245204: step: 32/459, loss: 0.4413914978504181 2023-01-24 00:40:46.838805: step: 34/459, loss: 0.6776484847068787 2023-01-24 00:40:47.447617: step: 36/459, loss: 0.9512380957603455 2023-01-24 00:40:48.084451: step: 38/459, loss: 0.8664429187774658 2023-01-24 00:40:48.698086: step: 40/459, loss: 0.3470512330532074 2023-01-24 00:40:49.347841: step: 42/459, loss: 0.17636220157146454 2023-01-24 00:40:49.947477: step: 44/459, loss: 0.7315127849578857 2023-01-24 00:40:50.627101: step: 46/459, loss: 0.27519622445106506 2023-01-24 00:40:51.236767: step: 48/459, loss: 0.2890176475048065 2023-01-24 00:40:51.834225: step: 50/459, loss: 0.8542979955673218 2023-01-24 00:40:52.496718: step: 52/459, loss: 0.2627634108066559 2023-01-24 00:40:53.043347: step: 54/459, loss: 1.0158345699310303 2023-01-24 00:40:53.704000: step: 56/459, loss: 0.6251516342163086 2023-01-24 00:40:54.327226: step: 58/459, loss: 0.5865209698677063 2023-01-24 00:40:54.928634: step: 60/459, loss: 0.280415803194046 2023-01-24 00:40:55.526838: step: 62/459, loss: 0.5328210592269897 2023-01-24 00:40:56.147961: step: 64/459, loss: 0.5556296110153198 2023-01-24 00:40:56.861900: step: 66/459, loss: 1.8344215154647827 2023-01-24 00:40:57.484456: step: 68/459, loss: 1.1699352264404297 2023-01-24 00:40:58.064810: step: 70/459, loss: 1.864617109298706 2023-01-24 00:40:58.736082: step: 72/459, loss: 0.43091705441474915 2023-01-24 00:40:59.321138: step: 74/459, loss: 0.6262624263763428 2023-01-24 00:40:59.927611: step: 76/459, loss: 0.7180641293525696 2023-01-24 00:41:00.569566: step: 78/459, loss: 0.3747831881046295 2023-01-24 00:41:01.220202: step: 80/459, loss: 1.0896307229995728 2023-01-24 00:41:01.745533: step: 82/459, loss: 0.931465208530426 2023-01-24 00:41:02.360768: step: 84/459, loss: 0.6116186380386353 2023-01-24 00:41:03.146190: step: 86/459, loss: 0.2389732152223587 2023-01-24 00:41:03.781036: step: 88/459, loss: 0.6873553991317749 2023-01-24 00:41:04.368715: step: 90/459, loss: 0.28769755363464355 2023-01-24 00:41:04.933151: step: 92/459, loss: 0.8287485837936401 2023-01-24 00:41:05.501011: step: 94/459, loss: 0.39036083221435547 2023-01-24 00:41:06.125088: step: 96/459, loss: 0.3696744441986084 2023-01-24 00:41:06.743249: step: 98/459, loss: 0.7390285730361938 2023-01-24 00:41:07.331865: step: 100/459, loss: 0.5709620118141174 2023-01-24 00:41:07.985697: step: 102/459, loss: 1.731492042541504 2023-01-24 00:41:08.741710: step: 104/459, loss: 0.7822936773300171 2023-01-24 00:41:09.350523: step: 106/459, loss: 0.17822834849357605 2023-01-24 00:41:09.997721: step: 108/459, loss: 0.47720083594322205 2023-01-24 00:41:10.652140: step: 110/459, loss: 0.1945108324289322 2023-01-24 00:41:11.254755: step: 112/459, loss: 1.0556021928787231 2023-01-24 00:41:11.936712: step: 114/459, loss: 1.1181052923202515 2023-01-24 00:41:12.579951: step: 116/459, loss: 0.4045599102973938 2023-01-24 00:41:13.249884: step: 118/459, loss: 0.7512566447257996 2023-01-24 00:41:13.810391: step: 120/459, loss: 0.518473744392395 2023-01-24 00:41:14.405105: step: 122/459, loss: 0.33870255947113037 2023-01-24 00:41:14.989276: step: 124/459, loss: 
1.4996566772460938 2023-01-24 00:41:15.552097: step: 126/459, loss: 0.39805611968040466 2023-01-24 00:41:16.150755: step: 128/459, loss: 0.45963239669799805 2023-01-24 00:41:16.756409: step: 130/459, loss: 0.5376148819923401 2023-01-24 00:41:17.384983: step: 132/459, loss: 1.2899184226989746 2023-01-24 00:41:18.049204: step: 134/459, loss: 0.5277408957481384 2023-01-24 00:41:18.704871: step: 136/459, loss: 0.11072526127099991 2023-01-24 00:41:19.429118: step: 138/459, loss: 0.46407783031463623 2023-01-24 00:41:20.058156: step: 140/459, loss: 0.746612012386322 2023-01-24 00:41:20.744235: step: 142/459, loss: 1.6934971809387207 2023-01-24 00:41:21.343019: step: 144/459, loss: 0.6887577772140503 2023-01-24 00:41:21.909131: step: 146/459, loss: 0.49923011660575867 2023-01-24 00:41:22.545353: step: 148/459, loss: 2.048987865447998 2023-01-24 00:41:23.166128: step: 150/459, loss: 0.32971322536468506 2023-01-24 00:41:23.849629: step: 152/459, loss: 2.8499953746795654 2023-01-24 00:41:24.457856: step: 154/459, loss: 0.15716907382011414 2023-01-24 00:41:25.140904: step: 156/459, loss: 0.22632338106632233 2023-01-24 00:41:25.745653: step: 158/459, loss: 0.9527252912521362 2023-01-24 00:41:26.343806: step: 160/459, loss: 1.2837871313095093 2023-01-24 00:41:26.976526: step: 162/459, loss: 0.14945092797279358 2023-01-24 00:41:27.662905: step: 164/459, loss: 0.2053154706954956 2023-01-24 00:41:28.343980: step: 166/459, loss: 1.9207288026809692 2023-01-24 00:41:28.925748: step: 168/459, loss: 0.4040410816669464 2023-01-24 00:41:29.543925: step: 170/459, loss: 0.1765812635421753 2023-01-24 00:41:30.193397: step: 172/459, loss: 0.3334006667137146 2023-01-24 00:41:30.886881: step: 174/459, loss: 0.4186074733734131 2023-01-24 00:41:31.534511: step: 176/459, loss: 1.6856000423431396 2023-01-24 00:41:32.217236: step: 178/459, loss: 0.6583086252212524 2023-01-24 00:41:32.832612: step: 180/459, loss: 0.5396273136138916 2023-01-24 00:41:33.371636: step: 182/459, loss: 0.20415757596492767 2023-01-24 00:41:33.935881: step: 184/459, loss: 0.15699048340320587 2023-01-24 00:41:34.637763: step: 186/459, loss: 0.8924119472503662 2023-01-24 00:41:35.266461: step: 188/459, loss: 0.6504970788955688 2023-01-24 00:41:35.796931: step: 190/459, loss: 0.5080581903457642 2023-01-24 00:41:36.407400: step: 192/459, loss: 1.587684154510498 2023-01-24 00:41:37.058564: step: 194/459, loss: 0.22873400151729584 2023-01-24 00:41:37.617692: step: 196/459, loss: 1.4324404001235962 2023-01-24 00:41:38.177673: step: 198/459, loss: 0.6877245903015137 2023-01-24 00:41:38.829203: step: 200/459, loss: 0.19626520574092865 2023-01-24 00:41:39.420904: step: 202/459, loss: 0.42381176352500916 2023-01-24 00:41:39.999116: step: 204/459, loss: 0.23338672518730164 2023-01-24 00:41:40.642276: step: 206/459, loss: 0.5267444849014282 2023-01-24 00:41:41.274568: step: 208/459, loss: 1.5774400234222412 2023-01-24 00:41:41.899481: step: 210/459, loss: 0.23583288490772247 2023-01-24 00:41:42.464374: step: 212/459, loss: 0.39836519956588745 2023-01-24 00:41:43.083266: step: 214/459, loss: 0.5613749027252197 2023-01-24 00:41:43.609541: step: 216/459, loss: 1.0289984941482544 2023-01-24 00:41:44.229333: step: 218/459, loss: 0.24204517900943756 2023-01-24 00:41:44.915517: step: 220/459, loss: 0.7583855390548706 2023-01-24 00:41:45.493929: step: 222/459, loss: 0.856198251247406 2023-01-24 00:41:46.138036: step: 224/459, loss: 0.47256386280059814 2023-01-24 00:41:46.735787: step: 226/459, loss: 1.300577998161316 2023-01-24 00:41:47.311784: step: 228/459, loss: 
1.2180429697036743 2023-01-24 00:41:47.931378: step: 230/459, loss: 1.3340908288955688 2023-01-24 00:41:48.558712: step: 232/459, loss: 0.4549231231212616 2023-01-24 00:41:49.244540: step: 234/459, loss: 4.459851264953613 2023-01-24 00:41:49.871607: step: 236/459, loss: 0.5746620893478394 2023-01-24 00:41:50.495751: step: 238/459, loss: 0.18012511730194092 2023-01-24 00:41:51.082532: step: 240/459, loss: 0.6374250650405884 2023-01-24 00:41:51.602406: step: 242/459, loss: 0.709536075592041 2023-01-24 00:41:52.231871: step: 244/459, loss: 0.5505164861679077 2023-01-24 00:41:52.813568: step: 246/459, loss: 0.9417238831520081 2023-01-24 00:41:53.440805: step: 248/459, loss: 0.1889418214559555 2023-01-24 00:41:54.075555: step: 250/459, loss: 0.567048192024231 2023-01-24 00:41:54.738524: step: 252/459, loss: 0.183607280254364 2023-01-24 00:41:55.434640: step: 254/459, loss: 4.350412368774414 2023-01-24 00:41:56.087076: step: 256/459, loss: 1.249659776687622 2023-01-24 00:41:56.666967: step: 258/459, loss: 0.5242674946784973 2023-01-24 00:41:57.316204: step: 260/459, loss: 0.5975053310394287 2023-01-24 00:41:57.915048: step: 262/459, loss: 1.4720535278320312 2023-01-24 00:41:58.504360: step: 264/459, loss: 0.9429743885993958 2023-01-24 00:41:59.120715: step: 266/459, loss: 0.26230302453041077 2023-01-24 00:41:59.739413: step: 268/459, loss: 0.32642433047294617 2023-01-24 00:42:00.392741: step: 270/459, loss: 0.5055562853813171 2023-01-24 00:42:00.973572: step: 272/459, loss: 2.013364791870117 2023-01-24 00:42:01.631185: step: 274/459, loss: 1.078272819519043 2023-01-24 00:42:02.233567: step: 276/459, loss: 0.3377644419670105 2023-01-24 00:42:02.777570: step: 278/459, loss: 1.1068533658981323 2023-01-24 00:42:03.400106: step: 280/459, loss: 1.9005591869354248 2023-01-24 00:42:03.995042: step: 282/459, loss: 0.20530568063259125 2023-01-24 00:42:04.619455: step: 284/459, loss: 1.10928475856781 2023-01-24 00:42:05.226009: step: 286/459, loss: 0.5996169447898865 2023-01-24 00:42:05.839988: step: 288/459, loss: 0.42050492763519287 2023-01-24 00:42:06.506098: step: 290/459, loss: 1.3429816961288452 2023-01-24 00:42:07.102049: step: 292/459, loss: 0.7160494327545166 2023-01-24 00:42:07.787629: step: 294/459, loss: 0.47888827323913574 2023-01-24 00:42:08.343017: step: 296/459, loss: 0.5283575654029846 2023-01-24 00:42:09.044461: step: 298/459, loss: 0.41812095046043396 2023-01-24 00:42:09.648564: step: 300/459, loss: 0.19305609166622162 2023-01-24 00:42:10.242209: step: 302/459, loss: 0.2932583689689636 2023-01-24 00:42:10.954873: step: 304/459, loss: 0.5050584673881531 2023-01-24 00:42:11.555530: step: 306/459, loss: 0.2721902132034302 2023-01-24 00:42:12.190776: step: 308/459, loss: 1.7892006635665894 2023-01-24 00:42:12.807058: step: 310/459, loss: 0.8411896824836731 2023-01-24 00:42:13.433344: step: 312/459, loss: 0.38051146268844604 2023-01-24 00:42:14.074602: step: 314/459, loss: 0.18534474074840546 2023-01-24 00:42:14.680610: step: 316/459, loss: 0.23963455855846405 2023-01-24 00:42:15.309797: step: 318/459, loss: 0.6066209077835083 2023-01-24 00:42:15.968636: step: 320/459, loss: 0.5456112623214722 2023-01-24 00:42:16.571169: step: 322/459, loss: 0.8884451389312744 2023-01-24 00:42:17.181157: step: 324/459, loss: 1.5336533784866333 2023-01-24 00:42:17.831530: step: 326/459, loss: 1.2965835332870483 2023-01-24 00:42:18.369818: step: 328/459, loss: 0.6448809504508972 2023-01-24 00:42:18.964990: step: 330/459, loss: 1.4193603992462158 2023-01-24 00:42:19.568898: step: 332/459, loss: 
0.2406422346830368 2023-01-24 00:42:20.189932: step: 334/459, loss: 0.4080490171909332 2023-01-24 00:42:20.816166: step: 336/459, loss: 0.9783912897109985 2023-01-24 00:42:21.426321: step: 338/459, loss: 0.35314610600471497 2023-01-24 00:42:21.997727: step: 340/459, loss: 0.9117245078086853 2023-01-24 00:42:22.602845: step: 342/459, loss: 1.4157352447509766 2023-01-24 00:42:23.207794: step: 344/459, loss: 0.7039904594421387 2023-01-24 00:42:23.873060: step: 346/459, loss: 0.5267019271850586 2023-01-24 00:42:24.442353: step: 348/459, loss: 1.1777225732803345 2023-01-24 00:42:25.035592: step: 350/459, loss: 0.629917562007904 2023-01-24 00:42:25.655565: step: 352/459, loss: 1.2634320259094238 2023-01-24 00:42:26.323468: step: 354/459, loss: 1.8744292259216309 2023-01-24 00:42:27.011232: step: 356/459, loss: 0.4798222780227661 2023-01-24 00:42:27.639617: step: 358/459, loss: 0.5355122089385986 2023-01-24 00:42:28.219420: step: 360/459, loss: 1.640630841255188 2023-01-24 00:42:28.906284: step: 362/459, loss: 0.2798596918582916 2023-01-24 00:42:29.502679: step: 364/459, loss: 1.1278942823410034 2023-01-24 00:42:30.100401: step: 366/459, loss: 0.703087329864502 2023-01-24 00:42:30.774462: step: 368/459, loss: 0.3806866407394409 2023-01-24 00:42:31.391940: step: 370/459, loss: 0.21391020715236664 2023-01-24 00:42:31.996178: step: 372/459, loss: 0.825805127620697 2023-01-24 00:42:32.670962: step: 374/459, loss: 0.5096412897109985 2023-01-24 00:42:33.309917: step: 376/459, loss: 0.41485539078712463 2023-01-24 00:42:33.952896: step: 378/459, loss: 1.6315208673477173 2023-01-24 00:42:34.609762: step: 380/459, loss: 0.17451781034469604 2023-01-24 00:42:35.256612: step: 382/459, loss: 0.5152484774589539 2023-01-24 00:42:35.906252: step: 384/459, loss: 1.5136151313781738 2023-01-24 00:42:36.560937: step: 386/459, loss: 0.6996681690216064 2023-01-24 00:42:37.179674: step: 388/459, loss: 0.4729287326335907 2023-01-24 00:42:37.818204: step: 390/459, loss: 0.2165934294462204 2023-01-24 00:42:38.393301: step: 392/459, loss: 0.2874615788459778 2023-01-24 00:42:38.972705: step: 394/459, loss: 0.39115092158317566 2023-01-24 00:42:39.588891: step: 396/459, loss: 1.4447615146636963 2023-01-24 00:42:40.145311: step: 398/459, loss: 0.4070597290992737 2023-01-24 00:42:40.699658: step: 400/459, loss: 0.3974751830101013 2023-01-24 00:42:41.360211: step: 402/459, loss: 0.3762752413749695 2023-01-24 00:42:41.962738: step: 404/459, loss: 0.49572092294692993 2023-01-24 00:42:42.561157: step: 406/459, loss: 0.29277580976486206 2023-01-24 00:42:43.170529: step: 408/459, loss: 0.7978237271308899 2023-01-24 00:42:43.851320: step: 410/459, loss: 1.1212607622146606 2023-01-24 00:42:44.492736: step: 412/459, loss: 0.47293832898139954 2023-01-24 00:42:45.075512: step: 414/459, loss: 0.6417720317840576 2023-01-24 00:42:45.668730: step: 416/459, loss: 3.776261329650879 2023-01-24 00:42:46.256006: step: 418/459, loss: 0.6302133202552795 2023-01-24 00:42:46.870399: step: 420/459, loss: 0.8050230145454407 2023-01-24 00:42:47.483284: step: 422/459, loss: 0.5834254622459412 2023-01-24 00:42:48.095404: step: 424/459, loss: 0.4627189636230469 2023-01-24 00:42:48.675414: step: 426/459, loss: 0.6171804070472717 2023-01-24 00:42:49.393076: step: 428/459, loss: 0.7505080699920654 2023-01-24 00:42:49.994549: step: 430/459, loss: 0.31143084168434143 2023-01-24 00:42:50.573018: step: 432/459, loss: 0.5652810335159302 2023-01-24 00:42:51.154161: step: 434/459, loss: 0.4989061951637268 2023-01-24 00:42:51.771269: step: 436/459, loss: 
0.8348990678787231 2023-01-24 00:42:52.558761: step: 438/459, loss: 0.8539979457855225 2023-01-24 00:42:53.170648: step: 440/459, loss: 0.5868257284164429 2023-01-24 00:42:53.795414: step: 442/459, loss: 0.3522438108921051 2023-01-24 00:42:54.392987: step: 444/459, loss: 0.3344798982143402 2023-01-24 00:42:55.116150: step: 446/459, loss: 0.9961092472076416 2023-01-24 00:42:55.727748: step: 448/459, loss: 0.45190855860710144 2023-01-24 00:42:56.343281: step: 450/459, loss: 0.7616454362869263 2023-01-24 00:42:56.969189: step: 452/459, loss: 0.3018345236778259 2023-01-24 00:42:57.578268: step: 454/459, loss: 0.27566561102867126 2023-01-24 00:42:58.216963: step: 456/459, loss: 4.764679908752441 2023-01-24 00:42:58.853199: step: 458/459, loss: 1.8502742052078247 2023-01-24 00:42:59.472027: step: 460/459, loss: 0.33621156215667725 2023-01-24 00:43:00.182360: step: 462/459, loss: 1.3535280227661133 2023-01-24 00:43:00.825736: step: 464/459, loss: 0.5029252767562866 2023-01-24 00:43:01.444769: step: 466/459, loss: 0.8073297739028931 2023-01-24 00:43:02.042655: step: 468/459, loss: 0.4990781545639038 2023-01-24 00:43:02.633412: step: 470/459, loss: 2.69464373588562 2023-01-24 00:43:03.299319: step: 472/459, loss: 0.9806108474731445 2023-01-24 00:43:03.890717: step: 474/459, loss: 1.0467572212219238 2023-01-24 00:43:04.596314: step: 476/459, loss: 0.7564940452575684 2023-01-24 00:43:05.207490: step: 478/459, loss: 1.0070796012878418 2023-01-24 00:43:05.774987: step: 480/459, loss: 1.0026754140853882 2023-01-24 00:43:06.404355: step: 482/459, loss: 0.17400187253952026 2023-01-24 00:43:07.092428: step: 484/459, loss: 0.42104288935661316 2023-01-24 00:43:07.664394: step: 486/459, loss: 0.3807084560394287 2023-01-24 00:43:08.272168: step: 488/459, loss: 12.389572143554688 2023-01-24 00:43:08.862695: step: 490/459, loss: 0.20164155960083008 2023-01-24 00:43:09.545137: step: 492/459, loss: 1.3656644821166992 2023-01-24 00:43:10.178357: step: 494/459, loss: 0.7488237619400024 2023-01-24 00:43:10.801047: step: 496/459, loss: 0.6844214200973511 2023-01-24 00:43:11.416649: step: 498/459, loss: 0.7034590244293213 2023-01-24 00:43:12.009240: step: 500/459, loss: 1.4276388883590698 2023-01-24 00:43:12.651841: step: 502/459, loss: 0.7736982107162476 2023-01-24 00:43:13.321977: step: 504/459, loss: 0.482555627822876 2023-01-24 00:43:13.935518: step: 506/459, loss: 0.38917696475982666 2023-01-24 00:43:14.504824: step: 508/459, loss: 0.5371469259262085 2023-01-24 00:43:15.121489: step: 510/459, loss: 0.5067078471183777 2023-01-24 00:43:15.739623: step: 512/459, loss: 0.6406406164169312 2023-01-24 00:43:16.340109: step: 514/459, loss: 0.62015700340271 2023-01-24 00:43:17.033396: step: 516/459, loss: 3.2091426849365234 2023-01-24 00:43:17.677713: step: 518/459, loss: 0.5918670892715454 2023-01-24 00:43:18.310844: step: 520/459, loss: 0.17960284650325775 2023-01-24 00:43:18.916502: step: 522/459, loss: 0.5730358958244324 2023-01-24 00:43:19.563907: step: 524/459, loss: 0.7482753396034241 2023-01-24 00:43:20.193399: step: 526/459, loss: 0.8529167175292969 2023-01-24 00:43:20.807533: step: 528/459, loss: 2.1800060272216797 2023-01-24 00:43:21.425744: step: 530/459, loss: 1.555212378501892 2023-01-24 00:43:22.113170: step: 532/459, loss: 0.8561269044876099 2023-01-24 00:43:22.783714: step: 534/459, loss: 0.7735949754714966 2023-01-24 00:43:23.362015: step: 536/459, loss: 0.776715099811554 2023-01-24 00:43:23.981267: step: 538/459, loss: 1.4052271842956543 2023-01-24 00:43:24.617353: step: 540/459, loss: 0.6035616397857666 
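Note on the evaluation dicts above: the 'combined' figure is consistent with the product of the template F1 and the slot F1. For the epoch-4 Dev Chinese entry, 0.7368421052631579 * 0.2985244268940453 = 0.219965..., matching the logged value, and the same product reproduces (to floating-point rounding) the Test and Sample 'combined' values for the other languages in that epoch. A minimal sketch of that scoring rule, assuming the product relationship holds (the evaluation code itself is not part of this log, and the helper names below are illustrative only):

def f1(p, r):
    # Harmonic mean of precision and recall; 0.0 when both are zero.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined_score(template, slot):
    # 'combined' as it appears in this log: template F1 multiplied by slot F1.
    return template['f1'] * slot['f1']

# Reproduces the epoch-4 Dev Chinese entry printed above:
template = {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}
slot = {'p': 0.30879753470214, 'r': 0.2889128449675325, 'f1': 0.2985244268940453}
print(f1(template['p'], template['r']))  # ~0.7368421052631579
print(combined_score(template, slot))    # ~0.219965367185086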
2023-01-24 00:43:25.206903: step: 542/459, loss: 0.21154631674289703 2023-01-24 00:43:25.851248: step: 544/459, loss: 0.7711945176124573 2023-01-24 00:43:26.501874: step: 546/459, loss: 0.546257734298706 2023-01-24 00:43:27.117593: step: 548/459, loss: 0.29844075441360474 2023-01-24 00:43:27.825005: step: 550/459, loss: 0.44574832916259766 2023-01-24 00:43:28.421658: step: 552/459, loss: 0.8199281096458435 2023-01-24 00:43:29.004528: step: 554/459, loss: 2.5181710720062256 2023-01-24 00:43:29.651700: step: 556/459, loss: 0.6263920664787292 2023-01-24 00:43:30.217685: step: 558/459, loss: 2.5309157371520996 2023-01-24 00:43:30.850535: step: 560/459, loss: 0.712415337562561 2023-01-24 00:43:31.461101: step: 562/459, loss: 1.2948509454727173 2023-01-24 00:43:32.107655: step: 564/459, loss: 0.7691327333450317 2023-01-24 00:43:32.728053: step: 566/459, loss: 1.0764636993408203 2023-01-24 00:43:33.358822: step: 568/459, loss: 0.9933577179908752 2023-01-24 00:43:33.970545: step: 570/459, loss: 0.4042310416698456 2023-01-24 00:43:34.619974: step: 572/459, loss: 1.4048100709915161 2023-01-24 00:43:35.194902: step: 574/459, loss: 0.7725167870521545 2023-01-24 00:43:35.818880: step: 576/459, loss: 0.20776192843914032 2023-01-24 00:43:36.411039: step: 578/459, loss: 0.31357911229133606 2023-01-24 00:43:36.970076: step: 580/459, loss: 0.4900464415550232 2023-01-24 00:43:37.585846: step: 582/459, loss: 0.48530709743499756 2023-01-24 00:43:38.193160: step: 584/459, loss: 0.1880151629447937 2023-01-24 00:43:38.802246: step: 586/459, loss: 1.0395560264587402 2023-01-24 00:43:39.344957: step: 588/459, loss: 0.5921510457992554 2023-01-24 00:43:40.008287: step: 590/459, loss: 0.34141647815704346 2023-01-24 00:43:40.560887: step: 592/459, loss: 0.6401530504226685 2023-01-24 00:43:41.144105: step: 594/459, loss: 0.5920324325561523 2023-01-24 00:43:41.776121: step: 596/459, loss: 0.37278491258621216 2023-01-24 00:43:42.454579: step: 598/459, loss: 1.2302480936050415 2023-01-24 00:43:43.085820: step: 600/459, loss: 0.2546478509902954 2023-01-24 00:43:43.743824: step: 602/459, loss: 2.5344715118408203 2023-01-24 00:43:44.325194: step: 604/459, loss: 0.1691093146800995 2023-01-24 00:43:45.018046: step: 606/459, loss: 1.043391227722168 2023-01-24 00:43:45.665040: step: 608/459, loss: 1.0832993984222412 2023-01-24 00:43:46.271965: step: 610/459, loss: 0.970702052116394 2023-01-24 00:43:46.849237: step: 612/459, loss: 0.37451431155204773 2023-01-24 00:43:47.424407: step: 614/459, loss: 0.9243134260177612 2023-01-24 00:43:48.002105: step: 616/459, loss: 2.0195934772491455 2023-01-24 00:43:48.658863: step: 618/459, loss: 1.1839174032211304 2023-01-24 00:43:49.295033: step: 620/459, loss: 0.7522904276847839 2023-01-24 00:43:49.918406: step: 622/459, loss: 0.35923314094543457 2023-01-24 00:43:50.535395: step: 624/459, loss: 0.8481441736221313 2023-01-24 00:43:51.121832: step: 626/459, loss: 0.26211681962013245 2023-01-24 00:43:51.759711: step: 628/459, loss: 0.394963800907135 2023-01-24 00:43:52.424688: step: 630/459, loss: 0.6135212779045105 2023-01-24 00:43:53.053383: step: 632/459, loss: 1.7926645278930664 2023-01-24 00:43:53.627991: step: 634/459, loss: 0.48849499225616455 2023-01-24 00:43:54.268529: step: 636/459, loss: 0.6761322617530823 2023-01-24 00:43:54.915105: step: 638/459, loss: 0.5180824995040894 2023-01-24 00:43:55.522084: step: 640/459, loss: 9.745084762573242 2023-01-24 00:43:56.167451: step: 642/459, loss: 0.24573953449726105 2023-01-24 00:43:56.788323: step: 644/459, loss: 0.8935332894325256 2023-01-24 
00:43:57.428691: step: 646/459, loss: 0.2318364530801773 2023-01-24 00:43:58.045460: step: 648/459, loss: 1.0570734739303589 2023-01-24 00:43:58.699822: step: 650/459, loss: 0.7268792390823364 2023-01-24 00:43:59.346963: step: 652/459, loss: 5.192760467529297 2023-01-24 00:43:59.912052: step: 654/459, loss: 0.6227118968963623 2023-01-24 00:44:00.568537: step: 656/459, loss: 0.7543628215789795 2023-01-24 00:44:01.205705: step: 658/459, loss: 1.629045009613037 2023-01-24 00:44:01.785809: step: 660/459, loss: 0.4166935086250305 2023-01-24 00:44:02.479302: step: 662/459, loss: 0.6807982921600342 2023-01-24 00:44:03.063676: step: 664/459, loss: 0.2852991819381714 2023-01-24 00:44:03.711837: step: 666/459, loss: 1.0686405897140503 2023-01-24 00:44:04.349004: step: 668/459, loss: 0.2859634757041931 2023-01-24 00:44:04.942225: step: 670/459, loss: 0.8234341144561768 2023-01-24 00:44:05.587203: step: 672/459, loss: 1.495621681213379 2023-01-24 00:44:06.169462: step: 674/459, loss: 0.5951805114746094 2023-01-24 00:44:06.797768: step: 676/459, loss: 0.6973206400871277 2023-01-24 00:44:07.394209: step: 678/459, loss: 4.413264751434326 2023-01-24 00:44:08.014521: step: 680/459, loss: 0.4154735207557678 2023-01-24 00:44:08.656695: step: 682/459, loss: 0.4093332588672638 2023-01-24 00:44:09.274916: step: 684/459, loss: 0.8105847835540771 2023-01-24 00:44:09.917529: step: 686/459, loss: 0.4331948459148407 2023-01-24 00:44:10.576634: step: 688/459, loss: 1.1826088428497314 2023-01-24 00:44:11.169007: step: 690/459, loss: 0.3970656991004944 2023-01-24 00:44:11.839761: step: 692/459, loss: 0.8727794885635376 2023-01-24 00:44:12.442837: step: 694/459, loss: 1.1520880460739136 2023-01-24 00:44:13.051767: step: 696/459, loss: 0.36242038011550903 2023-01-24 00:44:13.586646: step: 698/459, loss: 1.0907694101333618 2023-01-24 00:44:14.218499: step: 700/459, loss: 0.41899827122688293 2023-01-24 00:44:14.833411: step: 702/459, loss: 0.766655683517456 2023-01-24 00:44:15.422666: step: 704/459, loss: 0.6376671195030212 2023-01-24 00:44:16.035404: step: 706/459, loss: 0.9438160061836243 2023-01-24 00:44:16.683349: step: 708/459, loss: 0.5267175436019897 2023-01-24 00:44:17.254618: step: 710/459, loss: 0.20388123393058777 2023-01-24 00:44:17.874028: step: 712/459, loss: 0.38563570380210876 2023-01-24 00:44:18.530493: step: 714/459, loss: 5.204307556152344 2023-01-24 00:44:19.131085: step: 716/459, loss: 0.3076077997684479 2023-01-24 00:44:19.730372: step: 718/459, loss: 0.3351518511772156 2023-01-24 00:44:20.435232: step: 720/459, loss: 3.576780319213867 2023-01-24 00:44:20.992036: step: 722/459, loss: 0.6101024150848389 2023-01-24 00:44:21.617458: step: 724/459, loss: 1.4035770893096924 2023-01-24 00:44:22.214625: step: 726/459, loss: 1.2133090496063232 2023-01-24 00:44:22.847258: step: 728/459, loss: 0.6771793961524963 2023-01-24 00:44:23.498518: step: 730/459, loss: 0.44875699281692505 2023-01-24 00:44:24.215876: step: 732/459, loss: 0.5516201853752136 2023-01-24 00:44:24.871258: step: 734/459, loss: 0.4276513159275055 2023-01-24 00:44:25.482341: step: 736/459, loss: 0.24110381305217743 2023-01-24 00:44:26.115432: step: 738/459, loss: 0.42566174268722534 2023-01-24 00:44:26.732856: step: 740/459, loss: 0.9404018521308899 2023-01-24 00:44:27.352582: step: 742/459, loss: 1.013853907585144 2023-01-24 00:44:27.917125: step: 744/459, loss: 0.38294100761413574 2023-01-24 00:44:28.522970: step: 746/459, loss: 1.4316648244857788 2023-01-24 00:44:29.147539: step: 748/459, loss: 0.7595584392547607 2023-01-24 00:44:29.779204: 
step: 750/459, loss: 1.879400372505188 2023-01-24 00:44:30.407440: step: 752/459, loss: 0.18808861076831818 2023-01-24 00:44:31.126597: step: 754/459, loss: 1.10379159450531 2023-01-24 00:44:31.771800: step: 756/459, loss: 1.2431814670562744 2023-01-24 00:44:32.458168: step: 758/459, loss: 0.9345978498458862 2023-01-24 00:44:33.116896: step: 760/459, loss: 5.754981994628906 2023-01-24 00:44:33.703420: step: 762/459, loss: 0.26981714367866516 2023-01-24 00:44:34.358533: step: 764/459, loss: 1.6212433576583862 2023-01-24 00:44:34.984343: step: 766/459, loss: 0.28336021304130554 2023-01-24 00:44:35.586672: step: 768/459, loss: 0.4618211090564728 2023-01-24 00:44:36.261559: step: 770/459, loss: 1.0353784561157227 2023-01-24 00:44:36.873246: step: 772/459, loss: 0.9823098182678223 2023-01-24 00:44:37.519810: step: 774/459, loss: 1.8900784254074097 2023-01-24 00:44:38.133605: step: 776/459, loss: 0.9211585521697998 2023-01-24 00:44:38.707996: step: 778/459, loss: 0.5357157588005066 2023-01-24 00:44:39.384585: step: 780/459, loss: 1.2675426006317139 2023-01-24 00:44:40.058879: step: 782/459, loss: 0.6971933841705322 2023-01-24 00:44:40.611059: step: 784/459, loss: 1.9861958026885986 2023-01-24 00:44:41.219953: step: 786/459, loss: 0.6751170754432678 2023-01-24 00:44:41.877321: step: 788/459, loss: 0.5696123838424683 2023-01-24 00:44:42.474144: step: 790/459, loss: 0.30573949217796326 2023-01-24 00:44:43.053296: step: 792/459, loss: 0.6465668082237244 2023-01-24 00:44:43.604474: step: 794/459, loss: 1.7045904397964478 2023-01-24 00:44:44.187386: step: 796/459, loss: 0.2648695111274719 2023-01-24 00:44:44.790841: step: 798/459, loss: 0.41576629877090454 2023-01-24 00:44:45.369731: step: 800/459, loss: 0.37653395533561707 2023-01-24 00:44:45.984614: step: 802/459, loss: 2.4922096729278564 2023-01-24 00:44:46.669815: step: 804/459, loss: 1.8762781620025635 2023-01-24 00:44:47.250228: step: 806/459, loss: 1.8196154832839966 2023-01-24 00:44:47.834515: step: 808/459, loss: 0.31795042753219604 2023-01-24 00:44:48.465156: step: 810/459, loss: 0.6771062612533569 2023-01-24 00:44:49.063422: step: 812/459, loss: 0.7452657222747803 2023-01-24 00:44:49.717863: step: 814/459, loss: 0.8732942342758179 2023-01-24 00:44:50.408566: step: 816/459, loss: 0.4537571370601654 2023-01-24 00:44:51.140027: step: 818/459, loss: 0.8070897459983826 2023-01-24 00:44:51.775084: step: 820/459, loss: 0.44524234533309937 2023-01-24 00:44:52.405754: step: 822/459, loss: 1.1815237998962402 2023-01-24 00:44:52.998100: step: 824/459, loss: 0.5039702653884888 2023-01-24 00:44:53.546064: step: 826/459, loss: 0.7102353572845459 2023-01-24 00:44:54.132189: step: 828/459, loss: 0.25164708495140076 2023-01-24 00:44:54.745435: step: 830/459, loss: 0.6092464327812195 2023-01-24 00:44:55.330120: step: 832/459, loss: 1.2027238607406616 2023-01-24 00:44:55.991422: step: 834/459, loss: 1.4041070938110352 2023-01-24 00:44:56.606202: step: 836/459, loss: 0.5456593632698059 2023-01-24 00:44:57.175679: step: 838/459, loss: 0.29356956481933594 2023-01-24 00:44:57.767681: step: 840/459, loss: 1.1257473230361938 2023-01-24 00:44:58.375510: step: 842/459, loss: 0.14699606597423553 2023-01-24 00:44:59.014573: step: 844/459, loss: 0.3083524703979492 2023-01-24 00:44:59.593658: step: 846/459, loss: 2.6452484130859375 2023-01-24 00:45:00.223837: step: 848/459, loss: 0.3331630825996399 2023-01-24 00:45:00.956813: step: 850/459, loss: 0.7747106552124023 2023-01-24 00:45:01.520414: step: 852/459, loss: 0.5412964820861816 2023-01-24 00:45:02.173851: step: 
854/459, loss: 0.5587515830993652 2023-01-24 00:45:02.802526: step: 856/459, loss: 0.7291297912597656 2023-01-24 00:45:03.428119: step: 858/459, loss: 0.5207776427268982 2023-01-24 00:45:04.017873: step: 860/459, loss: 1.241825819015503 2023-01-24 00:45:04.681023: step: 862/459, loss: 4.7374138832092285 2023-01-24 00:45:05.312508: step: 864/459, loss: 0.5988766551017761 2023-01-24 00:45:05.928808: step: 866/459, loss: 0.7711248397827148 2023-01-24 00:45:06.553946: step: 868/459, loss: 0.954521656036377 2023-01-24 00:45:07.152794: step: 870/459, loss: 0.4965534806251526 2023-01-24 00:45:07.819165: step: 872/459, loss: 0.5743597149848938 2023-01-24 00:45:08.413541: step: 874/459, loss: 0.694887638092041 2023-01-24 00:45:09.034394: step: 876/459, loss: 2.537919521331787 2023-01-24 00:45:09.641807: step: 878/459, loss: 0.794692873954773 2023-01-24 00:45:10.225188: step: 880/459, loss: 0.9252917170524597 2023-01-24 00:45:10.865501: step: 882/459, loss: 1.592782735824585 2023-01-24 00:45:11.527552: step: 884/459, loss: 1.9193388223648071 2023-01-24 00:45:12.217583: step: 886/459, loss: 0.9571017026901245 2023-01-24 00:45:12.917105: step: 888/459, loss: 0.5261251330375671 2023-01-24 00:45:13.501667: step: 890/459, loss: 0.2237534075975418 2023-01-24 00:45:14.121792: step: 892/459, loss: 0.659477949142456 2023-01-24 00:45:14.741622: step: 894/459, loss: 0.8059289455413818 2023-01-24 00:45:15.432027: step: 896/459, loss: 0.3841007351875305 2023-01-24 00:45:16.163281: step: 898/459, loss: 0.26297727227211 2023-01-24 00:45:16.831606: step: 900/459, loss: 0.30575311183929443 2023-01-24 00:45:17.470303: step: 902/459, loss: 0.33176180720329285 2023-01-24 00:45:18.117159: step: 904/459, loss: 1.5700769424438477 2023-01-24 00:45:18.675962: step: 906/459, loss: 0.640556812286377 2023-01-24 00:45:19.288527: step: 908/459, loss: 1.149547815322876 2023-01-24 00:45:19.988525: step: 910/459, loss: 0.732848584651947 2023-01-24 00:45:20.676570: step: 912/459, loss: 0.7447565793991089 2023-01-24 00:45:21.283648: step: 914/459, loss: 0.3528202474117279 2023-01-24 00:45:21.888897: step: 916/459, loss: 0.643644392490387 2023-01-24 00:45:22.509509: step: 918/459, loss: 0.8994807004928589 2023-01-24 00:45:22.960781: step: 920/459, loss: 0.0029027729760855436 ================================================== Loss: 0.900 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3420992658657132, 'r': 0.32067749020429287, 'f1': 0.331042188712365}, 'combined': 0.24392582326174264, 'epoch': 5} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3383810640925978, 'r': 0.2494791299809971, 'f1': 0.28720778961705584}, 'combined': 0.18381298535491572, 'epoch': 5} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3342478880342958, 'r': 0.3266369304319968, 'f1': 0.33039858414138645}, 'combined': 0.24345158831470579, 'epoch': 5} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3413499740991752, 'r': 0.24608229950967814, 'f1': 0.28599105067157526}, 'combined': 0.18303427242980813, 'epoch': 5} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3479996432456939, 'r': 0.3248877124798508, 'f1': 0.3360467605041833}, 'combined': 0.24761340247676664, 'epoch': 5} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 
0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.33797068513604633, 'r': 0.2628660884391471, 'f1': 0.29572434949404053}, 'combined': 0.21202877888251964, 'epoch': 5} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2660984848484848, 'r': 0.3345238095238095, 'f1': 0.29641350210970463}, 'combined': 0.19760900140646975, 'epoch': 5} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3269230769230769, 'r': 0.3695652173913043, 'f1': 0.346938775510204}, 'combined': 0.173469387755102, 'epoch': 5} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.10344827586206896, 'f1': 0.15789473684210528}, 'combined': 0.10526315789473685, 'epoch': 5} New best chinese model... New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3420992658657132, 'r': 0.32067749020429287, 'f1': 0.331042188712365}, 'combined': 0.24392582326174264, 'epoch': 5} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3383810640925978, 'r': 0.2494791299809971, 'f1': 0.28720778961705584}, 'combined': 0.18381298535491572, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2660984848484848, 'r': 0.3345238095238095, 'f1': 0.29641350210970463}, 'combined': 0.19760900140646975, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3342478880342958, 'r': 0.3266369304319968, 'f1': 0.33039858414138645}, 'combined': 0.24345158831470579, 'epoch': 5} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3413499740991752, 'r': 0.24608229950967814, 'f1': 0.28599105067157526}, 'combined': 0.18303427242980813, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3269230769230769, 'r': 0.3695652173913043, 'f1': 0.346938775510204}, 'combined': 0.173469387755102, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3125609955204797, 'r': 0.29657776279500064, 'f1': 0.30435968660011725}, 'combined': 0.22426503223166533, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34660410768177247, 'r': 0.27822857007545915, 'f1': 0.30867516599395367}, 'combined': 0.22131426995792908, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3958333333333333, 'r': 0.16379310344827586, 'f1': 0.23170731707317074}, 'combined': 0.15447154471544716, 'epoch': 4} ****************************** Epoch: 6 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:48:11.897024: step: 2/459, loss: 0.3404897451400757 2023-01-24 00:48:12.496566: step: 4/459, loss: 0.3993445932865143 2023-01-24 00:48:13.111590: step: 6/459, loss: 1.0720512866973877 2023-01-24 00:48:13.801344: step: 8/459, loss: 0.9146129488945007 2023-01-24 00:48:14.401345: step: 10/459, loss: 0.5998106002807617 
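Every epoch in this log follows the same layout: per-step records of the form '<timestamp>: step: N/459, loss: X', a rule of '=' signs, an epoch-level 'Loss:' line (presumably the mean over that epoch's steps), Dev/Test/Sample metric dicts for Chinese, Korean and Russian, any 'New best ... model...' notices, and a 'Current best result' block. A short sketch for pulling the per-step losses and the per-epoch metric dicts back out of a captured copy of this output; the file name 'train.log' and the helper names are assumptions for illustration, not part of the run:

import ast
import re

STEP_RE = re.compile(r"step: (\d+)/459, loss: ([0-9.]+)")
# Matches e.g. "Dev Chinese: {'template': {...}, 'slot': {...}, 'combined': ..., 'epoch': 5}"
METRIC_RE = re.compile(r"(Dev|Test|Sample) (Chinese|Korean|Russian): (\{.*?'epoch': \d+\})", re.DOTALL)

def parse_log(text):
    # Per-step losses in the order they were printed (step numbers restart each epoch).
    step_losses = [(int(n), float(loss)) for n, loss in STEP_RE.findall(text)]
    # Per-epoch metric dicts keyed by (split, language, epoch).
    metrics = {}
    for split, lang, blob in METRIC_RE.findall(text):
        d = ast.literal_eval(blob)
        metrics[(split, lang, d['epoch'])] = d
    return step_losses, metrics

with open('train.log') as fh:  # assumed capture of the output shown here
    steps, metrics = parse_log(fh.read())
print(metrics[('Dev', 'Korean', 5)]['slot']['f1'])  # 0.33039858414138645 per this log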
2023-01-24 00:48:15.008901: step: 12/459, loss: 0.2391183227300644 2023-01-24 00:48:15.595154: step: 14/459, loss: 0.7937113046646118 2023-01-24 00:48:16.295782: step: 16/459, loss: 0.2013617306947708 2023-01-24 00:48:16.912864: step: 18/459, loss: 0.3372349739074707 2023-01-24 00:48:17.509367: step: 20/459, loss: 1.1121702194213867 2023-01-24 00:48:18.168970: step: 22/459, loss: 1.0827711820602417 2023-01-24 00:48:18.799688: step: 24/459, loss: 0.19218827784061432 2023-01-24 00:48:19.434602: step: 26/459, loss: 1.871245265007019 2023-01-24 00:48:20.042110: step: 28/459, loss: 0.34299689531326294 2023-01-24 00:48:20.642483: step: 30/459, loss: 0.412228524684906 2023-01-24 00:48:21.281667: step: 32/459, loss: 1.0236657857894897 2023-01-24 00:48:21.820842: step: 34/459, loss: 0.32984787225723267 2023-01-24 00:48:22.461497: step: 36/459, loss: 0.2973814010620117 2023-01-24 00:48:23.072429: step: 38/459, loss: 0.2596169412136078 2023-01-24 00:48:23.701049: step: 40/459, loss: 0.658793032169342 2023-01-24 00:48:24.274532: step: 42/459, loss: 0.3969990909099579 2023-01-24 00:48:24.901604: step: 44/459, loss: 0.5447830557823181 2023-01-24 00:48:25.503567: step: 46/459, loss: 0.9561908841133118 2023-01-24 00:48:26.198939: step: 48/459, loss: 0.6440781950950623 2023-01-24 00:48:26.831137: step: 50/459, loss: 0.22351990640163422 2023-01-24 00:48:27.414610: step: 52/459, loss: 0.4453563988208771 2023-01-24 00:48:28.043165: step: 54/459, loss: 0.5085197687149048 2023-01-24 00:48:28.608427: step: 56/459, loss: 0.21888507902622223 2023-01-24 00:48:29.230886: step: 58/459, loss: 0.9013737440109253 2023-01-24 00:48:29.872515: step: 60/459, loss: 2.7046022415161133 2023-01-24 00:48:30.632392: step: 62/459, loss: 0.5445334911346436 2023-01-24 00:48:31.234447: step: 64/459, loss: 0.5855793356895447 2023-01-24 00:48:31.826770: step: 66/459, loss: 0.6187131404876709 2023-01-24 00:48:32.424258: step: 68/459, loss: 0.2826734781265259 2023-01-24 00:48:33.066369: step: 70/459, loss: 0.4370558261871338 2023-01-24 00:48:33.650998: step: 72/459, loss: 0.7797702550888062 2023-01-24 00:48:34.333698: step: 74/459, loss: 0.856144905090332 2023-01-24 00:48:34.941487: step: 76/459, loss: 0.27525249123573303 2023-01-24 00:48:35.646973: step: 78/459, loss: 0.3428467810153961 2023-01-24 00:48:36.281245: step: 80/459, loss: 0.4608880877494812 2023-01-24 00:48:36.853777: step: 82/459, loss: 0.36851176619529724 2023-01-24 00:48:37.444267: step: 84/459, loss: 0.30663827061653137 2023-01-24 00:48:38.036850: step: 86/459, loss: 0.3170296549797058 2023-01-24 00:48:38.675080: step: 88/459, loss: 0.7406253814697266 2023-01-24 00:48:39.256943: step: 90/459, loss: 0.5368050932884216 2023-01-24 00:48:39.889057: step: 92/459, loss: 1.3657344579696655 2023-01-24 00:48:40.443573: step: 94/459, loss: 2.101508140563965 2023-01-24 00:48:40.997550: step: 96/459, loss: 2.008885622024536 2023-01-24 00:48:41.597372: step: 98/459, loss: 0.5314332842826843 2023-01-24 00:48:42.179025: step: 100/459, loss: 0.7093220949172974 2023-01-24 00:48:42.764584: step: 102/459, loss: 0.4726252555847168 2023-01-24 00:48:43.428336: step: 104/459, loss: 0.5273478627204895 2023-01-24 00:48:43.972628: step: 106/459, loss: 0.9441933035850525 2023-01-24 00:48:44.640378: step: 108/459, loss: 0.2768445611000061 2023-01-24 00:48:45.191956: step: 110/459, loss: 0.3269539475440979 2023-01-24 00:48:45.864314: step: 112/459, loss: 0.2701319456100464 2023-01-24 00:48:46.435170: step: 114/459, loss: 0.2562897801399231 2023-01-24 00:48:47.046994: step: 116/459, loss: 
0.6690642833709717 2023-01-24 00:48:47.634757: step: 118/459, loss: 0.31999242305755615 2023-01-24 00:48:48.259515: step: 120/459, loss: 1.394294023513794 2023-01-24 00:48:48.877911: step: 122/459, loss: 0.5762302279472351 2023-01-24 00:48:49.496304: step: 124/459, loss: 1.0574184656143188 2023-01-24 00:48:50.057870: step: 126/459, loss: 1.8061827421188354 2023-01-24 00:48:50.675900: step: 128/459, loss: 0.37834274768829346 2023-01-24 00:48:51.376798: step: 130/459, loss: 0.18257318437099457 2023-01-24 00:48:52.001700: step: 132/459, loss: 0.6305859684944153 2023-01-24 00:48:52.719727: step: 134/459, loss: 0.29179129004478455 2023-01-24 00:48:53.300750: step: 136/459, loss: 0.49877816438674927 2023-01-24 00:48:53.885143: step: 138/459, loss: 0.19586192071437836 2023-01-24 00:48:54.489707: step: 140/459, loss: 0.32382532954216003 2023-01-24 00:48:55.227576: step: 142/459, loss: 0.5080638527870178 2023-01-24 00:48:55.827106: step: 144/459, loss: 0.17365233600139618 2023-01-24 00:48:56.449026: step: 146/459, loss: 0.1539848893880844 2023-01-24 00:48:57.063614: step: 148/459, loss: 0.31484097242355347 2023-01-24 00:48:57.650793: step: 150/459, loss: 0.3517445921897888 2023-01-24 00:48:58.271778: step: 152/459, loss: 0.8139705657958984 2023-01-24 00:48:58.956789: step: 154/459, loss: 0.5890936851501465 2023-01-24 00:48:59.622629: step: 156/459, loss: 0.2747838795185089 2023-01-24 00:49:00.263834: step: 158/459, loss: 0.8587355017662048 2023-01-24 00:49:00.866200: step: 160/459, loss: 0.32148146629333496 2023-01-24 00:49:01.492943: step: 162/459, loss: 1.5100023746490479 2023-01-24 00:49:02.142423: step: 164/459, loss: 0.5357568860054016 2023-01-24 00:49:02.703714: step: 166/459, loss: 0.200309619307518 2023-01-24 00:49:03.317851: step: 168/459, loss: 1.5336840152740479 2023-01-24 00:49:03.930474: step: 170/459, loss: 0.17371316254138947 2023-01-24 00:49:04.515074: step: 172/459, loss: 0.33880847692489624 2023-01-24 00:49:05.140972: step: 174/459, loss: 0.3481345474720001 2023-01-24 00:49:05.812169: step: 176/459, loss: 0.6612433791160583 2023-01-24 00:49:06.514643: step: 178/459, loss: 0.2732468545436859 2023-01-24 00:49:07.162690: step: 180/459, loss: 0.8776035904884338 2023-01-24 00:49:07.807097: step: 182/459, loss: 0.3088020384311676 2023-01-24 00:49:08.406010: step: 184/459, loss: 0.0927431583404541 2023-01-24 00:49:09.139388: step: 186/459, loss: 0.7991250157356262 2023-01-24 00:49:09.743715: step: 188/459, loss: 0.3279609680175781 2023-01-24 00:49:10.367763: step: 190/459, loss: 0.27734288573265076 2023-01-24 00:49:10.995526: step: 192/459, loss: 0.31704679131507874 2023-01-24 00:49:11.599842: step: 194/459, loss: 2.1975414752960205 2023-01-24 00:49:12.255247: step: 196/459, loss: 1.182508945465088 2023-01-24 00:49:12.826137: step: 198/459, loss: 0.12424883991479874 2023-01-24 00:49:13.519961: step: 200/459, loss: 0.8645827174186707 2023-01-24 00:49:14.134767: step: 202/459, loss: 0.3041526675224304 2023-01-24 00:49:14.738677: step: 204/459, loss: 0.657402753829956 2023-01-24 00:49:15.343703: step: 206/459, loss: 0.8738991022109985 2023-01-24 00:49:16.020005: step: 208/459, loss: 0.28925588726997375 2023-01-24 00:49:16.614766: step: 210/459, loss: 0.46913835406303406 2023-01-24 00:49:17.256477: step: 212/459, loss: 0.5695966482162476 2023-01-24 00:49:17.916505: step: 214/459, loss: 0.2092171013355255 2023-01-24 00:49:18.669682: step: 216/459, loss: 1.0854508876800537 2023-01-24 00:49:19.296113: step: 218/459, loss: 0.9071727395057678 2023-01-24 00:49:19.934320: step: 220/459, loss: 
0.6385606527328491 2023-01-24 00:49:20.561594: step: 222/459, loss: 0.8240488767623901 2023-01-24 00:49:21.121099: step: 224/459, loss: 0.3217650353908539 2023-01-24 00:49:21.717754: step: 226/459, loss: 0.3348287045955658 2023-01-24 00:49:22.376502: step: 228/459, loss: 0.9553728103637695 2023-01-24 00:49:22.969552: step: 230/459, loss: 0.20827718079090118 2023-01-24 00:49:23.628428: step: 232/459, loss: 0.8579831123352051 2023-01-24 00:49:24.216198: step: 234/459, loss: 2.642120361328125 2023-01-24 00:49:24.822765: step: 236/459, loss: 0.1856175661087036 2023-01-24 00:49:25.458306: step: 238/459, loss: 0.2845785617828369 2023-01-24 00:49:26.097780: step: 240/459, loss: 0.3059217929840088 2023-01-24 00:49:26.712094: step: 242/459, loss: 1.0357730388641357 2023-01-24 00:49:27.326555: step: 244/459, loss: 0.4379543364048004 2023-01-24 00:49:27.971467: step: 246/459, loss: 0.5856521129608154 2023-01-24 00:49:28.556527: step: 248/459, loss: 0.5762928128242493 2023-01-24 00:49:29.125457: step: 250/459, loss: 0.9160224199295044 2023-01-24 00:49:29.758884: step: 252/459, loss: 0.47633934020996094 2023-01-24 00:49:30.429482: step: 254/459, loss: 0.5144652128219604 2023-01-24 00:49:31.061784: step: 256/459, loss: 0.27219870686531067 2023-01-24 00:49:31.667072: step: 258/459, loss: 0.9914804697036743 2023-01-24 00:49:32.292775: step: 260/459, loss: 0.9181454181671143 2023-01-24 00:49:32.942828: step: 262/459, loss: 1.0782577991485596 2023-01-24 00:49:33.532397: step: 264/459, loss: 0.8498926162719727 2023-01-24 00:49:34.098916: step: 266/459, loss: 1.4435192346572876 2023-01-24 00:49:34.666704: step: 268/459, loss: 0.24223601818084717 2023-01-24 00:49:35.253702: step: 270/459, loss: 1.3100476264953613 2023-01-24 00:49:35.849449: step: 272/459, loss: 2.295886754989624 2023-01-24 00:49:36.477955: step: 274/459, loss: 0.3300009071826935 2023-01-24 00:49:37.102643: step: 276/459, loss: 0.49039706587791443 2023-01-24 00:49:37.749974: step: 278/459, loss: 0.36507850885391235 2023-01-24 00:49:38.384321: step: 280/459, loss: 0.9330970048904419 2023-01-24 00:49:38.959860: step: 282/459, loss: 2.5044729709625244 2023-01-24 00:49:39.639269: step: 284/459, loss: 0.9656766653060913 2023-01-24 00:49:40.208297: step: 286/459, loss: 0.610763430595398 2023-01-24 00:49:40.832766: step: 288/459, loss: 0.9101207256317139 2023-01-24 00:49:41.529272: step: 290/459, loss: 0.811351478099823 2023-01-24 00:49:42.169731: step: 292/459, loss: 0.2169988751411438 2023-01-24 00:49:42.776208: step: 294/459, loss: 0.28468021750450134 2023-01-24 00:49:43.376385: step: 296/459, loss: 0.5222878456115723 2023-01-24 00:49:44.018153: step: 298/459, loss: 0.2430160492658615 2023-01-24 00:49:44.602459: step: 300/459, loss: 0.49812206625938416 2023-01-24 00:49:45.180724: step: 302/459, loss: 0.7962859272956848 2023-01-24 00:49:45.813584: step: 304/459, loss: 0.47871842980384827 2023-01-24 00:49:46.447437: step: 306/459, loss: 0.10140848159790039 2023-01-24 00:49:47.063562: step: 308/459, loss: 0.7755213975906372 2023-01-24 00:49:47.657769: step: 310/459, loss: 0.3561386466026306 2023-01-24 00:49:48.249378: step: 312/459, loss: 0.2589372992515564 2023-01-24 00:49:48.852864: step: 314/459, loss: 2.1230077743530273 2023-01-24 00:49:49.480016: step: 316/459, loss: 0.21782715618610382 2023-01-24 00:49:50.201774: step: 318/459, loss: 0.8292707204818726 2023-01-24 00:49:50.777010: step: 320/459, loss: 0.7143049240112305 2023-01-24 00:49:51.445235: step: 322/459, loss: 0.31740376353263855 2023-01-24 00:49:52.046557: step: 324/459, loss: 
0.5817620158195496 2023-01-24 00:49:52.667157: step: 326/459, loss: 0.7270742654800415 2023-01-24 00:49:53.261468: step: 328/459, loss: 0.7938922047615051 2023-01-24 00:49:53.875419: step: 330/459, loss: 0.4994628429412842 2023-01-24 00:49:54.523423: step: 332/459, loss: 0.2412312924861908 2023-01-24 00:49:55.062498: step: 334/459, loss: 0.36995673179626465 2023-01-24 00:49:55.672919: step: 336/459, loss: 0.46129924058914185 2023-01-24 00:49:56.352739: step: 338/459, loss: 9.791744232177734 2023-01-24 00:49:56.972661: step: 340/459, loss: 0.2770744264125824 2023-01-24 00:49:57.550642: step: 342/459, loss: 0.40765219926834106 2023-01-24 00:49:58.198634: step: 344/459, loss: 0.6366263628005981 2023-01-24 00:49:58.850266: step: 346/459, loss: 0.5911754965782166 2023-01-24 00:49:59.436715: step: 348/459, loss: 1.9898974895477295 2023-01-24 00:50:00.094759: step: 350/459, loss: 0.5940861701965332 2023-01-24 00:50:00.732802: step: 352/459, loss: 0.7863019704818726 2023-01-24 00:50:01.309962: step: 354/459, loss: 0.6435151100158691 2023-01-24 00:50:01.899627: step: 356/459, loss: 0.4073781669139862 2023-01-24 00:50:02.531970: step: 358/459, loss: 0.2378418892621994 2023-01-24 00:50:03.145647: step: 360/459, loss: 0.4949759840965271 2023-01-24 00:50:03.786772: step: 362/459, loss: 0.8044207096099854 2023-01-24 00:50:04.392350: step: 364/459, loss: 0.2739481031894684 2023-01-24 00:50:05.022306: step: 366/459, loss: 1.6759138107299805 2023-01-24 00:50:05.621991: step: 368/459, loss: 0.2754996418952942 2023-01-24 00:50:06.260804: step: 370/459, loss: 0.24065321683883667 2023-01-24 00:50:06.887430: step: 372/459, loss: 0.3830952048301697 2023-01-24 00:50:07.450056: step: 374/459, loss: 0.7827904224395752 2023-01-24 00:50:08.063722: step: 376/459, loss: 0.38260209560394287 2023-01-24 00:50:08.697898: step: 378/459, loss: 0.4666784405708313 2023-01-24 00:50:09.345050: step: 380/459, loss: 0.684164822101593 2023-01-24 00:50:09.978826: step: 382/459, loss: 0.45347100496292114 2023-01-24 00:50:10.603469: step: 384/459, loss: 0.8647016882896423 2023-01-24 00:50:11.251344: step: 386/459, loss: 0.46838682889938354 2023-01-24 00:50:11.866918: step: 388/459, loss: 0.22502900660037994 2023-01-24 00:50:12.455216: step: 390/459, loss: 0.21021461486816406 2023-01-24 00:50:13.048122: step: 392/459, loss: 0.740106999874115 2023-01-24 00:50:13.658022: step: 394/459, loss: 0.5784661769866943 2023-01-24 00:50:14.255760: step: 396/459, loss: 0.6859101057052612 2023-01-24 00:50:14.884487: step: 398/459, loss: 0.212942972779274 2023-01-24 00:50:15.454285: step: 400/459, loss: 0.23484578728675842 2023-01-24 00:50:16.048530: step: 402/459, loss: 0.2542991638183594 2023-01-24 00:50:16.610261: step: 404/459, loss: 0.5848116278648376 2023-01-24 00:50:17.242359: step: 406/459, loss: 0.7784409523010254 2023-01-24 00:50:17.846845: step: 408/459, loss: 0.8925454020500183 2023-01-24 00:50:18.416951: step: 410/459, loss: 0.5651633739471436 2023-01-24 00:50:19.055390: step: 412/459, loss: 0.812149167060852 2023-01-24 00:50:19.685414: step: 414/459, loss: 0.5169521570205688 2023-01-24 00:50:20.400347: step: 416/459, loss: 0.9354329109191895 2023-01-24 00:50:20.995319: step: 418/459, loss: 0.33929744362831116 2023-01-24 00:50:21.642306: step: 420/459, loss: 0.22303368151187897 2023-01-24 00:50:22.250114: step: 422/459, loss: 1.2103171348571777 2023-01-24 00:50:22.834084: step: 424/459, loss: 0.33703598380088806 2023-01-24 00:50:23.490924: step: 426/459, loss: 0.35393232107162476 2023-01-24 00:50:24.145030: step: 428/459, loss: 
0.5806617736816406 2023-01-24 00:50:24.841537: step: 430/459, loss: 0.784831166267395 2023-01-24 00:50:25.434839: step: 432/459, loss: 0.9833120107650757 2023-01-24 00:50:26.078008: step: 434/459, loss: 0.6225362420082092 2023-01-24 00:50:26.741490: step: 436/459, loss: 1.3320690393447876 2023-01-24 00:50:27.392661: step: 438/459, loss: 0.7534404397010803 2023-01-24 00:50:28.028445: step: 440/459, loss: 0.20451362431049347 2023-01-24 00:50:28.644568: step: 442/459, loss: 0.5430547595024109 2023-01-24 00:50:29.254748: step: 444/459, loss: 0.3987220823764801 2023-01-24 00:50:29.834134: step: 446/459, loss: 0.4369630813598633 2023-01-24 00:50:30.484743: step: 448/459, loss: 0.8301814794540405 2023-01-24 00:50:31.074895: step: 450/459, loss: 0.8947480320930481 2023-01-24 00:50:31.720867: step: 452/459, loss: 1.4286450147628784 2023-01-24 00:50:32.376976: step: 454/459, loss: 0.3729023039340973 2023-01-24 00:50:32.997037: step: 456/459, loss: 0.736352801322937 2023-01-24 00:50:33.629138: step: 458/459, loss: 0.6933873295783997 2023-01-24 00:50:34.276568: step: 460/459, loss: 0.19718407094478607 2023-01-24 00:50:35.035434: step: 462/459, loss: 0.3349815905094147 2023-01-24 00:50:35.585764: step: 464/459, loss: 0.9142324924468994 2023-01-24 00:50:36.196702: step: 466/459, loss: 0.9287815093994141 2023-01-24 00:50:36.772645: step: 468/459, loss: 0.4835042953491211 2023-01-24 00:50:37.380917: step: 470/459, loss: 0.71302729845047 2023-01-24 00:50:38.023825: step: 472/459, loss: 0.2645386755466461 2023-01-24 00:50:38.747341: step: 474/459, loss: 0.6493459939956665 2023-01-24 00:50:39.415658: step: 476/459, loss: 0.3974875211715698 2023-01-24 00:50:39.986913: step: 478/459, loss: 0.3688303232192993 2023-01-24 00:50:40.608980: step: 480/459, loss: 0.19277414679527283 2023-01-24 00:50:41.213819: step: 482/459, loss: 0.14478661119937897 2023-01-24 00:50:41.826923: step: 484/459, loss: 0.19952693581581116 2023-01-24 00:50:42.431311: step: 486/459, loss: 0.18047650158405304 2023-01-24 00:50:43.080917: step: 488/459, loss: 0.34557104110717773 2023-01-24 00:50:43.711509: step: 490/459, loss: 0.5280262231826782 2023-01-24 00:50:44.272667: step: 492/459, loss: 0.887000560760498 2023-01-24 00:50:44.876906: step: 494/459, loss: 0.2418712079524994 2023-01-24 00:50:45.535058: step: 496/459, loss: 0.8627141118049622 2023-01-24 00:50:46.204558: step: 498/459, loss: 0.32760846614837646 2023-01-24 00:50:46.769514: step: 500/459, loss: 1.134730577468872 2023-01-24 00:50:47.330993: step: 502/459, loss: 0.9593404531478882 2023-01-24 00:50:47.949862: step: 504/459, loss: 1.1005994081497192 2023-01-24 00:50:48.553539: step: 506/459, loss: 0.5202370882034302 2023-01-24 00:50:49.174333: step: 508/459, loss: 0.9835485816001892 2023-01-24 00:50:49.788944: step: 510/459, loss: 0.6101159453392029 2023-01-24 00:50:50.404039: step: 512/459, loss: 0.3262729346752167 2023-01-24 00:50:51.008868: step: 514/459, loss: 0.7127304077148438 2023-01-24 00:50:51.719957: step: 516/459, loss: 0.5184987783432007 2023-01-24 00:50:52.322280: step: 518/459, loss: 0.1847616732120514 2023-01-24 00:50:52.961137: step: 520/459, loss: 1.2310789823532104 2023-01-24 00:50:53.618062: step: 522/459, loss: 0.16002288460731506 2023-01-24 00:50:54.260189: step: 524/459, loss: 0.4043094515800476 2023-01-24 00:50:54.842246: step: 526/459, loss: 0.316740483045578 2023-01-24 00:50:55.465977: step: 528/459, loss: 0.33766359090805054 2023-01-24 00:50:56.064565: step: 530/459, loss: 0.4942038655281067 2023-01-24 00:50:56.700175: step: 532/459, loss: 
0.5698519349098206 2023-01-24 00:50:57.399445: step: 534/459, loss: 0.5358213186264038 2023-01-24 00:50:58.135308: step: 536/459, loss: 0.3858981430530548 2023-01-24 00:50:58.726464: step: 538/459, loss: 1.3829197883605957 2023-01-24 00:50:59.383827: step: 540/459, loss: 0.4551105499267578 2023-01-24 00:51:00.007939: step: 542/459, loss: 0.25362101197242737 2023-01-24 00:51:00.613963: step: 544/459, loss: 0.8836112022399902 2023-01-24 00:51:01.237418: step: 546/459, loss: 0.21327312290668488 2023-01-24 00:51:01.876974: step: 548/459, loss: 0.37403059005737305 2023-01-24 00:51:02.438445: step: 550/459, loss: 0.1915343850851059 2023-01-24 00:51:03.034911: step: 552/459, loss: 0.463436484336853 2023-01-24 00:51:03.696173: step: 554/459, loss: 0.6088221669197083 2023-01-24 00:51:04.413439: step: 556/459, loss: 0.5528003573417664 2023-01-24 00:51:05.038547: step: 558/459, loss: 0.8970747590065002 2023-01-24 00:51:05.683055: step: 560/459, loss: 0.1441963165998459 2023-01-24 00:51:06.312516: step: 562/459, loss: 0.39651456475257874 2023-01-24 00:51:06.866665: step: 564/459, loss: 0.7788552641868591 2023-01-24 00:51:07.461180: step: 566/459, loss: 1.3356953859329224 2023-01-24 00:51:08.062782: step: 568/459, loss: 0.6358245015144348 2023-01-24 00:51:08.684155: step: 570/459, loss: 0.3151633143424988 2023-01-24 00:51:09.390485: step: 572/459, loss: 0.3682290017604828 2023-01-24 00:51:10.071438: step: 574/459, loss: 0.5264793634414673 2023-01-24 00:51:10.732068: step: 576/459, loss: 0.23814167082309723 2023-01-24 00:51:11.325826: step: 578/459, loss: 0.28445345163345337 2023-01-24 00:51:11.906085: step: 580/459, loss: 0.3681282699108124 2023-01-24 00:51:12.562825: step: 582/459, loss: 1.2784503698349 2023-01-24 00:51:13.193104: step: 584/459, loss: 0.9397075176239014 2023-01-24 00:51:13.834555: step: 586/459, loss: 0.36048585176467896 2023-01-24 00:51:14.399503: step: 588/459, loss: 0.6936590671539307 2023-01-24 00:51:14.976140: step: 590/459, loss: 0.18044055998325348 2023-01-24 00:51:15.613433: step: 592/459, loss: 0.5482950210571289 2023-01-24 00:51:16.196664: step: 594/459, loss: 0.3720829486846924 2023-01-24 00:51:16.849147: step: 596/459, loss: 0.329839289188385 2023-01-24 00:51:17.432661: step: 598/459, loss: 1.0255767107009888 2023-01-24 00:51:18.059804: step: 600/459, loss: 1.1389281749725342 2023-01-24 00:51:18.671136: step: 602/459, loss: 0.25199180841445923 2023-01-24 00:51:19.278479: step: 604/459, loss: 0.518252968788147 2023-01-24 00:51:19.885622: step: 606/459, loss: 1.1184337139129639 2023-01-24 00:51:20.495362: step: 608/459, loss: 0.21473319828510284 2023-01-24 00:51:21.136761: step: 610/459, loss: 0.40481510758399963 2023-01-24 00:51:21.754179: step: 612/459, loss: 0.51793372631073 2023-01-24 00:51:22.354014: step: 614/459, loss: 0.8637856245040894 2023-01-24 00:51:23.003581: step: 616/459, loss: 0.4353800415992737 2023-01-24 00:51:23.604403: step: 618/459, loss: 1.2602522373199463 2023-01-24 00:51:24.264849: step: 620/459, loss: 0.4602583944797516 2023-01-24 00:51:24.938683: step: 622/459, loss: 0.13622687757015228 2023-01-24 00:51:25.651960: step: 624/459, loss: 0.49223047494888306 2023-01-24 00:51:26.260505: step: 626/459, loss: 1.178909182548523 2023-01-24 00:51:26.853595: step: 628/459, loss: 0.18967212736606598 2023-01-24 00:51:27.472535: step: 630/459, loss: 0.6049370169639587 2023-01-24 00:51:28.057337: step: 632/459, loss: 1.723682165145874 2023-01-24 00:51:28.708126: step: 634/459, loss: 0.25674501061439514 2023-01-24 00:51:29.298747: step: 636/459, loss: 
0.5015274882316589 2023-01-24 00:51:29.902508: step: 638/459, loss: 0.43934839963912964 2023-01-24 00:51:30.490939: step: 640/459, loss: 0.2769814133644104 2023-01-24 00:51:31.102548: step: 642/459, loss: 1.16580069065094 2023-01-24 00:51:31.742039: step: 644/459, loss: 0.23926465213298798 2023-01-24 00:51:32.349576: step: 646/459, loss: 1.014899730682373 2023-01-24 00:51:32.935788: step: 648/459, loss: 0.8055210709571838 2023-01-24 00:51:33.642728: step: 650/459, loss: 0.2735183835029602 2023-01-24 00:51:34.255912: step: 652/459, loss: 0.35930293798446655 2023-01-24 00:51:34.946821: step: 654/459, loss: 0.30359557271003723 2023-01-24 00:51:35.539832: step: 656/459, loss: 0.23514346778392792 2023-01-24 00:51:36.231734: step: 658/459, loss: 0.9010862112045288 2023-01-24 00:51:36.866508: step: 660/459, loss: 0.572005569934845 2023-01-24 00:51:37.479951: step: 662/459, loss: 0.30039674043655396 2023-01-24 00:51:38.121910: step: 664/459, loss: 0.43786779046058655 2023-01-24 00:51:38.754899: step: 666/459, loss: 0.744214653968811 2023-01-24 00:51:39.350066: step: 668/459, loss: 0.4379722774028778 2023-01-24 00:51:39.952634: step: 670/459, loss: 0.7706655263900757 2023-01-24 00:51:40.558383: step: 672/459, loss: 0.1354086995124817 2023-01-24 00:51:41.176025: step: 674/459, loss: 2.4007184505462646 2023-01-24 00:51:41.891346: step: 676/459, loss: 0.3333797752857208 2023-01-24 00:51:42.526710: step: 678/459, loss: 0.9428128004074097 2023-01-24 00:51:43.163712: step: 680/459, loss: 0.2940179705619812 2023-01-24 00:51:43.764231: step: 682/459, loss: 0.2934315502643585 2023-01-24 00:51:44.358695: step: 684/459, loss: 0.4018338918685913 2023-01-24 00:51:44.968401: step: 686/459, loss: 0.1581418514251709 2023-01-24 00:51:45.637156: step: 688/459, loss: 0.9462018609046936 2023-01-24 00:51:46.248194: step: 690/459, loss: 0.4194997549057007 2023-01-24 00:51:46.862415: step: 692/459, loss: 0.29298898577690125 2023-01-24 00:51:47.531114: step: 694/459, loss: 0.33721861243247986 2023-01-24 00:51:48.088653: step: 696/459, loss: 0.5258423686027527 2023-01-24 00:51:48.648862: step: 698/459, loss: 0.2714780271053314 2023-01-24 00:51:49.389241: step: 700/459, loss: 1.1637182235717773 2023-01-24 00:51:50.003103: step: 702/459, loss: 0.2765747904777527 2023-01-24 00:51:50.593682: step: 704/459, loss: 0.29649263620376587 2023-01-24 00:51:51.294666: step: 706/459, loss: 0.46207550168037415 2023-01-24 00:51:51.896840: step: 708/459, loss: 0.3084617853164673 2023-01-24 00:51:52.465549: step: 710/459, loss: 0.61458820104599 2023-01-24 00:51:53.183328: step: 712/459, loss: 0.7908852100372314 2023-01-24 00:51:53.793704: step: 714/459, loss: 0.9401839971542358 2023-01-24 00:51:54.380943: step: 716/459, loss: 0.7449376583099365 2023-01-24 00:51:55.017131: step: 718/459, loss: 0.3753925561904907 2023-01-24 00:51:55.672689: step: 720/459, loss: 0.7923146486282349 2023-01-24 00:51:56.295055: step: 722/459, loss: 0.5417886972427368 2023-01-24 00:51:56.893925: step: 724/459, loss: 1.6221294403076172 2023-01-24 00:51:57.536264: step: 726/459, loss: 0.3310430943965912 2023-01-24 00:51:58.230554: step: 728/459, loss: 0.8590459823608398 2023-01-24 00:51:58.850942: step: 730/459, loss: 0.6611409187316895 2023-01-24 00:51:59.449848: step: 732/459, loss: 0.6194181442260742 2023-01-24 00:52:00.046946: step: 734/459, loss: 0.23753292858600616 2023-01-24 00:52:00.597779: step: 736/459, loss: 0.17975376546382904 2023-01-24 00:52:01.279168: step: 738/459, loss: 0.4691120386123657 2023-01-24 00:52:01.883260: step: 740/459, loss: 
0.5616092681884766 2023-01-24 00:52:02.543461: step: 742/459, loss: 0.9724147915840149 2023-01-24 00:52:03.226706: step: 744/459, loss: 0.9499939680099487 2023-01-24 00:52:03.837714: step: 746/459, loss: 0.337427020072937 2023-01-24 00:52:04.597510: step: 748/459, loss: 0.8241318464279175 2023-01-24 00:52:05.182062: step: 750/459, loss: 0.2862998843193054 2023-01-24 00:52:05.817208: step: 752/459, loss: 0.10991387814283371 2023-01-24 00:52:06.391415: step: 754/459, loss: 0.2589423656463623 2023-01-24 00:52:07.017139: step: 756/459, loss: 0.8405879139900208 2023-01-24 00:52:07.600942: step: 758/459, loss: 0.25844234228134155 2023-01-24 00:52:08.197181: step: 760/459, loss: 0.31991809606552124 2023-01-24 00:52:08.847559: step: 762/459, loss: 0.6719395518302917 2023-01-24 00:52:09.489003: step: 764/459, loss: 0.5357170701026917 2023-01-24 00:52:10.111305: step: 766/459, loss: 0.4304245114326477 2023-01-24 00:52:10.657198: step: 768/459, loss: 0.3498314917087555 2023-01-24 00:52:11.237269: step: 770/459, loss: 0.6812449097633362 2023-01-24 00:52:11.874538: step: 772/459, loss: 0.2391941398382187 2023-01-24 00:52:12.548025: step: 774/459, loss: 0.3638177812099457 2023-01-24 00:52:13.268644: step: 776/459, loss: 1.3251705169677734 2023-01-24 00:52:13.847163: step: 778/459, loss: 0.9399577975273132 2023-01-24 00:52:14.489543: step: 780/459, loss: 0.5837995409965515 2023-01-24 00:52:15.015057: step: 782/459, loss: 0.26189470291137695 2023-01-24 00:52:15.666187: step: 784/459, loss: 0.24692422151565552 2023-01-24 00:52:16.258837: step: 786/459, loss: 1.183476448059082 2023-01-24 00:52:16.858314: step: 788/459, loss: 0.46074265241622925 2023-01-24 00:52:17.482176: step: 790/459, loss: 0.265033483505249 2023-01-24 00:52:18.125299: step: 792/459, loss: 0.967545747756958 2023-01-24 00:52:18.721679: step: 794/459, loss: 0.576570987701416 2023-01-24 00:52:19.340825: step: 796/459, loss: 0.23919717967510223 2023-01-24 00:52:19.964499: step: 798/459, loss: 0.4197443723678589 2023-01-24 00:52:20.662890: step: 800/459, loss: 0.5889609456062317 2023-01-24 00:52:21.295893: step: 802/459, loss: 0.2671554386615753 2023-01-24 00:52:21.970598: step: 804/459, loss: 1.0672848224639893 2023-01-24 00:52:22.568019: step: 806/459, loss: 0.5187402367591858 2023-01-24 00:52:23.170740: step: 808/459, loss: 0.4382280111312866 2023-01-24 00:52:23.763736: step: 810/459, loss: 0.3136976659297943 2023-01-24 00:52:24.377516: step: 812/459, loss: 0.5892276763916016 2023-01-24 00:52:25.020248: step: 814/459, loss: 0.38232657313346863 2023-01-24 00:52:25.627686: step: 816/459, loss: 0.2249353975057602 2023-01-24 00:52:26.240246: step: 818/459, loss: 0.963972270488739 2023-01-24 00:52:26.815191: step: 820/459, loss: 0.7888500094413757 2023-01-24 00:52:27.495751: step: 822/459, loss: 0.9460873603820801 2023-01-24 00:52:28.077670: step: 824/459, loss: 1.9705654382705688 2023-01-24 00:52:28.716659: step: 826/459, loss: 1.3299046754837036 2023-01-24 00:52:29.310117: step: 828/459, loss: 0.4419843852519989 2023-01-24 00:52:29.919244: step: 830/459, loss: 0.3682541251182556 2023-01-24 00:52:30.554247: step: 832/459, loss: 0.5335423946380615 2023-01-24 00:52:31.130547: step: 834/459, loss: 0.44594013690948486 2023-01-24 00:52:31.753298: step: 836/459, loss: 0.3386795222759247 2023-01-24 00:52:32.387997: step: 838/459, loss: 0.6137071847915649 2023-01-24 00:52:32.991008: step: 840/459, loss: 0.16859960556030273 2023-01-24 00:52:33.568081: step: 842/459, loss: 0.42158085107803345 2023-01-24 00:52:34.135481: step: 844/459, loss: 
0.4484977126121521 2023-01-24 00:52:34.754871: step: 846/459, loss: 0.298227459192276 2023-01-24 00:52:35.387272: step: 848/459, loss: 1.202232837677002 2023-01-24 00:52:35.991139: step: 850/459, loss: 0.5517486929893494 2023-01-24 00:52:36.627597: step: 852/459, loss: 0.2678479254245758 2023-01-24 00:52:37.312458: step: 854/459, loss: 0.28105783462524414 2023-01-24 00:52:37.957990: step: 856/459, loss: 0.22354668378829956 2023-01-24 00:52:38.564878: step: 858/459, loss: 0.8850545287132263 2023-01-24 00:52:39.151472: step: 860/459, loss: 0.7001399993896484 2023-01-24 00:52:39.802968: step: 862/459, loss: 1.506056308746338 2023-01-24 00:52:40.475996: step: 864/459, loss: 0.2019028663635254 2023-01-24 00:52:41.060783: step: 866/459, loss: 0.23175886273384094 2023-01-24 00:52:41.737563: step: 868/459, loss: 0.3632751703262329 2023-01-24 00:52:42.354747: step: 870/459, loss: 0.28932926058769226 2023-01-24 00:52:42.990278: step: 872/459, loss: 0.7017502188682556 2023-01-24 00:52:43.599324: step: 874/459, loss: 0.48588550090789795 2023-01-24 00:52:44.240867: step: 876/459, loss: 0.2892270088195801 2023-01-24 00:52:44.823896: step: 878/459, loss: 1.1188124418258667 2023-01-24 00:52:45.407127: step: 880/459, loss: 0.7084054350852966 2023-01-24 00:52:46.048532: step: 882/459, loss: 0.6579598784446716 2023-01-24 00:52:46.687570: step: 884/459, loss: 0.3932293653488159 2023-01-24 00:52:47.283788: step: 886/459, loss: 1.3594143390655518 2023-01-24 00:52:47.932087: step: 888/459, loss: 0.27107179164886475 2023-01-24 00:52:48.564976: step: 890/459, loss: 0.4145548343658447 2023-01-24 00:52:49.182960: step: 892/459, loss: 0.17531824111938477 2023-01-24 00:52:49.803502: step: 894/459, loss: 0.760668933391571 2023-01-24 00:52:50.413104: step: 896/459, loss: 0.5098780393600464 2023-01-24 00:52:51.035672: step: 898/459, loss: 0.737545907497406 2023-01-24 00:52:51.702045: step: 900/459, loss: 0.5810427665710449 2023-01-24 00:52:52.361764: step: 902/459, loss: 1.4124860763549805 2023-01-24 00:52:52.951500: step: 904/459, loss: 0.1401377022266388 2023-01-24 00:52:53.570510: step: 906/459, loss: 1.5610002279281616 2023-01-24 00:52:54.184397: step: 908/459, loss: 1.6375066041946411 2023-01-24 00:52:54.782801: step: 910/459, loss: 0.5421810746192932 2023-01-24 00:52:55.398336: step: 912/459, loss: 0.4665292799472809 2023-01-24 00:52:56.007933: step: 914/459, loss: 0.49304717779159546 2023-01-24 00:52:56.591716: step: 916/459, loss: 1.2647558450698853 2023-01-24 00:52:57.199952: step: 918/459, loss: 0.5490025877952576 2023-01-24 00:52:57.677259: step: 920/459, loss: 0.005707115866243839 ================================================== Loss: 0.634 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32695545283520827, 'r': 0.32261259103284307, 'f1': 0.3247695042489175}, 'combined': 0.23930384523604445, 'epoch': 6} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.36230624416909746, 'r': 0.2835153503052082, 'f1': 0.31810451249149957}, 'combined': 0.2035868879945597, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3297547636682105, 'r': 0.3372634110382646, 'f1': 0.33346682479768375}, 'combined': 0.2457123972193459, 'epoch': 6} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.35598645979878835, 'r': 0.2728150778276169, 'f1': 
0.3089002425222631}, 'combined': 0.19769615521424835, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32456523677905913, 'r': 0.31655888368963264, 'f1': 0.3205120685964196}, 'combined': 0.2361667873868355, 'epoch': 6} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3626031936410303, 'r': 0.28955368972610984, 'f1': 0.3219872251693285}, 'combined': 0.23085876521574497, 'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.287037037037037, 'r': 0.2952380952380952, 'f1': 0.2910798122065727}, 'combined': 0.19405320813771512, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3148148148148148, 'r': 0.3695652173913043, 'f1': 0.34}, 'combined': 0.17, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3, 'r': 0.10344827586206896, 'f1': 0.15384615384615385}, 'combined': 0.10256410256410256, 'epoch': 6} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3420992658657132, 'r': 0.32067749020429287, 'f1': 0.331042188712365}, 'combined': 0.24392582326174264, 'epoch': 5} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3383810640925978, 'r': 0.2494791299809971, 'f1': 0.28720778961705584}, 'combined': 0.18381298535491572, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2660984848484848, 'r': 0.3345238095238095, 'f1': 0.29641350210970463}, 'combined': 0.19760900140646975, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3342478880342958, 'r': 0.3266369304319968, 'f1': 0.33039858414138645}, 'combined': 0.24345158831470579, 'epoch': 5} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3413499740991752, 'r': 0.24608229950967814, 'f1': 0.28599105067157526}, 'combined': 0.18303427242980813, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3269230769230769, 'r': 0.3695652173913043, 'f1': 0.346938775510204}, 'combined': 0.173469387755102, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3125609955204797, 'r': 0.29657776279500064, 'f1': 0.30435968660011725}, 'combined': 0.22426503223166533, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34660410768177247, 'r': 0.27822857007545915, 'f1': 0.30867516599395367}, 'combined': 0.22131426995792908, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3958333333333333, 'r': 0.16379310344827586, 'f1': 0.23170731707317074}, 'combined': 0.15447154471544716, 'epoch': 4} ****************************** Epoch: 7 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 00:55:32.825492: step: 2/459, loss: 0.3579663634300232 2023-01-24 
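
A note on the numbers in the evaluation blocks above: within each language block the 'f1' values follow the usual formula f1 = 2*p*r/(p+r), and 'combined' matches the product of the template F1 and the slot F1 (for Dev Chinese at epoch 6: 0.7368421 * 0.3247695 ≈ 0.2393038). The per-epoch "Loss" line is presumably an average of the step losses printed above, though the log itself does not say so. The minimal sketch below just reproduces that arithmetic; the function and variable names are illustrative and are not taken from train.py.

def f1(p: float, r: float) -> float:
    """Standard F1 from precision and recall."""
    return 0.0 if p + r == 0 else 2 * p * r / (p + r)

def combined_score(template: dict, slot: dict) -> float:
    """'combined' in this log matches template F1 * slot F1."""
    return f1(template['p'], template['r']) * f1(slot['p'], slot['r'])

# Check against the epoch-6 "Dev Chinese" block above:
template = {'p': 1.0, 'r': 0.5833333333333334}
slot = {'p': 0.32695545283520827, 'r': 0.32261259103284307}
print(combined_score(template, slot))  # ~0.23930, matching 'combined': 0.2393038...
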
00:55:33.465277: step: 4/459, loss: 0.2700618803501129 2023-01-24 00:55:34.050807: step: 6/459, loss: 0.6277927756309509 2023-01-24 00:55:34.667260: step: 8/459, loss: 0.20203614234924316 2023-01-24 00:55:35.264802: step: 10/459, loss: 0.16160735487937927 2023-01-24 00:55:35.885592: step: 12/459, loss: 0.3937512934207916 2023-01-24 00:55:36.501670: step: 14/459, loss: 0.5914559364318848 2023-01-24 00:55:37.178580: step: 16/459, loss: 0.126180961728096 2023-01-24 00:55:37.800113: step: 18/459, loss: 0.2107970416545868 2023-01-24 00:55:38.352928: step: 20/459, loss: 0.41903001070022583 2023-01-24 00:55:38.923769: step: 22/459, loss: 0.16767114400863647 2023-01-24 00:55:39.574662: step: 24/459, loss: 0.27467402815818787 2023-01-24 00:55:40.230583: step: 26/459, loss: 0.27040982246398926 2023-01-24 00:55:40.917904: step: 28/459, loss: 0.6239561438560486 2023-01-24 00:55:41.515565: step: 30/459, loss: 0.7397379875183105 2023-01-24 00:55:42.195259: step: 32/459, loss: 0.34754136204719543 2023-01-24 00:55:42.858548: step: 34/459, loss: 0.22693198919296265 2023-01-24 00:55:43.476232: step: 36/459, loss: 0.4732779860496521 2023-01-24 00:55:44.254716: step: 38/459, loss: 0.7748904228210449 2023-01-24 00:55:44.933572: step: 40/459, loss: 0.2911655902862549 2023-01-24 00:55:45.524720: step: 42/459, loss: 0.21073615550994873 2023-01-24 00:55:46.167387: step: 44/459, loss: 0.45868420600891113 2023-01-24 00:55:46.795117: step: 46/459, loss: 0.31595373153686523 2023-01-24 00:55:47.388137: step: 48/459, loss: 0.1592530757188797 2023-01-24 00:55:48.074458: step: 50/459, loss: 0.15216639637947083 2023-01-24 00:55:48.699834: step: 52/459, loss: 0.2557794451713562 2023-01-24 00:55:49.375593: step: 54/459, loss: 0.3888740539550781 2023-01-24 00:55:50.002084: step: 56/459, loss: 0.4482192397117615 2023-01-24 00:55:50.613299: step: 58/459, loss: 0.5596091151237488 2023-01-24 00:55:51.195335: step: 60/459, loss: 0.14107011258602142 2023-01-24 00:55:51.832682: step: 62/459, loss: 0.07952499389648438 2023-01-24 00:55:52.434832: step: 64/459, loss: 0.11117306351661682 2023-01-24 00:55:52.992991: step: 66/459, loss: 1.910363793373108 2023-01-24 00:55:53.651594: step: 68/459, loss: 0.569317638874054 2023-01-24 00:55:54.290050: step: 70/459, loss: 0.4411834478378296 2023-01-24 00:55:54.910209: step: 72/459, loss: 0.1410941183567047 2023-01-24 00:55:55.633665: step: 74/459, loss: 0.826856255531311 2023-01-24 00:55:56.243879: step: 76/459, loss: 0.12069018185138702 2023-01-24 00:55:56.821004: step: 78/459, loss: 0.07895991206169128 2023-01-24 00:55:57.396388: step: 80/459, loss: 0.229952871799469 2023-01-24 00:55:58.015492: step: 82/459, loss: 1.218388557434082 2023-01-24 00:55:58.702107: step: 84/459, loss: 1.0730667114257812 2023-01-24 00:55:59.299135: step: 86/459, loss: 0.17715968191623688 2023-01-24 00:55:59.889330: step: 88/459, loss: 0.16542798280715942 2023-01-24 00:56:00.550215: step: 90/459, loss: 0.4088648557662964 2023-01-24 00:56:01.144539: step: 92/459, loss: 0.5391746759414673 2023-01-24 00:56:01.799609: step: 94/459, loss: 0.9325960874557495 2023-01-24 00:56:02.413769: step: 96/459, loss: 0.8034433126449585 2023-01-24 00:56:02.978154: step: 98/459, loss: 0.12949784100055695 2023-01-24 00:56:03.634988: step: 100/459, loss: 0.2914822995662689 2023-01-24 00:56:04.247724: step: 102/459, loss: 0.2535954415798187 2023-01-24 00:56:04.859410: step: 104/459, loss: 0.1386803686618805 2023-01-24 00:56:05.534984: step: 106/459, loss: 0.9359193444252014 2023-01-24 00:56:06.174197: step: 108/459, loss: 
0.6156660318374634 2023-01-24 00:56:06.744020: step: 110/459, loss: 0.3580305278301239 2023-01-24 00:56:07.335009: step: 112/459, loss: 0.15266503393650055 2023-01-24 00:56:07.974690: step: 114/459, loss: 0.21743932366371155 2023-01-24 00:56:08.580462: step: 116/459, loss: 0.18733008205890656 2023-01-24 00:56:09.199002: step: 118/459, loss: 0.5080251693725586 2023-01-24 00:56:09.798472: step: 120/459, loss: 0.6342256665229797 2023-01-24 00:56:10.390228: step: 122/459, loss: 0.5707013607025146 2023-01-24 00:56:10.991498: step: 124/459, loss: 0.6185805797576904 2023-01-24 00:56:11.559905: step: 126/459, loss: 0.35711434483528137 2023-01-24 00:56:12.151722: step: 128/459, loss: 0.20133939385414124 2023-01-24 00:56:12.782812: step: 130/459, loss: 0.7109394073486328 2023-01-24 00:56:13.523512: step: 132/459, loss: 0.22435110807418823 2023-01-24 00:56:14.215194: step: 134/459, loss: 0.40086978673934937 2023-01-24 00:56:14.853630: step: 136/459, loss: 1.0688958168029785 2023-01-24 00:56:15.532789: step: 138/459, loss: 0.21586669981479645 2023-01-24 00:56:16.188855: step: 140/459, loss: 0.481327623128891 2023-01-24 00:56:16.801678: step: 142/459, loss: 0.42478039860725403 2023-01-24 00:56:17.417612: step: 144/459, loss: 0.6553373336791992 2023-01-24 00:56:18.020317: step: 146/459, loss: 0.2660770118236542 2023-01-24 00:56:18.651072: step: 148/459, loss: 1.1588835716247559 2023-01-24 00:56:19.344468: step: 150/459, loss: 0.3710630536079407 2023-01-24 00:56:19.925315: step: 152/459, loss: 0.40730971097946167 2023-01-24 00:56:20.642310: step: 154/459, loss: 0.40954118967056274 2023-01-24 00:56:21.219956: step: 156/459, loss: 0.19107039272785187 2023-01-24 00:56:21.892334: step: 158/459, loss: 0.8060635924339294 2023-01-24 00:56:22.491128: step: 160/459, loss: 0.3637791872024536 2023-01-24 00:56:23.120133: step: 162/459, loss: 0.2343795746564865 2023-01-24 00:56:23.769787: step: 164/459, loss: 1.9982990026474 2023-01-24 00:56:24.392571: step: 166/459, loss: 0.261982798576355 2023-01-24 00:56:25.047274: step: 168/459, loss: 1.0296776294708252 2023-01-24 00:56:25.615221: step: 170/459, loss: 0.5439716577529907 2023-01-24 00:56:26.167415: step: 172/459, loss: 0.2668640613555908 2023-01-24 00:56:26.822818: step: 174/459, loss: 0.3638315796852112 2023-01-24 00:56:27.471906: step: 176/459, loss: 0.15494580566883087 2023-01-24 00:56:28.078618: step: 178/459, loss: 1.0412858724594116 2023-01-24 00:56:28.671662: step: 180/459, loss: 0.14703743159770966 2023-01-24 00:56:29.296221: step: 182/459, loss: 0.09014195203781128 2023-01-24 00:56:29.911272: step: 184/459, loss: 0.6990402936935425 2023-01-24 00:56:30.588828: step: 186/459, loss: 0.16721095144748688 2023-01-24 00:56:31.161818: step: 188/459, loss: 0.7363616824150085 2023-01-24 00:56:31.768660: step: 190/459, loss: 1.6480470895767212 2023-01-24 00:56:32.348459: step: 192/459, loss: 0.6689409017562866 2023-01-24 00:56:32.960019: step: 194/459, loss: 0.4785969853401184 2023-01-24 00:56:33.567359: step: 196/459, loss: 0.49347060918807983 2023-01-24 00:56:34.225186: step: 198/459, loss: 0.31264591217041016 2023-01-24 00:56:34.847633: step: 200/459, loss: 0.7720748782157898 2023-01-24 00:56:35.509679: step: 202/459, loss: 0.6659747958183289 2023-01-24 00:56:36.130883: step: 204/459, loss: 0.18698301911354065 2023-01-24 00:56:36.772551: step: 206/459, loss: 0.594642162322998 2023-01-24 00:56:37.401318: step: 208/459, loss: 0.2193850874900818 2023-01-24 00:56:38.041499: step: 210/459, loss: 0.45260483026504517 2023-01-24 00:56:38.694662: step: 212/459, loss: 
0.6766628623008728 2023-01-24 00:56:39.302816: step: 214/459, loss: 0.31592419743537903 2023-01-24 00:56:39.898139: step: 216/459, loss: 0.9256885051727295 2023-01-24 00:56:40.546422: step: 218/459, loss: 1.402020812034607 2023-01-24 00:56:41.125714: step: 220/459, loss: 0.24096018075942993 2023-01-24 00:56:41.708541: step: 222/459, loss: 0.26370418071746826 2023-01-24 00:56:42.343881: step: 224/459, loss: 0.2524990737438202 2023-01-24 00:56:42.963618: step: 226/459, loss: 2.1277031898498535 2023-01-24 00:56:43.538201: step: 228/459, loss: 0.36299049854278564 2023-01-24 00:56:44.294458: step: 230/459, loss: 1.8503227233886719 2023-01-24 00:56:44.953659: step: 232/459, loss: 0.13602669537067413 2023-01-24 00:56:45.638856: step: 234/459, loss: 0.6533699035644531 2023-01-24 00:56:46.256561: step: 236/459, loss: 0.22508090734481812 2023-01-24 00:56:46.832970: step: 238/459, loss: 0.17627887427806854 2023-01-24 00:56:47.463430: step: 240/459, loss: 0.4887842535972595 2023-01-24 00:56:48.052811: step: 242/459, loss: 0.45264148712158203 2023-01-24 00:56:48.651667: step: 244/459, loss: 0.5738886594772339 2023-01-24 00:56:49.277681: step: 246/459, loss: 0.20209501683712006 2023-01-24 00:56:49.841720: step: 248/459, loss: 1.3265936374664307 2023-01-24 00:56:50.430443: step: 250/459, loss: 0.17502361536026 2023-01-24 00:56:51.018219: step: 252/459, loss: 0.19162920117378235 2023-01-24 00:56:51.682901: step: 254/459, loss: 0.7351697087287903 2023-01-24 00:56:52.324659: step: 256/459, loss: 0.5731481909751892 2023-01-24 00:56:52.986763: step: 258/459, loss: 0.16931860148906708 2023-01-24 00:56:53.592413: step: 260/459, loss: 0.6001424789428711 2023-01-24 00:56:54.182207: step: 262/459, loss: 0.17639347910881042 2023-01-24 00:56:54.801129: step: 264/459, loss: 0.8160589933395386 2023-01-24 00:56:55.409213: step: 266/459, loss: 0.7302514314651489 2023-01-24 00:56:56.060257: step: 268/459, loss: 0.7390867471694946 2023-01-24 00:56:56.640625: step: 270/459, loss: 0.25087597966194153 2023-01-24 00:56:57.218852: step: 272/459, loss: 0.1976044476032257 2023-01-24 00:56:57.847076: step: 274/459, loss: 0.24288557469844818 2023-01-24 00:56:58.470604: step: 276/459, loss: 0.37830600142478943 2023-01-24 00:56:59.174975: step: 278/459, loss: 1.699676275253296 2023-01-24 00:56:59.860125: step: 280/459, loss: 0.7735227346420288 2023-01-24 00:57:00.482751: step: 282/459, loss: 0.14458031952381134 2023-01-24 00:57:01.101352: step: 284/459, loss: 0.26406392455101013 2023-01-24 00:57:01.696468: step: 286/459, loss: 1.0734453201293945 2023-01-24 00:57:02.323666: step: 288/459, loss: 0.2561120390892029 2023-01-24 00:57:02.892869: step: 290/459, loss: 0.20040485262870789 2023-01-24 00:57:03.556637: step: 292/459, loss: 0.5945956707000732 2023-01-24 00:57:04.200918: step: 294/459, loss: 0.2312096357345581 2023-01-24 00:57:04.862431: step: 296/459, loss: 0.5595628023147583 2023-01-24 00:57:05.497022: step: 298/459, loss: 0.5049360394477844 2023-01-24 00:57:06.169474: step: 300/459, loss: 0.597101628780365 2023-01-24 00:57:06.794050: step: 302/459, loss: 0.8469732403755188 2023-01-24 00:57:07.330157: step: 304/459, loss: 0.35944864153862 2023-01-24 00:57:07.972348: step: 306/459, loss: 0.5223783254623413 2023-01-24 00:57:08.645301: step: 308/459, loss: 0.3196190297603607 2023-01-24 00:57:09.246266: step: 310/459, loss: 0.1964162141084671 2023-01-24 00:57:09.878839: step: 312/459, loss: 0.31027835607528687 2023-01-24 00:57:10.453744: step: 314/459, loss: 0.32999300956726074 2023-01-24 00:57:11.059715: step: 316/459, loss: 
0.2045622617006302 2023-01-24 00:57:11.799686: step: 318/459, loss: 0.4869652986526489 2023-01-24 00:57:12.428156: step: 320/459, loss: 0.5598112344741821 2023-01-24 00:57:13.044325: step: 322/459, loss: 0.5782570242881775 2023-01-24 00:57:13.639555: step: 324/459, loss: 0.10656566917896271 2023-01-24 00:57:14.226118: step: 326/459, loss: 1.0294554233551025 2023-01-24 00:57:14.842871: step: 328/459, loss: 0.3040839433670044 2023-01-24 00:57:15.396782: step: 330/459, loss: 0.11344794183969498 2023-01-24 00:57:16.020894: step: 332/459, loss: 0.29221421480178833 2023-01-24 00:57:16.686824: step: 334/459, loss: 0.20496602356433868 2023-01-24 00:57:17.316334: step: 336/459, loss: 0.29518479108810425 2023-01-24 00:57:17.910810: step: 338/459, loss: 0.12443839758634567 2023-01-24 00:57:18.598882: step: 340/459, loss: 1.224665880203247 2023-01-24 00:57:19.249496: step: 342/459, loss: 0.42436540126800537 2023-01-24 00:57:19.836499: step: 344/459, loss: 6.86220121383667 2023-01-24 00:57:20.459377: step: 346/459, loss: 0.8841491937637329 2023-01-24 00:57:21.073056: step: 348/459, loss: 1.068766713142395 2023-01-24 00:57:21.701680: step: 350/459, loss: 0.3738611936569214 2023-01-24 00:57:22.335004: step: 352/459, loss: 1.1398236751556396 2023-01-24 00:57:23.056653: step: 354/459, loss: 0.1585351824760437 2023-01-24 00:57:23.694380: step: 356/459, loss: 0.36306217312812805 2023-01-24 00:57:24.284734: step: 358/459, loss: 0.32068803906440735 2023-01-24 00:57:24.878184: step: 360/459, loss: 0.21803481876850128 2023-01-24 00:57:25.550546: step: 362/459, loss: 0.19550910592079163 2023-01-24 00:57:26.205393: step: 364/459, loss: 0.5204103589057922 2023-01-24 00:57:26.755288: step: 366/459, loss: 0.37719687819480896 2023-01-24 00:57:27.375654: step: 368/459, loss: 0.6856701970100403 2023-01-24 00:57:27.984200: step: 370/459, loss: 0.6115508675575256 2023-01-24 00:57:28.544861: step: 372/459, loss: 0.18936945497989655 2023-01-24 00:57:29.165741: step: 374/459, loss: 0.3847229778766632 2023-01-24 00:57:29.837097: step: 376/459, loss: 0.3945435583591461 2023-01-24 00:57:30.502086: step: 378/459, loss: 0.9438193440437317 2023-01-24 00:57:31.127255: step: 380/459, loss: 0.27105212211608887 2023-01-24 00:57:31.713377: step: 382/459, loss: 1.5618011951446533 2023-01-24 00:57:32.392884: step: 384/459, loss: 1.6065171957015991 2023-01-24 00:57:33.013144: step: 386/459, loss: 0.3688710629940033 2023-01-24 00:57:33.633525: step: 388/459, loss: 0.6253747344017029 2023-01-24 00:57:34.230766: step: 390/459, loss: 0.7413223385810852 2023-01-24 00:57:34.837257: step: 392/459, loss: 0.19703409075737 2023-01-24 00:57:35.465972: step: 394/459, loss: 1.04152250289917 2023-01-24 00:57:36.089248: step: 396/459, loss: 0.22575682401657104 2023-01-24 00:57:36.756745: step: 398/459, loss: 0.2053144872188568 2023-01-24 00:57:37.429695: step: 400/459, loss: 0.7615208625793457 2023-01-24 00:57:38.058326: step: 402/459, loss: 0.3637693524360657 2023-01-24 00:57:38.674338: step: 404/459, loss: 0.6596665382385254 2023-01-24 00:57:39.309928: step: 406/459, loss: 0.44354739785194397 2023-01-24 00:57:39.928854: step: 408/459, loss: 0.14026318490505219 2023-01-24 00:57:40.499566: step: 410/459, loss: 0.6185816526412964 2023-01-24 00:57:41.076400: step: 412/459, loss: 0.06092220917344093 2023-01-24 00:57:41.750874: step: 414/459, loss: 0.7827885150909424 2023-01-24 00:57:42.407853: step: 416/459, loss: 0.2610614597797394 2023-01-24 00:57:42.997905: step: 418/459, loss: 0.3267325162887573 2023-01-24 00:57:43.635343: step: 420/459, loss: 
0.16037291288375854 2023-01-24 00:57:44.238210: step: 422/459, loss: 2.0170273780822754 2023-01-24 00:57:44.867762: step: 424/459, loss: 0.3988063931465149 2023-01-24 00:57:45.510366: step: 426/459, loss: 0.5221374034881592 2023-01-24 00:57:46.147015: step: 428/459, loss: 0.1994316428899765 2023-01-24 00:57:46.735196: step: 430/459, loss: 0.32029563188552856 2023-01-24 00:57:47.348269: step: 432/459, loss: 0.1623229682445526 2023-01-24 00:57:47.981322: step: 434/459, loss: 1.883230209350586 2023-01-24 00:57:48.568842: step: 436/459, loss: 0.7589873671531677 2023-01-24 00:57:49.175676: step: 438/459, loss: 3.736762523651123 2023-01-24 00:57:49.809152: step: 440/459, loss: 0.2580411732196808 2023-01-24 00:57:50.470570: step: 442/459, loss: 0.43602657318115234 2023-01-24 00:57:51.105765: step: 444/459, loss: 0.7893450856208801 2023-01-24 00:57:51.686848: step: 446/459, loss: 0.19094990193843842 2023-01-24 00:57:52.232392: step: 448/459, loss: 0.46896421909332275 2023-01-24 00:57:52.893286: step: 450/459, loss: 0.3961755633354187 2023-01-24 00:57:53.471762: step: 452/459, loss: 0.31122827529907227 2023-01-24 00:57:54.061154: step: 454/459, loss: 0.8228276371955872 2023-01-24 00:57:54.663054: step: 456/459, loss: 1.2761434316635132 2023-01-24 00:57:55.279766: step: 458/459, loss: 0.34749525785446167 2023-01-24 00:57:55.849835: step: 460/459, loss: 0.7350379824638367 2023-01-24 00:57:56.462355: step: 462/459, loss: 0.2154207080602646 2023-01-24 00:57:57.056942: step: 464/459, loss: 0.7798674702644348 2023-01-24 00:57:57.604271: step: 466/459, loss: 0.1821850836277008 2023-01-24 00:57:58.226722: step: 468/459, loss: 0.5792438387870789 2023-01-24 00:57:58.838365: step: 470/459, loss: 1.5411607027053833 2023-01-24 00:57:59.490179: step: 472/459, loss: 2.227687358856201 2023-01-24 00:58:00.088043: step: 474/459, loss: 0.4286887049674988 2023-01-24 00:58:00.687520: step: 476/459, loss: 0.3820238709449768 2023-01-24 00:58:01.263914: step: 478/459, loss: 0.44249534606933594 2023-01-24 00:58:01.929056: step: 480/459, loss: 0.32175150513648987 2023-01-24 00:58:02.490512: step: 482/459, loss: 0.6745996475219727 2023-01-24 00:58:03.116741: step: 484/459, loss: 4.939012050628662 2023-01-24 00:58:03.713912: step: 486/459, loss: 0.16102133691310883 2023-01-24 00:58:04.365095: step: 488/459, loss: 0.36313411593437195 2023-01-24 00:58:04.949495: step: 490/459, loss: 0.2845294773578644 2023-01-24 00:58:05.570069: step: 492/459, loss: 1.5911037921905518 2023-01-24 00:58:06.179891: step: 494/459, loss: 0.3748919665813446 2023-01-24 00:58:06.778425: step: 496/459, loss: 0.06147516891360283 2023-01-24 00:58:07.330856: step: 498/459, loss: 0.543201744556427 2023-01-24 00:58:07.871283: step: 500/459, loss: 1.0254242420196533 2023-01-24 00:58:08.453834: step: 502/459, loss: 0.75117027759552 2023-01-24 00:58:09.097798: step: 504/459, loss: 0.4052647352218628 2023-01-24 00:58:09.703664: step: 506/459, loss: 0.19419221580028534 2023-01-24 00:58:10.320232: step: 508/459, loss: 0.1812010556459427 2023-01-24 00:58:10.855976: step: 510/459, loss: 0.2346154898405075 2023-01-24 00:58:11.427473: step: 512/459, loss: 0.7612488269805908 2023-01-24 00:58:12.063513: step: 514/459, loss: 0.19109289348125458 2023-01-24 00:58:12.738791: step: 516/459, loss: 0.9054386615753174 2023-01-24 00:58:13.313726: step: 518/459, loss: 0.22535905241966248 2023-01-24 00:58:13.965051: step: 520/459, loss: 0.1589134782552719 2023-01-24 00:58:14.581528: step: 522/459, loss: 0.13657210767269135 2023-01-24 00:58:15.212352: step: 524/459, loss: 
0.45560580492019653 2023-01-24 00:58:15.900148: step: 526/459, loss: 0.19545221328735352 2023-01-24 00:58:16.528296: step: 528/459, loss: 0.5094821453094482 2023-01-24 00:58:17.220297: step: 530/459, loss: 1.4622726440429688 2023-01-24 00:58:17.814443: step: 532/459, loss: 0.29566746950149536 2023-01-24 00:58:18.488264: step: 534/459, loss: 0.6777944564819336 2023-01-24 00:58:19.130221: step: 536/459, loss: 0.22817961871623993 2023-01-24 00:58:19.699450: step: 538/459, loss: 0.13366343080997467 2023-01-24 00:58:20.342161: step: 540/459, loss: 0.4856069087982178 2023-01-24 00:58:21.067348: step: 542/459, loss: 0.38733381032943726 2023-01-24 00:58:21.752300: step: 544/459, loss: 0.39968031644821167 2023-01-24 00:58:22.342371: step: 546/459, loss: 0.2322319597005844 2023-01-24 00:58:22.956986: step: 548/459, loss: 0.374018132686615 2023-01-24 00:58:23.646536: step: 550/459, loss: 0.40752464532852173 2023-01-24 00:58:24.288260: step: 552/459, loss: 2.4004359245300293 2023-01-24 00:58:24.922097: step: 554/459, loss: 0.6820537447929382 2023-01-24 00:58:25.610183: step: 556/459, loss: 0.8451496362686157 2023-01-24 00:58:26.229595: step: 558/459, loss: 0.4499948024749756 2023-01-24 00:58:26.822140: step: 560/459, loss: 1.3076893091201782 2023-01-24 00:58:27.528678: step: 562/459, loss: 0.29594144225120544 2023-01-24 00:58:28.199405: step: 564/459, loss: 0.27397698163986206 2023-01-24 00:58:28.747826: step: 566/459, loss: 0.25132206082344055 2023-01-24 00:58:29.398241: step: 568/459, loss: 0.42534372210502625 2023-01-24 00:58:30.040980: step: 570/459, loss: 0.8960063457489014 2023-01-24 00:58:30.679558: step: 572/459, loss: 0.2022126019001007 2023-01-24 00:58:31.334252: step: 574/459, loss: 0.4647914469242096 2023-01-24 00:58:31.992993: step: 576/459, loss: 0.2713935375213623 2023-01-24 00:58:32.660229: step: 578/459, loss: 0.41702884435653687 2023-01-24 00:58:33.259293: step: 580/459, loss: 0.3693060874938965 2023-01-24 00:58:33.864058: step: 582/459, loss: 1.6075788736343384 2023-01-24 00:58:34.516749: step: 584/459, loss: 0.2646411657333374 2023-01-24 00:58:35.162148: step: 586/459, loss: 0.34583789110183716 2023-01-24 00:58:35.788226: step: 588/459, loss: 0.32196521759033203 2023-01-24 00:58:36.418654: step: 590/459, loss: 0.5048180222511292 2023-01-24 00:58:37.046012: step: 592/459, loss: 0.830573320388794 2023-01-24 00:58:37.676676: step: 594/459, loss: 0.12421521544456482 2023-01-24 00:58:38.281868: step: 596/459, loss: 0.3854350745677948 2023-01-24 00:58:38.980229: step: 598/459, loss: 0.48839831352233887 2023-01-24 00:58:39.623295: step: 600/459, loss: 0.20159812271595 2023-01-24 00:58:40.225381: step: 602/459, loss: 0.7947661876678467 2023-01-24 00:58:40.853875: step: 604/459, loss: 0.6418979167938232 2023-01-24 00:58:41.435959: step: 606/459, loss: 0.17122597992420197 2023-01-24 00:58:42.049006: step: 608/459, loss: 1.2819092273712158 2023-01-24 00:58:42.685978: step: 610/459, loss: 1.059058427810669 2023-01-24 00:58:43.281876: step: 612/459, loss: 0.3122831881046295 2023-01-24 00:58:43.860266: step: 614/459, loss: 0.2355586439371109 2023-01-24 00:58:44.542589: step: 616/459, loss: 0.5835009813308716 2023-01-24 00:58:45.229921: step: 618/459, loss: 0.4888888895511627 2023-01-24 00:58:45.900188: step: 620/459, loss: 0.5557391047477722 2023-01-24 00:58:46.496955: step: 622/459, loss: 0.1995975375175476 2023-01-24 00:58:47.079662: step: 624/459, loss: 0.7562562227249146 2023-01-24 00:58:47.697490: step: 626/459, loss: 0.4003751575946808 2023-01-24 00:58:48.361221: step: 628/459, loss: 
8.254501342773438 2023-01-24 00:58:48.922084: step: 630/459, loss: 0.11063786596059799 2023-01-24 00:58:49.547569: step: 632/459, loss: 0.40213799476623535 2023-01-24 00:58:50.137035: step: 634/459, loss: 0.7026703357696533 2023-01-24 00:58:50.694964: step: 636/459, loss: 1.4154549837112427 2023-01-24 00:58:51.331249: step: 638/459, loss: 0.1856958121061325 2023-01-24 00:58:51.902676: step: 640/459, loss: 0.37129199504852295 2023-01-24 00:58:52.499684: step: 642/459, loss: 0.335224449634552 2023-01-24 00:58:53.167411: step: 644/459, loss: 0.3721109628677368 2023-01-24 00:58:53.821887: step: 646/459, loss: 0.45077162981033325 2023-01-24 00:58:54.442500: step: 648/459, loss: 1.2737189531326294 2023-01-24 00:58:55.081462: step: 650/459, loss: 0.31436094641685486 2023-01-24 00:58:55.688744: step: 652/459, loss: 0.20614540576934814 2023-01-24 00:58:56.367439: step: 654/459, loss: 0.4997318983078003 2023-01-24 00:58:56.952682: step: 656/459, loss: 0.15958243608474731 2023-01-24 00:58:57.541464: step: 658/459, loss: 0.46333444118499756 2023-01-24 00:58:58.143022: step: 660/459, loss: 0.4711664617061615 2023-01-24 00:58:58.809400: step: 662/459, loss: 0.30970096588134766 2023-01-24 00:58:59.399808: step: 664/459, loss: 1.3890924453735352 2023-01-24 00:58:59.976358: step: 666/459, loss: 0.14528606832027435 2023-01-24 00:59:00.588334: step: 668/459, loss: 1.0032814741134644 2023-01-24 00:59:01.260859: step: 670/459, loss: 0.9801666736602783 2023-01-24 00:59:01.902603: step: 672/459, loss: 0.3635418117046356 2023-01-24 00:59:02.523496: step: 674/459, loss: 0.6780799627304077 2023-01-24 00:59:03.140956: step: 676/459, loss: 0.11552456766366959 2023-01-24 00:59:03.721715: step: 678/459, loss: 0.5125803351402283 2023-01-24 00:59:04.404145: step: 680/459, loss: 0.711292028427124 2023-01-24 00:59:05.018344: step: 682/459, loss: 0.3346433937549591 2023-01-24 00:59:05.612790: step: 684/459, loss: 0.3055908679962158 2023-01-24 00:59:06.281046: step: 686/459, loss: 0.28234580159187317 2023-01-24 00:59:06.937615: step: 688/459, loss: 0.414964884519577 2023-01-24 00:59:07.534168: step: 690/459, loss: 0.21537595987319946 2023-01-24 00:59:08.105710: step: 692/459, loss: 0.12958011031150818 2023-01-24 00:59:08.711975: step: 694/459, loss: 0.7203678488731384 2023-01-24 00:59:09.379578: step: 696/459, loss: 0.4964115619659424 2023-01-24 00:59:09.937399: step: 698/459, loss: 0.18331804871559143 2023-01-24 00:59:10.573224: step: 700/459, loss: 0.5363819003105164 2023-01-24 00:59:11.186695: step: 702/459, loss: 0.1509082168340683 2023-01-24 00:59:11.866304: step: 704/459, loss: 0.566707968711853 2023-01-24 00:59:12.449477: step: 706/459, loss: 0.18508419394493103 2023-01-24 00:59:13.089906: step: 708/459, loss: 1.2111742496490479 2023-01-24 00:59:13.720070: step: 710/459, loss: 0.09692087024450302 2023-01-24 00:59:14.336630: step: 712/459, loss: 0.37160375714302063 2023-01-24 00:59:14.865873: step: 714/459, loss: 0.16731101274490356 2023-01-24 00:59:15.515856: step: 716/459, loss: 0.21052008867263794 2023-01-24 00:59:16.132478: step: 718/459, loss: 1.1606866121292114 2023-01-24 00:59:16.762361: step: 720/459, loss: 0.39416012167930603 2023-01-24 00:59:17.367186: step: 722/459, loss: 0.7215221524238586 2023-01-24 00:59:17.991737: step: 724/459, loss: 0.15039482712745667 2023-01-24 00:59:18.599162: step: 726/459, loss: 0.8414298892021179 2023-01-24 00:59:19.290438: step: 728/459, loss: 0.15654781460762024 2023-01-24 00:59:19.876145: step: 730/459, loss: 0.29425498843193054 2023-01-24 00:59:20.459096: step: 732/459, loss: 
5.344974517822266 2023-01-24 00:59:21.092063: step: 734/459, loss: 0.5977937579154968 2023-01-24 00:59:21.769132: step: 736/459, loss: 0.32878464460372925 2023-01-24 00:59:22.441947: step: 738/459, loss: 0.18196986615657806 2023-01-24 00:59:23.106303: step: 740/459, loss: 0.7068890333175659 2023-01-24 00:59:23.730679: step: 742/459, loss: 0.22308847308158875 2023-01-24 00:59:24.313261: step: 744/459, loss: 0.30198246240615845 2023-01-24 00:59:24.922099: step: 746/459, loss: 0.6537942886352539 2023-01-24 00:59:25.669044: step: 748/459, loss: 0.7337746024131775 2023-01-24 00:59:26.303279: step: 750/459, loss: 0.141435906291008 2023-01-24 00:59:26.943880: step: 752/459, loss: 0.2751712501049042 2023-01-24 00:59:27.543770: step: 754/459, loss: 0.23258891701698303 2023-01-24 00:59:28.144649: step: 756/459, loss: 0.44904789328575134 2023-01-24 00:59:28.853144: step: 758/459, loss: 0.1268608123064041 2023-01-24 00:59:29.450891: step: 760/459, loss: 0.2833101749420166 2023-01-24 00:59:30.070830: step: 762/459, loss: 0.2765253484249115 2023-01-24 00:59:30.729742: step: 764/459, loss: 0.26516425609588623 2023-01-24 00:59:31.327687: step: 766/459, loss: 0.550622820854187 2023-01-24 00:59:31.922049: step: 768/459, loss: 0.8163337707519531 2023-01-24 00:59:32.550751: step: 770/459, loss: 0.6386066675186157 2023-01-24 00:59:33.253754: step: 772/459, loss: 1.0743494033813477 2023-01-24 00:59:33.894588: step: 774/459, loss: 0.3163137435913086 2023-01-24 00:59:34.530175: step: 776/459, loss: 0.7960076332092285 2023-01-24 00:59:35.216941: step: 778/459, loss: 0.36510854959487915 2023-01-24 00:59:35.846905: step: 780/459, loss: 0.27552980184555054 2023-01-24 00:59:36.456711: step: 782/459, loss: 0.32716482877731323 2023-01-24 00:59:37.051247: step: 784/459, loss: 0.3369802236557007 2023-01-24 00:59:37.621735: step: 786/459, loss: 0.602428674697876 2023-01-24 00:59:38.227475: step: 788/459, loss: 0.2503136992454529 2023-01-24 00:59:38.994443: step: 790/459, loss: 0.14796583354473114 2023-01-24 00:59:39.608934: step: 792/459, loss: 0.19140353798866272 2023-01-24 00:59:40.157512: step: 794/459, loss: 0.1262776106595993 2023-01-24 00:59:40.717299: step: 796/459, loss: 0.34340700507164 2023-01-24 00:59:41.349627: step: 798/459, loss: 0.45240697264671326 2023-01-24 00:59:41.970330: step: 800/459, loss: 0.5289661288261414 2023-01-24 00:59:42.658838: step: 802/459, loss: 0.15151014924049377 2023-01-24 00:59:43.231891: step: 804/459, loss: 0.41758447885513306 2023-01-24 00:59:43.786216: step: 806/459, loss: 0.5584455728530884 2023-01-24 00:59:44.401418: step: 808/459, loss: 0.5495924949645996 2023-01-24 00:59:45.038826: step: 810/459, loss: 0.5388723611831665 2023-01-24 00:59:45.601125: step: 812/459, loss: 0.17344756424427032 2023-01-24 00:59:46.208867: step: 814/459, loss: 0.41887715458869934 2023-01-24 00:59:46.766608: step: 816/459, loss: 0.4239882826805115 2023-01-24 00:59:47.381259: step: 818/459, loss: 0.7037726640701294 2023-01-24 00:59:47.959539: step: 820/459, loss: 0.3385394811630249 2023-01-24 00:59:48.507924: step: 822/459, loss: 0.06925304234027863 2023-01-24 00:59:49.158409: step: 824/459, loss: 0.5950695276260376 2023-01-24 00:59:49.770151: step: 826/459, loss: 0.2177458107471466 2023-01-24 00:59:50.394691: step: 828/459, loss: 0.261464923620224 2023-01-24 00:59:50.994094: step: 830/459, loss: 0.3900063931941986 2023-01-24 00:59:51.625363: step: 832/459, loss: 0.11928819864988327 2023-01-24 00:59:52.284131: step: 834/459, loss: 0.2930239140987396 2023-01-24 00:59:52.946320: step: 836/459, loss: 
0.7014073729515076 2023-01-24 00:59:53.536706: step: 838/459, loss: 0.274192750453949 2023-01-24 00:59:54.225790: step: 840/459, loss: 0.8501518368721008 2023-01-24 00:59:54.816056: step: 842/459, loss: 0.6697700619697571 2023-01-24 00:59:55.382580: step: 844/459, loss: 0.28764522075653076 2023-01-24 00:59:55.973651: step: 846/459, loss: 0.33456167578697205 2023-01-24 00:59:56.621552: step: 848/459, loss: 0.61599200963974 2023-01-24 00:59:57.262606: step: 850/459, loss: 5.906885623931885 2023-01-24 00:59:57.856492: step: 852/459, loss: 0.16365398466587067 2023-01-24 00:59:58.492972: step: 854/459, loss: 0.3977922797203064 2023-01-24 00:59:59.243977: step: 856/459, loss: 0.8310911059379578 2023-01-24 00:59:59.879774: step: 858/459, loss: 0.30721357464790344 2023-01-24 01:00:00.506621: step: 860/459, loss: 1.2728606462478638 2023-01-24 01:00:01.143052: step: 862/459, loss: 0.7195396423339844 2023-01-24 01:00:01.780272: step: 864/459, loss: 0.19213759899139404 2023-01-24 01:00:02.488452: step: 866/459, loss: 1.0304269790649414 2023-01-24 01:00:03.132540: step: 868/459, loss: 0.3085429072380066 2023-01-24 01:00:03.725552: step: 870/459, loss: 0.8016474843025208 2023-01-24 01:00:04.368372: step: 872/459, loss: 0.1399245411157608 2023-01-24 01:00:04.924193: step: 874/459, loss: 0.44792523980140686 2023-01-24 01:00:05.530764: step: 876/459, loss: 0.4044588804244995 2023-01-24 01:00:06.139106: step: 878/459, loss: 0.24657490849494934 2023-01-24 01:00:06.742566: step: 880/459, loss: 1.0320719480514526 2023-01-24 01:00:07.352250: step: 882/459, loss: 0.32922616600990295 2023-01-24 01:00:07.977705: step: 884/459, loss: 0.28696244955062866 2023-01-24 01:00:08.558003: step: 886/459, loss: 0.09543361514806747 2023-01-24 01:00:09.147951: step: 888/459, loss: 0.6437160968780518 2023-01-24 01:00:09.782424: step: 890/459, loss: 0.4286709725856781 2023-01-24 01:00:10.361633: step: 892/459, loss: 0.22211572527885437 2023-01-24 01:00:10.968316: step: 894/459, loss: 0.3104904592037201 2023-01-24 01:00:11.660044: step: 896/459, loss: 0.8770942687988281 2023-01-24 01:00:12.299492: step: 898/459, loss: 0.5505065321922302 2023-01-24 01:00:12.951656: step: 900/459, loss: 0.5629287958145142 2023-01-24 01:00:13.567630: step: 902/459, loss: 0.42847055196762085 2023-01-24 01:00:14.239629: step: 904/459, loss: 0.5198787450790405 2023-01-24 01:00:14.883286: step: 906/459, loss: 0.24539528787136078 2023-01-24 01:00:15.518470: step: 908/459, loss: 0.8026461005210876 2023-01-24 01:00:16.173966: step: 910/459, loss: 0.26092764735221863 2023-01-24 01:00:16.843524: step: 912/459, loss: 0.31397658586502075 2023-01-24 01:00:17.490399: step: 914/459, loss: 1.4474555253982544 2023-01-24 01:00:18.183605: step: 916/459, loss: 0.6467820405960083 2023-01-24 01:00:18.861438: step: 918/459, loss: 0.3088505268096924 2023-01-24 01:00:19.385989: step: 920/459, loss: 0.02756929211318493 ================================================== Loss: 0.568 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3198997802768456, 'r': 0.3326472098514448, 'f1': 0.32614898528690495}, 'combined': 0.24032030494824574, 'epoch': 7} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.35490448230847327, 'r': 0.2908155603671065, 'f1': 0.31967954865291903}, 'combined': 0.20459491113786815, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31239552018406286, 
'r': 0.3307717272537136, 'f1': 0.32132110647503603}, 'combined': 0.23676292056055284, 'epoch': 7} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3679075833384179, 'r': 0.29279033628656304, 'f1': 0.3260787777542117}, 'combined': 0.20869041776269545, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3277817707492864, 'r': 0.3352313564481338, 'f1': 0.33146471199366034}, 'combined': 0.24423715620585498, 'epoch': 7} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36968480374602203, 'r': 0.30644481911976895, 'f1': 0.3351073196145533}, 'combined': 0.24026562538401938, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.19886363636363635, 'r': 0.25, 'f1': 0.22151898734177214}, 'combined': 0.14767932489451474, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.24107142857142858, 'r': 0.29347826086956524, 'f1': 0.2647058823529412}, 'combined': 0.1323529411764706, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.23529411764705882, 'r': 0.13793103448275862, 'f1': 0.17391304347826086}, 'combined': 0.11594202898550723, 'epoch': 7} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3420992658657132, 'r': 0.32067749020429287, 'f1': 0.331042188712365}, 'combined': 0.24392582326174264, 'epoch': 5} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3383810640925978, 'r': 0.2494791299809971, 'f1': 0.28720778961705584}, 'combined': 0.18381298535491572, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2660984848484848, 'r': 0.3345238095238095, 'f1': 0.29641350210970463}, 'combined': 0.19760900140646975, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3342478880342958, 'r': 0.3266369304319968, 'f1': 0.33039858414138645}, 'combined': 0.24345158831470579, 'epoch': 5} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3413499740991752, 'r': 0.24608229950967814, 'f1': 0.28599105067157526}, 'combined': 0.18303427242980813, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3269230769230769, 'r': 0.3695652173913043, 'f1': 0.346938775510204}, 'combined': 0.173469387755102, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3125609955204797, 'r': 0.29657776279500064, 'f1': 0.30435968660011725}, 'combined': 0.22426503223166533, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34660410768177247, 'r': 0.27822857007545915, 'f1': 0.30867516599395367}, 'combined': 0.22131426995792908, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3958333333333333, 'r': 0.16379310344827586, 'f1': 0.23170731707317074}, 'combined': 0.15447154471544716, 'epoch': 4} ****************************** Epoch: 8 
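
If you need to compare epochs from a raw log like this one (for example, to see how the dev 'combined' score evolves, or why the "Current best result" block still points at epochs 4 and 5), a small parser for the per-language evaluation dictionaries is handy. The sketch below is based only on the format visible in this log; the regex and names are assumptions rather than part of train.py, and it assumes line wrapping does not split an evaluation dict in the middle of a number.

import ast
import re

# Matches the current-epoch blocks such as "Dev Chinese: {...}" or "Sample Russian: {...}".
# The "Dev for ..."/"Test for ..." entries inside "Current best result" are deliberately skipped.
EVAL_RE = re.compile(
    r"(Dev|Test|Sample) (Chinese|Korean|Russian): (\{.*?'epoch': \d+\})",
    re.DOTALL,
)

def parse_eval_blocks(log_text: str):
    """Yield (split, language, metrics_dict) for every per-epoch evaluation block in the log."""
    for split, language, payload in EVAL_RE.findall(log_text):
        yield split, language, ast.literal_eval(payload)

# Example: collect the dev 'combined' score per epoch for Russian.
# russian_dev = {d['epoch']: d['combined']
#                for split, lang, d in parse_eval_blocks(log_text)
#                if split == 'Dev' and lang == 'Russian'}
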
command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:02:55.054608: step: 2/459, loss: 0.18039654195308685 2023-01-24 01:02:55.753845: step: 4/459, loss: 0.16765311360359192 2023-01-24 01:02:56.407591: step: 6/459, loss: 0.2904333174228668 2023-01-24 01:02:57.059190: step: 8/459, loss: 0.20816351473331451 2023-01-24 01:02:57.650825: step: 10/459, loss: 0.8620956540107727 2023-01-24 01:02:58.249180: step: 12/459, loss: 0.7453134059906006 2023-01-24 01:02:58.893641: step: 14/459, loss: 0.35998088121414185 2023-01-24 01:02:59.492618: step: 16/459, loss: 0.08643857389688492 2023-01-24 01:03:00.075022: step: 18/459, loss: 0.10129903256893158 2023-01-24 01:03:00.688189: step: 20/459, loss: 0.2946125864982605 2023-01-24 01:03:01.272909: step: 22/459, loss: 0.2419065237045288 2023-01-24 01:03:01.872510: step: 24/459, loss: 0.15470384061336517 2023-01-24 01:03:02.522754: step: 26/459, loss: 0.42476987838745117 2023-01-24 01:03:03.121781: step: 28/459, loss: 0.11591676622629166 2023-01-24 01:03:03.689375: step: 30/459, loss: 0.2387309968471527 2023-01-24 01:03:04.347434: step: 32/459, loss: 0.4435591995716095 2023-01-24 01:03:04.988094: step: 34/459, loss: 0.4407784938812256 2023-01-24 01:03:05.635218: step: 36/459, loss: 0.2733656167984009 2023-01-24 01:03:06.246788: step: 38/459, loss: 3.1299991607666016 2023-01-24 01:03:06.925739: step: 40/459, loss: 0.4894881844520569 2023-01-24 01:03:07.578568: step: 42/459, loss: 0.15391229093074799 2023-01-24 01:03:08.189613: step: 44/459, loss: 0.9259761571884155 2023-01-24 01:03:08.784572: step: 46/459, loss: 0.18635053932666779 2023-01-24 01:03:09.355317: step: 48/459, loss: 0.15961335599422455 2023-01-24 01:03:09.998629: step: 50/459, loss: 0.5486300587654114 2023-01-24 01:03:10.614703: step: 52/459, loss: 0.41316813230514526 2023-01-24 01:03:11.257672: step: 54/459, loss: 0.6223568320274353 2023-01-24 01:03:11.929601: step: 56/459, loss: 0.5599488019943237 2023-01-24 01:03:12.599408: step: 58/459, loss: 0.8780040740966797 2023-01-24 01:03:13.275126: step: 60/459, loss: 0.5186658501625061 2023-01-24 01:03:13.913363: step: 62/459, loss: 0.3754919767379761 2023-01-24 01:03:14.556115: step: 64/459, loss: 0.15609315037727356 2023-01-24 01:03:15.204153: step: 66/459, loss: 0.14898526668548584 2023-01-24 01:03:15.938406: step: 68/459, loss: 0.33197665214538574 2023-01-24 01:03:16.543837: step: 70/459, loss: 0.5781245231628418 2023-01-24 01:03:17.184722: step: 72/459, loss: 0.13677114248275757 2023-01-24 01:03:17.799465: step: 74/459, loss: 1.4945619106292725 2023-01-24 01:03:18.411678: step: 76/459, loss: 0.15927834808826447 2023-01-24 01:03:19.032924: step: 78/459, loss: 0.18429608643054962 2023-01-24 01:03:19.708207: step: 80/459, loss: 0.15387970209121704 2023-01-24 01:03:20.325387: step: 82/459, loss: 0.35744979977607727 2023-01-24 01:03:20.930203: step: 84/459, loss: 0.24467086791992188 2023-01-24 01:03:21.559051: step: 86/459, loss: 0.15248630940914154 2023-01-24 01:03:22.150966: step: 88/459, loss: 0.34590306878089905 2023-01-24 01:03:22.764024: step: 90/459, loss: 0.805512011051178 2023-01-24 01:03:23.364407: step: 92/459, loss: 0.11493372917175293 2023-01-24 01:03:24.029510: step: 94/459, loss: 0.4596560299396515 2023-01-24 01:03:24.625610: step: 96/459, loss: 0.15334013104438782 2023-01-24 01:03:25.262670: step: 98/459, loss: 0.17900390923023224 2023-01-24 
01:03:25.868157: step: 100/459, loss: 0.12850184738636017 2023-01-24 01:03:26.544164: step: 102/459, loss: 1.7403380870819092 2023-01-24 01:03:27.117779: step: 104/459, loss: 0.8216527700424194 2023-01-24 01:03:27.733891: step: 106/459, loss: 0.21369841694831848 2023-01-24 01:03:28.327291: step: 108/459, loss: 0.13184423744678497 2023-01-24 01:03:29.032661: step: 110/459, loss: 0.2873223125934601 2023-01-24 01:03:29.653327: step: 112/459, loss: 0.33049821853637695 2023-01-24 01:03:30.288321: step: 114/459, loss: 0.7893746495246887 2023-01-24 01:03:30.914274: step: 116/459, loss: 0.16041220724582672 2023-01-24 01:03:31.554672: step: 118/459, loss: 0.3987002372741699 2023-01-24 01:03:32.177821: step: 120/459, loss: 0.29851818084716797 2023-01-24 01:03:32.794296: step: 122/459, loss: 0.3944944739341736 2023-01-24 01:03:33.370225: step: 124/459, loss: 0.13396988809108734 2023-01-24 01:03:33.969131: step: 126/459, loss: 0.14006800949573517 2023-01-24 01:03:34.643679: step: 128/459, loss: 0.20632731914520264 2023-01-24 01:03:35.317057: step: 130/459, loss: 0.10404180735349655 2023-01-24 01:03:35.965982: step: 132/459, loss: 0.22104781866073608 2023-01-24 01:03:36.570837: step: 134/459, loss: 1.2369471788406372 2023-01-24 01:03:37.167384: step: 136/459, loss: 0.14321711659431458 2023-01-24 01:03:37.774966: step: 138/459, loss: 0.3332552909851074 2023-01-24 01:03:38.391654: step: 140/459, loss: 0.24666890501976013 2023-01-24 01:03:38.965228: step: 142/459, loss: 0.24726435542106628 2023-01-24 01:03:39.521556: step: 144/459, loss: 0.33278051018714905 2023-01-24 01:03:40.135634: step: 146/459, loss: 0.30680084228515625 2023-01-24 01:03:40.738940: step: 148/459, loss: 0.11443520337343216 2023-01-24 01:03:41.345149: step: 150/459, loss: 0.1604049801826477 2023-01-24 01:03:42.001814: step: 152/459, loss: 0.44124624133110046 2023-01-24 01:03:42.534068: step: 154/459, loss: 0.394248366355896 2023-01-24 01:03:43.155653: step: 156/459, loss: 0.27334481477737427 2023-01-24 01:03:43.695487: step: 158/459, loss: 0.5904852151870728 2023-01-24 01:03:44.294601: step: 160/459, loss: 0.6847115159034729 2023-01-24 01:03:44.906509: step: 162/459, loss: 0.16836515069007874 2023-01-24 01:03:45.535164: step: 164/459, loss: 0.15278226137161255 2023-01-24 01:03:46.135619: step: 166/459, loss: 0.17848290503025055 2023-01-24 01:03:46.748473: step: 168/459, loss: 0.15454792976379395 2023-01-24 01:03:47.407681: step: 170/459, loss: 0.6942780017852783 2023-01-24 01:03:48.027780: step: 172/459, loss: 0.13670803606510162 2023-01-24 01:03:48.649789: step: 174/459, loss: 0.12223079800605774 2023-01-24 01:03:49.240905: step: 176/459, loss: 0.7234348058700562 2023-01-24 01:03:49.880010: step: 178/459, loss: 0.7314285039901733 2023-01-24 01:03:50.580182: step: 180/459, loss: 1.5688786506652832 2023-01-24 01:03:51.135191: step: 182/459, loss: 0.21637144684791565 2023-01-24 01:03:51.764605: step: 184/459, loss: 0.08612131327390671 2023-01-24 01:03:52.401709: step: 186/459, loss: 0.17138414084911346 2023-01-24 01:03:53.023646: step: 188/459, loss: 0.7112336754798889 2023-01-24 01:03:53.667883: step: 190/459, loss: 0.16903312504291534 2023-01-24 01:03:54.312735: step: 192/459, loss: 0.3746127784252167 2023-01-24 01:03:54.918400: step: 194/459, loss: 0.19575753808021545 2023-01-24 01:03:55.524062: step: 196/459, loss: 1.7990565299987793 2023-01-24 01:03:56.141943: step: 198/459, loss: 0.3257383406162262 2023-01-24 01:03:56.759664: step: 200/459, loss: 0.1694483608007431 2023-01-24 01:03:57.363204: step: 202/459, loss: 0.46640875935554504 
2023-01-24 01:03:57.952205: step: 204/459, loss: 0.5984710454940796 2023-01-24 01:03:58.566410: step: 206/459, loss: 0.23730304837226868 2023-01-24 01:03:59.153927: step: 208/459, loss: 0.21183006465435028 2023-01-24 01:03:59.758451: step: 210/459, loss: 0.3088420033454895 2023-01-24 01:04:00.275076: step: 212/459, loss: 0.12316007912158966 2023-01-24 01:04:00.885962: step: 214/459, loss: 0.24646586179733276 2023-01-24 01:04:01.536199: step: 216/459, loss: 0.5027552247047424 2023-01-24 01:04:02.089468: step: 218/459, loss: 0.6890619993209839 2023-01-24 01:04:02.652307: step: 220/459, loss: 0.20084789395332336 2023-01-24 01:04:03.280498: step: 222/459, loss: 0.3335568904876709 2023-01-24 01:04:03.915107: step: 224/459, loss: 0.23138853907585144 2023-01-24 01:04:04.503539: step: 226/459, loss: 0.34828606247901917 2023-01-24 01:04:05.136480: step: 228/459, loss: 0.2883504033088684 2023-01-24 01:04:05.894008: step: 230/459, loss: 0.12196468561887741 2023-01-24 01:04:06.564011: step: 232/459, loss: 0.18726399540901184 2023-01-24 01:04:07.140732: step: 234/459, loss: 0.40320825576782227 2023-01-24 01:04:07.712945: step: 236/459, loss: 0.12347883731126785 2023-01-24 01:04:08.359105: step: 238/459, loss: 0.3936395049095154 2023-01-24 01:04:08.934265: step: 240/459, loss: 0.17490941286087036 2023-01-24 01:04:09.513925: step: 242/459, loss: 0.09399944543838501 2023-01-24 01:04:10.115183: step: 244/459, loss: 0.17052456736564636 2023-01-24 01:04:10.787565: step: 246/459, loss: 0.15133370459079742 2023-01-24 01:04:11.427311: step: 248/459, loss: 0.18924100697040558 2023-01-24 01:04:12.084548: step: 250/459, loss: 1.4516704082489014 2023-01-24 01:04:12.676059: step: 252/459, loss: 0.5956273078918457 2023-01-24 01:04:13.291200: step: 254/459, loss: 0.28370797634124756 2023-01-24 01:04:13.914431: step: 256/459, loss: 0.0947292149066925 2023-01-24 01:04:14.537964: step: 258/459, loss: 0.4110047221183777 2023-01-24 01:04:15.194465: step: 260/459, loss: 0.8702539205551147 2023-01-24 01:04:15.808342: step: 262/459, loss: 0.19981034100055695 2023-01-24 01:04:16.389846: step: 264/459, loss: 0.15529608726501465 2023-01-24 01:04:17.078233: step: 266/459, loss: 0.6634738445281982 2023-01-24 01:04:17.692997: step: 268/459, loss: 0.24748064577579498 2023-01-24 01:04:18.342887: step: 270/459, loss: 1.188869833946228 2023-01-24 01:04:18.991226: step: 272/459, loss: 0.08686104416847229 2023-01-24 01:04:19.660077: step: 274/459, loss: 0.34953930974006653 2023-01-24 01:04:20.289656: step: 276/459, loss: 0.20151148736476898 2023-01-24 01:04:20.878320: step: 278/459, loss: 0.7196388840675354 2023-01-24 01:04:21.507411: step: 280/459, loss: 0.5773730278015137 2023-01-24 01:04:22.067327: step: 282/459, loss: 1.0163421630859375 2023-01-24 01:04:22.629075: step: 284/459, loss: 0.14065498113632202 2023-01-24 01:04:23.350613: step: 286/459, loss: 1.1525304317474365 2023-01-24 01:04:23.989415: step: 288/459, loss: 0.23744353652000427 2023-01-24 01:04:24.570979: step: 290/459, loss: 0.45024436712265015 2023-01-24 01:04:25.198674: step: 292/459, loss: 0.21640464663505554 2023-01-24 01:04:25.811169: step: 294/459, loss: 0.2410629689693451 2023-01-24 01:04:26.457774: step: 296/459, loss: 0.4177993834018707 2023-01-24 01:04:27.054909: step: 298/459, loss: 0.15245240926742554 2023-01-24 01:04:27.641492: step: 300/459, loss: 0.207415372133255 2023-01-24 01:04:28.286471: step: 302/459, loss: 1.2372016906738281 2023-01-24 01:04:28.896128: step: 304/459, loss: 0.4108394682407379 2023-01-24 01:04:29.516194: step: 306/459, loss: 
0.37765631079673767 2023-01-24 01:04:30.119391: step: 308/459, loss: 0.20706279575824738 2023-01-24 01:04:30.711438: step: 310/459, loss: 0.3605620563030243 2023-01-24 01:04:31.292699: step: 312/459, loss: 0.5049048662185669 2023-01-24 01:04:31.887708: step: 314/459, loss: 0.20071172714233398 2023-01-24 01:04:32.570812: step: 316/459, loss: 0.6031753420829773 2023-01-24 01:04:33.167921: step: 318/459, loss: 0.6137513518333435 2023-01-24 01:04:33.811491: step: 320/459, loss: 0.526211678981781 2023-01-24 01:04:34.399749: step: 322/459, loss: 0.13956096768379211 2023-01-24 01:04:35.022895: step: 324/459, loss: 0.5018689036369324 2023-01-24 01:04:35.660402: step: 326/459, loss: 0.22387483716011047 2023-01-24 01:04:36.297084: step: 328/459, loss: 0.3183661103248596 2023-01-24 01:04:36.929189: step: 330/459, loss: 0.6441217660903931 2023-01-24 01:04:37.621680: step: 332/459, loss: 0.16685046255588531 2023-01-24 01:04:38.281129: step: 334/459, loss: 2.5086755752563477 2023-01-24 01:04:38.934619: step: 336/459, loss: 0.24141764640808105 2023-01-24 01:04:39.572271: step: 338/459, loss: 0.13190951943397522 2023-01-24 01:04:40.119569: step: 340/459, loss: 0.08184415102005005 2023-01-24 01:04:40.801455: step: 342/459, loss: 0.3513845205307007 2023-01-24 01:04:41.387397: step: 344/459, loss: 0.08365323394536972 2023-01-24 01:04:41.946728: step: 346/459, loss: 0.14875653386116028 2023-01-24 01:04:42.528450: step: 348/459, loss: 0.19385872781276703 2023-01-24 01:04:43.164576: step: 350/459, loss: 0.32247474789619446 2023-01-24 01:04:43.679674: step: 352/459, loss: 0.18310263752937317 2023-01-24 01:04:44.263623: step: 354/459, loss: 1.2801804542541504 2023-01-24 01:04:44.874635: step: 356/459, loss: 0.17687837779521942 2023-01-24 01:04:45.490508: step: 358/459, loss: 0.18701013922691345 2023-01-24 01:04:46.145310: step: 360/459, loss: 0.0778094232082367 2023-01-24 01:04:46.786700: step: 362/459, loss: 0.22566929459571838 2023-01-24 01:04:47.448919: step: 364/459, loss: 0.10951486229896545 2023-01-24 01:04:48.037445: step: 366/459, loss: 0.719797670841217 2023-01-24 01:04:48.704490: step: 368/459, loss: 0.14247171580791473 2023-01-24 01:04:49.348338: step: 370/459, loss: 0.2509130537509918 2023-01-24 01:04:49.973777: step: 372/459, loss: 0.1781395673751831 2023-01-24 01:04:50.576625: step: 374/459, loss: 0.35739865899086 2023-01-24 01:04:51.213155: step: 376/459, loss: 0.14378513395786285 2023-01-24 01:04:51.838086: step: 378/459, loss: 0.2825417220592499 2023-01-24 01:04:52.479376: step: 380/459, loss: 0.4980061948299408 2023-01-24 01:04:53.066389: step: 382/459, loss: 0.09487826377153397 2023-01-24 01:04:53.733205: step: 384/459, loss: 0.1638140231370926 2023-01-24 01:04:54.318192: step: 386/459, loss: 0.19358937442302704 2023-01-24 01:04:54.942475: step: 388/459, loss: 0.18182730674743652 2023-01-24 01:04:55.544009: step: 390/459, loss: 0.13522806763648987 2023-01-24 01:04:56.199278: step: 392/459, loss: 0.12003424763679504 2023-01-24 01:04:56.814755: step: 394/459, loss: 0.1545122116804123 2023-01-24 01:04:57.411716: step: 396/459, loss: 0.11498784273862839 2023-01-24 01:04:58.044135: step: 398/459, loss: 0.2597713768482208 2023-01-24 01:04:58.649352: step: 400/459, loss: 0.42967215180397034 2023-01-24 01:04:59.318028: step: 402/459, loss: 1.2871953248977661 2023-01-24 01:04:59.946143: step: 404/459, loss: 0.8162389993667603 2023-01-24 01:05:00.509824: step: 406/459, loss: 0.12798656523227692 2023-01-24 01:05:01.066155: step: 408/459, loss: 0.6826269626617432 2023-01-24 01:05:01.679762: step: 410/459, 
loss: 0.1981491893529892 2023-01-24 01:05:02.270890: step: 412/459, loss: 0.28634753823280334 2023-01-24 01:05:02.944911: step: 414/459, loss: 0.20611776411533356 2023-01-24 01:05:03.590585: step: 416/459, loss: 0.4466075003147125 2023-01-24 01:05:04.192409: step: 418/459, loss: 0.6910675168037415 2023-01-24 01:05:04.797163: step: 420/459, loss: 0.33422625064849854 2023-01-24 01:05:05.439707: step: 422/459, loss: 0.44213414192199707 2023-01-24 01:05:06.160016: step: 424/459, loss: 0.1854037195444107 2023-01-24 01:05:06.774732: step: 426/459, loss: 1.2895921468734741 2023-01-24 01:05:07.358486: step: 428/459, loss: 0.44895169138908386 2023-01-24 01:05:08.040540: step: 430/459, loss: 0.47139620780944824 2023-01-24 01:05:08.644374: step: 432/459, loss: 0.23197150230407715 2023-01-24 01:05:09.240513: step: 434/459, loss: 0.3928978741168976 2023-01-24 01:05:09.943223: step: 436/459, loss: 0.1844196617603302 2023-01-24 01:05:10.581430: step: 438/459, loss: 0.6571303606033325 2023-01-24 01:05:11.152738: step: 440/459, loss: 0.2600710391998291 2023-01-24 01:05:11.827354: step: 442/459, loss: 0.09951509535312653 2023-01-24 01:05:12.495594: step: 444/459, loss: 0.41797706484794617 2023-01-24 01:05:13.088599: step: 446/459, loss: 0.18515171110630035 2023-01-24 01:05:13.721124: step: 448/459, loss: 0.1780686229467392 2023-01-24 01:05:14.383558: step: 450/459, loss: 2.3774335384368896 2023-01-24 01:05:15.098365: step: 452/459, loss: 0.09123805165290833 2023-01-24 01:05:15.747342: step: 454/459, loss: 0.35055965185165405 2023-01-24 01:05:16.350783: step: 456/459, loss: 0.5548540949821472 2023-01-24 01:05:17.045651: step: 458/459, loss: 1.1524410247802734 2023-01-24 01:05:17.625251: step: 460/459, loss: 0.4023057818412781 2023-01-24 01:05:18.250849: step: 462/459, loss: 0.18431460857391357 2023-01-24 01:05:18.891577: step: 464/459, loss: 0.33067265152931213 2023-01-24 01:05:19.544026: step: 466/459, loss: 0.677057147026062 2023-01-24 01:05:20.215878: step: 468/459, loss: 0.41886287927627563 2023-01-24 01:05:20.832829: step: 470/459, loss: 0.5934765934944153 2023-01-24 01:05:21.461694: step: 472/459, loss: 0.7634115219116211 2023-01-24 01:05:22.074618: step: 474/459, loss: 0.31536149978637695 2023-01-24 01:05:22.680785: step: 476/459, loss: 0.15814140439033508 2023-01-24 01:05:23.308297: step: 478/459, loss: 0.7651868462562561 2023-01-24 01:05:23.927738: step: 480/459, loss: 0.5521712303161621 2023-01-24 01:05:24.571518: step: 482/459, loss: 0.2258283495903015 2023-01-24 01:05:25.168981: step: 484/459, loss: 0.2894814908504486 2023-01-24 01:05:25.853085: step: 486/459, loss: 0.15028543770313263 2023-01-24 01:05:26.478231: step: 488/459, loss: 0.2247893214225769 2023-01-24 01:05:27.087328: step: 490/459, loss: 0.18406257033348083 2023-01-24 01:05:27.720945: step: 492/459, loss: 0.38161420822143555 2023-01-24 01:05:28.364644: step: 494/459, loss: 0.6951675415039062 2023-01-24 01:05:28.969420: step: 496/459, loss: 0.3234576880931854 2023-01-24 01:05:29.582595: step: 498/459, loss: 0.2894826829433441 2023-01-24 01:05:30.214348: step: 500/459, loss: 0.17935915291309357 2023-01-24 01:05:30.821123: step: 502/459, loss: 0.3712400197982788 2023-01-24 01:05:31.443292: step: 504/459, loss: 0.8673088550567627 2023-01-24 01:05:32.072038: step: 506/459, loss: 0.5508483648300171 2023-01-24 01:05:32.677966: step: 508/459, loss: 0.2756122648715973 2023-01-24 01:05:33.332827: step: 510/459, loss: 0.2856925427913666 2023-01-24 01:05:34.006620: step: 512/459, loss: 0.09698739647865295 2023-01-24 01:05:34.631871: step: 
514/459, loss: 0.6220617890357971 2023-01-24 01:05:35.291149: step: 516/459, loss: 0.19669033586978912 2023-01-24 01:05:35.941928: step: 518/459, loss: 0.6912922263145447 2023-01-24 01:05:36.561986: step: 520/459, loss: 0.4407818913459778 2023-01-24 01:05:37.154731: step: 522/459, loss: 0.37258362770080566 2023-01-24 01:05:37.798428: step: 524/459, loss: 1.1093336343765259 2023-01-24 01:05:38.413283: step: 526/459, loss: 0.15341244637966156 2023-01-24 01:05:39.087817: step: 528/459, loss: 0.48238080739974976 2023-01-24 01:05:39.698208: step: 530/459, loss: 0.18430449068546295 2023-01-24 01:05:40.322700: step: 532/459, loss: 0.5843371152877808 2023-01-24 01:05:40.927636: step: 534/459, loss: 0.6429200768470764 2023-01-24 01:05:41.590894: step: 536/459, loss: 1.0304664373397827 2023-01-24 01:05:42.199746: step: 538/459, loss: 0.7883310317993164 2023-01-24 01:05:42.820290: step: 540/459, loss: 0.36959606409072876 2023-01-24 01:05:43.436634: step: 542/459, loss: 7.552517890930176 2023-01-24 01:05:44.165724: step: 544/459, loss: 0.2168189138174057 2023-01-24 01:05:44.790677: step: 546/459, loss: 0.32264742255210876 2023-01-24 01:05:45.370568: step: 548/459, loss: 0.5107887387275696 2023-01-24 01:05:46.065816: step: 550/459, loss: 11.453082084655762 2023-01-24 01:05:46.687743: step: 552/459, loss: 0.16538244485855103 2023-01-24 01:05:47.302442: step: 554/459, loss: 0.1701687127351761 2023-01-24 01:05:47.940878: step: 556/459, loss: 0.1407277137041092 2023-01-24 01:05:48.546341: step: 558/459, loss: 0.15756656229496002 2023-01-24 01:05:49.168980: step: 560/459, loss: 0.18394441902637482 2023-01-24 01:05:49.852158: step: 562/459, loss: 0.5148423314094543 2023-01-24 01:05:50.435582: step: 564/459, loss: 0.2938549220561981 2023-01-24 01:05:51.039399: step: 566/459, loss: 0.21858324110507965 2023-01-24 01:05:51.749902: step: 568/459, loss: 0.25872209668159485 2023-01-24 01:05:52.395380: step: 570/459, loss: 1.027198314666748 2023-01-24 01:05:53.021592: step: 572/459, loss: 0.21142911911010742 2023-01-24 01:05:53.633713: step: 574/459, loss: 0.43394842743873596 2023-01-24 01:05:54.232536: step: 576/459, loss: 0.4301047623157501 2023-01-24 01:05:54.810262: step: 578/459, loss: 0.2465490996837616 2023-01-24 01:05:55.430495: step: 580/459, loss: 0.19614142179489136 2023-01-24 01:05:56.058967: step: 582/459, loss: 1.2145421504974365 2023-01-24 01:05:56.689755: step: 584/459, loss: 0.16925440728664398 2023-01-24 01:05:57.260934: step: 586/459, loss: 0.4093548357486725 2023-01-24 01:05:57.911280: step: 588/459, loss: 0.7979206442832947 2023-01-24 01:05:58.465202: step: 590/459, loss: 0.7618283033370972 2023-01-24 01:05:59.117578: step: 592/459, loss: 0.608021080493927 2023-01-24 01:05:59.771814: step: 594/459, loss: 0.5015140175819397 2023-01-24 01:06:00.379000: step: 596/459, loss: 0.3321842849254608 2023-01-24 01:06:01.020197: step: 598/459, loss: 0.6244779229164124 2023-01-24 01:06:01.658958: step: 600/459, loss: 0.2893461287021637 2023-01-24 01:06:02.249903: step: 602/459, loss: 0.132235586643219 2023-01-24 01:06:02.788239: step: 604/459, loss: 0.2363666296005249 2023-01-24 01:06:03.400387: step: 606/459, loss: 0.31504762172698975 2023-01-24 01:06:04.060169: step: 608/459, loss: 0.2763802409172058 2023-01-24 01:06:04.653466: step: 610/459, loss: 0.3443502187728882 2023-01-24 01:06:05.230537: step: 612/459, loss: 1.0688430070877075 2023-01-24 01:06:05.886683: step: 614/459, loss: 0.1815289556980133 2023-01-24 01:06:06.556218: step: 616/459, loss: 0.44782310724258423 2023-01-24 01:06:07.179790: step: 
618/459, loss: 0.0968952625989914 2023-01-24 01:06:07.746331: step: 620/459, loss: 1.053239345550537 2023-01-24 01:06:08.374047: step: 622/459, loss: 0.7677373886108398 2023-01-24 01:06:08.962469: step: 624/459, loss: 0.22508327662944794 2023-01-24 01:06:09.574174: step: 626/459, loss: 2.037318229675293 2023-01-24 01:06:10.227882: step: 628/459, loss: 0.48524755239486694 2023-01-24 01:06:10.878549: step: 630/459, loss: 0.2922484576702118 2023-01-24 01:06:11.494281: step: 632/459, loss: 0.23703831434249878 2023-01-24 01:06:12.162346: step: 634/459, loss: 0.6302182674407959 2023-01-24 01:06:12.759571: step: 636/459, loss: 0.46569639444351196 2023-01-24 01:06:13.362654: step: 638/459, loss: 0.23130406439304352 2023-01-24 01:06:13.950629: step: 640/459, loss: 0.10850957036018372 2023-01-24 01:06:14.646654: step: 642/459, loss: 0.47394704818725586 2023-01-24 01:06:15.334284: step: 644/459, loss: 0.2031172513961792 2023-01-24 01:06:15.918285: step: 646/459, loss: 0.5884383916854858 2023-01-24 01:06:16.514851: step: 648/459, loss: 0.11349011212587357 2023-01-24 01:06:17.093264: step: 650/459, loss: 0.10355081409215927 2023-01-24 01:06:17.693161: step: 652/459, loss: 0.25856927037239075 2023-01-24 01:06:18.359409: step: 654/459, loss: 0.08802769333124161 2023-01-24 01:06:18.987697: step: 656/459, loss: 0.19306515157222748 2023-01-24 01:06:19.598618: step: 658/459, loss: 1.1434768438339233 2023-01-24 01:06:20.188026: step: 660/459, loss: 0.43385830521583557 2023-01-24 01:06:20.802855: step: 662/459, loss: 0.15163977444171906 2023-01-24 01:06:21.403570: step: 664/459, loss: 0.8342099189758301 2023-01-24 01:06:21.966076: step: 666/459, loss: 0.10318244993686676 2023-01-24 01:06:22.589229: step: 668/459, loss: 0.10682031512260437 2023-01-24 01:06:23.307303: step: 670/459, loss: 0.5322294235229492 2023-01-24 01:06:24.023585: step: 672/459, loss: 0.21737299859523773 2023-01-24 01:06:24.624734: step: 674/459, loss: 0.30536869168281555 2023-01-24 01:06:25.226933: step: 676/459, loss: 0.15687422454357147 2023-01-24 01:06:25.836476: step: 678/459, loss: 0.3618537187576294 2023-01-24 01:06:26.490345: step: 680/459, loss: 0.10778168588876724 2023-01-24 01:06:27.112242: step: 682/459, loss: 0.4329511523246765 2023-01-24 01:06:27.817976: step: 684/459, loss: 0.19837552309036255 2023-01-24 01:06:28.452074: step: 686/459, loss: 0.4078046679496765 2023-01-24 01:06:29.128390: step: 688/459, loss: 0.14107029139995575 2023-01-24 01:06:29.805874: step: 690/459, loss: 0.6692290902137756 2023-01-24 01:06:30.448750: step: 692/459, loss: 0.26380112767219543 2023-01-24 01:06:31.109395: step: 694/459, loss: 0.16800212860107422 2023-01-24 01:06:31.749559: step: 696/459, loss: 0.41059309244155884 2023-01-24 01:06:32.287966: step: 698/459, loss: 0.2811278998851776 2023-01-24 01:06:32.980788: step: 700/459, loss: 0.3261243999004364 2023-01-24 01:06:33.621431: step: 702/459, loss: 0.2332344502210617 2023-01-24 01:06:34.212955: step: 704/459, loss: 0.6889300346374512 2023-01-24 01:06:34.834174: step: 706/459, loss: 0.2903198301792145 2023-01-24 01:06:35.444448: step: 708/459, loss: 0.6163758635520935 2023-01-24 01:06:36.030829: step: 710/459, loss: 0.353803813457489 2023-01-24 01:06:36.616718: step: 712/459, loss: 0.14322584867477417 2023-01-24 01:06:37.285796: step: 714/459, loss: 0.1979507952928543 2023-01-24 01:06:37.927970: step: 716/459, loss: 0.16458038985729218 2023-01-24 01:06:38.520489: step: 718/459, loss: 0.45715001225471497 2023-01-24 01:06:39.058714: step: 720/459, loss: 0.18958301842212677 2023-01-24 
01:06:39.798842: step: 722/459, loss: 0.8252791166305542 2023-01-24 01:06:40.425223: step: 724/459, loss: 1.6945916414260864 2023-01-24 01:06:41.024973: step: 726/459, loss: 1.9997141361236572 2023-01-24 01:06:41.685961: step: 728/459, loss: 0.22970955073833466 2023-01-24 01:06:42.289167: step: 730/459, loss: 0.20356518030166626 2023-01-24 01:06:42.902760: step: 732/459, loss: 0.5126933455467224 2023-01-24 01:06:43.482346: step: 734/459, loss: 0.08899194747209549 2023-01-24 01:06:44.129762: step: 736/459, loss: 0.21081678569316864 2023-01-24 01:06:44.731301: step: 738/459, loss: 0.5657496452331543 2023-01-24 01:06:45.349247: step: 740/459, loss: 0.10597952455282211 2023-01-24 01:06:45.935002: step: 742/459, loss: 1.0768729448318481 2023-01-24 01:06:46.519422: step: 744/459, loss: 0.18865738809108734 2023-01-24 01:06:47.117304: step: 746/459, loss: 0.06863231956958771 2023-01-24 01:06:47.793903: step: 748/459, loss: 0.053257960826158524 2023-01-24 01:06:48.349376: step: 750/459, loss: 0.07478757202625275 2023-01-24 01:06:48.973194: step: 752/459, loss: 0.13609221577644348 2023-01-24 01:06:49.597971: step: 754/459, loss: 0.35969313979148865 2023-01-24 01:06:50.155312: step: 756/459, loss: 1.7018325328826904 2023-01-24 01:06:50.760826: step: 758/459, loss: 0.5019904971122742 2023-01-24 01:06:51.377563: step: 760/459, loss: 0.16873160004615784 2023-01-24 01:06:52.069086: step: 762/459, loss: 0.43425196409225464 2023-01-24 01:06:52.629935: step: 764/459, loss: 0.1474255472421646 2023-01-24 01:06:53.213851: step: 766/459, loss: 1.2030750513076782 2023-01-24 01:06:53.797918: step: 768/459, loss: 0.1565702110528946 2023-01-24 01:06:54.378559: step: 770/459, loss: 0.28595197200775146 2023-01-24 01:06:55.002837: step: 772/459, loss: 0.2812767028808594 2023-01-24 01:06:55.613433: step: 774/459, loss: 0.3592333197593689 2023-01-24 01:06:56.211317: step: 776/459, loss: 0.5151511430740356 2023-01-24 01:06:56.892478: step: 778/459, loss: 0.30047035217285156 2023-01-24 01:06:57.501165: step: 780/459, loss: 0.17508606612682343 2023-01-24 01:06:58.027732: step: 782/459, loss: 0.1449691653251648 2023-01-24 01:06:58.645283: step: 784/459, loss: 1.132495641708374 2023-01-24 01:06:59.261018: step: 786/459, loss: 0.47848600149154663 2023-01-24 01:06:59.879237: step: 788/459, loss: 0.48225653171539307 2023-01-24 01:07:00.580391: step: 790/459, loss: 0.36345240473747253 2023-01-24 01:07:01.261598: step: 792/459, loss: 0.5792774558067322 2023-01-24 01:07:01.833246: step: 794/459, loss: 0.44768813252449036 2023-01-24 01:07:02.534186: step: 796/459, loss: 1.195369839668274 2023-01-24 01:07:03.176793: step: 798/459, loss: 0.7254694700241089 2023-01-24 01:07:03.741316: step: 800/459, loss: 0.1919122189283371 2023-01-24 01:07:04.385332: step: 802/459, loss: 0.42833006381988525 2023-01-24 01:07:05.096462: step: 804/459, loss: 0.663776159286499 2023-01-24 01:07:05.724618: step: 806/459, loss: 0.289635568857193 2023-01-24 01:07:06.407990: step: 808/459, loss: 0.3906850814819336 2023-01-24 01:07:07.039618: step: 810/459, loss: 0.1701679676771164 2023-01-24 01:07:07.666602: step: 812/459, loss: 0.12335324287414551 2023-01-24 01:07:08.249474: step: 814/459, loss: 0.2650189995765686 2023-01-24 01:07:08.959443: step: 816/459, loss: 0.39981192350387573 2023-01-24 01:07:09.540952: step: 818/459, loss: 0.5281462073326111 2023-01-24 01:07:10.155510: step: 820/459, loss: 0.913316547870636 2023-01-24 01:07:10.827475: step: 822/459, loss: 0.280655175447464 2023-01-24 01:07:11.457018: step: 824/459, loss: 0.16462141275405884 2023-01-24 
01:07:12.102004: step: 826/459, loss: 1.5546847581863403 2023-01-24 01:07:12.716623: step: 828/459, loss: 0.19112418591976166 2023-01-24 01:07:13.291812: step: 830/459, loss: 0.4018186926841736 2023-01-24 01:07:13.966513: step: 832/459, loss: 0.4416646361351013 2023-01-24 01:07:14.533389: step: 834/459, loss: 0.9240483641624451 2023-01-24 01:07:15.116277: step: 836/459, loss: 0.41853979229927063 2023-01-24 01:07:15.745526: step: 838/459, loss: 0.34208613634109497 2023-01-24 01:07:16.375625: step: 840/459, loss: 0.22082556784152985 2023-01-24 01:07:16.965838: step: 842/459, loss: 0.19554832577705383 2023-01-24 01:07:17.587048: step: 844/459, loss: 0.6432961225509644 2023-01-24 01:07:18.198299: step: 846/459, loss: 0.15515032410621643 2023-01-24 01:07:18.874645: step: 848/459, loss: 0.5171859860420227 2023-01-24 01:07:19.509493: step: 850/459, loss: 0.3214985132217407 2023-01-24 01:07:20.087745: step: 852/459, loss: 0.3268499970436096 2023-01-24 01:07:20.677160: step: 854/459, loss: 0.9382930994033813 2023-01-24 01:07:21.289348: step: 856/459, loss: 1.2264169454574585 2023-01-24 01:07:21.874444: step: 858/459, loss: 0.14028987288475037 2023-01-24 01:07:22.517659: step: 860/459, loss: 0.27962589263916016 2023-01-24 01:07:23.098757: step: 862/459, loss: 0.2552591562271118 2023-01-24 01:07:23.789161: step: 864/459, loss: 0.6662813425064087 2023-01-24 01:07:24.436248: step: 866/459, loss: 0.23777727782726288 2023-01-24 01:07:25.115123: step: 868/459, loss: 0.21679410338401794 2023-01-24 01:07:25.699266: step: 870/459, loss: 0.17414627969264984 2023-01-24 01:07:26.321825: step: 872/459, loss: 0.4221573770046234 2023-01-24 01:07:26.924806: step: 874/459, loss: 0.43969395756721497 2023-01-24 01:07:27.567641: step: 876/459, loss: 0.4248467683792114 2023-01-24 01:07:28.111454: step: 878/459, loss: 0.24486185610294342 2023-01-24 01:07:28.740714: step: 880/459, loss: 0.1396687775850296 2023-01-24 01:07:29.335278: step: 882/459, loss: 0.3761585056781769 2023-01-24 01:07:29.965155: step: 884/459, loss: 0.18400150537490845 2023-01-24 01:07:30.512443: step: 886/459, loss: 0.6799338459968567 2023-01-24 01:07:31.241808: step: 888/459, loss: 0.7146721482276917 2023-01-24 01:07:31.882654: step: 890/459, loss: 0.295459121465683 2023-01-24 01:07:32.520642: step: 892/459, loss: 0.3376745879650116 2023-01-24 01:07:33.115005: step: 894/459, loss: 0.14085602760314941 2023-01-24 01:07:33.814279: step: 896/459, loss: 0.9034574627876282 2023-01-24 01:07:34.516737: step: 898/459, loss: 0.39558085799217224 2023-01-24 01:07:35.099362: step: 900/459, loss: 0.1396629512310028 2023-01-24 01:07:35.715921: step: 902/459, loss: 0.7168062329292297 2023-01-24 01:07:36.409871: step: 904/459, loss: 0.6129047870635986 2023-01-24 01:07:36.996201: step: 906/459, loss: 0.43074819445610046 2023-01-24 01:07:37.618369: step: 908/459, loss: 0.19861727952957153 2023-01-24 01:07:38.248041: step: 910/459, loss: 0.830756425857544 2023-01-24 01:07:38.953500: step: 912/459, loss: 0.19634170830249786 2023-01-24 01:07:39.588442: step: 914/459, loss: 0.8808959126472473 2023-01-24 01:07:40.154814: step: 916/459, loss: 0.1429353654384613 2023-01-24 01:07:40.745799: step: 918/459, loss: 0.2917105257511139 2023-01-24 01:07:41.199620: step: 920/459, loss: 0.030388738960027695 ================================================== Loss: 0.458 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33142113229712633, 'r': 0.31632794980162154, 'f1': 0.3236986981465137}, 'combined': 
0.23851483021322062, 'epoch': 8} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3576385645707845, 'r': 0.29776095230415633, 'f1': 0.32496453483839904}, 'combined': 0.20797730229657535, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32408123791102517, 'r': 0.3179316888045541, 'f1': 0.3209770114942529}, 'combined': 0.2365093768905021, 'epoch': 8} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.35647133704655776, 'r': 0.29263056123003783, 'f1': 0.3214115001028873}, 'combined': 0.20570336006584786, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.29861111111111105, 'r': 0.25595238095238093, 'f1': 0.2756410256410256}, 'combined': 0.18376068376068372, 'epoch': 8} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2708333333333333, 'r': 0.2826086956521739, 'f1': 0.2765957446808511}, 'combined': 0.13829787234042554, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8} New best russian model... 
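Note on the evaluation dicts above: each language reports precision p, recall r, and f1 at the template and slot levels, plus a 'combined' score. The logged numbers are consistent with f1 being the usual harmonic mean of p and r, and with 'combined' being the product of the template f1 and the slot f1 (e.g. 0.7368421 x 0.3236987 ~= 0.2385148 for Dev Chinese at epoch 8). The snippet below is a minimal illustrative sketch of that relationship only; it is not taken from train.py, and the helper names f1 and combined_score are assumptions.

# Minimal sketch (assumed helper names, not from train.py):
# reproduce the 'combined' field from one per-language result dict.
def f1(p, r):
    # Harmonic mean of precision and recall; defined as 0 when p + r == 0.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined_score(result):
    # The logged 'combined' value matches template_f1 * slot_f1.
    t, s = result['template'], result['slot']
    return f1(t['p'], t['r']) * f1(s['p'], s['r'])

# Check against the epoch-8 Dev Chinese entry above:
dev_chinese = {'template': {'p': 1.0, 'r': 0.5833333333333334},
               'slot': {'p': 0.33142113229712633, 'r': 0.31632794980162154}}
print(combined_score(dev_chinese))  # ~0.238515, matching the logged 'combined'

The same check holds for the Test/Sample rows (e.g. Sample Korean at epoch 8: 0.5 x 0.2765957 ~= 0.1382979), so the 'combined' column can be read as template f1 weighted by slot f1.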
================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3420992658657132, 'r': 0.32067749020429287, 'f1': 0.331042188712365}, 'combined': 0.24392582326174264, 'epoch': 5} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3383810640925978, 'r': 0.2494791299809971, 'f1': 0.28720778961705584}, 'combined': 0.18381298535491572, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2660984848484848, 'r': 0.3345238095238095, 'f1': 0.29641350210970463}, 'combined': 0.19760900140646975, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3342478880342958, 'r': 0.3266369304319968, 'f1': 0.33039858414138645}, 'combined': 0.24345158831470579, 'epoch': 5} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3413499740991752, 'r': 0.24608229950967814, 'f1': 0.28599105067157526}, 'combined': 0.18303427242980813, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3269230769230769, 'r': 0.3695652173913043, 'f1': 0.346938775510204}, 'combined': 0.173469387755102, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8} ****************************** Epoch: 9 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:10:24.041198: step: 2/459, loss: 0.17335596680641174 2023-01-24 01:10:24.787722: step: 4/459, loss: 0.8028193712234497 2023-01-24 01:10:25.388330: step: 6/459, loss: 0.2405860722064972 2023-01-24 01:10:25.990208: step: 8/459, loss: 0.2641085088253021 2023-01-24 01:10:26.605832: step: 10/459, loss: 0.2775120735168457 2023-01-24 01:10:27.181134: step: 12/459, loss: 0.8880284428596497 2023-01-24 01:10:27.837912: step: 14/459, loss: 0.08282487839460373 2023-01-24 01:10:28.409509: step: 16/459, loss: 0.10490041226148605 2023-01-24 01:10:29.064991: step: 18/459, loss: 0.08039629459381104 2023-01-24 01:10:29.701612: step: 20/459, loss: 0.0695812776684761 2023-01-24 01:10:30.314537: step: 22/459, loss: 0.07460862398147583 2023-01-24 01:10:31.013391: step: 24/459, loss: 0.22900894284248352 2023-01-24 01:10:31.594088: step: 26/459, loss: 0.1540677547454834 2023-01-24 01:10:32.233720: step: 28/459, loss: 0.16876423358917236 2023-01-24 01:10:32.840599: step: 30/459, loss: 0.34678924083709717 2023-01-24 01:10:33.493566: step: 32/459, loss: 0.5181834101676941 2023-01-24 01:10:34.110447: step: 34/459, loss: 1.8750101327896118 2023-01-24 01:10:34.816855: step: 
36/459, loss: 0.1816500872373581 2023-01-24 01:10:35.473755: step: 38/459, loss: 0.37752148509025574 2023-01-24 01:10:36.105066: step: 40/459, loss: 0.22020649909973145 2023-01-24 01:10:36.756313: step: 42/459, loss: 0.49645891785621643 2023-01-24 01:10:37.330564: step: 44/459, loss: 0.27955254912376404 2023-01-24 01:10:37.897132: step: 46/459, loss: 0.4946601092815399 2023-01-24 01:10:38.534544: step: 48/459, loss: 0.3457317650318146 2023-01-24 01:10:39.131857: step: 50/459, loss: 0.2714211344718933 2023-01-24 01:10:39.733865: step: 52/459, loss: 0.1781957894563675 2023-01-24 01:10:40.368399: step: 54/459, loss: 0.54975426197052 2023-01-24 01:10:41.027299: step: 56/459, loss: 0.23886820673942566 2023-01-24 01:10:41.724697: step: 58/459, loss: 0.23852606117725372 2023-01-24 01:10:42.427297: step: 60/459, loss: 0.21350502967834473 2023-01-24 01:10:43.095889: step: 62/459, loss: 0.2629624903202057 2023-01-24 01:10:43.709399: step: 64/459, loss: 0.28432005643844604 2023-01-24 01:10:44.324481: step: 66/459, loss: 0.5237942337989807 2023-01-24 01:10:45.032701: step: 68/459, loss: 0.20123478770256042 2023-01-24 01:10:45.600226: step: 70/459, loss: 0.9341320991516113 2023-01-24 01:10:46.198905: step: 72/459, loss: 0.15431679785251617 2023-01-24 01:10:46.792241: step: 74/459, loss: 0.10359106957912445 2023-01-24 01:10:47.426723: step: 76/459, loss: 0.35824111104011536 2023-01-24 01:10:48.025988: step: 78/459, loss: 0.09284496307373047 2023-01-24 01:10:48.692428: step: 80/459, loss: 0.17868901789188385 2023-01-24 01:10:49.321820: step: 82/459, loss: 2.8945212364196777 2023-01-24 01:10:50.078650: step: 84/459, loss: 0.4719068109989166 2023-01-24 01:10:50.682219: step: 86/459, loss: 0.978053629398346 2023-01-24 01:10:51.281405: step: 88/459, loss: 0.18861444294452667 2023-01-24 01:10:51.905765: step: 90/459, loss: 0.18052227795124054 2023-01-24 01:10:52.578182: step: 92/459, loss: 0.5174484252929688 2023-01-24 01:10:53.185760: step: 94/459, loss: 0.7735249400138855 2023-01-24 01:10:53.859001: step: 96/459, loss: 0.14893695712089539 2023-01-24 01:10:54.521537: step: 98/459, loss: 0.3675694465637207 2023-01-24 01:10:55.107227: step: 100/459, loss: 0.2886526584625244 2023-01-24 01:10:55.731493: step: 102/459, loss: 0.1256459355354309 2023-01-24 01:10:56.355327: step: 104/459, loss: 0.36518317461013794 2023-01-24 01:10:56.908968: step: 106/459, loss: 0.30218589305877686 2023-01-24 01:10:57.491943: step: 108/459, loss: 0.3347502648830414 2023-01-24 01:10:58.148549: step: 110/459, loss: 0.14304421842098236 2023-01-24 01:10:58.806176: step: 112/459, loss: 0.40502190589904785 2023-01-24 01:10:59.568262: step: 114/459, loss: 0.28143665194511414 2023-01-24 01:11:00.336948: step: 116/459, loss: 0.1972467303276062 2023-01-24 01:11:00.930334: step: 118/459, loss: 0.19575285911560059 2023-01-24 01:11:01.563029: step: 120/459, loss: 0.2912025451660156 2023-01-24 01:11:02.181397: step: 122/459, loss: 0.31056949496269226 2023-01-24 01:11:02.775900: step: 124/459, loss: 0.11638353019952774 2023-01-24 01:11:03.335027: step: 126/459, loss: 0.059008534997701645 2023-01-24 01:11:03.934924: step: 128/459, loss: 0.066657155752182 2023-01-24 01:11:04.542987: step: 130/459, loss: 0.35967403650283813 2023-01-24 01:11:05.147617: step: 132/459, loss: 0.19889503717422485 2023-01-24 01:11:05.836859: step: 134/459, loss: 0.09900910407304764 2023-01-24 01:11:06.394726: step: 136/459, loss: 0.20109125971794128 2023-01-24 01:11:07.003052: step: 138/459, loss: 0.16888684034347534 2023-01-24 01:11:07.653266: step: 140/459, loss: 
0.30343952775001526 2023-01-24 01:11:08.271652: step: 142/459, loss: 0.4071442782878876 2023-01-24 01:11:08.864373: step: 144/459, loss: 1.4900357723236084 2023-01-24 01:11:09.445733: step: 146/459, loss: 0.5587233901023865 2023-01-24 01:11:10.046031: step: 148/459, loss: 0.11713685095310211 2023-01-24 01:11:10.646913: step: 150/459, loss: 0.32646453380584717 2023-01-24 01:11:11.267307: step: 152/459, loss: 0.3213529884815216 2023-01-24 01:11:11.883599: step: 154/459, loss: 0.5872939825057983 2023-01-24 01:11:12.526625: step: 156/459, loss: 0.3137020766735077 2023-01-24 01:11:13.250965: step: 158/459, loss: 0.5349737405776978 2023-01-24 01:11:13.866647: step: 160/459, loss: 0.38768625259399414 2023-01-24 01:11:14.541255: step: 162/459, loss: 0.24582767486572266 2023-01-24 01:11:15.169816: step: 164/459, loss: 0.18626806139945984 2023-01-24 01:11:15.765336: step: 166/459, loss: 0.11130859702825546 2023-01-24 01:11:16.390720: step: 168/459, loss: 0.1743098497390747 2023-01-24 01:11:16.993480: step: 170/459, loss: 0.2564270496368408 2023-01-24 01:11:17.627674: step: 172/459, loss: 0.09862656891345978 2023-01-24 01:11:18.326869: step: 174/459, loss: 0.27759549021720886 2023-01-24 01:11:18.898835: step: 176/459, loss: 0.1346891075372696 2023-01-24 01:11:19.496230: step: 178/459, loss: 0.23747198283672333 2023-01-24 01:11:20.133992: step: 180/459, loss: 0.2569955289363861 2023-01-24 01:11:20.762513: step: 182/459, loss: 0.9302015900611877 2023-01-24 01:11:21.414396: step: 184/459, loss: 0.48854631185531616 2023-01-24 01:11:21.990891: step: 186/459, loss: 0.11650314182043076 2023-01-24 01:11:22.689079: step: 188/459, loss: 0.2907627820968628 2023-01-24 01:11:23.275015: step: 190/459, loss: 0.11212135851383209 2023-01-24 01:11:23.876857: step: 192/459, loss: 0.10411537438631058 2023-01-24 01:11:24.451338: step: 194/459, loss: 0.2114831954240799 2023-01-24 01:11:25.101343: step: 196/459, loss: 1.69362473487854 2023-01-24 01:11:25.700458: step: 198/459, loss: 0.6675819158554077 2023-01-24 01:11:26.261725: step: 200/459, loss: 0.11358792334794998 2023-01-24 01:11:26.872988: step: 202/459, loss: 0.5298581123352051 2023-01-24 01:11:27.545724: step: 204/459, loss: 0.5031935572624207 2023-01-24 01:11:28.176326: step: 206/459, loss: 0.11199058592319489 2023-01-24 01:11:28.822894: step: 208/459, loss: 0.27398908138275146 2023-01-24 01:11:29.406727: step: 210/459, loss: 0.13885267078876495 2023-01-24 01:11:30.011508: step: 212/459, loss: 0.283002644777298 2023-01-24 01:11:30.628057: step: 214/459, loss: 0.10022798925638199 2023-01-24 01:11:31.229757: step: 216/459, loss: 0.3557553291320801 2023-01-24 01:11:31.848890: step: 218/459, loss: 0.1049472838640213 2023-01-24 01:11:32.505479: step: 220/459, loss: 0.1396653801202774 2023-01-24 01:11:33.107028: step: 222/459, loss: 0.9870519638061523 2023-01-24 01:11:33.698839: step: 224/459, loss: 0.3066250681877136 2023-01-24 01:11:34.420052: step: 226/459, loss: 0.24178078770637512 2023-01-24 01:11:35.002865: step: 228/459, loss: 0.16157619655132294 2023-01-24 01:11:35.566226: step: 230/459, loss: 0.11827258765697479 2023-01-24 01:11:36.241004: step: 232/459, loss: 0.3841438293457031 2023-01-24 01:11:36.879605: step: 234/459, loss: 0.888013482093811 2023-01-24 01:11:37.494353: step: 236/459, loss: 0.6286869645118713 2023-01-24 01:11:38.139664: step: 238/459, loss: 0.13503889739513397 2023-01-24 01:11:38.762352: step: 240/459, loss: 0.0939893126487732 2023-01-24 01:11:39.415321: step: 242/459, loss: 0.49756717681884766 2023-01-24 01:11:40.038706: step: 244/459, 
loss: 0.26196637749671936 2023-01-24 01:11:40.653279: step: 246/459, loss: 0.11206649243831635 2023-01-24 01:11:41.247684: step: 248/459, loss: 0.17246760427951813 2023-01-24 01:11:41.897077: step: 250/459, loss: 0.7437476515769958 2023-01-24 01:11:42.532623: step: 252/459, loss: 1.6223798990249634 2023-01-24 01:11:43.181258: step: 254/459, loss: 0.41562116146087646 2023-01-24 01:11:43.790915: step: 256/459, loss: 0.6303737759590149 2023-01-24 01:11:44.400313: step: 258/459, loss: 0.4650791883468628 2023-01-24 01:11:45.056900: step: 260/459, loss: 0.5890401005744934 2023-01-24 01:11:45.676219: step: 262/459, loss: 0.1442532241344452 2023-01-24 01:11:46.299356: step: 264/459, loss: 0.32038170099258423 2023-01-24 01:11:46.896255: step: 266/459, loss: 0.5140528678894043 2023-01-24 01:11:47.476955: step: 268/459, loss: 0.18982337415218353 2023-01-24 01:11:48.075985: step: 270/459, loss: 0.8456494212150574 2023-01-24 01:11:48.674843: step: 272/459, loss: 0.41699567437171936 2023-01-24 01:11:49.294964: step: 274/459, loss: 0.18024536967277527 2023-01-24 01:11:49.901248: step: 276/459, loss: 0.2711579203605652 2023-01-24 01:11:50.492368: step: 278/459, loss: 0.47483354806900024 2023-01-24 01:11:51.120713: step: 280/459, loss: 0.3929423689842224 2023-01-24 01:11:51.764194: step: 282/459, loss: 0.1344626545906067 2023-01-24 01:11:52.363823: step: 284/459, loss: 0.28638964891433716 2023-01-24 01:11:53.038567: step: 286/459, loss: 0.16453057527542114 2023-01-24 01:11:53.680333: step: 288/459, loss: 0.37290745973587036 2023-01-24 01:11:54.291137: step: 290/459, loss: 0.17222189903259277 2023-01-24 01:11:54.885937: step: 292/459, loss: 0.38768553733825684 2023-01-24 01:11:55.503563: step: 294/459, loss: 0.15704137086868286 2023-01-24 01:11:56.099683: step: 296/459, loss: 0.34643790125846863 2023-01-24 01:11:56.734396: step: 298/459, loss: 0.07255683839321136 2023-01-24 01:11:57.361029: step: 300/459, loss: 0.39242446422576904 2023-01-24 01:11:57.900741: step: 302/459, loss: 0.4176729619503021 2023-01-24 01:11:58.543363: step: 304/459, loss: 0.1291000247001648 2023-01-24 01:11:59.251666: step: 306/459, loss: 0.22976818680763245 2023-01-24 01:11:59.885617: step: 308/459, loss: 0.660173773765564 2023-01-24 01:12:00.499814: step: 310/459, loss: 0.12408669292926788 2023-01-24 01:12:01.116796: step: 312/459, loss: 0.13988962769508362 2023-01-24 01:12:01.711080: step: 314/459, loss: 0.6708091497421265 2023-01-24 01:12:02.315993: step: 316/459, loss: 0.23340290784835815 2023-01-24 01:12:02.893054: step: 318/459, loss: 0.3222861588001251 2023-01-24 01:12:03.592818: step: 320/459, loss: 0.2565948963165283 2023-01-24 01:12:04.195278: step: 322/459, loss: 0.6489770412445068 2023-01-24 01:12:04.818315: step: 324/459, loss: 0.2939099967479706 2023-01-24 01:12:05.379340: step: 326/459, loss: 0.6061753630638123 2023-01-24 01:12:06.032010: step: 328/459, loss: 0.09665072709321976 2023-01-24 01:12:06.675605: step: 330/459, loss: 0.22589486837387085 2023-01-24 01:12:07.252336: step: 332/459, loss: 0.1773516684770584 2023-01-24 01:12:07.894946: step: 334/459, loss: 0.3303751051425934 2023-01-24 01:12:08.525407: step: 336/459, loss: 0.2547575831413269 2023-01-24 01:12:09.098632: step: 338/459, loss: 0.2084316611289978 2023-01-24 01:12:09.695274: step: 340/459, loss: 0.22197236120700836 2023-01-24 01:12:10.315383: step: 342/459, loss: 0.11850246042013168 2023-01-24 01:12:10.923309: step: 344/459, loss: 0.37457188963890076 2023-01-24 01:12:11.580077: step: 346/459, loss: 0.15637528896331787 2023-01-24 01:12:12.132016: step: 
348/459, loss: 0.6980459690093994 2023-01-24 01:12:12.741108: step: 350/459, loss: 0.24383053183555603 2023-01-24 01:12:13.353262: step: 352/459, loss: 0.5823322534561157 2023-01-24 01:12:13.999828: step: 354/459, loss: 0.24581795930862427 2023-01-24 01:12:14.590056: step: 356/459, loss: 0.20488137006759644 2023-01-24 01:12:15.196243: step: 358/459, loss: 0.19950611889362335 2023-01-24 01:12:15.763896: step: 360/459, loss: 0.1773371398448944 2023-01-24 01:12:16.412632: step: 362/459, loss: 0.09728080779314041 2023-01-24 01:12:17.024901: step: 364/459, loss: 0.8447622060775757 2023-01-24 01:12:17.629731: step: 366/459, loss: 0.4592949151992798 2023-01-24 01:12:18.182829: step: 368/459, loss: 0.1041160598397255 2023-01-24 01:12:18.796980: step: 370/459, loss: 0.5447980761528015 2023-01-24 01:12:19.452351: step: 372/459, loss: 0.1789752095937729 2023-01-24 01:12:20.053460: step: 374/459, loss: 0.07549018412828445 2023-01-24 01:12:20.712148: step: 376/459, loss: 0.1308288723230362 2023-01-24 01:12:21.313321: step: 378/459, loss: 0.19899143278598785 2023-01-24 01:12:21.906173: step: 380/459, loss: 0.24401123821735382 2023-01-24 01:12:22.562112: step: 382/459, loss: 0.23531189560890198 2023-01-24 01:12:23.193461: step: 384/459, loss: 0.3505401909351349 2023-01-24 01:12:23.769686: step: 386/459, loss: 0.050799425691366196 2023-01-24 01:12:24.414564: step: 388/459, loss: 0.16214151680469513 2023-01-24 01:12:24.990383: step: 390/459, loss: 0.20114679634571075 2023-01-24 01:12:25.548753: step: 392/459, loss: 0.2751879096031189 2023-01-24 01:12:26.162237: step: 394/459, loss: 0.05393361672759056 2023-01-24 01:12:26.861693: step: 396/459, loss: 0.1696765124797821 2023-01-24 01:12:27.478708: step: 398/459, loss: 0.17949868738651276 2023-01-24 01:12:28.143831: step: 400/459, loss: 0.13147851824760437 2023-01-24 01:12:28.813637: step: 402/459, loss: 0.8085615634918213 2023-01-24 01:12:29.411900: step: 404/459, loss: 1.7995944023132324 2023-01-24 01:12:29.946268: step: 406/459, loss: 0.6271544694900513 2023-01-24 01:12:30.594170: step: 408/459, loss: 0.20088502764701843 2023-01-24 01:12:31.211050: step: 410/459, loss: 0.21307730674743652 2023-01-24 01:12:31.872451: step: 412/459, loss: 0.14412522315979004 2023-01-24 01:12:32.525392: step: 414/459, loss: 1.0692849159240723 2023-01-24 01:12:33.191099: step: 416/459, loss: 0.3061250150203705 2023-01-24 01:12:33.736534: step: 418/459, loss: 0.06578440219163895 2023-01-24 01:12:34.366348: step: 420/459, loss: 3.4485926628112793 2023-01-24 01:12:34.970661: step: 422/459, loss: 0.23061655461788177 2023-01-24 01:12:35.540726: step: 424/459, loss: 0.3054855465888977 2023-01-24 01:12:36.136049: step: 426/459, loss: 0.3007158637046814 2023-01-24 01:12:36.753686: step: 428/459, loss: 0.2605912983417511 2023-01-24 01:12:37.420579: step: 430/459, loss: 0.6973210573196411 2023-01-24 01:12:38.013282: step: 432/459, loss: 0.23688150942325592 2023-01-24 01:12:38.655483: step: 434/459, loss: 0.08796308189630508 2023-01-24 01:12:39.287066: step: 436/459, loss: 0.2324291169643402 2023-01-24 01:12:39.864186: step: 438/459, loss: 0.29801666736602783 2023-01-24 01:12:40.467793: step: 440/459, loss: 0.4961738586425781 2023-01-24 01:12:41.153829: step: 442/459, loss: 0.3682957887649536 2023-01-24 01:12:41.754962: step: 444/459, loss: 0.2829291522502899 2023-01-24 01:12:42.340384: step: 446/459, loss: 0.11417924612760544 2023-01-24 01:12:42.920824: step: 448/459, loss: 0.7235373854637146 2023-01-24 01:12:43.570831: step: 450/459, loss: 0.08919870108366013 2023-01-24 
01:12:44.273312: step: 452/459, loss: 0.7712594270706177 2023-01-24 01:12:44.878192: step: 454/459, loss: 0.161113440990448 2023-01-24 01:12:45.477137: step: 456/459, loss: 0.6215794682502747 2023-01-24 01:12:46.075309: step: 458/459, loss: 0.12004981935024261 2023-01-24 01:12:46.638888: step: 460/459, loss: 0.10601291060447693 2023-01-24 01:12:47.305189: step: 462/459, loss: 0.17480379343032837 2023-01-24 01:12:47.888892: step: 464/459, loss: 0.17480726540088654 2023-01-24 01:12:48.431009: step: 466/459, loss: 0.13804984092712402 2023-01-24 01:12:49.052334: step: 468/459, loss: 0.445826917886734 2023-01-24 01:12:49.712020: step: 470/459, loss: 0.4941258430480957 2023-01-24 01:12:50.313798: step: 472/459, loss: 0.5678410530090332 2023-01-24 01:12:50.939042: step: 474/459, loss: 0.2685389816761017 2023-01-24 01:12:51.549824: step: 476/459, loss: 0.2744081914424896 2023-01-24 01:12:52.186952: step: 478/459, loss: 1.978703260421753 2023-01-24 01:12:52.757081: step: 480/459, loss: 0.25470638275146484 2023-01-24 01:12:53.373093: step: 482/459, loss: 0.1382962465286255 2023-01-24 01:12:54.002425: step: 484/459, loss: 0.15825903415679932 2023-01-24 01:12:54.620033: step: 486/459, loss: 0.1332773119211197 2023-01-24 01:12:55.230570: step: 488/459, loss: 0.21771876513957977 2023-01-24 01:12:55.795781: step: 490/459, loss: 0.5583037734031677 2023-01-24 01:12:56.448073: step: 492/459, loss: 0.37889793515205383 2023-01-24 01:12:57.084954: step: 494/459, loss: 0.0979250818490982 2023-01-24 01:12:57.715133: step: 496/459, loss: 0.550456702709198 2023-01-24 01:12:58.426274: step: 498/459, loss: 0.11819633096456528 2023-01-24 01:12:59.068580: step: 500/459, loss: 0.23532068729400635 2023-01-24 01:12:59.701549: step: 502/459, loss: 0.431511253118515 2023-01-24 01:13:00.411471: step: 504/459, loss: 0.21540610492229462 2023-01-24 01:13:01.002066: step: 506/459, loss: 0.2734929025173187 2023-01-24 01:13:01.606576: step: 508/459, loss: 0.5956867933273315 2023-01-24 01:13:02.216712: step: 510/459, loss: 0.18806922435760498 2023-01-24 01:13:02.841024: step: 512/459, loss: 0.5461971163749695 2023-01-24 01:13:03.424206: step: 514/459, loss: 0.09699979424476624 2023-01-24 01:13:04.035545: step: 516/459, loss: 0.3094552755355835 2023-01-24 01:13:04.689583: step: 518/459, loss: 0.2447507530450821 2023-01-24 01:13:05.320801: step: 520/459, loss: 0.15734541416168213 2023-01-24 01:13:05.876734: step: 522/459, loss: 0.16060256958007812 2023-01-24 01:13:06.526965: step: 524/459, loss: 0.3676411211490631 2023-01-24 01:13:07.143252: step: 526/459, loss: 0.3671046495437622 2023-01-24 01:13:07.742757: step: 528/459, loss: 0.2835782468318939 2023-01-24 01:13:08.399076: step: 530/459, loss: 0.20277836918830872 2023-01-24 01:13:09.007474: step: 532/459, loss: 0.7523905038833618 2023-01-24 01:13:09.631781: step: 534/459, loss: 0.21089082956314087 2023-01-24 01:13:10.267808: step: 536/459, loss: 0.5197495222091675 2023-01-24 01:13:10.901019: step: 538/459, loss: 0.2809663414955139 2023-01-24 01:13:11.464759: step: 540/459, loss: 1.6155263185501099 2023-01-24 01:13:12.137709: step: 542/459, loss: 0.2632942199707031 2023-01-24 01:13:12.699213: step: 544/459, loss: 0.3939163088798523 2023-01-24 01:13:13.271068: step: 546/459, loss: 0.29368335008621216 2023-01-24 01:13:13.897639: step: 548/459, loss: 0.31838667392730713 2023-01-24 01:13:14.533015: step: 550/459, loss: 0.12661093473434448 2023-01-24 01:13:15.083085: step: 552/459, loss: 0.9546986222267151 2023-01-24 01:13:15.679429: step: 554/459, loss: 0.18624338507652283 2023-01-24 
01:13:16.282730: step: 556/459, loss: 0.3705906867980957 2023-01-24 01:13:16.857870: step: 558/459, loss: 0.22286544740200043 2023-01-24 01:13:17.535539: step: 560/459, loss: 0.30559054017066956 2023-01-24 01:13:18.178802: step: 562/459, loss: 0.14368917047977448 2023-01-24 01:13:18.810010: step: 564/459, loss: 0.2327021211385727 2023-01-24 01:13:19.447445: step: 566/459, loss: 0.2811327576637268 2023-01-24 01:13:20.057692: step: 568/459, loss: 0.16152958571910858 2023-01-24 01:13:20.644233: step: 570/459, loss: 0.4746742248535156 2023-01-24 01:13:21.299861: step: 572/459, loss: 0.23963609337806702 2023-01-24 01:13:21.883953: step: 574/459, loss: 1.230757713317871 2023-01-24 01:13:22.499919: step: 576/459, loss: 0.3521808683872223 2023-01-24 01:13:23.040389: step: 578/459, loss: 0.1988457590341568 2023-01-24 01:13:23.677877: step: 580/459, loss: 0.16571728885173798 2023-01-24 01:13:24.274077: step: 582/459, loss: 0.16787496209144592 2023-01-24 01:13:24.863218: step: 584/459, loss: 0.36734652519226074 2023-01-24 01:13:25.545338: step: 586/459, loss: 0.21201568841934204 2023-01-24 01:13:26.125079: step: 588/459, loss: 0.2768889367580414 2023-01-24 01:13:26.745492: step: 590/459, loss: 0.5822448134422302 2023-01-24 01:13:27.375112: step: 592/459, loss: 0.9873772263526917 2023-01-24 01:13:28.001328: step: 594/459, loss: 0.27435725927352905 2023-01-24 01:13:28.615277: step: 596/459, loss: 0.6214287281036377 2023-01-24 01:13:29.305354: step: 598/459, loss: 0.37285521626472473 2023-01-24 01:13:29.954824: step: 600/459, loss: 0.09701328724622726 2023-01-24 01:13:30.589811: step: 602/459, loss: 0.2029900699853897 2023-01-24 01:13:31.184990: step: 604/459, loss: 0.12333521246910095 2023-01-24 01:13:31.823290: step: 606/459, loss: 0.2363642007112503 2023-01-24 01:13:32.427826: step: 608/459, loss: 0.1506139039993286 2023-01-24 01:13:33.061754: step: 610/459, loss: 0.06975002586841583 2023-01-24 01:13:33.657579: step: 612/459, loss: 0.08340327441692352 2023-01-24 01:13:34.305436: step: 614/459, loss: 0.3762032985687256 2023-01-24 01:13:34.915551: step: 616/459, loss: 0.09829050302505493 2023-01-24 01:13:35.537315: step: 618/459, loss: 0.1884852796792984 2023-01-24 01:13:36.196795: step: 620/459, loss: 0.2896145284175873 2023-01-24 01:13:36.820158: step: 622/459, loss: 0.21119140088558197 2023-01-24 01:13:37.447967: step: 624/459, loss: 0.20684760808944702 2023-01-24 01:13:38.143703: step: 626/459, loss: 0.17026236653327942 2023-01-24 01:13:38.757553: step: 628/459, loss: 0.2411709427833557 2023-01-24 01:13:39.457222: step: 630/459, loss: 0.5456506013870239 2023-01-24 01:13:40.121056: step: 632/459, loss: 0.5099262595176697 2023-01-24 01:13:40.728639: step: 634/459, loss: 0.0799926221370697 2023-01-24 01:13:41.307848: step: 636/459, loss: 0.49157142639160156 2023-01-24 01:13:41.876119: step: 638/459, loss: 5.032535076141357 2023-01-24 01:13:42.495266: step: 640/459, loss: 0.5112559795379639 2023-01-24 01:13:43.204687: step: 642/459, loss: 0.4345918893814087 2023-01-24 01:13:43.862517: step: 644/459, loss: 0.5179170966148376 2023-01-24 01:13:44.513668: step: 646/459, loss: 0.4147970378398895 2023-01-24 01:13:45.103340: step: 648/459, loss: 0.1530103236436844 2023-01-24 01:13:45.728840: step: 650/459, loss: 0.3763608932495117 2023-01-24 01:13:46.323533: step: 652/459, loss: 0.1806812435388565 2023-01-24 01:13:46.979178: step: 654/459, loss: 0.3301670253276825 2023-01-24 01:13:47.609986: step: 656/459, loss: 0.2495911568403244 2023-01-24 01:13:48.259570: step: 658/459, loss: 0.44872966408729553 2023-01-24 
01:13:48.845069: step: 660/459, loss: 0.20473718643188477 2023-01-24 01:13:49.455808: step: 662/459, loss: 0.5087020993232727 2023-01-24 01:13:50.050900: step: 664/459, loss: 1.0415745973587036 2023-01-24 01:13:50.661923: step: 666/459, loss: 0.08834202587604523 2023-01-24 01:13:51.257164: step: 668/459, loss: 0.28063684701919556 2023-01-24 01:13:51.888696: step: 670/459, loss: 0.36391007900238037 2023-01-24 01:13:52.432358: step: 672/459, loss: 0.5071342587471008 2023-01-24 01:13:53.026485: step: 674/459, loss: 0.07442236691713333 2023-01-24 01:13:53.656638: step: 676/459, loss: 0.33334895968437195 2023-01-24 01:13:54.286004: step: 678/459, loss: 0.19881679117679596 2023-01-24 01:13:54.897521: step: 680/459, loss: 0.0991135984659195 2023-01-24 01:13:55.489993: step: 682/459, loss: 0.31024372577667236 2023-01-24 01:13:56.072680: step: 684/459, loss: 0.17530390620231628 2023-01-24 01:13:56.669014: step: 686/459, loss: 0.2980133593082428 2023-01-24 01:13:57.309781: step: 688/459, loss: 0.2509806752204895 2023-01-24 01:13:57.889517: step: 690/459, loss: 0.2854912281036377 2023-01-24 01:13:58.463360: step: 692/459, loss: 0.16166701912879944 2023-01-24 01:13:59.085051: step: 694/459, loss: 0.08434195816516876 2023-01-24 01:13:59.694764: step: 696/459, loss: 0.373509019613266 2023-01-24 01:14:00.315416: step: 698/459, loss: 0.20471450686454773 2023-01-24 01:14:00.899054: step: 700/459, loss: 0.47493982315063477 2023-01-24 01:14:01.531697: step: 702/459, loss: 0.7725191712379456 2023-01-24 01:14:02.128391: step: 704/459, loss: 0.5323949456214905 2023-01-24 01:14:02.800160: step: 706/459, loss: 0.8564408421516418 2023-01-24 01:14:03.403532: step: 708/459, loss: 0.1938789337873459 2023-01-24 01:14:04.002106: step: 710/459, loss: 0.19795313477516174 2023-01-24 01:14:04.605173: step: 712/459, loss: 0.2078016996383667 2023-01-24 01:14:05.210224: step: 714/459, loss: 0.19140276312828064 2023-01-24 01:14:05.833196: step: 716/459, loss: 0.18875835835933685 2023-01-24 01:14:06.508874: step: 718/459, loss: 0.514220118522644 2023-01-24 01:14:07.114120: step: 720/459, loss: 0.2699809968471527 2023-01-24 01:14:07.747046: step: 722/459, loss: 0.09441415220499039 2023-01-24 01:14:08.322201: step: 724/459, loss: 0.1529221534729004 2023-01-24 01:14:08.967634: step: 726/459, loss: 0.15906451642513275 2023-01-24 01:14:09.658415: step: 728/459, loss: 0.16116371750831604 2023-01-24 01:14:10.300772: step: 730/459, loss: 0.23889906704425812 2023-01-24 01:14:10.900710: step: 732/459, loss: 0.5336011648178101 2023-01-24 01:14:11.539038: step: 734/459, loss: 0.21928177773952484 2023-01-24 01:14:12.167793: step: 736/459, loss: 0.22715717554092407 2023-01-24 01:14:12.738844: step: 738/459, loss: 0.11252932250499725 2023-01-24 01:14:13.354076: step: 740/459, loss: 0.17274275422096252 2023-01-24 01:14:14.058314: step: 742/459, loss: 0.8303322792053223 2023-01-24 01:14:14.635682: step: 744/459, loss: 0.1861632913351059 2023-01-24 01:14:15.205300: step: 746/459, loss: 0.22923924028873444 2023-01-24 01:14:15.788928: step: 748/459, loss: 0.0775824785232544 2023-01-24 01:14:16.406180: step: 750/459, loss: 0.2742109000682831 2023-01-24 01:14:17.032501: step: 752/459, loss: 0.5355865955352783 2023-01-24 01:14:17.609706: step: 754/459, loss: 0.14947737753391266 2023-01-24 01:14:18.269668: step: 756/459, loss: 0.38858500123023987 2023-01-24 01:14:18.866283: step: 758/459, loss: 0.1192006841301918 2023-01-24 01:14:19.498306: step: 760/459, loss: 0.09959384799003601 2023-01-24 01:14:20.110286: step: 762/459, loss: 0.299812912940979 
2023-01-24 01:14:20.683906: step: 764/459, loss: 0.8624223470687866 2023-01-24 01:14:21.337440: step: 766/459, loss: 0.19127006828784943 2023-01-24 01:14:21.991589: step: 768/459, loss: 0.14717143774032593 2023-01-24 01:14:22.639109: step: 770/459, loss: 0.13442949950695038 2023-01-24 01:14:23.258934: step: 772/459, loss: 1.5984588861465454 2023-01-24 01:14:24.042475: step: 774/459, loss: 0.14241696894168854 2023-01-24 01:14:24.740949: step: 776/459, loss: 0.22225573658943176 2023-01-24 01:14:25.366130: step: 778/459, loss: 1.2122604846954346 2023-01-24 01:14:25.982145: step: 780/459, loss: 0.2338508665561676 2023-01-24 01:14:26.558746: step: 782/459, loss: 0.17809613049030304 2023-01-24 01:14:27.247262: step: 784/459, loss: 0.6111647486686707 2023-01-24 01:14:27.893020: step: 786/459, loss: 0.13340482115745544 2023-01-24 01:14:28.544661: step: 788/459, loss: 0.1648138016462326 2023-01-24 01:14:29.122420: step: 790/459, loss: 0.8856968283653259 2023-01-24 01:14:29.797493: step: 792/459, loss: 0.678548276424408 2023-01-24 01:14:30.423990: step: 794/459, loss: 0.23538841307163239 2023-01-24 01:14:31.013364: step: 796/459, loss: 0.21532072126865387 2023-01-24 01:14:31.674404: step: 798/459, loss: 0.2009187936782837 2023-01-24 01:14:32.227604: step: 800/459, loss: 0.0994652509689331 2023-01-24 01:14:32.895672: step: 802/459, loss: 0.15272988379001617 2023-01-24 01:14:33.506983: step: 804/459, loss: 0.9590467810630798 2023-01-24 01:14:34.093467: step: 806/459, loss: 0.24337251484394073 2023-01-24 01:14:34.750613: step: 808/459, loss: 0.30613407492637634 2023-01-24 01:14:35.284416: step: 810/459, loss: 0.3129695951938629 2023-01-24 01:14:35.907855: step: 812/459, loss: 0.22972682118415833 2023-01-24 01:14:36.442104: step: 814/459, loss: 0.13207051157951355 2023-01-24 01:14:37.000582: step: 816/459, loss: 0.7543724775314331 2023-01-24 01:14:37.637435: step: 818/459, loss: 0.7043681740760803 2023-01-24 01:14:38.222176: step: 820/459, loss: 0.18768781423568726 2023-01-24 01:14:38.940381: step: 822/459, loss: 0.21548977494239807 2023-01-24 01:14:39.572704: step: 824/459, loss: 0.5227473378181458 2023-01-24 01:14:40.203816: step: 826/459, loss: 0.12555350363254547 2023-01-24 01:14:40.815057: step: 828/459, loss: 0.3442516326904297 2023-01-24 01:14:41.411646: step: 830/459, loss: 0.1172807589173317 2023-01-24 01:14:42.051859: step: 832/459, loss: 0.16421817243099213 2023-01-24 01:14:42.658909: step: 834/459, loss: 0.7288725972175598 2023-01-24 01:14:43.207220: step: 836/459, loss: 0.5713868141174316 2023-01-24 01:14:43.841171: step: 838/459, loss: 0.1360836923122406 2023-01-24 01:14:44.544337: step: 840/459, loss: 0.22031325101852417 2023-01-24 01:14:45.202890: step: 842/459, loss: 0.15402646362781525 2023-01-24 01:14:45.818875: step: 844/459, loss: 0.30359217524528503 2023-01-24 01:14:46.476801: step: 846/459, loss: 0.06476429849863052 2023-01-24 01:14:47.147440: step: 848/459, loss: 0.21843397617340088 2023-01-24 01:14:47.751821: step: 850/459, loss: 0.4466173052787781 2023-01-24 01:14:48.397778: step: 852/459, loss: 0.2593488097190857 2023-01-24 01:14:48.953112: step: 854/459, loss: 0.5194042921066284 2023-01-24 01:14:49.580491: step: 856/459, loss: 0.1465357542037964 2023-01-24 01:14:50.230553: step: 858/459, loss: 0.40247225761413574 2023-01-24 01:14:50.895535: step: 860/459, loss: 0.37632638216018677 2023-01-24 01:14:51.522637: step: 862/459, loss: 0.9516148567199707 2023-01-24 01:14:52.241705: step: 864/459, loss: 4.385779857635498 2023-01-24 01:14:52.841114: step: 866/459, loss: 
0.1819845736026764 2023-01-24 01:14:53.529057: step: 868/459, loss: 0.30338671803474426 2023-01-24 01:14:54.112307: step: 870/459, loss: 0.1439668983221054 2023-01-24 01:14:54.705714: step: 872/459, loss: 0.06391488760709763 2023-01-24 01:14:55.429393: step: 874/459, loss: 0.19264577329158783 2023-01-24 01:14:56.101654: step: 876/459, loss: 0.3170005977153778 2023-01-24 01:14:56.675960: step: 878/459, loss: 0.26631447672843933 2023-01-24 01:14:57.287592: step: 880/459, loss: 1.4684054851531982 2023-01-24 01:14:57.890606: step: 882/459, loss: 0.242726132273674 2023-01-24 01:14:58.535438: step: 884/459, loss: 0.3144577741622925 2023-01-24 01:14:59.199214: step: 886/459, loss: 0.3297308683395386 2023-01-24 01:14:59.817072: step: 888/459, loss: 0.2616505026817322 2023-01-24 01:15:00.415947: step: 890/459, loss: 0.36376526951789856 2023-01-24 01:15:01.016245: step: 892/459, loss: 0.26123368740081787 2023-01-24 01:15:01.697614: step: 894/459, loss: 0.21233431994915009 2023-01-24 01:15:02.287257: step: 896/459, loss: 0.28718632459640503 2023-01-24 01:15:02.855415: step: 898/459, loss: 0.4199586808681488 2023-01-24 01:15:03.475886: step: 900/459, loss: 0.15769611299037933 2023-01-24 01:15:04.126067: step: 902/459, loss: 0.23641036450862885 2023-01-24 01:15:04.736545: step: 904/459, loss: 0.10239910334348679 2023-01-24 01:15:05.398473: step: 906/459, loss: 0.6381731629371643 2023-01-24 01:15:05.982411: step: 908/459, loss: 0.7318964004516602 2023-01-24 01:15:06.626140: step: 910/459, loss: 0.5030637979507446 2023-01-24 01:15:07.248862: step: 912/459, loss: 0.1791110634803772 2023-01-24 01:15:07.901814: step: 914/459, loss: 0.07231181114912033 2023-01-24 01:15:08.612624: step: 916/459, loss: 0.3641130030155182 2023-01-24 01:15:09.257455: step: 918/459, loss: 0.9656057953834534 2023-01-24 01:15:09.694294: step: 920/459, loss: 0.008281302638351917 ================================================== Loss: 0.373 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3154588080944213, 'r': 0.3172545887856609, 'f1': 0.31635415002846407}, 'combined': 0.23310305791571034, 'epoch': 9} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.35110197040764407, 'r': 0.295564022361344, 'f1': 0.320948099306494}, 'combined': 0.20540678355615613, 'epoch': 9} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3112420857856251, 'r': 0.3153762311376163, 'f1': 0.31329552084735873}, 'combined': 0.23084933115068537, 'epoch': 9} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3572881800647091, 'r': 0.2958995745808636, 'f1': 0.3237091318139731}, 'combined': 0.20717384436094274, 'epoch': 9} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33911910909943366, 'r': 0.32045790575240596, 'f1': 0.3295245196712545}, 'combined': 0.24280754081039804, 'epoch': 9} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3571560097607819, 'r': 0.30934004124089576, 'f1': 0.33153281140312696}, 'combined': 0.23770277043997784, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.27662037037037035, 'r': 0.2845238095238095, 'f1': 0.2805164319248826}, 'combined': 0.1870109546165884, 'epoch': 9} Sample Korean: 
{'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3103448275862069, 'r': 0.391304347826087, 'f1': 0.34615384615384615}, 'combined': 0.17307692307692307, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35714285714285715, 'r': 0.1724137931034483, 'f1': 0.23255813953488377}, 'combined': 0.1550387596899225, 'epoch': 9} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3420992658657132, 'r': 0.32067749020429287, 'f1': 0.331042188712365}, 'combined': 0.24392582326174264, 'epoch': 5} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3383810640925978, 'r': 0.2494791299809971, 'f1': 0.28720778961705584}, 'combined': 0.18381298535491572, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2660984848484848, 'r': 0.3345238095238095, 'f1': 0.29641350210970463}, 'combined': 0.19760900140646975, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3342478880342958, 'r': 0.3266369304319968, 'f1': 0.33039858414138645}, 'combined': 0.24345158831470579, 'epoch': 5} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3413499740991752, 'r': 0.24608229950967814, 'f1': 0.28599105067157526}, 'combined': 0.18303427242980813, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3269230769230769, 'r': 0.3695652173913043, 'f1': 0.346938775510204}, 'combined': 0.173469387755102, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8} ****************************** Epoch: 10 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:17:43.991772: step: 2/459, loss: 0.17125815153121948 2023-01-24 01:17:44.633808: step: 4/459, loss: 0.332763135433197 2023-01-24 01:17:45.288036: step: 6/459, loss: 0.17400400340557098 2023-01-24 01:17:45.904676: step: 8/459, loss: 0.13294239342212677 2023-01-24 01:17:46.589139: step: 10/459, loss: 0.1668187379837036 2023-01-24 01:17:47.189209: step: 12/459, loss: 0.092381052672863 2023-01-24 01:17:47.878252: step: 14/459, loss: 0.1856277585029602 2023-01-24 01:17:48.437143: step: 16/459, loss: 0.15645253658294678 2023-01-24 01:17:49.065849: step: 18/459, loss: 0.19668680429458618 2023-01-24 01:17:49.729968: step: 20/459, loss: 0.14160434901714325 2023-01-24 01:17:50.308615: step: 22/459, loss: 0.28747305274009705 2023-01-24 01:17:50.851877: step: 24/459, loss: 
0.22212064266204834 2023-01-24 01:17:51.529593: step: 26/459, loss: 0.19136473536491394 2023-01-24 01:17:52.143748: step: 28/459, loss: 0.2736329436302185 2023-01-24 01:17:52.755676: step: 30/459, loss: 0.2570238411426544 2023-01-24 01:17:53.328731: step: 32/459, loss: 0.18641331791877747 2023-01-24 01:17:53.972861: step: 34/459, loss: 0.2724190652370453 2023-01-24 01:17:54.646658: step: 36/459, loss: 0.04449905827641487 2023-01-24 01:17:55.281627: step: 38/459, loss: 0.4913790225982666 2023-01-24 01:17:55.965442: step: 40/459, loss: 0.15946614742279053 2023-01-24 01:17:56.594781: step: 42/459, loss: 0.4147394895553589 2023-01-24 01:17:57.214787: step: 44/459, loss: 0.4323972463607788 2023-01-24 01:17:57.764073: step: 46/459, loss: 0.11237046867609024 2023-01-24 01:17:58.348665: step: 48/459, loss: 0.14782367646694183 2023-01-24 01:17:59.112955: step: 50/459, loss: 0.17104659974575043 2023-01-24 01:17:59.687740: step: 52/459, loss: 0.19364269077777863 2023-01-24 01:18:00.266004: step: 54/459, loss: 0.19894830882549286 2023-01-24 01:18:00.927492: step: 56/459, loss: 0.19667749106884003 2023-01-24 01:18:01.510184: step: 58/459, loss: 0.13823001086711884 2023-01-24 01:18:02.227837: step: 60/459, loss: 0.24700328707695007 2023-01-24 01:18:02.800663: step: 62/459, loss: 0.10820061713457108 2023-01-24 01:18:03.431647: step: 64/459, loss: 0.05897263437509537 2023-01-24 01:18:04.072992: step: 66/459, loss: 0.19567114114761353 2023-01-24 01:18:04.623569: step: 68/459, loss: 0.1191864162683487 2023-01-24 01:18:05.256290: step: 70/459, loss: 0.10398861020803452 2023-01-24 01:18:05.896161: step: 72/459, loss: 0.49994975328445435 2023-01-24 01:18:06.463159: step: 74/459, loss: 0.174809530377388 2023-01-24 01:18:07.127974: step: 76/459, loss: 0.1288970708847046 2023-01-24 01:18:07.768747: step: 78/459, loss: 0.14195583760738373 2023-01-24 01:18:08.378122: step: 80/459, loss: 0.3480432629585266 2023-01-24 01:18:09.032525: step: 82/459, loss: 0.29216238856315613 2023-01-24 01:18:09.651533: step: 84/459, loss: 0.15033377707004547 2023-01-24 01:18:10.239804: step: 86/459, loss: 0.34883880615234375 2023-01-24 01:18:10.946879: step: 88/459, loss: 0.0676162987947464 2023-01-24 01:18:11.525834: step: 90/459, loss: 0.29121553897857666 2023-01-24 01:18:12.214456: step: 92/459, loss: 0.10523370653390884 2023-01-24 01:18:12.831712: step: 94/459, loss: 0.4113720953464508 2023-01-24 01:18:13.527637: step: 96/459, loss: 0.6761390566825867 2023-01-24 01:18:14.109269: step: 98/459, loss: 0.08350346982479095 2023-01-24 01:18:14.700773: step: 100/459, loss: 0.24556784331798553 2023-01-24 01:18:15.356507: step: 102/459, loss: 0.1595415472984314 2023-01-24 01:18:15.958893: step: 104/459, loss: 0.4088301956653595 2023-01-24 01:18:16.613701: step: 106/459, loss: 0.17864517867565155 2023-01-24 01:18:17.213688: step: 108/459, loss: 0.1621953696012497 2023-01-24 01:18:17.864569: step: 110/459, loss: 0.06150581315159798 2023-01-24 01:18:18.560235: step: 112/459, loss: 0.24312551319599152 2023-01-24 01:18:19.196939: step: 114/459, loss: 0.3227425217628479 2023-01-24 01:18:19.857628: step: 116/459, loss: 0.06784147769212723 2023-01-24 01:18:20.475359: step: 118/459, loss: 0.15622377395629883 2023-01-24 01:18:21.084721: step: 120/459, loss: 0.1487804800271988 2023-01-24 01:18:21.741438: step: 122/459, loss: 0.12429174780845642 2023-01-24 01:18:22.364470: step: 124/459, loss: 0.19887198507785797 2023-01-24 01:18:23.018464: step: 126/459, loss: 0.6067106127738953 2023-01-24 01:18:23.592065: step: 128/459, loss: 0.11291731148958206 
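--------------------
Note: each "step: N/459, loss: X" entry is the training loss of one logged batch, and the epoch-end "Loss: 0.373" line printed after epoch 9 above appears to be the arithmetic mean of that epoch's step losses (this is inferred from the log itself, not confirmed from train.py; note the step counter advances by two and runs past the nominal 459 steps, so the average is taken over all logged entries). A minimal Python sketch for recomputing that epoch average from a saved copy of this log; the file name "train.log" and the regular expression are assumptions about the exact line format:

import re
from statistics import mean

# Matches the per-step entries as they appear in this log, e.g. "step: 128/459, loss: 0.1129..."
STEP_RE = re.compile(r"step: \d+/459, loss: ([0-9.]+)")

def epoch_mean_loss(log_text: str) -> float:
    # Pull every per-step loss out of one epoch's worth of log text and average it,
    # mirroring the epoch-end "Loss: ..." summary line (assumed to be a plain mean).
    losses = [float(m.group(1)) for m in STEP_RE.finditer(log_text)]
    return mean(losses)

with open("train.log", encoding="utf-8") as f:  # hypothetical path to a dump of this log
    print(f"Loss: {epoch_mean_loss(f.read()):.3f}")
--------------------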
2023-01-24 01:18:24.153826: step: 130/459, loss: 0.8327620029449463 2023-01-24 01:18:24.779923: step: 132/459, loss: 0.21742871403694153 2023-01-24 01:18:25.375043: step: 134/459, loss: 0.24951544404029846 2023-01-24 01:18:25.994562: step: 136/459, loss: 0.07760219275951385 2023-01-24 01:18:26.595109: step: 138/459, loss: 0.26506978273391724 2023-01-24 01:18:27.313976: step: 140/459, loss: 0.352715402841568 2023-01-24 01:18:27.947095: step: 142/459, loss: 0.15117117762565613 2023-01-24 01:18:28.524790: step: 144/459, loss: 0.5747646689414978 2023-01-24 01:18:29.155789: step: 146/459, loss: 0.14647534489631653 2023-01-24 01:18:29.741497: step: 148/459, loss: 0.27084097266197205 2023-01-24 01:18:30.363295: step: 150/459, loss: 0.5881747007369995 2023-01-24 01:18:30.987545: step: 152/459, loss: 0.15794183313846588 2023-01-24 01:18:31.636732: step: 154/459, loss: 0.0474688746035099 2023-01-24 01:18:32.221649: step: 156/459, loss: 0.7681472301483154 2023-01-24 01:18:32.803577: step: 158/459, loss: 0.1385224610567093 2023-01-24 01:18:33.435331: step: 160/459, loss: 0.2469131350517273 2023-01-24 01:18:34.013472: step: 162/459, loss: 0.1798241138458252 2023-01-24 01:18:34.632044: step: 164/459, loss: 0.20695720613002777 2023-01-24 01:18:35.300929: step: 166/459, loss: 0.6449987292289734 2023-01-24 01:18:35.944624: step: 168/459, loss: 0.09640246629714966 2023-01-24 01:18:36.574512: step: 170/459, loss: 0.13458801805973053 2023-01-24 01:18:37.152483: step: 172/459, loss: 0.1952276974916458 2023-01-24 01:18:37.821490: step: 174/459, loss: 0.1352403163909912 2023-01-24 01:18:38.419151: step: 176/459, loss: 0.9936392307281494 2023-01-24 01:18:39.015351: step: 178/459, loss: 0.1764390915632248 2023-01-24 01:18:39.650562: step: 180/459, loss: 0.25290459394454956 2023-01-24 01:18:40.179377: step: 182/459, loss: 0.1684102565050125 2023-01-24 01:18:40.777173: step: 184/459, loss: 0.8918006420135498 2023-01-24 01:18:41.438018: step: 186/459, loss: 0.2138083577156067 2023-01-24 01:18:42.034104: step: 188/459, loss: 0.23180700838565826 2023-01-24 01:18:42.704353: step: 190/459, loss: 0.5244027376174927 2023-01-24 01:18:43.297154: step: 192/459, loss: 0.4511924386024475 2023-01-24 01:18:43.877094: step: 194/459, loss: 0.49680593609809875 2023-01-24 01:18:44.524889: step: 196/459, loss: 0.7255764007568359 2023-01-24 01:18:45.137964: step: 198/459, loss: 0.03203282132744789 2023-01-24 01:18:45.741430: step: 200/459, loss: 0.19952398538589478 2023-01-24 01:18:46.404270: step: 202/459, loss: 0.5511372089385986 2023-01-24 01:18:47.045830: step: 204/459, loss: 0.1557440310716629 2023-01-24 01:18:47.665062: step: 206/459, loss: 0.1594080775976181 2023-01-24 01:18:48.232581: step: 208/459, loss: 0.18186624348163605 2023-01-24 01:18:48.823057: step: 210/459, loss: 0.12469295412302017 2023-01-24 01:18:49.497093: step: 212/459, loss: 0.25011685490608215 2023-01-24 01:18:50.167514: step: 214/459, loss: 0.3203595280647278 2023-01-24 01:18:50.774571: step: 216/459, loss: 0.19444935023784637 2023-01-24 01:18:51.484919: step: 218/459, loss: 0.49845391511917114 2023-01-24 01:18:52.090981: step: 220/459, loss: 0.04411442205309868 2023-01-24 01:18:52.778789: step: 222/459, loss: 0.34847721457481384 2023-01-24 01:18:53.359570: step: 224/459, loss: 0.1811908781528473 2023-01-24 01:18:53.917529: step: 226/459, loss: 0.19885428249835968 2023-01-24 01:18:54.571719: step: 228/459, loss: 0.30258116126060486 2023-01-24 01:18:55.181570: step: 230/459, loss: 0.09878349304199219 2023-01-24 01:18:55.814699: step: 232/459, loss: 
0.3286028206348419 2023-01-24 01:18:56.472058: step: 234/459, loss: 0.18203683197498322 2023-01-24 01:18:57.127461: step: 236/459, loss: 0.17053760588169098 2023-01-24 01:18:57.801020: step: 238/459, loss: 0.2649615406990051 2023-01-24 01:18:58.439141: step: 240/459, loss: 0.29692718386650085 2023-01-24 01:18:59.067106: step: 242/459, loss: 0.3332153260707855 2023-01-24 01:18:59.656716: step: 244/459, loss: 0.11241468787193298 2023-01-24 01:19:00.233248: step: 246/459, loss: 0.4105433225631714 2023-01-24 01:19:00.856670: step: 248/459, loss: 0.12580926716327667 2023-01-24 01:19:01.468277: step: 250/459, loss: 0.3232801556587219 2023-01-24 01:19:02.078214: step: 252/459, loss: 0.16258788108825684 2023-01-24 01:19:02.768882: step: 254/459, loss: 0.5609155893325806 2023-01-24 01:19:03.396645: step: 256/459, loss: 0.1415969282388687 2023-01-24 01:19:04.005753: step: 258/459, loss: 0.11811402440071106 2023-01-24 01:19:04.577016: step: 260/459, loss: 0.15496674180030823 2023-01-24 01:19:05.179709: step: 262/459, loss: 0.16588203608989716 2023-01-24 01:19:05.795510: step: 264/459, loss: 0.1064646989107132 2023-01-24 01:19:06.482890: step: 266/459, loss: 0.1508849561214447 2023-01-24 01:19:07.094202: step: 268/459, loss: 0.3357686400413513 2023-01-24 01:19:07.678765: step: 270/459, loss: 0.08120134472846985 2023-01-24 01:19:08.263413: step: 272/459, loss: 0.09986304491758347 2023-01-24 01:19:08.866986: step: 274/459, loss: 0.07480871677398682 2023-01-24 01:19:09.555319: step: 276/459, loss: 0.4401036500930786 2023-01-24 01:19:10.187254: step: 278/459, loss: 0.09514737129211426 2023-01-24 01:19:10.778203: step: 280/459, loss: 0.1708211898803711 2023-01-24 01:19:11.374162: step: 282/459, loss: 0.09417209774255753 2023-01-24 01:19:11.973096: step: 284/459, loss: 0.2729453146457672 2023-01-24 01:19:12.566942: step: 286/459, loss: 0.6811230778694153 2023-01-24 01:19:13.195922: step: 288/459, loss: 0.13037727773189545 2023-01-24 01:19:13.779061: step: 290/459, loss: 0.367770791053772 2023-01-24 01:19:14.412254: step: 292/459, loss: 0.2642227113246918 2023-01-24 01:19:14.988372: step: 294/459, loss: 0.15191255509853363 2023-01-24 01:19:15.576373: step: 296/459, loss: 0.2832147479057312 2023-01-24 01:19:16.229369: step: 298/459, loss: 0.14467820525169373 2023-01-24 01:19:16.889402: step: 300/459, loss: 0.3020950257778168 2023-01-24 01:19:17.493935: step: 302/459, loss: 0.1250937581062317 2023-01-24 01:19:18.130739: step: 304/459, loss: 0.11677929013967514 2023-01-24 01:19:18.723824: step: 306/459, loss: 0.2805742621421814 2023-01-24 01:19:19.350264: step: 308/459, loss: 0.4092783033847809 2023-01-24 01:19:19.923997: step: 310/459, loss: 0.20881883800029755 2023-01-24 01:19:20.576680: step: 312/459, loss: 0.4313201904296875 2023-01-24 01:19:21.247150: step: 314/459, loss: 0.45820456743240356 2023-01-24 01:19:21.900420: step: 316/459, loss: 0.2879067361354828 2023-01-24 01:19:22.563304: step: 318/459, loss: 0.4779314398765564 2023-01-24 01:19:23.172621: step: 320/459, loss: 0.41344618797302246 2023-01-24 01:19:23.817898: step: 322/459, loss: 0.05977484583854675 2023-01-24 01:19:24.416503: step: 324/459, loss: 0.070372074842453 2023-01-24 01:19:25.026272: step: 326/459, loss: 0.1359485685825348 2023-01-24 01:19:25.690074: step: 328/459, loss: 0.23047012090682983 2023-01-24 01:19:26.304271: step: 330/459, loss: 0.34951016306877136 2023-01-24 01:19:26.994008: step: 332/459, loss: 0.163167804479599 2023-01-24 01:19:27.581977: step: 334/459, loss: 0.09779796749353409 2023-01-24 01:19:28.203561: step: 336/459, 
loss: 0.12829822301864624 2023-01-24 01:19:28.797884: step: 338/459, loss: 0.4308476150035858 2023-01-24 01:19:29.450715: step: 340/459, loss: 0.1259099543094635 2023-01-24 01:19:30.003132: step: 342/459, loss: 0.12508197128772736 2023-01-24 01:19:30.597146: step: 344/459, loss: 0.14381733536720276 2023-01-24 01:19:31.177228: step: 346/459, loss: 0.11520630866289139 2023-01-24 01:19:31.747840: step: 348/459, loss: 0.22323361039161682 2023-01-24 01:19:32.361563: step: 350/459, loss: 0.6369799971580505 2023-01-24 01:19:32.957450: step: 352/459, loss: 0.23408111929893494 2023-01-24 01:19:33.570836: step: 354/459, loss: 0.20343588292598724 2023-01-24 01:19:34.163019: step: 356/459, loss: 0.06070377305150032 2023-01-24 01:19:34.737104: step: 358/459, loss: 0.18635335564613342 2023-01-24 01:19:35.311812: step: 360/459, loss: 0.09828927367925644 2023-01-24 01:19:35.970057: step: 362/459, loss: 1.0264030694961548 2023-01-24 01:19:36.540887: step: 364/459, loss: 0.20931436121463776 2023-01-24 01:19:37.138915: step: 366/459, loss: 0.2880329191684723 2023-01-24 01:19:37.799664: step: 368/459, loss: 0.12167339026927948 2023-01-24 01:19:38.511961: step: 370/459, loss: 0.20372167229652405 2023-01-24 01:19:39.028487: step: 372/459, loss: 0.24956342577934265 2023-01-24 01:19:39.607619: step: 374/459, loss: 0.15150681138038635 2023-01-24 01:19:40.191604: step: 376/459, loss: 0.38044434785842896 2023-01-24 01:19:40.804010: step: 378/459, loss: 0.11214401572942734 2023-01-24 01:19:41.420805: step: 380/459, loss: 0.11197376996278763 2023-01-24 01:19:42.016882: step: 382/459, loss: 0.530284583568573 2023-01-24 01:19:42.689027: step: 384/459, loss: 0.19694790244102478 2023-01-24 01:19:43.346194: step: 386/459, loss: 0.16756093502044678 2023-01-24 01:19:43.954804: step: 388/459, loss: 0.07308543473482132 2023-01-24 01:19:44.637070: step: 390/459, loss: 0.07173973321914673 2023-01-24 01:19:45.334681: step: 392/459, loss: 0.11009081453084946 2023-01-24 01:19:46.016515: step: 394/459, loss: 0.11157985776662827 2023-01-24 01:19:46.659009: step: 396/459, loss: 0.07279558479785919 2023-01-24 01:19:47.259174: step: 398/459, loss: 0.12753689289093018 2023-01-24 01:19:47.921196: step: 400/459, loss: 0.15886692702770233 2023-01-24 01:19:48.506597: step: 402/459, loss: 0.205514594912529 2023-01-24 01:19:49.128869: step: 404/459, loss: 0.1401960551738739 2023-01-24 01:19:49.815284: step: 406/459, loss: 0.298942506313324 2023-01-24 01:19:50.421294: step: 408/459, loss: 0.08034978061914444 2023-01-24 01:19:51.030970: step: 410/459, loss: 0.7359286546707153 2023-01-24 01:19:51.616174: step: 412/459, loss: 0.7247535586357117 2023-01-24 01:19:52.217891: step: 414/459, loss: 0.15892934799194336 2023-01-24 01:19:52.816997: step: 416/459, loss: 0.17370885610580444 2023-01-24 01:19:53.467678: step: 418/459, loss: 0.4538463354110718 2023-01-24 01:19:54.062036: step: 420/459, loss: 0.08734085410833359 2023-01-24 01:19:54.634833: step: 422/459, loss: 0.1467694193124771 2023-01-24 01:19:55.245940: step: 424/459, loss: 0.21700634062290192 2023-01-24 01:19:55.853051: step: 426/459, loss: 0.47473767399787903 2023-01-24 01:19:56.459513: step: 428/459, loss: 0.15106791257858276 2023-01-24 01:19:57.049272: step: 430/459, loss: 0.23562556505203247 2023-01-24 01:19:57.646832: step: 432/459, loss: 0.6821832656860352 2023-01-24 01:19:58.192513: step: 434/459, loss: 0.1315738558769226 2023-01-24 01:19:58.795045: step: 436/459, loss: 0.1795002520084381 2023-01-24 01:19:59.375114: step: 438/459, loss: 0.293373703956604 2023-01-24 01:19:59.972251: 
step: 440/459, loss: 0.2753497362136841 2023-01-24 01:20:00.576335: step: 442/459, loss: 0.5403227210044861 2023-01-24 01:20:01.228288: step: 444/459, loss: 0.05273180082440376 2023-01-24 01:20:01.812739: step: 446/459, loss: 0.2242349237203598 2023-01-24 01:20:02.442307: step: 448/459, loss: 0.15229573845863342 2023-01-24 01:20:03.055340: step: 450/459, loss: 0.1243140771985054 2023-01-24 01:20:03.696951: step: 452/459, loss: 0.1731608659029007 2023-01-24 01:20:04.326519: step: 454/459, loss: 0.8890106678009033 2023-01-24 01:20:04.948505: step: 456/459, loss: 0.03696301579475403 2023-01-24 01:20:05.479709: step: 458/459, loss: 0.07817330956459045 2023-01-24 01:20:06.153708: step: 460/459, loss: 0.07482181489467621 2023-01-24 01:20:06.779301: step: 462/459, loss: 0.34497034549713135 2023-01-24 01:20:07.405293: step: 464/459, loss: 0.41047120094299316 2023-01-24 01:20:08.062773: step: 466/459, loss: 0.06992670893669128 2023-01-24 01:20:08.669759: step: 468/459, loss: 0.13697533309459686 2023-01-24 01:20:09.280203: step: 470/459, loss: 0.7818967700004578 2023-01-24 01:20:09.922256: step: 472/459, loss: 0.14840863645076752 2023-01-24 01:20:10.593104: step: 474/459, loss: 0.06512425094842911 2023-01-24 01:20:11.224894: step: 476/459, loss: 0.2049124836921692 2023-01-24 01:20:11.840141: step: 478/459, loss: 0.025172173976898193 2023-01-24 01:20:12.452398: step: 480/459, loss: 0.12958122789859772 2023-01-24 01:20:13.058405: step: 482/459, loss: 0.6759675741195679 2023-01-24 01:20:13.810284: step: 484/459, loss: 0.8136987686157227 2023-01-24 01:20:14.420772: step: 486/459, loss: 0.10330687463283539 2023-01-24 01:20:15.054779: step: 488/459, loss: 0.19505925476551056 2023-01-24 01:20:15.660361: step: 490/459, loss: 0.15746653079986572 2023-01-24 01:20:16.265710: step: 492/459, loss: 0.2979286313056946 2023-01-24 01:20:16.929204: step: 494/459, loss: 0.1473906934261322 2023-01-24 01:20:17.561651: step: 496/459, loss: 0.13381829857826233 2023-01-24 01:20:18.154820: step: 498/459, loss: 0.11969268321990967 2023-01-24 01:20:18.861849: step: 500/459, loss: 0.7136292457580566 2023-01-24 01:20:19.513429: step: 502/459, loss: 0.4995328485965729 2023-01-24 01:20:20.175111: step: 504/459, loss: 0.5598750710487366 2023-01-24 01:20:20.806055: step: 506/459, loss: 0.5859090089797974 2023-01-24 01:20:21.400437: step: 508/459, loss: 0.1955396831035614 2023-01-24 01:20:22.035019: step: 510/459, loss: 0.177068293094635 2023-01-24 01:20:22.601483: step: 512/459, loss: 0.28677797317504883 2023-01-24 01:20:23.313097: step: 514/459, loss: 0.2146628499031067 2023-01-24 01:20:23.889533: step: 516/459, loss: 0.8030432462692261 2023-01-24 01:20:24.499748: step: 518/459, loss: 0.1559860110282898 2023-01-24 01:20:25.085619: step: 520/459, loss: 0.11379185318946838 2023-01-24 01:20:25.672021: step: 522/459, loss: 0.10224831104278564 2023-01-24 01:20:26.261775: step: 524/459, loss: 0.12376508861780167 2023-01-24 01:20:26.876217: step: 526/459, loss: 0.3230290710926056 2023-01-24 01:20:27.516004: step: 528/459, loss: 0.13565754890441895 2023-01-24 01:20:28.182197: step: 530/459, loss: 0.0542701780796051 2023-01-24 01:20:28.753834: step: 532/459, loss: 0.08297377079725266 2023-01-24 01:20:29.401319: step: 534/459, loss: 0.3390170633792877 2023-01-24 01:20:29.914439: step: 536/459, loss: 0.20054635405540466 2023-01-24 01:20:30.502324: step: 538/459, loss: 0.23851022124290466 2023-01-24 01:20:31.073681: step: 540/459, loss: 0.5092998743057251 2023-01-24 01:20:31.687392: step: 542/459, loss: 0.642332136631012 2023-01-24 
01:20:32.326849: step: 544/459, loss: 0.8473807573318481 2023-01-24 01:20:32.991811: step: 546/459, loss: 1.1015186309814453 2023-01-24 01:20:33.594830: step: 548/459, loss: 0.46099480986595154 2023-01-24 01:20:34.187782: step: 550/459, loss: 0.19970695674419403 2023-01-24 01:20:34.791757: step: 552/459, loss: 0.1282433569431305 2023-01-24 01:20:35.401185: step: 554/459, loss: 0.5531922578811646 2023-01-24 01:20:35.994034: step: 556/459, loss: 0.12171528488397598 2023-01-24 01:20:36.621950: step: 558/459, loss: 0.2630916237831116 2023-01-24 01:20:37.285789: step: 560/459, loss: 0.20247220993041992 2023-01-24 01:20:37.861829: step: 562/459, loss: 0.2858603298664093 2023-01-24 01:20:38.521737: step: 564/459, loss: 1.6913655996322632 2023-01-24 01:20:39.077744: step: 566/459, loss: 0.24105285108089447 2023-01-24 01:20:39.750071: step: 568/459, loss: 0.35067233443260193 2023-01-24 01:20:40.356162: step: 570/459, loss: 0.4374296963214874 2023-01-24 01:20:40.983343: step: 572/459, loss: 0.28077489137649536 2023-01-24 01:20:41.582310: step: 574/459, loss: 0.08453390747308731 2023-01-24 01:20:42.287389: step: 576/459, loss: 0.5633910894393921 2023-01-24 01:20:42.920445: step: 578/459, loss: 0.21342326700687408 2023-01-24 01:20:43.502278: step: 580/459, loss: 0.17924810945987701 2023-01-24 01:20:44.093766: step: 582/459, loss: 0.0880827009677887 2023-01-24 01:20:44.699288: step: 584/459, loss: 0.18753349781036377 2023-01-24 01:20:45.318638: step: 586/459, loss: 1.3143996000289917 2023-01-24 01:20:45.932883: step: 588/459, loss: 0.19596165418624878 2023-01-24 01:20:46.568005: step: 590/459, loss: 0.2253033071756363 2023-01-24 01:20:47.156281: step: 592/459, loss: 0.09963874518871307 2023-01-24 01:20:47.832387: step: 594/459, loss: 0.19147613644599915 2023-01-24 01:20:48.430969: step: 596/459, loss: 1.237546682357788 2023-01-24 01:20:49.033468: step: 598/459, loss: 0.13974513113498688 2023-01-24 01:20:49.734207: step: 600/459, loss: 0.46580561995506287 2023-01-24 01:20:50.361787: step: 602/459, loss: 0.657346785068512 2023-01-24 01:20:50.967887: step: 604/459, loss: 0.07346367090940475 2023-01-24 01:20:51.550854: step: 606/459, loss: 0.05609246343374252 2023-01-24 01:20:52.106015: step: 608/459, loss: 0.2167404741048813 2023-01-24 01:20:52.741984: step: 610/459, loss: 0.4996007978916168 2023-01-24 01:20:53.305931: step: 612/459, loss: 0.04309064522385597 2023-01-24 01:20:53.897342: step: 614/459, loss: 0.2606240212917328 2023-01-24 01:20:54.478666: step: 616/459, loss: 0.43020468950271606 2023-01-24 01:20:55.136611: step: 618/459, loss: 0.1865842640399933 2023-01-24 01:20:55.766022: step: 620/459, loss: 0.05788491293787956 2023-01-24 01:20:56.374051: step: 622/459, loss: 0.22437423467636108 2023-01-24 01:20:56.985839: step: 624/459, loss: 0.16920676827430725 2023-01-24 01:20:57.632931: step: 626/459, loss: 0.18611519038677216 2023-01-24 01:20:58.262778: step: 628/459, loss: 0.09492641687393188 2023-01-24 01:20:58.829749: step: 630/459, loss: 0.10451439023017883 2023-01-24 01:20:59.485251: step: 632/459, loss: 0.1459122598171234 2023-01-24 01:21:00.160350: step: 634/459, loss: 0.1625816822052002 2023-01-24 01:21:00.712250: step: 636/459, loss: 0.1318088173866272 2023-01-24 01:21:01.329185: step: 638/459, loss: 0.2111281454563141 2023-01-24 01:21:02.027355: step: 640/459, loss: 0.13033626973628998 2023-01-24 01:21:02.604106: step: 642/459, loss: 0.10028330236673355 2023-01-24 01:21:03.302020: step: 644/459, loss: 0.06898044794797897 2023-01-24 01:21:03.907447: step: 646/459, loss: 0.14742401242256165 
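--------------------
Note: in the per-language evaluation dicts printed at each "==========" boundary (Dev/Test/Sample Chinese, Korean, Russian), each f1 is the usual harmonic mean of p and r, and the 'combined' score is consistent with the product of the template f1 and the slot f1; for example, the epoch-9 Dev Chinese block above gives 0.7368421052631579 * 0.31635415002846407 = 0.23310305791571034. This relationship is inferred from the logged numbers, not taken from the scorer's code. A short sketch that reproduces that epoch-9 Dev Chinese entry:

import math

def f1(p: float, r: float) -> float:
    # Harmonic mean of precision and recall.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

# Values copied from the epoch-9 Dev Chinese evaluation block in this log.
template_f1 = f1(1.0, 0.5833333333333334)
slot_f1 = f1(0.3154588080944213, 0.3172545887856609)
combined = template_f1 * slot_f1

# These hold for the numbers printed above, up to floating-point tolerance.
assert math.isclose(template_f1, 0.7368421052631579)
assert math.isclose(slot_f1, 0.31635415002846407)
assert math.isclose(combined, 0.23310305791571034)
--------------------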
2023-01-24 01:21:04.566942: step: 648/459, loss: 0.22197288274765015 2023-01-24 01:21:05.212735: step: 650/459, loss: 0.15384253859519958 2023-01-24 01:21:05.930969: step: 652/459, loss: 1.8417571783065796 2023-01-24 01:21:06.587367: step: 654/459, loss: 0.165804922580719 2023-01-24 01:21:07.154762: step: 656/459, loss: 0.7815886735916138 2023-01-24 01:21:07.798262: step: 658/459, loss: 0.21451255679130554 2023-01-24 01:21:08.432463: step: 660/459, loss: 0.21149545907974243 2023-01-24 01:21:09.154805: step: 662/459, loss: 0.5635830163955688 2023-01-24 01:21:09.768690: step: 664/459, loss: 0.18787738680839539 2023-01-24 01:21:10.411431: step: 666/459, loss: 0.11547381430864334 2023-01-24 01:21:11.092658: step: 668/459, loss: 0.1843014359474182 2023-01-24 01:21:11.675380: step: 670/459, loss: 0.19759654998779297 2023-01-24 01:21:12.290531: step: 672/459, loss: 0.12948882579803467 2023-01-24 01:21:12.859846: step: 674/459, loss: 0.6119012832641602 2023-01-24 01:21:13.520881: step: 676/459, loss: 0.26001206040382385 2023-01-24 01:21:14.095014: step: 678/459, loss: 0.13340380787849426 2023-01-24 01:21:14.724286: step: 680/459, loss: 0.2582593560218811 2023-01-24 01:21:15.305944: step: 682/459, loss: 0.39864856004714966 2023-01-24 01:21:15.896793: step: 684/459, loss: 0.30493804812431335 2023-01-24 01:21:16.553271: step: 686/459, loss: 1.796566128730774 2023-01-24 01:21:17.140941: step: 688/459, loss: 0.15849432349205017 2023-01-24 01:21:17.827676: step: 690/459, loss: 0.6991130113601685 2023-01-24 01:21:18.410236: step: 692/459, loss: 0.31427907943725586 2023-01-24 01:21:19.038901: step: 694/459, loss: 0.2751021981239319 2023-01-24 01:21:19.662878: step: 696/459, loss: 0.22772768139839172 2023-01-24 01:21:20.342000: step: 698/459, loss: 0.25731295347213745 2023-01-24 01:21:21.027058: step: 700/459, loss: 0.36701464653015137 2023-01-24 01:21:21.673421: step: 702/459, loss: 0.08471129089593887 2023-01-24 01:21:22.427902: step: 704/459, loss: 0.2511719763278961 2023-01-24 01:21:23.140498: step: 706/459, loss: 0.30442163348197937 2023-01-24 01:21:23.769616: step: 708/459, loss: 0.24660107493400574 2023-01-24 01:21:24.350353: step: 710/459, loss: 0.1957455724477768 2023-01-24 01:21:25.069047: step: 712/459, loss: 0.08959624916315079 2023-01-24 01:21:25.673607: step: 714/459, loss: 0.337095707654953 2023-01-24 01:21:26.267198: step: 716/459, loss: 0.4840773046016693 2023-01-24 01:21:26.898086: step: 718/459, loss: 0.2206980586051941 2023-01-24 01:21:27.539312: step: 720/459, loss: 0.29779791831970215 2023-01-24 01:21:28.110135: step: 722/459, loss: 0.15437813103199005 2023-01-24 01:21:28.729938: step: 724/459, loss: 0.16138367354869843 2023-01-24 01:21:29.321008: step: 726/459, loss: 0.26371991634368896 2023-01-24 01:21:29.944388: step: 728/459, loss: 0.2877993881702423 2023-01-24 01:21:30.545584: step: 730/459, loss: 0.5694596171379089 2023-01-24 01:21:31.168758: step: 732/459, loss: 0.9912011027336121 2023-01-24 01:21:31.823610: step: 734/459, loss: 0.09186184406280518 2023-01-24 01:21:32.448787: step: 736/459, loss: 0.20718693733215332 2023-01-24 01:21:33.116511: step: 738/459, loss: 0.22401660680770874 2023-01-24 01:21:33.756234: step: 740/459, loss: 1.8879618644714355 2023-01-24 01:21:34.347374: step: 742/459, loss: 0.13363297283649445 2023-01-24 01:21:34.922541: step: 744/459, loss: 0.2230210304260254 2023-01-24 01:21:35.553062: step: 746/459, loss: 0.3380025029182434 2023-01-24 01:21:36.168751: step: 748/459, loss: 0.06498363614082336 2023-01-24 01:21:36.774102: step: 750/459, loss: 
0.5234909057617188 2023-01-24 01:21:37.416699: step: 752/459, loss: 0.11136837303638458 2023-01-24 01:21:38.021982: step: 754/459, loss: 0.4811667799949646 2023-01-24 01:21:38.597598: step: 756/459, loss: 0.10227550566196442 2023-01-24 01:21:39.189902: step: 758/459, loss: 0.031424738466739655 2023-01-24 01:21:39.761187: step: 760/459, loss: 0.4723798632621765 2023-01-24 01:21:40.356818: step: 762/459, loss: 0.23854956030845642 2023-01-24 01:21:40.972619: step: 764/459, loss: 0.2076602578163147 2023-01-24 01:21:41.586604: step: 766/459, loss: 0.13565179705619812 2023-01-24 01:21:42.198569: step: 768/459, loss: 0.5936933159828186 2023-01-24 01:21:42.739384: step: 770/459, loss: 0.052733685821294785 2023-01-24 01:21:43.436241: step: 772/459, loss: 0.2891384959220886 2023-01-24 01:21:44.140702: step: 774/459, loss: 0.6160274147987366 2023-01-24 01:21:44.741762: step: 776/459, loss: 0.1920936554670334 2023-01-24 01:21:45.306662: step: 778/459, loss: 0.09968074411153793 2023-01-24 01:21:46.035996: step: 780/459, loss: 0.6195327639579773 2023-01-24 01:21:46.596358: step: 782/459, loss: 0.17204086482524872 2023-01-24 01:21:47.222206: step: 784/459, loss: 0.14723150432109833 2023-01-24 01:21:47.831403: step: 786/459, loss: 0.246367946267128 2023-01-24 01:21:48.447190: step: 788/459, loss: 0.22740615904331207 2023-01-24 01:21:49.064846: step: 790/459, loss: 7.4582743644714355 2023-01-24 01:21:49.652100: step: 792/459, loss: 0.10984209924936295 2023-01-24 01:21:50.232188: step: 794/459, loss: 1.9843415021896362 2023-01-24 01:21:50.842414: step: 796/459, loss: 3.2093262672424316 2023-01-24 01:21:51.440029: step: 798/459, loss: 0.4451572597026825 2023-01-24 01:21:52.065945: step: 800/459, loss: 0.21722494065761566 2023-01-24 01:21:52.681070: step: 802/459, loss: 0.13896703720092773 2023-01-24 01:21:53.294700: step: 804/459, loss: 1.1317224502563477 2023-01-24 01:21:53.979883: step: 806/459, loss: 0.12583205103874207 2023-01-24 01:21:54.593030: step: 808/459, loss: 0.40542489290237427 2023-01-24 01:21:55.226466: step: 810/459, loss: 0.06498067080974579 2023-01-24 01:21:55.798813: step: 812/459, loss: 0.28849056363105774 2023-01-24 01:21:56.376297: step: 814/459, loss: 0.10134904086589813 2023-01-24 01:21:56.972933: step: 816/459, loss: 0.23083198070526123 2023-01-24 01:21:57.595571: step: 818/459, loss: 0.14775025844573975 2023-01-24 01:21:58.189025: step: 820/459, loss: 0.17366936802864075 2023-01-24 01:21:58.795695: step: 822/459, loss: 0.1004960685968399 2023-01-24 01:21:59.459590: step: 824/459, loss: 0.42904987931251526 2023-01-24 01:22:00.028538: step: 826/459, loss: 0.28194108605384827 2023-01-24 01:22:00.635646: step: 828/459, loss: 0.06699840724468231 2023-01-24 01:22:01.347899: step: 830/459, loss: 0.1414741426706314 2023-01-24 01:22:02.043459: step: 832/459, loss: 0.2630632221698761 2023-01-24 01:22:02.677307: step: 834/459, loss: 0.21558554470539093 2023-01-24 01:22:03.314185: step: 836/459, loss: 0.19985446333885193 2023-01-24 01:22:03.961462: step: 838/459, loss: 0.06485264748334885 2023-01-24 01:22:04.546348: step: 840/459, loss: 0.24058939516544342 2023-01-24 01:22:05.120150: step: 842/459, loss: 0.1474485844373703 2023-01-24 01:22:05.683524: step: 844/459, loss: 0.8773193955421448 2023-01-24 01:22:06.275917: step: 846/459, loss: 0.15848414599895477 2023-01-24 01:22:06.918037: step: 848/459, loss: 0.44556644558906555 2023-01-24 01:22:07.549524: step: 850/459, loss: 0.08109371364116669 2023-01-24 01:22:08.133883: step: 852/459, loss: 0.19866976141929626 2023-01-24 01:22:08.758696: step: 
854/459, loss: 0.45352280139923096 2023-01-24 01:22:09.414074: step: 856/459, loss: 0.12297870963811874 2023-01-24 01:22:09.996589: step: 858/459, loss: 0.11407219618558884 2023-01-24 01:22:10.609006: step: 860/459, loss: 0.35643160343170166 2023-01-24 01:22:11.198802: step: 862/459, loss: 0.14556767046451569 2023-01-24 01:22:11.756555: step: 864/459, loss: 0.13254494965076447 2023-01-24 01:22:12.318269: step: 866/459, loss: 0.13992054760456085 2023-01-24 01:22:12.979542: step: 868/459, loss: 0.5030947327613831 2023-01-24 01:22:13.620477: step: 870/459, loss: 0.10183116793632507 2023-01-24 01:22:14.233699: step: 872/459, loss: 0.169880211353302 2023-01-24 01:22:14.883555: step: 874/459, loss: 0.133046954870224 2023-01-24 01:22:15.522671: step: 876/459, loss: 0.6286401152610779 2023-01-24 01:22:16.109784: step: 878/459, loss: 0.39796382188796997 2023-01-24 01:22:16.740568: step: 880/459, loss: 0.5414756536483765 2023-01-24 01:22:17.349747: step: 882/459, loss: 0.4376463294029236 2023-01-24 01:22:17.965298: step: 884/459, loss: 0.11601842194795609 2023-01-24 01:22:18.644863: step: 886/459, loss: 0.4026702046394348 2023-01-24 01:22:19.269564: step: 888/459, loss: 0.03432097285985947 2023-01-24 01:22:19.886049: step: 890/459, loss: 0.1806098073720932 2023-01-24 01:22:20.506552: step: 892/459, loss: 0.10867214947938919 2023-01-24 01:22:21.074386: step: 894/459, loss: 0.06843176484107971 2023-01-24 01:22:21.723394: step: 896/459, loss: 0.0748240202665329 2023-01-24 01:22:22.306484: step: 898/459, loss: 0.1584329754114151 2023-01-24 01:22:22.912594: step: 900/459, loss: 0.21586425602436066 2023-01-24 01:22:23.481104: step: 902/459, loss: 0.47275546193122864 2023-01-24 01:22:24.097534: step: 904/459, loss: 0.23157605528831482 2023-01-24 01:22:24.700162: step: 906/459, loss: 0.12824992835521698 2023-01-24 01:22:25.324061: step: 908/459, loss: 0.2952934205532074 2023-01-24 01:22:25.925313: step: 910/459, loss: 0.10347428917884827 2023-01-24 01:22:26.549575: step: 912/459, loss: 0.10599271953105927 2023-01-24 01:22:27.131426: step: 914/459, loss: 0.20181435346603394 2023-01-24 01:22:27.642568: step: 916/459, loss: 0.20168662071228027 2023-01-24 01:22:28.275635: step: 918/459, loss: 0.23510690033435822 2023-01-24 01:22:28.726198: step: 920/459, loss: 0.024957263842225075 ================================================== Loss: 0.301 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 'epoch': 10} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3191736506430529, 'r': 0.3379485712691149, 'f1': 0.32829289780428306}, 'combined': 0.24190002996105067, 'epoch': 10} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3584323700662122, 'r': 0.2799030962607966, 'f1': 0.3143373209666935}, 'combined': 0.2011758854186838, 'epoch': 10} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3241953235699605, 'r': 0.32727118052982734, 'f1': 0.3257259908200548}, 'combined': 0.24000862481477722, 'epoch': 10} Test Russian: {'template': {'p': 
0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3593787429497859, 'r': 0.30701025581684804, 'f1': 0.3311367985136534}, 'combined': 0.2374188366701666, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'epoch': 10} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.31521739130434784, 'f1': 0.2788461538461538}, 'combined': 0.1394230769230769, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25, 'r': 0.10344827586206896, 'f1': 0.14634146341463414}, 'combined': 0.09756097560975609, 'epoch': 10} New best chinese model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 'epoch': 10} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 'epoch': 10} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'epoch': 10} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3342478880342958, 'r': 0.3266369304319968, 'f1': 0.33039858414138645}, 'combined': 0.24345158831470579, 'epoch': 5} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3413499740991752, 'r': 0.24608229950967814, 'f1': 0.28599105067157526}, 'combined': 0.18303427242980813, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3269230769230769, 'r': 0.3695652173913043, 'f1': 0.346938775510204}, 'combined': 0.173469387755102, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8} ****************************** Epoch: 11 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:25:08.672263: step: 2/459, loss: 0.1064663901925087 2023-01-24 01:25:09.263745: step: 4/459, loss: 0.11158014833927155 2023-01-24 01:25:09.908407: step: 6/459, loss: 0.31235718727111816 2023-01-24 01:25:10.529399: step: 8/459, loss: 0.08067012578248978 2023-01-24 01:25:11.179648: step: 10/459, loss: 1.4421483278274536 2023-01-24 
01:25:11.814132: step: 12/459, loss: 0.21073101460933685 2023-01-24 01:25:12.456908: step: 14/459, loss: 0.3250364065170288 2023-01-24 01:25:13.109704: step: 16/459, loss: 0.21686294674873352 2023-01-24 01:25:13.795991: step: 18/459, loss: 0.11218512803316116 2023-01-24 01:25:14.421970: step: 20/459, loss: 0.3183239996433258 2023-01-24 01:25:15.040826: step: 22/459, loss: 0.08096960932016373 2023-01-24 01:25:15.704167: step: 24/459, loss: 0.09690922498703003 2023-01-24 01:25:16.436265: step: 26/459, loss: 0.44802719354629517 2023-01-24 01:25:17.036212: step: 28/459, loss: 1.3152124881744385 2023-01-24 01:25:17.642607: step: 30/459, loss: 0.16864757239818573 2023-01-24 01:25:18.215005: step: 32/459, loss: 0.13651035726070404 2023-01-24 01:25:18.851418: step: 34/459, loss: 0.09545163065195084 2023-01-24 01:25:19.522270: step: 36/459, loss: 0.1449788212776184 2023-01-24 01:25:20.147470: step: 38/459, loss: 1.1815979480743408 2023-01-24 01:25:20.800251: step: 40/459, loss: 0.1134345680475235 2023-01-24 01:25:21.424155: step: 42/459, loss: 0.09671846032142639 2023-01-24 01:25:22.041517: step: 44/459, loss: 0.10404068231582642 2023-01-24 01:25:22.641540: step: 46/459, loss: 0.030971398577094078 2023-01-24 01:25:23.263760: step: 48/459, loss: 0.11006869375705719 2023-01-24 01:25:23.810594: step: 50/459, loss: 0.08510725200176239 2023-01-24 01:25:24.457301: step: 52/459, loss: 0.3005543649196625 2023-01-24 01:25:25.029777: step: 54/459, loss: 0.5092741250991821 2023-01-24 01:25:25.745809: step: 56/459, loss: 0.6074564456939697 2023-01-24 01:25:26.387941: step: 58/459, loss: 0.2578444182872772 2023-01-24 01:25:27.058954: step: 60/459, loss: 0.054291192442178726 2023-01-24 01:25:27.661339: step: 62/459, loss: 0.12517617642879486 2023-01-24 01:25:28.228831: step: 64/459, loss: 0.21186041831970215 2023-01-24 01:25:28.878062: step: 66/459, loss: 0.226368710398674 2023-01-24 01:25:29.450316: step: 68/459, loss: 0.13286228477954865 2023-01-24 01:25:30.084801: step: 70/459, loss: 0.16902662813663483 2023-01-24 01:25:30.823679: step: 72/459, loss: 0.16068463027477264 2023-01-24 01:25:31.440498: step: 74/459, loss: 0.35327666997909546 2023-01-24 01:25:32.052993: step: 76/459, loss: 0.21157409250736237 2023-01-24 01:25:32.654218: step: 78/459, loss: 0.13008509576320648 2023-01-24 01:25:33.283662: step: 80/459, loss: 1.205014944076538 2023-01-24 01:25:33.947115: step: 82/459, loss: 0.23300984501838684 2023-01-24 01:25:34.579203: step: 84/459, loss: 0.09899591654539108 2023-01-24 01:25:35.186432: step: 86/459, loss: 0.05063856393098831 2023-01-24 01:25:35.866190: step: 88/459, loss: 0.18660379946231842 2023-01-24 01:25:36.482192: step: 90/459, loss: 0.07566317915916443 2023-01-24 01:25:37.061103: step: 92/459, loss: 0.20241892337799072 2023-01-24 01:25:37.664131: step: 94/459, loss: 0.09725095331668854 2023-01-24 01:25:38.229035: step: 96/459, loss: 0.1597498655319214 2023-01-24 01:25:38.801538: step: 98/459, loss: 0.3645203113555908 2023-01-24 01:25:39.408502: step: 100/459, loss: 0.14424048364162445 2023-01-24 01:25:39.984286: step: 102/459, loss: 0.18234121799468994 2023-01-24 01:25:40.607513: step: 104/459, loss: 1.7936718463897705 2023-01-24 01:25:41.243445: step: 106/459, loss: 0.6754070520401001 2023-01-24 01:25:41.926956: step: 108/459, loss: 0.7132262587547302 2023-01-24 01:25:42.482539: step: 110/459, loss: 0.05377208814024925 2023-01-24 01:25:43.169973: step: 112/459, loss: 0.4605875313282013 2023-01-24 01:25:43.811692: step: 114/459, loss: 0.20995847880840302 2023-01-24 01:25:44.531334: step: 
116/459, loss: 0.10808410495519638 2023-01-24 01:25:45.141988: step: 118/459, loss: 0.1836111694574356 2023-01-24 01:25:45.782449: step: 120/459, loss: 0.1843041479587555 2023-01-24 01:25:46.321124: step: 122/459, loss: 0.07879669219255447 2023-01-24 01:25:46.963028: step: 124/459, loss: 0.4522390067577362 2023-01-24 01:25:47.534900: step: 126/459, loss: 0.04130486026406288 2023-01-24 01:25:48.121060: step: 128/459, loss: 0.12525488436222076 2023-01-24 01:25:48.723882: step: 130/459, loss: 0.12086424231529236 2023-01-24 01:25:49.293956: step: 132/459, loss: 0.1679960936307907 2023-01-24 01:25:49.992448: step: 134/459, loss: 0.13355891406536102 2023-01-24 01:25:50.616964: step: 136/459, loss: 0.13633868098258972 2023-01-24 01:25:51.186625: step: 138/459, loss: 0.07238764315843582 2023-01-24 01:25:51.794006: step: 140/459, loss: 0.12499092519283295 2023-01-24 01:25:52.428503: step: 142/459, loss: 0.09099820256233215 2023-01-24 01:25:53.086072: step: 144/459, loss: 0.2693673372268677 2023-01-24 01:25:53.683955: step: 146/459, loss: 0.297279953956604 2023-01-24 01:25:54.291568: step: 148/459, loss: 0.3454153537750244 2023-01-24 01:25:54.921960: step: 150/459, loss: 0.2528136670589447 2023-01-24 01:25:55.539216: step: 152/459, loss: 0.36448416113853455 2023-01-24 01:25:56.128410: step: 154/459, loss: 0.11259215325117111 2023-01-24 01:25:56.743757: step: 156/459, loss: 0.1999616026878357 2023-01-24 01:25:57.493594: step: 158/459, loss: 0.5080814361572266 2023-01-24 01:25:58.212768: step: 160/459, loss: 0.16597363352775574 2023-01-24 01:25:58.792570: step: 162/459, loss: 0.3199305534362793 2023-01-24 01:25:59.382189: step: 164/459, loss: 0.05609815940260887 2023-01-24 01:26:00.014321: step: 166/459, loss: 0.18863293528556824 2023-01-24 01:26:00.615447: step: 168/459, loss: 0.11574272066354752 2023-01-24 01:26:01.228658: step: 170/459, loss: 0.24875354766845703 2023-01-24 01:26:01.806960: step: 172/459, loss: 0.21348154544830322 2023-01-24 01:26:02.411870: step: 174/459, loss: 0.9428542256355286 2023-01-24 01:26:03.060608: step: 176/459, loss: 0.39754626154899597 2023-01-24 01:26:03.701436: step: 178/459, loss: 0.04605812579393387 2023-01-24 01:26:04.333294: step: 180/459, loss: 0.19703832268714905 2023-01-24 01:26:04.965803: step: 182/459, loss: 0.34118467569351196 2023-01-24 01:26:05.587229: step: 184/459, loss: 0.5218039155006409 2023-01-24 01:26:06.189069: step: 186/459, loss: 0.1025569885969162 2023-01-24 01:26:06.783691: step: 188/459, loss: 0.1511545330286026 2023-01-24 01:26:07.384047: step: 190/459, loss: 0.06982841342687607 2023-01-24 01:26:07.991542: step: 192/459, loss: 0.1707327961921692 2023-01-24 01:26:08.623911: step: 194/459, loss: 14.013739585876465 2023-01-24 01:26:09.214553: step: 196/459, loss: 0.1611916422843933 2023-01-24 01:26:09.808298: step: 198/459, loss: 0.2139482945203781 2023-01-24 01:26:10.406728: step: 200/459, loss: 0.124664306640625 2023-01-24 01:26:11.050678: step: 202/459, loss: 0.057622022926807404 2023-01-24 01:26:11.684357: step: 204/459, loss: 0.05425829440355301 2023-01-24 01:26:12.401636: step: 206/459, loss: 0.8880446553230286 2023-01-24 01:26:13.046954: step: 208/459, loss: 0.09059439599514008 2023-01-24 01:26:13.662554: step: 210/459, loss: 0.38740259408950806 2023-01-24 01:26:14.326182: step: 212/459, loss: 0.4090414345264435 2023-01-24 01:26:15.013018: step: 214/459, loss: 0.13813777267932892 2023-01-24 01:26:15.604111: step: 216/459, loss: 0.0707910805940628 2023-01-24 01:26:16.245576: step: 218/459, loss: 0.313612699508667 2023-01-24 
01:26:16.809387: step: 220/459, loss: 0.0705358162522316 2023-01-24 01:26:17.447034: step: 222/459, loss: 0.19249635934829712 2023-01-24 01:26:18.026602: step: 224/459, loss: 0.14078976213932037 2023-01-24 01:26:18.621097: step: 226/459, loss: 0.26673606038093567 2023-01-24 01:26:19.294023: step: 228/459, loss: 0.07499901205301285 2023-01-24 01:26:19.846686: step: 230/459, loss: 0.0941598191857338 2023-01-24 01:26:20.433179: step: 232/459, loss: 0.12241967767477036 2023-01-24 01:26:21.034471: step: 234/459, loss: 0.10046818107366562 2023-01-24 01:26:21.636063: step: 236/459, loss: 0.6958301663398743 2023-01-24 01:26:22.363710: step: 238/459, loss: 0.17795473337173462 2023-01-24 01:26:22.988761: step: 240/459, loss: 0.2122962325811386 2023-01-24 01:26:23.559803: step: 242/459, loss: 0.18730950355529785 2023-01-24 01:26:24.205968: step: 244/459, loss: 1.6027145385742188 2023-01-24 01:26:24.855516: step: 246/459, loss: 0.12122776359319687 2023-01-24 01:26:25.527042: step: 248/459, loss: 0.2605363726615906 2023-01-24 01:26:26.107025: step: 250/459, loss: 0.3723083436489105 2023-01-24 01:26:26.774343: step: 252/459, loss: 0.6698643565177917 2023-01-24 01:26:27.437530: step: 254/459, loss: 0.11354546248912811 2023-01-24 01:26:28.053156: step: 256/459, loss: 0.32935523986816406 2023-01-24 01:26:28.632321: step: 258/459, loss: 0.06360450387001038 2023-01-24 01:26:29.326738: step: 260/459, loss: 0.35008570551872253 2023-01-24 01:26:29.925251: step: 262/459, loss: 0.8852096796035767 2023-01-24 01:26:30.515086: step: 264/459, loss: 0.12724681198596954 2023-01-24 01:26:31.154097: step: 266/459, loss: 0.13767603039741516 2023-01-24 01:26:31.882658: step: 268/459, loss: 0.1786031275987625 2023-01-24 01:26:32.496560: step: 270/459, loss: 0.27111995220184326 2023-01-24 01:26:33.080997: step: 272/459, loss: 0.20828819274902344 2023-01-24 01:26:33.665796: step: 274/459, loss: 0.07600343972444534 2023-01-24 01:26:34.284739: step: 276/459, loss: 0.1671770215034485 2023-01-24 01:26:34.929762: step: 278/459, loss: 0.27558261156082153 2023-01-24 01:26:35.607427: step: 280/459, loss: 0.6715952754020691 2023-01-24 01:26:36.228681: step: 282/459, loss: 0.11412022262811661 2023-01-24 01:26:36.818838: step: 284/459, loss: 0.07733199745416641 2023-01-24 01:26:37.382492: step: 286/459, loss: 0.12416849285364151 2023-01-24 01:26:37.995847: step: 288/459, loss: 0.14517612755298615 2023-01-24 01:26:38.594059: step: 290/459, loss: 0.22381870448589325 2023-01-24 01:26:39.180112: step: 292/459, loss: 0.1995229423046112 2023-01-24 01:26:39.828019: step: 294/459, loss: 0.09629460424184799 2023-01-24 01:26:40.610451: step: 296/459, loss: 0.0768846943974495 2023-01-24 01:26:41.214316: step: 298/459, loss: 0.34489724040031433 2023-01-24 01:26:41.781156: step: 300/459, loss: 0.20700134336948395 2023-01-24 01:26:42.435534: step: 302/459, loss: 0.21972674131393433 2023-01-24 01:26:43.127338: step: 304/459, loss: 0.07807107269763947 2023-01-24 01:26:43.713134: step: 306/459, loss: 0.08032617717981339 2023-01-24 01:26:44.251330: step: 308/459, loss: 0.12393326312303543 2023-01-24 01:26:44.893254: step: 310/459, loss: 0.6045414209365845 2023-01-24 01:26:45.497755: step: 312/459, loss: 0.5574327111244202 2023-01-24 01:26:46.117534: step: 314/459, loss: 0.09220817685127258 2023-01-24 01:26:46.714828: step: 316/459, loss: 0.2216806411743164 2023-01-24 01:26:47.301905: step: 318/459, loss: 0.1364980787038803 2023-01-24 01:26:47.875684: step: 320/459, loss: 0.02960818260908127 2023-01-24 01:26:48.506580: step: 322/459, loss: 
0.11976509541273117 2023-01-24 01:26:49.052373: step: 324/459, loss: 0.7214043140411377 2023-01-24 01:26:49.684925: step: 326/459, loss: 0.18733958899974823 2023-01-24 01:26:50.283630: step: 328/459, loss: 0.1710665374994278 2023-01-24 01:26:50.883466: step: 330/459, loss: 0.07126230001449585 2023-01-24 01:26:51.493109: step: 332/459, loss: 0.10400945693254471 2023-01-24 01:26:52.100811: step: 334/459, loss: 0.15189780294895172 2023-01-24 01:26:52.750912: step: 336/459, loss: 0.19132807850837708 2023-01-24 01:26:53.386862: step: 338/459, loss: 0.7433226704597473 2023-01-24 01:26:54.083634: step: 340/459, loss: 0.08569953590631485 2023-01-24 01:26:54.715196: step: 342/459, loss: 0.14914116263389587 2023-01-24 01:26:55.329935: step: 344/459, loss: 0.0633355900645256 2023-01-24 01:26:55.959263: step: 346/459, loss: 0.6250271201133728 2023-01-24 01:26:56.598461: step: 348/459, loss: 0.13262973725795746 2023-01-24 01:26:57.256338: step: 350/459, loss: 0.05898759141564369 2023-01-24 01:26:57.920472: step: 352/459, loss: 0.14578941464424133 2023-01-24 01:26:58.538887: step: 354/459, loss: 0.2883298397064209 2023-01-24 01:26:59.136252: step: 356/459, loss: 0.1101762056350708 2023-01-24 01:26:59.717997: step: 358/459, loss: 0.17046219110488892 2023-01-24 01:27:00.333171: step: 360/459, loss: 0.07288108021020889 2023-01-24 01:27:00.943613: step: 362/459, loss: 0.055745359510183334 2023-01-24 01:27:01.526370: step: 364/459, loss: 0.2385525107383728 2023-01-24 01:27:02.173125: step: 366/459, loss: 0.14266280829906464 2023-01-24 01:27:02.782232: step: 368/459, loss: 0.23312661051750183 2023-01-24 01:27:03.386254: step: 370/459, loss: 0.25930652022361755 2023-01-24 01:27:04.078866: step: 372/459, loss: 0.1912037879228592 2023-01-24 01:27:04.722383: step: 374/459, loss: 0.16906411945819855 2023-01-24 01:27:05.404214: step: 376/459, loss: 0.05919190123677254 2023-01-24 01:27:06.026207: step: 378/459, loss: 0.18991714715957642 2023-01-24 01:27:06.634631: step: 380/459, loss: 0.12008295208215714 2023-01-24 01:27:07.267248: step: 382/459, loss: 0.23120443522930145 2023-01-24 01:27:07.898016: step: 384/459, loss: 0.11368823796510696 2023-01-24 01:27:08.510617: step: 386/459, loss: 0.13669353723526 2023-01-24 01:27:09.116587: step: 388/459, loss: 0.26549312472343445 2023-01-24 01:27:09.785343: step: 390/459, loss: 0.16241101920604706 2023-01-24 01:27:10.402631: step: 392/459, loss: 0.1969415694475174 2023-01-24 01:27:11.045259: step: 394/459, loss: 0.12394047528505325 2023-01-24 01:27:11.681763: step: 396/459, loss: 0.08251889050006866 2023-01-24 01:27:12.316181: step: 398/459, loss: 0.10518063604831696 2023-01-24 01:27:12.896937: step: 400/459, loss: 0.13775032758712769 2023-01-24 01:27:13.547926: step: 402/459, loss: 0.5381285548210144 2023-01-24 01:27:14.114252: step: 404/459, loss: 0.17247067391872406 2023-01-24 01:27:14.744677: step: 406/459, loss: 0.15570737421512604 2023-01-24 01:27:15.330279: step: 408/459, loss: 0.12124495208263397 2023-01-24 01:27:15.957053: step: 410/459, loss: 0.09077656269073486 2023-01-24 01:27:16.585087: step: 412/459, loss: 0.26962289214134216 2023-01-24 01:27:17.276498: step: 414/459, loss: 0.18654365837574005 2023-01-24 01:27:17.890517: step: 416/459, loss: 0.18030522763729095 2023-01-24 01:27:18.572597: step: 418/459, loss: 0.32901886105537415 2023-01-24 01:27:19.196383: step: 420/459, loss: 0.3237190544605255 2023-01-24 01:27:19.795238: step: 422/459, loss: 0.26541516184806824 2023-01-24 01:27:20.418246: step: 424/459, loss: 0.09737680107355118 2023-01-24 01:27:21.021513: 
step: 426/459, loss: 0.221756249666214 2023-01-24 01:27:21.611805: step: 428/459, loss: 0.038467567414045334 2023-01-24 01:27:22.275529: step: 430/459, loss: 0.1141916811466217 2023-01-24 01:27:22.856668: step: 432/459, loss: 0.22700822353363037 2023-01-24 01:27:23.440356: step: 434/459, loss: 0.11899314820766449 2023-01-24 01:27:24.041180: step: 436/459, loss: 0.1462128460407257 2023-01-24 01:27:24.676307: step: 438/459, loss: 0.20367810130119324 2023-01-24 01:27:25.268818: step: 440/459, loss: 0.1467658281326294 2023-01-24 01:27:25.908737: step: 442/459, loss: 0.1222086101770401 2023-01-24 01:27:26.496387: step: 444/459, loss: 0.12466493993997574 2023-01-24 01:27:27.049065: step: 446/459, loss: 0.2166869193315506 2023-01-24 01:27:27.600539: step: 448/459, loss: 0.25927749276161194 2023-01-24 01:27:28.190265: step: 450/459, loss: 0.18062761425971985 2023-01-24 01:27:28.877343: step: 452/459, loss: 0.31149402260780334 2023-01-24 01:27:29.526913: step: 454/459, loss: 0.31913089752197266 2023-01-24 01:27:30.161003: step: 456/459, loss: 0.029003942385315895 2023-01-24 01:27:30.846514: step: 458/459, loss: 0.2202642261981964 2023-01-24 01:27:31.464848: step: 460/459, loss: 0.15836532413959503 2023-01-24 01:27:32.079006: step: 462/459, loss: 0.41280826926231384 2023-01-24 01:27:32.668908: step: 464/459, loss: 0.2872396409511566 2023-01-24 01:27:33.223431: step: 466/459, loss: 0.0602542981505394 2023-01-24 01:27:33.943422: step: 468/459, loss: 0.1542939692735672 2023-01-24 01:27:34.566125: step: 470/459, loss: 0.7608324885368347 2023-01-24 01:27:35.175920: step: 472/459, loss: 0.11862261593341827 2023-01-24 01:27:35.793963: step: 474/459, loss: 0.23696233332157135 2023-01-24 01:27:36.375948: step: 476/459, loss: 0.15078437328338623 2023-01-24 01:27:36.966923: step: 478/459, loss: 0.09871316701173782 2023-01-24 01:27:37.578966: step: 480/459, loss: 0.1340356171131134 2023-01-24 01:27:38.171416: step: 482/459, loss: 0.0966702401638031 2023-01-24 01:27:38.820209: step: 484/459, loss: 0.07322462648153305 2023-01-24 01:27:39.437754: step: 486/459, loss: 0.23487450182437897 2023-01-24 01:27:40.012554: step: 488/459, loss: 0.0572572760283947 2023-01-24 01:27:40.569322: step: 490/459, loss: 0.25263193249702454 2023-01-24 01:27:41.268467: step: 492/459, loss: 0.19338096678256989 2023-01-24 01:27:41.879456: step: 494/459, loss: 0.26824524998664856 2023-01-24 01:27:42.474993: step: 496/459, loss: 0.1923637092113495 2023-01-24 01:27:43.079466: step: 498/459, loss: 0.17619982361793518 2023-01-24 01:27:43.626947: step: 500/459, loss: 0.12766583263874054 2023-01-24 01:27:44.189722: step: 502/459, loss: 0.30193236470222473 2023-01-24 01:27:44.891970: step: 504/459, loss: 0.39266642928123474 2023-01-24 01:27:45.411386: step: 506/459, loss: 0.16842728853225708 2023-01-24 01:27:46.073939: step: 508/459, loss: 0.8085938692092896 2023-01-24 01:27:46.659216: step: 510/459, loss: 0.2601700723171234 2023-01-24 01:27:47.260292: step: 512/459, loss: 0.17622999846935272 2023-01-24 01:27:47.916702: step: 514/459, loss: 0.13656868040561676 2023-01-24 01:27:48.514116: step: 516/459, loss: 0.05229703336954117 2023-01-24 01:27:49.072714: step: 518/459, loss: 0.13560259342193604 2023-01-24 01:27:49.697377: step: 520/459, loss: 0.5520755052566528 2023-01-24 01:27:50.346634: step: 522/459, loss: 0.1562456637620926 2023-01-24 01:27:50.995464: step: 524/459, loss: 0.14852549135684967 2023-01-24 01:27:51.659894: step: 526/459, loss: 0.13670992851257324 2023-01-24 01:27:52.280736: step: 528/459, loss: 0.2627703547477722 2023-01-24 
01:27:52.943911: step: 530/459, loss: 0.21745756268501282 2023-01-24 01:27:53.535543: step: 532/459, loss: 0.060979172587394714 2023-01-24 01:27:54.196083: step: 534/459, loss: 0.059494927525520325 2023-01-24 01:27:54.802630: step: 536/459, loss: 0.39801913499832153 2023-01-24 01:27:55.452333: step: 538/459, loss: 0.14751866459846497 2023-01-24 01:27:56.042315: step: 540/459, loss: 0.31264981627464294 2023-01-24 01:27:56.621046: step: 542/459, loss: 0.33336159586906433 2023-01-24 01:27:57.310471: step: 544/459, loss: 0.5002256035804749 2023-01-24 01:27:57.961618: step: 546/459, loss: 0.2963346838951111 2023-01-24 01:27:58.650347: step: 548/459, loss: 0.6217797994613647 2023-01-24 01:27:59.246638: step: 550/459, loss: 0.11309590935707092 2023-01-24 01:27:59.843554: step: 552/459, loss: 0.42406952381134033 2023-01-24 01:28:00.484869: step: 554/459, loss: 0.14117369055747986 2023-01-24 01:28:01.119697: step: 556/459, loss: 0.3626340627670288 2023-01-24 01:28:01.733292: step: 558/459, loss: 0.07610797137022018 2023-01-24 01:28:02.366434: step: 560/459, loss: 0.10453978925943375 2023-01-24 01:28:03.034112: step: 562/459, loss: 0.1946968287229538 2023-01-24 01:28:03.634257: step: 564/459, loss: 0.06357981264591217 2023-01-24 01:28:04.232187: step: 566/459, loss: 0.48066583275794983 2023-01-24 01:28:04.856903: step: 568/459, loss: 0.9693073630332947 2023-01-24 01:28:05.506671: step: 570/459, loss: 0.31119319796562195 2023-01-24 01:28:06.092358: step: 572/459, loss: 0.0684751346707344 2023-01-24 01:28:06.714062: step: 574/459, loss: 0.14072731137275696 2023-01-24 01:28:07.378898: step: 576/459, loss: 0.5962415337562561 2023-01-24 01:28:08.016103: step: 578/459, loss: 0.45287054777145386 2023-01-24 01:28:08.593832: step: 580/459, loss: 0.2678304612636566 2023-01-24 01:28:09.215952: step: 582/459, loss: 0.16364458203315735 2023-01-24 01:28:09.829017: step: 584/459, loss: 0.13397984206676483 2023-01-24 01:28:10.482640: step: 586/459, loss: 0.23038597404956818 2023-01-24 01:28:11.082788: step: 588/459, loss: 0.47421151399612427 2023-01-24 01:28:11.728741: step: 590/459, loss: 0.43619588017463684 2023-01-24 01:28:12.322665: step: 592/459, loss: 0.3919358551502228 2023-01-24 01:28:12.973541: step: 594/459, loss: 0.19632916152477264 2023-01-24 01:28:13.535158: step: 596/459, loss: 0.30438920855522156 2023-01-24 01:28:14.135446: step: 598/459, loss: 0.12928548455238342 2023-01-24 01:28:14.769306: step: 600/459, loss: 0.1546223908662796 2023-01-24 01:28:15.396410: step: 602/459, loss: 0.3809058666229248 2023-01-24 01:28:16.036375: step: 604/459, loss: 0.21195271611213684 2023-01-24 01:28:16.655029: step: 606/459, loss: 0.18842662870883942 2023-01-24 01:28:17.289730: step: 608/459, loss: 0.061067499220371246 2023-01-24 01:28:17.904214: step: 610/459, loss: 0.1671002060174942 2023-01-24 01:28:18.492320: step: 612/459, loss: 0.3932648301124573 2023-01-24 01:28:19.122079: step: 614/459, loss: 0.18073749542236328 2023-01-24 01:28:19.876039: step: 616/459, loss: 0.6080862879753113 2023-01-24 01:28:20.477330: step: 618/459, loss: 0.215606227517128 2023-01-24 01:28:21.079155: step: 620/459, loss: 0.38133513927459717 2023-01-24 01:28:21.695571: step: 622/459, loss: 0.6500828862190247 2023-01-24 01:28:22.337658: step: 624/459, loss: 0.07948678731918335 2023-01-24 01:28:22.905432: step: 626/459, loss: 0.06140897795557976 2023-01-24 01:28:23.505800: step: 628/459, loss: 0.05496756359934807 2023-01-24 01:28:24.166930: step: 630/459, loss: 1.2152177095413208 2023-01-24 01:28:24.771087: step: 632/459, loss: 
0.20944102108478546 2023-01-24 01:28:25.437962: step: 634/459, loss: 0.1446942836046219 2023-01-24 01:28:26.098155: step: 636/459, loss: 0.16859769821166992 2023-01-24 01:28:26.678240: step: 638/459, loss: 0.555134117603302 2023-01-24 01:28:27.284205: step: 640/459, loss: 0.749035120010376 2023-01-24 01:28:27.997382: step: 642/459, loss: 0.17283488810062408 2023-01-24 01:28:28.566259: step: 644/459, loss: 0.06270129978656769 2023-01-24 01:28:29.164697: step: 646/459, loss: 0.38756826519966125 2023-01-24 01:28:29.824791: step: 648/459, loss: 0.20970559120178223 2023-01-24 01:28:30.430557: step: 650/459, loss: 0.12249969691038132 2023-01-24 01:28:31.003052: step: 652/459, loss: 1.858886957168579 2023-01-24 01:28:31.645541: step: 654/459, loss: 0.19882260262966156 2023-01-24 01:28:32.274365: step: 656/459, loss: 0.0299889724701643 2023-01-24 01:28:32.876239: step: 658/459, loss: 0.16264070570468903 2023-01-24 01:28:33.465440: step: 660/459, loss: 0.15667477250099182 2023-01-24 01:28:34.060598: step: 662/459, loss: 0.31244710087776184 2023-01-24 01:28:34.625152: step: 664/459, loss: 0.08042054623365402 2023-01-24 01:28:35.274495: step: 666/459, loss: 0.10260596871376038 2023-01-24 01:28:35.934793: step: 668/459, loss: 0.0961567759513855 2023-01-24 01:28:36.574841: step: 670/459, loss: 0.05566959083080292 2023-01-24 01:28:37.160698: step: 672/459, loss: 0.23374320566654205 2023-01-24 01:28:37.782964: step: 674/459, loss: 0.1750093549489975 2023-01-24 01:28:38.394073: step: 676/459, loss: 0.0423060841858387 2023-01-24 01:28:39.030752: step: 678/459, loss: 0.3228585124015808 2023-01-24 01:28:39.614022: step: 680/459, loss: 0.14587251842021942 2023-01-24 01:28:40.257707: step: 682/459, loss: 0.3089654743671417 2023-01-24 01:28:40.887211: step: 684/459, loss: 0.1269625723361969 2023-01-24 01:28:41.521205: step: 686/459, loss: 0.06981679797172546 2023-01-24 01:28:42.072449: step: 688/459, loss: 0.20536749064922333 2023-01-24 01:28:42.675782: step: 690/459, loss: 0.19533126056194305 2023-01-24 01:28:43.212687: step: 692/459, loss: 1.0392893552780151 2023-01-24 01:28:43.867129: step: 694/459, loss: 0.10795479267835617 2023-01-24 01:28:44.554339: step: 696/459, loss: 1.8005703687667847 2023-01-24 01:28:45.155785: step: 698/459, loss: 0.2071092426776886 2023-01-24 01:28:45.806397: step: 700/459, loss: 0.1459488868713379 2023-01-24 01:28:46.449132: step: 702/459, loss: 0.05309125408530235 2023-01-24 01:28:47.058983: step: 704/459, loss: 0.18646124005317688 2023-01-24 01:28:47.663038: step: 706/459, loss: 0.16715843975543976 2023-01-24 01:28:48.315338: step: 708/459, loss: 0.49287134408950806 2023-01-24 01:28:48.947760: step: 710/459, loss: 0.14658448100090027 2023-01-24 01:28:49.531990: step: 712/459, loss: 1.3289024829864502 2023-01-24 01:28:50.137729: step: 714/459, loss: 0.0531160943210125 2023-01-24 01:28:50.718223: step: 716/459, loss: 0.19980177283287048 2023-01-24 01:28:51.358631: step: 718/459, loss: 0.08206631243228912 2023-01-24 01:28:52.006296: step: 720/459, loss: 0.7505170702934265 2023-01-24 01:28:52.561098: step: 722/459, loss: 0.18689915537834167 2023-01-24 01:28:53.132301: step: 724/459, loss: 0.07201957702636719 2023-01-24 01:28:53.752214: step: 726/459, loss: 0.10746103525161743 2023-01-24 01:28:54.452619: step: 728/459, loss: 0.24979270994663239 2023-01-24 01:28:55.025954: step: 730/459, loss: 0.07331781089305878 2023-01-24 01:28:55.616497: step: 732/459, loss: 0.22183701395988464 2023-01-24 01:28:56.213575: step: 734/459, loss: 1.180862545967102 2023-01-24 01:28:56.857751: step: 
736/459, loss: 0.18035632371902466 2023-01-24 01:28:57.465749: step: 738/459, loss: 0.14556604623794556 2023-01-24 01:28:58.094673: step: 740/459, loss: 0.4169226288795471 2023-01-24 01:28:58.694779: step: 742/459, loss: 0.166493758559227 2023-01-24 01:28:59.327505: step: 744/459, loss: 0.36087027192115784 2023-01-24 01:28:59.920006: step: 746/459, loss: 0.2825201153755188 2023-01-24 01:29:00.549584: step: 748/459, loss: 0.11809560656547546 2023-01-24 01:29:01.147156: step: 750/459, loss: 0.7618184685707092 2023-01-24 01:29:01.788985: step: 752/459, loss: 0.13606183230876923 2023-01-24 01:29:02.391069: step: 754/459, loss: 0.5206527709960938 2023-01-24 01:29:03.025025: step: 756/459, loss: 0.32819664478302 2023-01-24 01:29:03.696863: step: 758/459, loss: 0.38109511137008667 2023-01-24 01:29:04.414594: step: 760/459, loss: 0.21833238005638123 2023-01-24 01:29:04.992559: step: 762/459, loss: 0.12814854085445404 2023-01-24 01:29:05.569457: step: 764/459, loss: 0.01991993375122547 2023-01-24 01:29:06.218949: step: 766/459, loss: 0.30079904198646545 2023-01-24 01:29:06.839570: step: 768/459, loss: 0.13139012455940247 2023-01-24 01:29:07.470437: step: 770/459, loss: 0.1324775069952011 2023-01-24 01:29:08.071057: step: 772/459, loss: 0.5232932567596436 2023-01-24 01:29:08.618768: step: 774/459, loss: 0.17399485409259796 2023-01-24 01:29:09.243817: step: 776/459, loss: 0.1692793369293213 2023-01-24 01:29:09.877607: step: 778/459, loss: 0.3633912205696106 2023-01-24 01:29:10.506588: step: 780/459, loss: 0.13807927072048187 2023-01-24 01:29:11.098043: step: 782/459, loss: 0.3766198456287384 2023-01-24 01:29:11.702481: step: 784/459, loss: 0.1765850931406021 2023-01-24 01:29:12.255592: step: 786/459, loss: 0.31702229380607605 2023-01-24 01:29:12.856845: step: 788/459, loss: 0.03546976298093796 2023-01-24 01:29:13.408718: step: 790/459, loss: 0.14781183004379272 2023-01-24 01:29:14.042603: step: 792/459, loss: 0.43746310472488403 2023-01-24 01:29:14.612994: step: 794/459, loss: 0.3873409330844879 2023-01-24 01:29:15.265434: step: 796/459, loss: 1.0572718381881714 2023-01-24 01:29:15.864715: step: 798/459, loss: 0.05424221605062485 2023-01-24 01:29:16.532802: step: 800/459, loss: 0.17255792021751404 2023-01-24 01:29:17.143270: step: 802/459, loss: 0.10642454028129578 2023-01-24 01:29:17.727434: step: 804/459, loss: 0.6380460262298584 2023-01-24 01:29:18.427190: step: 806/459, loss: 0.29349279403686523 2023-01-24 01:29:19.008036: step: 808/459, loss: 0.14727184176445007 2023-01-24 01:29:19.543173: step: 810/459, loss: 0.12155184149742126 2023-01-24 01:29:20.233097: step: 812/459, loss: 0.20595060288906097 2023-01-24 01:29:20.779048: step: 814/459, loss: 0.3046717345714569 2023-01-24 01:29:21.370015: step: 816/459, loss: 0.46808260679244995 2023-01-24 01:29:22.020222: step: 818/459, loss: 0.10022162646055222 2023-01-24 01:29:22.684389: step: 820/459, loss: 0.1505175679922104 2023-01-24 01:29:23.254937: step: 822/459, loss: 0.05874425172805786 2023-01-24 01:29:23.860802: step: 824/459, loss: 0.21391405165195465 2023-01-24 01:29:24.417777: step: 826/459, loss: 0.045672010630369186 2023-01-24 01:29:25.029916: step: 828/459, loss: 0.09804245084524155 2023-01-24 01:29:25.634329: step: 830/459, loss: 0.09034465253353119 2023-01-24 01:29:26.214987: step: 832/459, loss: 0.19016298651695251 2023-01-24 01:29:26.865486: step: 834/459, loss: 0.25679174065589905 2023-01-24 01:29:27.521110: step: 836/459, loss: 0.0903720110654831 2023-01-24 01:29:28.134585: step: 838/459, loss: 0.28256016969680786 2023-01-24 
01:29:28.766670: step: 840/459, loss: 0.069305419921875 2023-01-24 01:29:29.398064: step: 842/459, loss: 0.14215560257434845 2023-01-24 01:29:29.976877: step: 844/459, loss: 0.1558695137500763 2023-01-24 01:29:30.572899: step: 846/459, loss: 0.07051127403974533 2023-01-24 01:29:31.213271: step: 848/459, loss: 0.20018818974494934 2023-01-24 01:29:31.850590: step: 850/459, loss: 0.23308880627155304 2023-01-24 01:29:32.488573: step: 852/459, loss: 0.12373056262731552 2023-01-24 01:29:33.094230: step: 854/459, loss: 0.09523728489875793 2023-01-24 01:29:33.721503: step: 856/459, loss: 0.08928917348384857 2023-01-24 01:29:34.376649: step: 858/459, loss: 0.15971139073371887 2023-01-24 01:29:34.978810: step: 860/459, loss: 0.10055336356163025 2023-01-24 01:29:35.564228: step: 862/459, loss: 0.41559648513793945 2023-01-24 01:29:36.166957: step: 864/459, loss: 0.11977822333574295 2023-01-24 01:29:36.832822: step: 866/459, loss: 0.36031222343444824 2023-01-24 01:29:37.488712: step: 868/459, loss: 0.18643563985824585 2023-01-24 01:29:38.057036: step: 870/459, loss: 0.3675023317337036 2023-01-24 01:29:38.685584: step: 872/459, loss: 0.3289773762226105 2023-01-24 01:29:39.296412: step: 874/459, loss: 0.45340460538864136 2023-01-24 01:29:39.901617: step: 876/459, loss: 0.38299551606178284 2023-01-24 01:29:40.587908: step: 878/459, loss: 0.17647844552993774 2023-01-24 01:29:41.202629: step: 880/459, loss: 0.22130052745342255 2023-01-24 01:29:41.773367: step: 882/459, loss: 0.32116979360580444 2023-01-24 01:29:42.400859: step: 884/459, loss: 0.3741645812988281 2023-01-24 01:29:42.950488: step: 886/459, loss: 0.3224460780620575 2023-01-24 01:29:43.596487: step: 888/459, loss: 0.5589501857757568 2023-01-24 01:29:44.169935: step: 890/459, loss: 0.07134560495615005 2023-01-24 01:29:44.816565: step: 892/459, loss: 0.11485853791236877 2023-01-24 01:29:45.387850: step: 894/459, loss: 0.03932184725999832 2023-01-24 01:29:46.012380: step: 896/459, loss: 0.22806015610694885 2023-01-24 01:29:46.656445: step: 898/459, loss: 0.7611336708068848 2023-01-24 01:29:47.223795: step: 900/459, loss: 0.20379619300365448 2023-01-24 01:29:47.831856: step: 902/459, loss: 0.5378459692001343 2023-01-24 01:29:48.411042: step: 904/459, loss: 0.14033350348472595 2023-01-24 01:29:49.034131: step: 906/459, loss: 0.07833391427993774 2023-01-24 01:29:49.726175: step: 908/459, loss: 0.2568073570728302 2023-01-24 01:29:50.385775: step: 910/459, loss: 0.1750573068857193 2023-01-24 01:29:51.020126: step: 912/459, loss: 0.18652953207492828 2023-01-24 01:29:51.669575: step: 914/459, loss: 0.9141426682472229 2023-01-24 01:29:52.289769: step: 916/459, loss: 0.17014101147651672 2023-01-24 01:29:52.914204: step: 918/459, loss: 2.857335090637207 2023-01-24 01:29:53.335461: step: 920/459, loss: 0.00046204854152165353 ================================================== Loss: 0.294 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3380306425811904, 'r': 0.32199503335058366, 'f1': 0.3298180419353889}, 'combined': 0.24302382037344442, 'epoch': 11} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3537604429575869, 'r': 0.28107875194993726, 'f1': 0.31325899406781255}, 'combined': 0.2004857562034, 'epoch': 11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34423315525194137, 'r': 0.3390076045080789, 'f1': 0.3416003968943739}, 'combined': 0.25170555560638075, 
'epoch': 11} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.35239415534868485, 'r': 0.277109949433284, 'f1': 0.31025032506525435}, 'combined': 0.19856020804176275, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35589967123996563, 'r': 0.3369903907945785, 'f1': 0.3461870096466722}, 'combined': 0.2550851650028111, 'epoch': 11} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34559693160946725, 'r': 0.286369745517781, 'f1': 0.31320798687238854}, 'combined': 0.22456421700284462, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24113475177304963, 'r': 0.32380952380952377, 'f1': 0.2764227642276422}, 'combined': 0.18428184281842813, 'epoch': 11} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.29464285714285715, 'r': 0.358695652173913, 'f1': 0.3235294117647059}, 'combined': 0.16176470588235295, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3409090909090909, 'r': 0.12931034482758622, 'f1': 0.1875}, 'combined': 0.125, 'epoch': 11} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 'epoch': 10} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 'epoch': 10} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'epoch': 10} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3342478880342958, 'r': 0.3266369304319968, 'f1': 0.33039858414138645}, 'combined': 0.24345158831470579, 'epoch': 5} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3413499740991752, 'r': 0.24608229950967814, 'f1': 0.28599105067157526}, 'combined': 0.18303427242980813, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3269230769230769, 'r': 0.3695652173913043, 'f1': 0.346938775510204}, 'combined': 0.173469387755102, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8} ****************************** Epoch: 12 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 
--xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:32:28.648985: step: 2/459, loss: 0.20131805539131165 2023-01-24 01:32:29.276586: step: 4/459, loss: 0.22844558954238892 2023-01-24 01:32:29.865845: step: 6/459, loss: 0.08449432253837585 2023-01-24 01:32:30.395349: step: 8/459, loss: 0.08550949394702911 2023-01-24 01:32:30.947604: step: 10/459, loss: 0.06675716489553452 2023-01-24 01:32:31.659541: step: 12/459, loss: 0.33260399103164673 2023-01-24 01:32:32.251994: step: 14/459, loss: 0.09237676113843918 2023-01-24 01:32:32.948859: step: 16/459, loss: 0.10697923600673676 2023-01-24 01:32:33.631836: step: 18/459, loss: 0.13751839101314545 2023-01-24 01:32:34.212978: step: 20/459, loss: 0.10973818600177765 2023-01-24 01:32:34.945378: step: 22/459, loss: 0.10475124418735504 2023-01-24 01:32:35.558255: step: 24/459, loss: 0.09912589192390442 2023-01-24 01:32:36.138288: step: 26/459, loss: 0.15933018922805786 2023-01-24 01:32:36.720967: step: 28/459, loss: 0.15114115178585052 2023-01-24 01:32:37.388595: step: 30/459, loss: 0.2078331708908081 2023-01-24 01:32:37.949006: step: 32/459, loss: 0.16847412288188934 2023-01-24 01:32:38.522760: step: 34/459, loss: 0.11442198604345322 2023-01-24 01:32:39.097432: step: 36/459, loss: 0.0676564946770668 2023-01-24 01:32:39.715212: step: 38/459, loss: 0.2822154760360718 2023-01-24 01:32:40.293890: step: 40/459, loss: 0.11940392851829529 2023-01-24 01:32:40.909814: step: 42/459, loss: 0.17860868573188782 2023-01-24 01:32:41.521057: step: 44/459, loss: 2.9645628929138184 2023-01-24 01:32:42.216377: step: 46/459, loss: 0.9081100821495056 2023-01-24 01:32:42.794842: step: 48/459, loss: 0.0870758444070816 2023-01-24 01:32:43.475747: step: 50/459, loss: 0.2707997262477875 2023-01-24 01:32:44.081162: step: 52/459, loss: 0.11961579322814941 2023-01-24 01:32:44.647686: step: 54/459, loss: 0.13478417694568634 2023-01-24 01:32:45.299518: step: 56/459, loss: 0.2426917999982834 2023-01-24 01:32:45.967971: step: 58/459, loss: 0.11545181274414062 2023-01-24 01:32:46.521796: step: 60/459, loss: 0.2169703096151352 2023-01-24 01:32:47.156911: step: 62/459, loss: 0.0650131106376648 2023-01-24 01:32:47.766697: step: 64/459, loss: 0.12471482157707214 2023-01-24 01:32:48.373601: step: 66/459, loss: 0.09378822147846222 2023-01-24 01:32:48.999858: step: 68/459, loss: 0.13725370168685913 2023-01-24 01:32:49.566481: step: 70/459, loss: 0.11443983018398285 2023-01-24 01:32:50.226023: step: 72/459, loss: 0.02062373422086239 2023-01-24 01:32:50.829175: step: 74/459, loss: 0.18415574729442596 2023-01-24 01:32:51.450165: step: 76/459, loss: 0.7524752020835876 2023-01-24 01:32:51.989744: step: 78/459, loss: 0.22762346267700195 2023-01-24 01:32:52.609503: step: 80/459, loss: 0.514062225818634 2023-01-24 01:32:53.248965: step: 82/459, loss: 0.5580326318740845 2023-01-24 01:32:53.830339: step: 84/459, loss: 0.08575423061847687 2023-01-24 01:32:54.449475: step: 86/459, loss: 0.09179013222455978 2023-01-24 01:32:55.064529: step: 88/459, loss: 0.09330759197473526 2023-01-24 01:32:55.667824: step: 90/459, loss: 0.2675114870071411 2023-01-24 01:32:56.297990: step: 92/459, loss: 0.12284877151250839 2023-01-24 01:32:56.908414: step: 94/459, loss: 0.18507355451583862 2023-01-24 01:32:57.553673: step: 96/459, loss: 0.18433430790901184 2023-01-24 01:32:58.199655: step: 98/459, loss: 0.09178150445222855 2023-01-24 01:32:58.788456: step: 100/459, loss: 0.1079837828874588 2023-01-24 01:32:59.363345: step: 102/459, 
loss: 0.21755273640155792 2023-01-24 01:33:00.051236: step: 104/459, loss: 0.2343464493751526 2023-01-24 01:33:00.632707: step: 106/459, loss: 0.06486473232507706 2023-01-24 01:33:01.175600: step: 108/459, loss: 0.09672924131155014 2023-01-24 01:33:01.818162: step: 110/459, loss: 0.23191188275814056 2023-01-24 01:33:02.401151: step: 112/459, loss: 0.06009260565042496 2023-01-24 01:33:02.986975: step: 114/459, loss: 0.09129321575164795 2023-01-24 01:33:03.587252: step: 116/459, loss: 0.3086608350276947 2023-01-24 01:33:04.182394: step: 118/459, loss: 0.07996121793985367 2023-01-24 01:33:04.810977: step: 120/459, loss: 0.07860449701547623 2023-01-24 01:33:05.517271: step: 122/459, loss: 0.5003012418746948 2023-01-24 01:33:06.175020: step: 124/459, loss: 0.3858654201030731 2023-01-24 01:33:06.790018: step: 126/459, loss: 0.39125943183898926 2023-01-24 01:33:07.493906: step: 128/459, loss: 0.2204659879207611 2023-01-24 01:33:08.091455: step: 130/459, loss: 0.09331394731998444 2023-01-24 01:33:08.676808: step: 132/459, loss: 0.08864787966012955 2023-01-24 01:33:09.240548: step: 134/459, loss: 0.02414126694202423 2023-01-24 01:33:09.886878: step: 136/459, loss: 0.14870506525039673 2023-01-24 01:33:10.550465: step: 138/459, loss: 0.0352768674492836 2023-01-24 01:33:11.237043: step: 140/459, loss: 0.13664919137954712 2023-01-24 01:33:11.942591: step: 142/459, loss: 0.04156733304262161 2023-01-24 01:33:12.550405: step: 144/459, loss: 0.48381906747817993 2023-01-24 01:33:13.190103: step: 146/459, loss: 0.1998247504234314 2023-01-24 01:33:13.794402: step: 148/459, loss: 0.2640714943408966 2023-01-24 01:33:14.405956: step: 150/459, loss: 0.07567166537046432 2023-01-24 01:33:15.028786: step: 152/459, loss: 0.1927567422389984 2023-01-24 01:33:15.677392: step: 154/459, loss: 0.3202313482761383 2023-01-24 01:33:16.278526: step: 156/459, loss: 0.6741471290588379 2023-01-24 01:33:16.875537: step: 158/459, loss: 0.23301827907562256 2023-01-24 01:33:17.626677: step: 160/459, loss: 0.10075488686561584 2023-01-24 01:33:18.264152: step: 162/459, loss: 0.2340995967388153 2023-01-24 01:33:18.893451: step: 164/459, loss: 0.06285052001476288 2023-01-24 01:33:19.584637: step: 166/459, loss: 0.0926881656050682 2023-01-24 01:33:20.227215: step: 168/459, loss: 0.08863204717636108 2023-01-24 01:33:20.857480: step: 170/459, loss: 0.20101694762706757 2023-01-24 01:33:21.440323: step: 172/459, loss: 0.1728532314300537 2023-01-24 01:33:21.998967: step: 174/459, loss: 0.12905050814151764 2023-01-24 01:33:22.627861: step: 176/459, loss: 0.08486810326576233 2023-01-24 01:33:23.175458: step: 178/459, loss: 0.10866452008485794 2023-01-24 01:33:23.786944: step: 180/459, loss: 0.2677394151687622 2023-01-24 01:33:24.416740: step: 182/459, loss: 0.13241417706012726 2023-01-24 01:33:25.053194: step: 184/459, loss: 0.19168369472026825 2023-01-24 01:33:25.641329: step: 186/459, loss: 0.45648613572120667 2023-01-24 01:33:26.265062: step: 188/459, loss: 3.151050090789795 2023-01-24 01:33:26.881085: step: 190/459, loss: 0.4376945197582245 2023-01-24 01:33:27.555465: step: 192/459, loss: 0.11654966324567795 2023-01-24 01:33:28.187121: step: 194/459, loss: 0.24869073927402496 2023-01-24 01:33:28.790464: step: 196/459, loss: 0.07175112515687943 2023-01-24 01:33:29.419777: step: 198/459, loss: 0.11445057392120361 2023-01-24 01:33:29.956446: step: 200/459, loss: 0.039626594632864 2023-01-24 01:33:30.543209: step: 202/459, loss: 0.128557488322258 2023-01-24 01:33:31.191605: step: 204/459, loss: 0.0716424435377121 2023-01-24 01:33:31.823803: 
step: 206/459, loss: 0.39855828881263733 2023-01-24 01:33:32.409478: step: 208/459, loss: 0.09718836843967438 2023-01-24 01:33:33.000892: step: 210/459, loss: 0.3510565757751465 2023-01-24 01:33:33.680523: step: 212/459, loss: 0.06685909628868103 2023-01-24 01:33:34.287346: step: 214/459, loss: 0.039473067969083786 2023-01-24 01:33:34.854803: step: 216/459, loss: 0.10181576013565063 2023-01-24 01:33:35.488948: step: 218/459, loss: 0.07519818842411041 2023-01-24 01:33:36.135036: step: 220/459, loss: 0.09037259966135025 2023-01-24 01:33:36.791633: step: 222/459, loss: 0.09935402870178223 2023-01-24 01:33:37.445556: step: 224/459, loss: 0.16083943843841553 2023-01-24 01:33:38.028758: step: 226/459, loss: 0.09295327216386795 2023-01-24 01:33:38.660157: step: 228/459, loss: 0.058349546045064926 2023-01-24 01:33:39.302753: step: 230/459, loss: 0.19169890880584717 2023-01-24 01:33:39.949836: step: 232/459, loss: 0.7037563323974609 2023-01-24 01:33:40.591368: step: 234/459, loss: 0.13236108422279358 2023-01-24 01:33:41.267875: step: 236/459, loss: 0.1643594652414322 2023-01-24 01:33:41.899666: step: 238/459, loss: 0.351449191570282 2023-01-24 01:33:42.462973: step: 240/459, loss: 0.4893571138381958 2023-01-24 01:33:42.993971: step: 242/459, loss: 0.08955967426300049 2023-01-24 01:33:43.645431: step: 244/459, loss: 0.11619558185338974 2023-01-24 01:33:44.216746: step: 246/459, loss: 0.27599918842315674 2023-01-24 01:33:44.839436: step: 248/459, loss: 0.29120033979415894 2023-01-24 01:33:45.445367: step: 250/459, loss: 0.12366664409637451 2023-01-24 01:33:46.009539: step: 252/459, loss: 0.19689598679542542 2023-01-24 01:33:46.621422: step: 254/459, loss: 0.3352130651473999 2023-01-24 01:33:47.270354: step: 256/459, loss: 0.18488091230392456 2023-01-24 01:33:47.901520: step: 258/459, loss: 0.23462745547294617 2023-01-24 01:33:48.493093: step: 260/459, loss: 0.21218907833099365 2023-01-24 01:33:49.147656: step: 262/459, loss: 0.18350842595100403 2023-01-24 01:33:49.791053: step: 264/459, loss: 0.4815365970134735 2023-01-24 01:33:50.368551: step: 266/459, loss: 0.23611797392368317 2023-01-24 01:33:51.004276: step: 268/459, loss: 0.3919960856437683 2023-01-24 01:33:51.629204: step: 270/459, loss: 0.0872403234243393 2023-01-24 01:33:52.204955: step: 272/459, loss: 0.35844168066978455 2023-01-24 01:33:52.892351: step: 274/459, loss: 0.17845410108566284 2023-01-24 01:33:53.502114: step: 276/459, loss: 0.41007229685783386 2023-01-24 01:33:54.162448: step: 278/459, loss: 0.044664375483989716 2023-01-24 01:33:54.726449: step: 280/459, loss: 0.29073837399482727 2023-01-24 01:33:55.488252: step: 282/459, loss: 0.13408984243869781 2023-01-24 01:33:56.072792: step: 284/459, loss: 0.09195280075073242 2023-01-24 01:33:56.599316: step: 286/459, loss: 0.28312787413597107 2023-01-24 01:33:57.219369: step: 288/459, loss: 0.05498214811086655 2023-01-24 01:33:57.919195: step: 290/459, loss: 0.08361966162919998 2023-01-24 01:33:58.601082: step: 292/459, loss: 0.08075595647096634 2023-01-24 01:33:59.214316: step: 294/459, loss: 0.18923979997634888 2023-01-24 01:33:59.793234: step: 296/459, loss: 0.3001464605331421 2023-01-24 01:34:00.447017: step: 298/459, loss: 1.224954605102539 2023-01-24 01:34:01.083758: step: 300/459, loss: 0.0405903197824955 2023-01-24 01:34:01.680259: step: 302/459, loss: 0.08239481598138809 2023-01-24 01:34:02.263884: step: 304/459, loss: 0.030959129333496094 2023-01-24 01:34:02.826860: step: 306/459, loss: 0.04204098880290985 2023-01-24 01:34:03.479856: step: 308/459, loss: 0.1992795169353485 
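Every training line in this log follows the same shape, "<YYYY-MM-DD HH:MM:SS.ffffff>: step: <n>/459, loss: <value>", and in this capture the entries are wrapped across physical lines. The following is a minimal Python sketch (separate from train.py; the file name train.log and the function names are illustrative assumptions) for recovering the per-step losses from such a capture, e.g. to average or plot them:

import re
import statistics

# Matches "<timestamp>: step: <n>/<total>, loss: <value>" anywhere in the text.
STEP_RE = re.compile(
    r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+): step: (\d+)/\d+, loss: ([0-9.]+)"
)

def parse_step_losses(log_text: str):
    """Return (timestamp, step, loss) tuples found anywhere in the text."""
    # The capture wraps entries across lines, so collapse whitespace before matching.
    flat = re.sub(r"\s+", " ", log_text)
    return [(ts, int(step), float(loss)) for ts, step, loss in STEP_RE.findall(flat)]

if __name__ == "__main__":
    with open("train.log", encoding="utf-8") as fh:  # illustrative file name
        records = parse_step_losses(fh.read())
    losses = [loss for _, _, loss in records]
    if losses:
        print(f"{len(losses)} steps parsed, mean loss {statistics.mean(losses):.3f}")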
2023-01-24 01:34:04.076618: step: 310/459, loss: 0.0645618885755539 2023-01-24 01:34:04.644806: step: 312/459, loss: 0.018429547548294067 2023-01-24 01:34:05.376078: step: 314/459, loss: 0.1906338483095169 2023-01-24 01:34:05.999944: step: 316/459, loss: 0.5916157960891724 2023-01-24 01:34:06.618214: step: 318/459, loss: 0.0963098555803299 2023-01-24 01:34:07.225794: step: 320/459, loss: 0.24217282235622406 2023-01-24 01:34:07.849779: step: 322/459, loss: 0.3090922236442566 2023-01-24 01:34:08.440366: step: 324/459, loss: 0.11071746796369553 2023-01-24 01:34:09.068637: step: 326/459, loss: 0.0718727633357048 2023-01-24 01:34:09.657201: step: 328/459, loss: 0.17915783822536469 2023-01-24 01:34:10.254971: step: 330/459, loss: 0.03348206356167793 2023-01-24 01:34:10.869415: step: 332/459, loss: 0.41032975912094116 2023-01-24 01:34:11.504178: step: 334/459, loss: 0.18370826542377472 2023-01-24 01:34:12.152452: step: 336/459, loss: 0.05640631169080734 2023-01-24 01:34:12.734785: step: 338/459, loss: 0.0667005106806755 2023-01-24 01:34:13.394590: step: 340/459, loss: 0.0664251521229744 2023-01-24 01:34:14.020903: step: 342/459, loss: 0.05521997809410095 2023-01-24 01:34:14.626013: step: 344/459, loss: 0.17536276578903198 2023-01-24 01:34:15.283624: step: 346/459, loss: 0.2197228968143463 2023-01-24 01:34:15.858923: step: 348/459, loss: 0.6603759527206421 2023-01-24 01:34:16.466470: step: 350/459, loss: 0.24606865644454956 2023-01-24 01:34:17.081371: step: 352/459, loss: 0.11682845652103424 2023-01-24 01:34:17.715927: step: 354/459, loss: 0.2522812485694885 2023-01-24 01:34:18.312391: step: 356/459, loss: 0.2281874716281891 2023-01-24 01:34:18.988938: step: 358/459, loss: 0.3431416451931 2023-01-24 01:34:19.614418: step: 360/459, loss: 0.3026084005832672 2023-01-24 01:34:20.253924: step: 362/459, loss: 0.2366412878036499 2023-01-24 01:34:20.891334: step: 364/459, loss: 0.08342117816209793 2023-01-24 01:34:21.497376: step: 366/459, loss: 0.07994380593299866 2023-01-24 01:34:22.065526: step: 368/459, loss: 0.7085579633712769 2023-01-24 01:34:22.709140: step: 370/459, loss: 0.10055103898048401 2023-01-24 01:34:23.328096: step: 372/459, loss: 0.36449721455574036 2023-01-24 01:34:23.928319: step: 374/459, loss: 0.15065604448318481 2023-01-24 01:34:24.615063: step: 376/459, loss: 0.11090473085641861 2023-01-24 01:34:25.228445: step: 378/459, loss: 0.1498882919549942 2023-01-24 01:34:25.838640: step: 380/459, loss: 0.2562587559223175 2023-01-24 01:34:26.438635: step: 382/459, loss: 0.08001600950956345 2023-01-24 01:34:27.088588: step: 384/459, loss: 0.042522940784692764 2023-01-24 01:34:27.710607: step: 386/459, loss: 0.47736892104148865 2023-01-24 01:34:28.327438: step: 388/459, loss: 0.40947896242141724 2023-01-24 01:34:29.015215: step: 390/459, loss: 0.10184325277805328 2023-01-24 01:34:29.638310: step: 392/459, loss: 0.18467067182064056 2023-01-24 01:34:30.283324: step: 394/459, loss: 0.08231713622808456 2023-01-24 01:34:30.921158: step: 396/459, loss: 0.3529738783836365 2023-01-24 01:34:31.554991: step: 398/459, loss: 0.1363532990217209 2023-01-24 01:34:32.191984: step: 400/459, loss: 0.12389450520277023 2023-01-24 01:34:32.795826: step: 402/459, loss: 0.13068382441997528 2023-01-24 01:34:33.377356: step: 404/459, loss: 0.06090005487203598 2023-01-24 01:34:33.976681: step: 406/459, loss: 0.100251205265522 2023-01-24 01:34:34.601865: step: 408/459, loss: 0.10678161680698395 2023-01-24 01:34:35.228987: step: 410/459, loss: 0.10369154810905457 2023-01-24 01:34:35.820687: step: 412/459, loss: 
0.3123739957809448 2023-01-24 01:34:36.428922: step: 414/459, loss: 0.8888931274414062 2023-01-24 01:34:37.080764: step: 416/459, loss: 0.11201296001672745 2023-01-24 01:34:37.676413: step: 418/459, loss: 0.08623640239238739 2023-01-24 01:34:38.284021: step: 420/459, loss: 0.1697816550731659 2023-01-24 01:34:38.898416: step: 422/459, loss: 0.11019528657197952 2023-01-24 01:34:39.578248: step: 424/459, loss: 0.35956937074661255 2023-01-24 01:34:40.200849: step: 426/459, loss: 0.32616132497787476 2023-01-24 01:34:40.799615: step: 428/459, loss: 0.1757209450006485 2023-01-24 01:34:41.363906: step: 430/459, loss: 0.08463487774133682 2023-01-24 01:34:41.923505: step: 432/459, loss: 0.07098120450973511 2023-01-24 01:34:42.561455: step: 434/459, loss: 0.09154414385557175 2023-01-24 01:34:43.244591: step: 436/459, loss: 0.9429218769073486 2023-01-24 01:34:43.909824: step: 438/459, loss: 0.24478666484355927 2023-01-24 01:34:44.569928: step: 440/459, loss: 0.8889807462692261 2023-01-24 01:34:45.243154: step: 442/459, loss: 0.2218589335680008 2023-01-24 01:34:45.793469: step: 444/459, loss: 0.23123125731945038 2023-01-24 01:34:46.408372: step: 446/459, loss: 0.04629623889923096 2023-01-24 01:34:47.008600: step: 448/459, loss: 0.35860586166381836 2023-01-24 01:34:47.633779: step: 450/459, loss: 0.1962718814611435 2023-01-24 01:34:48.240387: step: 452/459, loss: 0.05879446119070053 2023-01-24 01:34:48.843296: step: 454/459, loss: 0.20231661200523376 2023-01-24 01:34:49.438640: step: 456/459, loss: 0.08510150015354156 2023-01-24 01:34:50.062664: step: 458/459, loss: 0.04160851612687111 2023-01-24 01:34:50.638117: step: 460/459, loss: 0.11409764736890793 2023-01-24 01:34:51.242196: step: 462/459, loss: 0.08921925723552704 2023-01-24 01:34:51.884773: step: 464/459, loss: 0.2401428520679474 2023-01-24 01:34:52.522190: step: 466/459, loss: 0.14771372079849243 2023-01-24 01:34:53.123846: step: 468/459, loss: 0.19556252658367157 2023-01-24 01:34:53.817671: step: 470/459, loss: 0.09037167578935623 2023-01-24 01:34:54.451432: step: 472/459, loss: 0.9626556634902954 2023-01-24 01:34:55.023799: step: 474/459, loss: 0.23599663376808167 2023-01-24 01:34:55.568938: step: 476/459, loss: 0.5714236497879028 2023-01-24 01:34:56.227616: step: 478/459, loss: 0.380731999874115 2023-01-24 01:34:56.859759: step: 480/459, loss: 0.5467585325241089 2023-01-24 01:34:57.481945: step: 482/459, loss: 0.2123822420835495 2023-01-24 01:34:58.075410: step: 484/459, loss: 1.3242627382278442 2023-01-24 01:34:58.741385: step: 486/459, loss: 0.4884151518344879 2023-01-24 01:34:59.341521: step: 488/459, loss: 0.15238727629184723 2023-01-24 01:34:59.915818: step: 490/459, loss: 0.06837502121925354 2023-01-24 01:35:00.533473: step: 492/459, loss: 0.042243052273988724 2023-01-24 01:35:01.179214: step: 494/459, loss: 0.1598936766386032 2023-01-24 01:35:01.756293: step: 496/459, loss: 0.11864296346902847 2023-01-24 01:35:02.349321: step: 498/459, loss: 0.09276149421930313 2023-01-24 01:35:02.955878: step: 500/459, loss: 0.16140179336071014 2023-01-24 01:35:03.562115: step: 502/459, loss: 0.2329453080892563 2023-01-24 01:35:04.198361: step: 504/459, loss: 0.15668059885501862 2023-01-24 01:35:04.814978: step: 506/459, loss: 0.2858220040798187 2023-01-24 01:35:05.463274: step: 508/459, loss: 0.24222925305366516 2023-01-24 01:35:06.085706: step: 510/459, loss: 0.10641314834356308 2023-01-24 01:35:06.706348: step: 512/459, loss: 0.14571836590766907 2023-01-24 01:35:07.420314: step: 514/459, loss: 0.435564786195755 2023-01-24 01:35:08.016786: step: 
516/459, loss: 0.45303213596343994 2023-01-24 01:35:08.603890: step: 518/459, loss: 6.1606268882751465 2023-01-24 01:35:09.196518: step: 520/459, loss: 0.088041752576828 2023-01-24 01:35:09.773521: step: 522/459, loss: 0.21979716420173645 2023-01-24 01:35:10.441399: step: 524/459, loss: 0.05410269275307655 2023-01-24 01:35:11.044648: step: 526/459, loss: 0.10581444203853607 2023-01-24 01:35:11.692622: step: 528/459, loss: 0.2970019578933716 2023-01-24 01:35:12.388914: step: 530/459, loss: 0.4424816966056824 2023-01-24 01:35:13.025949: step: 532/459, loss: 0.20262694358825684 2023-01-24 01:35:13.694240: step: 534/459, loss: 0.12549827992916107 2023-01-24 01:35:14.352123: step: 536/459, loss: 0.0934322401881218 2023-01-24 01:35:14.980251: step: 538/459, loss: 0.10896021127700806 2023-01-24 01:35:15.558402: step: 540/459, loss: 0.0859772115945816 2023-01-24 01:35:16.189242: step: 542/459, loss: 0.08527164906263351 2023-01-24 01:35:16.821094: step: 544/459, loss: 0.28704050183296204 2023-01-24 01:35:17.429223: step: 546/459, loss: 0.10426626354455948 2023-01-24 01:35:18.082545: step: 548/459, loss: 1.026018500328064 2023-01-24 01:35:18.789556: step: 550/459, loss: 0.15484118461608887 2023-01-24 01:35:19.411951: step: 552/459, loss: 0.18870952725410461 2023-01-24 01:35:20.025280: step: 554/459, loss: 0.09706490486860275 2023-01-24 01:35:20.647403: step: 556/459, loss: 0.17788128554821014 2023-01-24 01:35:21.242665: step: 558/459, loss: 0.06312243640422821 2023-01-24 01:35:21.859193: step: 560/459, loss: 0.1909502148628235 2023-01-24 01:35:22.517246: step: 562/459, loss: 0.11460736393928528 2023-01-24 01:35:23.110690: step: 564/459, loss: 0.07652252167463303 2023-01-24 01:35:23.727789: step: 566/459, loss: 0.1845918446779251 2023-01-24 01:35:24.280994: step: 568/459, loss: 0.06979547441005707 2023-01-24 01:35:24.927399: step: 570/459, loss: 0.09649089723825455 2023-01-24 01:35:25.538352: step: 572/459, loss: 0.11327105760574341 2023-01-24 01:35:26.117223: step: 574/459, loss: 0.11269639432430267 2023-01-24 01:35:26.653952: step: 576/459, loss: 0.25770804286003113 2023-01-24 01:35:27.256395: step: 578/459, loss: 1.389470100402832 2023-01-24 01:35:27.845351: step: 580/459, loss: 0.29336947202682495 2023-01-24 01:35:28.547565: step: 582/459, loss: 0.16969692707061768 2023-01-24 01:35:29.321730: step: 584/459, loss: 0.13713249564170837 2023-01-24 01:35:29.961310: step: 586/459, loss: 0.07624874264001846 2023-01-24 01:35:30.586058: step: 588/459, loss: 0.07569125294685364 2023-01-24 01:35:31.278676: step: 590/459, loss: 0.11003334820270538 2023-01-24 01:35:31.879257: step: 592/459, loss: 0.10472642630338669 2023-01-24 01:35:32.472759: step: 594/459, loss: 0.18261238932609558 2023-01-24 01:35:33.086991: step: 596/459, loss: 0.14879043400287628 2023-01-24 01:35:33.648941: step: 598/459, loss: 0.2946148216724396 2023-01-24 01:35:34.282638: step: 600/459, loss: 0.25499653816223145 2023-01-24 01:35:34.983592: step: 602/459, loss: 0.15327461063861847 2023-01-24 01:35:35.629708: step: 604/459, loss: 0.12625883519649506 2023-01-24 01:35:36.248240: step: 606/459, loss: 0.10408663004636765 2023-01-24 01:35:36.869764: step: 608/459, loss: 0.2055274099111557 2023-01-24 01:35:37.503271: step: 610/459, loss: 0.20048856735229492 2023-01-24 01:35:38.139064: step: 612/459, loss: 0.1777433604001999 2023-01-24 01:35:38.755524: step: 614/459, loss: 0.21171128749847412 2023-01-24 01:35:39.357721: step: 616/459, loss: 0.10986361652612686 2023-01-24 01:35:39.965464: step: 618/459, loss: 0.1676647961139679 2023-01-24 
01:35:40.588906: step: 620/459, loss: 0.25269410014152527 2023-01-24 01:35:41.210425: step: 622/459, loss: 0.16498234868049622 2023-01-24 01:35:41.786767: step: 624/459, loss: 0.05144534632563591 2023-01-24 01:35:42.354484: step: 626/459, loss: 0.3980550765991211 2023-01-24 01:35:43.036950: step: 628/459, loss: 0.08984960615634918 2023-01-24 01:35:43.672575: step: 630/459, loss: 0.12520700693130493 2023-01-24 01:35:44.269602: step: 632/459, loss: 0.16921642422676086 2023-01-24 01:35:44.896101: step: 634/459, loss: 1.35245943069458 2023-01-24 01:35:45.485280: step: 636/459, loss: 0.08083229511976242 2023-01-24 01:35:46.070102: step: 638/459, loss: 0.08771326392889023 2023-01-24 01:35:46.653632: step: 640/459, loss: 0.08717463165521622 2023-01-24 01:35:47.269111: step: 642/459, loss: 0.09722501784563065 2023-01-24 01:35:47.880013: step: 644/459, loss: 0.358805388212204 2023-01-24 01:35:48.474245: step: 646/459, loss: 0.06192414090037346 2023-01-24 01:35:49.084926: step: 648/459, loss: 0.14740224182605743 2023-01-24 01:35:49.699456: step: 650/459, loss: 0.117793008685112 2023-01-24 01:35:50.335551: step: 652/459, loss: 0.08165507763624191 2023-01-24 01:35:50.988636: step: 654/459, loss: 0.3100636303424835 2023-01-24 01:35:51.537973: step: 656/459, loss: 0.0851125568151474 2023-01-24 01:35:52.105423: step: 658/459, loss: 0.12106870859861374 2023-01-24 01:35:52.782703: step: 660/459, loss: 0.6381152868270874 2023-01-24 01:35:53.448542: step: 662/459, loss: 0.2818031311035156 2023-01-24 01:35:54.101412: step: 664/459, loss: 0.27813786268234253 2023-01-24 01:35:54.720574: step: 666/459, loss: 0.14081986248493195 2023-01-24 01:35:55.298219: step: 668/459, loss: 0.15740583837032318 2023-01-24 01:35:55.884304: step: 670/459, loss: 0.8002132177352905 2023-01-24 01:35:56.564605: step: 672/459, loss: 0.4966908097267151 2023-01-24 01:35:57.181523: step: 674/459, loss: 0.1267368495464325 2023-01-24 01:35:57.821674: step: 676/459, loss: 0.10885705798864365 2023-01-24 01:35:58.465646: step: 678/459, loss: 0.1922990083694458 2023-01-24 01:35:59.025764: step: 680/459, loss: 0.09538579732179642 2023-01-24 01:35:59.658220: step: 682/459, loss: 0.09502815455198288 2023-01-24 01:36:00.278566: step: 684/459, loss: 0.14922717213630676 2023-01-24 01:36:00.930592: step: 686/459, loss: 0.10707380622625351 2023-01-24 01:36:01.534453: step: 688/459, loss: 0.8439429402351379 2023-01-24 01:36:02.115022: step: 690/459, loss: 0.09936154633760452 2023-01-24 01:36:02.698532: step: 692/459, loss: 0.0834423080086708 2023-01-24 01:36:03.270179: step: 694/459, loss: 0.5072116851806641 2023-01-24 01:36:03.836956: step: 696/459, loss: 0.06131739914417267 2023-01-24 01:36:04.500249: step: 698/459, loss: 0.3827941119670868 2023-01-24 01:36:05.174370: step: 700/459, loss: 0.24438755214214325 2023-01-24 01:36:05.809126: step: 702/459, loss: 0.5124945640563965 2023-01-24 01:36:06.387549: step: 704/459, loss: 0.8272207379341125 2023-01-24 01:36:06.940186: step: 706/459, loss: 0.1691576987504959 2023-01-24 01:36:07.566564: step: 708/459, loss: 0.17819352447986603 2023-01-24 01:36:08.192910: step: 710/459, loss: 0.062387362122535706 2023-01-24 01:36:08.867870: step: 712/459, loss: 0.22510869801044464 2023-01-24 01:36:09.549938: step: 714/459, loss: 0.3724295198917389 2023-01-24 01:36:10.155923: step: 716/459, loss: 0.07734615355730057 2023-01-24 01:36:10.824736: step: 718/459, loss: 0.15815716981887817 2023-01-24 01:36:11.489822: step: 720/459, loss: 0.20160625874996185 2023-01-24 01:36:12.041170: step: 722/459, loss: 0.10010167956352234 
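The score dictionaries in the epoch summaries appear to follow the standard precision/recall F1, with 'combined' equal to the product of the template F1 and the slot F1: for the epoch-11 Dev Chinese entry above, 0.7368421052631579 * 0.3298180419353889 is approximately 0.2430238, which matches the reported 'combined' value. Below is a minimal sketch of that arithmetic, inferred from the logged numbers rather than taken from train.py (names are illustrative):

def f1(p: float, r: float) -> float:
    """Standard F1 = 2pr / (p + r); 0.0 when p + r == 0."""
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined_score(template: dict, slot: dict) -> float:
    """The logged 'combined' value matches template_f1 * slot_f1."""
    return f1(template["p"], template["r"]) * f1(slot["p"], slot["r"])

# Dev Chinese, epoch 11 (precision/recall copied from the summary above):
template = {"p": 1.0, "r": 0.5833333333333334}
slot = {"p": 0.3380306425811904, "r": 0.32199503335058366}
print(combined_score(template, slot))  # ~0.24302382, as reported in the log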
2023-01-24 01:36:12.622569: step: 724/459, loss: 0.10462930798530579 2023-01-24 01:36:13.359751: step: 726/459, loss: 0.08646044135093689 2023-01-24 01:36:13.967057: step: 728/459, loss: 0.454476535320282 2023-01-24 01:36:14.615496: step: 730/459, loss: 0.40503478050231934 2023-01-24 01:36:15.188979: step: 732/459, loss: 0.15852321684360504 2023-01-24 01:36:15.778992: step: 734/459, loss: 0.07810546457767487 2023-01-24 01:36:16.384294: step: 736/459, loss: 0.13083352148532867 2023-01-24 01:36:17.026400: step: 738/459, loss: 0.10266000032424927 2023-01-24 01:36:17.714503: step: 740/459, loss: 0.23322638869285583 2023-01-24 01:36:18.274205: step: 742/459, loss: 0.10288105905056 2023-01-24 01:36:18.895088: step: 744/459, loss: 0.16868968307971954 2023-01-24 01:36:19.570871: step: 746/459, loss: 0.12133030593395233 2023-01-24 01:36:20.245998: step: 748/459, loss: 0.06390470266342163 2023-01-24 01:36:20.858896: step: 750/459, loss: 0.16227714717388153 2023-01-24 01:36:21.496133: step: 752/459, loss: 0.337083637714386 2023-01-24 01:36:22.081505: step: 754/459, loss: 0.1622665822505951 2023-01-24 01:36:22.700726: step: 756/459, loss: 0.1814146488904953 2023-01-24 01:36:23.303938: step: 758/459, loss: 0.08688877522945404 2023-01-24 01:36:23.906233: step: 760/459, loss: 0.30689358711242676 2023-01-24 01:36:24.567816: step: 762/459, loss: 0.1264703869819641 2023-01-24 01:36:25.194546: step: 764/459, loss: 0.08501287549734116 2023-01-24 01:36:25.838721: step: 766/459, loss: 0.06374897062778473 2023-01-24 01:36:26.438151: step: 768/459, loss: 0.0534362755715847 2023-01-24 01:36:27.067206: step: 770/459, loss: 0.2057400345802307 2023-01-24 01:36:27.642802: step: 772/459, loss: 0.19386570155620575 2023-01-24 01:36:28.292778: step: 774/459, loss: 0.4729737639427185 2023-01-24 01:36:28.878302: step: 776/459, loss: 0.09114549309015274 2023-01-24 01:36:29.446698: step: 778/459, loss: 0.3780786395072937 2023-01-24 01:36:30.032254: step: 780/459, loss: 1.2965947389602661 2023-01-24 01:36:30.664889: step: 782/459, loss: 0.384662389755249 2023-01-24 01:36:31.262168: step: 784/459, loss: 0.07792684435844421 2023-01-24 01:36:31.870092: step: 786/459, loss: 0.18571904301643372 2023-01-24 01:36:32.510465: step: 788/459, loss: 0.10134194791316986 2023-01-24 01:36:33.100907: step: 790/459, loss: 0.3236640393733978 2023-01-24 01:36:33.736369: step: 792/459, loss: 0.22309847176074982 2023-01-24 01:36:34.362303: step: 794/459, loss: 0.12304048985242844 2023-01-24 01:36:34.999367: step: 796/459, loss: 0.10716929286718369 2023-01-24 01:36:35.609266: step: 798/459, loss: 0.04786952957510948 2023-01-24 01:36:36.220714: step: 800/459, loss: 0.24123647809028625 2023-01-24 01:36:36.741507: step: 802/459, loss: 0.1038781926035881 2023-01-24 01:36:37.389077: step: 804/459, loss: 0.059039145708084106 2023-01-24 01:36:37.997925: step: 806/459, loss: 0.31086266040802 2023-01-24 01:36:38.626523: step: 808/459, loss: 0.10875619202852249 2023-01-24 01:36:39.288489: step: 810/459, loss: 0.27358630299568176 2023-01-24 01:36:39.865553: step: 812/459, loss: 0.19512958824634552 2023-01-24 01:36:40.467216: step: 814/459, loss: 0.07586756348609924 2023-01-24 01:36:41.148890: step: 816/459, loss: 0.4910012185573578 2023-01-24 01:36:41.780041: step: 818/459, loss: 0.19788500666618347 2023-01-24 01:36:42.398989: step: 820/459, loss: 0.5227187871932983 2023-01-24 01:36:43.034624: step: 822/459, loss: 0.19593602418899536 2023-01-24 01:36:43.701928: step: 824/459, loss: 0.154924213886261 2023-01-24 01:36:44.295242: step: 826/459, loss: 
0.0835176408290863 2023-01-24 01:36:44.946506: step: 828/459, loss: 0.16370746493339539 2023-01-24 01:36:45.588199: step: 830/459, loss: 0.08382795751094818 2023-01-24 01:36:46.249180: step: 832/459, loss: 0.23770079016685486 2023-01-24 01:36:46.851896: step: 834/459, loss: 0.14226698875427246 2023-01-24 01:36:47.471186: step: 836/459, loss: 0.321987122297287 2023-01-24 01:36:48.096840: step: 838/459, loss: 0.240774467587471 2023-01-24 01:36:48.727520: step: 840/459, loss: 0.1930207461118698 2023-01-24 01:36:49.410638: step: 842/459, loss: 0.19894415140151978 2023-01-24 01:36:50.035761: step: 844/459, loss: 0.1389712244272232 2023-01-24 01:36:50.667005: step: 846/459, loss: 0.6015245318412781 2023-01-24 01:36:51.293792: step: 848/459, loss: 0.22200769186019897 2023-01-24 01:36:51.906117: step: 850/459, loss: 0.07075955718755722 2023-01-24 01:36:52.544207: step: 852/459, loss: 0.20462752878665924 2023-01-24 01:36:53.311437: step: 854/459, loss: 0.1736164391040802 2023-01-24 01:36:53.960327: step: 856/459, loss: 0.14379091560840607 2023-01-24 01:36:54.571535: step: 858/459, loss: 0.10607738047838211 2023-01-24 01:36:55.151251: step: 860/459, loss: 0.0412941612303257 2023-01-24 01:36:55.745801: step: 862/459, loss: 1.1903762817382812 2023-01-24 01:36:56.341937: step: 864/459, loss: 0.15516018867492676 2023-01-24 01:36:57.010516: step: 866/459, loss: 0.6355540752410889 2023-01-24 01:36:57.628471: step: 868/459, loss: 0.11422116309404373 2023-01-24 01:36:58.228595: step: 870/459, loss: 0.0800798237323761 2023-01-24 01:36:58.828546: step: 872/459, loss: 0.09079727530479431 2023-01-24 01:36:59.412815: step: 874/459, loss: 0.13440380990505219 2023-01-24 01:37:00.000272: step: 876/459, loss: 15.362287521362305 2023-01-24 01:37:00.644528: step: 878/459, loss: 0.14750488102436066 2023-01-24 01:37:01.288470: step: 880/459, loss: 0.15305110812187195 2023-01-24 01:37:01.886892: step: 882/459, loss: 0.04445166140794754 2023-01-24 01:37:02.492606: step: 884/459, loss: 0.39712026715278625 2023-01-24 01:37:03.122144: step: 886/459, loss: 0.651215672492981 2023-01-24 01:37:03.773278: step: 888/459, loss: 0.18908759951591492 2023-01-24 01:37:04.391227: step: 890/459, loss: 0.10196417570114136 2023-01-24 01:37:04.994726: step: 892/459, loss: 0.20737577974796295 2023-01-24 01:37:05.650922: step: 894/459, loss: 0.14457905292510986 2023-01-24 01:37:06.270799: step: 896/459, loss: 0.15965184569358826 2023-01-24 01:37:06.900199: step: 898/459, loss: 0.1148669570684433 2023-01-24 01:37:07.512258: step: 900/459, loss: 0.23572149872779846 2023-01-24 01:37:08.130352: step: 902/459, loss: 0.46421125531196594 2023-01-24 01:37:08.743826: step: 904/459, loss: 0.13725276291370392 2023-01-24 01:37:09.326612: step: 906/459, loss: 0.20422974228858948 2023-01-24 01:37:10.018413: step: 908/459, loss: 0.13321685791015625 2023-01-24 01:37:10.638644: step: 910/459, loss: 0.14525404572486877 2023-01-24 01:37:11.203717: step: 912/459, loss: 0.12679992616176605 2023-01-24 01:37:11.893448: step: 914/459, loss: 0.3129420578479767 2023-01-24 01:37:12.466781: step: 916/459, loss: 0.10865958034992218 2023-01-24 01:37:13.042081: step: 918/459, loss: 0.34645208716392517 2023-01-24 01:37:13.470938: step: 920/459, loss: 0.07208578288555145 ================================================== Loss: 0.275 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3344066255534472, 'r': 0.3344066255534472, 'f1': 0.3344066255534472}, 'combined': 0.24640488198675053, 'epoch': 12} Test 
Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3491435423381003, 'r': 0.3062551181200443, 'f1': 0.3262960492621704}, 'combined': 0.208829471527789, 'epoch': 12} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33280864197530863, 'r': 0.34101834282099935, 'f1': 0.3368634801624492}, 'combined': 0.2482151959091731, 'epoch': 12} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3524055381541947, 'r': 0.30467060616785374, 'f1': 0.3268041606871176}, 'combined': 0.20915466283975523, 'epoch': 12} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35468654596821636, 'r': 0.3412259559883979, 'f1': 0.34782607119126824}, 'combined': 0.2562928945619871, 'epoch': 12} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.35065807663482546, 'r': 0.31773393634914904, 'f1': 0.33338510585845677}, 'combined': 0.23903083061549732, 'epoch': 12} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3083333333333333, 'r': 0.35238095238095235, 'f1': 0.32888888888888884}, 'combined': 0.2192592592592592, 'epoch': 12} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.24074074074074073, 'r': 0.2826086956521739, 'f1': 0.26}, 'combined': 0.13, 'epoch': 12} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2, 'r': 0.06896551724137931, 'f1': 0.10256410256410257}, 'combined': 0.06837606837606838, 'epoch': 12} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 'epoch': 10} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 'epoch': 10} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'epoch': 10} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3342478880342958, 'r': 0.3266369304319968, 'f1': 0.33039858414138645}, 'combined': 0.24345158831470579, 'epoch': 5} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3413499740991752, 'r': 0.24608229950967814, 'f1': 0.28599105067157526}, 'combined': 0.18303427242980813, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3269230769230769, 'r': 0.3695652173913043, 'f1': 0.346938775510204}, 'combined': 0.173469387755102, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 
0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8} ****************************** Epoch: 13 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:39:48.635925: step: 2/459, loss: 0.20927375555038452 2023-01-24 01:39:49.411786: step: 4/459, loss: 0.1532648801803589 2023-01-24 01:39:50.043170: step: 6/459, loss: 0.13827691972255707 2023-01-24 01:39:50.719447: step: 8/459, loss: 0.16541950404644012 2023-01-24 01:39:51.399571: step: 10/459, loss: 1.0394524335861206 2023-01-24 01:39:51.990203: step: 12/459, loss: 0.09928685426712036 2023-01-24 01:39:52.583280: step: 14/459, loss: 0.1679028868675232 2023-01-24 01:39:53.252535: step: 16/459, loss: 0.1822929084300995 2023-01-24 01:39:53.801120: step: 18/459, loss: 0.09924665838479996 2023-01-24 01:39:54.431054: step: 20/459, loss: 0.2591027617454529 2023-01-24 01:39:55.031779: step: 22/459, loss: 0.057872891426086426 2023-01-24 01:39:55.621302: step: 24/459, loss: 0.07697039097547531 2023-01-24 01:39:56.344402: step: 26/459, loss: 0.1348942667245865 2023-01-24 01:39:56.982426: step: 28/459, loss: 0.10731618851423264 2023-01-24 01:39:57.571641: step: 30/459, loss: 0.8491590619087219 2023-01-24 01:39:58.192745: step: 32/459, loss: 0.20703916251659393 2023-01-24 01:39:58.803705: step: 34/459, loss: 0.09677630662918091 2023-01-24 01:39:59.493576: step: 36/459, loss: 0.11319464445114136 2023-01-24 01:40:00.158873: step: 38/459, loss: 0.12456658482551575 2023-01-24 01:40:00.725678: step: 40/459, loss: 0.11774548888206482 2023-01-24 01:40:01.399151: step: 42/459, loss: 0.07571619749069214 2023-01-24 01:40:02.028771: step: 44/459, loss: 0.21100151538848877 2023-01-24 01:40:02.668214: step: 46/459, loss: 0.3369593620300293 2023-01-24 01:40:03.229274: step: 48/459, loss: 0.020322144031524658 2023-01-24 01:40:03.869669: step: 50/459, loss: 0.08077998459339142 2023-01-24 01:40:04.440440: step: 52/459, loss: 0.10019479691982269 2023-01-24 01:40:05.022326: step: 54/459, loss: 0.4173669219017029 2023-01-24 01:40:05.637528: step: 56/459, loss: 0.0798049196600914 2023-01-24 01:40:06.285600: step: 58/459, loss: 0.045127108693122864 2023-01-24 01:40:06.915211: step: 60/459, loss: 0.15024398267269135 2023-01-24 01:40:07.572084: step: 62/459, loss: 0.06584512442350388 2023-01-24 01:40:08.204201: step: 64/459, loss: 0.09968288987874985 2023-01-24 01:40:08.868952: step: 66/459, loss: 0.0982460081577301 2023-01-24 01:40:09.492507: step: 68/459, loss: 0.06781520694494247 2023-01-24 01:40:10.148825: step: 70/459, loss: 0.10412721335887909 2023-01-24 01:40:10.821715: step: 72/459, loss: 0.09420589357614517 2023-01-24 01:40:11.402130: step: 74/459, loss: 0.09154083579778671 2023-01-24 01:40:11.991824: step: 76/459, loss: 0.18464593589305878 2023-01-24 01:40:12.665418: step: 78/459, loss: 0.43697285652160645 2023-01-24 01:40:13.295802: step: 80/459, loss: 0.13779298961162567 2023-01-24 01:40:13.972100: step: 82/459, loss: 0.13426260650157928 2023-01-24 01:40:14.571745: step: 84/459, loss: 0.12546980381011963 2023-01-24 01:40:15.186374: step: 86/459, loss: 0.3337631821632385 2023-01-24 01:40:15.873102: step: 88/459, loss: 0.30144742131233215 2023-01-24 
01:40:16.526583: step: 90/459, loss: 0.6727092862129211 2023-01-24 01:40:17.236825: step: 92/459, loss: 0.21661421656608582 2023-01-24 01:40:17.913423: step: 94/459, loss: 0.1035386249423027 2023-01-24 01:40:18.551700: step: 96/459, loss: 0.16805222630500793 2023-01-24 01:40:19.189211: step: 98/459, loss: 0.06526506692171097 2023-01-24 01:40:19.798858: step: 100/459, loss: 0.08080954849720001 2023-01-24 01:40:20.387189: step: 102/459, loss: 0.6407771110534668 2023-01-24 01:40:20.967559: step: 104/459, loss: 0.08463013172149658 2023-01-24 01:40:21.575668: step: 106/459, loss: 0.08286351710557938 2023-01-24 01:40:22.261098: step: 108/459, loss: 0.05327927693724632 2023-01-24 01:40:22.875562: step: 110/459, loss: 0.06678027659654617 2023-01-24 01:40:23.497767: step: 112/459, loss: 0.06115363538265228 2023-01-24 01:40:24.047505: step: 114/459, loss: 0.01880536787211895 2023-01-24 01:40:24.690467: step: 116/459, loss: 0.1261235624551773 2023-01-24 01:40:25.336968: step: 118/459, loss: 0.07666917890310287 2023-01-24 01:40:25.954571: step: 120/459, loss: 0.35725656151771545 2023-01-24 01:40:26.512578: step: 122/459, loss: 0.07440907508134842 2023-01-24 01:40:27.118241: step: 124/459, loss: 0.22026918828487396 2023-01-24 01:40:27.738948: step: 126/459, loss: 0.06886263191699982 2023-01-24 01:40:28.325835: step: 128/459, loss: 0.2197868973016739 2023-01-24 01:40:28.988127: step: 130/459, loss: 0.056958675384521484 2023-01-24 01:40:29.557605: step: 132/459, loss: 0.14821073412895203 2023-01-24 01:40:30.165980: step: 134/459, loss: 0.08883754163980484 2023-01-24 01:40:30.842035: step: 136/459, loss: 1.4822291135787964 2023-01-24 01:40:31.493927: step: 138/459, loss: 0.1526670902967453 2023-01-24 01:40:32.099463: step: 140/459, loss: 0.06547312438488007 2023-01-24 01:40:32.770784: step: 142/459, loss: 0.08667325973510742 2023-01-24 01:40:33.421643: step: 144/459, loss: 0.5791699886322021 2023-01-24 01:40:34.126815: step: 146/459, loss: 0.4788343608379364 2023-01-24 01:40:34.762233: step: 148/459, loss: 0.19295069575309753 2023-01-24 01:40:35.434240: step: 150/459, loss: 0.14887432754039764 2023-01-24 01:40:36.065735: step: 152/459, loss: 0.13422559201717377 2023-01-24 01:40:36.730370: step: 154/459, loss: 0.07466470450162888 2023-01-24 01:40:37.416373: step: 156/459, loss: 0.13244669139385223 2023-01-24 01:40:38.067847: step: 158/459, loss: 0.02999091148376465 2023-01-24 01:40:38.648117: step: 160/459, loss: 0.256674587726593 2023-01-24 01:40:39.246762: step: 162/459, loss: 0.09058741480112076 2023-01-24 01:40:39.885280: step: 164/459, loss: 0.2829582989215851 2023-01-24 01:40:40.448207: step: 166/459, loss: 0.0897015854716301 2023-01-24 01:40:41.129976: step: 168/459, loss: 0.12764404714107513 2023-01-24 01:40:41.831880: step: 170/459, loss: 0.14941827952861786 2023-01-24 01:40:42.389538: step: 172/459, loss: 0.05726190656423569 2023-01-24 01:40:43.056226: step: 174/459, loss: 0.12152819335460663 2023-01-24 01:40:43.690317: step: 176/459, loss: 0.15610012412071228 2023-01-24 01:40:44.278393: step: 178/459, loss: 0.15277431905269623 2023-01-24 01:40:44.900131: step: 180/459, loss: 0.14063780009746552 2023-01-24 01:40:45.512767: step: 182/459, loss: 0.24735528230667114 2023-01-24 01:40:46.142261: step: 184/459, loss: 0.08262521028518677 2023-01-24 01:40:46.764500: step: 186/459, loss: 0.22288277745246887 2023-01-24 01:40:47.405115: step: 188/459, loss: 0.06538510322570801 2023-01-24 01:40:48.051371: step: 190/459, loss: 0.13949917256832123 2023-01-24 01:40:48.663599: step: 192/459, loss: 
0.1232997328042984 2023-01-24 01:40:49.254769: step: 194/459, loss: 0.07429663091897964 2023-01-24 01:40:49.886010: step: 196/459, loss: 0.5338945984840393 2023-01-24 01:40:50.541900: step: 198/459, loss: 0.17838017642498016 2023-01-24 01:40:51.159488: step: 200/459, loss: 0.6608726978302002 2023-01-24 01:40:51.775409: step: 202/459, loss: 0.06802286207675934 2023-01-24 01:40:52.363755: step: 204/459, loss: 0.1294148564338684 2023-01-24 01:40:53.058542: step: 206/459, loss: 0.3071208894252777 2023-01-24 01:40:53.700111: step: 208/459, loss: 0.1077674925327301 2023-01-24 01:40:54.294922: step: 210/459, loss: 0.23361562192440033 2023-01-24 01:40:54.892295: step: 212/459, loss: 0.8588075041770935 2023-01-24 01:40:55.525688: step: 214/459, loss: 0.3619821071624756 2023-01-24 01:40:56.096007: step: 216/459, loss: 0.069002166390419 2023-01-24 01:40:56.807784: step: 218/459, loss: 0.19510246813297272 2023-01-24 01:40:57.401005: step: 220/459, loss: 0.8093500137329102 2023-01-24 01:40:58.019322: step: 222/459, loss: 0.06748756766319275 2023-01-24 01:40:58.676388: step: 224/459, loss: 0.12487049400806427 2023-01-24 01:40:59.295176: step: 226/459, loss: 0.11001888662576675 2023-01-24 01:40:59.976612: step: 228/459, loss: 0.07122944295406342 2023-01-24 01:41:00.615126: step: 230/459, loss: 0.09868335723876953 2023-01-24 01:41:01.178966: step: 232/459, loss: 0.19728228449821472 2023-01-24 01:41:01.838502: step: 234/459, loss: 0.08251515030860901 2023-01-24 01:41:02.417092: step: 236/459, loss: 0.24654892086982727 2023-01-24 01:41:03.032456: step: 238/459, loss: 0.5634627342224121 2023-01-24 01:41:03.628293: step: 240/459, loss: 0.24834518134593964 2023-01-24 01:41:04.297821: step: 242/459, loss: 0.05166925489902496 2023-01-24 01:41:04.855447: step: 244/459, loss: 0.151080921292305 2023-01-24 01:41:05.483205: step: 246/459, loss: 0.14757601916790009 2023-01-24 01:41:06.163463: step: 248/459, loss: 0.0885925143957138 2023-01-24 01:41:06.855542: step: 250/459, loss: 0.15381795167922974 2023-01-24 01:41:07.455015: step: 252/459, loss: 0.08849211782217026 2023-01-24 01:41:08.061928: step: 254/459, loss: 0.20401893556118011 2023-01-24 01:41:08.728900: step: 256/459, loss: 0.10144712775945663 2023-01-24 01:41:09.359363: step: 258/459, loss: 0.047716785222291946 2023-01-24 01:41:09.990485: step: 260/459, loss: 0.08421627432107925 2023-01-24 01:41:10.602458: step: 262/459, loss: 0.051057759672403336 2023-01-24 01:41:11.252406: step: 264/459, loss: 0.14093244075775146 2023-01-24 01:41:11.937683: step: 266/459, loss: 0.16717959940433502 2023-01-24 01:41:12.595890: step: 268/459, loss: 0.15864813327789307 2023-01-24 01:41:13.241878: step: 270/459, loss: 0.29240384697914124 2023-01-24 01:41:13.846427: step: 272/459, loss: 0.1661066859960556 2023-01-24 01:41:14.409414: step: 274/459, loss: 0.04552650824189186 2023-01-24 01:41:15.072942: step: 276/459, loss: 0.07233887910842896 2023-01-24 01:41:15.677850: step: 278/459, loss: 0.2601988613605499 2023-01-24 01:41:16.195907: step: 280/459, loss: 0.13270875811576843 2023-01-24 01:41:16.824964: step: 282/459, loss: 0.1677623838186264 2023-01-24 01:41:17.512851: step: 284/459, loss: 0.07936739176511765 2023-01-24 01:41:18.130210: step: 286/459, loss: 0.12088201195001602 2023-01-24 01:41:18.721427: step: 288/459, loss: 0.1524907499551773 2023-01-24 01:41:19.331633: step: 290/459, loss: 0.042841363698244095 2023-01-24 01:41:19.976660: step: 292/459, loss: 0.3849572241306305 2023-01-24 01:41:20.558403: step: 294/459, loss: 0.2533780634403229 2023-01-24 01:41:21.153339: step: 
296/459, loss: 0.0966196283698082 2023-01-24 01:41:21.762175: step: 298/459, loss: 0.05562613531947136 2023-01-24 01:41:22.376806: step: 300/459, loss: 1.5962555408477783 2023-01-24 01:41:23.009388: step: 302/459, loss: 0.21590876579284668 2023-01-24 01:41:23.674995: step: 304/459, loss: 0.5922390222549438 2023-01-24 01:41:24.311673: step: 306/459, loss: 0.22443099319934845 2023-01-24 01:41:24.993135: step: 308/459, loss: 0.08055678755044937 2023-01-24 01:41:25.644175: step: 310/459, loss: 0.41724061965942383 2023-01-24 01:41:26.251399: step: 312/459, loss: 0.13254515826702118 2023-01-24 01:41:26.859921: step: 314/459, loss: 0.05587856471538544 2023-01-24 01:41:27.488814: step: 316/459, loss: 0.06678611785173416 2023-01-24 01:41:28.157285: step: 318/459, loss: 0.10335859656333923 2023-01-24 01:41:28.735469: step: 320/459, loss: 0.14267009496688843 2023-01-24 01:41:29.387274: step: 322/459, loss: 0.38916531205177307 2023-01-24 01:41:29.938485: step: 324/459, loss: 0.09246985614299774 2023-01-24 01:41:30.530932: step: 326/459, loss: 0.10884933173656464 2023-01-24 01:41:31.171773: step: 328/459, loss: 0.11737058311700821 2023-01-24 01:41:31.830876: step: 330/459, loss: 0.12722478806972504 2023-01-24 01:41:32.521319: step: 332/459, loss: 0.22753769159317017 2023-01-24 01:41:33.199626: step: 334/459, loss: 0.18845143914222717 2023-01-24 01:41:33.769898: step: 336/459, loss: 0.14645840227603912 2023-01-24 01:41:34.375477: step: 338/459, loss: 0.05293821170926094 2023-01-24 01:41:34.978752: step: 340/459, loss: 0.06845393776893616 2023-01-24 01:41:35.597754: step: 342/459, loss: 0.12923775613307953 2023-01-24 01:41:36.203839: step: 344/459, loss: 0.9089282751083374 2023-01-24 01:41:36.862397: step: 346/459, loss: 0.09943050891160965 2023-01-24 01:41:37.488376: step: 348/459, loss: 0.09722704440355301 2023-01-24 01:41:38.062383: step: 350/459, loss: 0.15703420341014862 2023-01-24 01:41:38.640420: step: 352/459, loss: 0.06487808376550674 2023-01-24 01:41:39.268700: step: 354/459, loss: 0.018515709787607193 2023-01-24 01:41:39.828173: step: 356/459, loss: 0.07831931859254837 2023-01-24 01:41:40.385559: step: 358/459, loss: 0.1881951093673706 2023-01-24 01:41:40.988559: step: 360/459, loss: 0.1054404005408287 2023-01-24 01:41:41.679941: step: 362/459, loss: 0.24507832527160645 2023-01-24 01:41:42.324859: step: 364/459, loss: 0.15626175701618195 2023-01-24 01:41:42.887014: step: 366/459, loss: 0.25350314378738403 2023-01-24 01:41:43.527293: step: 368/459, loss: 0.1383938491344452 2023-01-24 01:41:44.150559: step: 370/459, loss: 0.05649762228131294 2023-01-24 01:41:44.781371: step: 372/459, loss: 0.10335411131381989 2023-01-24 01:41:45.404065: step: 374/459, loss: 0.151516854763031 2023-01-24 01:41:46.022804: step: 376/459, loss: 0.2498285472393036 2023-01-24 01:41:46.641231: step: 378/459, loss: 0.23142968118190765 2023-01-24 01:41:47.195023: step: 380/459, loss: 0.033507514744997025 2023-01-24 01:41:47.829141: step: 382/459, loss: 0.31207776069641113 2023-01-24 01:41:48.411216: step: 384/459, loss: 0.24253660440444946 2023-01-24 01:41:49.029583: step: 386/459, loss: 0.18744255602359772 2023-01-24 01:41:49.681585: step: 388/459, loss: 0.11253287643194199 2023-01-24 01:41:50.317840: step: 390/459, loss: 0.4503507614135742 2023-01-24 01:41:50.876168: step: 392/459, loss: 0.08594147115945816 2023-01-24 01:41:51.467884: step: 394/459, loss: 0.03725135326385498 2023-01-24 01:41:52.111206: step: 396/459, loss: 0.08510839939117432 2023-01-24 01:41:52.708906: step: 398/459, loss: 0.1872204691171646 2023-01-24 
01:41:53.304754: step: 400/459, loss: 0.11162698268890381 2023-01-24 01:41:53.901344: step: 402/459, loss: 0.18616077303886414 2023-01-24 01:41:54.553598: step: 404/459, loss: 0.04362247884273529 2023-01-24 01:41:55.148505: step: 406/459, loss: 0.20679669082164764 2023-01-24 01:41:55.739604: step: 408/459, loss: 0.1938103288412094 2023-01-24 01:41:56.306885: step: 410/459, loss: 0.11922101676464081 2023-01-24 01:41:56.960719: step: 412/459, loss: 0.1014353409409523 2023-01-24 01:41:57.544942: step: 414/459, loss: 0.14492829144001007 2023-01-24 01:41:58.135695: step: 416/459, loss: 0.14429129660129547 2023-01-24 01:41:58.830185: step: 418/459, loss: 0.26141202449798584 2023-01-24 01:41:59.388903: step: 420/459, loss: 0.2584635019302368 2023-01-24 01:42:00.028419: step: 422/459, loss: 4.004095554351807 2023-01-24 01:42:00.630396: step: 424/459, loss: 0.33011752367019653 2023-01-24 01:42:01.220560: step: 426/459, loss: 0.4399576187133789 2023-01-24 01:42:01.769673: step: 428/459, loss: 2.3445379734039307 2023-01-24 01:42:02.393905: step: 430/459, loss: 0.1618776023387909 2023-01-24 01:42:02.988413: step: 432/459, loss: 0.31887951493263245 2023-01-24 01:42:03.612734: step: 434/459, loss: 0.1733994036912918 2023-01-24 01:42:04.218702: step: 436/459, loss: 0.06007939577102661 2023-01-24 01:42:04.865852: step: 438/459, loss: 0.3329640030860901 2023-01-24 01:42:05.499584: step: 440/459, loss: 0.09166347980499268 2023-01-24 01:42:06.137133: step: 442/459, loss: 0.046937406063079834 2023-01-24 01:42:06.794480: step: 444/459, loss: 0.10102054476737976 2023-01-24 01:42:07.393841: step: 446/459, loss: 0.11659872531890869 2023-01-24 01:42:07.933102: step: 448/459, loss: 0.15553194284439087 2023-01-24 01:42:08.569034: step: 450/459, loss: 0.21695880591869354 2023-01-24 01:42:09.170695: step: 452/459, loss: 0.20779691636562347 2023-01-24 01:42:09.811300: step: 454/459, loss: 0.036580558866262436 2023-01-24 01:42:10.410890: step: 456/459, loss: 0.04759306088089943 2023-01-24 01:42:11.023839: step: 458/459, loss: 0.1099429726600647 2023-01-24 01:42:11.643037: step: 460/459, loss: 0.06411714851856232 2023-01-24 01:42:12.244584: step: 462/459, loss: 0.1200205609202385 2023-01-24 01:42:12.829559: step: 464/459, loss: 0.11970175057649612 2023-01-24 01:42:13.382004: step: 466/459, loss: 1.1199228763580322 2023-01-24 01:42:13.983360: step: 468/459, loss: 0.2134189009666443 2023-01-24 01:42:14.598730: step: 470/459, loss: 0.10025081038475037 2023-01-24 01:42:15.169041: step: 472/459, loss: 0.18133558332920074 2023-01-24 01:42:15.789464: step: 474/459, loss: 0.1482253223657608 2023-01-24 01:42:16.450820: step: 476/459, loss: 0.12242645025253296 2023-01-24 01:42:17.141835: step: 478/459, loss: 0.26649999618530273 2023-01-24 01:42:17.776338: step: 480/459, loss: 0.029101161286234856 2023-01-24 01:42:18.368525: step: 482/459, loss: 0.0592440627515316 2023-01-24 01:42:18.913380: step: 484/459, loss: 0.0174099151045084 2023-01-24 01:42:19.507814: step: 486/459, loss: 0.08248534798622131 2023-01-24 01:42:20.116328: step: 488/459, loss: 0.2653360366821289 2023-01-24 01:42:20.728638: step: 490/459, loss: 1.938102126121521 2023-01-24 01:42:21.358056: step: 492/459, loss: 0.7065800428390503 2023-01-24 01:42:21.946925: step: 494/459, loss: 0.06244165450334549 2023-01-24 01:42:22.534324: step: 496/459, loss: 0.6604436039924622 2023-01-24 01:42:23.160591: step: 498/459, loss: 0.13771037757396698 2023-01-24 01:42:23.783592: step: 500/459, loss: 0.39878034591674805 2023-01-24 01:42:24.363808: step: 502/459, loss: 
0.23225516080856323 2023-01-24 01:42:24.972692: step: 504/459, loss: 0.182167187333107 2023-01-24 01:42:25.691923: step: 506/459, loss: 0.16695773601531982 2023-01-24 01:42:26.332954: step: 508/459, loss: 0.08602381497621536 2023-01-24 01:42:27.011820: step: 510/459, loss: 0.10937577486038208 2023-01-24 01:42:27.612620: step: 512/459, loss: 0.16129125654697418 2023-01-24 01:42:28.186837: step: 514/459, loss: 0.15623678267002106 2023-01-24 01:42:28.803167: step: 516/459, loss: 0.3787432014942169 2023-01-24 01:42:29.429972: step: 518/459, loss: 0.09737873077392578 2023-01-24 01:42:30.076217: step: 520/459, loss: 0.0984070748090744 2023-01-24 01:42:30.672305: step: 522/459, loss: 0.08771736919879913 2023-01-24 01:42:31.259665: step: 524/459, loss: 0.24798312783241272 2023-01-24 01:42:31.898581: step: 526/459, loss: 0.12554605305194855 2023-01-24 01:42:32.473490: step: 528/459, loss: 0.05238288640975952 2023-01-24 01:42:33.177664: step: 530/459, loss: 0.4424508512020111 2023-01-24 01:42:33.898338: step: 532/459, loss: 0.3630236089229584 2023-01-24 01:42:34.503654: step: 534/459, loss: 0.07074953615665436 2023-01-24 01:42:35.187872: step: 536/459, loss: 0.4965871274471283 2023-01-24 01:42:35.781033: step: 538/459, loss: 0.121241994202137 2023-01-24 01:42:36.371559: step: 540/459, loss: 0.18229134380817413 2023-01-24 01:42:36.977232: step: 542/459, loss: 0.25575900077819824 2023-01-24 01:42:37.563334: step: 544/459, loss: 0.07657302170991898 2023-01-24 01:42:38.088390: step: 546/459, loss: 0.07629764825105667 2023-01-24 01:42:38.727492: step: 548/459, loss: 0.08925733715295792 2023-01-24 01:42:39.360893: step: 550/459, loss: 0.024507369846105576 2023-01-24 01:42:39.992812: step: 552/459, loss: 0.03874736651778221 2023-01-24 01:42:40.630240: step: 554/459, loss: 0.27256330847740173 2023-01-24 01:42:41.254979: step: 556/459, loss: 0.2795548439025879 2023-01-24 01:42:41.845761: step: 558/459, loss: 0.034558240324258804 2023-01-24 01:42:42.513589: step: 560/459, loss: 0.538637638092041 2023-01-24 01:42:43.151513: step: 562/459, loss: 0.029228288680315018 2023-01-24 01:42:43.705788: step: 564/459, loss: 0.04129324108362198 2023-01-24 01:42:44.419906: step: 566/459, loss: 0.03387163206934929 2023-01-24 01:42:45.039024: step: 568/459, loss: 0.07881743460893631 2023-01-24 01:42:45.636697: step: 570/459, loss: 0.06979397684335709 2023-01-24 01:42:46.212178: step: 572/459, loss: 0.2428869605064392 2023-01-24 01:42:46.843817: step: 574/459, loss: 0.15294894576072693 2023-01-24 01:42:47.371736: step: 576/459, loss: 0.06147534400224686 2023-01-24 01:42:47.958758: step: 578/459, loss: 0.0868789479136467 2023-01-24 01:42:48.600566: step: 580/459, loss: 0.1153472363948822 2023-01-24 01:42:49.234458: step: 582/459, loss: 0.08385437726974487 2023-01-24 01:42:49.865801: step: 584/459, loss: 0.053005896508693695 2023-01-24 01:42:50.417461: step: 586/459, loss: 0.190373495221138 2023-01-24 01:42:50.976539: step: 588/459, loss: 0.2875838577747345 2023-01-24 01:42:51.620005: step: 590/459, loss: 0.17066769301891327 2023-01-24 01:42:52.225857: step: 592/459, loss: 0.4465467929840088 2023-01-24 01:42:52.870086: step: 594/459, loss: 0.008646595291793346 2023-01-24 01:42:53.557782: step: 596/459, loss: 0.05340017378330231 2023-01-24 01:42:54.144528: step: 598/459, loss: 0.4649478495121002 2023-01-24 01:42:54.775809: step: 600/459, loss: 0.09033412486314774 2023-01-24 01:42:55.345178: step: 602/459, loss: 0.12903180718421936 2023-01-24 01:42:56.062837: step: 604/459, loss: 0.12485986202955246 2023-01-24 01:42:56.675041: 
step: 606/459, loss: 0.04847421869635582 2023-01-24 01:42:57.257523: step: 608/459, loss: 0.5077511668205261 2023-01-24 01:42:57.846050: step: 610/459, loss: 0.10117786377668381 2023-01-24 01:42:58.422654: step: 612/459, loss: 0.030310088768601418 2023-01-24 01:42:59.029103: step: 614/459, loss: 0.09119818359613419 2023-01-24 01:42:59.613351: step: 616/459, loss: 0.8089322447776794 2023-01-24 01:43:00.211784: step: 618/459, loss: 0.11692710965871811 2023-01-24 01:43:00.770570: step: 620/459, loss: 0.3517252504825592 2023-01-24 01:43:01.417396: step: 622/459, loss: 0.12106548994779587 2023-01-24 01:43:01.972246: step: 624/459, loss: 0.17447204887866974 2023-01-24 01:43:02.583458: step: 626/459, loss: 0.5729275345802307 2023-01-24 01:43:03.203578: step: 628/459, loss: 0.26834413409233093 2023-01-24 01:43:03.788940: step: 630/459, loss: 0.2341347336769104 2023-01-24 01:43:04.392111: step: 632/459, loss: 0.11908192187547684 2023-01-24 01:43:04.993566: step: 634/459, loss: 0.0780395120382309 2023-01-24 01:43:05.590603: step: 636/459, loss: 0.6283779740333557 2023-01-24 01:43:06.164019: step: 638/459, loss: 0.11185828596353531 2023-01-24 01:43:06.748585: step: 640/459, loss: 0.07725457847118378 2023-01-24 01:43:07.338450: step: 642/459, loss: 0.27710869908332825 2023-01-24 01:43:07.977919: step: 644/459, loss: 0.028724130243062973 2023-01-24 01:43:08.592492: step: 646/459, loss: 0.16146817803382874 2023-01-24 01:43:09.202420: step: 648/459, loss: 0.30534857511520386 2023-01-24 01:43:09.797114: step: 650/459, loss: 0.1775391548871994 2023-01-24 01:43:10.445029: step: 652/459, loss: 0.20762009918689728 2023-01-24 01:43:11.053731: step: 654/459, loss: 0.05563787370920181 2023-01-24 01:43:11.686307: step: 656/459, loss: 0.1330936998128891 2023-01-24 01:43:12.290515: step: 658/459, loss: 0.08499737083911896 2023-01-24 01:43:12.883554: step: 660/459, loss: 0.07034870237112045 2023-01-24 01:43:13.487960: step: 662/459, loss: 0.06155025213956833 2023-01-24 01:43:14.040998: step: 664/459, loss: 0.05880401283502579 2023-01-24 01:43:14.659053: step: 666/459, loss: 0.11676064133644104 2023-01-24 01:43:15.277459: step: 668/459, loss: 0.15328340232372284 2023-01-24 01:43:15.881448: step: 670/459, loss: 0.18924134969711304 2023-01-24 01:43:16.494519: step: 672/459, loss: 0.34556347131729126 2023-01-24 01:43:17.115109: step: 674/459, loss: 0.05552801862359047 2023-01-24 01:43:17.740778: step: 676/459, loss: 0.2815198600292206 2023-01-24 01:43:18.359803: step: 678/459, loss: 0.10901150107383728 2023-01-24 01:43:18.928315: step: 680/459, loss: 0.08759982138872147 2023-01-24 01:43:19.537479: step: 682/459, loss: 0.010949497111141682 2023-01-24 01:43:20.157371: step: 684/459, loss: 0.836398720741272 2023-01-24 01:43:20.738275: step: 686/459, loss: 0.06943424791097641 2023-01-24 01:43:21.303280: step: 688/459, loss: 0.4307463467121124 2023-01-24 01:43:21.969021: step: 690/459, loss: 0.13477304577827454 2023-01-24 01:43:22.601558: step: 692/459, loss: 0.1520858108997345 2023-01-24 01:43:23.167313: step: 694/459, loss: 0.2519288957118988 2023-01-24 01:43:23.806886: step: 696/459, loss: 0.10831020027399063 2023-01-24 01:43:24.396855: step: 698/459, loss: 0.14415821433067322 2023-01-24 01:43:25.045546: step: 700/459, loss: 0.28432708978652954 2023-01-24 01:43:25.699214: step: 702/459, loss: 0.31678128242492676 2023-01-24 01:43:26.320816: step: 704/459, loss: 0.6290286779403687 2023-01-24 01:43:27.080925: step: 706/459, loss: 0.09101077914237976 2023-01-24 01:43:27.702460: step: 708/459, loss: 0.11500295996665955 
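Every training entry in this log follows the same fixed pattern — wall-clock timestamp, `step: k/459`, per-batch loss — so the loss curve can be recovered from the raw text with a few lines of Python. The sketch below is a reader-side utility only, not part of train.py; the log file name and helper names are placeholders, and the whitespace-collapsing step is there only because entries in this file are hard-wrapped mid-record.

```python
import re
from statistics import mean

# One training entry looks like:
#   2023-01-24 01:41:21.762175: step: 298/459, loss: 0.05562613531947136
STEP_RE = re.compile(
    r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+): step: (\d+)/\d+, loss: ([\d.]+)"
)

def step_losses(log_text: str):
    """Yield (timestamp, step, loss) for every training entry in the log."""
    # Collapse whitespace first so entries that were hard-wrapped across
    # lines still match as a single record.
    flat = " ".join(log_text.split())
    for ts, step, loss in STEP_RE.findall(flat):
        yield ts, int(step), float(loss)

if __name__ == "__main__":
    with open("train.log", encoding="utf-8") as f:  # assumed log file name
        losses = [loss for _, _, loss in step_losses(f.read())]
    print(f"{len(losses)} steps, mean loss {mean(losses):.3f}")
```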
2023-01-24 01:43:28.318490: step: 710/459, loss: 0.21722251176834106 2023-01-24 01:43:28.914455: step: 712/459, loss: 0.12133494764566422 2023-01-24 01:43:29.470150: step: 714/459, loss: 0.06394307315349579 2023-01-24 01:43:30.073973: step: 716/459, loss: 0.15055891871452332 2023-01-24 01:43:30.700619: step: 718/459, loss: 0.07506881654262543 2023-01-24 01:43:31.317904: step: 720/459, loss: 0.13751862943172455 2023-01-24 01:43:31.909430: step: 722/459, loss: 0.9705318212509155 2023-01-24 01:43:32.523410: step: 724/459, loss: 0.7368402481079102 2023-01-24 01:43:33.142431: step: 726/459, loss: 0.43118196725845337 2023-01-24 01:43:33.830359: step: 728/459, loss: 0.09710769355297089 2023-01-24 01:43:34.475738: step: 730/459, loss: 6.663374900817871 2023-01-24 01:43:35.090343: step: 732/459, loss: 0.1476956307888031 2023-01-24 01:43:35.680289: step: 734/459, loss: 0.10225719213485718 2023-01-24 01:43:36.331829: step: 736/459, loss: 0.047553353011608124 2023-01-24 01:43:36.993329: step: 738/459, loss: 0.5253232717514038 2023-01-24 01:43:37.572374: step: 740/459, loss: 0.05285780876874924 2023-01-24 01:43:38.224702: step: 742/459, loss: 0.10710303485393524 2023-01-24 01:43:38.834443: step: 744/459, loss: 0.10757799446582794 2023-01-24 01:43:39.439308: step: 746/459, loss: 0.18422162532806396 2023-01-24 01:43:40.049950: step: 748/459, loss: 0.27493441104888916 2023-01-24 01:43:40.671745: step: 750/459, loss: 0.5156088471412659 2023-01-24 01:43:41.270345: step: 752/459, loss: 0.10243377089500427 2023-01-24 01:43:41.901605: step: 754/459, loss: 0.05911752209067345 2023-01-24 01:43:42.482695: step: 756/459, loss: 0.1637984663248062 2023-01-24 01:43:43.057203: step: 758/459, loss: 0.11436356604099274 2023-01-24 01:43:43.662715: step: 760/459, loss: 0.3074532151222229 2023-01-24 01:43:44.358154: step: 762/459, loss: 0.37487316131591797 2023-01-24 01:43:44.981426: step: 764/459, loss: 0.21758419275283813 2023-01-24 01:43:45.549061: step: 766/459, loss: 0.12295028567314148 2023-01-24 01:43:46.122346: step: 768/459, loss: 0.13948547840118408 2023-01-24 01:43:46.800614: step: 770/459, loss: 0.04828039929270744 2023-01-24 01:43:47.430599: step: 772/459, loss: 0.24757584929466248 2023-01-24 01:43:48.106637: step: 774/459, loss: 0.07548278570175171 2023-01-24 01:43:48.752975: step: 776/459, loss: 0.03969706594944 2023-01-24 01:43:49.356677: step: 778/459, loss: 0.31221720576286316 2023-01-24 01:43:50.029175: step: 780/459, loss: 0.05376415327191353 2023-01-24 01:43:50.659915: step: 782/459, loss: 0.26209592819213867 2023-01-24 01:43:51.326221: step: 784/459, loss: 0.1325143724679947 2023-01-24 01:43:52.004467: step: 786/459, loss: 0.06989704817533493 2023-01-24 01:43:52.728718: step: 788/459, loss: 0.1369429975748062 2023-01-24 01:43:53.321685: step: 790/459, loss: 0.16037492454051971 2023-01-24 01:43:53.878285: step: 792/459, loss: 0.0913311168551445 2023-01-24 01:43:54.462677: step: 794/459, loss: 0.07742588967084885 2023-01-24 01:43:55.028461: step: 796/459, loss: 0.18780025839805603 2023-01-24 01:43:55.636639: step: 798/459, loss: 0.11609622836112976 2023-01-24 01:43:56.235635: step: 800/459, loss: 0.1992252916097641 2023-01-24 01:43:56.845726: step: 802/459, loss: 0.09195840358734131 2023-01-24 01:43:57.458248: step: 804/459, loss: 0.4149000942707062 2023-01-24 01:43:58.052671: step: 806/459, loss: 0.09766478836536407 2023-01-24 01:43:58.717310: step: 808/459, loss: 0.07644077390432358 2023-01-24 01:43:59.295784: step: 810/459, loss: 0.34726402163505554 2023-01-24 01:43:59.930673: step: 812/459, loss: 
0.116389200091362 2023-01-24 01:44:00.549455: step: 814/459, loss: 0.04174213856458664 2023-01-24 01:44:01.170540: step: 816/459, loss: 0.0938701257109642 2023-01-24 01:44:01.839945: step: 818/459, loss: 0.2720496952533722 2023-01-24 01:44:02.421756: step: 820/459, loss: 0.1780373752117157 2023-01-24 01:44:03.109483: step: 822/459, loss: 0.15496326982975006 2023-01-24 01:44:03.680164: step: 824/459, loss: 0.0994071215391159 2023-01-24 01:44:04.284835: step: 826/459, loss: 0.1647522747516632 2023-01-24 01:44:04.884920: step: 828/459, loss: 0.09947433322668076 2023-01-24 01:44:05.497307: step: 830/459, loss: 0.14588545262813568 2023-01-24 01:44:06.095149: step: 832/459, loss: 0.08643001317977905 2023-01-24 01:44:06.679880: step: 834/459, loss: 0.10815045237541199 2023-01-24 01:44:07.309306: step: 836/459, loss: 0.22675029933452606 2023-01-24 01:44:07.953038: step: 838/459, loss: 0.07140377163887024 2023-01-24 01:44:08.516062: step: 840/459, loss: 0.0805126428604126 2023-01-24 01:44:09.161638: step: 842/459, loss: 0.05149362236261368 2023-01-24 01:44:09.679893: step: 844/459, loss: 2.2226650714874268 2023-01-24 01:44:10.283346: step: 846/459, loss: 0.2010558843612671 2023-01-24 01:44:10.922524: step: 848/459, loss: 0.24792200326919556 2023-01-24 01:44:11.516738: step: 850/459, loss: 0.1374415010213852 2023-01-24 01:44:12.142922: step: 852/459, loss: 0.6635507345199585 2023-01-24 01:44:12.780154: step: 854/459, loss: 0.3181004226207733 2023-01-24 01:44:13.421105: step: 856/459, loss: 0.16424930095672607 2023-01-24 01:44:14.074466: step: 858/459, loss: 0.1444997936487198 2023-01-24 01:44:14.769062: step: 860/459, loss: 0.11028905212879181 2023-01-24 01:44:15.368337: step: 862/459, loss: 0.1239614486694336 2023-01-24 01:44:15.940357: step: 864/459, loss: 0.15977224707603455 2023-01-24 01:44:16.581878: step: 866/459, loss: 0.024444887414574623 2023-01-24 01:44:17.263151: step: 868/459, loss: 0.12818819284439087 2023-01-24 01:44:17.833087: step: 870/459, loss: 0.15130718052387238 2023-01-24 01:44:18.447829: step: 872/459, loss: 0.4332387447357178 2023-01-24 01:44:19.097225: step: 874/459, loss: 0.1088869646191597 2023-01-24 01:44:19.734788: step: 876/459, loss: 0.11888479441404343 2023-01-24 01:44:20.486074: step: 878/459, loss: 0.33866947889328003 2023-01-24 01:44:21.119609: step: 880/459, loss: 0.1177043616771698 2023-01-24 01:44:21.705359: step: 882/459, loss: 0.22167907655239105 2023-01-24 01:44:22.367106: step: 884/459, loss: 0.33633506298065186 2023-01-24 01:44:22.958468: step: 886/459, loss: 0.06693042814731598 2023-01-24 01:44:23.536167: step: 888/459, loss: 0.09383074194192886 2023-01-24 01:44:24.173509: step: 890/459, loss: 0.09806197881698608 2023-01-24 01:44:24.837830: step: 892/459, loss: 0.10851804167032242 2023-01-24 01:44:25.428006: step: 894/459, loss: 0.25488439202308655 2023-01-24 01:44:25.997037: step: 896/459, loss: 0.1141214445233345 2023-01-24 01:44:26.603319: step: 898/459, loss: 0.27636805176734924 2023-01-24 01:44:27.242035: step: 900/459, loss: 0.10045617818832397 2023-01-24 01:44:27.927630: step: 902/459, loss: 0.16792084276676178 2023-01-24 01:44:28.596921: step: 904/459, loss: 0.10252072662115097 2023-01-24 01:44:29.265982: step: 906/459, loss: 0.08811137825250626 2023-01-24 01:44:29.880291: step: 908/459, loss: 0.2103530466556549 2023-01-24 01:44:30.472999: step: 910/459, loss: 0.33840882778167725 2023-01-24 01:44:31.060546: step: 912/459, loss: 0.1436239629983902 2023-01-24 01:44:31.717629: step: 914/459, loss: 0.13876290619373322 2023-01-24 01:44:32.251375: step: 
916/459, loss: 0.057025469839572906 2023-01-24 01:44:32.837525: step: 918/459, loss: 0.25125378370285034 2023-01-24 01:44:33.299106: step: 920/459, loss: 0.05245758220553398
==================================================
Loss: 0.226
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3202273832684825, 'r': 0.312328036053131, 'f1': 0.31622838616714705}, 'combined': 0.2330103898073715, 'epoch': 13}
Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.35032942888304575, 'r': 0.30096482754043474, 'f1': 0.32377634258628674}, 'combined': 0.20721685925522348, 'epoch': 13}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3174878861668634, 'r': 0.3180903299736317, 'f1': 0.31778882255185575}, 'combined': 0.23416018503820948, 'epoch': 13}
Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34968102982358457, 'r': 0.29277838951592855, 'f1': 0.3187097758213967}, 'combined': 0.20397425652569387, 'epoch': 13}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34029775126327866, 'r': 0.3267374993154061, 'f1': 0.33337979116983346}, 'combined': 0.24564826717777202, 'epoch': 13}
Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34963853158143904, 'r': 0.30692972280120856, 'f1': 0.3268950542843939}, 'combined': 0.23437758609069753, 'epoch': 13}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.26727642276422764, 'r': 0.31309523809523804, 'f1': 0.2883771929824561}, 'combined': 0.19225146198830406, 'epoch': 13}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3055555555555556, 'r': 0.358695652173913, 'f1': 0.32999999999999996}, 'combined': 0.16499999999999998, 'epoch': 13}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4444444444444444, 'r': 0.13793103448275862, 'f1': 0.21052631578947367}, 'combined': 0.14035087719298245, 'epoch': 13}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 'epoch': 10}
Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 'epoch': 10}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'epoch': 10}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3342478880342958, 'r': 0.3266369304319968, 'f1': 0.33039858414138645}, 'combined': 0.24345158831470579, 'epoch': 5}
Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3413499740991752, 'r': 0.24608229950967814, 'f1': 0.28599105067157526}, 'combined': 0.18303427242980813, 'epoch': 5}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 
0.5}, 'slot': {'p': 0.3269230769230769, 'r': 0.3695652173913043, 'f1': 0.346938775510204}, 'combined': 0.173469387755102, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8} ****************************** Epoch: 14 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:47:07.243164: step: 2/459, loss: 0.07895553857088089 2023-01-24 01:47:07.836115: step: 4/459, loss: 0.16746407747268677 2023-01-24 01:47:08.445896: step: 6/459, loss: 0.07593001425266266 2023-01-24 01:47:09.037896: step: 8/459, loss: 0.09054185450077057 2023-01-24 01:47:09.759896: step: 10/459, loss: 0.1411190927028656 2023-01-24 01:47:10.439872: step: 12/459, loss: 0.07114152610301971 2023-01-24 01:47:11.081067: step: 14/459, loss: 0.0700361505150795 2023-01-24 01:47:11.662755: step: 16/459, loss: 0.1687828153371811 2023-01-24 01:47:12.232659: step: 18/459, loss: 0.10659261792898178 2023-01-24 01:47:12.847245: step: 20/459, loss: 0.036527715623378754 2023-01-24 01:47:13.462362: step: 22/459, loss: 0.26346126198768616 2023-01-24 01:47:14.113376: step: 24/459, loss: 1.570128321647644 2023-01-24 01:47:14.693786: step: 26/459, loss: 0.10833314061164856 2023-01-24 01:47:15.361597: step: 28/459, loss: 0.33850836753845215 2023-01-24 01:47:15.986129: step: 30/459, loss: 0.07697781175374985 2023-01-24 01:47:16.592351: step: 32/459, loss: 0.09540826082229614 2023-01-24 01:47:17.234088: step: 34/459, loss: 0.08892139047384262 2023-01-24 01:47:17.820402: step: 36/459, loss: 0.05764003470540047 2023-01-24 01:47:18.405450: step: 38/459, loss: 0.10468365997076035 2023-01-24 01:47:18.997183: step: 40/459, loss: 4.835668087005615 2023-01-24 01:47:19.610040: step: 42/459, loss: 0.2400381714105606 2023-01-24 01:47:20.228045: step: 44/459, loss: 0.1442590057849884 2023-01-24 01:47:20.852061: step: 46/459, loss: 0.06966568529605865 2023-01-24 01:47:21.531545: step: 48/459, loss: 0.045418769121170044 2023-01-24 01:47:22.133474: step: 50/459, loss: 0.05689983069896698 2023-01-24 01:47:22.796187: step: 52/459, loss: 0.22033533453941345 2023-01-24 01:47:23.386019: step: 54/459, loss: 0.10728070139884949 2023-01-24 01:47:23.997191: step: 56/459, loss: 0.06875508278608322 2023-01-24 01:47:24.581399: step: 58/459, loss: 0.2695624530315399 2023-01-24 01:47:25.235011: step: 60/459, loss: 0.07635197043418884 2023-01-24 01:47:25.835861: step: 62/459, loss: 0.0884830430150032 2023-01-24 01:47:26.447163: step: 64/459, loss: 0.11158141493797302 2023-01-24 01:47:26.975598: step: 66/459, loss: 0.10358937829732895 2023-01-24 01:47:27.734285: step: 68/459, loss: 0.11182990670204163 2023-01-24 01:47:28.354915: step: 70/459, loss: 0.1130247563123703 2023-01-24 01:47:28.915794: step: 72/459, loss: 0.07149433344602585 2023-01-24 01:47:29.567379: step: 
74/459, loss: 0.06640762090682983 2023-01-24 01:47:30.214174: step: 76/459, loss: 0.19071510434150696 2023-01-24 01:47:30.846412: step: 78/459, loss: 0.4862356185913086 2023-01-24 01:47:31.460901: step: 80/459, loss: 0.013320961967110634 2023-01-24 01:47:32.064415: step: 82/459, loss: 0.07524336129426956 2023-01-24 01:47:32.635265: step: 84/459, loss: 0.07187794893980026 2023-01-24 01:47:33.278119: step: 86/459, loss: 0.10450797528028488 2023-01-24 01:47:33.913232: step: 88/459, loss: 0.060708895325660706 2023-01-24 01:47:34.531085: step: 90/459, loss: 0.15530692040920258 2023-01-24 01:47:35.106911: step: 92/459, loss: 0.03762265294790268 2023-01-24 01:47:35.733244: step: 94/459, loss: 0.7915648818016052 2023-01-24 01:47:36.356858: step: 96/459, loss: 0.05775202438235283 2023-01-24 01:47:36.996324: step: 98/459, loss: 0.04847617819905281 2023-01-24 01:47:37.604213: step: 100/459, loss: 0.129079669713974 2023-01-24 01:47:38.212770: step: 102/459, loss: 0.5015999674797058 2023-01-24 01:47:38.891980: step: 104/459, loss: 0.10824532061815262 2023-01-24 01:47:39.482232: step: 106/459, loss: 0.06066713482141495 2023-01-24 01:47:40.096075: step: 108/459, loss: 0.1951134204864502 2023-01-24 01:47:40.730910: step: 110/459, loss: 0.5897997617721558 2023-01-24 01:47:41.349379: step: 112/459, loss: 0.125932976603508 2023-01-24 01:47:41.974013: step: 114/459, loss: 0.026947448030114174 2023-01-24 01:47:42.578478: step: 116/459, loss: 0.07398516684770584 2023-01-24 01:47:43.175443: step: 118/459, loss: 0.3718324899673462 2023-01-24 01:47:43.718885: step: 120/459, loss: 0.19831566512584686 2023-01-24 01:47:44.339569: step: 122/459, loss: 0.044698093086481094 2023-01-24 01:47:44.970769: step: 124/459, loss: 0.10276887565851212 2023-01-24 01:47:45.565028: step: 126/459, loss: 0.12280240654945374 2023-01-24 01:47:46.279817: step: 128/459, loss: 0.096082903444767 2023-01-24 01:47:46.929242: step: 130/459, loss: 0.038930684328079224 2023-01-24 01:47:47.579303: step: 132/459, loss: 0.18823684751987457 2023-01-24 01:47:48.241926: step: 134/459, loss: 0.07026797533035278 2023-01-24 01:47:48.882340: step: 136/459, loss: 0.47391805052757263 2023-01-24 01:47:49.518659: step: 138/459, loss: 0.1189911961555481 2023-01-24 01:47:50.136397: step: 140/459, loss: 0.10698843002319336 2023-01-24 01:47:50.718635: step: 142/459, loss: 0.03621973842382431 2023-01-24 01:47:51.290124: step: 144/459, loss: 0.14824523031711578 2023-01-24 01:47:51.912646: step: 146/459, loss: 0.5518172383308411 2023-01-24 01:47:52.460005: step: 148/459, loss: 0.023694923147559166 2023-01-24 01:47:53.032959: step: 150/459, loss: 0.04780610650777817 2023-01-24 01:47:53.624463: step: 152/459, loss: 0.02959367074072361 2023-01-24 01:47:54.241842: step: 154/459, loss: 0.20192094147205353 2023-01-24 01:47:54.839142: step: 156/459, loss: 0.09027627110481262 2023-01-24 01:47:55.381750: step: 158/459, loss: 0.1471467763185501 2023-01-24 01:47:56.018545: step: 160/459, loss: 0.07004654407501221 2023-01-24 01:47:56.650740: step: 162/459, loss: 0.26482507586479187 2023-01-24 01:47:57.278497: step: 164/459, loss: 0.09673424810171127 2023-01-24 01:47:57.949458: step: 166/459, loss: 0.07127122581005096 2023-01-24 01:47:58.726067: step: 168/459, loss: 0.24315857887268066 2023-01-24 01:47:59.424413: step: 170/459, loss: 0.0318518728017807 2023-01-24 01:48:00.136011: step: 172/459, loss: 0.04098863527178764 2023-01-24 01:48:00.776542: step: 174/459, loss: 0.1537790149450302 2023-01-24 01:48:01.384067: step: 176/459, loss: 0.021741271018981934 2023-01-24 
01:48:02.027637: step: 178/459, loss: 0.1821894347667694 2023-01-24 01:48:02.642675: step: 180/459, loss: 0.22225360572338104 2023-01-24 01:48:03.371677: step: 182/459, loss: 0.12047475576400757 2023-01-24 01:48:03.980118: step: 184/459, loss: 0.5775659680366516 2023-01-24 01:48:04.580434: step: 186/459, loss: 0.08792395144701004 2023-01-24 01:48:05.244587: step: 188/459, loss: 0.10601978003978729 2023-01-24 01:48:05.861576: step: 190/459, loss: 0.1677037924528122 2023-01-24 01:48:06.454509: step: 192/459, loss: 0.2994798719882965 2023-01-24 01:48:07.071689: step: 194/459, loss: 0.07189606130123138 2023-01-24 01:48:07.648394: step: 196/459, loss: 1.3966971635818481 2023-01-24 01:48:08.295872: step: 198/459, loss: 0.08882810175418854 2023-01-24 01:48:08.937865: step: 200/459, loss: 0.24431873857975006 2023-01-24 01:48:09.572233: step: 202/459, loss: 0.14776192605495453 2023-01-24 01:48:10.179382: step: 204/459, loss: 0.3352596163749695 2023-01-24 01:48:10.854309: step: 206/459, loss: 0.10816320031881332 2023-01-24 01:48:11.413792: step: 208/459, loss: 0.018300916999578476 2023-01-24 01:48:12.019389: step: 210/459, loss: 0.5196093916893005 2023-01-24 01:48:12.668596: step: 212/459, loss: 0.3380054831504822 2023-01-24 01:48:13.301912: step: 214/459, loss: 0.15756018459796906 2023-01-24 01:48:13.956817: step: 216/459, loss: 0.008404449559748173 2023-01-24 01:48:14.554709: step: 218/459, loss: 0.13331370055675507 2023-01-24 01:48:15.190733: step: 220/459, loss: 0.33390942215919495 2023-01-24 01:48:15.780256: step: 222/459, loss: 0.06972936540842056 2023-01-24 01:48:16.398229: step: 224/459, loss: 0.3516439199447632 2023-01-24 01:48:17.056763: step: 226/459, loss: 2.1714134216308594 2023-01-24 01:48:17.691076: step: 228/459, loss: 0.1120411604642868 2023-01-24 01:48:18.287764: step: 230/459, loss: 0.13175885379314423 2023-01-24 01:48:18.913141: step: 232/459, loss: 0.09094700217247009 2023-01-24 01:48:19.569220: step: 234/459, loss: 0.2537795901298523 2023-01-24 01:48:20.219981: step: 236/459, loss: 0.108060821890831 2023-01-24 01:48:20.842317: step: 238/459, loss: 0.1166854128241539 2023-01-24 01:48:21.522721: step: 240/459, loss: 0.10024866461753845 2023-01-24 01:48:22.117603: step: 242/459, loss: 0.1015346497297287 2023-01-24 01:48:22.692784: step: 244/459, loss: 0.09119909256696701 2023-01-24 01:48:23.353046: step: 246/459, loss: 0.10527601093053818 2023-01-24 01:48:23.942017: step: 248/459, loss: 0.2096743881702423 2023-01-24 01:48:24.538655: step: 250/459, loss: 0.07063239067792892 2023-01-24 01:48:25.204662: step: 252/459, loss: 0.06485911458730698 2023-01-24 01:48:25.758232: step: 254/459, loss: 0.0727652907371521 2023-01-24 01:48:26.359021: step: 256/459, loss: 0.2626670002937317 2023-01-24 01:48:26.862335: step: 258/459, loss: 0.11295332759618759 2023-01-24 01:48:27.488852: step: 260/459, loss: 0.20103363692760468 2023-01-24 01:48:28.080902: step: 262/459, loss: 0.06996839493513107 2023-01-24 01:48:28.702684: step: 264/459, loss: 0.061744146049022675 2023-01-24 01:48:29.395968: step: 266/459, loss: 0.08186685293912888 2023-01-24 01:48:29.993840: step: 268/459, loss: 0.09393690526485443 2023-01-24 01:48:30.569961: step: 270/459, loss: 1.7087193727493286 2023-01-24 01:48:31.297136: step: 272/459, loss: 0.20606748759746552 2023-01-24 01:48:31.918564: step: 274/459, loss: 0.12940311431884766 2023-01-24 01:48:32.508262: step: 276/459, loss: 0.12156303972005844 2023-01-24 01:48:33.112314: step: 278/459, loss: 1.479581594467163 2023-01-24 01:48:33.713550: step: 280/459, loss: 
0.0960269644856453 2023-01-24 01:48:34.343015: step: 282/459, loss: 0.04660903289914131 2023-01-24 01:48:34.978757: step: 284/459, loss: 0.09775205701589584 2023-01-24 01:48:35.550949: step: 286/459, loss: 0.2592925429344177 2023-01-24 01:48:36.156460: step: 288/459, loss: 0.17371129989624023 2023-01-24 01:48:36.769581: step: 290/459, loss: 0.019597603008151054 2023-01-24 01:48:37.348462: step: 292/459, loss: 0.4434507191181183 2023-01-24 01:48:37.932129: step: 294/459, loss: 0.13821905851364136 2023-01-24 01:48:38.539612: step: 296/459, loss: 0.12402225285768509 2023-01-24 01:48:39.180345: step: 298/459, loss: 0.40598011016845703 2023-01-24 01:48:39.842831: step: 300/459, loss: 0.09425797313451767 2023-01-24 01:48:40.455148: step: 302/459, loss: 0.07042331993579865 2023-01-24 01:48:41.122298: step: 304/459, loss: 0.059273771941661835 2023-01-24 01:48:41.809427: step: 306/459, loss: 0.15803004801273346 2023-01-24 01:48:42.443150: step: 308/459, loss: 0.14882059395313263 2023-01-24 01:48:43.164190: step: 310/459, loss: 0.11908257752656937 2023-01-24 01:48:43.791337: step: 312/459, loss: 0.07407360523939133 2023-01-24 01:48:44.396294: step: 314/459, loss: 0.20116478204727173 2023-01-24 01:48:44.983735: step: 316/459, loss: 0.13268595933914185 2023-01-24 01:48:45.691334: step: 318/459, loss: 0.08104398846626282 2023-01-24 01:48:46.328703: step: 320/459, loss: 0.16287092864513397 2023-01-24 01:48:46.920546: step: 322/459, loss: 1.1232050657272339 2023-01-24 01:48:47.529389: step: 324/459, loss: 0.2392491102218628 2023-01-24 01:48:48.166993: step: 326/459, loss: 0.06052554398775101 2023-01-24 01:48:48.793443: step: 328/459, loss: 0.07009230554103851 2023-01-24 01:48:49.367053: step: 330/459, loss: 0.10510993003845215 2023-01-24 01:48:50.029902: step: 332/459, loss: 0.44413667917251587 2023-01-24 01:48:50.623746: step: 334/459, loss: 0.07855276018381119 2023-01-24 01:48:51.252460: step: 336/459, loss: 0.13609832525253296 2023-01-24 01:48:51.829925: step: 338/459, loss: 0.24876734614372253 2023-01-24 01:48:52.419564: step: 340/459, loss: 0.10263845324516296 2023-01-24 01:48:53.123317: step: 342/459, loss: 0.2852824926376343 2023-01-24 01:48:53.728593: step: 344/459, loss: 0.10422408580780029 2023-01-24 01:48:54.436834: step: 346/459, loss: 0.042843908071517944 2023-01-24 01:48:55.178915: step: 348/459, loss: 0.14542269706726074 2023-01-24 01:48:55.784395: step: 350/459, loss: 0.14614813029766083 2023-01-24 01:48:56.398264: step: 352/459, loss: 0.03630266711115837 2023-01-24 01:48:57.018225: step: 354/459, loss: 0.2918793559074402 2023-01-24 01:48:57.631634: step: 356/459, loss: 0.11473110318183899 2023-01-24 01:48:58.291013: step: 358/459, loss: 0.06831765174865723 2023-01-24 01:48:58.879315: step: 360/459, loss: 0.038236815482378006 2023-01-24 01:48:59.472751: step: 362/459, loss: 0.06797708570957184 2023-01-24 01:49:00.052503: step: 364/459, loss: 0.11908730864524841 2023-01-24 01:49:00.630517: step: 366/459, loss: 0.05507790297269821 2023-01-24 01:49:01.233149: step: 368/459, loss: 0.10803884267807007 2023-01-24 01:49:01.827892: step: 370/459, loss: 0.12169502675533295 2023-01-24 01:49:02.419404: step: 372/459, loss: 0.22376932203769684 2023-01-24 01:49:03.094066: step: 374/459, loss: 0.18182383477687836 2023-01-24 01:49:03.729936: step: 376/459, loss: 0.10743743926286697 2023-01-24 01:49:04.305259: step: 378/459, loss: 0.47045964002609253 2023-01-24 01:49:04.898412: step: 380/459, loss: 0.23800572752952576 2023-01-24 01:49:05.485113: step: 382/459, loss: 0.1129157766699791 2023-01-24 
01:49:06.047337: step: 384/459, loss: 0.07695455849170685 2023-01-24 01:49:06.623529: step: 386/459, loss: 0.2640870213508606 2023-01-24 01:49:07.206847: step: 388/459, loss: 0.04599899426102638 2023-01-24 01:49:07.777444: step: 390/459, loss: 0.0551254078745842 2023-01-24 01:49:08.364211: step: 392/459, loss: 0.011186243034899235 2023-01-24 01:49:08.995559: step: 394/459, loss: 0.26274728775024414 2023-01-24 01:49:09.562179: step: 396/459, loss: 0.1954510509967804 2023-01-24 01:49:10.183281: step: 398/459, loss: 0.3502616286277771 2023-01-24 01:49:10.746271: step: 400/459, loss: 0.07230685651302338 2023-01-24 01:49:11.399902: step: 402/459, loss: 0.09598079323768616 2023-01-24 01:49:11.981701: step: 404/459, loss: 0.11200552433729172 2023-01-24 01:49:12.581921: step: 406/459, loss: 0.35171154141426086 2023-01-24 01:49:13.249659: step: 408/459, loss: 0.6112487316131592 2023-01-24 01:49:13.864885: step: 410/459, loss: 0.08580570667982101 2023-01-24 01:49:14.470988: step: 412/459, loss: 0.0693792924284935 2023-01-24 01:49:15.086532: step: 414/459, loss: 0.4321889877319336 2023-01-24 01:49:15.663831: step: 416/459, loss: 0.17081286013126373 2023-01-24 01:49:16.303148: step: 418/459, loss: 0.6067904829978943 2023-01-24 01:49:16.930548: step: 420/459, loss: 0.027009110897779465 2023-01-24 01:49:17.606103: step: 422/459, loss: 0.0900254175066948 2023-01-24 01:49:18.261958: step: 424/459, loss: 0.2595897614955902 2023-01-24 01:49:18.889084: step: 426/459, loss: 0.5619332790374756 2023-01-24 01:49:19.489034: step: 428/459, loss: 0.10177306085824966 2023-01-24 01:49:20.067084: step: 430/459, loss: 0.03195422142744064 2023-01-24 01:49:20.677867: step: 432/459, loss: 0.23108962178230286 2023-01-24 01:49:21.375319: step: 434/459, loss: 0.14750134944915771 2023-01-24 01:49:21.933481: step: 436/459, loss: 0.0558532290160656 2023-01-24 01:49:22.562866: step: 438/459, loss: 0.06379338353872299 2023-01-24 01:49:23.199575: step: 440/459, loss: 0.12274406850337982 2023-01-24 01:49:23.796422: step: 442/459, loss: 0.15684494376182556 2023-01-24 01:49:24.393966: step: 444/459, loss: 0.0475209541618824 2023-01-24 01:49:24.998846: step: 446/459, loss: 0.06925489008426666 2023-01-24 01:49:25.628558: step: 448/459, loss: 0.09914302825927734 2023-01-24 01:49:26.258251: step: 450/459, loss: 0.4216490089893341 2023-01-24 01:49:26.897494: step: 452/459, loss: 0.4556877613067627 2023-01-24 01:49:27.618439: step: 454/459, loss: 0.11346312612295151 2023-01-24 01:49:28.273558: step: 456/459, loss: 0.07898890972137451 2023-01-24 01:49:28.908301: step: 458/459, loss: 0.06417644023895264 2023-01-24 01:49:29.479962: step: 460/459, loss: 0.11428433656692505 2023-01-24 01:49:30.129630: step: 462/459, loss: 0.09117647260427475 2023-01-24 01:49:30.741120: step: 464/459, loss: 0.03944198042154312 2023-01-24 01:49:31.343596: step: 466/459, loss: 0.12720181047916412 2023-01-24 01:49:31.988218: step: 468/459, loss: 0.17071837186813354 2023-01-24 01:49:32.536430: step: 470/459, loss: 0.04911182075738907 2023-01-24 01:49:33.157715: step: 472/459, loss: 0.01934375986456871 2023-01-24 01:49:33.801385: step: 474/459, loss: 0.12653182446956635 2023-01-24 01:49:34.365440: step: 476/459, loss: 0.5617062449455261 2023-01-24 01:49:35.004486: step: 478/459, loss: 0.07976988703012466 2023-01-24 01:49:35.554368: step: 480/459, loss: 0.0431186817586422 2023-01-24 01:49:36.174172: step: 482/459, loss: 0.11377106606960297 2023-01-24 01:49:36.801551: step: 484/459, loss: 0.09005872160196304 2023-01-24 01:49:37.436506: step: 486/459, loss: 
0.11476094275712967 2023-01-24 01:49:38.171799: step: 488/459, loss: 0.7353214621543884 2023-01-24 01:49:38.793111: step: 490/459, loss: 0.12986165285110474 2023-01-24 01:49:39.414127: step: 492/459, loss: 0.027242425829172134 2023-01-24 01:49:40.062533: step: 494/459, loss: 0.2828725576400757 2023-01-24 01:49:40.700969: step: 496/459, loss: 0.058652929961681366 2023-01-24 01:49:41.304826: step: 498/459, loss: 0.10764553397893906 2023-01-24 01:49:41.934300: step: 500/459, loss: 0.10296376049518585 2023-01-24 01:49:42.528334: step: 502/459, loss: 0.08665664494037628 2023-01-24 01:49:43.154296: step: 504/459, loss: 0.12862353026866913 2023-01-24 01:49:43.762246: step: 506/459, loss: 0.07835648208856583 2023-01-24 01:49:44.456699: step: 508/459, loss: 0.18854781985282898 2023-01-24 01:49:45.068340: step: 510/459, loss: 0.06129857152700424 2023-01-24 01:49:45.733291: step: 512/459, loss: 0.24677276611328125 2023-01-24 01:49:46.326753: step: 514/459, loss: 0.11707513779401779 2023-01-24 01:49:46.917582: step: 516/459, loss: 0.048116788268089294 2023-01-24 01:49:47.486543: step: 518/459, loss: 0.0991760641336441 2023-01-24 01:49:48.145795: step: 520/459, loss: 0.809011697769165 2023-01-24 01:49:48.719576: step: 522/459, loss: 0.1262689232826233 2023-01-24 01:49:49.340534: step: 524/459, loss: 0.03568524122238159 2023-01-24 01:49:49.920000: step: 526/459, loss: 0.10908528417348862 2023-01-24 01:49:50.571628: step: 528/459, loss: 0.5340299606323242 2023-01-24 01:49:51.226945: step: 530/459, loss: 0.09728576987981796 2023-01-24 01:49:51.835564: step: 532/459, loss: 0.07387314736843109 2023-01-24 01:49:52.497938: step: 534/459, loss: 0.1314755380153656 2023-01-24 01:49:53.141631: step: 536/459, loss: 0.070738285779953 2023-01-24 01:49:53.776811: step: 538/459, loss: 0.07075928896665573 2023-01-24 01:49:54.337363: step: 540/459, loss: 0.04548231512308121 2023-01-24 01:49:55.032850: step: 542/459, loss: 0.45353469252586365 2023-01-24 01:49:55.654065: step: 544/459, loss: 0.08505791425704956 2023-01-24 01:49:56.243271: step: 546/459, loss: 0.21452750265598297 2023-01-24 01:49:56.867040: step: 548/459, loss: 0.10988228768110275 2023-01-24 01:49:57.615093: step: 550/459, loss: 0.3038367033004761 2023-01-24 01:49:58.207639: step: 552/459, loss: 0.11341913044452667 2023-01-24 01:49:58.814007: step: 554/459, loss: 0.046741973608732224 2023-01-24 01:49:59.330842: step: 556/459, loss: 0.455349862575531 2023-01-24 01:49:59.892567: step: 558/459, loss: 0.08719022572040558 2023-01-24 01:50:00.550070: step: 560/459, loss: 0.2755070924758911 2023-01-24 01:50:01.351485: step: 562/459, loss: 0.14134597778320312 2023-01-24 01:50:01.965001: step: 564/459, loss: 0.08809281140565872 2023-01-24 01:50:02.538809: step: 566/459, loss: 0.09820317476987839 2023-01-24 01:50:03.210063: step: 568/459, loss: 0.09391652792692184 2023-01-24 01:50:03.821901: step: 570/459, loss: 0.39060041308403015 2023-01-24 01:50:04.427561: step: 572/459, loss: 0.7656233310699463 2023-01-24 01:50:05.043266: step: 574/459, loss: 0.08267928659915924 2023-01-24 01:50:05.659394: step: 576/459, loss: 0.048019107431173325 2023-01-24 01:50:06.255612: step: 578/459, loss: 0.09421752393245697 2023-01-24 01:50:06.896394: step: 580/459, loss: 0.04087572172284126 2023-01-24 01:50:07.602650: step: 582/459, loss: 0.11955372989177704 2023-01-24 01:50:08.240079: step: 584/459, loss: 0.05659904703497887 2023-01-24 01:50:08.825721: step: 586/459, loss: 0.24365054070949554 2023-01-24 01:50:09.411878: step: 588/459, loss: 0.10278130322694778 2023-01-24 
01:50:09.946798: step: 590/459, loss: 0.04839910939335823 2023-01-24 01:50:10.637598: step: 592/459, loss: 0.18757818639278412 2023-01-24 01:50:11.229733: step: 594/459, loss: 0.18054023385047913 2023-01-24 01:50:11.860886: step: 596/459, loss: 0.09416694194078445 2023-01-24 01:50:12.533336: step: 598/459, loss: 0.09597768634557724 2023-01-24 01:50:13.171152: step: 600/459, loss: 0.09956438094377518 2023-01-24 01:50:13.755618: step: 602/459, loss: 0.06264859437942505 2023-01-24 01:50:14.397501: step: 604/459, loss: 0.08785153180360794 2023-01-24 01:50:15.073204: step: 606/459, loss: 0.10613507777452469 2023-01-24 01:50:15.709728: step: 608/459, loss: 0.08422931283712387 2023-01-24 01:50:16.353564: step: 610/459, loss: 0.055667366832494736 2023-01-24 01:50:17.020478: step: 612/459, loss: 0.149459108710289 2023-01-24 01:50:17.589552: step: 614/459, loss: 0.2561667263507843 2023-01-24 01:50:18.185123: step: 616/459, loss: 0.014382944442331791 2023-01-24 01:50:18.891921: step: 618/459, loss: 0.1527315378189087 2023-01-24 01:50:19.467793: step: 620/459, loss: 0.052585016936063766 2023-01-24 01:50:20.178366: step: 622/459, loss: 0.09421343356370926 2023-01-24 01:50:20.799198: step: 624/459, loss: 0.0293054711073637 2023-01-24 01:50:21.377165: step: 626/459, loss: 0.04551135376095772 2023-01-24 01:50:22.010185: step: 628/459, loss: 0.19119015336036682 2023-01-24 01:50:22.689030: step: 630/459, loss: 0.19080600142478943 2023-01-24 01:50:23.279764: step: 632/459, loss: 0.055394552648067474 2023-01-24 01:50:23.889259: step: 634/459, loss: 0.13019052147865295 2023-01-24 01:50:24.529651: step: 636/459, loss: 0.1013282984495163 2023-01-24 01:50:25.206907: step: 638/459, loss: 0.27969294786453247 2023-01-24 01:50:25.834976: step: 640/459, loss: 0.22533421218395233 2023-01-24 01:50:26.405610: step: 642/459, loss: 0.22325685620307922 2023-01-24 01:50:27.011204: step: 644/459, loss: 0.061694953590631485 2023-01-24 01:50:27.628003: step: 646/459, loss: 0.12384023517370224 2023-01-24 01:50:28.215082: step: 648/459, loss: 0.18374159932136536 2023-01-24 01:50:28.878877: step: 650/459, loss: 0.04642844200134277 2023-01-24 01:50:29.441451: step: 652/459, loss: 3.795876979827881 2023-01-24 01:50:30.063600: step: 654/459, loss: 0.12703970074653625 2023-01-24 01:50:30.720024: step: 656/459, loss: 0.14815233647823334 2023-01-24 01:50:31.336677: step: 658/459, loss: 0.27131423354148865 2023-01-24 01:50:31.928149: step: 660/459, loss: 0.08376535028219223 2023-01-24 01:50:32.605776: step: 662/459, loss: 0.11034833639860153 2023-01-24 01:50:33.302119: step: 664/459, loss: 0.1698511689901352 2023-01-24 01:50:33.974793: step: 666/459, loss: 0.06348570436239243 2023-01-24 01:50:34.586900: step: 668/459, loss: 0.17116065323352814 2023-01-24 01:50:35.192860: step: 670/459, loss: 0.07903848588466644 2023-01-24 01:50:35.819564: step: 672/459, loss: 0.13621783256530762 2023-01-24 01:50:36.483670: step: 674/459, loss: 0.10474523156881332 2023-01-24 01:50:37.106418: step: 676/459, loss: 0.06439889967441559 2023-01-24 01:50:37.754243: step: 678/459, loss: 0.5890564322471619 2023-01-24 01:50:38.415981: step: 680/459, loss: 0.07288604229688644 2023-01-24 01:50:39.085462: step: 682/459, loss: 0.24196261167526245 2023-01-24 01:50:39.761031: step: 684/459, loss: 0.09915906935930252 2023-01-24 01:50:40.366670: step: 686/459, loss: 0.06693477928638458 2023-01-24 01:50:41.003113: step: 688/459, loss: 0.09477308392524719 2023-01-24 01:50:41.628625: step: 690/459, loss: 0.5285822153091431 2023-01-24 01:50:42.240444: step: 692/459, loss: 
0.09251757711172104 2023-01-24 01:50:42.844977: step: 694/459, loss: 0.16044504940509796 2023-01-24 01:50:43.482137: step: 696/459, loss: 0.18977223336696625 2023-01-24 01:50:44.067729: step: 698/459, loss: 0.05620906502008438 2023-01-24 01:50:44.714594: step: 700/459, loss: 0.050074223428964615 2023-01-24 01:50:45.300348: step: 702/459, loss: 0.1588631272315979 2023-01-24 01:50:45.915352: step: 704/459, loss: 0.2731025516986847 2023-01-24 01:50:46.496885: step: 706/459, loss: 1.2417627573013306 2023-01-24 01:50:47.143226: step: 708/459, loss: 0.13880658149719238 2023-01-24 01:50:47.818211: step: 710/459, loss: 0.031058253720402718 2023-01-24 01:50:48.416329: step: 712/459, loss: 0.27914634346961975 2023-01-24 01:50:49.039337: step: 714/459, loss: 0.5552509427070618 2023-01-24 01:50:49.733240: step: 716/459, loss: 0.2688225209712982 2023-01-24 01:50:50.474854: step: 718/459, loss: 0.29251009225845337 2023-01-24 01:50:51.209662: step: 720/459, loss: 0.16504669189453125 2023-01-24 01:50:51.860042: step: 722/459, loss: 0.12993067502975464 2023-01-24 01:50:52.452413: step: 724/459, loss: 0.06920645385980606 2023-01-24 01:50:53.148040: step: 726/459, loss: 0.0663805603981018 2023-01-24 01:50:53.768586: step: 728/459, loss: 0.12222901731729507 2023-01-24 01:50:54.444137: step: 730/459, loss: 0.2582072615623474 2023-01-24 01:50:55.031883: step: 732/459, loss: 0.053628019988536835 2023-01-24 01:50:55.597693: step: 734/459, loss: 0.05099410191178322 2023-01-24 01:50:56.236578: step: 736/459, loss: 0.2511037290096283 2023-01-24 01:50:56.866039: step: 738/459, loss: 0.33282971382141113 2023-01-24 01:50:57.470892: step: 740/459, loss: 1.8570572137832642 2023-01-24 01:50:58.098707: step: 742/459, loss: 0.20452454686164856 2023-01-24 01:50:58.715010: step: 744/459, loss: 0.25265881419181824 2023-01-24 01:50:59.314867: step: 746/459, loss: 0.035049859434366226 2023-01-24 01:50:59.942865: step: 748/459, loss: 0.12552158534526825 2023-01-24 01:51:00.560673: step: 750/459, loss: 0.11555209755897522 2023-01-24 01:51:01.204013: step: 752/459, loss: 0.31255215406417847 2023-01-24 01:51:01.924905: step: 754/459, loss: 0.03255574405193329 2023-01-24 01:51:02.480026: step: 756/459, loss: 0.11754315346479416 2023-01-24 01:51:03.108735: step: 758/459, loss: 0.09866669028997421 2023-01-24 01:51:03.747559: step: 760/459, loss: 0.05652007833123207 2023-01-24 01:51:04.367454: step: 762/459, loss: 0.054239556193351746 2023-01-24 01:51:05.022589: step: 764/459, loss: 0.24114306271076202 2023-01-24 01:51:05.586896: step: 766/459, loss: 0.1191919595003128 2023-01-24 01:51:06.229799: step: 768/459, loss: 0.15579940378665924 2023-01-24 01:51:06.806385: step: 770/459, loss: 0.13957533240318298 2023-01-24 01:51:07.456566: step: 772/459, loss: 0.042994435876607895 2023-01-24 01:51:08.133529: step: 774/459, loss: 0.3709518313407898 2023-01-24 01:51:08.793880: step: 776/459, loss: 0.40835461020469666 2023-01-24 01:51:09.382589: step: 778/459, loss: 0.08590472489595413 2023-01-24 01:51:10.009407: step: 780/459, loss: 0.17813268303871155 2023-01-24 01:51:10.590871: step: 782/459, loss: 0.19230616092681885 2023-01-24 01:51:11.240969: step: 784/459, loss: 0.12376800179481506 2023-01-24 01:51:11.939526: step: 786/459, loss: 0.017171841114759445 2023-01-24 01:51:12.562420: step: 788/459, loss: 0.5739054083824158 2023-01-24 01:51:13.150143: step: 790/459, loss: 0.08669405430555344 2023-01-24 01:51:13.785006: step: 792/459, loss: 0.21030326187610626 2023-01-24 01:51:14.374435: step: 794/459, loss: 0.1605069786310196 2023-01-24 
01:51:14.998129: step: 796/459, loss: 0.04146850109100342 2023-01-24 01:51:15.559764: step: 798/459, loss: 0.10982034355401993 2023-01-24 01:51:16.246357: step: 800/459, loss: 0.1273861825466156 2023-01-24 01:51:16.875561: step: 802/459, loss: 0.04644719511270523 2023-01-24 01:51:17.465727: step: 804/459, loss: 0.2607214152812958 2023-01-24 01:51:18.060022: step: 806/459, loss: 0.14565302431583405 2023-01-24 01:51:18.698683: step: 808/459, loss: 0.11632492393255234 2023-01-24 01:51:19.346997: step: 810/459, loss: 0.049425795674324036 2023-01-24 01:51:19.969262: step: 812/459, loss: 0.6391122341156006 2023-01-24 01:51:20.621000: step: 814/459, loss: 0.05525633692741394 2023-01-24 01:51:21.206548: step: 816/459, loss: 0.04894682392477989 2023-01-24 01:51:21.774469: step: 818/459, loss: 0.0934777781367302 2023-01-24 01:51:22.369264: step: 820/459, loss: 0.1285315304994583 2023-01-24 01:51:22.896570: step: 822/459, loss: 0.06747044622898102 2023-01-24 01:51:23.511789: step: 824/459, loss: 0.07628713548183441 2023-01-24 01:51:24.103674: step: 826/459, loss: 0.15469160676002502 2023-01-24 01:51:24.702121: step: 828/459, loss: 0.13639603555202484 2023-01-24 01:51:25.411841: step: 830/459, loss: 0.09403097629547119 2023-01-24 01:51:26.013803: step: 832/459, loss: 0.6119087934494019 2023-01-24 01:51:26.659344: step: 834/459, loss: 0.0846157893538475 2023-01-24 01:51:27.298753: step: 836/459, loss: 0.15547087788581848 2023-01-24 01:51:27.882082: step: 838/459, loss: 0.12387694418430328 2023-01-24 01:51:28.440698: step: 840/459, loss: 0.4652208983898163 2023-01-24 01:51:29.073514: step: 842/459, loss: 0.5553849935531616 2023-01-24 01:51:29.710590: step: 844/459, loss: 0.2445918321609497 2023-01-24 01:51:30.364466: step: 846/459, loss: 0.14093023538589478 2023-01-24 01:51:30.959881: step: 848/459, loss: 0.25062164664268494 2023-01-24 01:51:31.571320: step: 850/459, loss: 0.22560374438762665 2023-01-24 01:51:32.162957: step: 852/459, loss: 0.2648850679397583 2023-01-24 01:51:32.756941: step: 854/459, loss: 0.15741512179374695 2023-01-24 01:51:33.322245: step: 856/459, loss: 0.13665825128555298 2023-01-24 01:51:33.914145: step: 858/459, loss: 0.1702839583158493 2023-01-24 01:51:34.513199: step: 860/459, loss: 0.30056437849998474 2023-01-24 01:51:35.115145: step: 862/459, loss: 0.12614190578460693 2023-01-24 01:51:35.687689: step: 864/459, loss: 0.0721745491027832 2023-01-24 01:51:36.306580: step: 866/459, loss: 0.5048778653144836 2023-01-24 01:51:36.955693: step: 868/459, loss: 0.047150369733572006 2023-01-24 01:51:37.570304: step: 870/459, loss: 0.379112184047699 2023-01-24 01:51:38.189567: step: 872/459, loss: 0.10085912048816681 2023-01-24 01:51:38.781557: step: 874/459, loss: 0.10351745784282684 2023-01-24 01:51:39.376434: step: 876/459, loss: 0.10504743456840515 2023-01-24 01:51:39.925651: step: 878/459, loss: 0.19310523569583893 2023-01-24 01:51:40.555646: step: 880/459, loss: 0.12399845570325851 2023-01-24 01:51:41.142484: step: 882/459, loss: 0.022707439959049225 2023-01-24 01:51:41.725036: step: 884/459, loss: 0.1568664312362671 2023-01-24 01:51:42.279048: step: 886/459, loss: 0.06377475708723068 2023-01-24 01:51:42.887940: step: 888/459, loss: 0.15527117252349854 2023-01-24 01:51:43.514748: step: 890/459, loss: 0.12827689945697784 2023-01-24 01:51:44.166037: step: 892/459, loss: 0.17329305410385132 2023-01-24 01:51:44.786697: step: 894/459, loss: 0.10602427273988724 2023-01-24 01:51:45.327944: step: 896/459, loss: 1.6542359590530396 2023-01-24 01:51:45.829268: step: 898/459, loss: 
0.14174148440361023 2023-01-24 01:51:46.388807: step: 900/459, loss: 0.06295634806156158 2023-01-24 01:51:46.960082: step: 902/459, loss: 0.05797145143151283 2023-01-24 01:51:47.553989: step: 904/459, loss: 0.05755578726530075 2023-01-24 01:51:48.170799: step: 906/459, loss: 0.06902572512626648 2023-01-24 01:51:48.775971: step: 908/459, loss: 0.11544841527938843 2023-01-24 01:51:49.423446: step: 910/459, loss: 0.08728015422821045 2023-01-24 01:51:50.015391: step: 912/459, loss: 0.028572477400302887 2023-01-24 01:51:50.600242: step: 914/459, loss: 0.07147645205259323 2023-01-24 01:51:51.246247: step: 916/459, loss: 0.08885826915502548 2023-01-24 01:51:51.938256: step: 918/459, loss: 1.724076747894287 2023-01-24 01:51:52.420016: step: 920/459, loss: 0.011140746995806694
==================================================
Loss: 0.207
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34171414355699414, 'r': 0.32226172551769655, 'f1': 0.33170298700747286}, 'combined': 0.2444127272686642, 'epoch': 14}
Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.35385391249923254, 'r': 0.29948908412435044, 'f1': 0.32440964306921266}, 'combined': 0.20762217156429608, 'epoch': 14}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3421782178217822, 'r': 0.3278937381404175, 'f1': 0.3348837209302326}, 'combined': 0.24675642594859243, 'epoch': 14}
Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3490288241943122, 'r': 0.2912804187367078, 'f1': 0.3175505060558757}, 'combined': 0.20323232387576043, 'epoch': 14}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3514228942603306, 'r': 0.32074841013134536, 'f1': 0.3353857383714663}, 'combined': 0.24712633353686989, 'epoch': 14}
Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.35809918795458123, 'r': 0.3137444615776932, 'f1': 0.3344576881672885}, 'combined': 0.23979985189352762, 'epoch': 14}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25735294117647056, 'r': 0.25, 'f1': 0.25362318840579706}, 'combined': 0.1690821256038647, 'epoch': 14}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2777777777777778, 'r': 0.32608695652173914, 'f1': 0.3}, 'combined': 0.15, 'epoch': 14}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2727272727272727, 'r': 0.10344827586206896, 'f1': 0.15}, 'combined': 0.09999999999999999, 'epoch': 14}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 'epoch': 10}
Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 'epoch': 10}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 
'epoch': 10} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3342478880342958, 'r': 0.3266369304319968, 'f1': 0.33039858414138645}, 'combined': 0.24345158831470579, 'epoch': 5} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3413499740991752, 'r': 0.24608229950967814, 'f1': 0.28599105067157526}, 'combined': 0.18303427242980813, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3269230769230769, 'r': 0.3695652173913043, 'f1': 0.346938775510204}, 'combined': 0.173469387755102, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8} ****************************** Epoch: 15 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 01:54:26.064185: step: 2/459, loss: 0.13628347218036652 2023-01-24 01:54:26.653716: step: 4/459, loss: 0.14490050077438354 2023-01-24 01:54:27.201100: step: 6/459, loss: 0.18433257937431335 2023-01-24 01:54:27.771395: step: 8/459, loss: 0.11004992574453354 2023-01-24 01:54:28.432326: step: 10/459, loss: 0.09675870090723038 2023-01-24 01:54:29.018510: step: 12/459, loss: 0.30624130368232727 2023-01-24 01:54:29.550464: step: 14/459, loss: 0.03492650389671326 2023-01-24 01:54:30.213273: step: 16/459, loss: 0.5975997447967529 2023-01-24 01:54:30.838118: step: 18/459, loss: 0.04342330992221832 2023-01-24 01:54:31.481074: step: 20/459, loss: 0.21024519205093384 2023-01-24 01:54:32.057375: step: 22/459, loss: 0.06675899773836136 2023-01-24 01:54:32.667433: step: 24/459, loss: 0.05311901122331619 2023-01-24 01:54:33.257130: step: 26/459, loss: 0.053255315870046616 2023-01-24 01:54:33.870846: step: 28/459, loss: 0.06237369030714035 2023-01-24 01:54:34.450237: step: 30/459, loss: 0.039603497833013535 2023-01-24 01:54:35.054507: step: 32/459, loss: 0.06694106012582779 2023-01-24 01:54:35.751498: step: 34/459, loss: 0.19580602645874023 2023-01-24 01:54:36.399728: step: 36/459, loss: 0.08370650559663773 2023-01-24 01:54:37.048913: step: 38/459, loss: 0.06686625629663467 2023-01-24 01:54:37.632682: step: 40/459, loss: 0.11586157232522964 2023-01-24 01:54:38.212513: step: 42/459, loss: 0.04276254028081894 2023-01-24 01:54:38.840178: step: 44/459, loss: 0.037264756858348846 2023-01-24 01:54:39.539566: step: 46/459, loss: 0.08758223801851273 2023-01-24 01:54:40.162905: step: 48/459, loss: 0.5865753889083862 2023-01-24 01:54:40.825570: step: 50/459, loss: 0.2779299020767212 2023-01-24 01:54:41.431135: step: 52/459, loss: 0.06506766378879547 2023-01-24 01:54:41.986363: step: 54/459, loss: 0.12780053913593292 2023-01-24 01:54:42.570169: step: 56/459, loss: 0.03199302405118942 2023-01-24 
01:54:43.179312: step: 58/459, loss: 0.1341077983379364 2023-01-24 01:54:43.811009: step: 60/459, loss: 0.023114070296287537 2023-01-24 01:54:44.496301: step: 62/459, loss: 0.1059298887848854 2023-01-24 01:54:45.131464: step: 64/459, loss: 0.06723789870738983 2023-01-24 01:54:45.785791: step: 66/459, loss: 0.10310249030590057 2023-01-24 01:54:46.442850: step: 68/459, loss: 0.024783069267868996 2023-01-24 01:54:47.060815: step: 70/459, loss: 0.20334839820861816 2023-01-24 01:54:47.594388: step: 72/459, loss: 0.04349376633763313 2023-01-24 01:54:48.269768: step: 74/459, loss: 0.06285911053419113 2023-01-24 01:54:48.843265: step: 76/459, loss: 0.09787029027938843 2023-01-24 01:54:49.520853: step: 78/459, loss: 0.8786290287971497 2023-01-24 01:54:50.138951: step: 80/459, loss: 0.056976333260536194 2023-01-24 01:54:50.744299: step: 82/459, loss: 0.3456345796585083 2023-01-24 01:54:51.328127: step: 84/459, loss: 0.11365010589361191 2023-01-24 01:54:51.994205: step: 86/459, loss: 0.018736111000180244 2023-01-24 01:54:52.570085: step: 88/459, loss: 0.08410734683275223 2023-01-24 01:54:53.253724: step: 90/459, loss: 0.12327621132135391 2023-01-24 01:54:53.855597: step: 92/459, loss: 0.10233281552791595 2023-01-24 01:54:54.508618: step: 94/459, loss: 0.24814076721668243 2023-01-24 01:54:55.232578: step: 96/459, loss: 0.12884032726287842 2023-01-24 01:54:55.928226: step: 98/459, loss: 0.03893974795937538 2023-01-24 01:54:56.516929: step: 100/459, loss: 0.09016532450914383 2023-01-24 01:54:57.085801: step: 102/459, loss: 0.8095468282699585 2023-01-24 01:54:57.723458: step: 104/459, loss: 0.04039709270000458 2023-01-24 01:54:58.329282: step: 106/459, loss: 0.234324112534523 2023-01-24 01:54:58.973149: step: 108/459, loss: 0.0964934304356575 2023-01-24 01:54:59.558198: step: 110/459, loss: 0.044364187866449356 2023-01-24 01:55:00.163661: step: 112/459, loss: 0.16323620080947876 2023-01-24 01:55:00.713387: step: 114/459, loss: 0.040803972631692886 2023-01-24 01:55:01.347330: step: 116/459, loss: 0.09072820097208023 2023-01-24 01:55:01.936638: step: 118/459, loss: 0.06396462023258209 2023-01-24 01:55:02.501940: step: 120/459, loss: 0.6941345930099487 2023-01-24 01:55:03.103817: step: 122/459, loss: 0.1566065102815628 2023-01-24 01:55:03.844912: step: 124/459, loss: 0.3948741555213928 2023-01-24 01:55:04.485119: step: 126/459, loss: 0.07519743591547012 2023-01-24 01:55:05.136907: step: 128/459, loss: 0.06377793848514557 2023-01-24 01:55:05.723596: step: 130/459, loss: 0.06452328711748123 2023-01-24 01:55:06.334295: step: 132/459, loss: 0.03893287479877472 2023-01-24 01:55:06.968279: step: 134/459, loss: 0.09163550287485123 2023-01-24 01:55:07.613321: step: 136/459, loss: 0.10897832363843918 2023-01-24 01:55:08.234616: step: 138/459, loss: 0.06684568524360657 2023-01-24 01:55:08.880608: step: 140/459, loss: 0.043043043464422226 2023-01-24 01:55:09.476472: step: 142/459, loss: 0.17324858903884888 2023-01-24 01:55:10.072046: step: 144/459, loss: 0.070863276720047 2023-01-24 01:55:10.678607: step: 146/459, loss: 0.06099698320031166 2023-01-24 01:55:11.314058: step: 148/459, loss: 0.4718403220176697 2023-01-24 01:55:11.923734: step: 150/459, loss: 0.1099737137556076 2023-01-24 01:55:12.555612: step: 152/459, loss: 0.359775573015213 2023-01-24 01:55:13.129314: step: 154/459, loss: 0.14161361753940582 2023-01-24 01:55:13.862516: step: 156/459, loss: 0.16305889189243317 2023-01-24 01:55:14.504441: step: 158/459, loss: 0.03383931517601013 2023-01-24 01:55:15.118683: step: 160/459, loss: 0.048995938152074814 
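A note on how the evaluation blocks above appear to be scored: the reported f1 values are consistent with the usual harmonic mean of p and r, and each 'combined' value matches the template f1 multiplied by the slot f1 (for the epoch-14 Dev Chinese block, 0.7368421 x 0.3317030 = 0.2444127). The Python sketch below reproduces that arithmetic for spot-checking the log; it is an inference from the printed numbers, not code taken from train.py.

def f1(p, r):
    # standard F1: harmonic mean of precision and recall
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

# Spot-check against the epoch-14 Dev Chinese block above.
template = {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}
slot = {'p': 0.34171414355699414, 'r': 0.32226172551769655, 'f1': 0.33170298700747286}
assert abs(f1(template['p'], template['r']) - template['f1']) < 1e-6
assert abs(f1(slot['p'], slot['r']) - slot['f1']) < 1e-6
# the logged 'combined' value equals template f1 * slot f1
assert abs(template['f1'] * slot['f1'] - 0.2444127272686642) < 1e-6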
2023-01-24 01:55:15.711309: step: 162/459, loss: 0.21852436661720276 2023-01-24 01:55:16.358870: step: 164/459, loss: 0.11325350403785706 2023-01-24 01:55:16.939729: step: 166/459, loss: 0.1643737107515335 2023-01-24 01:55:17.618595: step: 168/459, loss: 0.05087772011756897 2023-01-24 01:55:18.223578: step: 170/459, loss: 0.5875586867332458 2023-01-24 01:55:18.838429: step: 172/459, loss: 0.12939634919166565 2023-01-24 01:55:19.471917: step: 174/459, loss: 0.0880008339881897 2023-01-24 01:55:20.022943: step: 176/459, loss: 0.01276240311563015 2023-01-24 01:55:20.606169: step: 178/459, loss: 0.14827550947666168 2023-01-24 01:55:21.229747: step: 180/459, loss: 0.21899206936359406 2023-01-24 01:55:21.888334: step: 182/459, loss: 0.013766929507255554 2023-01-24 01:55:22.504391: step: 184/459, loss: 0.02085316926240921 2023-01-24 01:55:23.107422: step: 186/459, loss: 0.07372023910284042 2023-01-24 01:55:23.732963: step: 188/459, loss: 0.15617088973522186 2023-01-24 01:55:24.333443: step: 190/459, loss: 0.17092062532901764 2023-01-24 01:55:24.917639: step: 192/459, loss: 0.1659872978925705 2023-01-24 01:55:25.610150: step: 194/459, loss: 0.0672636330127716 2023-01-24 01:55:26.243432: step: 196/459, loss: 0.0559796579182148 2023-01-24 01:55:26.813111: step: 198/459, loss: 0.9925041794776917 2023-01-24 01:55:27.405687: step: 200/459, loss: 0.12009792774915695 2023-01-24 01:55:28.053224: step: 202/459, loss: 0.06060430780053139 2023-01-24 01:55:28.682388: step: 204/459, loss: 0.5011839866638184 2023-01-24 01:55:29.359753: step: 206/459, loss: 0.4937766492366791 2023-01-24 01:55:29.940399: step: 208/459, loss: 0.14324666559696198 2023-01-24 01:55:30.538360: step: 210/459, loss: 0.13871672749519348 2023-01-24 01:55:31.165768: step: 212/459, loss: 0.16501972079277039 2023-01-24 01:55:31.791116: step: 214/459, loss: 0.18192964792251587 2023-01-24 01:55:32.478119: step: 216/459, loss: 0.4802156388759613 2023-01-24 01:55:33.221940: step: 218/459, loss: 0.16074521839618683 2023-01-24 01:55:33.801188: step: 220/459, loss: 0.2012537270784378 2023-01-24 01:55:34.412564: step: 222/459, loss: 0.06403632462024689 2023-01-24 01:55:35.090219: step: 224/459, loss: 0.1150350496172905 2023-01-24 01:55:35.708157: step: 226/459, loss: 0.2918533682823181 2023-01-24 01:55:36.287889: step: 228/459, loss: 0.09867390245199203 2023-01-24 01:55:36.881498: step: 230/459, loss: 0.18079960346221924 2023-01-24 01:55:37.506933: step: 232/459, loss: 0.09180627763271332 2023-01-24 01:55:38.110822: step: 234/459, loss: 0.06375924497842789 2023-01-24 01:55:38.694994: step: 236/459, loss: 0.02362692914903164 2023-01-24 01:55:39.360538: step: 238/459, loss: 0.0786290317773819 2023-01-24 01:55:39.927482: step: 240/459, loss: 0.2131921350955963 2023-01-24 01:55:40.517774: step: 242/459, loss: 0.3947404623031616 2023-01-24 01:55:41.128754: step: 244/459, loss: 0.11720526218414307 2023-01-24 01:55:41.755978: step: 246/459, loss: 0.20457197725772858 2023-01-24 01:55:42.310262: step: 248/459, loss: 0.07210832834243774 2023-01-24 01:55:42.951335: step: 250/459, loss: 0.09259874373674393 2023-01-24 01:55:43.585726: step: 252/459, loss: 0.2299267053604126 2023-01-24 01:55:44.222202: step: 254/459, loss: 0.05470795929431915 2023-01-24 01:55:44.821289: step: 256/459, loss: 0.09055405855178833 2023-01-24 01:55:45.446824: step: 258/459, loss: 0.06286013871431351 2023-01-24 01:55:46.048451: step: 260/459, loss: 0.7208109498023987 2023-01-24 01:55:46.737500: step: 262/459, loss: 0.09166919440031052 2023-01-24 01:55:47.410599: step: 264/459, loss: 
0.18493616580963135 2023-01-24 01:55:48.045296: step: 266/459, loss: 0.08869848400354385 2023-01-24 01:55:48.680291: step: 268/459, loss: 0.1829054206609726 2023-01-24 01:55:49.343135: step: 270/459, loss: 0.20508040487766266 2023-01-24 01:55:50.031806: step: 272/459, loss: 0.0527411513030529 2023-01-24 01:55:50.679867: step: 274/459, loss: 0.05155780166387558 2023-01-24 01:55:51.352239: step: 276/459, loss: 0.21164384484291077 2023-01-24 01:55:51.983235: step: 278/459, loss: 0.04325151443481445 2023-01-24 01:55:52.585863: step: 280/459, loss: 0.0701708123087883 2023-01-24 01:55:53.179359: step: 282/459, loss: 0.206478551030159 2023-01-24 01:55:53.774541: step: 284/459, loss: 0.09134501218795776 2023-01-24 01:55:54.347001: step: 286/459, loss: 0.19882096350193024 2023-01-24 01:55:54.965837: step: 288/459, loss: 0.06346333771944046 2023-01-24 01:55:55.592480: step: 290/459, loss: 0.36140143871307373 2023-01-24 01:55:56.193810: step: 292/459, loss: 0.0324077270925045 2023-01-24 01:55:56.834791: step: 294/459, loss: 0.18531343340873718 2023-01-24 01:55:57.384529: step: 296/459, loss: 0.09940654039382935 2023-01-24 01:55:58.048540: step: 298/459, loss: 0.13422198593616486 2023-01-24 01:55:58.648220: step: 300/459, loss: 0.13124004006385803 2023-01-24 01:55:59.240939: step: 302/459, loss: 0.4274349510669708 2023-01-24 01:55:59.913893: step: 304/459, loss: 0.04422704502940178 2023-01-24 01:56:00.497707: step: 306/459, loss: 0.11244460195302963 2023-01-24 01:56:01.060601: step: 308/459, loss: 0.11207491904497147 2023-01-24 01:56:01.694030: step: 310/459, loss: 0.06840714812278748 2023-01-24 01:56:02.333628: step: 312/459, loss: 0.04744204506278038 2023-01-24 01:56:02.873128: step: 314/459, loss: 0.06617796421051025 2023-01-24 01:56:03.589315: step: 316/459, loss: 0.3011752963066101 2023-01-24 01:56:04.240262: step: 318/459, loss: 0.05265146493911743 2023-01-24 01:56:04.849323: step: 320/459, loss: 0.04168803617358208 2023-01-24 01:56:05.471215: step: 322/459, loss: 0.051453713327646255 2023-01-24 01:56:06.070002: step: 324/459, loss: 0.0317925289273262 2023-01-24 01:56:06.685560: step: 326/459, loss: 0.09795232862234116 2023-01-24 01:56:07.285204: step: 328/459, loss: 0.044954050332307816 2023-01-24 01:56:08.027370: step: 330/459, loss: 0.0494869090616703 2023-01-24 01:56:08.621799: step: 332/459, loss: 0.42663392424583435 2023-01-24 01:56:09.275261: step: 334/459, loss: 0.16341976821422577 2023-01-24 01:56:09.875663: step: 336/459, loss: 0.13116799294948578 2023-01-24 01:56:10.516450: step: 338/459, loss: 0.18971537053585052 2023-01-24 01:56:11.150424: step: 340/459, loss: 0.1080528050661087 2023-01-24 01:56:11.757348: step: 342/459, loss: 0.1546853929758072 2023-01-24 01:56:12.427613: step: 344/459, loss: 0.5715345740318298 2023-01-24 01:56:13.024784: step: 346/459, loss: 0.029792632907629013 2023-01-24 01:56:13.675719: step: 348/459, loss: 0.0854974314570427 2023-01-24 01:56:14.329537: step: 350/459, loss: 0.4123022258281708 2023-01-24 01:56:14.978308: step: 352/459, loss: 0.4474159777164459 2023-01-24 01:56:15.625576: step: 354/459, loss: 0.19853250682353973 2023-01-24 01:56:16.287115: step: 356/459, loss: 0.14895734190940857 2023-01-24 01:56:16.900715: step: 358/459, loss: 0.06283043324947357 2023-01-24 01:56:17.501102: step: 360/459, loss: 0.07509107142686844 2023-01-24 01:56:18.093381: step: 362/459, loss: 0.05232227221131325 2023-01-24 01:56:18.654300: step: 364/459, loss: 0.16109013557434082 2023-01-24 01:56:19.294494: step: 366/459, loss: 0.10614386200904846 2023-01-24 01:56:19.939338: 
step: 368/459, loss: 0.13438569009304047 2023-01-24 01:56:20.496918: step: 370/459, loss: 0.15157823264598846 2023-01-24 01:56:21.114493: step: 372/459, loss: 0.20248815417289734 2023-01-24 01:56:21.690904: step: 374/459, loss: 0.33196160197257996 2023-01-24 01:56:22.304387: step: 376/459, loss: 0.14353953301906586 2023-01-24 01:56:22.973542: step: 378/459, loss: 0.03218647465109825 2023-01-24 01:56:23.667263: step: 380/459, loss: 0.10228210687637329 2023-01-24 01:56:24.351774: step: 382/459, loss: 0.3121955096721649 2023-01-24 01:56:24.982311: step: 384/459, loss: 0.12069398164749146 2023-01-24 01:56:25.666573: step: 386/459, loss: 0.03091898187994957 2023-01-24 01:56:26.214218: step: 388/459, loss: 0.17869509756565094 2023-01-24 01:56:26.840666: step: 390/459, loss: 0.03217653930187225 2023-01-24 01:56:27.418125: step: 392/459, loss: 0.09062366187572479 2023-01-24 01:56:28.061851: step: 394/459, loss: 0.09386659413576126 2023-01-24 01:56:28.639218: step: 396/459, loss: 0.2466583549976349 2023-01-24 01:56:29.292486: step: 398/459, loss: 0.10122314095497131 2023-01-24 01:56:29.900717: step: 400/459, loss: 0.06470135599374771 2023-01-24 01:56:30.563147: step: 402/459, loss: 0.34764474630355835 2023-01-24 01:56:31.139309: step: 404/459, loss: 0.03053687885403633 2023-01-24 01:56:31.747877: step: 406/459, loss: 0.06287901848554611 2023-01-24 01:56:32.360179: step: 408/459, loss: 0.1734040379524231 2023-01-24 01:56:32.949031: step: 410/459, loss: 0.047398727387189865 2023-01-24 01:56:33.566215: step: 412/459, loss: 0.1024894267320633 2023-01-24 01:56:34.197341: step: 414/459, loss: 0.1600847989320755 2023-01-24 01:56:34.772651: step: 416/459, loss: 0.15863695740699768 2023-01-24 01:56:35.427618: step: 418/459, loss: 0.715982973575592 2023-01-24 01:56:36.021479: step: 420/459, loss: 0.058382149785757065 2023-01-24 01:56:36.683317: step: 422/459, loss: 0.0940161794424057 2023-01-24 01:56:37.236528: step: 424/459, loss: 0.08690014481544495 2023-01-24 01:56:37.826986: step: 426/459, loss: 0.10679380595684052 2023-01-24 01:56:38.367553: step: 428/459, loss: 0.07676704972982407 2023-01-24 01:56:38.983685: step: 430/459, loss: 0.06543120741844177 2023-01-24 01:56:39.641514: step: 432/459, loss: 0.0788215771317482 2023-01-24 01:56:40.290695: step: 434/459, loss: 0.04484344646334648 2023-01-24 01:56:40.916901: step: 436/459, loss: 0.05317992717027664 2023-01-24 01:56:41.566844: step: 438/459, loss: 2.096122980117798 2023-01-24 01:56:42.245865: step: 440/459, loss: 0.06707759946584702 2023-01-24 01:56:42.905956: step: 442/459, loss: 0.07002007961273193 2023-01-24 01:56:43.506508: step: 444/459, loss: 0.1263730674982071 2023-01-24 01:56:44.115049: step: 446/459, loss: 0.3093484938144684 2023-01-24 01:56:44.774289: step: 448/459, loss: 0.28750079870224 2023-01-24 01:56:45.432733: step: 450/459, loss: 0.19896189868450165 2023-01-24 01:56:46.021986: step: 452/459, loss: 0.19633302092552185 2023-01-24 01:56:46.740067: step: 454/459, loss: 0.04098625108599663 2023-01-24 01:56:47.437926: step: 456/459, loss: 0.07781156152486801 2023-01-24 01:56:48.056908: step: 458/459, loss: 0.014938807114958763 2023-01-24 01:56:48.666265: step: 460/459, loss: 0.2913772463798523 2023-01-24 01:56:49.265708: step: 462/459, loss: 0.015264871530234814 2023-01-24 01:56:49.851224: step: 464/459, loss: 0.07363523542881012 2023-01-24 01:56:50.487387: step: 466/459, loss: 0.03559623286128044 2023-01-24 01:56:51.088527: step: 468/459, loss: 0.1781499683856964 2023-01-24 01:56:51.685372: step: 470/459, loss: 0.05921219661831856 
2023-01-24 01:56:52.232455: step: 472/459, loss: 0.06676816195249557 2023-01-24 01:56:52.921578: step: 474/459, loss: 0.054286617785692215 2023-01-24 01:56:53.508689: step: 476/459, loss: 0.044702935963869095 2023-01-24 01:56:54.126240: step: 478/459, loss: 0.18808616697788239 2023-01-24 01:56:54.739682: step: 480/459, loss: 0.12048882246017456 2023-01-24 01:56:55.324813: step: 482/459, loss: 0.09272011369466782 2023-01-24 01:56:55.976390: step: 484/459, loss: 0.08296084403991699 2023-01-24 01:56:56.575196: step: 486/459, loss: 0.08832576870918274 2023-01-24 01:56:57.195498: step: 488/459, loss: 0.07659371942281723 2023-01-24 01:56:57.824790: step: 490/459, loss: 0.07589676976203918 2023-01-24 01:56:58.437679: step: 492/459, loss: 0.1067175418138504 2023-01-24 01:56:59.039266: step: 494/459, loss: 0.1452016979455948 2023-01-24 01:56:59.693796: step: 496/459, loss: 0.07576659321784973 2023-01-24 01:57:00.332474: step: 498/459, loss: 0.0627782866358757 2023-01-24 01:57:00.939905: step: 500/459, loss: 0.04427836090326309 2023-01-24 01:57:01.621278: step: 502/459, loss: 0.07662472873926163 2023-01-24 01:57:02.203416: step: 504/459, loss: 1.695873737335205 2023-01-24 01:57:02.755982: step: 506/459, loss: 0.056444939225912094 2023-01-24 01:57:03.407776: step: 508/459, loss: 0.07366358488798141 2023-01-24 01:57:03.959020: step: 510/459, loss: 0.08690348267555237 2023-01-24 01:57:04.534571: step: 512/459, loss: 0.04207029193639755 2023-01-24 01:57:05.103150: step: 514/459, loss: 0.09823302179574966 2023-01-24 01:57:05.715223: step: 516/459, loss: 0.1393795758485794 2023-01-24 01:57:06.351697: step: 518/459, loss: 0.4692433774471283 2023-01-24 01:57:06.972562: step: 520/459, loss: 0.10861486196517944 2023-01-24 01:57:07.558741: step: 522/459, loss: 0.04040699079632759 2023-01-24 01:57:08.175121: step: 524/459, loss: 0.07447831332683563 2023-01-24 01:57:08.709456: step: 526/459, loss: 0.04401032626628876 2023-01-24 01:57:09.366694: step: 528/459, loss: 0.058455996215343475 2023-01-24 01:57:09.958018: step: 530/459, loss: 0.0737372487783432 2023-01-24 01:57:10.573514: step: 532/459, loss: 0.07130859792232513 2023-01-24 01:57:11.313110: step: 534/459, loss: 0.1886870115995407 2023-01-24 01:57:11.901416: step: 536/459, loss: 0.10513345152139664 2023-01-24 01:57:12.507696: step: 538/459, loss: 0.04575610160827637 2023-01-24 01:57:13.067797: step: 540/459, loss: 0.19838441908359528 2023-01-24 01:57:13.574706: step: 542/459, loss: 0.039040178060531616 2023-01-24 01:57:14.191252: step: 544/459, loss: 0.038640089333057404 2023-01-24 01:57:14.841625: step: 546/459, loss: 0.04424406588077545 2023-01-24 01:57:15.424893: step: 548/459, loss: 0.030607495456933975 2023-01-24 01:57:16.064731: step: 550/459, loss: 0.25527504086494446 2023-01-24 01:57:16.678985: step: 552/459, loss: 0.10036134719848633 2023-01-24 01:57:17.289364: step: 554/459, loss: 0.03512859717011452 2023-01-24 01:57:17.901213: step: 556/459, loss: 0.06809733062982559 2023-01-24 01:57:18.562941: step: 558/459, loss: 0.7518518567085266 2023-01-24 01:57:19.234894: step: 560/459, loss: 0.08092666417360306 2023-01-24 01:57:19.825119: step: 562/459, loss: 0.09481794387102127 2023-01-24 01:57:20.485052: step: 564/459, loss: 0.06523492187261581 2023-01-24 01:57:21.136218: step: 566/459, loss: 0.07845529913902283 2023-01-24 01:57:21.730578: step: 568/459, loss: 0.023996956646442413 2023-01-24 01:57:22.331497: step: 570/459, loss: 0.16907979547977448 2023-01-24 01:57:23.144539: step: 572/459, loss: 0.06361133605241776 2023-01-24 01:57:23.766359: step: 
574/459, loss: 0.014396407641470432 2023-01-24 01:57:24.370682: step: 576/459, loss: 0.06518371403217316 2023-01-24 01:57:25.010386: step: 578/459, loss: 0.15005485713481903 2023-01-24 01:57:25.628810: step: 580/459, loss: 0.23836900293827057 2023-01-24 01:57:26.261762: step: 582/459, loss: 0.07504424452781677 2023-01-24 01:57:26.905679: step: 584/459, loss: 0.08799760043621063 2023-01-24 01:57:27.551621: step: 586/459, loss: 0.515514075756073 2023-01-24 01:57:28.192176: step: 588/459, loss: 0.3567342162132263 2023-01-24 01:57:28.796468: step: 590/459, loss: 0.146470308303833 2023-01-24 01:57:29.406491: step: 592/459, loss: 0.02376694232225418 2023-01-24 01:57:30.001406: step: 594/459, loss: 0.07308794558048248 2023-01-24 01:57:30.738516: step: 596/459, loss: 0.2451782375574112 2023-01-24 01:57:31.382313: step: 598/459, loss: 0.2963593900203705 2023-01-24 01:57:31.985266: step: 600/459, loss: 0.045235250145196915 2023-01-24 01:57:32.551888: step: 602/459, loss: 0.04254304617643356 2023-01-24 01:57:33.165658: step: 604/459, loss: 0.12257275730371475 2023-01-24 01:57:33.811978: step: 606/459, loss: 0.5032672882080078 2023-01-24 01:57:34.444050: step: 608/459, loss: 0.21742336452007294 2023-01-24 01:57:35.043653: step: 610/459, loss: 0.05048718303442001 2023-01-24 01:57:35.622951: step: 612/459, loss: 0.05267229676246643 2023-01-24 01:57:36.241931: step: 614/459, loss: 0.1419040858745575 2023-01-24 01:57:36.895179: step: 616/459, loss: 0.06099243089556694 2023-01-24 01:57:37.499542: step: 618/459, loss: 0.12863244116306305 2023-01-24 01:57:38.144123: step: 620/459, loss: 0.38623982667922974 2023-01-24 01:57:38.725496: step: 622/459, loss: 0.16319817304611206 2023-01-24 01:57:39.368552: step: 624/459, loss: 0.09035225212574005 2023-01-24 01:57:40.023068: step: 626/459, loss: 0.251569539308548 2023-01-24 01:57:40.598900: step: 628/459, loss: 0.34488677978515625 2023-01-24 01:57:41.162381: step: 630/459, loss: 0.16378305852413177 2023-01-24 01:57:41.729553: step: 632/459, loss: 0.11786740273237228 2023-01-24 01:57:42.327355: step: 634/459, loss: 0.01052634697407484 2023-01-24 01:57:42.947169: step: 636/459, loss: 0.17594026029109955 2023-01-24 01:57:43.495588: step: 638/459, loss: 0.04015747085213661 2023-01-24 01:57:44.119639: step: 640/459, loss: 0.17890338599681854 2023-01-24 01:57:44.726990: step: 642/459, loss: 0.06940944492816925 2023-01-24 01:57:45.378991: step: 644/459, loss: 0.07677511125802994 2023-01-24 01:57:45.998977: step: 646/459, loss: 0.10732200741767883 2023-01-24 01:57:46.604048: step: 648/459, loss: 0.07929249107837677 2023-01-24 01:57:47.242720: step: 650/459, loss: 0.061114728450775146 2023-01-24 01:57:47.854626: step: 652/459, loss: 0.08525842428207397 2023-01-24 01:57:48.436383: step: 654/459, loss: 0.11310896277427673 2023-01-24 01:57:49.121753: step: 656/459, loss: 0.017102546989917755 2023-01-24 01:57:49.755141: step: 658/459, loss: 0.035074543207883835 2023-01-24 01:57:50.376289: step: 660/459, loss: 0.08847036212682724 2023-01-24 01:57:51.012230: step: 662/459, loss: 0.08012127876281738 2023-01-24 01:57:51.584700: step: 664/459, loss: 0.12504224479198456 2023-01-24 01:57:52.179553: step: 666/459, loss: 0.1580563336610794 2023-01-24 01:57:52.815530: step: 668/459, loss: 0.08750860393047333 2023-01-24 01:57:53.410754: step: 670/459, loss: 0.05413184314966202 2023-01-24 01:57:54.037300: step: 672/459, loss: 0.17403705418109894 2023-01-24 01:57:54.578095: step: 674/459, loss: 0.07680046558380127 2023-01-24 01:57:55.156617: step: 676/459, loss: 0.04049000144004822 
2023-01-24 01:57:55.798853: step: 678/459, loss: 0.09736716747283936 2023-01-24 01:57:56.419526: step: 680/459, loss: 0.4562024772167206 2023-01-24 01:57:57.132173: step: 682/459, loss: 0.3094898760318756 2023-01-24 01:57:57.810882: step: 684/459, loss: 0.2012021243572235 2023-01-24 01:57:58.424836: step: 686/459, loss: 0.1290193647146225 2023-01-24 01:57:59.033669: step: 688/459, loss: 0.19675031304359436 2023-01-24 01:57:59.634840: step: 690/459, loss: 0.046348586678504944 2023-01-24 01:58:00.277764: step: 692/459, loss: 0.20186521112918854 2023-01-24 01:58:00.899211: step: 694/459, loss: 0.06724777817726135 2023-01-24 01:58:01.522470: step: 696/459, loss: 0.3859793543815613 2023-01-24 01:58:02.116176: step: 698/459, loss: 0.10704749822616577 2023-01-24 01:58:02.741593: step: 700/459, loss: 0.0998850017786026 2023-01-24 01:58:03.367363: step: 702/459, loss: 0.07905120402574539 2023-01-24 01:58:03.981877: step: 704/459, loss: 0.06940245628356934 2023-01-24 01:58:04.631837: step: 706/459, loss: 0.1839239001274109 2023-01-24 01:58:05.255925: step: 708/459, loss: 0.0688069686293602 2023-01-24 01:58:05.856097: step: 710/459, loss: 0.09637168049812317 2023-01-24 01:58:06.495372: step: 712/459, loss: 0.18127182126045227 2023-01-24 01:58:07.094607: step: 714/459, loss: 0.06018897145986557 2023-01-24 01:58:07.688803: step: 716/459, loss: 0.0894615426659584 2023-01-24 01:58:08.361548: step: 718/459, loss: 0.03716019168496132 2023-01-24 01:58:09.016208: step: 720/459, loss: 0.10701368004083633 2023-01-24 01:58:09.626220: step: 722/459, loss: 0.2088770717382431 2023-01-24 01:58:10.269380: step: 724/459, loss: 0.03356407210230827 2023-01-24 01:58:10.880046: step: 726/459, loss: 0.029769890010356903 2023-01-24 01:58:11.506851: step: 728/459, loss: 0.5364834070205688 2023-01-24 01:58:12.147801: step: 730/459, loss: 0.10892283916473389 2023-01-24 01:58:12.739290: step: 732/459, loss: 0.025713764131069183 2023-01-24 01:58:13.334657: step: 734/459, loss: 0.10682553797960281 2023-01-24 01:58:13.917236: step: 736/459, loss: 0.24941667914390564 2023-01-24 01:58:14.521944: step: 738/459, loss: 0.13459408283233643 2023-01-24 01:58:15.185044: step: 740/459, loss: 0.08415152877569199 2023-01-24 01:58:15.805406: step: 742/459, loss: 0.20878447592258453 2023-01-24 01:58:16.407371: step: 744/459, loss: 0.056178413331508636 2023-01-24 01:58:17.044487: step: 746/459, loss: 0.07127678394317627 2023-01-24 01:58:17.646526: step: 748/459, loss: 0.14733676612377167 2023-01-24 01:58:18.211203: step: 750/459, loss: 0.16625738143920898 2023-01-24 01:58:18.894922: step: 752/459, loss: 0.29621732234954834 2023-01-24 01:58:19.543781: step: 754/459, loss: 0.0744377076625824 2023-01-24 01:58:20.183952: step: 756/459, loss: 0.030758928507566452 2023-01-24 01:58:20.949155: step: 758/459, loss: 0.08607392013072968 2023-01-24 01:58:21.547691: step: 760/459, loss: 0.3721146881580353 2023-01-24 01:58:22.151497: step: 762/459, loss: 0.10926148295402527 2023-01-24 01:58:22.724110: step: 764/459, loss: 0.3267406225204468 2023-01-24 01:58:23.345280: step: 766/459, loss: 0.2642576992511749 2023-01-24 01:58:23.980774: step: 768/459, loss: 0.15583254396915436 2023-01-24 01:58:24.569329: step: 770/459, loss: 0.21106745302677155 2023-01-24 01:58:25.169379: step: 772/459, loss: 0.0549163818359375 2023-01-24 01:58:25.847398: step: 774/459, loss: 0.2670581340789795 2023-01-24 01:58:26.406618: step: 776/459, loss: 0.13981038331985474 2023-01-24 01:58:26.981180: step: 778/459, loss: 0.03955908864736557 2023-01-24 01:58:27.611600: step: 780/459, 
loss: 0.038967572152614594 2023-01-24 01:58:28.229188: step: 782/459, loss: 0.053363364189863205 2023-01-24 01:58:28.807900: step: 784/459, loss: 3.7635951042175293 2023-01-24 01:58:29.382599: step: 786/459, loss: 0.06063844636082649 2023-01-24 01:58:30.044573: step: 788/459, loss: 0.1619468480348587 2023-01-24 01:58:30.640823: step: 790/459, loss: 0.11203496158123016 2023-01-24 01:58:31.276615: step: 792/459, loss: 0.07697413861751556 2023-01-24 01:58:31.885470: step: 794/459, loss: 0.07045766711235046 2023-01-24 01:58:32.460259: step: 796/459, loss: 0.13583076000213623 2023-01-24 01:58:33.073574: step: 798/459, loss: 10.052125930786133 2023-01-24 01:58:33.682437: step: 800/459, loss: 0.09314850717782974 2023-01-24 01:58:34.322318: step: 802/459, loss: 0.09957461059093475 2023-01-24 01:58:34.963247: step: 804/459, loss: 0.0988377183675766 2023-01-24 01:58:35.575562: step: 806/459, loss: 0.09559522569179535 2023-01-24 01:58:36.223102: step: 808/459, loss: 0.18739748001098633 2023-01-24 01:58:36.928692: step: 810/459, loss: 0.15656334161758423 2023-01-24 01:58:37.501801: step: 812/459, loss: 3.5893144607543945 2023-01-24 01:58:38.159607: step: 814/459, loss: 0.20958146452903748 2023-01-24 01:58:38.858671: step: 816/459, loss: 2.297520160675049 2023-01-24 01:58:39.428193: step: 818/459, loss: 0.029120156541466713 2023-01-24 01:58:40.031621: step: 820/459, loss: 0.4897439777851105 2023-01-24 01:58:40.576738: step: 822/459, loss: 0.0700531154870987 2023-01-24 01:58:41.197706: step: 824/459, loss: 0.21913203597068787 2023-01-24 01:58:41.882904: step: 826/459, loss: 5.851499080657959 2023-01-24 01:58:42.482750: step: 828/459, loss: 0.040644191205501556 2023-01-24 01:58:43.130592: step: 830/459, loss: 0.2286517322063446 2023-01-24 01:58:43.766539: step: 832/459, loss: 0.0852484330534935 2023-01-24 01:58:44.376473: step: 834/459, loss: 0.1810971349477768 2023-01-24 01:58:45.021401: step: 836/459, loss: 0.08672875165939331 2023-01-24 01:58:45.623754: step: 838/459, loss: 0.13800406455993652 2023-01-24 01:58:46.288557: step: 840/459, loss: 0.37456557154655457 2023-01-24 01:58:46.946095: step: 842/459, loss: 0.028130261227488518 2023-01-24 01:58:47.696182: step: 844/459, loss: 0.1422521471977234 2023-01-24 01:58:48.243034: step: 846/459, loss: 0.006248687859624624 2023-01-24 01:58:48.886917: step: 848/459, loss: 0.1841914802789688 2023-01-24 01:58:49.522479: step: 850/459, loss: 0.26615098118782043 2023-01-24 01:58:50.131816: step: 852/459, loss: 0.05570116266608238 2023-01-24 01:58:50.749827: step: 854/459, loss: 0.5031582713127136 2023-01-24 01:58:51.354589: step: 856/459, loss: 0.11038310825824738 2023-01-24 01:58:51.987534: step: 858/459, loss: 0.12240522354841232 2023-01-24 01:58:52.603179: step: 860/459, loss: 0.05975285544991493 2023-01-24 01:58:53.222014: step: 862/459, loss: 0.3003198802471161 2023-01-24 01:58:53.793378: step: 864/459, loss: 0.09920331090688705 2023-01-24 01:58:54.396764: step: 866/459, loss: 0.05031297728419304 2023-01-24 01:58:55.059782: step: 868/459, loss: 0.027948837727308273 2023-01-24 01:58:55.720457: step: 870/459, loss: 0.07983909547328949 2023-01-24 01:58:56.363304: step: 872/459, loss: 0.22517846524715424 2023-01-24 01:58:57.074249: step: 874/459, loss: 0.025913279503583908 2023-01-24 01:58:57.689957: step: 876/459, loss: 0.05934268236160278 2023-01-24 01:58:58.291325: step: 878/459, loss: 0.2111596167087555 2023-01-24 01:58:58.917803: step: 880/459, loss: 0.2264663428068161 2023-01-24 01:58:59.472030: step: 882/459, loss: 0.3631376028060913 2023-01-24 
01:59:00.031501: step: 884/459, loss: 0.03527398407459259 2023-01-24 01:59:00.683037: step: 886/459, loss: 0.17488062381744385 2023-01-24 01:59:01.316995: step: 888/459, loss: 0.15069524943828583 2023-01-24 01:59:01.916114: step: 890/459, loss: 0.17181895673274994 2023-01-24 01:59:02.596636: step: 892/459, loss: 0.0548144094645977 2023-01-24 01:59:03.227046: step: 894/459, loss: 0.383242666721344 2023-01-24 01:59:03.812374: step: 896/459, loss: 0.04169578105211258 2023-01-24 01:59:04.437217: step: 898/459, loss: 0.08496256172657013 2023-01-24 01:59:05.068919: step: 900/459, loss: 0.08051106333732605 2023-01-24 01:59:05.723775: step: 902/459, loss: 0.21713100373744965 2023-01-24 01:59:06.314957: step: 904/459, loss: 0.164235919713974 2023-01-24 01:59:06.973083: step: 906/459, loss: 0.07511423528194427 2023-01-24 01:59:07.562937: step: 908/459, loss: 0.05483529716730118 2023-01-24 01:59:08.138315: step: 910/459, loss: 0.07203560322523117 2023-01-24 01:59:08.757308: step: 912/459, loss: 0.05350668355822563 2023-01-24 01:59:09.368641: step: 914/459, loss: 0.047764841467142105 2023-01-24 01:59:09.970633: step: 916/459, loss: 0.019942093640565872 2023-01-24 01:59:10.588298: step: 918/459, loss: 0.11396947503089905 2023-01-24 01:59:11.081650: step: 920/459, loss: 0.01304656732827425 ================================================== Loss: 0.205 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32548413956935623, 'r': 0.3236312886799671, 'f1': 0.3245550697133067}, 'combined': 0.23914584084138388, 'epoch': 15} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33577145209520615, 'r': 0.29303690364672536, 'f1': 0.3129520330207746}, 'combined': 0.20028930113329574, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3152174297700496, 'r': 0.3211987851736558, 'f1': 0.31817999959871546}, 'combined': 0.23444842075694822, 'epoch': 15} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3403744814711576, 'r': 0.29117489733123575, 'f1': 0.31385829207678523}, 'combined': 0.2008693069291425, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32774107587862694, 'r': 0.3190344818325154, 'f1': 0.32332917678026085}, 'combined': 0.23824255131177113, 'epoch': 15} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.35199624041505345, 'r': 0.31545018266704244, 'f1': 0.3327226710551514}, 'combined': 0.23855587736029726, 'epoch': 15} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25203252032520324, 'r': 0.2952380952380952, 'f1': 0.27192982456140347}, 'combined': 0.1812865497076023, 'epoch': 15} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2661290322580645, 'r': 0.358695652173913, 'f1': 0.30555555555555547}, 'combined': 0.15277777777777773, 'epoch': 15} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3125, 'r': 0.1724137931034483, 'f1': 0.22222222222222224}, 'combined': 0.14814814814814814, 'epoch': 15} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 
'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 'epoch': 10} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 'epoch': 10} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'epoch': 10} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3342478880342958, 'r': 0.3266369304319968, 'f1': 0.33039858414138645}, 'combined': 0.24345158831470579, 'epoch': 5} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3413499740991752, 'r': 0.24608229950967814, 'f1': 0.28599105067157526}, 'combined': 0.18303427242980813, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3269230769230769, 'r': 0.3695652173913043, 'f1': 0.346938775510204}, 'combined': 0.173469387755102, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8} ****************************** Epoch: 16 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:01:44.450065: step: 2/459, loss: 0.8879448771476746 2023-01-24 02:01:45.031778: step: 4/459, loss: 0.04727478325366974 2023-01-24 02:01:45.626905: step: 6/459, loss: 0.07473929226398468 2023-01-24 02:01:46.291068: step: 8/459, loss: 0.007892918772995472 2023-01-24 02:01:47.005585: step: 10/459, loss: 0.1245039775967598 2023-01-24 02:01:47.613079: step: 12/459, loss: 0.053387757390737534 2023-01-24 02:01:48.237740: step: 14/459, loss: 0.2125917226076126 2023-01-24 02:01:48.815472: step: 16/459, loss: 0.17098701000213623 2023-01-24 02:01:49.484476: step: 18/459, loss: 0.2840515971183777 2023-01-24 02:01:50.055135: step: 20/459, loss: 0.06563922017812729 2023-01-24 02:01:50.646173: step: 22/459, loss: 0.11207056045532227 2023-01-24 02:01:51.242395: step: 24/459, loss: 0.05409586802124977 2023-01-24 02:01:51.843295: step: 26/459, loss: 0.12479449063539505 2023-01-24 02:01:52.502849: step: 28/459, loss: 0.09201860427856445 2023-01-24 02:01:53.067068: step: 30/459, loss: 0.13400150835514069 2023-01-24 02:01:53.649252: step: 32/459, loss: 0.07436249405145645 2023-01-24 02:01:54.242227: step: 34/459, loss: 0.2398362159729004 2023-01-24 02:01:54.867682: step: 36/459, loss: 0.06600896269083023 2023-01-24 02:01:55.492547: step: 38/459, loss: 0.11355055868625641 2023-01-24 02:01:56.107777: step: 40/459, loss: 0.7200385928153992 
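The single 'Loss:' figure printed at the end of each epoch above (0.207 for epoch 14, 0.205 for epoch 15) is presumably an aggregate of that epoch's step losses, and a few individual steps spike well above it (for example 10.05 at step 798 and 5.85 at step 826 of epoch 15). The sketch below is a hypothetical helper that re-parses the printed 'step: N/459, loss: X' entries and reports a simple mean plus the maximum per epoch for comparison; the training script itself may aggregate differently, e.g. over batches that are not printed.

import re

STEP_RE = re.compile(r"step: (\d+)/459, loss: ([0-9.]+)")
EPOCH_SPLIT_RE = re.compile(r"Epoch: (\d+) command:")

def epoch_loss_summary(log_text):
    # Split the raw log at each "Epoch: N command:" header, then average the
    # step losses printed inside each chunk and record the largest one.
    summary = {}
    parts = EPOCH_SPLIT_RE.split(log_text)  # [preamble, n, chunk, n, chunk, ...]
    for epoch, chunk in zip(parts[1::2], parts[2::2]):
        losses = [float(loss) for _, loss in STEP_RE.findall(chunk)]
        if losses:
            summary[int(epoch)] = (sum(losses) / len(losses), max(losses))
    return summary

# e.g. epoch_loss_summary(open('train.log').read())  # 'train.log' is a stand-in path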
2023-01-24 02:01:56.676228: step: 42/459, loss: 0.12218756973743439 2023-01-24 02:01:57.346717: step: 44/459, loss: 0.1020936667919159 2023-01-24 02:01:58.011778: step: 46/459, loss: 0.2365526258945465 2023-01-24 02:01:58.563973: step: 48/459, loss: 0.02653694525361061 2023-01-24 02:01:59.205108: step: 50/459, loss: 0.050459835678339005 2023-01-24 02:01:59.863759: step: 52/459, loss: 0.359302818775177 2023-01-24 02:02:00.448204: step: 54/459, loss: 0.08137480169534683 2023-01-24 02:02:01.073880: step: 56/459, loss: 0.031984999775886536 2023-01-24 02:02:01.648470: step: 58/459, loss: 0.07727279514074326 2023-01-24 02:02:02.277206: step: 60/459, loss: 0.229757159948349 2023-01-24 02:02:02.831574: step: 62/459, loss: 0.06477025896310806 2023-01-24 02:02:03.465196: step: 64/459, loss: 0.07425765693187714 2023-01-24 02:02:04.091174: step: 66/459, loss: 0.040783412754535675 2023-01-24 02:02:04.694535: step: 68/459, loss: 0.5752573013305664 2023-01-24 02:02:05.321640: step: 70/459, loss: 0.04612255468964577 2023-01-24 02:02:05.940217: step: 72/459, loss: 0.06793859601020813 2023-01-24 02:02:06.543346: step: 74/459, loss: 0.11783116310834885 2023-01-24 02:02:07.163124: step: 76/459, loss: 0.06138864904642105 2023-01-24 02:02:07.852928: step: 78/459, loss: 0.029068447649478912 2023-01-24 02:02:08.475269: step: 80/459, loss: 0.12332269549369812 2023-01-24 02:02:09.157894: step: 82/459, loss: 0.1213996633887291 2023-01-24 02:02:09.769013: step: 84/459, loss: 0.15078327059745789 2023-01-24 02:02:10.367686: step: 86/459, loss: 0.11250796914100647 2023-01-24 02:02:10.920457: step: 88/459, loss: 0.03275086358189583 2023-01-24 02:02:11.559739: step: 90/459, loss: 0.11170636117458344 2023-01-24 02:02:12.094508: step: 92/459, loss: 0.02614309825003147 2023-01-24 02:02:12.761963: step: 94/459, loss: 0.07047171145677567 2023-01-24 02:02:13.430457: step: 96/459, loss: 0.30975252389907837 2023-01-24 02:02:13.971212: step: 98/459, loss: 0.05462261289358139 2023-01-24 02:02:14.581860: step: 100/459, loss: 0.1791364699602127 2023-01-24 02:02:15.162820: step: 102/459, loss: 0.13750123977661133 2023-01-24 02:02:15.815103: step: 104/459, loss: 0.14019528031349182 2023-01-24 02:02:16.335608: step: 106/459, loss: 0.2746846079826355 2023-01-24 02:02:16.937372: step: 108/459, loss: 0.06207350641489029 2023-01-24 02:02:17.570310: step: 110/459, loss: 0.10237284004688263 2023-01-24 02:02:18.225424: step: 112/459, loss: 0.21774455904960632 2023-01-24 02:02:18.884774: step: 114/459, loss: 0.06984371691942215 2023-01-24 02:02:19.513256: step: 116/459, loss: 0.10358574986457825 2023-01-24 02:02:20.169804: step: 118/459, loss: 0.07275152206420898 2023-01-24 02:02:20.861956: step: 120/459, loss: 0.08602172136306763 2023-01-24 02:02:21.510276: step: 122/459, loss: 0.1252421885728836 2023-01-24 02:02:22.115638: step: 124/459, loss: 0.04194954037666321 2023-01-24 02:02:22.718409: step: 126/459, loss: 0.3217321038246155 2023-01-24 02:02:23.298387: step: 128/459, loss: 0.11070289462804794 2023-01-24 02:02:23.905456: step: 130/459, loss: 0.043696608394384384 2023-01-24 02:02:24.483571: step: 132/459, loss: 0.1207684651017189 2023-01-24 02:02:25.051776: step: 134/459, loss: 0.028522023931145668 2023-01-24 02:02:25.666966: step: 136/459, loss: 0.06377468258142471 2023-01-24 02:02:26.244311: step: 138/459, loss: 0.02597254142165184 2023-01-24 02:02:26.912623: step: 140/459, loss: 0.14486649632453918 2023-01-24 02:02:27.539330: step: 142/459, loss: 0.031424738466739655 2023-01-24 02:02:28.178672: step: 144/459, loss: 0.05724690109491348 
2023-01-24 02:02:28.777135: step: 146/459, loss: 0.24415312707424164 2023-01-24 02:02:29.393888: step: 148/459, loss: 0.08075231313705444 2023-01-24 02:02:30.028873: step: 150/459, loss: 0.06807135790586472 2023-01-24 02:02:30.666729: step: 152/459, loss: 0.03524206951260567 2023-01-24 02:02:31.280350: step: 154/459, loss: 0.048571206629276276 2023-01-24 02:02:31.962060: step: 156/459, loss: 0.047060929238796234 2023-01-24 02:02:32.603327: step: 158/459, loss: 0.048136916011571884 2023-01-24 02:02:33.193564: step: 160/459, loss: 0.2367589920759201 2023-01-24 02:02:33.840551: step: 162/459, loss: 0.0982067808508873 2023-01-24 02:02:34.412953: step: 164/459, loss: 0.09181015193462372 2023-01-24 02:02:35.027604: step: 166/459, loss: 0.16200105845928192 2023-01-24 02:02:35.622883: step: 168/459, loss: 0.07581184059381485 2023-01-24 02:02:36.228443: step: 170/459, loss: 0.09860744327306747 2023-01-24 02:02:36.836776: step: 172/459, loss: 0.08637428283691406 2023-01-24 02:02:37.453341: step: 174/459, loss: 0.037244971841573715 2023-01-24 02:02:38.086574: step: 176/459, loss: 0.14009764790534973 2023-01-24 02:02:38.711931: step: 178/459, loss: 0.023562032729387283 2023-01-24 02:02:39.306176: step: 180/459, loss: 0.04535692557692528 2023-01-24 02:02:39.922755: step: 182/459, loss: 0.055362310260534286 2023-01-24 02:02:40.515420: step: 184/459, loss: 0.114610955119133 2023-01-24 02:02:41.116735: step: 186/459, loss: 0.09951644390821457 2023-01-24 02:02:41.783321: step: 188/459, loss: 0.05621141940355301 2023-01-24 02:02:42.462705: step: 190/459, loss: 0.6577220559120178 2023-01-24 02:02:43.117867: step: 192/459, loss: 0.04476972669363022 2023-01-24 02:02:43.710800: step: 194/459, loss: 0.0225608479231596 2023-01-24 02:02:44.359761: step: 196/459, loss: 0.31201785802841187 2023-01-24 02:02:45.014855: step: 198/459, loss: 0.05798350274562836 2023-01-24 02:02:45.647895: step: 200/459, loss: 0.09901396930217743 2023-01-24 02:02:46.273357: step: 202/459, loss: 0.12344395369291306 2023-01-24 02:02:46.924736: step: 204/459, loss: 0.09457338601350784 2023-01-24 02:02:47.556285: step: 206/459, loss: 0.03216688707470894 2023-01-24 02:02:48.099870: step: 208/459, loss: 0.6916610598564148 2023-01-24 02:02:48.675029: step: 210/459, loss: 0.10062653571367264 2023-01-24 02:02:49.368823: step: 212/459, loss: 0.07279683649539948 2023-01-24 02:02:49.929771: step: 214/459, loss: 0.45073413848876953 2023-01-24 02:02:50.552613: step: 216/459, loss: 0.05640142410993576 2023-01-24 02:02:51.197507: step: 218/459, loss: 0.07811546325683594 2023-01-24 02:02:51.823668: step: 220/459, loss: 0.05461932718753815 2023-01-24 02:02:52.413394: step: 222/459, loss: 0.0802067294716835 2023-01-24 02:02:53.014634: step: 224/459, loss: 0.2509532868862152 2023-01-24 02:02:53.711979: step: 226/459, loss: 0.08392586559057236 2023-01-24 02:02:54.343405: step: 228/459, loss: 0.06334364414215088 2023-01-24 02:02:55.038414: step: 230/459, loss: 0.19233959913253784 2023-01-24 02:02:55.688254: step: 232/459, loss: 0.17105154693126678 2023-01-24 02:02:56.290190: step: 234/459, loss: 0.5635660886764526 2023-01-24 02:02:56.920230: step: 236/459, loss: 0.014467786997556686 2023-01-24 02:02:57.541571: step: 238/459, loss: 0.18220539391040802 2023-01-24 02:02:58.171129: step: 240/459, loss: 0.22101569175720215 2023-01-24 02:02:58.788587: step: 242/459, loss: 0.8205050230026245 2023-01-24 02:02:59.406973: step: 244/459, loss: 0.13369247317314148 2023-01-24 02:03:00.054543: step: 246/459, loss: 0.3034721314907074 2023-01-24 02:03:00.681823: step: 
248/459, loss: 0.06143798306584358 2023-01-24 02:03:01.299961: step: 250/459, loss: 0.14388805627822876 2023-01-24 02:03:01.922136: step: 252/459, loss: 0.05799439176917076 2023-01-24 02:03:02.558369: step: 254/459, loss: 0.03487298637628555 2023-01-24 02:03:03.204647: step: 256/459, loss: 0.07388923317193985 2023-01-24 02:03:03.778357: step: 258/459, loss: 0.09288249164819717 2023-01-24 02:03:04.448606: step: 260/459, loss: 0.09600287675857544 2023-01-24 02:03:05.121738: step: 262/459, loss: 0.3373076021671295 2023-01-24 02:03:05.751545: step: 264/459, loss: 0.06873781234025955 2023-01-24 02:03:06.316963: step: 266/459, loss: 0.030057856813073158 2023-01-24 02:03:06.942737: step: 268/459, loss: 0.06725158542394638 2023-01-24 02:03:07.531449: step: 270/459, loss: 0.02215883880853653 2023-01-24 02:03:08.137540: step: 272/459, loss: 0.05780639871954918 2023-01-24 02:03:08.761486: step: 274/459, loss: 0.08008652925491333 2023-01-24 02:03:09.383587: step: 276/459, loss: 0.10111922770738602 2023-01-24 02:03:09.980530: step: 278/459, loss: 0.07384724915027618 2023-01-24 02:03:10.586042: step: 280/459, loss: 0.023197997361421585 2023-01-24 02:03:11.205356: step: 282/459, loss: 0.12255624681711197 2023-01-24 02:03:11.849390: step: 284/459, loss: 0.04852262884378433 2023-01-24 02:03:12.421453: step: 286/459, loss: 0.13565580546855927 2023-01-24 02:03:13.120720: step: 288/459, loss: 0.7016648650169373 2023-01-24 02:03:13.651888: step: 290/459, loss: 0.072405144572258 2023-01-24 02:03:14.310468: step: 292/459, loss: 0.134109765291214 2023-01-24 02:03:15.036464: step: 294/459, loss: 0.02140689827501774 2023-01-24 02:03:15.632728: step: 296/459, loss: 0.015162130817770958 2023-01-24 02:03:16.278955: step: 298/459, loss: 0.06102388724684715 2023-01-24 02:03:16.877183: step: 300/459, loss: 0.05829443037509918 2023-01-24 02:03:17.456858: step: 302/459, loss: 0.09963026642799377 2023-01-24 02:03:18.017279: step: 304/459, loss: 0.16334538161754608 2023-01-24 02:03:18.599122: step: 306/459, loss: 0.0539473257958889 2023-01-24 02:03:19.257990: step: 308/459, loss: 0.7033682465553284 2023-01-24 02:03:19.912359: step: 310/459, loss: 0.05329035595059395 2023-01-24 02:03:20.532829: step: 312/459, loss: 0.05190055072307587 2023-01-24 02:03:21.217980: step: 314/459, loss: 0.08543004095554352 2023-01-24 02:03:21.834514: step: 316/459, loss: 0.10185921937227249 2023-01-24 02:03:22.387416: step: 318/459, loss: 0.06099340319633484 2023-01-24 02:03:23.037076: step: 320/459, loss: 0.016971714794635773 2023-01-24 02:03:23.674322: step: 322/459, loss: 0.10191063582897186 2023-01-24 02:03:24.224995: step: 324/459, loss: 0.1125110313296318 2023-01-24 02:03:24.876318: step: 326/459, loss: 0.04141242802143097 2023-01-24 02:03:25.495444: step: 328/459, loss: 0.09103623032569885 2023-01-24 02:03:26.117971: step: 330/459, loss: 0.28200623393058777 2023-01-24 02:03:26.756438: step: 332/459, loss: 0.4620209336280823 2023-01-24 02:03:27.360127: step: 334/459, loss: 0.22659842669963837 2023-01-24 02:03:27.968691: step: 336/459, loss: 0.19790397584438324 2023-01-24 02:03:28.568853: step: 338/459, loss: 0.003361095441505313 2023-01-24 02:03:29.248268: step: 340/459, loss: 0.8161413073539734 2023-01-24 02:03:29.882911: step: 342/459, loss: 1.7921037673950195 2023-01-24 02:03:30.484289: step: 344/459, loss: 0.11554652452468872 2023-01-24 02:03:31.203045: step: 346/459, loss: 0.11420895159244537 2023-01-24 02:03:31.908436: step: 348/459, loss: 0.9567229151725769 2023-01-24 02:03:32.528228: step: 350/459, loss: 0.029874393716454506 
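Because each evaluation block above is printed as a plain Python dict literal, the per-epoch scores can be recovered from the raw log with a regular expression plus ast.literal_eval. The helper below is a hypothetical sketch along those lines; it collects only the per-epoch 'Dev/Test/Sample <language>' blocks (not the 'Current best result' ones) and returns, for example, the Dev Chinese 'combined' trajectory {14: 0.2444..., 15: 0.2391..., 16: 0.2420...} for the epochs shown above.

import ast
import re

METRIC_RE = re.compile(
    r"(Dev|Test|Sample) (Chinese|Korean|Russian): (\{.*?'epoch': \d+\})",
    re.DOTALL,
)

def combined_by_epoch(log_text, split="Dev", language="Chinese"):
    # Pull every "<split> <language>: {...}" block out of the log and map
    # epoch -> the 'combined' score reported in that block.
    scores = {}
    for blk_split, blk_lang, blob in METRIC_RE.findall(log_text):
        if blk_split == split and blk_lang == language:
            block = ast.literal_eval(blob)
            scores[block['epoch']] = block['combined']
    return scores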
2023-01-24 02:03:33.154981: step: 352/459, loss: 0.03435511142015457 2023-01-24 02:03:33.850995: step: 354/459, loss: 0.042487651109695435 2023-01-24 02:03:34.446758: step: 356/459, loss: 0.07236837595701218 2023-01-24 02:03:35.092139: step: 358/459, loss: 0.0859876275062561 2023-01-24 02:03:35.729794: step: 360/459, loss: 0.092832051217556 2023-01-24 02:03:36.344744: step: 362/459, loss: 0.055321186780929565 2023-01-24 02:03:36.970645: step: 364/459, loss: 0.22268900275230408 2023-01-24 02:03:37.516701: step: 366/459, loss: 0.1900249719619751 2023-01-24 02:03:38.133330: step: 368/459, loss: 0.28799599409103394 2023-01-24 02:03:38.730811: step: 370/459, loss: 0.679068386554718 2023-01-24 02:03:39.367398: step: 372/459, loss: 0.05811925604939461 2023-01-24 02:03:39.982647: step: 374/459, loss: 0.12708403170108795 2023-01-24 02:03:40.673067: step: 376/459, loss: 0.04053386673331261 2023-01-24 02:03:41.299501: step: 378/459, loss: 0.2417488545179367 2023-01-24 02:03:41.891216: step: 380/459, loss: 0.1172473207116127 2023-01-24 02:03:42.512173: step: 382/459, loss: 0.10385758429765701 2023-01-24 02:03:43.167678: step: 384/459, loss: 0.1375928521156311 2023-01-24 02:03:43.794366: step: 386/459, loss: 0.29502445459365845 2023-01-24 02:03:44.431120: step: 388/459, loss: 0.10329929739236832 2023-01-24 02:03:45.110018: step: 390/459, loss: 0.09191186726093292 2023-01-24 02:03:45.758932: step: 392/459, loss: 0.10360842943191528 2023-01-24 02:03:46.353217: step: 394/459, loss: 0.37493640184402466 2023-01-24 02:03:46.951561: step: 396/459, loss: 0.06689000129699707 2023-01-24 02:03:47.556828: step: 398/459, loss: 0.11244995892047882 2023-01-24 02:03:48.096197: step: 400/459, loss: 0.18422530591487885 2023-01-24 02:03:48.656772: step: 402/459, loss: 0.07916881144046783 2023-01-24 02:03:49.276636: step: 404/459, loss: 0.058357562869787216 2023-01-24 02:03:49.828589: step: 406/459, loss: 0.048287563025951385 2023-01-24 02:03:50.455436: step: 408/459, loss: 0.018982207402586937 2023-01-24 02:03:51.110697: step: 410/459, loss: 0.07664906978607178 2023-01-24 02:03:51.798879: step: 412/459, loss: 0.18341441452503204 2023-01-24 02:03:52.435766: step: 414/459, loss: 0.1184607595205307 2023-01-24 02:03:53.021672: step: 416/459, loss: 0.07574816048145294 2023-01-24 02:03:53.627619: step: 418/459, loss: 0.05619792640209198 2023-01-24 02:03:54.289010: step: 420/459, loss: 0.821246862411499 2023-01-24 02:03:54.913032: step: 422/459, loss: 0.08153391629457474 2023-01-24 02:03:55.547174: step: 424/459, loss: 0.1746615320444107 2023-01-24 02:03:56.164259: step: 426/459, loss: 0.0843830406665802 2023-01-24 02:03:56.783911: step: 428/459, loss: 0.16560687124729156 2023-01-24 02:03:57.380920: step: 430/459, loss: 0.23431463539600372 2023-01-24 02:03:57.898694: step: 432/459, loss: 0.07940586656332016 2023-01-24 02:03:58.498517: step: 434/459, loss: 0.33663052320480347 2023-01-24 02:03:59.087340: step: 436/459, loss: 0.35963425040245056 2023-01-24 02:03:59.775628: step: 438/459, loss: 0.0925745740532875 2023-01-24 02:04:00.346665: step: 440/459, loss: 0.1296464204788208 2023-01-24 02:04:00.957607: step: 442/459, loss: 0.10442955046892166 2023-01-24 02:04:01.565540: step: 444/459, loss: 0.07118776440620422 2023-01-24 02:04:02.182050: step: 446/459, loss: 0.33070990443229675 2023-01-24 02:04:02.781084: step: 448/459, loss: 0.07543440163135529 2023-01-24 02:04:03.409526: step: 450/459, loss: 0.060925573110580444 2023-01-24 02:04:04.082805: step: 452/459, loss: 0.1011437326669693 2023-01-24 02:04:04.664940: step: 454/459, 
loss: 0.0843692496418953 2023-01-24 02:04:05.248065: step: 456/459, loss: 0.10096218436956406 2023-01-24 02:04:05.916767: step: 458/459, loss: 0.00753779849037528 2023-01-24 02:04:06.535907: step: 460/459, loss: 0.17426066100597382 2023-01-24 02:04:07.138280: step: 462/459, loss: 0.2552141547203064 2023-01-24 02:04:07.780175: step: 464/459, loss: 0.14819921553134918 2023-01-24 02:04:08.358556: step: 466/459, loss: 0.10655447840690613 2023-01-24 02:04:08.986642: step: 468/459, loss: 0.1460755616426468 2023-01-24 02:04:09.557771: step: 470/459, loss: 0.11302580684423447 2023-01-24 02:04:10.218518: step: 472/459, loss: 0.09060259908437729 2023-01-24 02:04:10.861062: step: 474/459, loss: 0.12146192044019699 2023-01-24 02:04:11.497525: step: 476/459, loss: 0.9154244661331177 2023-01-24 02:04:12.143830: step: 478/459, loss: 0.18819062411785126 2023-01-24 02:04:12.787701: step: 480/459, loss: 0.5615552663803101 2023-01-24 02:04:13.447318: step: 482/459, loss: 0.11075892299413681 2023-01-24 02:04:14.119715: step: 484/459, loss: 0.12827812135219574 2023-01-24 02:04:14.742366: step: 486/459, loss: 0.10607924312353134 2023-01-24 02:04:15.340409: step: 488/459, loss: 0.17333079874515533 2023-01-24 02:04:15.938467: step: 490/459, loss: 0.04611348733305931 2023-01-24 02:04:16.604582: step: 492/459, loss: 0.32898327708244324 2023-01-24 02:04:17.202579: step: 494/459, loss: 0.025962205603718758 2023-01-24 02:04:17.825795: step: 496/459, loss: 0.07168611884117126 2023-01-24 02:04:18.422755: step: 498/459, loss: 0.04299633949995041 2023-01-24 02:04:19.065976: step: 500/459, loss: 0.21844051778316498 2023-01-24 02:04:19.649092: step: 502/459, loss: 0.03939861059188843 2023-01-24 02:04:20.271523: step: 504/459, loss: 0.7802074551582336 2023-01-24 02:04:20.850664: step: 506/459, loss: 1.0611538887023926 2023-01-24 02:04:21.501866: step: 508/459, loss: 0.16216319799423218 2023-01-24 02:04:22.151310: step: 510/459, loss: 0.024261988699436188 2023-01-24 02:04:22.743464: step: 512/459, loss: 0.1245577409863472 2023-01-24 02:04:23.289484: step: 514/459, loss: 0.12281256914138794 2023-01-24 02:04:23.890068: step: 516/459, loss: 0.03210621327161789 2023-01-24 02:04:24.464122: step: 518/459, loss: 0.027598926797509193 2023-01-24 02:04:25.036167: step: 520/459, loss: 0.056513987481594086 2023-01-24 02:04:25.630392: step: 522/459, loss: 0.09394478052854538 2023-01-24 02:04:26.316529: step: 524/459, loss: 0.11628502607345581 2023-01-24 02:04:26.881445: step: 526/459, loss: 0.3313004970550537 2023-01-24 02:04:27.503642: step: 528/459, loss: 0.06811508536338806 2023-01-24 02:04:28.184449: step: 530/459, loss: 0.056805554777383804 2023-01-24 02:04:28.811084: step: 532/459, loss: 0.24579162895679474 2023-01-24 02:04:29.433069: step: 534/459, loss: 0.257712721824646 2023-01-24 02:04:30.068017: step: 536/459, loss: 0.24681930243968964 2023-01-24 02:04:30.717662: step: 538/459, loss: 0.048322807997465134 2023-01-24 02:04:31.364636: step: 540/459, loss: 0.18846842646598816 2023-01-24 02:04:31.967156: step: 542/459, loss: 0.14259827136993408 2023-01-24 02:04:32.655095: step: 544/459, loss: 0.33930668234825134 2023-01-24 02:04:33.270511: step: 546/459, loss: 0.08005011081695557 2023-01-24 02:04:33.873446: step: 548/459, loss: 0.06313712894916534 2023-01-24 02:04:34.508545: step: 550/459, loss: 0.13902728259563446 2023-01-24 02:04:35.097962: step: 552/459, loss: 0.052794408053159714 2023-01-24 02:04:35.779316: step: 554/459, loss: 0.09844132512807846 2023-01-24 02:04:36.385634: step: 556/459, loss: 0.09308983385562897 2023-01-24 
02:04:37.085018: step: 558/459, loss: 0.09996873885393143 2023-01-24 02:04:37.671442: step: 560/459, loss: 0.27076905965805054 2023-01-24 02:04:38.262336: step: 562/459, loss: 3.5108914375305176 2023-01-24 02:04:38.897647: step: 564/459, loss: 0.10042358934879303 2023-01-24 02:04:39.552356: step: 566/459, loss: 0.09189004451036453 2023-01-24 02:04:40.136522: step: 568/459, loss: 0.13599471747875214 2023-01-24 02:04:40.850180: step: 570/459, loss: 0.13733239471912384 2023-01-24 02:04:41.469677: step: 572/459, loss: 0.08009476959705353 2023-01-24 02:04:42.071195: step: 574/459, loss: 0.12414371967315674 2023-01-24 02:04:42.694153: step: 576/459, loss: 0.1632091999053955 2023-01-24 02:04:43.262048: step: 578/459, loss: 0.08208313584327698 2023-01-24 02:04:43.802736: step: 580/459, loss: 0.09395232796669006 2023-01-24 02:04:44.449895: step: 582/459, loss: 0.03474675863981247 2023-01-24 02:04:45.091700: step: 584/459, loss: 0.09587102383375168 2023-01-24 02:04:45.673989: step: 586/459, loss: 3.187950611114502 2023-01-24 02:04:46.317962: step: 588/459, loss: 0.04549644514918327 2023-01-24 02:04:46.922280: step: 590/459, loss: 0.3366769254207611 2023-01-24 02:04:47.586195: step: 592/459, loss: 0.06915085762739182 2023-01-24 02:04:48.183849: step: 594/459, loss: 0.3014187812805176 2023-01-24 02:04:48.770539: step: 596/459, loss: 0.2883806526660919 2023-01-24 02:04:49.447482: step: 598/459, loss: 0.09995653480291367 2023-01-24 02:04:50.029860: step: 600/459, loss: 0.06166382133960724 2023-01-24 02:04:50.630095: step: 602/459, loss: 0.25231078267097473 2023-01-24 02:04:51.270761: step: 604/459, loss: 0.04706256464123726 2023-01-24 02:04:51.943513: step: 606/459, loss: 0.3325459063053131 2023-01-24 02:04:52.576389: step: 608/459, loss: 0.07108671963214874 2023-01-24 02:04:53.236396: step: 610/459, loss: 0.17685885727405548 2023-01-24 02:04:53.816015: step: 612/459, loss: 0.35201117396354675 2023-01-24 02:04:54.388692: step: 614/459, loss: 0.10156324505805969 2023-01-24 02:04:54.990920: step: 616/459, loss: 0.05311933159828186 2023-01-24 02:04:55.531106: step: 618/459, loss: 0.01932988502085209 2023-01-24 02:04:56.140385: step: 620/459, loss: 0.13552223145961761 2023-01-24 02:04:56.751142: step: 622/459, loss: 0.05023641139268875 2023-01-24 02:04:57.385800: step: 624/459, loss: 0.23697809875011444 2023-01-24 02:04:58.076368: step: 626/459, loss: 0.07193796336650848 2023-01-24 02:04:58.718203: step: 628/459, loss: 0.07861020416021347 2023-01-24 02:04:59.351206: step: 630/459, loss: 0.19488883018493652 2023-01-24 02:04:59.963950: step: 632/459, loss: 0.10828530043363571 2023-01-24 02:05:00.493341: step: 634/459, loss: 0.06810830533504486 2023-01-24 02:05:01.102353: step: 636/459, loss: 0.1384148746728897 2023-01-24 02:05:01.728303: step: 638/459, loss: 0.45994338393211365 2023-01-24 02:05:02.475141: step: 640/459, loss: 0.08801347017288208 2023-01-24 02:05:03.085002: step: 642/459, loss: 0.0733514130115509 2023-01-24 02:05:03.673578: step: 644/459, loss: 0.022017747163772583 2023-01-24 02:05:04.361380: step: 646/459, loss: 0.06546148657798767 2023-01-24 02:05:04.964010: step: 648/459, loss: 0.08377476036548615 2023-01-24 02:05:05.524893: step: 650/459, loss: 0.02632025070488453 2023-01-24 02:05:06.221880: step: 652/459, loss: 0.10992850363254547 2023-01-24 02:05:06.920475: step: 654/459, loss: 0.04190557450056076 2023-01-24 02:05:07.506674: step: 656/459, loss: 0.02639523334801197 2023-01-24 02:05:08.128431: step: 658/459, loss: 0.09664706140756607 2023-01-24 02:05:08.776133: step: 660/459, loss: 
0.09208986908197403 2023-01-24 02:05:09.392456: step: 662/459, loss: 0.0320853516459465 2023-01-24 02:05:09.959794: step: 664/459, loss: 0.24309496581554413 2023-01-24 02:05:10.529968: step: 666/459, loss: 0.2965535819530487 2023-01-24 02:05:11.170035: step: 668/459, loss: 0.03941243141889572 2023-01-24 02:05:11.807463: step: 670/459, loss: 0.08951199054718018 2023-01-24 02:05:12.485520: step: 672/459, loss: 0.08161386102437973 2023-01-24 02:05:13.098289: step: 674/459, loss: 0.05279042571783066 2023-01-24 02:05:13.769045: step: 676/459, loss: 0.07945773750543594 2023-01-24 02:05:14.389576: step: 678/459, loss: 0.03231073170900345 2023-01-24 02:05:15.066372: step: 680/459, loss: 0.06708275526762009 2023-01-24 02:05:15.756008: step: 682/459, loss: 2.4558122158050537 2023-01-24 02:05:16.357797: step: 684/459, loss: 0.08269591629505157 2023-01-24 02:05:16.957654: step: 686/459, loss: 0.013052178546786308 2023-01-24 02:05:17.588951: step: 688/459, loss: 0.06078064814209938 2023-01-24 02:05:18.259888: step: 690/459, loss: 0.11198976635932922 2023-01-24 02:05:18.943828: step: 692/459, loss: 0.400978147983551 2023-01-24 02:05:19.549589: step: 694/459, loss: 0.08607816696166992 2023-01-24 02:05:20.202303: step: 696/459, loss: 0.057426102459430695 2023-01-24 02:05:20.787982: step: 698/459, loss: 0.13164649903774261 2023-01-24 02:05:21.357599: step: 700/459, loss: 0.20797213912010193 2023-01-24 02:05:21.981675: step: 702/459, loss: 0.1394898146390915 2023-01-24 02:05:22.683821: step: 704/459, loss: 0.11705829203128815 2023-01-24 02:05:23.292654: step: 706/459, loss: 0.002370404312387109 2023-01-24 02:05:23.901161: step: 708/459, loss: 0.0833931714296341 2023-01-24 02:05:24.550532: step: 710/459, loss: 0.11425817757844925 2023-01-24 02:05:25.172358: step: 712/459, loss: 0.09220904856920242 2023-01-24 02:05:25.804701: step: 714/459, loss: 0.15167245268821716 2023-01-24 02:05:26.412818: step: 716/459, loss: 0.004804875701665878 2023-01-24 02:05:27.004798: step: 718/459, loss: 0.1697312295436859 2023-01-24 02:05:27.740782: step: 720/459, loss: 0.15802252292633057 2023-01-24 02:05:28.324833: step: 722/459, loss: 0.08858530968427658 2023-01-24 02:05:28.939699: step: 724/459, loss: 0.08172594010829926 2023-01-24 02:05:29.548771: step: 726/459, loss: 0.06709741801023483 2023-01-24 02:05:30.195276: step: 728/459, loss: 0.07536706328392029 2023-01-24 02:05:30.768474: step: 730/459, loss: 0.038063399493694305 2023-01-24 02:05:31.421104: step: 732/459, loss: 0.11029160767793655 2023-01-24 02:05:31.997294: step: 734/459, loss: 0.028825556859374046 2023-01-24 02:05:32.600924: step: 736/459, loss: 0.38286399841308594 2023-01-24 02:05:33.189681: step: 738/459, loss: 0.9089787006378174 2023-01-24 02:05:33.783602: step: 740/459, loss: 0.08147499710321426 2023-01-24 02:05:34.445463: step: 742/459, loss: 0.06796517968177795 2023-01-24 02:05:35.087482: step: 744/459, loss: 0.6181621551513672 2023-01-24 02:05:35.712653: step: 746/459, loss: 0.4888882339000702 2023-01-24 02:05:36.292222: step: 748/459, loss: 0.2925356328487396 2023-01-24 02:05:36.943812: step: 750/459, loss: 0.05433874577283859 2023-01-24 02:05:37.557114: step: 752/459, loss: 3.4177660942077637 2023-01-24 02:05:38.216191: step: 754/459, loss: 0.09696972370147705 2023-01-24 02:05:38.839088: step: 756/459, loss: 0.04206155240535736 2023-01-24 02:05:39.423378: step: 758/459, loss: 0.14090225100517273 2023-01-24 02:05:40.109332: step: 760/459, loss: 0.8810139298439026 2023-01-24 02:05:40.729656: step: 762/459, loss: 0.08021038770675659 2023-01-24 
02:05:41.300221: step: 764/459, loss: 0.030677393078804016 2023-01-24 02:05:41.987355: step: 766/459, loss: 0.1049027368426323 2023-01-24 02:05:42.630698: step: 768/459, loss: 0.032831255346536636 2023-01-24 02:05:43.221561: step: 770/459, loss: 0.2500513792037964 2023-01-24 02:05:43.867335: step: 772/459, loss: 0.05519668385386467 2023-01-24 02:05:44.487021: step: 774/459, loss: 0.15829069912433624 2023-01-24 02:05:45.047449: step: 776/459, loss: 0.14170663058757782 2023-01-24 02:05:45.634836: step: 778/459, loss: 0.24556122720241547 2023-01-24 02:05:46.194824: step: 780/459, loss: 0.03205791860818863 2023-01-24 02:05:46.761939: step: 782/459, loss: 0.053853604942560196 2023-01-24 02:05:47.351186: step: 784/459, loss: 0.060374870896339417 2023-01-24 02:05:47.920971: step: 786/459, loss: 0.03137237951159477 2023-01-24 02:05:48.501843: step: 788/459, loss: 0.3026465177536011 2023-01-24 02:05:49.135287: step: 790/459, loss: 0.11247716099023819 2023-01-24 02:05:49.780618: step: 792/459, loss: 0.29344436526298523 2023-01-24 02:05:50.374639: step: 794/459, loss: 0.1255662590265274 2023-01-24 02:05:50.986409: step: 796/459, loss: 0.0921609029173851 2023-01-24 02:05:51.539026: step: 798/459, loss: 0.13169322907924652 2023-01-24 02:05:52.244702: step: 800/459, loss: 0.19478848576545715 2023-01-24 02:05:52.883686: step: 802/459, loss: 0.08693885803222656 2023-01-24 02:05:53.500672: step: 804/459, loss: 1.1657713651657104 2023-01-24 02:05:54.120308: step: 806/459, loss: 0.12023893743753433 2023-01-24 02:05:54.694332: step: 808/459, loss: 2.581371545791626 2023-01-24 02:05:55.299518: step: 810/459, loss: 0.1869339495897293 2023-01-24 02:05:55.867263: step: 812/459, loss: 0.1091032549738884 2023-01-24 02:05:56.455583: step: 814/459, loss: 0.058019429445266724 2023-01-24 02:05:57.148197: step: 816/459, loss: 0.4429408609867096 2023-01-24 02:05:57.782186: step: 818/459, loss: 0.6199474334716797 2023-01-24 02:05:58.418774: step: 820/459, loss: 0.0704524889588356 2023-01-24 02:05:59.020178: step: 822/459, loss: 0.08412133157253265 2023-01-24 02:05:59.698026: step: 824/459, loss: 0.3091006278991699 2023-01-24 02:06:00.311811: step: 826/459, loss: 0.057254817336797714 2023-01-24 02:06:00.880204: step: 828/459, loss: 0.1368977129459381 2023-01-24 02:06:01.466057: step: 830/459, loss: 0.03790481388568878 2023-01-24 02:06:02.096949: step: 832/459, loss: 0.023171722888946533 2023-01-24 02:06:02.732796: step: 834/459, loss: 0.047425899654626846 2023-01-24 02:06:03.320505: step: 836/459, loss: 0.10208597779273987 2023-01-24 02:06:03.916314: step: 838/459, loss: 0.06666526198387146 2023-01-24 02:06:04.568967: step: 840/459, loss: 0.6783316135406494 2023-01-24 02:06:05.135014: step: 842/459, loss: 0.1524314433336258 2023-01-24 02:06:05.730324: step: 844/459, loss: 0.07457062602043152 2023-01-24 02:06:06.405406: step: 846/459, loss: 0.049148477613925934 2023-01-24 02:06:07.044502: step: 848/459, loss: 0.8664312958717346 2023-01-24 02:06:07.607146: step: 850/459, loss: 0.21296454966068268 2023-01-24 02:06:08.269323: step: 852/459, loss: 2.5619375705718994 2023-01-24 02:06:08.958630: step: 854/459, loss: 0.12015214562416077 2023-01-24 02:06:09.568860: step: 856/459, loss: 0.0789339616894722 2023-01-24 02:06:10.173017: step: 858/459, loss: 0.04166645556688309 2023-01-24 02:06:10.797817: step: 860/459, loss: 0.05015014857053757 2023-01-24 02:06:11.460931: step: 862/459, loss: 0.03168589249253273 2023-01-24 02:06:12.082497: step: 864/459, loss: 0.06536014378070831 2023-01-24 02:06:12.742922: step: 866/459, loss: 
0.035541780292987823 2023-01-24 02:06:13.376361: step: 868/459, loss: 0.0296663586050272 2023-01-24 02:06:13.929959: step: 870/459, loss: 0.10016995668411255 2023-01-24 02:06:14.552909: step: 872/459, loss: 0.11620702594518661 2023-01-24 02:06:15.207023: step: 874/459, loss: 0.11550084501504898 2023-01-24 02:06:15.803280: step: 876/459, loss: 0.10136686265468597 2023-01-24 02:06:16.385580: step: 878/459, loss: 0.21205849945545197 2023-01-24 02:06:17.063530: step: 880/459, loss: 0.11933580040931702 2023-01-24 02:06:17.642573: step: 882/459, loss: 0.052056290209293365 2023-01-24 02:06:18.151942: step: 884/459, loss: 0.1244216114282608 2023-01-24 02:06:18.769787: step: 886/459, loss: 0.20485864579677582 2023-01-24 02:06:19.407985: step: 888/459, loss: 0.15688461065292358 2023-01-24 02:06:19.952995: step: 890/459, loss: 0.04750636965036392 2023-01-24 02:06:20.569629: step: 892/459, loss: 0.09721124172210693 2023-01-24 02:06:21.148763: step: 894/459, loss: 0.03525615110993385 2023-01-24 02:06:21.771057: step: 896/459, loss: 0.35135751962661743 2023-01-24 02:06:22.422849: step: 898/459, loss: 0.04915737360715866 2023-01-24 02:06:23.084942: step: 900/459, loss: 0.06339741498231888 2023-01-24 02:06:23.713681: step: 902/459, loss: 0.04550689086318016 2023-01-24 02:06:24.298476: step: 904/459, loss: 0.10822436213493347 2023-01-24 02:06:24.875720: step: 906/459, loss: 0.03533259034156799 2023-01-24 02:06:25.456889: step: 908/459, loss: 0.0849573090672493 2023-01-24 02:06:26.090686: step: 910/459, loss: 0.05269338935613632 2023-01-24 02:06:26.714388: step: 912/459, loss: 1.0473259687423706 2023-01-24 02:06:27.352413: step: 914/459, loss: 0.28274422883987427 2023-01-24 02:06:27.986612: step: 916/459, loss: 0.1270114779472351 2023-01-24 02:06:28.646735: step: 918/459, loss: 0.1440057009458542 2023-01-24 02:06:29.075055: step: 920/459, loss: 0.013715104199945927 ================================================== Loss: 0.194 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3384053296344981, 'r': 0.3191412691239954, 'f1': 0.32849111099286243}, 'combined': 0.2420460817842144, 'epoch': 16} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.35436975721980496, 'r': 0.28993889227074954, 'f1': 0.31893278149782445}, 'combined': 0.2041169801586076, 'epoch': 16} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33591269841269844, 'r': 0.3212523719165086, 'f1': 0.3284190106692532}, 'combined': 0.24199295522997605, 'epoch': 16} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.35472469026560427, 'r': 0.2850696601770856, 'f1': 0.3161054699544296}, 'combined': 0.20230750077083492, 'epoch': 16} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3439031636768945, 'r': 0.3178004567564471, 'f1': 0.33033696392632667}, 'combined': 0.24340618394571437, 'epoch': 16} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36433839164047394, 'r': 0.298246012482357, 'f1': 0.32799586535653696}, 'combined': 0.2351668468594039, 'epoch': 16} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3131313131313131, 'r': 0.2952380952380952, 'f1': 0.30392156862745096}, 'combined': 0.2026143790849673, 'epoch': 16} 
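Note on the metric dictionaries in these epoch summaries: the logged numbers are consistent with 'f1' being the usual harmonic mean of 'p' and 'r', and with 'combined' being the product of the template f1 and the slot f1. The following minimal Python sketch re-derives the epoch-16 Dev Chinese figures under those assumptions; the helper names are illustrative and not taken from train.py.

def f1(p: float, r: float) -> float:
    # Standard F1: harmonic mean of precision and recall.
    return 0.0 if p + r == 0 else 2 * p * r / (p + r)

def combined_score(template_f1: float, slot_f1: float) -> float:
    # 'combined' as it appears in this log: template f1 times slot f1 (assumed).
    return template_f1 * slot_f1

# Epoch 16, Dev Chinese, values copied from the summary above:
template = f1(p=1.0, r=0.5833333333333334)               # ~0.7368421052631579
slot = f1(p=0.3384053296344981, r=0.3191412691239954)    # ~0.32849111099286243
print(combined_score(template, slot))                    # ~0.2420460817842144
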
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2708333333333333, 'r': 0.2826086956521739, 'f1': 0.2765957446808511}, 'combined': 0.13829787234042554, 'epoch': 16} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25, 'r': 0.10344827586206896, 'f1': 0.14634146341463414}, 'combined': 0.09756097560975609, 'epoch': 16} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 'epoch': 10} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 'epoch': 10} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'epoch': 10} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3342478880342958, 'r': 0.3266369304319968, 'f1': 0.33039858414138645}, 'combined': 0.24345158831470579, 'epoch': 5} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3413499740991752, 'r': 0.24608229950967814, 'f1': 0.28599105067157526}, 'combined': 0.18303427242980813, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3269230769230769, 'r': 0.3695652173913043, 'f1': 0.346938775510204}, 'combined': 0.173469387755102, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8} ****************************** Epoch: 17 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:09:02.672925: step: 2/459, loss: 0.023617900907993317 2023-01-24 02:09:03.344294: step: 4/459, loss: 0.042917270213365555 2023-01-24 02:09:03.908034: step: 6/459, loss: 0.04132473096251488 2023-01-24 02:09:04.543345: step: 8/459, loss: 0.061491627246141434 2023-01-24 02:09:05.184520: step: 10/459, loss: 0.06487783044576645 2023-01-24 02:09:05.875576: step: 12/459, loss: 0.04811283200979233 2023-01-24 02:09:06.477227: step: 14/459, loss: 0.07599284499883652 2023-01-24 02:09:07.093573: step: 16/459, loss: 0.1744881570339203 2023-01-24 02:09:07.635579: step: 18/459, loss: 0.05150439590215683 2023-01-24 02:09:08.193701: step: 20/459, loss: 0.04898804426193237 2023-01-24 02:09:08.821382: step: 22/459, loss: 0.041439104825258255 2023-01-24 02:09:09.491162: step: 
24/459, loss: 0.11179296672344208 2023-01-24 02:09:10.104469: step: 26/459, loss: 0.17864306271076202 2023-01-24 02:09:10.801270: step: 28/459, loss: 0.19963650405406952 2023-01-24 02:09:11.467839: step: 30/459, loss: 0.02400391362607479 2023-01-24 02:09:12.139587: step: 32/459, loss: 0.01613488793373108 2023-01-24 02:09:12.730319: step: 34/459, loss: 0.0878744125366211 2023-01-24 02:09:13.311335: step: 36/459, loss: 0.5559832453727722 2023-01-24 02:09:13.908536: step: 38/459, loss: 0.17401431500911713 2023-01-24 02:09:14.574622: step: 40/459, loss: 0.15121257305145264 2023-01-24 02:09:15.225461: step: 42/459, loss: 0.14103561639785767 2023-01-24 02:09:15.900362: step: 44/459, loss: 0.01791762188076973 2023-01-24 02:09:16.512767: step: 46/459, loss: 0.036389995366334915 2023-01-24 02:09:17.119254: step: 48/459, loss: 0.022581489756703377 2023-01-24 02:09:17.753875: step: 50/459, loss: 0.05001528561115265 2023-01-24 02:09:18.339241: step: 52/459, loss: 0.07352833449840546 2023-01-24 02:09:18.953758: step: 54/459, loss: 0.38255757093429565 2023-01-24 02:09:19.581957: step: 56/459, loss: 0.19791819155216217 2023-01-24 02:09:20.308250: step: 58/459, loss: 0.058141302317380905 2023-01-24 02:09:20.873471: step: 60/459, loss: 0.06262841075658798 2023-01-24 02:09:21.477843: step: 62/459, loss: 0.0402521938085556 2023-01-24 02:09:22.100916: step: 64/459, loss: 0.08754488080739975 2023-01-24 02:09:22.707724: step: 66/459, loss: 0.010230867192149162 2023-01-24 02:09:23.346202: step: 68/459, loss: 0.060881346464157104 2023-01-24 02:09:23.930237: step: 70/459, loss: 0.06119375675916672 2023-01-24 02:09:24.465966: step: 72/459, loss: 0.048383649438619614 2023-01-24 02:09:25.092651: step: 74/459, loss: 0.06367171555757523 2023-01-24 02:09:25.725408: step: 76/459, loss: 0.838499903678894 2023-01-24 02:09:26.325460: step: 78/459, loss: 0.03776371851563454 2023-01-24 02:09:26.975208: step: 80/459, loss: 0.06390121579170227 2023-01-24 02:09:27.600645: step: 82/459, loss: 1.1200140714645386 2023-01-24 02:09:28.250020: step: 84/459, loss: 0.11002936959266663 2023-01-24 02:09:28.831499: step: 86/459, loss: 0.060569290071725845 2023-01-24 02:09:29.439030: step: 88/459, loss: 0.0804135650396347 2023-01-24 02:09:30.174519: step: 90/459, loss: 0.05924653634428978 2023-01-24 02:09:30.745618: step: 92/459, loss: 0.07930826395750046 2023-01-24 02:09:31.363054: step: 94/459, loss: 0.16884511709213257 2023-01-24 02:09:31.969071: step: 96/459, loss: 0.08759103715419769 2023-01-24 02:09:32.556347: step: 98/459, loss: 0.034541454166173935 2023-01-24 02:09:33.196298: step: 100/459, loss: 0.041537120938301086 2023-01-24 02:09:33.815591: step: 102/459, loss: 0.03321737423539162 2023-01-24 02:09:34.525824: step: 104/459, loss: 0.07529294490814209 2023-01-24 02:09:35.073387: step: 106/459, loss: 0.031296465545892715 2023-01-24 02:09:35.651343: step: 108/459, loss: 0.16464368999004364 2023-01-24 02:09:36.296620: step: 110/459, loss: 0.11073466390371323 2023-01-24 02:09:36.970623: step: 112/459, loss: 0.4822169244289398 2023-01-24 02:09:37.551709: step: 114/459, loss: 0.10786917805671692 2023-01-24 02:09:38.175473: step: 116/459, loss: 0.08898655325174332 2023-01-24 02:09:38.779070: step: 118/459, loss: 0.105762779712677 2023-01-24 02:09:39.358887: step: 120/459, loss: 0.06101712957024574 2023-01-24 02:09:39.976900: step: 122/459, loss: 0.09032213687896729 2023-01-24 02:09:40.627251: step: 124/459, loss: 0.040036872029304504 2023-01-24 02:09:41.320611: step: 126/459, loss: 0.10270537436008453 2023-01-24 02:09:42.005961: step: 
128/459, loss: 0.0167459174990654 2023-01-24 02:09:42.591686: step: 130/459, loss: 0.08457823842763901 2023-01-24 02:09:43.184919: step: 132/459, loss: 0.10497186332941055 2023-01-24 02:09:43.821245: step: 134/459, loss: 0.08408579975366592 2023-01-24 02:09:44.414738: step: 136/459, loss: 0.016146305948495865 2023-01-24 02:09:45.081271: step: 138/459, loss: 0.03297305107116699 2023-01-24 02:09:45.749963: step: 140/459, loss: 0.054273270070552826 2023-01-24 02:09:46.304428: step: 142/459, loss: 0.0511932335793972 2023-01-24 02:09:46.986170: step: 144/459, loss: 0.048122234642505646 2023-01-24 02:09:47.588661: step: 146/459, loss: 0.12248047441244125 2023-01-24 02:09:48.332989: step: 148/459, loss: 0.623723566532135 2023-01-24 02:09:49.010285: step: 150/459, loss: 0.10519196838140488 2023-01-24 02:09:49.660396: step: 152/459, loss: 0.055708784610033035 2023-01-24 02:09:50.258603: step: 154/459, loss: 0.051616400480270386 2023-01-24 02:09:50.874600: step: 156/459, loss: 0.02640586905181408 2023-01-24 02:09:51.498916: step: 158/459, loss: 0.07304804772138596 2023-01-24 02:09:52.168771: step: 160/459, loss: 0.16465741395950317 2023-01-24 02:09:52.732384: step: 162/459, loss: 0.09279470145702362 2023-01-24 02:09:53.325598: step: 164/459, loss: 0.0971711277961731 2023-01-24 02:09:53.964537: step: 166/459, loss: 0.013417240232229233 2023-01-24 02:09:54.603794: step: 168/459, loss: 0.1909426897764206 2023-01-24 02:09:55.227623: step: 170/459, loss: 0.04454432800412178 2023-01-24 02:09:55.851457: step: 172/459, loss: 0.0822724848985672 2023-01-24 02:09:56.493407: step: 174/459, loss: 0.046097494661808014 2023-01-24 02:09:57.091687: step: 176/459, loss: 0.10182993859052658 2023-01-24 02:09:57.740742: step: 178/459, loss: 0.2735452353954315 2023-01-24 02:09:58.327900: step: 180/459, loss: 0.0720914900302887 2023-01-24 02:09:58.916522: step: 182/459, loss: 0.032329410314559937 2023-01-24 02:09:59.531855: step: 184/459, loss: 0.06305782496929169 2023-01-24 02:10:00.174670: step: 186/459, loss: 0.06109801307320595 2023-01-24 02:10:00.802925: step: 188/459, loss: 0.12039431929588318 2023-01-24 02:10:01.431505: step: 190/459, loss: 0.06010859087109566 2023-01-24 02:10:02.011560: step: 192/459, loss: 0.025670738890767097 2023-01-24 02:10:02.606534: step: 194/459, loss: 0.028268590569496155 2023-01-24 02:10:03.252197: step: 196/459, loss: 0.010709376074373722 2023-01-24 02:10:03.811217: step: 198/459, loss: 0.02102375403046608 2023-01-24 02:10:04.397793: step: 200/459, loss: 0.010205554775893688 2023-01-24 02:10:04.994525: step: 202/459, loss: 0.013999762944877148 2023-01-24 02:10:05.676819: step: 204/459, loss: 0.06063319370150566 2023-01-24 02:10:06.291679: step: 206/459, loss: 0.09702546894550323 2023-01-24 02:10:06.947438: step: 208/459, loss: 0.05307465046644211 2023-01-24 02:10:07.506468: step: 210/459, loss: 0.09326665103435516 2023-01-24 02:10:08.123454: step: 212/459, loss: 0.04474213346838951 2023-01-24 02:10:08.731126: step: 214/459, loss: 0.46350324153900146 2023-01-24 02:10:09.397350: step: 216/459, loss: 1.4491281509399414 2023-01-24 02:10:10.036946: step: 218/459, loss: 0.09353916347026825 2023-01-24 02:10:10.642362: step: 220/459, loss: 0.04725097492337227 2023-01-24 02:10:11.229110: step: 222/459, loss: 0.12393723428249359 2023-01-24 02:10:11.825138: step: 224/459, loss: 0.028627432882785797 2023-01-24 02:10:12.459383: step: 226/459, loss: 0.09343075007200241 2023-01-24 02:10:13.105705: step: 228/459, loss: 0.4199906587600708 2023-01-24 02:10:13.748337: step: 230/459, loss: 
0.036933500319719315 2023-01-24 02:10:14.404946: step: 232/459, loss: 0.052467942237854004 2023-01-24 02:10:15.024157: step: 234/459, loss: 0.15153734385967255 2023-01-24 02:10:15.609513: step: 236/459, loss: 0.06361532211303711 2023-01-24 02:10:16.177582: step: 238/459, loss: 0.07768677920103073 2023-01-24 02:10:16.772526: step: 240/459, loss: 0.039046816527843475 2023-01-24 02:10:17.401648: step: 242/459, loss: 0.13897506892681122 2023-01-24 02:10:18.027156: step: 244/459, loss: 0.09441661834716797 2023-01-24 02:10:18.646674: step: 246/459, loss: 0.031246041879057884 2023-01-24 02:10:19.294703: step: 248/459, loss: 0.251242458820343 2023-01-24 02:10:20.022638: step: 250/459, loss: 0.06872815638780594 2023-01-24 02:10:20.601265: step: 252/459, loss: 1.6685680150985718 2023-01-24 02:10:21.200429: step: 254/459, loss: 0.07157920300960541 2023-01-24 02:10:21.889307: step: 256/459, loss: 0.09108021855354309 2023-01-24 02:10:22.470169: step: 258/459, loss: 0.17249703407287598 2023-01-24 02:10:23.087680: step: 260/459, loss: 0.08050550520420074 2023-01-24 02:10:23.806518: step: 262/459, loss: 0.0625477284193039 2023-01-24 02:10:24.483083: step: 264/459, loss: 0.03974320366978645 2023-01-24 02:10:25.102034: step: 266/459, loss: 0.03668951988220215 2023-01-24 02:10:25.722183: step: 268/459, loss: 0.01776174083352089 2023-01-24 02:10:26.312619: step: 270/459, loss: 0.16315367817878723 2023-01-24 02:10:26.961103: step: 272/459, loss: 0.14852692186832428 2023-01-24 02:10:27.590082: step: 274/459, loss: 0.044340793043375015 2023-01-24 02:10:28.206355: step: 276/459, loss: 0.1601134091615677 2023-01-24 02:10:28.861328: step: 278/459, loss: 0.14793793857097626 2023-01-24 02:10:29.428417: step: 280/459, loss: 0.03840414062142372 2023-01-24 02:10:30.025169: step: 282/459, loss: 0.1302376091480255 2023-01-24 02:10:30.609942: step: 284/459, loss: 0.12639309465885162 2023-01-24 02:10:31.180423: step: 286/459, loss: 0.03411242365837097 2023-01-24 02:10:31.734338: step: 288/459, loss: 0.01305207796394825 2023-01-24 02:10:32.361120: step: 290/459, loss: 0.12014467269182205 2023-01-24 02:10:32.953722: step: 292/459, loss: 0.024230530485510826 2023-01-24 02:10:33.605564: step: 294/459, loss: 0.07831723242998123 2023-01-24 02:10:34.316599: step: 296/459, loss: 0.3378256559371948 2023-01-24 02:10:34.889013: step: 298/459, loss: 0.014373361133038998 2023-01-24 02:10:35.512990: step: 300/459, loss: 0.07347707450389862 2023-01-24 02:10:36.140604: step: 302/459, loss: 0.13864287734031677 2023-01-24 02:10:36.783829: step: 304/459, loss: 0.03191189467906952 2023-01-24 02:10:37.399082: step: 306/459, loss: 0.046855952590703964 2023-01-24 02:10:38.132935: step: 308/459, loss: 0.2673243284225464 2023-01-24 02:10:38.759001: step: 310/459, loss: 0.08154354244470596 2023-01-24 02:10:39.411574: step: 312/459, loss: 0.09015719592571259 2023-01-24 02:10:40.011518: step: 314/459, loss: 0.10716946423053741 2023-01-24 02:10:40.625180: step: 316/459, loss: 0.10369449108839035 2023-01-24 02:10:41.291113: step: 318/459, loss: 0.24734295904636383 2023-01-24 02:10:41.953307: step: 320/459, loss: 0.6796312928199768 2023-01-24 02:10:42.597046: step: 322/459, loss: 0.10343334078788757 2023-01-24 02:10:43.250464: step: 324/459, loss: 0.20953302085399628 2023-01-24 02:10:43.849733: step: 326/459, loss: 0.8634041547775269 2023-01-24 02:10:44.470470: step: 328/459, loss: 0.15839160978794098 2023-01-24 02:10:45.035195: step: 330/459, loss: 0.06026395410299301 2023-01-24 02:10:45.611730: step: 332/459, loss: 0.12758509814739227 2023-01-24 
02:10:46.203204: step: 334/459, loss: 0.07899324595928192 2023-01-24 02:10:46.819868: step: 336/459, loss: 0.2506687045097351 2023-01-24 02:10:47.391739: step: 338/459, loss: 0.1152324229478836 2023-01-24 02:10:48.060880: step: 340/459, loss: 0.07790293544530869 2023-01-24 02:10:48.658033: step: 342/459, loss: 0.18366481363773346 2023-01-24 02:10:49.294715: step: 344/459, loss: 0.22912512719631195 2023-01-24 02:10:49.866364: step: 346/459, loss: 0.0358346626162529 2023-01-24 02:10:50.522641: step: 348/459, loss: 0.4236759543418884 2023-01-24 02:10:51.208160: step: 350/459, loss: 0.027342965826392174 2023-01-24 02:10:51.803173: step: 352/459, loss: 0.02166707254946232 2023-01-24 02:10:52.439597: step: 354/459, loss: 0.06232389062643051 2023-01-24 02:10:53.052465: step: 356/459, loss: 0.039982691407203674 2023-01-24 02:10:53.626849: step: 358/459, loss: 0.05154063552618027 2023-01-24 02:10:54.259383: step: 360/459, loss: 0.03367205709218979 2023-01-24 02:10:54.866656: step: 362/459, loss: 0.0737287700176239 2023-01-24 02:10:55.440572: step: 364/459, loss: 0.03722355514764786 2023-01-24 02:10:56.061988: step: 366/459, loss: 0.01532907783985138 2023-01-24 02:10:56.695676: step: 368/459, loss: 0.009699760004878044 2023-01-24 02:10:57.328860: step: 370/459, loss: 0.05487627536058426 2023-01-24 02:10:57.870430: step: 372/459, loss: 0.10910601168870926 2023-01-24 02:10:58.444710: step: 374/459, loss: 0.11488139629364014 2023-01-24 02:10:59.064582: step: 376/459, loss: 0.05870002135634422 2023-01-24 02:10:59.609130: step: 378/459, loss: 0.3448238670825958 2023-01-24 02:11:00.250675: step: 380/459, loss: 0.907960057258606 2023-01-24 02:11:00.928333: step: 382/459, loss: 0.40306442975997925 2023-01-24 02:11:01.549752: step: 384/459, loss: 0.08667413890361786 2023-01-24 02:11:02.140796: step: 386/459, loss: 0.11188200116157532 2023-01-24 02:11:02.749274: step: 388/459, loss: 0.1242564395070076 2023-01-24 02:11:03.360906: step: 390/459, loss: 0.06850863993167877 2023-01-24 02:11:03.988435: step: 392/459, loss: 0.12430678308010101 2023-01-24 02:11:04.616183: step: 394/459, loss: 0.04329359903931618 2023-01-24 02:11:05.192899: step: 396/459, loss: 0.0378931500017643 2023-01-24 02:11:05.828531: step: 398/459, loss: 0.21393896639347076 2023-01-24 02:11:06.442448: step: 400/459, loss: 0.061272937804460526 2023-01-24 02:11:07.042824: step: 402/459, loss: 0.12339387834072113 2023-01-24 02:11:07.692024: step: 404/459, loss: 0.03447604551911354 2023-01-24 02:11:08.262928: step: 406/459, loss: 0.030484095215797424 2023-01-24 02:11:08.875584: step: 408/459, loss: 0.17272557318210602 2023-01-24 02:11:09.436061: step: 410/459, loss: 0.17757295072078705 2023-01-24 02:11:10.101356: step: 412/459, loss: 0.27297481894493103 2023-01-24 02:11:10.758402: step: 414/459, loss: 0.05134124681353569 2023-01-24 02:11:11.315230: step: 416/459, loss: 0.02276548743247986 2023-01-24 02:11:11.931486: step: 418/459, loss: 0.030830014497041702 2023-01-24 02:11:12.561480: step: 420/459, loss: 0.006846593227237463 2023-01-24 02:11:13.182159: step: 422/459, loss: 0.059731993824243546 2023-01-24 02:11:13.892645: step: 424/459, loss: 0.012153912335634232 2023-01-24 02:11:14.520277: step: 426/459, loss: 0.16158223152160645 2023-01-24 02:11:15.109791: step: 428/459, loss: 0.013866973109543324 2023-01-24 02:11:15.773247: step: 430/459, loss: 0.8400973677635193 2023-01-24 02:11:16.355893: step: 432/459, loss: 0.1180863231420517 2023-01-24 02:11:16.979490: step: 434/459, loss: 0.012854097411036491 2023-01-24 02:11:17.548284: step: 436/459, 
loss: 0.027588527649641037 2023-01-24 02:11:18.248107: step: 438/459, loss: 0.03459269180893898 2023-01-24 02:11:18.881398: step: 440/459, loss: 0.014368250966072083 2023-01-24 02:11:19.503723: step: 442/459, loss: 0.031756479293107986 2023-01-24 02:11:20.224102: step: 444/459, loss: 0.09058498591184616 2023-01-24 02:11:20.867947: step: 446/459, loss: 0.012751544825732708 2023-01-24 02:11:21.471183: step: 448/459, loss: 0.024569755420088768 2023-01-24 02:11:22.129114: step: 450/459, loss: 0.04515725374221802 2023-01-24 02:11:22.753964: step: 452/459, loss: 0.538608729839325 2023-01-24 02:11:23.368895: step: 454/459, loss: 0.010411237366497517 2023-01-24 02:11:24.036340: step: 456/459, loss: 0.07525904476642609 2023-01-24 02:11:24.651883: step: 458/459, loss: 0.088095523416996 2023-01-24 02:11:25.202229: step: 460/459, loss: 0.03918806090950966 2023-01-24 02:11:25.798109: step: 462/459, loss: 0.9005579948425293 2023-01-24 02:11:26.415837: step: 464/459, loss: 0.0622355192899704 2023-01-24 02:11:27.029174: step: 466/459, loss: 0.10376790910959244 2023-01-24 02:11:27.631735: step: 468/459, loss: 0.09071056544780731 2023-01-24 02:11:28.222068: step: 470/459, loss: 1.1271142959594727 2023-01-24 02:11:28.869372: step: 472/459, loss: 0.1307099312543869 2023-01-24 02:11:29.569322: step: 474/459, loss: 0.6417203545570374 2023-01-24 02:11:30.260508: step: 476/459, loss: 0.0656098872423172 2023-01-24 02:11:30.846970: step: 478/459, loss: 0.02642812766134739 2023-01-24 02:11:31.461274: step: 480/459, loss: 0.025968484580516815 2023-01-24 02:11:32.046357: step: 482/459, loss: 0.06565985083580017 2023-01-24 02:11:32.591877: step: 484/459, loss: 0.12358145415782928 2023-01-24 02:11:33.215413: step: 486/459, loss: 0.3458564877510071 2023-01-24 02:11:33.814549: step: 488/459, loss: 0.0017942443955689669 2023-01-24 02:11:34.386923: step: 490/459, loss: 0.0719977542757988 2023-01-24 02:11:34.950298: step: 492/459, loss: 0.010776638984680176 2023-01-24 02:11:35.502862: step: 494/459, loss: 0.16108338534832 2023-01-24 02:11:36.022059: step: 496/459, loss: 0.04110371693968773 2023-01-24 02:11:36.598120: step: 498/459, loss: 0.0980057492852211 2023-01-24 02:11:37.298271: step: 500/459, loss: 1.9842379093170166 2023-01-24 02:11:37.915321: step: 502/459, loss: 0.01712704449892044 2023-01-24 02:11:38.563339: step: 504/459, loss: 0.6701747179031372 2023-01-24 02:11:39.200706: step: 506/459, loss: 0.7044492363929749 2023-01-24 02:11:39.780397: step: 508/459, loss: 0.19316411018371582 2023-01-24 02:11:40.371967: step: 510/459, loss: 0.2237125039100647 2023-01-24 02:11:40.957949: step: 512/459, loss: 0.043580010533332825 2023-01-24 02:11:41.578765: step: 514/459, loss: 0.20655123889446259 2023-01-24 02:11:42.145773: step: 516/459, loss: 0.1630714386701584 2023-01-24 02:11:42.717939: step: 518/459, loss: 0.12124571204185486 2023-01-24 02:11:43.307860: step: 520/459, loss: 0.007585718296468258 2023-01-24 02:11:43.925177: step: 522/459, loss: 0.23828403651714325 2023-01-24 02:11:44.549877: step: 524/459, loss: 0.02057051658630371 2023-01-24 02:11:45.164955: step: 526/459, loss: 0.0716228112578392 2023-01-24 02:11:45.823984: step: 528/459, loss: 0.06481881439685822 2023-01-24 02:11:46.454246: step: 530/459, loss: 0.09685064852237701 2023-01-24 02:11:47.055288: step: 532/459, loss: 0.08366868644952774 2023-01-24 02:11:47.634603: step: 534/459, loss: 1.1870650053024292 2023-01-24 02:11:48.287052: step: 536/459, loss: 0.07621609419584274 2023-01-24 02:11:48.899986: step: 538/459, loss: 0.10012846440076828 2023-01-24 
02:11:49.600164: step: 540/459, loss: 0.03262370452284813 2023-01-24 02:11:50.203153: step: 542/459, loss: 0.050166450440883636 2023-01-24 02:11:50.815623: step: 544/459, loss: 0.10699218511581421 2023-01-24 02:11:51.509080: step: 546/459, loss: 0.0795406848192215 2023-01-24 02:11:52.104719: step: 548/459, loss: 0.13801270723342896 2023-01-24 02:11:52.684337: step: 550/459, loss: 0.02724553272128105 2023-01-24 02:11:53.268235: step: 552/459, loss: 0.6094252467155457 2023-01-24 02:11:53.884621: step: 554/459, loss: 0.03245340287685394 2023-01-24 02:11:54.558931: step: 556/459, loss: 0.16825851798057556 2023-01-24 02:11:55.174013: step: 558/459, loss: 0.084275983273983 2023-01-24 02:11:55.788777: step: 560/459, loss: 0.07699065655469894 2023-01-24 02:11:56.411540: step: 562/459, loss: 0.11920484900474548 2023-01-24 02:11:57.103255: step: 564/459, loss: 0.04193229600787163 2023-01-24 02:11:57.712043: step: 566/459, loss: 0.4942529797554016 2023-01-24 02:11:58.330553: step: 568/459, loss: 0.048310015350580215 2023-01-24 02:11:58.990427: step: 570/459, loss: 0.31177929043769836 2023-01-24 02:11:59.569485: step: 572/459, loss: 0.14262011647224426 2023-01-24 02:12:00.130931: step: 574/459, loss: 0.11387825757265091 2023-01-24 02:12:00.810232: step: 576/459, loss: 0.03187757730484009 2023-01-24 02:12:01.425959: step: 578/459, loss: 0.03206734359264374 2023-01-24 02:12:02.049396: step: 580/459, loss: 0.061331357806921005 2023-01-24 02:12:02.744173: step: 582/459, loss: 0.07416746765375137 2023-01-24 02:12:03.408462: step: 584/459, loss: 0.09677277505397797 2023-01-24 02:12:04.043155: step: 586/459, loss: 0.09000352025032043 2023-01-24 02:12:04.688565: step: 588/459, loss: 0.05713291093707085 2023-01-24 02:12:05.302781: step: 590/459, loss: 0.021915238350629807 2023-01-24 02:12:05.902637: step: 592/459, loss: 0.03861263766884804 2023-01-24 02:12:06.496653: step: 594/459, loss: 0.5308077931404114 2023-01-24 02:12:07.086886: step: 596/459, loss: 0.024995706975460052 2023-01-24 02:12:07.675757: step: 598/459, loss: 0.018737884238362312 2023-01-24 02:12:08.356411: step: 600/459, loss: 0.11769544333219528 2023-01-24 02:12:08.950260: step: 602/459, loss: 0.0570979118347168 2023-01-24 02:12:09.564119: step: 604/459, loss: 0.0655985176563263 2023-01-24 02:12:10.107381: step: 606/459, loss: 0.05712244659662247 2023-01-24 02:12:10.768042: step: 608/459, loss: 0.27267658710479736 2023-01-24 02:12:11.341462: step: 610/459, loss: 0.05626314505934715 2023-01-24 02:12:11.935116: step: 612/459, loss: 0.06160359084606171 2023-01-24 02:12:12.501569: step: 614/459, loss: 0.030153628438711166 2023-01-24 02:12:13.147485: step: 616/459, loss: 0.05971634387969971 2023-01-24 02:12:13.756435: step: 618/459, loss: 0.2541730999946594 2023-01-24 02:12:14.371377: step: 620/459, loss: 0.0824403166770935 2023-01-24 02:12:14.937701: step: 622/459, loss: 0.026315705850720406 2023-01-24 02:12:15.555202: step: 624/459, loss: 0.04229848459362984 2023-01-24 02:12:16.139971: step: 626/459, loss: 0.109538733959198 2023-01-24 02:12:16.738943: step: 628/459, loss: 0.03269818425178528 2023-01-24 02:12:17.331736: step: 630/459, loss: 0.06686903536319733 2023-01-24 02:12:17.927004: step: 632/459, loss: 0.090690977871418 2023-01-24 02:12:18.518643: step: 634/459, loss: 0.09941346943378448 2023-01-24 02:12:19.134669: step: 636/459, loss: 0.15237931907176971 2023-01-24 02:12:19.784470: step: 638/459, loss: 0.026036452502012253 2023-01-24 02:12:20.416692: step: 640/459, loss: 0.05737706273794174 2023-01-24 02:12:21.062789: step: 642/459, loss: 
0.14413554966449738 2023-01-24 02:12:21.681774: step: 644/459, loss: 0.03420555964112282 2023-01-24 02:12:22.278533: step: 646/459, loss: 3.5683486461639404 2023-01-24 02:12:22.988141: step: 648/459, loss: 0.07318519800901413 2023-01-24 02:12:23.578598: step: 650/459, loss: 0.07195789366960526 2023-01-24 02:12:24.103078: step: 652/459, loss: 0.03785356879234314 2023-01-24 02:12:24.743111: step: 654/459, loss: 0.05621008202433586 2023-01-24 02:12:25.327754: step: 656/459, loss: 0.028407329693436623 2023-01-24 02:12:25.964573: step: 658/459, loss: 0.06853795051574707 2023-01-24 02:12:26.552179: step: 660/459, loss: 0.04672542214393616 2023-01-24 02:12:27.090416: step: 662/459, loss: 0.020425360649824142 2023-01-24 02:12:27.640860: step: 664/459, loss: 0.005562279839068651 2023-01-24 02:12:28.225763: step: 666/459, loss: 0.7817181348800659 2023-01-24 02:12:28.871953: step: 668/459, loss: 0.10454368591308594 2023-01-24 02:12:29.463715: step: 670/459, loss: 0.02383536472916603 2023-01-24 02:12:30.033249: step: 672/459, loss: 0.04615335538983345 2023-01-24 02:12:30.671095: step: 674/459, loss: 0.22089464962482452 2023-01-24 02:12:31.274803: step: 676/459, loss: 0.0370873287320137 2023-01-24 02:12:31.900395: step: 678/459, loss: 0.3219626545906067 2023-01-24 02:12:32.547433: step: 680/459, loss: 0.05795430392026901 2023-01-24 02:12:33.175068: step: 682/459, loss: 0.14761821925640106 2023-01-24 02:12:33.751162: step: 684/459, loss: 0.06729243695735931 2023-01-24 02:12:34.400641: step: 686/459, loss: 0.019455241039395332 2023-01-24 02:12:34.996820: step: 688/459, loss: 0.2557271420955658 2023-01-24 02:12:35.578386: step: 690/459, loss: 0.023343723267316818 2023-01-24 02:12:36.200836: step: 692/459, loss: 0.09126888960599899 2023-01-24 02:12:36.861245: step: 694/459, loss: 3.302541732788086 2023-01-24 02:12:37.499841: step: 696/459, loss: 0.15810053050518036 2023-01-24 02:12:38.081683: step: 698/459, loss: 0.18989631533622742 2023-01-24 02:12:38.692259: step: 700/459, loss: 0.07024659961462021 2023-01-24 02:12:39.335454: step: 702/459, loss: 0.05286220833659172 2023-01-24 02:12:39.995265: step: 704/459, loss: 0.1263725608587265 2023-01-24 02:12:40.628807: step: 706/459, loss: 0.050269488245248795 2023-01-24 02:12:41.300399: step: 708/459, loss: 0.011672346852719784 2023-01-24 02:12:41.951919: step: 710/459, loss: 0.08076886832714081 2023-01-24 02:12:42.545137: step: 712/459, loss: 0.11434967070817947 2023-01-24 02:12:43.117478: step: 714/459, loss: 0.08247226476669312 2023-01-24 02:12:43.815457: step: 716/459, loss: 0.1447485089302063 2023-01-24 02:12:44.432999: step: 718/459, loss: 0.3271235525608063 2023-01-24 02:12:45.038323: step: 720/459, loss: 0.11658532172441483 2023-01-24 02:12:45.585370: step: 722/459, loss: 0.05055231600999832 2023-01-24 02:12:46.320185: step: 724/459, loss: 0.11874014139175415 2023-01-24 02:12:46.915011: step: 726/459, loss: 0.2921452224254608 2023-01-24 02:12:47.628949: step: 728/459, loss: 0.11985394358634949 2023-01-24 02:12:48.239677: step: 730/459, loss: 0.1724495142698288 2023-01-24 02:12:48.859615: step: 732/459, loss: 0.1039980798959732 2023-01-24 02:12:49.496766: step: 734/459, loss: 0.06251256912946701 2023-01-24 02:12:50.123628: step: 736/459, loss: 0.08507806062698364 2023-01-24 02:12:50.765729: step: 738/459, loss: 0.29526662826538086 2023-01-24 02:12:51.392197: step: 740/459, loss: 0.06125098839402199 2023-01-24 02:12:52.025433: step: 742/459, loss: 0.05526433512568474 2023-01-24 02:12:52.701913: step: 744/459, loss: 7.055176734924316 2023-01-24 
02:12:53.297961: step: 746/459, loss: 0.02699962444603443 2023-01-24 02:12:53.857564: step: 748/459, loss: 0.04508724808692932 2023-01-24 02:12:54.514356: step: 750/459, loss: 0.16921484470367432 2023-01-24 02:12:55.147349: step: 752/459, loss: 0.10099383443593979 2023-01-24 02:12:55.762503: step: 754/459, loss: 0.2237633764743805 2023-01-24 02:12:56.375056: step: 756/459, loss: 0.07439128309488297 2023-01-24 02:12:57.005211: step: 758/459, loss: 0.05535467714071274 2023-01-24 02:12:57.608521: step: 760/459, loss: 0.018714092671871185 2023-01-24 02:12:58.220145: step: 762/459, loss: 0.029250890016555786 2023-01-24 02:12:58.770617: step: 764/459, loss: 0.1567198783159256 2023-01-24 02:12:59.389912: step: 766/459, loss: 0.03020903468132019 2023-01-24 02:13:00.012633: step: 768/459, loss: 0.10689166188240051 2023-01-24 02:13:00.592731: step: 770/459, loss: 0.07049798965454102 2023-01-24 02:13:01.227020: step: 772/459, loss: 0.06988490372896194 2023-01-24 02:13:01.881573: step: 774/459, loss: 0.10741022974252701 2023-01-24 02:13:02.468158: step: 776/459, loss: 0.24830171465873718 2023-01-24 02:13:03.046805: step: 778/459, loss: 0.12741389870643616 2023-01-24 02:13:03.677720: step: 780/459, loss: 0.342180997133255 2023-01-24 02:13:04.296573: step: 782/459, loss: 0.0340384803712368 2023-01-24 02:13:04.945133: step: 784/459, loss: 0.10137978196144104 2023-01-24 02:13:05.585236: step: 786/459, loss: 0.05287212133407593 2023-01-24 02:13:06.194007: step: 788/459, loss: 0.08830134570598602 2023-01-24 02:13:06.809814: step: 790/459, loss: 0.3623201251029968 2023-01-24 02:13:07.422560: step: 792/459, loss: 0.07708245515823364 2023-01-24 02:13:08.016494: step: 794/459, loss: 0.23484663665294647 2023-01-24 02:13:08.621972: step: 796/459, loss: 0.019112618640065193 2023-01-24 02:13:09.294717: step: 798/459, loss: 0.11403153836727142 2023-01-24 02:13:09.930654: step: 800/459, loss: 0.04532870277762413 2023-01-24 02:13:10.535365: step: 802/459, loss: 0.06421814113855362 2023-01-24 02:13:11.136242: step: 804/459, loss: 0.026776708662509918 2023-01-24 02:13:11.760886: step: 806/459, loss: 0.0696449875831604 2023-01-24 02:13:12.392793: step: 808/459, loss: 0.03568470478057861 2023-01-24 02:13:12.995249: step: 810/459, loss: 0.0422649160027504 2023-01-24 02:13:13.610499: step: 812/459, loss: 0.06878703832626343 2023-01-24 02:13:14.192749: step: 814/459, loss: 0.1169789507985115 2023-01-24 02:13:14.884053: step: 816/459, loss: 0.32632604241371155 2023-01-24 02:13:15.560086: step: 818/459, loss: 0.15199817717075348 2023-01-24 02:13:16.121226: step: 820/459, loss: 0.12229562550783157 2023-01-24 02:13:16.712299: step: 822/459, loss: 0.21295574307441711 2023-01-24 02:13:17.353505: step: 824/459, loss: 0.07996854931116104 2023-01-24 02:13:17.996867: step: 826/459, loss: 0.11264877766370773 2023-01-24 02:13:18.614641: step: 828/459, loss: 0.2981933653354645 2023-01-24 02:13:19.248224: step: 830/459, loss: 0.14042028784751892 2023-01-24 02:13:19.884415: step: 832/459, loss: 0.07054133713245392 2023-01-24 02:13:20.549355: step: 834/459, loss: 0.2658541202545166 2023-01-24 02:13:21.174223: step: 836/459, loss: 0.06687168776988983 2023-01-24 02:13:21.794549: step: 838/459, loss: 0.03813822194933891 2023-01-24 02:13:22.390750: step: 840/459, loss: 0.07955422252416611 2023-01-24 02:13:22.968230: step: 842/459, loss: 0.14613060653209686 2023-01-24 02:13:23.563399: step: 844/459, loss: 0.15755102038383484 2023-01-24 02:13:24.214429: step: 846/459, loss: 0.11665306985378265 2023-01-24 02:13:24.817344: step: 848/459, loss: 
0.29114267230033875 2023-01-24 02:13:25.463476: step: 850/459, loss: 0.07266083359718323 2023-01-24 02:13:26.034348: step: 852/459, loss: 0.20188665390014648 2023-01-24 02:13:26.639982: step: 854/459, loss: 0.034362029284238815 2023-01-24 02:13:27.284858: step: 856/459, loss: 0.11382361501455307 2023-01-24 02:13:27.879247: step: 858/459, loss: 0.07874472439289093 2023-01-24 02:13:28.557252: step: 860/459, loss: 0.0427146777510643 2023-01-24 02:13:29.184341: step: 862/459, loss: 0.05386700481176376 2023-01-24 02:13:29.897046: step: 864/459, loss: 0.4725996255874634 2023-01-24 02:13:30.581096: step: 866/459, loss: 0.09462841600179672 2023-01-24 02:13:31.144580: step: 868/459, loss: 0.06547026336193085 2023-01-24 02:13:31.729869: step: 870/459, loss: 0.07499255985021591 2023-01-24 02:13:32.338347: step: 872/459, loss: 0.17000456154346466 2023-01-24 02:13:32.955587: step: 874/459, loss: 0.16830797493457794 2023-01-24 02:13:33.552889: step: 876/459, loss: 0.1058272123336792 2023-01-24 02:13:34.214137: step: 878/459, loss: 0.07510938495397568 2023-01-24 02:13:34.856376: step: 880/459, loss: 0.18140915036201477 2023-01-24 02:13:35.502862: step: 882/459, loss: 0.08217549324035645 2023-01-24 02:13:36.163969: step: 884/459, loss: 0.26564091444015503 2023-01-24 02:13:36.848817: step: 886/459, loss: 0.07682933658361435 2023-01-24 02:13:37.439041: step: 888/459, loss: 0.09926269948482513 2023-01-24 02:13:38.063917: step: 890/459, loss: 0.6898413300514221 2023-01-24 02:13:38.624099: step: 892/459, loss: 0.05102268606424332 2023-01-24 02:13:39.272760: step: 894/459, loss: 0.025307053700089455 2023-01-24 02:13:39.854265: step: 896/459, loss: 0.09399547427892685 2023-01-24 02:13:40.453580: step: 898/459, loss: 0.23830318450927734 2023-01-24 02:13:41.052115: step: 900/459, loss: 0.021852262318134308 2023-01-24 02:13:41.661632: step: 902/459, loss: 0.0554911270737648 2023-01-24 02:13:42.333275: step: 904/459, loss: 0.07519907504320145 2023-01-24 02:13:42.933179: step: 906/459, loss: 0.047604817897081375 2023-01-24 02:13:43.619707: step: 908/459, loss: 0.04604256525635719 2023-01-24 02:13:44.255984: step: 910/459, loss: 0.16952566802501678 2023-01-24 02:13:44.905996: step: 912/459, loss: 0.7971755862236023 2023-01-24 02:13:45.484690: step: 914/459, loss: 0.082918182015419 2023-01-24 02:13:46.114227: step: 916/459, loss: 2.763888120651245 2023-01-24 02:13:46.772185: step: 918/459, loss: 0.2912076711654663 2023-01-24 02:13:47.236756: step: 920/459, loss: 0.001495184376835823 ================================================== Loss: 0.174 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3384285056160056, 'r': 0.3326488916681042, 'f1': 0.33551381035232714}, 'combined': 0.24722070236487262, 'epoch': 17} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3417895712062956, 'r': 0.28989969994133985, 'f1': 0.3137133988543766}, 'combined': 0.20077657526680098, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32956535196321185, 'r': 0.3214356563739865, 'f1': 0.32544974238057806}, 'combined': 0.2398050733330575, 'epoch': 17} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34769222991403836, 'r': 0.28542371237488784, 'f1': 0.3134958398525977}, 'combined': 0.2006373375056625, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 
0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.350675739907472, 'r': 0.33869820040399096, 'f1': 0.3445829181716279}, 'combined': 0.25390320286330476, 'epoch': 17} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3485205313218174, 'r': 0.3075368043022053, 'f1': 0.3267485465564351}, 'combined': 0.23427254281404783, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2883771929824561, 'r': 0.31309523809523804, 'f1': 0.30022831050228305}, 'combined': 0.20015220700152203, 'epoch': 17} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.20535714285714285, 'r': 0.25, 'f1': 0.22549019607843138}, 'combined': 0.11274509803921569, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2222222222222222, 'r': 0.06896551724137931, 'f1': 0.10526315789473684}, 'combined': 0.07017543859649122, 'epoch': 17} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 'epoch': 10} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 'epoch': 10} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'epoch': 10} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3342478880342958, 'r': 0.3266369304319968, 'f1': 0.33039858414138645}, 'combined': 0.24345158831470579, 'epoch': 5} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3413499740991752, 'r': 0.24608229950967814, 'f1': 0.28599105067157526}, 'combined': 0.18303427242980813, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3269230769230769, 'r': 0.3695652173913043, 'f1': 0.346938775510204}, 'combined': 0.173469387755102, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8} ****************************** Epoch: 18 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:16:22.145264: step: 2/459, loss: 0.033610858023166656 2023-01-24 02:16:22.760646: step: 4/459, loss: 0.04571382701396942 2023-01-24 
02:16:23.331595: step: 6/459, loss: 0.04249054566025734 2023-01-24 02:16:23.985935: step: 8/459, loss: 0.075380340218544 2023-01-24 02:16:24.581137: step: 10/459, loss: 0.04506218060851097 2023-01-24 02:16:25.236283: step: 12/459, loss: 0.027242545038461685 2023-01-24 02:16:25.855302: step: 14/459, loss: 0.04616411775350571 2023-01-24 02:16:26.507455: step: 16/459, loss: 0.029716044664382935 2023-01-24 02:16:27.120490: step: 18/459, loss: 0.07503177225589752 2023-01-24 02:16:27.661133: step: 20/459, loss: 0.1350172758102417 2023-01-24 02:16:28.300391: step: 22/459, loss: 0.04845882207155228 2023-01-24 02:16:28.947240: step: 24/459, loss: 0.013467568904161453 2023-01-24 02:16:29.570298: step: 26/459, loss: 0.07994174212217331 2023-01-24 02:16:30.253224: step: 28/459, loss: 0.050321388989686966 2023-01-24 02:16:30.859487: step: 30/459, loss: 0.028226200491189957 2023-01-24 02:16:31.489710: step: 32/459, loss: 0.03172803297638893 2023-01-24 02:16:32.073039: step: 34/459, loss: 0.048319198191165924 2023-01-24 02:16:32.719305: step: 36/459, loss: 0.06866500526666641 2023-01-24 02:16:33.359040: step: 38/459, loss: 0.17881987988948822 2023-01-24 02:16:33.956225: step: 40/459, loss: 0.13652265071868896 2023-01-24 02:16:34.589787: step: 42/459, loss: 0.15351280570030212 2023-01-24 02:16:35.150380: step: 44/459, loss: 0.0637209564447403 2023-01-24 02:16:35.900908: step: 46/459, loss: 0.8192552924156189 2023-01-24 02:16:36.494427: step: 48/459, loss: 0.06957622617483139 2023-01-24 02:16:37.173213: step: 50/459, loss: 0.07720775157213211 2023-01-24 02:16:37.853572: step: 52/459, loss: 0.08914695680141449 2023-01-24 02:16:38.472797: step: 54/459, loss: 0.05873541533946991 2023-01-24 02:16:39.104563: step: 56/459, loss: 0.12930922210216522 2023-01-24 02:16:39.703170: step: 58/459, loss: 0.09303481131792068 2023-01-24 02:16:40.290827: step: 60/459, loss: 0.20358437299728394 2023-01-24 02:16:40.830650: step: 62/459, loss: 0.01356075145304203 2023-01-24 02:16:41.473359: step: 64/459, loss: 0.05714920908212662 2023-01-24 02:16:42.013546: step: 66/459, loss: 0.0759505107998848 2023-01-24 02:16:42.631704: step: 68/459, loss: 0.0320504754781723 2023-01-24 02:16:43.275545: step: 70/459, loss: 0.08214043080806732 2023-01-24 02:16:43.912882: step: 72/459, loss: 0.06813124567270279 2023-01-24 02:16:44.464035: step: 74/459, loss: 0.09314986318349838 2023-01-24 02:16:45.136987: step: 76/459, loss: 0.4366767704486847 2023-01-24 02:16:45.738074: step: 78/459, loss: 0.15788783133029938 2023-01-24 02:16:46.375425: step: 80/459, loss: 0.12062712013721466 2023-01-24 02:16:46.989032: step: 82/459, loss: 0.045702338218688965 2023-01-24 02:16:47.593434: step: 84/459, loss: 0.017561284825205803 2023-01-24 02:16:48.200468: step: 86/459, loss: 0.05534899979829788 2023-01-24 02:16:48.795593: step: 88/459, loss: 0.028918515890836716 2023-01-24 02:16:49.446332: step: 90/459, loss: 0.04072954133152962 2023-01-24 02:16:50.033095: step: 92/459, loss: 0.05892883613705635 2023-01-24 02:16:50.632451: step: 94/459, loss: 0.14664243161678314 2023-01-24 02:16:51.283242: step: 96/459, loss: 0.0773751512169838 2023-01-24 02:16:51.956988: step: 98/459, loss: 0.2389359027147293 2023-01-24 02:16:52.702855: step: 100/459, loss: 0.12358778715133667 2023-01-24 02:16:53.352050: step: 102/459, loss: 0.05631176009774208 2023-01-24 02:16:54.003296: step: 104/459, loss: 0.042649608105421066 2023-01-24 02:16:54.656115: step: 106/459, loss: 0.11333596706390381 2023-01-24 02:16:55.232376: step: 108/459, loss: 0.11516864597797394 2023-01-24 
02:16:55.842097: step: 110/459, loss: 0.021516216918826103 2023-01-24 02:16:56.425483: step: 112/459, loss: 0.020874885842204094 2023-01-24 02:16:57.032519: step: 114/459, loss: 0.06651577353477478 2023-01-24 02:16:57.597146: step: 116/459, loss: 0.05260429158806801 2023-01-24 02:16:58.243729: step: 118/459, loss: 1.324324131011963 2023-01-24 02:16:58.841957: step: 120/459, loss: 0.10004092752933502 2023-01-24 02:16:59.434350: step: 122/459, loss: 0.028822163119912148 2023-01-24 02:17:00.022925: step: 124/459, loss: 0.07182654738426208 2023-01-24 02:17:00.604849: step: 126/459, loss: 0.053182173520326614 2023-01-24 02:17:01.197409: step: 128/459, loss: 0.04359092563390732 2023-01-24 02:17:01.845159: step: 130/459, loss: 0.9041641354560852 2023-01-24 02:17:02.434547: step: 132/459, loss: 0.4314751625061035 2023-01-24 02:17:03.061374: step: 134/459, loss: 0.08308633416891098 2023-01-24 02:17:03.677730: step: 136/459, loss: 0.08162807673215866 2023-01-24 02:17:04.287531: step: 138/459, loss: 0.06280703097581863 2023-01-24 02:17:04.871021: step: 140/459, loss: 0.10649312287569046 2023-01-24 02:17:05.496196: step: 142/459, loss: 0.35013729333877563 2023-01-24 02:17:06.106942: step: 144/459, loss: 0.08015374094247818 2023-01-24 02:17:06.750706: step: 146/459, loss: 0.04143805801868439 2023-01-24 02:17:07.418531: step: 148/459, loss: 0.15884092450141907 2023-01-24 02:17:08.103027: step: 150/459, loss: 0.08463162928819656 2023-01-24 02:17:08.758322: step: 152/459, loss: 0.23995107412338257 2023-01-24 02:17:09.387763: step: 154/459, loss: 0.022913791239261627 2023-01-24 02:17:09.985402: step: 156/459, loss: 0.13412217795848846 2023-01-24 02:17:10.655093: step: 158/459, loss: 0.09944913536310196 2023-01-24 02:17:11.237913: step: 160/459, loss: 0.038928646594285965 2023-01-24 02:17:11.885002: step: 162/459, loss: 0.11818308383226395 2023-01-24 02:17:12.556688: step: 164/459, loss: 0.00623404560610652 2023-01-24 02:17:13.176435: step: 166/459, loss: 0.06668317317962646 2023-01-24 02:17:13.809375: step: 168/459, loss: 0.07899486273527145 2023-01-24 02:17:14.431481: step: 170/459, loss: 0.11592818051576614 2023-01-24 02:17:15.030924: step: 172/459, loss: 0.016258088871836662 2023-01-24 02:17:15.662152: step: 174/459, loss: 0.40067073702812195 2023-01-24 02:17:16.275379: step: 176/459, loss: 0.04530729353427887 2023-01-24 02:17:16.970971: step: 178/459, loss: 0.002387912245467305 2023-01-24 02:17:17.540938: step: 180/459, loss: 0.12460809201002121 2023-01-24 02:17:18.186529: step: 182/459, loss: 0.0925569161772728 2023-01-24 02:17:18.757052: step: 184/459, loss: 0.7470595836639404 2023-01-24 02:17:19.432070: step: 186/459, loss: 0.04477834701538086 2023-01-24 02:17:20.025529: step: 188/459, loss: 0.1416543871164322 2023-01-24 02:17:20.640445: step: 190/459, loss: 0.06493277847766876 2023-01-24 02:17:21.228274: step: 192/459, loss: 0.039399661123752594 2023-01-24 02:17:21.868981: step: 194/459, loss: 0.08825384080410004 2023-01-24 02:17:22.423423: step: 196/459, loss: 0.025865629315376282 2023-01-24 02:17:23.005602: step: 198/459, loss: 0.0917133241891861 2023-01-24 02:17:23.617242: step: 200/459, loss: 0.03985757380723953 2023-01-24 02:17:24.234857: step: 202/459, loss: 0.06918543577194214 2023-01-24 02:17:24.902246: step: 204/459, loss: 0.10762568563222885 2023-01-24 02:17:25.496065: step: 206/459, loss: 0.05518211051821709 2023-01-24 02:17:26.090278: step: 208/459, loss: 0.4458884000778198 2023-01-24 02:17:26.726936: step: 210/459, loss: 0.578163743019104 2023-01-24 02:17:27.431355: step: 212/459, 
loss: 0.04086310788989067 2023-01-24 02:17:28.045860: step: 214/459, loss: 0.07264456897974014 2023-01-24 02:17:28.757293: step: 216/459, loss: 0.041008587926626205 2023-01-24 02:17:29.436520: step: 218/459, loss: 0.036911316215991974 2023-01-24 02:17:30.059773: step: 220/459, loss: 0.09427587687969208 2023-01-24 02:17:30.765392: step: 222/459, loss: 0.03136202692985535 2023-01-24 02:17:31.451231: step: 224/459, loss: 0.033117301762104034 2023-01-24 02:17:31.991828: step: 226/459, loss: 0.013200237415730953 2023-01-24 02:17:32.647054: step: 228/459, loss: 0.4137049913406372 2023-01-24 02:17:33.298687: step: 230/459, loss: 0.03147859126329422 2023-01-24 02:17:33.933588: step: 232/459, loss: 0.07086245715618134 2023-01-24 02:17:34.606237: step: 234/459, loss: 0.15357305109500885 2023-01-24 02:17:35.246264: step: 236/459, loss: 0.09545422345399857 2023-01-24 02:17:35.845147: step: 238/459, loss: 0.01357046514749527 2023-01-24 02:17:36.418207: step: 240/459, loss: 0.027064094319939613 2023-01-24 02:17:37.079812: step: 242/459, loss: 0.11906545609235764 2023-01-24 02:17:37.703835: step: 244/459, loss: 0.0701231062412262 2023-01-24 02:17:38.305920: step: 246/459, loss: 0.5469827651977539 2023-01-24 02:17:38.867443: step: 248/459, loss: 0.05879361554980278 2023-01-24 02:17:39.482562: step: 250/459, loss: 0.06245232746005058 2023-01-24 02:17:40.106525: step: 252/459, loss: 0.05050404742360115 2023-01-24 02:17:40.658756: step: 254/459, loss: 0.0054909465834498405 2023-01-24 02:17:41.368413: step: 256/459, loss: 0.30578818917274475 2023-01-24 02:17:42.013348: step: 258/459, loss: 0.033648546785116196 2023-01-24 02:17:42.623481: step: 260/459, loss: 0.031832460314035416 2023-01-24 02:17:43.306892: step: 262/459, loss: 0.044170185923576355 2023-01-24 02:17:43.985332: step: 264/459, loss: 0.05135485157370567 2023-01-24 02:17:44.594419: step: 266/459, loss: 0.04063574597239494 2023-01-24 02:17:45.238177: step: 268/459, loss: 0.04559621959924698 2023-01-24 02:17:45.873155: step: 270/459, loss: 0.05653829127550125 2023-01-24 02:17:46.420766: step: 272/459, loss: 0.1012801080942154 2023-01-24 02:17:47.052013: step: 274/459, loss: 0.015253220684826374 2023-01-24 02:17:47.658411: step: 276/459, loss: 0.012093120254576206 2023-01-24 02:17:48.251524: step: 278/459, loss: 0.4252244234085083 2023-01-24 02:17:48.864508: step: 280/459, loss: 0.025940855965018272 2023-01-24 02:17:49.471780: step: 282/459, loss: 0.18174266815185547 2023-01-24 02:17:50.150964: step: 284/459, loss: 0.11721880733966827 2023-01-24 02:17:50.795665: step: 286/459, loss: 0.05190527066588402 2023-01-24 02:17:51.453666: step: 288/459, loss: 0.06055450066924095 2023-01-24 02:17:52.079038: step: 290/459, loss: 0.06543251872062683 2023-01-24 02:17:52.634365: step: 292/459, loss: 0.005124656483530998 2023-01-24 02:17:53.232166: step: 294/459, loss: 0.04283805564045906 2023-01-24 02:17:53.871813: step: 296/459, loss: 0.036506906151771545 2023-01-24 02:17:54.516867: step: 298/459, loss: 0.06180115044116974 2023-01-24 02:17:55.125880: step: 300/459, loss: 0.006827778648585081 2023-01-24 02:17:55.731491: step: 302/459, loss: 0.009734579361975193 2023-01-24 02:17:56.425392: step: 304/459, loss: 0.015982389450073242 2023-01-24 02:17:57.132546: step: 306/459, loss: 0.06759083271026611 2023-01-24 02:17:57.774931: step: 308/459, loss: 0.05685378238558769 2023-01-24 02:17:58.310067: step: 310/459, loss: 0.07085362821817398 2023-01-24 02:17:58.953917: step: 312/459, loss: 0.026812776923179626 2023-01-24 02:17:59.685889: step: 314/459, loss: 
0.1570136696100235 2023-01-24 02:18:00.235432: step: 316/459, loss: 0.12359370291233063 2023-01-24 02:18:00.816181: step: 318/459, loss: 0.16700789332389832 2023-01-24 02:18:01.482158: step: 320/459, loss: 0.02395341359078884 2023-01-24 02:18:02.110612: step: 322/459, loss: 0.07976959645748138 2023-01-24 02:18:02.723648: step: 324/459, loss: 0.04945274442434311 2023-01-24 02:18:03.310649: step: 326/459, loss: 0.0880642682313919 2023-01-24 02:18:03.949327: step: 328/459, loss: 0.024737972766160965 2023-01-24 02:18:04.507175: step: 330/459, loss: 0.950221061706543 2023-01-24 02:18:05.189614: step: 332/459, loss: 0.0736905112862587 2023-01-24 02:18:05.783191: step: 334/459, loss: 0.005813379772007465 2023-01-24 02:18:06.367861: step: 336/459, loss: 0.07052227854728699 2023-01-24 02:18:06.985638: step: 338/459, loss: 0.04257620498538017 2023-01-24 02:18:07.620988: step: 340/459, loss: 0.034401752054691315 2023-01-24 02:18:08.226296: step: 342/459, loss: 0.02134673297405243 2023-01-24 02:18:08.851465: step: 344/459, loss: 0.21307158470153809 2023-01-24 02:18:09.442312: step: 346/459, loss: 0.06307412683963776 2023-01-24 02:18:10.066930: step: 348/459, loss: 0.6515809297561646 2023-01-24 02:18:10.748202: step: 350/459, loss: 0.14236177504062653 2023-01-24 02:18:11.341461: step: 352/459, loss: 0.07600562274456024 2023-01-24 02:18:11.889764: step: 354/459, loss: 0.15383444726467133 2023-01-24 02:18:12.435455: step: 356/459, loss: 0.1586785614490509 2023-01-24 02:18:13.049413: step: 358/459, loss: 0.11215466260910034 2023-01-24 02:18:13.618662: step: 360/459, loss: 0.002766141202300787 2023-01-24 02:18:14.287240: step: 362/459, loss: 0.11953767389059067 2023-01-24 02:18:14.866404: step: 364/459, loss: 0.04037013649940491 2023-01-24 02:18:15.484945: step: 366/459, loss: 0.04282503202557564 2023-01-24 02:18:16.139918: step: 368/459, loss: 0.07111167907714844 2023-01-24 02:18:16.731152: step: 370/459, loss: 0.010713606141507626 2023-01-24 02:18:17.350386: step: 372/459, loss: 0.0620669350028038 2023-01-24 02:18:18.035927: step: 374/459, loss: 0.052083760499954224 2023-01-24 02:18:18.592526: step: 376/459, loss: 0.05463911220431328 2023-01-24 02:18:19.194879: step: 378/459, loss: 0.027632009238004684 2023-01-24 02:18:19.837926: step: 380/459, loss: 0.05756990984082222 2023-01-24 02:18:20.453245: step: 382/459, loss: 0.1097499430179596 2023-01-24 02:18:21.024874: step: 384/459, loss: 0.061805710196495056 2023-01-24 02:18:21.628466: step: 386/459, loss: 0.49651604890823364 2023-01-24 02:18:22.260092: step: 388/459, loss: 0.22786998748779297 2023-01-24 02:18:22.917355: step: 390/459, loss: 0.03472978621721268 2023-01-24 02:18:23.530482: step: 392/459, loss: 0.030955197289586067 2023-01-24 02:18:24.150038: step: 394/459, loss: 0.018940793350338936 2023-01-24 02:18:24.769013: step: 396/459, loss: 0.060451608151197433 2023-01-24 02:18:25.313335: step: 398/459, loss: 0.08382024616003036 2023-01-24 02:18:25.864717: step: 400/459, loss: 0.0768987312912941 2023-01-24 02:18:26.487365: step: 402/459, loss: 0.02861858159303665 2023-01-24 02:18:27.110823: step: 404/459, loss: 0.046733107417821884 2023-01-24 02:18:27.683872: step: 406/459, loss: 0.0036365282721817493 2023-01-24 02:18:28.309417: step: 408/459, loss: 0.10275527089834213 2023-01-24 02:18:28.883749: step: 410/459, loss: 0.0056349532678723335 2023-01-24 02:18:29.534032: step: 412/459, loss: 0.06506099551916122 2023-01-24 02:18:30.129343: step: 414/459, loss: 0.055057406425476074 2023-01-24 02:18:30.699702: step: 416/459, loss: 0.03917497396469116 
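Note on the step records: each training step is logged as '<timestamp>: step: <n>/459, loss: <value>', and the 'Loss: ...' figure printed at the end of every epoch appears to be the average of those per-step losses. Below is a minimal, illustrative parser for pulling the loss values back out of the raw log text; the regex and function name are mine, not from train.py.

import re
from statistics import mean

STEP_RE = re.compile(
    r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+: step: (\d+)/\d+, loss: ([\d.]+)"
)

def step_losses(log_text: str) -> list:
    # Return the loss value of every step record found in log_text.
    return [float(loss) for _step, loss in STEP_RE.findall(log_text)]

# Example with one record copied from the log above:
sample = "2023-01-24 02:18:30.699702: step: 416/459, loss: 0.03917497396469116"
losses = step_losses(sample)
print(len(losses), round(mean(losses), 3))   # -> 1 0.039
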
2023-01-24 02:18:31.299874: step: 418/459, loss: 0.07840226590633392 2023-01-24 02:18:31.920626: step: 420/459, loss: 0.21193869411945343 2023-01-24 02:18:32.684496: step: 422/459, loss: 0.02974744699895382 2023-01-24 02:18:33.340164: step: 424/459, loss: 0.05180055648088455 2023-01-24 02:18:33.933815: step: 426/459, loss: 0.1134333461523056 2023-01-24 02:18:34.583421: step: 428/459, loss: 0.01540443953126669 2023-01-24 02:18:35.196410: step: 430/459, loss: 0.05219533294439316 2023-01-24 02:18:35.865183: step: 432/459, loss: 0.38040074706077576 2023-01-24 02:18:36.492332: step: 434/459, loss: 0.05768277868628502 2023-01-24 02:18:37.145973: step: 436/459, loss: 0.15601596236228943 2023-01-24 02:18:37.720720: step: 438/459, loss: 0.028485817834734917 2023-01-24 02:18:38.324185: step: 440/459, loss: 0.08003383874893188 2023-01-24 02:18:38.967260: step: 442/459, loss: 0.9325746893882751 2023-01-24 02:18:39.619844: step: 444/459, loss: 0.048545945435762405 2023-01-24 02:18:40.228552: step: 446/459, loss: 0.0518331378698349 2023-01-24 02:18:40.823266: step: 448/459, loss: 0.049232132732868195 2023-01-24 02:18:41.432627: step: 450/459, loss: 0.020519418641924858 2023-01-24 02:18:42.027938: step: 452/459, loss: 0.08672451227903366 2023-01-24 02:18:42.630746: step: 454/459, loss: 0.04014485329389572 2023-01-24 02:18:43.256304: step: 456/459, loss: 0.055521298199892044 2023-01-24 02:18:43.902221: step: 458/459, loss: 0.10683456063270569 2023-01-24 02:18:44.517150: step: 460/459, loss: 0.36708980798721313 2023-01-24 02:18:45.111142: step: 462/459, loss: 0.0729442685842514 2023-01-24 02:18:45.672321: step: 464/459, loss: 0.10496244579553604 2023-01-24 02:18:46.352682: step: 466/459, loss: 0.009948634542524815 2023-01-24 02:18:47.006259: step: 468/459, loss: 0.059112079441547394 2023-01-24 02:18:47.648924: step: 470/459, loss: 0.15029940009117126 2023-01-24 02:18:48.245512: step: 472/459, loss: 0.07624528557062149 2023-01-24 02:18:48.882202: step: 474/459, loss: 0.06904160976409912 2023-01-24 02:18:49.540703: step: 476/459, loss: 0.024868236854672432 2023-01-24 02:18:50.133569: step: 478/459, loss: 0.6238386034965515 2023-01-24 02:18:50.753485: step: 480/459, loss: 0.09223869442939758 2023-01-24 02:18:51.352709: step: 482/459, loss: 0.08680159598588943 2023-01-24 02:18:51.952401: step: 484/459, loss: 0.30005699396133423 2023-01-24 02:18:52.593960: step: 486/459, loss: 0.1074511855840683 2023-01-24 02:18:53.198840: step: 488/459, loss: 0.11580711603164673 2023-01-24 02:18:53.781357: step: 490/459, loss: 0.10428890585899353 2023-01-24 02:18:54.393687: step: 492/459, loss: 0.026706283912062645 2023-01-24 02:18:54.959875: step: 494/459, loss: 0.029722725972533226 2023-01-24 02:18:55.634462: step: 496/459, loss: 0.24749483168125153 2023-01-24 02:18:56.272197: step: 498/459, loss: 0.08279737830162048 2023-01-24 02:18:56.946422: step: 500/459, loss: 0.054393380880355835 2023-01-24 02:18:57.585257: step: 502/459, loss: 0.15574143826961517 2023-01-24 02:18:58.125328: step: 504/459, loss: 0.03405275568366051 2023-01-24 02:18:58.792908: step: 506/459, loss: 0.15951579809188843 2023-01-24 02:18:59.374719: step: 508/459, loss: 0.1246228739619255 2023-01-24 02:19:00.046594: step: 510/459, loss: 0.7096854448318481 2023-01-24 02:19:00.672211: step: 512/459, loss: 0.09685265272855759 2023-01-24 02:19:01.297140: step: 514/459, loss: 0.10322785377502441 2023-01-24 02:19:01.947568: step: 516/459, loss: 0.06947822123765945 2023-01-24 02:19:02.636371: step: 518/459, loss: 0.034248076379299164 2023-01-24 02:19:03.259560: 
step: 520/459, loss: 0.027616949751973152 2023-01-24 02:19:03.835646: step: 522/459, loss: 0.014827974140644073 2023-01-24 02:19:04.454066: step: 524/459, loss: 0.11564917862415314 2023-01-24 02:19:05.121929: step: 526/459, loss: 0.017345324158668518 2023-01-24 02:19:05.758962: step: 528/459, loss: 0.08637873828411102 2023-01-24 02:19:06.382774: step: 530/459, loss: 0.04408480226993561 2023-01-24 02:19:07.002949: step: 532/459, loss: 0.0537027008831501 2023-01-24 02:19:07.627554: step: 534/459, loss: 0.08018452674150467 2023-01-24 02:19:08.250297: step: 536/459, loss: 0.03860725462436676 2023-01-24 02:19:08.878368: step: 538/459, loss: 0.05443984642624855 2023-01-24 02:19:09.467996: step: 540/459, loss: 0.062132827937603 2023-01-24 02:19:10.068483: step: 542/459, loss: 0.0763883888721466 2023-01-24 02:19:10.728104: step: 544/459, loss: 0.09978597611188889 2023-01-24 02:19:11.318633: step: 546/459, loss: 0.15137407183647156 2023-01-24 02:19:11.944154: step: 548/459, loss: 0.184229776263237 2023-01-24 02:19:12.548192: step: 550/459, loss: 0.06548948585987091 2023-01-24 02:19:13.183428: step: 552/459, loss: 0.05667546018958092 2023-01-24 02:19:13.754709: step: 554/459, loss: 0.0939883217215538 2023-01-24 02:19:14.355712: step: 556/459, loss: 0.01569480635225773 2023-01-24 02:19:14.909087: step: 558/459, loss: 0.09575796872377396 2023-01-24 02:19:15.492748: step: 560/459, loss: 0.07276824116706848 2023-01-24 02:19:16.143495: step: 562/459, loss: 0.12000308185815811 2023-01-24 02:19:16.721668: step: 564/459, loss: 0.11557488888502121 2023-01-24 02:19:17.295858: step: 566/459, loss: 0.029356975108385086 2023-01-24 02:19:17.961722: step: 568/459, loss: 0.07021918147802353 2023-01-24 02:19:18.626372: step: 570/459, loss: 0.032439351081848145 2023-01-24 02:19:19.291398: step: 572/459, loss: 0.08176787942647934 2023-01-24 02:19:19.919190: step: 574/459, loss: 0.11878777295351028 2023-01-24 02:19:20.549286: step: 576/459, loss: 0.41100284457206726 2023-01-24 02:19:21.140682: step: 578/459, loss: 0.03777846693992615 2023-01-24 02:19:21.761863: step: 580/459, loss: 0.03617825359106064 2023-01-24 02:19:22.342432: step: 582/459, loss: 0.05100248381495476 2023-01-24 02:19:23.009045: step: 584/459, loss: 0.08580311387777328 2023-01-24 02:19:23.713968: step: 586/459, loss: 0.015872502699494362 2023-01-24 02:19:24.349411: step: 588/459, loss: 0.0988045260310173 2023-01-24 02:19:25.019697: step: 590/459, loss: 0.03413816913962364 2023-01-24 02:19:25.591333: step: 592/459, loss: 0.07517612725496292 2023-01-24 02:19:26.261575: step: 594/459, loss: 0.035332318395376205 2023-01-24 02:19:26.841453: step: 596/459, loss: 0.26558220386505127 2023-01-24 02:19:27.420604: step: 598/459, loss: 0.05945177748799324 2023-01-24 02:19:28.020864: step: 600/459, loss: 0.04402568191289902 2023-01-24 02:19:28.632986: step: 602/459, loss: 0.07058162987232208 2023-01-24 02:19:29.320352: step: 604/459, loss: 0.06121443212032318 2023-01-24 02:19:30.058906: step: 606/459, loss: 0.02472340315580368 2023-01-24 02:19:30.733840: step: 608/459, loss: 0.17591436207294464 2023-01-24 02:19:31.379933: step: 610/459, loss: 0.12337613105773926 2023-01-24 02:19:32.074178: step: 612/459, loss: 0.03306951746344566 2023-01-24 02:19:32.713163: step: 614/459, loss: 0.026776524260640144 2023-01-24 02:19:33.307451: step: 616/459, loss: 0.07272528856992722 2023-01-24 02:19:34.003719: step: 618/459, loss: 0.14618584513664246 2023-01-24 02:19:34.625965: step: 620/459, loss: 0.03587915003299713 2023-01-24 02:19:35.226783: step: 622/459, loss: 
0.10695653408765793 2023-01-24 02:19:35.800898: step: 624/459, loss: 0.11073779314756393 2023-01-24 02:19:36.354566: step: 626/459, loss: 0.01272757351398468 2023-01-24 02:19:36.943001: step: 628/459, loss: 0.0789196640253067 2023-01-24 02:19:37.583956: step: 630/459, loss: 0.17942625284194946 2023-01-24 02:19:38.268147: step: 632/459, loss: 0.03864947333931923 2023-01-24 02:19:38.972525: step: 634/459, loss: 2.0909526348114014 2023-01-24 02:19:39.647714: step: 636/459, loss: 0.021334419026970863 2023-01-24 02:19:40.230578: step: 638/459, loss: 0.046016547828912735 2023-01-24 02:19:40.910456: step: 640/459, loss: 0.006616792641580105 2023-01-24 02:19:41.483103: step: 642/459, loss: 0.7905037999153137 2023-01-24 02:19:42.097148: step: 644/459, loss: 0.03953585773706436 2023-01-24 02:19:42.771886: step: 646/459, loss: 0.10287618637084961 2023-01-24 02:19:43.407424: step: 648/459, loss: 0.32726624608039856 2023-01-24 02:19:44.040802: step: 650/459, loss: 0.0265617948025465 2023-01-24 02:19:44.621979: step: 652/459, loss: 0.11357685178518295 2023-01-24 02:19:45.297592: step: 654/459, loss: 0.16367855668067932 2023-01-24 02:19:45.877236: step: 656/459, loss: 0.14381958544254303 2023-01-24 02:19:46.487332: step: 658/459, loss: 0.895998477935791 2023-01-24 02:19:47.141721: step: 660/459, loss: 0.016134148463606834 2023-01-24 02:19:47.720997: step: 662/459, loss: 1.2865796089172363 2023-01-24 02:19:48.303593: step: 664/459, loss: 0.16289524734020233 2023-01-24 02:19:48.972102: step: 666/459, loss: 0.053246431052684784 2023-01-24 02:19:49.578983: step: 668/459, loss: 0.14384426176548004 2023-01-24 02:19:50.257458: step: 670/459, loss: 0.033975474536418915 2023-01-24 02:19:50.897796: step: 672/459, loss: 0.11578178405761719 2023-01-24 02:19:51.455797: step: 674/459, loss: 0.10618877410888672 2023-01-24 02:19:52.049835: step: 676/459, loss: 0.10097634047269821 2023-01-24 02:19:52.653720: step: 678/459, loss: 0.052227821201086044 2023-01-24 02:19:53.235642: step: 680/459, loss: 0.020124055445194244 2023-01-24 02:19:53.837701: step: 682/459, loss: 0.19891229271888733 2023-01-24 02:19:54.416442: step: 684/459, loss: 0.615874707698822 2023-01-24 02:19:55.097022: step: 686/459, loss: 0.06630229949951172 2023-01-24 02:19:55.680644: step: 688/459, loss: 0.02126501128077507 2023-01-24 02:19:56.244023: step: 690/459, loss: 0.0329006053507328 2023-01-24 02:19:56.893176: step: 692/459, loss: 0.045345839112997055 2023-01-24 02:19:57.458801: step: 694/459, loss: 0.05886152759194374 2023-01-24 02:19:58.131745: step: 696/459, loss: 0.09763237088918686 2023-01-24 02:19:58.717120: step: 698/459, loss: 0.15866370499134064 2023-01-24 02:19:59.333854: step: 700/459, loss: 0.03994162380695343 2023-01-24 02:20:00.021513: step: 702/459, loss: 0.4406089782714844 2023-01-24 02:20:00.604931: step: 704/459, loss: 0.28979697823524475 2023-01-24 02:20:01.278857: step: 706/459, loss: 0.10363621264696121 2023-01-24 02:20:01.897620: step: 708/459, loss: 0.08241596072912216 2023-01-24 02:20:02.474285: step: 710/459, loss: 0.07625776529312134 2023-01-24 02:20:03.077742: step: 712/459, loss: 0.01860000751912594 2023-01-24 02:20:03.672819: step: 714/459, loss: 0.04978875815868378 2023-01-24 02:20:04.247393: step: 716/459, loss: 0.098565973341465 2023-01-24 02:20:04.927163: step: 718/459, loss: 0.048441819846630096 2023-01-24 02:20:05.533416: step: 720/459, loss: 0.08272606879472733 2023-01-24 02:20:06.215413: step: 722/459, loss: 0.08051839470863342 2023-01-24 02:20:06.837964: step: 724/459, loss: 0.0685669332742691 2023-01-24 
02:20:07.382155: step: 726/459, loss: 0.07902563363313675 2023-01-24 02:20:08.037325: step: 728/459, loss: 0.06697879731655121 2023-01-24 02:20:08.644425: step: 730/459, loss: 0.31771865487098694 2023-01-24 02:20:09.228994: step: 732/459, loss: 0.06254205107688904 2023-01-24 02:20:09.867145: step: 734/459, loss: 0.14274589717388153 2023-01-24 02:20:10.566363: step: 736/459, loss: 0.08407097309827805 2023-01-24 02:20:11.208825: step: 738/459, loss: 0.08393845707178116 2023-01-24 02:20:11.835359: step: 740/459, loss: 0.06559504568576813 2023-01-24 02:20:12.488221: step: 742/459, loss: 0.06156276538968086 2023-01-24 02:20:13.091773: step: 744/459, loss: 0.04495581239461899 2023-01-24 02:20:13.646829: step: 746/459, loss: 0.028246015310287476 2023-01-24 02:20:14.249379: step: 748/459, loss: 0.05479363352060318 2023-01-24 02:20:14.852293: step: 750/459, loss: 0.06005875766277313 2023-01-24 02:20:15.491964: step: 752/459, loss: 0.05806000158190727 2023-01-24 02:20:16.074807: step: 754/459, loss: 0.018127944320440292 2023-01-24 02:20:16.725962: step: 756/459, loss: 0.03795893117785454 2023-01-24 02:20:17.341747: step: 758/459, loss: 0.029100855812430382 2023-01-24 02:20:17.977697: step: 760/459, loss: 0.05207011476159096 2023-01-24 02:20:18.583413: step: 762/459, loss: 0.14553451538085938 2023-01-24 02:20:19.272388: step: 764/459, loss: 0.07555356621742249 2023-01-24 02:20:19.874151: step: 766/459, loss: 0.032278064638376236 2023-01-24 02:20:20.408924: step: 768/459, loss: 0.12175998091697693 2023-01-24 02:20:21.052529: step: 770/459, loss: 0.18201379477977753 2023-01-24 02:20:21.672076: step: 772/459, loss: 0.11679049581289291 2023-01-24 02:20:22.278982: step: 774/459, loss: 0.10610795021057129 2023-01-24 02:20:22.887007: step: 776/459, loss: 0.056696753948926926 2023-01-24 02:20:23.576612: step: 778/459, loss: 0.024782566353678703 2023-01-24 02:20:24.206064: step: 780/459, loss: 0.07331383973360062 2023-01-24 02:20:24.888558: step: 782/459, loss: 0.050942566245794296 2023-01-24 02:20:25.493242: step: 784/459, loss: 0.015505329705774784 2023-01-24 02:20:26.104497: step: 786/459, loss: 0.06654304265975952 2023-01-24 02:20:26.725518: step: 788/459, loss: 0.05221100151538849 2023-01-24 02:20:27.308981: step: 790/459, loss: 0.11970260739326477 2023-01-24 02:20:27.851654: step: 792/459, loss: 0.02995116449892521 2023-01-24 02:20:28.448637: step: 794/459, loss: 0.07995066046714783 2023-01-24 02:20:29.043993: step: 796/459, loss: 0.06901880353689194 2023-01-24 02:20:29.665885: step: 798/459, loss: 0.09572526812553406 2023-01-24 02:20:30.257372: step: 800/459, loss: 0.05067753419280052 2023-01-24 02:20:30.897741: step: 802/459, loss: 0.06282605230808258 2023-01-24 02:20:31.576633: step: 804/459, loss: 0.010998529382050037 2023-01-24 02:20:32.150560: step: 806/459, loss: 0.04085085913538933 2023-01-24 02:20:32.837213: step: 808/459, loss: 0.06074994429945946 2023-01-24 02:20:33.495458: step: 810/459, loss: 0.08586347848176956 2023-01-24 02:20:34.087604: step: 812/459, loss: 0.06230306252837181 2023-01-24 02:20:34.762010: step: 814/459, loss: 0.06518997251987457 2023-01-24 02:20:35.363744: step: 816/459, loss: 0.018127430230379105 2023-01-24 02:20:36.023570: step: 818/459, loss: 0.12652328610420227 2023-01-24 02:20:36.672050: step: 820/459, loss: 0.06392063200473785 2023-01-24 02:20:37.313845: step: 822/459, loss: 0.03482289984822273 2023-01-24 02:20:37.963396: step: 824/459, loss: 0.05602201446890831 2023-01-24 02:20:38.594371: step: 826/459, loss: 0.09220605343580246 2023-01-24 02:20:39.227368: step: 
828/459, loss: 0.042623549699783325 2023-01-24 02:20:39.846619: step: 830/459, loss: 0.11437392234802246 2023-01-24 02:20:40.453468: step: 832/459, loss: 0.02967342920601368 2023-01-24 02:20:41.078710: step: 834/459, loss: 0.174713134765625 2023-01-24 02:20:41.734371: step: 836/459, loss: 0.044007807970047 2023-01-24 02:20:42.376129: step: 838/459, loss: 0.14102117717266083 2023-01-24 02:20:42.989197: step: 840/459, loss: 0.007517766207456589 2023-01-24 02:20:43.617240: step: 842/459, loss: 0.09972231090068817 2023-01-24 02:20:44.302706: step: 844/459, loss: 0.03825141116976738 2023-01-24 02:20:44.915887: step: 846/459, loss: 0.04012852907180786 2023-01-24 02:20:45.452208: step: 848/459, loss: 0.24780267477035522 2023-01-24 02:20:46.057244: step: 850/459, loss: 0.050393395125865936 2023-01-24 02:20:46.690307: step: 852/459, loss: 0.14773264527320862 2023-01-24 02:20:47.285027: step: 854/459, loss: 0.0444936640560627 2023-01-24 02:20:47.887968: step: 856/459, loss: 0.013516506180167198 2023-01-24 02:20:48.497210: step: 858/459, loss: 0.19723889231681824 2023-01-24 02:20:49.082870: step: 860/459, loss: 0.15181535482406616 2023-01-24 02:20:49.719376: step: 862/459, loss: 0.034597016870975494 2023-01-24 02:20:50.341096: step: 864/459, loss: 0.45917823910713196 2023-01-24 02:20:50.953538: step: 866/459, loss: 0.05751344561576843 2023-01-24 02:20:51.564855: step: 868/459, loss: 0.054830700159072876 2023-01-24 02:20:52.127720: step: 870/459, loss: 0.01572544500231743 2023-01-24 02:20:52.746110: step: 872/459, loss: 0.1063140481710434 2023-01-24 02:20:53.319717: step: 874/459, loss: 0.024192243814468384 2023-01-24 02:20:54.002103: step: 876/459, loss: 2.5545504093170166 2023-01-24 02:20:54.627593: step: 878/459, loss: 0.1231955885887146 2023-01-24 02:20:55.228561: step: 880/459, loss: 0.025192996487021446 2023-01-24 02:20:55.848207: step: 882/459, loss: 0.2215256541967392 2023-01-24 02:20:56.482923: step: 884/459, loss: 0.052822280675172806 2023-01-24 02:20:57.084874: step: 886/459, loss: 0.14785753190517426 2023-01-24 02:20:57.674366: step: 888/459, loss: 0.09392763674259186 2023-01-24 02:20:58.298285: step: 890/459, loss: 0.08839825540781021 2023-01-24 02:20:58.957727: step: 892/459, loss: 0.08095818012952805 2023-01-24 02:20:59.637146: step: 894/459, loss: 0.025669319555163383 2023-01-24 02:21:00.325444: step: 896/459, loss: 0.03540866822004318 2023-01-24 02:21:00.969495: step: 898/459, loss: 0.05807473137974739 2023-01-24 02:21:01.559993: step: 900/459, loss: 0.09025970101356506 2023-01-24 02:21:02.198973: step: 902/459, loss: 0.07956739515066147 2023-01-24 02:21:02.821604: step: 904/459, loss: 0.08361867815256119 2023-01-24 02:21:03.460574: step: 906/459, loss: 0.04965708404779434 2023-01-24 02:21:04.140016: step: 908/459, loss: 0.13482189178466797 2023-01-24 02:21:04.758424: step: 910/459, loss: 0.09641584753990173 2023-01-24 02:21:05.338585: step: 912/459, loss: 0.6241233944892883 2023-01-24 02:21:05.950309: step: 914/459, loss: 0.044208236038684845 2023-01-24 02:21:06.518902: step: 916/459, loss: 0.06799349933862686 2023-01-24 02:21:07.101223: step: 918/459, loss: 0.18873141705989838 2023-01-24 02:21:07.510073: step: 920/459, loss: 0.01234035100787878 ================================================== Loss: 0.121 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33471791808689644, 'r': 0.3112177986007197, 'f1': 0.32254037337773706}, 'combined': 0.23766132775201676, 'epoch': 18} Test Chinese: {'template': {'p': 
0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3551808082803973, 'r': 0.29130633049700155, 'f1': 0.3200880936898191}, 'combined': 0.2048563799614842, 'epoch': 18} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3384959286745001, 'r': 0.32372286157864905, 'f1': 0.3309446130978624}, 'combined': 0.24385392544053017, 'epoch': 18} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.35409700683912804, 'r': 0.28424332458086365, 'f1': 0.31534811602516394}, 'combined': 0.2018227942561049, 'epoch': 18} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3477174118090926, 'r': 0.3206653930535465, 'f1': 0.33364395289085685}, 'combined': 0.24584291265642083, 'epoch': 18} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3677262019999736, 'r': 0.3033657363810174, 'f1': 0.3324597530569191}, 'combined': 0.23836737011628165, 'epoch': 18} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.34244791666666663, 'r': 0.31309523809523804, 'f1': 0.3271144278606965}, 'combined': 0.21807628524046432, 'epoch': 18} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.29, 'r': 0.31521739130434784, 'f1': 0.3020833333333333}, 'combined': 0.15104166666666666, 'epoch': 18} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36363636363636365, 'r': 0.13793103448275862, 'f1': 0.2}, 'combined': 0.13333333333333333, 'epoch': 18} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 'epoch': 10} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 'epoch': 10} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'epoch': 10} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3342478880342958, 'r': 0.3266369304319968, 'f1': 0.33039858414138645}, 'combined': 0.24345158831470579, 'epoch': 5} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3413499740991752, 'r': 0.24608229950967814, 'f1': 0.28599105067157526}, 'combined': 0.18303427242980813, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3269230769230769, 'r': 0.3695652173913043, 'f1': 0.346938775510204}, 'combined': 0.173469387755102, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 
'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8} ****************************** Epoch: 19 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:23:40.308250: step: 2/459, loss: 0.055631689727306366 2023-01-24 02:23:40.967137: step: 4/459, loss: 0.09221483021974564 2023-01-24 02:23:41.546657: step: 6/459, loss: 0.050496358424425125 2023-01-24 02:23:42.166347: step: 8/459, loss: 0.09521479159593582 2023-01-24 02:23:42.765872: step: 10/459, loss: 0.05534030869603157 2023-01-24 02:23:43.384585: step: 12/459, loss: 0.0285016018897295 2023-01-24 02:23:43.999400: step: 14/459, loss: 0.15810246765613556 2023-01-24 02:23:44.593805: step: 16/459, loss: 0.013846262358129025 2023-01-24 02:23:45.190676: step: 18/459, loss: 0.00971539318561554 2023-01-24 02:23:45.792884: step: 20/459, loss: 4.474266052246094 2023-01-24 02:23:46.440601: step: 22/459, loss: 0.04191272705793381 2023-01-24 02:23:47.051445: step: 24/459, loss: 0.05906436964869499 2023-01-24 02:23:47.710711: step: 26/459, loss: 0.23633182048797607 2023-01-24 02:23:48.356860: step: 28/459, loss: 0.14978469908237457 2023-01-24 02:23:48.944720: step: 30/459, loss: 0.02425280585885048 2023-01-24 02:23:49.566620: step: 32/459, loss: 0.03976859152317047 2023-01-24 02:23:50.186016: step: 34/459, loss: 0.06281391531229019 2023-01-24 02:23:50.818851: step: 36/459, loss: 0.2715296745300293 2023-01-24 02:23:51.412924: step: 38/459, loss: 0.06704933196306229 2023-01-24 02:23:52.048248: step: 40/459, loss: 0.10801587253808975 2023-01-24 02:23:52.586798: step: 42/459, loss: 0.04788731038570404 2023-01-24 02:23:53.207758: step: 44/459, loss: 0.04931866005063057 2023-01-24 02:23:53.809040: step: 46/459, loss: 0.14753177762031555 2023-01-24 02:23:54.415948: step: 48/459, loss: 0.057354070246219635 2023-01-24 02:23:55.105437: step: 50/459, loss: 0.033912986516952515 2023-01-24 02:23:55.697018: step: 52/459, loss: 0.005596837028861046 2023-01-24 02:23:56.303989: step: 54/459, loss: 0.14509467780590057 2023-01-24 02:23:56.922723: step: 56/459, loss: 0.043546900153160095 2023-01-24 02:23:57.485615: step: 58/459, loss: 0.028083091601729393 2023-01-24 02:23:58.158244: step: 60/459, loss: 0.02111830934882164 2023-01-24 02:23:58.777192: step: 62/459, loss: 0.049068402498960495 2023-01-24 02:23:59.456317: step: 64/459, loss: 0.07440444082021713 2023-01-24 02:24:00.011971: step: 66/459, loss: 0.12007065862417221 2023-01-24 02:24:00.596042: step: 68/459, loss: 0.013795123435556889 2023-01-24 02:24:01.225070: step: 70/459, loss: 0.09384602308273315 2023-01-24 02:24:01.844432: step: 72/459, loss: 0.0826074630022049 2023-01-24 02:24:02.415846: step: 74/459, loss: 0.1188095211982727 2023-01-24 02:24:02.977895: step: 76/459, loss: 0.04850737750530243 2023-01-24 02:24:03.652500: step: 78/459, loss: 2.5992753505706787 2023-01-24 02:24:04.225489: step: 80/459, loss: 0.018933603540062904 2023-01-24 02:24:04.846844: step: 82/459, loss: 0.12329467386007309 2023-01-24 02:24:05.447302: step: 84/459, loss: 0.11049447953701019 2023-01-24 02:24:06.040976: step: 86/459, loss: 0.12449253350496292 2023-01-24 02:24:06.646908: step: 88/459, loss: 0.20220725238323212 2023-01-24 02:24:07.261152: 
step: 90/459, loss: 0.021504107862710953 2023-01-24 02:24:07.892671: step: 92/459, loss: 0.10502303391695023 2023-01-24 02:24:08.512974: step: 94/459, loss: 0.4918448030948639 2023-01-24 02:24:09.036077: step: 96/459, loss: 0.04023956507444382 2023-01-24 02:24:09.689515: step: 98/459, loss: 0.09575260430574417 2023-01-24 02:24:10.289254: step: 100/459, loss: 0.07550585269927979 2023-01-24 02:24:10.879479: step: 102/459, loss: 0.014826728031039238 2023-01-24 02:24:11.574623: step: 104/459, loss: 0.14364206790924072 2023-01-24 02:24:12.196439: step: 106/459, loss: 0.08659927546977997 2023-01-24 02:24:12.780195: step: 108/459, loss: 0.37254413962364197 2023-01-24 02:24:13.371625: step: 110/459, loss: 0.0879984050989151 2023-01-24 02:24:13.988151: step: 112/459, loss: 0.43324771523475647 2023-01-24 02:24:14.556692: step: 114/459, loss: 0.020147306844592094 2023-01-24 02:24:15.180298: step: 116/459, loss: 0.009408765472471714 2023-01-24 02:24:15.732066: step: 118/459, loss: 0.004613899160176516 2023-01-24 02:24:16.371887: step: 120/459, loss: 0.02116990089416504 2023-01-24 02:24:17.025564: step: 122/459, loss: 0.04422599449753761 2023-01-24 02:24:17.565669: step: 124/459, loss: 0.03656242787837982 2023-01-24 02:24:18.111263: step: 126/459, loss: 0.048822201788425446 2023-01-24 02:24:18.719521: step: 128/459, loss: 0.03783644735813141 2023-01-24 02:24:19.441083: step: 130/459, loss: 0.015068372711539268 2023-01-24 02:24:20.114875: step: 132/459, loss: 0.12029267102479935 2023-01-24 02:24:20.828969: step: 134/459, loss: 0.8700453639030457 2023-01-24 02:24:21.400678: step: 136/459, loss: 0.04162882640957832 2023-01-24 02:24:22.001269: step: 138/459, loss: 0.02938259206712246 2023-01-24 02:24:22.598750: step: 140/459, loss: 0.11357592791318893 2023-01-24 02:24:23.235764: step: 142/459, loss: 0.2512688636779785 2023-01-24 02:24:23.866388: step: 144/459, loss: 0.48990583419799805 2023-01-24 02:24:24.490267: step: 146/459, loss: 0.13655754923820496 2023-01-24 02:24:25.211075: step: 148/459, loss: 0.2323136031627655 2023-01-24 02:24:25.919508: step: 150/459, loss: 0.42484062910079956 2023-01-24 02:24:26.505174: step: 152/459, loss: 0.05287516862154007 2023-01-24 02:24:27.111857: step: 154/459, loss: 0.038263555616140366 2023-01-24 02:24:27.775469: step: 156/459, loss: 0.06990724056959152 2023-01-24 02:24:28.342533: step: 158/459, loss: 0.01580103300511837 2023-01-24 02:24:28.945723: step: 160/459, loss: 0.04991232976317406 2023-01-24 02:24:29.554826: step: 162/459, loss: 0.019754666835069656 2023-01-24 02:24:30.206892: step: 164/459, loss: 0.067677803337574 2023-01-24 02:24:30.802849: step: 166/459, loss: 6.653501987457275 2023-01-24 02:24:31.451117: step: 168/459, loss: 0.05729812756180763 2023-01-24 02:24:32.103869: step: 170/459, loss: 0.045902837067842484 2023-01-24 02:24:32.694463: step: 172/459, loss: 0.06049998477101326 2023-01-24 02:24:33.322318: step: 174/459, loss: 0.0595923475921154 2023-01-24 02:24:33.943403: step: 176/459, loss: 0.047569017857313156 2023-01-24 02:24:34.578638: step: 178/459, loss: 0.142852783203125 2023-01-24 02:24:35.205207: step: 180/459, loss: 0.09694468975067139 2023-01-24 02:24:35.767129: step: 182/459, loss: 0.05707986652851105 2023-01-24 02:24:36.450985: step: 184/459, loss: 0.05333773419260979 2023-01-24 02:24:37.125407: step: 186/459, loss: 0.09684722125530243 2023-01-24 02:24:37.724867: step: 188/459, loss: 0.07278839498758316 2023-01-24 02:24:38.372281: step: 190/459, loss: 0.012038126587867737 2023-01-24 02:24:39.025960: step: 192/459, loss: 
0.058874353766441345 2023-01-24 02:24:39.657499: step: 194/459, loss: 0.011474978178739548 2023-01-24 02:24:40.241916: step: 196/459, loss: 0.04392266646027565 2023-01-24 02:24:40.938088: step: 198/459, loss: 0.023739170283079147 2023-01-24 02:24:41.550545: step: 200/459, loss: 0.15579068660736084 2023-01-24 02:24:42.200384: step: 202/459, loss: 0.021892040967941284 2023-01-24 02:24:42.790059: step: 204/459, loss: 0.12029122561216354 2023-01-24 02:24:43.348214: step: 206/459, loss: 0.02809486910700798 2023-01-24 02:24:44.014169: step: 208/459, loss: 0.03921320289373398 2023-01-24 02:24:44.662012: step: 210/459, loss: 0.07227424532175064 2023-01-24 02:24:45.292585: step: 212/459, loss: 0.3124282956123352 2023-01-24 02:24:45.900695: step: 214/459, loss: 0.03408445417881012 2023-01-24 02:24:46.478672: step: 216/459, loss: 0.08747018128633499 2023-01-24 02:24:47.049297: step: 218/459, loss: 0.020001331344246864 2023-01-24 02:24:47.613817: step: 220/459, loss: 0.0418853759765625 2023-01-24 02:24:48.244218: step: 222/459, loss: 0.01389380544424057 2023-01-24 02:24:48.960831: step: 224/459, loss: 0.07029926031827927 2023-01-24 02:24:49.640271: step: 226/459, loss: 0.03735468536615372 2023-01-24 02:24:50.369782: step: 228/459, loss: 0.10326801985502243 2023-01-24 02:24:51.002849: step: 230/459, loss: 0.09219168871641159 2023-01-24 02:24:51.587810: step: 232/459, loss: 0.04084968939423561 2023-01-24 02:24:52.208497: step: 234/459, loss: 0.03349605202674866 2023-01-24 02:24:52.878511: step: 236/459, loss: 0.02412317879498005 2023-01-24 02:24:53.488291: step: 238/459, loss: 0.037915732711553574 2023-01-24 02:24:54.099390: step: 240/459, loss: 0.04776817560195923 2023-01-24 02:24:54.770051: step: 242/459, loss: 0.08992898464202881 2023-01-24 02:24:55.400957: step: 244/459, loss: 0.06772038340568542 2023-01-24 02:24:55.999935: step: 246/459, loss: 0.023640166968107224 2023-01-24 02:24:56.588554: step: 248/459, loss: 0.08866351842880249 2023-01-24 02:24:57.211297: step: 250/459, loss: 0.0845235213637352 2023-01-24 02:24:57.833418: step: 252/459, loss: 0.01912700943648815 2023-01-24 02:24:58.494303: step: 254/459, loss: 0.6106810569763184 2023-01-24 02:24:59.129230: step: 256/459, loss: 0.027180079370737076 2023-01-24 02:24:59.839811: step: 258/459, loss: 0.05524162948131561 2023-01-24 02:25:00.464719: step: 260/459, loss: 0.012182990089058876 2023-01-24 02:25:01.060105: step: 262/459, loss: 0.4535169005393982 2023-01-24 02:25:01.667756: step: 264/459, loss: 0.8595037460327148 2023-01-24 02:25:02.389112: step: 266/459, loss: 0.13769811391830444 2023-01-24 02:25:02.986936: step: 268/459, loss: 0.008307751268148422 2023-01-24 02:25:03.584690: step: 270/459, loss: 0.01024704147130251 2023-01-24 02:25:04.247873: step: 272/459, loss: 0.06325408816337585 2023-01-24 02:25:04.909674: step: 274/459, loss: 0.1414734274148941 2023-01-24 02:25:05.611687: step: 276/459, loss: 0.02036289870738983 2023-01-24 02:25:06.245856: step: 278/459, loss: 0.029201088473200798 2023-01-24 02:25:06.833676: step: 280/459, loss: 0.07791352272033691 2023-01-24 02:25:07.508901: step: 282/459, loss: 0.1255849301815033 2023-01-24 02:25:08.147686: step: 284/459, loss: 0.02919425442814827 2023-01-24 02:25:08.741650: step: 286/459, loss: 0.05185685306787491 2023-01-24 02:25:09.418804: step: 288/459, loss: 0.07938562333583832 2023-01-24 02:25:09.998279: step: 290/459, loss: 0.008882928639650345 2023-01-24 02:25:10.664671: step: 292/459, loss: 0.12111680209636688 2023-01-24 02:25:11.264313: step: 294/459, loss: 0.06091207265853882 2023-01-24 
02:25:11.847685: step: 296/459, loss: 0.06512444466352463 2023-01-24 02:25:12.453537: step: 298/459, loss: 0.022300783544778824 2023-01-24 02:25:13.118810: step: 300/459, loss: 0.04297550767660141 2023-01-24 02:25:13.738303: step: 302/459, loss: 0.08153089880943298 2023-01-24 02:25:14.340440: step: 304/459, loss: 0.28723210096359253 2023-01-24 02:25:14.951538: step: 306/459, loss: 0.04520732909440994 2023-01-24 02:25:15.601222: step: 308/459, loss: 0.058510392904281616 2023-01-24 02:25:16.179930: step: 310/459, loss: 0.049815550446510315 2023-01-24 02:25:16.800107: step: 312/459, loss: 0.3564881384372711 2023-01-24 02:25:17.411585: step: 314/459, loss: 0.13662883639335632 2023-01-24 02:25:18.065595: step: 316/459, loss: 0.044644758105278015 2023-01-24 02:25:18.703055: step: 318/459, loss: 0.04751395806670189 2023-01-24 02:25:19.295774: step: 320/459, loss: 0.021888891234993935 2023-01-24 02:25:19.963410: step: 322/459, loss: 0.08147253841161728 2023-01-24 02:25:20.525579: step: 324/459, loss: 0.030193127691745758 2023-01-24 02:25:21.147605: step: 326/459, loss: 0.018574360758066177 2023-01-24 02:25:21.727838: step: 328/459, loss: 0.015308882109820843 2023-01-24 02:25:22.350551: step: 330/459, loss: 0.03431312367320061 2023-01-24 02:25:22.959408: step: 332/459, loss: 0.0917513370513916 2023-01-24 02:25:23.577117: step: 334/459, loss: 0.06253886967897415 2023-01-24 02:25:24.205716: step: 336/459, loss: 0.06472652405500412 2023-01-24 02:25:24.848887: step: 338/459, loss: 0.14903971552848816 2023-01-24 02:25:25.503521: step: 340/459, loss: 0.12655657529830933 2023-01-24 02:25:26.178684: step: 342/459, loss: 0.48764321208000183 2023-01-24 02:25:26.828488: step: 344/459, loss: 0.09912855923175812 2023-01-24 02:25:27.387537: step: 346/459, loss: 0.008213277906179428 2023-01-24 02:25:28.037576: step: 348/459, loss: 0.05243556573987007 2023-01-24 02:25:28.722039: step: 350/459, loss: 0.030315600335597992 2023-01-24 02:25:29.298330: step: 352/459, loss: 0.07113074511289597 2023-01-24 02:25:29.928329: step: 354/459, loss: 0.19822606444358826 2023-01-24 02:25:30.541983: step: 356/459, loss: 0.17880189418792725 2023-01-24 02:25:31.174570: step: 358/459, loss: 0.03532573953270912 2023-01-24 02:25:31.787483: step: 360/459, loss: 0.06279587000608444 2023-01-24 02:25:32.441249: step: 362/459, loss: 0.0517311692237854 2023-01-24 02:25:32.985862: step: 364/459, loss: 0.02082410268485546 2023-01-24 02:25:33.647009: step: 366/459, loss: 0.05663611367344856 2023-01-24 02:25:34.298579: step: 368/459, loss: 0.03252007067203522 2023-01-24 02:25:34.919257: step: 370/459, loss: 0.0268669743090868 2023-01-24 02:25:35.553925: step: 372/459, loss: 0.10555824637413025 2023-01-24 02:25:36.244168: step: 374/459, loss: 0.0500229150056839 2023-01-24 02:25:36.867026: step: 376/459, loss: 0.0448029525578022 2023-01-24 02:25:37.497819: step: 378/459, loss: 0.061799079179763794 2023-01-24 02:25:38.176995: step: 380/459, loss: 0.05849863588809967 2023-01-24 02:25:38.734549: step: 382/459, loss: 0.014298862777650356 2023-01-24 02:25:39.328443: step: 384/459, loss: 0.12226129323244095 2023-01-24 02:25:39.935477: step: 386/459, loss: 0.021079793572425842 2023-01-24 02:25:40.569783: step: 388/459, loss: 0.0635683536529541 2023-01-24 02:25:41.138850: step: 390/459, loss: 0.03170430287718773 2023-01-24 02:25:41.832364: step: 392/459, loss: 0.03823016956448555 2023-01-24 02:25:42.522777: step: 394/459, loss: 0.14452165365219116 2023-01-24 02:25:43.228822: step: 396/459, loss: 0.06127626821398735 2023-01-24 02:25:43.964199: step: 
398/459, loss: 0.06252164393663406 2023-01-24 02:25:44.584932: step: 400/459, loss: 0.007753286510705948 2023-01-24 02:25:45.174311: step: 402/459, loss: 0.001054741907864809 2023-01-24 02:25:45.757493: step: 404/459, loss: 0.08895339071750641 2023-01-24 02:25:46.366175: step: 406/459, loss: 0.021605124697089195 2023-01-24 02:25:46.903330: step: 408/459, loss: 0.021855492144823074 2023-01-24 02:25:47.523950: step: 410/459, loss: 0.3049907684326172 2023-01-24 02:25:48.232360: step: 412/459, loss: 0.047860220074653625 2023-01-24 02:25:48.866349: step: 414/459, loss: 0.10590559244155884 2023-01-24 02:25:49.500701: step: 416/459, loss: 0.11609135568141937 2023-01-24 02:25:50.138324: step: 418/459, loss: 0.024894122034311295 2023-01-24 02:25:50.860409: step: 420/459, loss: 0.03162519633769989 2023-01-24 02:25:51.423690: step: 422/459, loss: 0.04155169799923897 2023-01-24 02:25:52.067604: step: 424/459, loss: 0.02680276893079281 2023-01-24 02:25:52.688553: step: 426/459, loss: 0.025564195588231087 2023-01-24 02:25:53.297813: step: 428/459, loss: 0.052792150527238846 2023-01-24 02:25:53.875695: step: 430/459, loss: 0.03995170071721077 2023-01-24 02:25:54.520351: step: 432/459, loss: 0.01944108121097088 2023-01-24 02:25:55.149793: step: 434/459, loss: 0.15538187325000763 2023-01-24 02:25:55.921187: step: 436/459, loss: 0.12691634893417358 2023-01-24 02:25:56.564425: step: 438/459, loss: 0.11029352992773056 2023-01-24 02:25:57.152769: step: 440/459, loss: 0.031351394951343536 2023-01-24 02:25:57.753002: step: 442/459, loss: 0.03869444504380226 2023-01-24 02:25:58.356040: step: 444/459, loss: 0.11666399985551834 2023-01-24 02:25:58.976379: step: 446/459, loss: 0.024888265877962112 2023-01-24 02:25:59.610574: step: 448/459, loss: 0.10139728337526321 2023-01-24 02:26:00.291728: step: 450/459, loss: 0.038963399827480316 2023-01-24 02:26:00.819872: step: 452/459, loss: 0.039233263581991196 2023-01-24 02:26:01.401224: step: 454/459, loss: 0.06915609538555145 2023-01-24 02:26:02.032401: step: 456/459, loss: 0.022455209866166115 2023-01-24 02:26:02.656779: step: 458/459, loss: 0.08178674429655075 2023-01-24 02:26:03.258124: step: 460/459, loss: 0.10291894525289536 2023-01-24 02:26:03.887626: step: 462/459, loss: 0.22560666501522064 2023-01-24 02:26:04.497130: step: 464/459, loss: 0.0954480767250061 2023-01-24 02:26:05.241483: step: 466/459, loss: 0.061557456851005554 2023-01-24 02:26:05.911590: step: 468/459, loss: 0.2576570510864258 2023-01-24 02:26:06.541288: step: 470/459, loss: 0.047467198222875595 2023-01-24 02:26:07.128692: step: 472/459, loss: 0.021590275689959526 2023-01-24 02:26:07.730813: step: 474/459, loss: 0.017237436026334763 2023-01-24 02:26:08.369946: step: 476/459, loss: 0.07285743951797485 2023-01-24 02:26:08.940460: step: 478/459, loss: 0.10690474510192871 2023-01-24 02:26:09.514235: step: 480/459, loss: 0.0416409857571125 2023-01-24 02:26:10.062442: step: 482/459, loss: 0.053561657667160034 2023-01-24 02:26:10.620814: step: 484/459, loss: 0.06669449806213379 2023-01-24 02:26:11.195209: step: 486/459, loss: 0.22242549061775208 2023-01-24 02:26:11.737822: step: 488/459, loss: 0.006698645185679197 2023-01-24 02:26:12.375434: step: 490/459, loss: 0.05510342866182327 2023-01-24 02:26:12.944316: step: 492/459, loss: 0.053932566195726395 2023-01-24 02:26:13.537779: step: 494/459, loss: 0.1051400899887085 2023-01-24 02:26:14.204471: step: 496/459, loss: 0.04192233458161354 2023-01-24 02:26:14.793010: step: 498/459, loss: 0.09756564348936081 2023-01-24 02:26:15.429275: step: 500/459, loss: 
0.027597492560744286 2023-01-24 02:26:16.070461: step: 502/459, loss: 0.04158827289938927 2023-01-24 02:26:16.681760: step: 504/459, loss: 0.03377663344144821 2023-01-24 02:26:17.322803: step: 506/459, loss: 0.05637423321604729 2023-01-24 02:26:17.920389: step: 508/459, loss: 0.05556065961718559 2023-01-24 02:26:18.542283: step: 510/459, loss: 0.0527581162750721 2023-01-24 02:26:19.159873: step: 512/459, loss: 0.4403376579284668 2023-01-24 02:26:19.850393: step: 514/459, loss: 0.20589342713356018 2023-01-24 02:26:20.474500: step: 516/459, loss: 0.1104179322719574 2023-01-24 02:26:21.069556: step: 518/459, loss: 0.02844885364174843 2023-01-24 02:26:21.744708: step: 520/459, loss: 0.07601743936538696 2023-01-24 02:26:22.386197: step: 522/459, loss: 0.9929943680763245 2023-01-24 02:26:23.020919: step: 524/459, loss: 0.04503794386982918 2023-01-24 02:26:23.617970: step: 526/459, loss: 0.0524447001516819 2023-01-24 02:26:24.305432: step: 528/459, loss: 0.02102140709757805 2023-01-24 02:26:24.849922: step: 530/459, loss: 0.04545632377266884 2023-01-24 02:26:25.479369: step: 532/459, loss: 0.033838529139757156 2023-01-24 02:26:26.180341: step: 534/459, loss: 0.18278467655181885 2023-01-24 02:26:26.807078: step: 536/459, loss: 0.64522385597229 2023-01-24 02:26:27.381667: step: 538/459, loss: 0.010892949067056179 2023-01-24 02:26:28.020516: step: 540/459, loss: 0.061659131199121475 2023-01-24 02:26:28.631649: step: 542/459, loss: 0.699042022228241 2023-01-24 02:26:29.218009: step: 544/459, loss: 1.2571923732757568 2023-01-24 02:26:29.888368: step: 546/459, loss: 0.04765338823199272 2023-01-24 02:26:30.490847: step: 548/459, loss: 0.07360069453716278 2023-01-24 02:26:31.118824: step: 550/459, loss: 0.4588533639907837 2023-01-24 02:26:31.747205: step: 552/459, loss: 0.029765186831355095 2023-01-24 02:26:32.345179: step: 554/459, loss: 0.05204090476036072 2023-01-24 02:26:32.911372: step: 556/459, loss: 0.0362238883972168 2023-01-24 02:26:33.560485: step: 558/459, loss: 0.16369295120239258 2023-01-24 02:26:34.180228: step: 560/459, loss: 0.07913034409284592 2023-01-24 02:26:34.888963: step: 562/459, loss: 0.04694347828626633 2023-01-24 02:26:35.517412: step: 564/459, loss: 0.38297492265701294 2023-01-24 02:26:36.090653: step: 566/459, loss: 0.06277919560670853 2023-01-24 02:26:36.757059: step: 568/459, loss: 0.014586205594241619 2023-01-24 02:26:37.408336: step: 570/459, loss: 0.0206475667655468 2023-01-24 02:26:37.991186: step: 572/459, loss: 0.28104305267333984 2023-01-24 02:26:38.617602: step: 574/459, loss: 0.26983919739723206 2023-01-24 02:26:39.216888: step: 576/459, loss: 0.09294566512107849 2023-01-24 02:26:39.849650: step: 578/459, loss: 0.07610447704792023 2023-01-24 02:26:40.445271: step: 580/459, loss: 0.06704461574554443 2023-01-24 02:26:41.082648: step: 582/459, loss: 0.13185004889965057 2023-01-24 02:26:41.704673: step: 584/459, loss: 0.03453000262379646 2023-01-24 02:26:42.306183: step: 586/459, loss: 0.04272279515862465 2023-01-24 02:26:42.965824: step: 588/459, loss: 0.4333772659301758 2023-01-24 02:26:43.587556: step: 590/459, loss: 0.05218004062771797 2023-01-24 02:26:44.150102: step: 592/459, loss: 0.06351087242364883 2023-01-24 02:26:44.800537: step: 594/459, loss: 0.08335108309984207 2023-01-24 02:26:45.390514: step: 596/459, loss: 0.032595545053482056 2023-01-24 02:26:45.967098: step: 598/459, loss: 0.14289572834968567 2023-01-24 02:26:46.646529: step: 600/459, loss: 0.3056044280529022 2023-01-24 02:26:47.235499: step: 602/459, loss: 1.7439515590667725 2023-01-24 
02:26:47.882484: step: 604/459, loss: 0.04186530411243439 2023-01-24 02:26:48.478047: step: 606/459, loss: 0.025355800986289978 2023-01-24 02:26:49.148457: step: 608/459, loss: 0.08880600333213806 2023-01-24 02:26:49.812236: step: 610/459, loss: 0.06951065361499786 2023-01-24 02:26:50.394547: step: 612/459, loss: 0.014576900750398636 2023-01-24 02:26:51.021360: step: 614/459, loss: 0.07623930275440216 2023-01-24 02:26:51.622308: step: 616/459, loss: 0.05958852544426918 2023-01-24 02:26:52.199606: step: 618/459, loss: 0.03182990476489067 2023-01-24 02:26:52.841255: step: 620/459, loss: 0.05994420871138573 2023-01-24 02:26:53.451345: step: 622/459, loss: 0.046758752316236496 2023-01-24 02:26:54.078671: step: 624/459, loss: 0.025848299264907837 2023-01-24 02:26:54.672927: step: 626/459, loss: 0.3466233015060425 2023-01-24 02:26:55.227383: step: 628/459, loss: 0.10128891468048096 2023-01-24 02:26:55.906634: step: 630/459, loss: 0.055216312408447266 2023-01-24 02:26:56.523481: step: 632/459, loss: 0.6649923324584961 2023-01-24 02:26:57.190723: step: 634/459, loss: 0.1147696003317833 2023-01-24 02:26:57.731163: step: 636/459, loss: 0.038140229880809784 2023-01-24 02:26:58.390109: step: 638/459, loss: 0.6009621620178223 2023-01-24 02:26:58.994344: step: 640/459, loss: 0.03010309487581253 2023-01-24 02:26:59.586765: step: 642/459, loss: 0.16812081634998322 2023-01-24 02:27:00.190509: step: 644/459, loss: 0.015891583636403084 2023-01-24 02:27:00.764158: step: 646/459, loss: 0.04123851656913757 2023-01-24 02:27:01.454497: step: 648/459, loss: 0.12350207567214966 2023-01-24 02:27:02.131094: step: 650/459, loss: 0.05050799995660782 2023-01-24 02:27:02.748069: step: 652/459, loss: 0.07332231849431992 2023-01-24 02:27:03.377965: step: 654/459, loss: 0.05923059210181236 2023-01-24 02:27:03.950280: step: 656/459, loss: 0.35951969027519226 2023-01-24 02:27:04.580559: step: 658/459, loss: 0.05494295805692673 2023-01-24 02:27:05.257199: step: 660/459, loss: 0.13890093564987183 2023-01-24 02:27:05.959416: step: 662/459, loss: 0.043284233659505844 2023-01-24 02:27:06.549788: step: 664/459, loss: 0.4194561541080475 2023-01-24 02:27:07.156719: step: 666/459, loss: 0.028620561584830284 2023-01-24 02:27:07.712194: step: 668/459, loss: 0.030440418049693108 2023-01-24 02:27:08.375232: step: 670/459, loss: 0.10830559581518173 2023-01-24 02:27:09.065683: step: 672/459, loss: 0.06736963242292404 2023-01-24 02:27:09.666602: step: 674/459, loss: 0.11759641766548157 2023-01-24 02:27:10.266793: step: 676/459, loss: 0.07684194296598434 2023-01-24 02:27:10.848381: step: 678/459, loss: 0.17663271725177765 2023-01-24 02:27:11.470719: step: 680/459, loss: 0.019254812970757484 2023-01-24 02:27:12.141493: step: 682/459, loss: 0.03842959925532341 2023-01-24 02:27:12.672579: step: 684/459, loss: 0.04907999560236931 2023-01-24 02:27:13.243402: step: 686/459, loss: 0.05104978755116463 2023-01-24 02:27:13.876409: step: 688/459, loss: 0.14859484136104584 2023-01-24 02:27:14.519639: step: 690/459, loss: 0.03855162486433983 2023-01-24 02:27:15.124920: step: 692/459, loss: 0.01960843615233898 2023-01-24 02:27:15.719205: step: 694/459, loss: 0.13491691648960114 2023-01-24 02:27:16.353857: step: 696/459, loss: 0.029036423191428185 2023-01-24 02:27:17.010622: step: 698/459, loss: 0.10803235322237015 2023-01-24 02:27:17.656739: step: 700/459, loss: 0.09073489904403687 2023-01-24 02:27:18.367793: step: 702/459, loss: 0.029885711148381233 2023-01-24 02:27:18.958806: step: 704/459, loss: 0.051648616790771484 2023-01-24 02:27:19.640628: step: 
706/459, loss: 0.0852218046784401 2023-01-24 02:27:20.302001: step: 708/459, loss: 0.09375996142625809 2023-01-24 02:27:20.956701: step: 710/459, loss: 0.9389169216156006 2023-01-24 02:27:21.530644: step: 712/459, loss: 0.28436386585235596 2023-01-24 02:27:22.125716: step: 714/459, loss: 0.050803184509277344 2023-01-24 02:27:22.823811: step: 716/459, loss: 0.6776975393295288 2023-01-24 02:27:23.488907: step: 718/459, loss: 0.021141666918992996 2023-01-24 02:27:24.108559: step: 720/459, loss: 0.19259443879127502 2023-01-24 02:27:24.701543: step: 722/459, loss: 0.03704686462879181 2023-01-24 02:27:25.327155: step: 724/459, loss: 0.14617720246315002 2023-01-24 02:27:25.945494: step: 726/459, loss: 0.010841804556548595 2023-01-24 02:27:26.550003: step: 728/459, loss: 0.17223718762397766 2023-01-24 02:27:27.225348: step: 730/459, loss: 3.81978178024292 2023-01-24 02:27:27.844484: step: 732/459, loss: 0.03144128993153572 2023-01-24 02:27:28.405786: step: 734/459, loss: 0.015028171241283417 2023-01-24 02:27:29.048496: step: 736/459, loss: 0.026376893743872643 2023-01-24 02:27:29.667508: step: 738/459, loss: 0.02098722755908966 2023-01-24 02:27:30.341629: step: 740/459, loss: 0.11986003816127777 2023-01-24 02:27:30.975465: step: 742/459, loss: 0.018197529017925262 2023-01-24 02:27:31.573038: step: 744/459, loss: 0.029075469821691513 2023-01-24 02:27:32.198359: step: 746/459, loss: 0.2558530271053314 2023-01-24 02:27:32.831325: step: 748/459, loss: 0.0905689224600792 2023-01-24 02:27:33.424273: step: 750/459, loss: 0.08112265169620514 2023-01-24 02:27:34.035733: step: 752/459, loss: 0.05579144507646561 2023-01-24 02:27:34.685354: step: 754/459, loss: 0.03541965410113335 2023-01-24 02:27:35.392827: step: 756/459, loss: 1.8552159070968628 2023-01-24 02:27:36.025750: step: 758/459, loss: 0.07307909429073334 2023-01-24 02:27:36.615773: step: 760/459, loss: 0.059801794588565826 2023-01-24 02:27:37.201607: step: 762/459, loss: 0.08134332299232483 2023-01-24 02:27:37.845683: step: 764/459, loss: 0.05707692354917526 2023-01-24 02:27:38.514782: step: 766/459, loss: 0.03581614792346954 2023-01-24 02:27:39.119901: step: 768/459, loss: 0.06156236305832863 2023-01-24 02:27:39.736692: step: 770/459, loss: 0.11770349740982056 2023-01-24 02:27:40.348010: step: 772/459, loss: 0.07705764472484589 2023-01-24 02:27:40.982018: step: 774/459, loss: 0.025879837572574615 2023-01-24 02:27:41.598860: step: 776/459, loss: 0.07440976798534393 2023-01-24 02:27:42.220271: step: 778/459, loss: 0.2142772674560547 2023-01-24 02:27:42.853367: step: 780/459, loss: 0.02501547895371914 2023-01-24 02:27:43.448969: step: 782/459, loss: 0.05520422384142876 2023-01-24 02:27:44.045539: step: 784/459, loss: 0.13421611487865448 2023-01-24 02:27:44.602513: step: 786/459, loss: 0.21800437569618225 2023-01-24 02:27:45.201414: step: 788/459, loss: 0.07542753964662552 2023-01-24 02:27:45.777620: step: 790/459, loss: 0.052096910774707794 2023-01-24 02:27:46.378257: step: 792/459, loss: 0.02676287479698658 2023-01-24 02:27:47.015679: step: 794/459, loss: 0.031656231731176376 2023-01-24 02:27:47.614789: step: 796/459, loss: 0.06274886429309845 2023-01-24 02:27:48.224680: step: 798/459, loss: 0.02593863755464554 2023-01-24 02:27:48.833603: step: 800/459, loss: 0.050658125430345535 2023-01-24 02:27:49.512281: step: 802/459, loss: 0.09006095677614212 2023-01-24 02:27:50.140797: step: 804/459, loss: 0.08306868374347687 2023-01-24 02:27:50.719274: step: 806/459, loss: 0.05107176676392555 2023-01-24 02:27:51.300535: step: 808/459, loss: 
0.006474387366324663 2023-01-24 02:27:51.860244: step: 810/459, loss: 0.059440527111291885 2023-01-24 02:27:52.494377: step: 812/459, loss: 0.10511468350887299 2023-01-24 02:27:53.085467: step: 814/459, loss: 0.12075568735599518 2023-01-24 02:27:53.688229: step: 816/459, loss: 0.0943027138710022 2023-01-24 02:27:54.272956: step: 818/459, loss: 0.5583130121231079 2023-01-24 02:27:54.908543: step: 820/459, loss: 0.05308383330702782 2023-01-24 02:27:55.553746: step: 822/459, loss: 0.23205441236495972 2023-01-24 02:27:56.217448: step: 824/459, loss: 0.06047426164150238 2023-01-24 02:27:56.834715: step: 826/459, loss: 0.06480038911104202 2023-01-24 02:27:57.435553: step: 828/459, loss: 0.05162873491644859 2023-01-24 02:27:58.045935: step: 830/459, loss: 0.11744977533817291 2023-01-24 02:27:58.643885: step: 832/459, loss: 0.026819705963134766 2023-01-24 02:27:59.244146: step: 834/459, loss: 0.10505102574825287 2023-01-24 02:27:59.956351: step: 836/459, loss: 0.2163470834493637 2023-01-24 02:28:00.530407: step: 838/459, loss: 0.032992660999298096 2023-01-24 02:28:01.135348: step: 840/459, loss: 0.08037274330854416 2023-01-24 02:28:01.729335: step: 842/459, loss: 0.035590216517448425 2023-01-24 02:28:02.321829: step: 844/459, loss: 0.051212672144174576 2023-01-24 02:28:02.991973: step: 846/459, loss: 0.15898790955543518 2023-01-24 02:28:03.543766: step: 848/459, loss: 0.3790505528450012 2023-01-24 02:28:04.229972: step: 850/459, loss: 0.10037035495042801 2023-01-24 02:28:04.814792: step: 852/459, loss: 0.06447765976190567 2023-01-24 02:28:05.445733: step: 854/459, loss: 0.02366582117974758 2023-01-24 02:28:06.066688: step: 856/459, loss: 0.24100680649280548 2023-01-24 02:28:06.742467: step: 858/459, loss: 0.06562105566263199 2023-01-24 02:28:07.360861: step: 860/459, loss: 0.04753232002258301 2023-01-24 02:28:08.021103: step: 862/459, loss: 0.05455365404486656 2023-01-24 02:28:08.639414: step: 864/459, loss: 0.03624941036105156 2023-01-24 02:28:09.266847: step: 866/459, loss: 0.11372513324022293 2023-01-24 02:28:09.879105: step: 868/459, loss: 0.10903168469667435 2023-01-24 02:28:10.507362: step: 870/459, loss: 0.07456669956445694 2023-01-24 02:28:11.107570: step: 872/459, loss: 0.07420925050973892 2023-01-24 02:28:11.755629: step: 874/459, loss: 0.04814403876662254 2023-01-24 02:28:12.339024: step: 876/459, loss: 0.005009477026760578 2023-01-24 02:28:13.023392: step: 878/459, loss: 0.04779409244656563 2023-01-24 02:28:13.663768: step: 880/459, loss: 0.02514307014644146 2023-01-24 02:28:14.306231: step: 882/459, loss: 0.09510721266269684 2023-01-24 02:28:14.823047: step: 884/459, loss: 0.007908480241894722 2023-01-24 02:28:15.384107: step: 886/459, loss: 0.0033190397080034018 2023-01-24 02:28:16.032324: step: 888/459, loss: 0.04485737904906273 2023-01-24 02:28:16.674840: step: 890/459, loss: 0.27437087893486023 2023-01-24 02:28:17.256287: step: 892/459, loss: 0.165922150015831 2023-01-24 02:28:17.884028: step: 894/459, loss: 0.05500934273004532 2023-01-24 02:28:18.521296: step: 896/459, loss: 0.034868765622377396 2023-01-24 02:28:19.186791: step: 898/459, loss: 0.052422091364860535 2023-01-24 02:28:19.858439: step: 900/459, loss: 0.032972343266010284 2023-01-24 02:28:20.454805: step: 902/459, loss: 0.2704108655452728 2023-01-24 02:28:21.098581: step: 904/459, loss: 0.02588648349046707 2023-01-24 02:28:21.726691: step: 906/459, loss: 0.0621635727584362 2023-01-24 02:28:22.314695: step: 908/459, loss: 0.08371716737747192 2023-01-24 02:28:22.911756: step: 910/459, loss: 0.007605513092130423 
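The epoch summaries in this log (for example the epoch-18 block above ending with "Current best result") report, for each language, a 'template' dict, a 'slot' dict and a 'combined' score, while the epoch-level "Loss:" line presumably aggregates the per-step losses. From the logged values, 'f1' is the ordinary harmonic mean of 'p' and 'r', and 'combined' is the product of the template F1 and the slot F1. The short sketch below reproduces the epoch-18 Dev Chinese numbers; only this arithmetic is reconstructed here, the actual evaluation code in train.py is not shown.

def f1(p: float, r: float) -> float:
    # Standard F1: harmonic mean of precision and recall.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined_score(template_f1: float, slot_f1: float) -> float:
    # The 'combined' figure in the log equals template F1 times slot F1.
    return template_f1 * slot_f1

# Epoch-18 Dev Chinese entry above: template p=1.0, r=0.5833333333333334,
# slot f1=0.32254037337773706, combined=0.23766132775201676.
t_f1 = f1(1.0, 0.5833333333333334)                      # -> 0.7368421052631579
assert abs(combined_score(t_f1, 0.32254037337773706) - 0.23766132775201676) < 1e-9
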
2023-01-24 02:28:23.581012: step: 912/459, loss: 0.07607914507389069 2023-01-24 02:28:24.154086: step: 914/459, loss: 0.24229057133197784 2023-01-24 02:28:24.743297: step: 916/459, loss: 0.023043068125844002 2023-01-24 02:28:25.316954: step: 918/459, loss: 0.009195388294756413 2023-01-24 02:28:25.746002: step: 920/459, loss: 0.0008574100211262703 ================================================== Loss: 0.148 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.332647705078125, 'r': 0.3231795540796964, 'f1': 0.3278452839268528}, 'combined': 0.24157020920925995, 'epoch': 19} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.35252998408891095, 'r': 0.2922794049900789, 'f1': 0.31958980664919157}, 'combined': 0.20453747625548258, 'epoch': 19} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32663718929254304, 'r': 0.3241579696394687, 'f1': 0.3253928571428571}, 'combined': 0.2397631578947368, 'epoch': 19} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.35772506055211944, 'r': 0.28741695037991033, 'f1': 0.31873988745461296}, 'combined': 0.20399352797095227, 'epoch': 19} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3340175781838639, 'r': 0.3213413892584801, 'f1': 0.3275568900178317}, 'combined': 0.2413577084341918, 'epoch': 19} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3540615803001952, 'r': 0.2991432694872087, 'f1': 0.32429379164983263}, 'combined': 0.23251252986214416, 'epoch': 19} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3434343434343434, 'r': 0.32380952380952377, 'f1': 0.3333333333333333}, 'combined': 0.2222222222222222, 'epoch': 19} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.2826086956521739, 'f1': 0.2653061224489796}, 'combined': 0.1326530612244898, 'epoch': 19} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2, 'r': 0.06896551724137931, 'f1': 0.10256410256410257}, 'combined': 0.06837606837606838, 'epoch': 19} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 'epoch': 10} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 'epoch': 10} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'epoch': 10} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3342478880342958, 'r': 0.3266369304319968, 'f1': 0.33039858414138645}, 'combined': 0.24345158831470579, 'epoch': 5} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3413499740991752, 'r': 
0.24608229950967814, 'f1': 0.28599105067157526}, 'combined': 0.18303427242980813, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3269230769230769, 'r': 0.3695652173913043, 'f1': 0.346938775510204}, 'combined': 0.173469387755102, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8} ****************************** Epoch: 20 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:30:59.428992: step: 2/459, loss: 0.014940405264496803 2023-01-24 02:31:00.085938: step: 4/459, loss: 0.140610471367836 2023-01-24 02:31:00.729426: step: 6/459, loss: 0.017796948552131653 2023-01-24 02:31:01.364056: step: 8/459, loss: 0.10646424442529678 2023-01-24 02:31:01.982470: step: 10/459, loss: 0.020137490704655647 2023-01-24 02:31:02.565045: step: 12/459, loss: 0.01943928189575672 2023-01-24 02:31:03.094079: step: 14/459, loss: 0.01052146591246128 2023-01-24 02:31:03.665764: step: 16/459, loss: 0.06267571449279785 2023-01-24 02:31:04.270097: step: 18/459, loss: 0.037802524864673615 2023-01-24 02:31:04.872075: step: 20/459, loss: 0.02208045870065689 2023-01-24 02:31:05.405703: step: 22/459, loss: 0.020495424047112465 2023-01-24 02:31:06.034220: step: 24/459, loss: 0.007590212393552065 2023-01-24 02:31:06.616263: step: 26/459, loss: 0.6327555179595947 2023-01-24 02:31:07.269603: step: 28/459, loss: 0.046969860792160034 2023-01-24 02:31:07.947580: step: 30/459, loss: 0.011020619422197342 2023-01-24 02:31:08.535732: step: 32/459, loss: 0.04019872471690178 2023-01-24 02:31:09.151955: step: 34/459, loss: 0.02815617062151432 2023-01-24 02:31:09.742047: step: 36/459, loss: 0.003555057803168893 2023-01-24 02:31:10.332154: step: 38/459, loss: 0.012610325589776039 2023-01-24 02:31:10.971653: step: 40/459, loss: 1.1840729713439941 2023-01-24 02:31:11.632770: step: 42/459, loss: 0.007861725986003876 2023-01-24 02:31:12.235840: step: 44/459, loss: 0.0622323714196682 2023-01-24 02:31:12.848169: step: 46/459, loss: 0.060271404683589935 2023-01-24 02:31:13.444593: step: 48/459, loss: 0.056804973632097244 2023-01-24 02:31:14.069779: step: 50/459, loss: 0.08441536128520966 2023-01-24 02:31:14.712704: step: 52/459, loss: 0.014803184196352959 2023-01-24 02:31:15.321128: step: 54/459, loss: 0.04412374645471573 2023-01-24 02:31:15.914294: step: 56/459, loss: 0.05871342122554779 2023-01-24 02:31:16.524670: step: 58/459, loss: 0.030433623120188713 2023-01-24 02:31:17.142256: step: 60/459, loss: 0.04889079928398132 2023-01-24 02:31:17.775492: step: 62/459, loss: 0.31078240275382996 2023-01-24 02:31:18.402857: step: 64/459, loss: 0.0874224603176117 2023-01-24 02:31:18.990086: step: 66/459, loss: 0.01720076985657215 2023-01-24 02:31:19.621897: step: 68/459, loss: 0.1485835164785385 2023-01-24 
02:31:20.260042: step: 70/459, loss: 0.008321855217218399 2023-01-24 02:31:20.855425: step: 72/459, loss: 0.11127056181430817 2023-01-24 02:31:21.571765: step: 74/459, loss: 0.00546249421313405 2023-01-24 02:31:22.191552: step: 76/459, loss: 0.026361579075455666 2023-01-24 02:31:22.886685: step: 78/459, loss: 0.09299775958061218 2023-01-24 02:31:23.529254: step: 80/459, loss: 0.06739338487386703 2023-01-24 02:31:24.142466: step: 82/459, loss: 0.02624170482158661 2023-01-24 02:31:24.783402: step: 84/459, loss: 0.16857612133026123 2023-01-24 02:31:25.413874: step: 86/459, loss: 0.05208248645067215 2023-01-24 02:31:26.023955: step: 88/459, loss: 0.02007688768208027 2023-01-24 02:31:26.650905: step: 90/459, loss: 0.03261217847466469 2023-01-24 02:31:27.254735: step: 92/459, loss: 0.08434320986270905 2023-01-24 02:31:27.857772: step: 94/459, loss: 0.01952463947236538 2023-01-24 02:31:28.468175: step: 96/459, loss: 0.01262087095528841 2023-01-24 02:31:29.093308: step: 98/459, loss: 0.0507480688393116 2023-01-24 02:31:29.702552: step: 100/459, loss: 0.031595729291439056 2023-01-24 02:31:30.415240: step: 102/459, loss: 0.0343325138092041 2023-01-24 02:31:30.991707: step: 104/459, loss: 0.0015000920975580812 2023-01-24 02:31:31.610507: step: 106/459, loss: 0.04250509664416313 2023-01-24 02:31:32.167094: step: 108/459, loss: 0.0538380891084671 2023-01-24 02:31:32.734629: step: 110/459, loss: 0.27112558484077454 2023-01-24 02:31:33.362951: step: 112/459, loss: 0.03703233599662781 2023-01-24 02:31:33.978878: step: 114/459, loss: 0.024364568293094635 2023-01-24 02:31:34.631286: step: 116/459, loss: 0.2680390477180481 2023-01-24 02:31:35.395124: step: 118/459, loss: 0.052101410925388336 2023-01-24 02:31:35.974478: step: 120/459, loss: 0.05130903422832489 2023-01-24 02:31:36.558140: step: 122/459, loss: 0.04817735776305199 2023-01-24 02:31:37.158484: step: 124/459, loss: 0.03444742038846016 2023-01-24 02:31:37.747898: step: 126/459, loss: 0.042774099856615067 2023-01-24 02:31:38.383451: step: 128/459, loss: 0.027877705171704292 2023-01-24 02:31:38.987787: step: 130/459, loss: 0.05152736231684685 2023-01-24 02:31:39.541344: step: 132/459, loss: 0.3240774869918823 2023-01-24 02:31:40.159522: step: 134/459, loss: 0.011983702890574932 2023-01-24 02:31:40.787073: step: 136/459, loss: 0.04953129589557648 2023-01-24 02:31:41.449452: step: 138/459, loss: 0.006254301406443119 2023-01-24 02:31:42.061849: step: 140/459, loss: 0.037525590509176254 2023-01-24 02:31:42.649189: step: 142/459, loss: 0.019388101994991302 2023-01-24 02:31:43.265528: step: 144/459, loss: 0.027609895914793015 2023-01-24 02:31:43.923526: step: 146/459, loss: 0.05780380219221115 2023-01-24 02:31:44.565825: step: 148/459, loss: 0.09683682024478912 2023-01-24 02:31:45.258215: step: 150/459, loss: 0.03011438623070717 2023-01-24 02:31:45.934184: step: 152/459, loss: 0.01615162566304207 2023-01-24 02:31:46.577529: step: 154/459, loss: 0.018398623913526535 2023-01-24 02:31:47.165320: step: 156/459, loss: 0.0016612517647445202 2023-01-24 02:31:47.722543: step: 158/459, loss: 0.025012541562318802 2023-01-24 02:31:48.338241: step: 160/459, loss: 0.014450779184699059 2023-01-24 02:31:48.926156: step: 162/459, loss: 0.06375013291835785 2023-01-24 02:31:49.560533: step: 164/459, loss: 0.007787941489368677 2023-01-24 02:31:50.147367: step: 166/459, loss: 0.08506912738084793 2023-01-24 02:31:50.805390: step: 168/459, loss: 0.0024246147368103266 2023-01-24 02:31:51.403193: step: 170/459, loss: 0.10224735736846924 2023-01-24 02:31:51.976242: step: 172/459, 
loss: 0.024571174755692482 2023-01-24 02:31:52.624846: step: 174/459, loss: 0.02981138788163662 2023-01-24 02:31:53.300293: step: 176/459, loss: 0.2872546315193176 2023-01-24 02:31:53.925062: step: 178/459, loss: 0.02442961372435093 2023-01-24 02:31:54.527769: step: 180/459, loss: 0.05518132075667381 2023-01-24 02:31:55.179328: step: 182/459, loss: 0.03001190908253193 2023-01-24 02:31:55.785870: step: 184/459, loss: 0.07935965061187744 2023-01-24 02:31:56.400501: step: 186/459, loss: 0.011287820525467396 2023-01-24 02:31:57.008465: step: 188/459, loss: 12.940958023071289 2023-01-24 02:31:57.669431: step: 190/459, loss: 0.1278846263885498 2023-01-24 02:31:58.269358: step: 192/459, loss: 0.020221520215272903 2023-01-24 02:31:58.878773: step: 194/459, loss: 0.06027984619140625 2023-01-24 02:31:59.444084: step: 196/459, loss: 0.004199616145342588 2023-01-24 02:32:00.060151: step: 198/459, loss: 0.009609641507267952 2023-01-24 02:32:00.729319: step: 200/459, loss: 0.3536277115345001 2023-01-24 02:32:01.312360: step: 202/459, loss: 0.1001882404088974 2023-01-24 02:32:01.951191: step: 204/459, loss: 0.32314273715019226 2023-01-24 02:32:02.563632: step: 206/459, loss: 0.023596378043293953 2023-01-24 02:32:03.165868: step: 208/459, loss: 0.0782468318939209 2023-01-24 02:32:03.802227: step: 210/459, loss: 0.0373222716152668 2023-01-24 02:32:04.493407: step: 212/459, loss: 0.02919229492545128 2023-01-24 02:32:05.064349: step: 214/459, loss: 0.004651626106351614 2023-01-24 02:32:05.613924: step: 216/459, loss: 0.07031756639480591 2023-01-24 02:32:06.238815: step: 218/459, loss: 0.06265412271022797 2023-01-24 02:32:06.868025: step: 220/459, loss: 0.04320270195603371 2023-01-24 02:32:07.462152: step: 222/459, loss: 0.07698706537485123 2023-01-24 02:32:08.054304: step: 224/459, loss: 0.057496577501297 2023-01-24 02:32:08.734633: step: 226/459, loss: 0.0002070517948595807 2023-01-24 02:32:09.304767: step: 228/459, loss: 0.04302654415369034 2023-01-24 02:32:10.001819: step: 230/459, loss: 0.048974551260471344 2023-01-24 02:32:10.572550: step: 232/459, loss: 0.05979817733168602 2023-01-24 02:32:11.128621: step: 234/459, loss: 0.0010875466978177428 2023-01-24 02:32:11.723098: step: 236/459, loss: 0.056623127311468124 2023-01-24 02:32:12.335821: step: 238/459, loss: 0.14994795620441437 2023-01-24 02:32:12.952255: step: 240/459, loss: 0.03157693147659302 2023-01-24 02:32:13.496511: step: 242/459, loss: 0.09438183903694153 2023-01-24 02:32:14.107847: step: 244/459, loss: 0.3220146894454956 2023-01-24 02:32:14.707279: step: 246/459, loss: 0.6528304815292358 2023-01-24 02:32:15.271170: step: 248/459, loss: 0.027325822040438652 2023-01-24 02:32:15.880898: step: 250/459, loss: 0.29274529218673706 2023-01-24 02:32:16.584682: step: 252/459, loss: 0.0324740894138813 2023-01-24 02:32:17.248638: step: 254/459, loss: 0.048437345772981644 2023-01-24 02:32:17.940578: step: 256/459, loss: 0.15365710854530334 2023-01-24 02:32:18.550040: step: 258/459, loss: 0.04411028325557709 2023-01-24 02:32:19.224955: step: 260/459, loss: 0.05118612200021744 2023-01-24 02:32:19.875699: step: 262/459, loss: 0.07934235036373138 2023-01-24 02:32:20.469074: step: 264/459, loss: 0.8293138146400452 2023-01-24 02:32:21.161708: step: 266/459, loss: 0.0337531603872776 2023-01-24 02:32:21.797326: step: 268/459, loss: 0.0610637404024601 2023-01-24 02:32:22.516357: step: 270/459, loss: 0.024267857894301414 2023-01-24 02:32:23.110905: step: 272/459, loss: 0.18232430517673492 2023-01-24 02:32:23.656416: step: 274/459, loss: 0.06387472152709961 
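The per-step records in this log all follow the pattern "<timestamp>: step: <n>/459, loss: <value>", and the epoch-level figure printed between the "==========" separators (e.g. "Loss: 0.148" for epoch 19 above) is plausibly the arithmetic mean of those per-step values. A minimal parsing sketch under that assumption; the regex, function names and the mean-based summary below are illustrative and are not taken from train.py:

import re
from statistics import mean

# Each step record in this log looks like:
#   2023-01-24 02:31:00.085938: step: 4/459, loss: 0.140610471367836
STEP_RE = re.compile(
    r"(?P<ts>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+): "
    r"step: (?P<step>\d+)/\d+, loss: (?P<loss>[0-9.eE+-]+)"
)

def step_losses(log_text):
    # Pull every per-step loss value out of a chunk of log text.
    return [float(m.group("loss")) for m in STEP_RE.finditer(log_text)]

def epoch_loss(log_text):
    # Assumption: the "Loss: ..." summary printed at the end of each epoch
    # is the arithmetic mean of the per-step losses; this is inferred from
    # the log, not read from train.py.
    return mean(step_losses(log_text))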
2023-01-24 02:32:24.201174: step: 276/459, loss: 0.0045008487068116665 2023-01-24 02:32:24.802506: step: 278/459, loss: 0.034796375781297684 2023-01-24 02:32:25.405028: step: 280/459, loss: 0.07945174723863602 2023-01-24 02:32:25.979938: step: 282/459, loss: 0.03745526075363159 2023-01-24 02:32:26.573800: step: 284/459, loss: 0.016380568966269493 2023-01-24 02:32:27.212100: step: 286/459, loss: 0.006341065280139446 2023-01-24 02:32:27.825815: step: 288/459, loss: 0.053420234471559525 2023-01-24 02:32:28.523658: step: 290/459, loss: 0.4722435474395752 2023-01-24 02:32:29.132928: step: 292/459, loss: 0.019859854131937027 2023-01-24 02:32:29.729051: step: 294/459, loss: 0.1169092059135437 2023-01-24 02:32:30.332233: step: 296/459, loss: 0.11008717864751816 2023-01-24 02:32:30.978516: step: 298/459, loss: 0.05465245246887207 2023-01-24 02:32:31.582225: step: 300/459, loss: 0.08439292013645172 2023-01-24 02:32:32.200163: step: 302/459, loss: 0.020895784720778465 2023-01-24 02:32:32.863879: step: 304/459, loss: 0.04201433062553406 2023-01-24 02:32:33.536712: step: 306/459, loss: 0.09222754836082458 2023-01-24 02:32:34.167635: step: 308/459, loss: 0.06083659455180168 2023-01-24 02:32:34.694157: step: 310/459, loss: 0.09877226501703262 2023-01-24 02:32:35.256257: step: 312/459, loss: 0.05098579451441765 2023-01-24 02:32:35.937880: step: 314/459, loss: 0.09367850422859192 2023-01-24 02:32:36.572458: step: 316/459, loss: 0.030926978215575218 2023-01-24 02:32:37.133829: step: 318/459, loss: 0.06462505459785461 2023-01-24 02:32:37.718951: step: 320/459, loss: 0.05613197758793831 2023-01-24 02:32:38.385442: step: 322/459, loss: 0.06225021928548813 2023-01-24 02:32:38.984191: step: 324/459, loss: 0.0007425770163536072 2023-01-24 02:32:39.598266: step: 326/459, loss: 0.02161005698144436 2023-01-24 02:32:40.185646: step: 328/459, loss: 0.025674430653452873 2023-01-24 02:32:40.887579: step: 330/459, loss: 0.030679963529109955 2023-01-24 02:32:41.503781: step: 332/459, loss: 0.04151024669408798 2023-01-24 02:32:42.093918: step: 334/459, loss: 0.053428251296281815 2023-01-24 02:32:42.722539: step: 336/459, loss: 0.041755642741918564 2023-01-24 02:32:43.360615: step: 338/459, loss: 0.0209880992770195 2023-01-24 02:32:43.955942: step: 340/459, loss: 0.11120866984128952 2023-01-24 02:32:44.616409: step: 342/459, loss: 0.03199305012822151 2023-01-24 02:32:45.262506: step: 344/459, loss: 0.0796334519982338 2023-01-24 02:32:45.838321: step: 346/459, loss: 0.033540986478328705 2023-01-24 02:32:46.409643: step: 348/459, loss: 0.08334802091121674 2023-01-24 02:32:47.053490: step: 350/459, loss: 0.01941808871924877 2023-01-24 02:32:47.688282: step: 352/459, loss: 0.06909354776144028 2023-01-24 02:32:48.378353: step: 354/459, loss: 0.015690715983510017 2023-01-24 02:32:48.982105: step: 356/459, loss: 0.0031008573714643717 2023-01-24 02:32:49.610430: step: 358/459, loss: 0.13481327891349792 2023-01-24 02:32:50.262859: step: 360/459, loss: 0.029927387833595276 2023-01-24 02:32:50.896093: step: 362/459, loss: 0.1427372545003891 2023-01-24 02:32:51.550458: step: 364/459, loss: 0.06293978542089462 2023-01-24 02:32:52.144525: step: 366/459, loss: 0.014291471801698208 2023-01-24 02:32:52.733928: step: 368/459, loss: 0.02497250773012638 2023-01-24 02:32:53.387275: step: 370/459, loss: 0.03796132653951645 2023-01-24 02:32:54.016805: step: 372/459, loss: 0.05267423763871193 2023-01-24 02:32:54.545050: step: 374/459, loss: 0.053738247603178024 2023-01-24 02:32:55.246232: step: 376/459, loss: 0.10421024262905121 2023-01-24 
02:32:55.849411: step: 378/459, loss: 0.043799273669719696 2023-01-24 02:32:56.475494: step: 380/459, loss: 0.027109988033771515 2023-01-24 02:32:57.060037: step: 382/459, loss: 0.044611550867557526 2023-01-24 02:32:57.753989: step: 384/459, loss: 0.03705955669283867 2023-01-24 02:32:58.362078: step: 386/459, loss: 0.05434653162956238 2023-01-24 02:32:59.007615: step: 388/459, loss: 0.07453730702400208 2023-01-24 02:32:59.642939: step: 390/459, loss: 0.14143605530261993 2023-01-24 02:33:00.307793: step: 392/459, loss: 0.07537195831537247 2023-01-24 02:33:00.953689: step: 394/459, loss: 0.18254296481609344 2023-01-24 02:33:01.599651: step: 396/459, loss: 0.0952092632651329 2023-01-24 02:33:02.207501: step: 398/459, loss: 0.11676359921693802 2023-01-24 02:33:02.789745: step: 400/459, loss: 0.06791603565216064 2023-01-24 02:33:03.360209: step: 402/459, loss: 0.29755550622940063 2023-01-24 02:33:03.920131: step: 404/459, loss: 0.06380373239517212 2023-01-24 02:33:04.573310: step: 406/459, loss: 0.1002907007932663 2023-01-24 02:33:05.218024: step: 408/459, loss: 0.006885298062115908 2023-01-24 02:33:05.846898: step: 410/459, loss: 0.012659495696425438 2023-01-24 02:33:06.416224: step: 412/459, loss: 0.060012463480234146 2023-01-24 02:33:06.965999: step: 414/459, loss: 0.10207698494195938 2023-01-24 02:33:07.589026: step: 416/459, loss: 0.035527776926755905 2023-01-24 02:33:08.196208: step: 418/459, loss: 0.05537581443786621 2023-01-24 02:33:08.898695: step: 420/459, loss: 0.018299374729394913 2023-01-24 02:33:09.490950: step: 422/459, loss: 0.10748773068189621 2023-01-24 02:33:10.099580: step: 424/459, loss: 0.08056985586881638 2023-01-24 02:33:10.730871: step: 426/459, loss: 0.03192351758480072 2023-01-24 02:33:11.330028: step: 428/459, loss: 0.07320230454206467 2023-01-24 02:33:11.863244: step: 430/459, loss: 0.10017694532871246 2023-01-24 02:33:12.460461: step: 432/459, loss: 0.18334238231182098 2023-01-24 02:33:13.196321: step: 434/459, loss: 0.05059414356946945 2023-01-24 02:33:13.903045: step: 436/459, loss: 0.06509370356798172 2023-01-24 02:33:14.512142: step: 438/459, loss: 0.018818171694874763 2023-01-24 02:33:15.091506: step: 440/459, loss: 0.07158081978559494 2023-01-24 02:33:15.742655: step: 442/459, loss: 0.06983327865600586 2023-01-24 02:33:16.362637: step: 444/459, loss: 0.024099625647068024 2023-01-24 02:33:16.936920: step: 446/459, loss: 0.029380492866039276 2023-01-24 02:33:17.577157: step: 448/459, loss: 0.060811255127191544 2023-01-24 02:33:18.174009: step: 450/459, loss: 0.02299421839416027 2023-01-24 02:33:18.733590: step: 452/459, loss: 0.018377576023340225 2023-01-24 02:33:19.347363: step: 454/459, loss: 0.39766785502433777 2023-01-24 02:33:19.963327: step: 456/459, loss: 0.12628090381622314 2023-01-24 02:33:20.594287: step: 458/459, loss: 0.050480373203754425 2023-01-24 02:33:21.197199: step: 460/459, loss: 0.07444126904010773 2023-01-24 02:33:21.892442: step: 462/459, loss: 0.08192811161279678 2023-01-24 02:33:22.490060: step: 464/459, loss: 0.017445126548409462 2023-01-24 02:33:23.039220: step: 466/459, loss: 0.028298841789364815 2023-01-24 02:33:23.628922: step: 468/459, loss: 0.02506200410425663 2023-01-24 02:33:24.188605: step: 470/459, loss: 0.09003667533397675 2023-01-24 02:33:24.812991: step: 472/459, loss: 0.07806745916604996 2023-01-24 02:33:25.490022: step: 474/459, loss: 0.05112246051430702 2023-01-24 02:33:26.165978: step: 476/459, loss: 0.10332406312227249 2023-01-24 02:33:26.835657: step: 478/459, loss: 0.046495500952005386 2023-01-24 02:33:27.408378: 
step: 480/459, loss: 0.08417051285505295 2023-01-24 02:33:27.979143: step: 482/459, loss: 0.07056533545255661 2023-01-24 02:33:28.562960: step: 484/459, loss: 0.04724317789077759 2023-01-24 02:33:29.308574: step: 486/459, loss: 0.08470015227794647 2023-01-24 02:33:29.911260: step: 488/459, loss: 0.26177388429641724 2023-01-24 02:33:30.496626: step: 490/459, loss: 0.0357060544192791 2023-01-24 02:33:31.107122: step: 492/459, loss: 0.03796686232089996 2023-01-24 02:33:31.764536: step: 494/459, loss: 0.057333189994096756 2023-01-24 02:33:32.401263: step: 496/459, loss: 0.021280614659190178 2023-01-24 02:33:33.045065: step: 498/459, loss: 0.03743210434913635 2023-01-24 02:33:33.681551: step: 500/459, loss: 0.005118540022522211 2023-01-24 02:33:34.261138: step: 502/459, loss: 0.004220176488161087 2023-01-24 02:33:34.986874: step: 504/459, loss: 0.05983619764447212 2023-01-24 02:33:35.578236: step: 506/459, loss: 0.062023553997278214 2023-01-24 02:33:36.212906: step: 508/459, loss: 0.027342647314071655 2023-01-24 02:33:36.812193: step: 510/459, loss: 0.03278176486492157 2023-01-24 02:33:37.471173: step: 512/459, loss: 0.011706954799592495 2023-01-24 02:33:38.040480: step: 514/459, loss: 0.02863047644495964 2023-01-24 02:33:38.639604: step: 516/459, loss: 0.0570010207593441 2023-01-24 02:33:39.220096: step: 518/459, loss: 0.41321852803230286 2023-01-24 02:33:39.780062: step: 520/459, loss: 4.631407737731934 2023-01-24 02:33:40.447889: step: 522/459, loss: 0.1173371896147728 2023-01-24 02:33:41.104121: step: 524/459, loss: 0.07268640398979187 2023-01-24 02:33:41.721641: step: 526/459, loss: 0.029155539348721504 2023-01-24 02:33:42.350736: step: 528/459, loss: 0.027444180101156235 2023-01-24 02:33:42.990377: step: 530/459, loss: 0.09756750613451004 2023-01-24 02:33:43.729435: step: 532/459, loss: 0.23922254145145416 2023-01-24 02:33:44.294296: step: 534/459, loss: 0.0369989313185215 2023-01-24 02:33:44.900117: step: 536/459, loss: 0.10172449052333832 2023-01-24 02:33:45.581332: step: 538/459, loss: 0.07407946139574051 2023-01-24 02:33:46.226923: step: 540/459, loss: 0.01469461154192686 2023-01-24 02:33:46.892127: step: 542/459, loss: 0.4955582022666931 2023-01-24 02:33:47.701032: step: 544/459, loss: 0.07215596735477448 2023-01-24 02:33:48.282874: step: 546/459, loss: 0.008863699622452259 2023-01-24 02:33:48.907175: step: 548/459, loss: 0.07366462051868439 2023-01-24 02:33:49.549808: step: 550/459, loss: 0.030925756320357323 2023-01-24 02:33:50.167103: step: 552/459, loss: 0.08989725261926651 2023-01-24 02:33:50.759454: step: 554/459, loss: 0.9941978454589844 2023-01-24 02:33:51.349667: step: 556/459, loss: 0.026637157425284386 2023-01-24 02:33:52.031295: step: 558/459, loss: 0.05105017498135567 2023-01-24 02:33:52.601954: step: 560/459, loss: 0.12361796945333481 2023-01-24 02:33:53.206617: step: 562/459, loss: 0.06770625710487366 2023-01-24 02:33:53.785323: step: 564/459, loss: 0.016385767608880997 2023-01-24 02:33:54.365903: step: 566/459, loss: 0.212424635887146 2023-01-24 02:33:54.971788: step: 568/459, loss: 0.042761143296957016 2023-01-24 02:33:55.546980: step: 570/459, loss: 0.02058424986898899 2023-01-24 02:33:56.213333: step: 572/459, loss: 0.057158540934324265 2023-01-24 02:33:56.816890: step: 574/459, loss: 0.03722098097205162 2023-01-24 02:33:57.417552: step: 576/459, loss: 0.023424623534083366 2023-01-24 02:33:57.996085: step: 578/459, loss: 0.0547776035964489 2023-01-24 02:33:58.593539: step: 580/459, loss: 0.04696127772331238 2023-01-24 02:33:59.276215: step: 582/459, loss: 
0.039314862340688705 2023-01-24 02:33:59.928923: step: 584/459, loss: 0.04967000335454941 2023-01-24 02:34:00.534105: step: 586/459, loss: 0.12650571763515472 2023-01-24 02:34:01.143957: step: 588/459, loss: 0.08114660531282425 2023-01-24 02:34:01.777268: step: 590/459, loss: 0.0464356392621994 2023-01-24 02:34:02.332184: step: 592/459, loss: 0.050955310463905334 2023-01-24 02:34:02.965458: step: 594/459, loss: 0.30600816011428833 2023-01-24 02:34:03.605699: step: 596/459, loss: 0.9174012541770935 2023-01-24 02:34:04.181851: step: 598/459, loss: 0.00379646266810596 2023-01-24 02:34:04.744109: step: 600/459, loss: 0.02450074814260006 2023-01-24 02:34:05.380982: step: 602/459, loss: 0.011895151808857918 2023-01-24 02:34:05.943203: step: 604/459, loss: 0.02448103576898575 2023-01-24 02:34:06.596220: step: 606/459, loss: 0.04125193506479263 2023-01-24 02:34:07.127010: step: 608/459, loss: 0.038478951901197433 2023-01-24 02:34:07.707929: step: 610/459, loss: 0.17389792203903198 2023-01-24 02:34:08.326527: step: 612/459, loss: 0.17099300026893616 2023-01-24 02:34:08.997075: step: 614/459, loss: 0.03417222574353218 2023-01-24 02:34:09.633069: step: 616/459, loss: 0.05662145838141441 2023-01-24 02:34:10.269531: step: 618/459, loss: 0.07087621092796326 2023-01-24 02:34:10.935566: step: 620/459, loss: 0.32739734649658203 2023-01-24 02:34:11.543500: step: 622/459, loss: 0.8380759358406067 2023-01-24 02:34:12.159975: step: 624/459, loss: 0.03383093327283859 2023-01-24 02:34:12.798399: step: 626/459, loss: 0.07127579301595688 2023-01-24 02:34:13.425195: step: 628/459, loss: 1.3526668548583984 2023-01-24 02:34:14.058719: step: 630/459, loss: 0.11265580356121063 2023-01-24 02:34:14.705124: step: 632/459, loss: 0.014403901994228363 2023-01-24 02:34:15.370511: step: 634/459, loss: 0.11302470415830612 2023-01-24 02:34:16.020463: step: 636/459, loss: 0.11631527543067932 2023-01-24 02:34:16.745521: step: 638/459, loss: 0.023031633347272873 2023-01-24 02:34:17.360191: step: 640/459, loss: 0.0512913316488266 2023-01-24 02:34:17.953504: step: 642/459, loss: 0.06034834310412407 2023-01-24 02:34:18.587535: step: 644/459, loss: 0.06365146487951279 2023-01-24 02:34:19.173044: step: 646/459, loss: 0.002392749534919858 2023-01-24 02:34:19.828853: step: 648/459, loss: 0.03239598497748375 2023-01-24 02:34:20.427231: step: 650/459, loss: 0.20589740574359894 2023-01-24 02:34:21.110695: step: 652/459, loss: 0.084620401263237 2023-01-24 02:34:21.737203: step: 654/459, loss: 0.3713834285736084 2023-01-24 02:34:22.355061: step: 656/459, loss: 0.028818883001804352 2023-01-24 02:34:22.967918: step: 658/459, loss: 0.0351385772228241 2023-01-24 02:34:23.612750: step: 660/459, loss: 0.02786829136312008 2023-01-24 02:34:24.215147: step: 662/459, loss: 0.05894472077488899 2023-01-24 02:34:24.843271: step: 664/459, loss: 0.04295730963349342 2023-01-24 02:34:25.481934: step: 666/459, loss: 0.025312094017863274 2023-01-24 02:34:26.238566: step: 668/459, loss: 0.04253789037466049 2023-01-24 02:34:26.838237: step: 670/459, loss: 0.017303263768553734 2023-01-24 02:34:27.454648: step: 672/459, loss: 0.008018742315471172 2023-01-24 02:34:28.082795: step: 674/459, loss: 0.007718229666352272 2023-01-24 02:34:28.630780: step: 676/459, loss: 0.04924841597676277 2023-01-24 02:34:29.307670: step: 678/459, loss: 0.07254641503095627 2023-01-24 02:34:29.988326: step: 680/459, loss: 0.04879157245159149 2023-01-24 02:34:30.637927: step: 682/459, loss: 0.021072953939437866 2023-01-24 02:34:31.280629: step: 684/459, loss: 0.0739382803440094 2023-01-24 
02:34:31.883339: step: 686/459, loss: 0.07250259071588516 2023-01-24 02:34:32.483547: step: 688/459, loss: 0.5106527805328369 2023-01-24 02:34:33.194320: step: 690/459, loss: 0.06987520307302475 2023-01-24 02:34:33.844386: step: 692/459, loss: 0.17014357447624207 2023-01-24 02:34:34.465790: step: 694/459, loss: 0.023349417373538017 2023-01-24 02:34:35.052674: step: 696/459, loss: 0.021580088883638382 2023-01-24 02:34:35.677503: step: 698/459, loss: 0.10650444775819778 2023-01-24 02:34:36.346951: step: 700/459, loss: 0.0294220969080925 2023-01-24 02:34:36.944559: step: 702/459, loss: 0.032890208065509796 2023-01-24 02:34:37.562865: step: 704/459, loss: 0.036044828593730927 2023-01-24 02:34:38.130842: step: 706/459, loss: 0.07633000612258911 2023-01-24 02:34:38.732280: step: 708/459, loss: 0.032051511108875275 2023-01-24 02:34:39.286988: step: 710/459, loss: 0.06507841497659683 2023-01-24 02:34:39.909892: step: 712/459, loss: 0.0349886417388916 2023-01-24 02:34:40.581655: step: 714/459, loss: 0.0669262632727623 2023-01-24 02:34:41.188879: step: 716/459, loss: 0.2546699047088623 2023-01-24 02:34:41.804727: step: 718/459, loss: 0.09404171258211136 2023-01-24 02:34:42.396540: step: 720/459, loss: 0.04709174856543541 2023-01-24 02:34:42.989263: step: 722/459, loss: 0.050547197461128235 2023-01-24 02:34:43.672781: step: 724/459, loss: 0.049189936369657516 2023-01-24 02:34:44.266978: step: 726/459, loss: 0.01294037140905857 2023-01-24 02:34:44.839436: step: 728/459, loss: 0.16536548733711243 2023-01-24 02:34:45.406033: step: 730/459, loss: 0.024516399949789047 2023-01-24 02:34:45.973195: step: 732/459, loss: 0.061286915093660355 2023-01-24 02:34:46.586346: step: 734/459, loss: 0.028722845017910004 2023-01-24 02:34:47.169112: step: 736/459, loss: 0.03797425702214241 2023-01-24 02:34:47.724127: step: 738/459, loss: 0.03552486002445221 2023-01-24 02:34:48.402990: step: 740/459, loss: 0.528389573097229 2023-01-24 02:34:49.008766: step: 742/459, loss: 0.08148670196533203 2023-01-24 02:34:49.634685: step: 744/459, loss: 0.04483341425657272 2023-01-24 02:34:50.322548: step: 746/459, loss: 0.03920360654592514 2023-01-24 02:34:50.917895: step: 748/459, loss: 0.0260970089584589 2023-01-24 02:34:51.553972: step: 750/459, loss: 0.05972394347190857 2023-01-24 02:34:52.204068: step: 752/459, loss: 0.044780388474464417 2023-01-24 02:34:52.783991: step: 754/459, loss: 0.023735765367746353 2023-01-24 02:34:53.427778: step: 756/459, loss: 0.0308105256408453 2023-01-24 02:34:54.085800: step: 758/459, loss: 0.036645904183387756 2023-01-24 02:34:54.713180: step: 760/459, loss: 0.005911178421229124 2023-01-24 02:34:55.338126: step: 762/459, loss: 0.08233819156885147 2023-01-24 02:34:56.051560: step: 764/459, loss: 0.07062827795743942 2023-01-24 02:34:56.655998: step: 766/459, loss: 0.09541213512420654 2023-01-24 02:34:57.256794: step: 768/459, loss: 0.04619116708636284 2023-01-24 02:34:57.870939: step: 770/459, loss: 0.25403714179992676 2023-01-24 02:34:58.486771: step: 772/459, loss: 0.04808074235916138 2023-01-24 02:34:59.106292: step: 774/459, loss: 0.006003272719681263 2023-01-24 02:34:59.712533: step: 776/459, loss: 0.06431656330823898 2023-01-24 02:35:00.331391: step: 778/459, loss: 0.029419580474495888 2023-01-24 02:35:00.956754: step: 780/459, loss: 0.054221250116825104 2023-01-24 02:35:01.577495: step: 782/459, loss: 0.04068940505385399 2023-01-24 02:35:02.292419: step: 784/459, loss: 0.0075583984144032 2023-01-24 02:35:02.926147: step: 786/459, loss: 0.022211935371160507 2023-01-24 02:35:03.603214: step: 
788/459, loss: 0.048528846353292465 2023-01-24 02:35:04.176307: step: 790/459, loss: 0.20539072155952454 2023-01-24 02:35:04.825854: step: 792/459, loss: 0.2753973603248596 2023-01-24 02:35:05.458890: step: 794/459, loss: 0.0520162358880043 2023-01-24 02:35:06.079798: step: 796/459, loss: 0.07443539053201675 2023-01-24 02:35:06.698765: step: 798/459, loss: 0.05468644201755524 2023-01-24 02:35:07.289439: step: 800/459, loss: 0.12016861885786057 2023-01-24 02:35:07.892301: step: 802/459, loss: 0.015472847037017345 2023-01-24 02:35:08.491253: step: 804/459, loss: 0.07333249598741531 2023-01-24 02:35:09.084643: step: 806/459, loss: 0.0946127399802208 2023-01-24 02:35:09.717391: step: 808/459, loss: 0.01624084636569023 2023-01-24 02:35:10.302942: step: 810/459, loss: 0.03784452751278877 2023-01-24 02:35:10.942022: step: 812/459, loss: 0.0730871707201004 2023-01-24 02:35:11.530975: step: 814/459, loss: 0.06964240223169327 2023-01-24 02:35:12.132846: step: 816/459, loss: 0.027750195935368538 2023-01-24 02:35:12.753449: step: 818/459, loss: 0.5473633408546448 2023-01-24 02:35:13.387292: step: 820/459, loss: 0.07515460252761841 2023-01-24 02:35:13.965120: step: 822/459, loss: 0.05095948651432991 2023-01-24 02:35:14.574389: step: 824/459, loss: 0.014735664241015911 2023-01-24 02:35:15.197152: step: 826/459, loss: 0.005040908697992563 2023-01-24 02:35:15.755722: step: 828/459, loss: 0.06019521877169609 2023-01-24 02:35:16.420808: step: 830/459, loss: 0.2413157820701599 2023-01-24 02:35:17.115723: step: 832/459, loss: 0.024689486250281334 2023-01-24 02:35:17.721913: step: 834/459, loss: 0.015617852099239826 2023-01-24 02:35:18.327379: step: 836/459, loss: 0.06052683666348457 2023-01-24 02:35:18.932046: step: 838/459, loss: 0.07546380162239075 2023-01-24 02:35:19.606225: step: 840/459, loss: 0.14347809553146362 2023-01-24 02:35:20.307880: step: 842/459, loss: 0.2700725793838501 2023-01-24 02:35:20.903786: step: 844/459, loss: 0.0625152513384819 2023-01-24 02:35:21.474897: step: 846/459, loss: 0.12293127179145813 2023-01-24 02:35:22.064254: step: 848/459, loss: 0.06978929787874222 2023-01-24 02:35:22.731628: step: 850/459, loss: 0.05367438495159149 2023-01-24 02:35:23.313732: step: 852/459, loss: 0.11705374717712402 2023-01-24 02:35:23.885748: step: 854/459, loss: 0.057661060243844986 2023-01-24 02:35:24.597537: step: 856/459, loss: 0.022967005148530006 2023-01-24 02:35:25.224993: step: 858/459, loss: 0.0334920696914196 2023-01-24 02:35:25.865333: step: 860/459, loss: 0.10940772294998169 2023-01-24 02:35:26.505294: step: 862/459, loss: 0.057638511061668396 2023-01-24 02:35:27.122131: step: 864/459, loss: 0.03772168979048729 2023-01-24 02:35:27.911361: step: 866/459, loss: 0.05963517352938652 2023-01-24 02:35:28.595020: step: 868/459, loss: 0.08069335669279099 2023-01-24 02:35:29.298647: step: 870/459, loss: 0.3193773627281189 2023-01-24 02:35:29.944367: step: 872/459, loss: 0.03972264379262924 2023-01-24 02:35:30.572423: step: 874/459, loss: 0.16720527410507202 2023-01-24 02:35:31.200678: step: 876/459, loss: 0.045246489346027374 2023-01-24 02:35:31.790523: step: 878/459, loss: 0.04141484573483467 2023-01-24 02:35:32.446881: step: 880/459, loss: 0.1452389657497406 2023-01-24 02:35:33.052106: step: 882/459, loss: 0.2271365076303482 2023-01-24 02:35:33.691653: step: 884/459, loss: 0.037754300981760025 2023-01-24 02:35:34.337670: step: 886/459, loss: 0.030902199447155 2023-01-24 02:35:34.993144: step: 888/459, loss: 0.02367168478667736 2023-01-24 02:35:35.650688: step: 890/459, loss: 0.08381428569555283 
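Each evaluation block in this log reports precision ('p'), recall ('r') and 'f1' at the template and slot level. The printed 'f1' values are consistent with the usual harmonic mean of the corresponding 'p' and 'r'; for example the recurring dev template score (p = 1.0, r = 0.5833...) gives f1 = 0.7368..., exactly as shown. A small self-contained check (the helper name is illustrative, not from train.py):

def f1(p, r):
    # Harmonic mean of precision and recall; defined as 0 when both are 0.
    return 2 * p * r / (p + r) if (p + r) else 0.0

# Reproduces the template F1 values printed in the evaluation blocks:
assert abs(f1(1.0, 0.5833333333333334) - 0.7368421052631579) < 1e-12
assert abs(f1(0.927536231884058, 0.48854961832061067) - 0.64) < 1e-12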
2023-01-24 02:35:36.214266: step: 892/459, loss: 0.07345903664827347 2023-01-24 02:35:36.773623: step: 894/459, loss: 0.0747426226735115 2023-01-24 02:35:37.467905: step: 896/459, loss: 0.0381978414952755 2023-01-24 02:35:38.109739: step: 898/459, loss: 0.09712815284729004 2023-01-24 02:35:38.752327: step: 900/459, loss: 0.07503015547990799 2023-01-24 02:35:39.391766: step: 902/459, loss: 0.2151593267917633 2023-01-24 02:35:39.995781: step: 904/459, loss: 0.03266777843236923 2023-01-24 02:35:40.617631: step: 906/459, loss: 1.4690715074539185 2023-01-24 02:35:41.202439: step: 908/459, loss: 0.14419664442539215 2023-01-24 02:35:41.872682: step: 910/459, loss: 0.095465287566185 2023-01-24 02:35:42.439358: step: 912/459, loss: 0.09554950147867203 2023-01-24 02:35:43.011342: step: 914/459, loss: 0.5206647515296936 2023-01-24 02:35:43.636965: step: 916/459, loss: 0.10245165228843689 2023-01-24 02:35:44.272371: step: 918/459, loss: 0.1089090034365654 2023-01-24 02:35:44.796814: step: 920/459, loss: 2.173919256165391e-06 ================================================== Loss: 0.130 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3456407132446711, 'r': 0.3272765007762635, 'f1': 0.33620802321460214}, 'combined': 0.24773222763181207, 'epoch': 20} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.349365342663776, 'r': 0.2953725169793742, 'f1': 0.32010814648011}, 'combined': 0.20486921374727035, 'epoch': 20} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33518621271306814, 'r': 0.32564580817664307, 'f1': 0.33034714323212877}, 'combined': 0.2434136844868317, 'epoch': 20} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3511818194847562, 'r': 0.29275793497047403, 'f1': 0.3193195126103336}, 'combined': 0.20436448807061347, 'epoch': 20} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.36088655538616127, 'r': 0.3355491691446281, 'f1': 0.3477569560259961}, 'combined': 0.25624196759810236, 'epoch': 20} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3509920131384371, 'r': 0.3116732357103608, 'f1': 0.33016614839360947}, 'combined': 0.23672289884824832, 'epoch': 20} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2845238095238095, 'r': 0.2845238095238095, 'f1': 0.2845238095238095}, 'combined': 0.18968253968253967, 'epoch': 20} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2403846153846154, 'r': 0.2717391304347826, 'f1': 0.25510204081632654}, 'combined': 0.12755102040816327, 'epoch': 20} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2727272727272727, 'r': 0.10344827586206896, 'f1': 0.15}, 'combined': 0.09999999999999999, 'epoch': 20} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 'epoch': 10} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 
0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 'epoch': 10} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'epoch': 10} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3342478880342958, 'r': 0.3266369304319968, 'f1': 0.33039858414138645}, 'combined': 0.24345158831470579, 'epoch': 5} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3413499740991752, 'r': 0.24608229950967814, 'f1': 0.28599105067157526}, 'combined': 0.18303427242980813, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3269230769230769, 'r': 0.3695652173913043, 'f1': 0.346938775510204}, 'combined': 0.173469387755102, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8} ****************************** Epoch: 21 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:38:20.516969: step: 2/459, loss: 0.04175436496734619 2023-01-24 02:38:21.292071: step: 4/459, loss: 0.020804904401302338 2023-01-24 02:38:21.921503: step: 6/459, loss: 0.09828265756368637 2023-01-24 02:38:22.488840: step: 8/459, loss: 0.020189102739095688 2023-01-24 02:38:23.067735: step: 10/459, loss: 0.05470661073923111 2023-01-24 02:38:23.706223: step: 12/459, loss: 0.04470071569085121 2023-01-24 02:38:24.290528: step: 14/459, loss: 0.05810379981994629 2023-01-24 02:38:24.889667: step: 16/459, loss: 0.05176740139722824 2023-01-24 02:38:25.476178: step: 18/459, loss: 0.06217209994792938 2023-01-24 02:38:26.144827: step: 20/459, loss: 0.13971483707427979 2023-01-24 02:38:26.745264: step: 22/459, loss: 0.265951544046402 2023-01-24 02:38:27.394023: step: 24/459, loss: 0.007855880074203014 2023-01-24 02:38:28.047417: step: 26/459, loss: 0.04450293630361557 2023-01-24 02:38:28.668791: step: 28/459, loss: 0.05643085017800331 2023-01-24 02:38:29.242601: step: 30/459, loss: 0.05316291004419327 2023-01-24 02:38:29.861773: step: 32/459, loss: 0.10079570859670639 2023-01-24 02:38:30.456187: step: 34/459, loss: 0.04103732109069824 2023-01-24 02:38:31.072880: step: 36/459, loss: 0.20999088883399963 2023-01-24 02:38:31.658955: step: 38/459, loss: 0.007740084081888199 2023-01-24 02:38:32.289363: step: 40/459, loss: 0.0013154454063624144 2023-01-24 02:38:32.888399: step: 42/459, loss: 0.01564539596438408 2023-01-24 02:38:33.473205: step: 44/459, loss: 0.4317435622215271 2023-01-24 02:38:34.090268: step: 46/459, loss: 0.013941795565187931 2023-01-24 02:38:34.755345: step: 48/459, loss: 0.06744975596666336 
2023-01-24 02:38:35.476093: step: 50/459, loss: 0.02318497560918331 2023-01-24 02:38:36.050049: step: 52/459, loss: 0.01876458153128624 2023-01-24 02:38:36.703323: step: 54/459, loss: 0.06455294042825699 2023-01-24 02:38:37.347676: step: 56/459, loss: 0.019117943942546844 2023-01-24 02:38:38.029801: step: 58/459, loss: 0.011972843669354916 2023-01-24 02:38:38.733721: step: 60/459, loss: 0.008588564582169056 2023-01-24 02:38:39.373624: step: 62/459, loss: 0.02504478022456169 2023-01-24 02:38:40.003145: step: 64/459, loss: 0.6418391466140747 2023-01-24 02:38:40.554010: step: 66/459, loss: 0.019706960767507553 2023-01-24 02:38:41.159733: step: 68/459, loss: 0.3326074481010437 2023-01-24 02:38:41.722885: step: 70/459, loss: 0.1346128135919571 2023-01-24 02:38:42.283420: step: 72/459, loss: 0.014117256738245487 2023-01-24 02:38:42.894979: step: 74/459, loss: 0.049445681273937225 2023-01-24 02:38:43.535343: step: 76/459, loss: 0.02771441824734211 2023-01-24 02:38:44.178606: step: 78/459, loss: 0.02787897363305092 2023-01-24 02:38:44.793922: step: 80/459, loss: 0.2799614667892456 2023-01-24 02:38:45.400718: step: 82/459, loss: 0.0256597138941288 2023-01-24 02:38:46.095072: step: 84/459, loss: 0.0231012012809515 2023-01-24 02:38:46.683878: step: 86/459, loss: 0.042492955923080444 2023-01-24 02:38:47.362727: step: 88/459, loss: 0.10340433567762375 2023-01-24 02:38:48.020594: step: 90/459, loss: 0.20467080175876617 2023-01-24 02:38:48.700556: step: 92/459, loss: 0.005772710777819157 2023-01-24 02:38:49.366288: step: 94/459, loss: 0.08799021691083908 2023-01-24 02:38:50.012977: step: 96/459, loss: 0.09023118019104004 2023-01-24 02:38:50.735061: step: 98/459, loss: 0.07065694779157639 2023-01-24 02:38:51.366424: step: 100/459, loss: 0.021769290789961815 2023-01-24 02:38:51.979413: step: 102/459, loss: 0.09475356340408325 2023-01-24 02:38:52.585611: step: 104/459, loss: 0.01306947972625494 2023-01-24 02:38:53.207082: step: 106/459, loss: 0.003895478555932641 2023-01-24 02:38:53.797223: step: 108/459, loss: 0.04566316306591034 2023-01-24 02:38:54.449012: step: 110/459, loss: 0.01629900187253952 2023-01-24 02:38:55.131614: step: 112/459, loss: 0.7792109847068787 2023-01-24 02:38:55.791537: step: 114/459, loss: 0.04020761325955391 2023-01-24 02:38:56.384619: step: 116/459, loss: 0.3227381706237793 2023-01-24 02:38:57.039480: step: 118/459, loss: 0.03904323652386665 2023-01-24 02:38:57.636534: step: 120/459, loss: 0.05060930922627449 2023-01-24 02:38:58.287048: step: 122/459, loss: 0.025871336460113525 2023-01-24 02:38:58.859970: step: 124/459, loss: 0.009960929863154888 2023-01-24 02:38:59.455705: step: 126/459, loss: 0.09856385737657547 2023-01-24 02:39:00.160269: step: 128/459, loss: 0.05878306180238724 2023-01-24 02:39:00.762485: step: 130/459, loss: 0.1019957885146141 2023-01-24 02:39:01.371188: step: 132/459, loss: 0.011747331358492374 2023-01-24 02:39:01.966663: step: 134/459, loss: 0.004856936167925596 2023-01-24 02:39:02.633653: step: 136/459, loss: 0.0670374184846878 2023-01-24 02:39:03.307250: step: 138/459, loss: 0.04271630942821503 2023-01-24 02:39:03.932408: step: 140/459, loss: 0.02723216451704502 2023-01-24 02:39:04.547661: step: 142/459, loss: 0.04679373651742935 2023-01-24 02:39:05.176291: step: 144/459, loss: 0.039268363267183304 2023-01-24 02:39:05.811791: step: 146/459, loss: 0.06587547808885574 2023-01-24 02:39:06.456649: step: 148/459, loss: 0.01884562149643898 2023-01-24 02:39:07.106304: step: 150/459, loss: 0.026752078905701637 2023-01-24 02:39:07.806801: step: 152/459, loss: 
0.008258668705821037 2023-01-24 02:39:08.386852: step: 154/459, loss: 0.6376261115074158 2023-01-24 02:39:09.050698: step: 156/459, loss: 0.1839842051267624 2023-01-24 02:39:09.682686: step: 158/459, loss: 0.10447844862937927 2023-01-24 02:39:10.336273: step: 160/459, loss: 0.10252963751554489 2023-01-24 02:39:10.939165: step: 162/459, loss: 0.04445028677582741 2023-01-24 02:39:11.569507: step: 164/459, loss: 0.06345675885677338 2023-01-24 02:39:12.112298: step: 166/459, loss: 0.052255794405937195 2023-01-24 02:39:12.685111: step: 168/459, loss: 0.030527641996741295 2023-01-24 02:39:13.268754: step: 170/459, loss: 0.008864779025316238 2023-01-24 02:39:13.860351: step: 172/459, loss: 0.0851624459028244 2023-01-24 02:39:14.436666: step: 174/459, loss: 0.012585651129484177 2023-01-24 02:39:15.092419: step: 176/459, loss: 0.048929356038570404 2023-01-24 02:39:15.670033: step: 178/459, loss: 0.008619416505098343 2023-01-24 02:39:16.289290: step: 180/459, loss: 0.08180616050958633 2023-01-24 02:39:16.922430: step: 182/459, loss: 0.03790799900889397 2023-01-24 02:39:17.533004: step: 184/459, loss: 0.005703766830265522 2023-01-24 02:39:18.136671: step: 186/459, loss: 0.024648243561387062 2023-01-24 02:39:18.775114: step: 188/459, loss: 0.031450528651475906 2023-01-24 02:39:19.447747: step: 190/459, loss: 0.032325152307748795 2023-01-24 02:39:20.044281: step: 192/459, loss: 0.0209133792668581 2023-01-24 02:39:20.664166: step: 194/459, loss: 0.012814260087907314 2023-01-24 02:39:21.293922: step: 196/459, loss: 0.02398822270333767 2023-01-24 02:39:21.895365: step: 198/459, loss: 0.0062651727348566055 2023-01-24 02:39:22.561211: step: 200/459, loss: 0.09325066953897476 2023-01-24 02:39:23.256675: step: 202/459, loss: 0.02169969119131565 2023-01-24 02:39:23.907790: step: 204/459, loss: 0.33702796697616577 2023-01-24 02:39:24.549995: step: 206/459, loss: 0.022086335346102715 2023-01-24 02:39:25.171808: step: 208/459, loss: 0.1444537341594696 2023-01-24 02:39:25.869798: step: 210/459, loss: 0.02056743949651718 2023-01-24 02:39:26.472471: step: 212/459, loss: 0.3397158086299896 2023-01-24 02:39:27.099772: step: 214/459, loss: 0.1026570126414299 2023-01-24 02:39:27.759711: step: 216/459, loss: 0.06112322211265564 2023-01-24 02:39:28.358497: step: 218/459, loss: 0.018580185249447823 2023-01-24 02:39:28.972271: step: 220/459, loss: 1.2812316417694092 2023-01-24 02:39:29.612563: step: 222/459, loss: 0.037728454917669296 2023-01-24 02:39:30.239526: step: 224/459, loss: 0.07463113963603973 2023-01-24 02:39:30.856448: step: 226/459, loss: 0.09522107988595963 2023-01-24 02:39:31.442326: step: 228/459, loss: 0.01493317261338234 2023-01-24 02:39:32.006920: step: 230/459, loss: 0.04138581454753876 2023-01-24 02:39:32.674118: step: 232/459, loss: 0.010628330521285534 2023-01-24 02:39:33.300609: step: 234/459, loss: 0.32590368390083313 2023-01-24 02:39:33.901693: step: 236/459, loss: 0.028171690180897713 2023-01-24 02:39:34.466414: step: 238/459, loss: 0.01602230779826641 2023-01-24 02:39:35.063888: step: 240/459, loss: 0.005676749162375927 2023-01-24 02:39:35.674983: step: 242/459, loss: 0.021910423412919044 2023-01-24 02:39:36.307101: step: 244/459, loss: 0.10355304181575775 2023-01-24 02:39:37.031932: step: 246/459, loss: 0.16978049278259277 2023-01-24 02:39:37.679288: step: 248/459, loss: 0.0020349605474621058 2023-01-24 02:39:38.284517: step: 250/459, loss: 0.024898743256926537 2023-01-24 02:39:38.934664: step: 252/459, loss: 0.007065415848046541 2023-01-24 02:39:39.531729: step: 254/459, loss: 
0.01621568575501442 2023-01-24 02:39:40.112605: step: 256/459, loss: 0.037742335349321365 2023-01-24 02:39:40.709253: step: 258/459, loss: 0.1852014660835266 2023-01-24 02:39:41.250518: step: 260/459, loss: 0.04281001165509224 2023-01-24 02:39:41.853001: step: 262/459, loss: 0.012592978775501251 2023-01-24 02:39:42.462126: step: 264/459, loss: 0.029264988377690315 2023-01-24 02:39:43.060983: step: 266/459, loss: 0.1518438458442688 2023-01-24 02:39:43.737677: step: 268/459, loss: 0.037162475287914276 2023-01-24 02:39:44.332760: step: 270/459, loss: 0.21132510900497437 2023-01-24 02:39:44.960604: step: 272/459, loss: 0.03471281751990318 2023-01-24 02:39:45.577699: step: 274/459, loss: 0.008231468498706818 2023-01-24 02:39:46.170644: step: 276/459, loss: 0.027825849130749702 2023-01-24 02:39:46.860812: step: 278/459, loss: 0.02127872221171856 2023-01-24 02:39:47.479431: step: 280/459, loss: 0.028327301144599915 2023-01-24 02:39:48.050067: step: 282/459, loss: 0.07137411832809448 2023-01-24 02:39:48.758839: step: 284/459, loss: 0.02854936197400093 2023-01-24 02:39:49.452407: step: 286/459, loss: 0.05451096594333649 2023-01-24 02:39:50.146143: step: 288/459, loss: 0.08784028142690659 2023-01-24 02:39:50.787629: step: 290/459, loss: 0.10952906310558319 2023-01-24 02:39:51.433917: step: 292/459, loss: 0.041637640446424484 2023-01-24 02:39:52.015095: step: 294/459, loss: 0.05728711932897568 2023-01-24 02:39:52.640661: step: 296/459, loss: 0.060862522572278976 2023-01-24 02:39:53.284238: step: 298/459, loss: 0.1324399709701538 2023-01-24 02:39:53.961736: step: 300/459, loss: 0.05888011306524277 2023-01-24 02:39:54.531117: step: 302/459, loss: 0.008057131431996822 2023-01-24 02:39:55.173198: step: 304/459, loss: 0.03458218649029732 2023-01-24 02:39:55.890594: step: 306/459, loss: 0.0023210898507386446 2023-01-24 02:39:56.559156: step: 308/459, loss: 0.033704034984111786 2023-01-24 02:39:57.175495: step: 310/459, loss: 0.09547307342290878 2023-01-24 02:39:57.873841: step: 312/459, loss: 0.05855901539325714 2023-01-24 02:39:58.522853: step: 314/459, loss: 0.06603360176086426 2023-01-24 02:39:59.103716: step: 316/459, loss: 0.04100651293992996 2023-01-24 02:39:59.687719: step: 318/459, loss: 0.014795291237533092 2023-01-24 02:40:00.334124: step: 320/459, loss: 0.049644533544778824 2023-01-24 02:40:01.008961: step: 322/459, loss: 0.018985824659466743 2023-01-24 02:40:01.599696: step: 324/459, loss: 0.034203652292490005 2023-01-24 02:40:02.277911: step: 326/459, loss: 0.05411852151155472 2023-01-24 02:40:02.891753: step: 328/459, loss: 0.060771554708480835 2023-01-24 02:40:03.511952: step: 330/459, loss: 0.0883699581027031 2023-01-24 02:40:04.110249: step: 332/459, loss: 0.036465007811784744 2023-01-24 02:40:04.797310: step: 334/459, loss: 0.10485399514436722 2023-01-24 02:40:05.505174: step: 336/459, loss: 0.2992021441459656 2023-01-24 02:40:06.062786: step: 338/459, loss: 0.00482220109552145 2023-01-24 02:40:06.615260: step: 340/459, loss: 0.05033537745475769 2023-01-24 02:40:07.217191: step: 342/459, loss: 0.02262873575091362 2023-01-24 02:40:07.796152: step: 344/459, loss: 0.7788990139961243 2023-01-24 02:40:08.434450: step: 346/459, loss: 0.03206572309136391 2023-01-24 02:40:09.033392: step: 348/459, loss: 0.04114873334765434 2023-01-24 02:40:09.675678: step: 350/459, loss: 0.12566237151622772 2023-01-24 02:40:10.344025: step: 352/459, loss: 0.033024366945028305 2023-01-24 02:40:10.937037: step: 354/459, loss: 0.007002884056419134 2023-01-24 02:40:11.520222: step: 356/459, loss: 0.015748266130685806 
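The "Current best result" blocks are still reporting epoch 10 for Chinese, epoch 5 for Korean and epoch 8 for Russian even though training has reached epoch 21, which indicates that the best checkpoint is tracked per language and only replaced when a later epoch improves the dev 'combined' score. A hedged sketch of that bookkeeping (the selection rule is inferred from the log, and all names are illustrative):

# best[lang] holds the metrics of the best epoch seen so far for that language.
best = {}

def update_best(lang, dev_metrics, test_metrics, sample_metrics, epoch):
    # Keep the epoch whose dev 'combined' score is highest for this language;
    # otherwise leave the stored "Current best result" untouched.
    current = best.get(lang)
    if current is None or dev_metrics["combined"] > current["dev"]["combined"]:
        best[lang] = {
            "dev": dev_metrics,
            "test": test_metrics,
            "sample": sample_metrics,
            "epoch": epoch,
        }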
2023-01-24 02:40:12.228996: step: 358/459, loss: 0.041690196841955185 2023-01-24 02:40:12.838396: step: 360/459, loss: 0.07484215497970581 2023-01-24 02:40:13.507854: step: 362/459, loss: 0.5056418180465698 2023-01-24 02:40:14.131861: step: 364/459, loss: 0.05936842039227486 2023-01-24 02:40:14.783304: step: 366/459, loss: 0.04937787726521492 2023-01-24 02:40:15.438055: step: 368/459, loss: 0.0612277053296566 2023-01-24 02:40:16.039108: step: 370/459, loss: 0.14569376409053802 2023-01-24 02:40:16.635753: step: 372/459, loss: 0.010552938096225262 2023-01-24 02:40:17.279147: step: 374/459, loss: 0.02066660113632679 2023-01-24 02:40:17.881195: step: 376/459, loss: 0.021299321204423904 2023-01-24 02:40:18.466024: step: 378/459, loss: 0.028386985883116722 2023-01-24 02:40:19.142413: step: 380/459, loss: 0.025380831211805344 2023-01-24 02:40:19.795087: step: 382/459, loss: 0.4876663088798523 2023-01-24 02:40:20.439871: step: 384/459, loss: 0.1939888596534729 2023-01-24 02:40:21.015646: step: 386/459, loss: 0.8999460935592651 2023-01-24 02:40:21.574721: step: 388/459, loss: 0.04268151521682739 2023-01-24 02:40:22.280082: step: 390/459, loss: 0.08126859366893768 2023-01-24 02:40:22.828730: step: 392/459, loss: 0.0375216007232666 2023-01-24 02:40:23.417608: step: 394/459, loss: 0.08518879860639572 2023-01-24 02:40:24.039897: step: 396/459, loss: 0.02796131931245327 2023-01-24 02:40:24.639251: step: 398/459, loss: 0.05485693737864494 2023-01-24 02:40:25.266167: step: 400/459, loss: 0.025592757388949394 2023-01-24 02:40:25.874246: step: 402/459, loss: 0.012602240778505802 2023-01-24 02:40:26.447521: step: 404/459, loss: 0.0477316714823246 2023-01-24 02:40:27.096376: step: 406/459, loss: 0.017983023077249527 2023-01-24 02:40:27.706745: step: 408/459, loss: 0.031202280893921852 2023-01-24 02:40:28.303313: step: 410/459, loss: 0.008387504145503044 2023-01-24 02:40:28.914146: step: 412/459, loss: 0.005621288437396288 2023-01-24 02:40:29.525685: step: 414/459, loss: 0.01710941269993782 2023-01-24 02:40:30.156998: step: 416/459, loss: 0.029878849163651466 2023-01-24 02:40:30.771896: step: 418/459, loss: 0.07300105690956116 2023-01-24 02:40:31.362305: step: 420/459, loss: 0.060311637818813324 2023-01-24 02:40:32.013795: step: 422/459, loss: 0.14361967146396637 2023-01-24 02:40:32.631328: step: 424/459, loss: 0.04478023201227188 2023-01-24 02:40:33.253460: step: 426/459, loss: 0.016315467655658722 2023-01-24 02:40:33.822711: step: 428/459, loss: 3.122880697250366 2023-01-24 02:40:34.422341: step: 430/459, loss: 0.01548811886459589 2023-01-24 02:40:35.056557: step: 432/459, loss: 0.031885743141174316 2023-01-24 02:40:35.646394: step: 434/459, loss: 0.12439538538455963 2023-01-24 02:40:36.292975: step: 436/459, loss: 0.0889517068862915 2023-01-24 02:40:37.008003: step: 438/459, loss: 0.0775764063000679 2023-01-24 02:40:37.635268: step: 440/459, loss: 0.007232303265482187 2023-01-24 02:40:38.279091: step: 442/459, loss: 0.0750846266746521 2023-01-24 02:40:38.883347: step: 444/459, loss: 0.033889587968587875 2023-01-24 02:40:39.430274: step: 446/459, loss: 0.09181681275367737 2023-01-24 02:40:40.015152: step: 448/459, loss: 0.010201776400208473 2023-01-24 02:40:40.625988: step: 450/459, loss: 0.05051221698522568 2023-01-24 02:40:41.270861: step: 452/459, loss: 0.0049010273069143295 2023-01-24 02:40:41.936784: step: 454/459, loss: 0.04036809504032135 2023-01-24 02:40:42.534871: step: 456/459, loss: 0.015586109831929207 2023-01-24 02:40:43.130923: step: 458/459, loss: 0.06961443275213242 2023-01-24 
02:40:43.760253: step: 460/459, loss: 0.03790973871946335 2023-01-24 02:40:44.398752: step: 462/459, loss: 0.037395279854536057 2023-01-24 02:40:45.038449: step: 464/459, loss: 0.05502459406852722 2023-01-24 02:40:45.659593: step: 466/459, loss: 0.04461849853396416 2023-01-24 02:40:46.272000: step: 468/459, loss: 0.04356023296713829 2023-01-24 02:40:46.954371: step: 470/459, loss: 0.8090776205062866 2023-01-24 02:40:47.583741: step: 472/459, loss: 0.07602483034133911 2023-01-24 02:40:48.275832: step: 474/459, loss: 0.11171656847000122 2023-01-24 02:40:48.873883: step: 476/459, loss: 0.017094073817133904 2023-01-24 02:40:49.568435: step: 478/459, loss: 0.09495069831609726 2023-01-24 02:40:50.138705: step: 480/459, loss: 0.08075558394193649 2023-01-24 02:40:50.770889: step: 482/459, loss: 0.016327757388353348 2023-01-24 02:40:51.455357: step: 484/459, loss: 0.05324457585811615 2023-01-24 02:40:52.038471: step: 486/459, loss: 0.02370971068739891 2023-01-24 02:40:52.675864: step: 488/459, loss: 0.02009798027575016 2023-01-24 02:40:53.386783: step: 490/459, loss: 0.0649937242269516 2023-01-24 02:40:54.006953: step: 492/459, loss: 0.03530922904610634 2023-01-24 02:40:54.646379: step: 494/459, loss: 0.04260009899735451 2023-01-24 02:40:55.264431: step: 496/459, loss: 0.06878063827753067 2023-01-24 02:40:55.903800: step: 498/459, loss: 0.08862363547086716 2023-01-24 02:40:56.463111: step: 500/459, loss: 0.10871273279190063 2023-01-24 02:40:57.106771: step: 502/459, loss: 0.03203457593917847 2023-01-24 02:40:57.722393: step: 504/459, loss: 0.051112230867147446 2023-01-24 02:40:58.326547: step: 506/459, loss: 0.03632513806223869 2023-01-24 02:40:58.928005: step: 508/459, loss: 0.025538370013237 2023-01-24 02:40:59.575307: step: 510/459, loss: 0.028817778453230858 2023-01-24 02:41:00.200353: step: 512/459, loss: 0.18331636488437653 2023-01-24 02:41:00.836198: step: 514/459, loss: 0.08349744230508804 2023-01-24 02:41:01.445021: step: 516/459, loss: 0.023196645081043243 2023-01-24 02:41:02.071972: step: 518/459, loss: 0.03837990760803223 2023-01-24 02:41:02.649622: step: 520/459, loss: 0.04829510301351547 2023-01-24 02:41:03.216926: step: 522/459, loss: 0.028018251061439514 2023-01-24 02:41:03.842454: step: 524/459, loss: 0.04352071136236191 2023-01-24 02:41:04.474055: step: 526/459, loss: 0.007871539331972599 2023-01-24 02:41:05.094878: step: 528/459, loss: 0.0709342285990715 2023-01-24 02:41:05.704819: step: 530/459, loss: 0.07470843195915222 2023-01-24 02:41:06.304832: step: 532/459, loss: 0.011408516205847263 2023-01-24 02:41:06.888651: step: 534/459, loss: 0.03011637181043625 2023-01-24 02:41:07.481645: step: 536/459, loss: 0.17196711897850037 2023-01-24 02:41:08.136807: step: 538/459, loss: 0.21250636875629425 2023-01-24 02:41:08.771278: step: 540/459, loss: 0.5720111131668091 2023-01-24 02:41:09.290107: step: 542/459, loss: 0.016464954242110252 2023-01-24 02:41:09.867190: step: 544/459, loss: 0.021450301632285118 2023-01-24 02:41:10.492967: step: 546/459, loss: 0.0243068914860487 2023-01-24 02:41:11.079258: step: 548/459, loss: 0.02590971440076828 2023-01-24 02:41:11.713072: step: 550/459, loss: 0.03634170442819595 2023-01-24 02:41:12.359673: step: 552/459, loss: 0.15264084935188293 2023-01-24 02:41:13.144144: step: 554/459, loss: 0.006976650096476078 2023-01-24 02:41:13.845633: step: 556/459, loss: 0.020693911239504814 2023-01-24 02:41:14.430098: step: 558/459, loss: 0.031538013368844986 2023-01-24 02:41:15.133736: step: 560/459, loss: 0.01838819868862629 2023-01-24 02:41:15.756183: step: 
562/459, loss: 0.11392112821340561 2023-01-24 02:41:16.357615: step: 564/459, loss: 0.04617447033524513 2023-01-24 02:41:16.960921: step: 566/459, loss: 0.006529453210532665 2023-01-24 02:41:17.670371: step: 568/459, loss: 0.07440601289272308 2023-01-24 02:41:18.285116: step: 570/459, loss: 0.02948068454861641 2023-01-24 02:41:18.934366: step: 572/459, loss: 0.09927075356245041 2023-01-24 02:41:19.567152: step: 574/459, loss: 0.6740154027938843 2023-01-24 02:41:20.192261: step: 576/459, loss: 0.03747100010514259 2023-01-24 02:41:20.842468: step: 578/459, loss: 0.30206310749053955 2023-01-24 02:41:21.425553: step: 580/459, loss: 0.0302566047757864 2023-01-24 02:41:22.037660: step: 582/459, loss: 0.0778128132224083 2023-01-24 02:41:22.661014: step: 584/459, loss: 0.02825068309903145 2023-01-24 02:41:23.283939: step: 586/459, loss: 0.0769130140542984 2023-01-24 02:41:23.884528: step: 588/459, loss: 0.060389645397663116 2023-01-24 02:41:24.541300: step: 590/459, loss: 0.07569652795791626 2023-01-24 02:41:25.165236: step: 592/459, loss: 0.12353824079036713 2023-01-24 02:41:25.767570: step: 594/459, loss: 0.1703449934720993 2023-01-24 02:41:26.425520: step: 596/459, loss: 0.03290051594376564 2023-01-24 02:41:27.116113: step: 598/459, loss: 0.02284853532910347 2023-01-24 02:41:27.728163: step: 600/459, loss: 0.17792677879333496 2023-01-24 02:41:28.267399: step: 602/459, loss: 0.05886291712522507 2023-01-24 02:41:28.900070: step: 604/459, loss: 0.010620561428368092 2023-01-24 02:41:29.518627: step: 606/459, loss: 0.05324544385075569 2023-01-24 02:41:30.167063: step: 608/459, loss: 0.19421710073947906 2023-01-24 02:41:30.837931: step: 610/459, loss: 0.04099569469690323 2023-01-24 02:41:31.540221: step: 612/459, loss: 0.04303886741399765 2023-01-24 02:41:32.187705: step: 614/459, loss: 0.05115086957812309 2023-01-24 02:41:32.728509: step: 616/459, loss: 0.06232253089547157 2023-01-24 02:41:33.316899: step: 618/459, loss: 0.0038500467780977488 2023-01-24 02:41:33.923991: step: 620/459, loss: 0.0966094508767128 2023-01-24 02:41:34.524415: step: 622/459, loss: 0.012712189927697182 2023-01-24 02:41:35.106024: step: 624/459, loss: 0.2131592035293579 2023-01-24 02:41:35.724977: step: 626/459, loss: 0.02659871242940426 2023-01-24 02:41:36.371141: step: 628/459, loss: 0.10192181915044785 2023-01-24 02:41:36.932878: step: 630/459, loss: 0.011713892221450806 2023-01-24 02:41:37.607804: step: 632/459, loss: 0.009871026501059532 2023-01-24 02:41:38.248845: step: 634/459, loss: 0.049449801445007324 2023-01-24 02:41:38.871638: step: 636/459, loss: 0.003176107769832015 2023-01-24 02:41:39.524295: step: 638/459, loss: 0.0525258406996727 2023-01-24 02:41:40.147966: step: 640/459, loss: 0.014708908274769783 2023-01-24 02:41:40.762950: step: 642/459, loss: 0.019432643428444862 2023-01-24 02:41:41.459656: step: 644/459, loss: 0.0931129977107048 2023-01-24 02:41:42.104101: step: 646/459, loss: 0.08615534007549286 2023-01-24 02:41:42.706100: step: 648/459, loss: 0.024323992431163788 2023-01-24 02:41:43.260689: step: 650/459, loss: 0.09837210923433304 2023-01-24 02:41:43.824466: step: 652/459, loss: 0.024666257202625275 2023-01-24 02:41:44.550224: step: 654/459, loss: 0.01931791752576828 2023-01-24 02:41:45.204334: step: 656/459, loss: 0.02862308733165264 2023-01-24 02:41:45.748278: step: 658/459, loss: 0.004597049672156572 2023-01-24 02:41:46.324898: step: 660/459, loss: 0.01692391186952591 2023-01-24 02:41:46.930329: step: 662/459, loss: 0.0738520696759224 2023-01-24 02:41:47.645195: step: 664/459, loss: 
0.04598308354616165 2023-01-24 02:41:48.295953: step: 666/459, loss: 0.07247915118932724 2023-01-24 02:41:48.898240: step: 668/459, loss: 0.14618167281150818 2023-01-24 02:41:49.498625: step: 670/459, loss: 0.11967770755290985 2023-01-24 02:41:50.118197: step: 672/459, loss: 0.021102532744407654 2023-01-24 02:41:50.738501: step: 674/459, loss: 0.06844451278448105 2023-01-24 02:41:51.449689: step: 676/459, loss: 0.1613527536392212 2023-01-24 02:41:52.069728: step: 678/459, loss: 0.011565503664314747 2023-01-24 02:41:52.642226: step: 680/459, loss: 0.043163541704416275 2023-01-24 02:41:53.313903: step: 682/459, loss: 0.030079493299126625 2023-01-24 02:41:53.916974: step: 684/459, loss: 0.38367417454719543 2023-01-24 02:41:54.569831: step: 686/459, loss: 0.05570203810930252 2023-01-24 02:41:55.177530: step: 688/459, loss: 0.09024549275636673 2023-01-24 02:41:55.778941: step: 690/459, loss: 0.08679983019828796 2023-01-24 02:41:56.370106: step: 692/459, loss: 0.17163784801959991 2023-01-24 02:41:56.994833: step: 694/459, loss: 0.038959525525569916 2023-01-24 02:41:57.560660: step: 696/459, loss: 0.081719771027565 2023-01-24 02:41:58.126045: step: 698/459, loss: 0.01145242527127266 2023-01-24 02:41:58.728962: step: 700/459, loss: 0.11082016676664352 2023-01-24 02:41:59.372816: step: 702/459, loss: 0.2825894355773926 2023-01-24 02:41:59.974834: step: 704/459, loss: 0.0024594159331172705 2023-01-24 02:42:00.595620: step: 706/459, loss: 0.004001529421657324 2023-01-24 02:42:01.273276: step: 708/459, loss: 0.052878495305776596 2023-01-24 02:42:01.887620: step: 710/459, loss: 0.022790217772126198 2023-01-24 02:42:02.535914: step: 712/459, loss: 0.013568460009992123 2023-01-24 02:42:03.118904: step: 714/459, loss: 0.020832527428865433 2023-01-24 02:42:03.814010: step: 716/459, loss: 0.42406827211380005 2023-01-24 02:42:04.497714: step: 718/459, loss: 0.14075928926467896 2023-01-24 02:42:05.124173: step: 720/459, loss: 0.04777150973677635 2023-01-24 02:42:05.872127: step: 722/459, loss: 0.009840510785579681 2023-01-24 02:42:06.508750: step: 724/459, loss: 0.001309025683440268 2023-01-24 02:42:07.137781: step: 726/459, loss: 0.007524025160819292 2023-01-24 02:42:07.756226: step: 728/459, loss: 0.03719890117645264 2023-01-24 02:42:08.345532: step: 730/459, loss: 0.1116669625043869 2023-01-24 02:42:08.964967: step: 732/459, loss: 0.160640686750412 2023-01-24 02:42:09.595397: step: 734/459, loss: 0.1867968738079071 2023-01-24 02:42:10.191451: step: 736/459, loss: 0.019231028854846954 2023-01-24 02:42:10.752656: step: 738/459, loss: 0.014113090001046658 2023-01-24 02:42:11.349686: step: 740/459, loss: 0.02293974719941616 2023-01-24 02:42:12.055379: step: 742/459, loss: 0.06934670358896255 2023-01-24 02:42:12.678778: step: 744/459, loss: 0.014942790381610394 2023-01-24 02:42:13.348594: step: 746/459, loss: 0.019613848999142647 2023-01-24 02:42:13.964569: step: 748/459, loss: 0.0006286624702624977 2023-01-24 02:42:14.591592: step: 750/459, loss: 0.08876416087150574 2023-01-24 02:42:15.264509: step: 752/459, loss: 0.7242976427078247 2023-01-24 02:42:15.916587: step: 754/459, loss: 0.07103165239095688 2023-01-24 02:42:16.575049: step: 756/459, loss: 0.035249289125204086 2023-01-24 02:42:17.218836: step: 758/459, loss: 0.026600852608680725 2023-01-24 02:42:17.884714: step: 760/459, loss: 0.8437798023223877 2023-01-24 02:42:18.518164: step: 762/459, loss: 0.02416621521115303 2023-01-24 02:42:19.142661: step: 764/459, loss: 0.01569906435906887 2023-01-24 02:42:19.774149: step: 766/459, loss: 0.03145166486501694 
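The command line repeated at the top of every epoch passes two learning rates: --xlmr_learning_rate 2e-5 and --learning_rate 9e-4. A natural reading is that the XLM-R encoder is fine-tuned with the small rate while the task-specific layers (the 450-unit event and 350-unit role hidden layers) use the larger one. That split is an assumption about train.py, not something the log states; the sketch below illustrates it with PyTorch parameter groups, and the optimizer choice (AdamW) is likewise assumed:

import torch

def build_optimizer(model, xlmr_lr=2e-5, head_lr=9e-4):
    # Assumption: parameters under the "xlmr." submodule get the small
    # fine-tuning rate, everything else (event/role heads) gets the larger
    # task learning rate.
    xlmr_params, head_params = [], []
    for name, param in model.named_parameters():
        (xlmr_params if name.startswith("xlmr.") else head_params).append(param)
    return torch.optim.AdamW(
        [{"params": xlmr_params, "lr": xlmr_lr},
         {"params": head_params, "lr": head_lr}]
    )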
2023-01-24 02:42:20.360986: step: 768/459, loss: 0.06622913479804993 2023-01-24 02:42:20.975811: step: 770/459, loss: 0.0715799555182457 2023-01-24 02:42:21.588933: step: 772/459, loss: 0.1124831959605217 2023-01-24 02:42:22.223298: step: 774/459, loss: 0.10759007185697556 2023-01-24 02:42:22.791246: step: 776/459, loss: 0.06319201737642288 2023-01-24 02:42:23.363005: step: 778/459, loss: 0.12118958681821823 2023-01-24 02:42:23.943078: step: 780/459, loss: 0.09145686775445938 2023-01-24 02:42:24.616605: step: 782/459, loss: 0.10725570470094681 2023-01-24 02:42:25.197809: step: 784/459, loss: 0.041374094784259796 2023-01-24 02:42:25.869951: step: 786/459, loss: 0.19002075493335724 2023-01-24 02:42:26.515593: step: 788/459, loss: 0.0554644912481308 2023-01-24 02:42:27.066797: step: 790/459, loss: 0.33316609263420105 2023-01-24 02:42:27.721756: step: 792/459, loss: 0.0595940463244915 2023-01-24 02:42:28.303116: step: 794/459, loss: 0.34832337498664856 2023-01-24 02:42:28.848232: step: 796/459, loss: 0.08265670388936996 2023-01-24 02:42:29.404393: step: 798/459, loss: 0.022960031405091286 2023-01-24 02:42:30.081572: step: 800/459, loss: 0.03258640691637993 2023-01-24 02:42:30.681882: step: 802/459, loss: 0.02923310361802578 2023-01-24 02:42:31.324570: step: 804/459, loss: 0.08579622954130173 2023-01-24 02:42:31.860240: step: 806/459, loss: 0.022360334172844887 2023-01-24 02:42:32.453179: step: 808/459, loss: 0.034956786781549454 2023-01-24 02:42:33.081011: step: 810/459, loss: 0.014934243634343147 2023-01-24 02:42:33.644997: step: 812/459, loss: 0.006924243178218603 2023-01-24 02:42:34.337600: step: 814/459, loss: 0.03156069293618202 2023-01-24 02:42:34.959847: step: 816/459, loss: 0.05667133256793022 2023-01-24 02:42:35.582510: step: 818/459, loss: 0.03769180178642273 2023-01-24 02:42:36.145551: step: 820/459, loss: 0.040102411061525345 2023-01-24 02:42:36.792934: step: 822/459, loss: 0.014741737395524979 2023-01-24 02:42:37.429989: step: 824/459, loss: 0.010960806161165237 2023-01-24 02:42:38.053222: step: 826/459, loss: 0.017285974696278572 2023-01-24 02:42:38.602786: step: 828/459, loss: 0.11570321768522263 2023-01-24 02:42:39.222923: step: 830/459, loss: 0.05691082775592804 2023-01-24 02:42:39.825617: step: 832/459, loss: 0.027601907029747963 2023-01-24 02:42:40.537376: step: 834/459, loss: 0.12424728274345398 2023-01-24 02:42:41.155118: step: 836/459, loss: 0.04390221834182739 2023-01-24 02:42:41.764621: step: 838/459, loss: 0.044101532548666 2023-01-24 02:42:42.339928: step: 840/459, loss: 0.06782525032758713 2023-01-24 02:42:42.933803: step: 842/459, loss: 0.03774000704288483 2023-01-24 02:42:43.571257: step: 844/459, loss: 0.04610248655080795 2023-01-24 02:42:44.227030: step: 846/459, loss: 0.14652808010578156 2023-01-24 02:42:44.897385: step: 848/459, loss: 0.1058826595544815 2023-01-24 02:42:45.504302: step: 850/459, loss: 0.13238534331321716 2023-01-24 02:42:46.103874: step: 852/459, loss: 0.011138319969177246 2023-01-24 02:42:46.701780: step: 854/459, loss: 0.027212021872401237 2023-01-24 02:42:47.268995: step: 856/459, loss: 0.020786378532648087 2023-01-24 02:42:47.884185: step: 858/459, loss: 0.07103755325078964 2023-01-24 02:42:48.525347: step: 860/459, loss: 0.012507262639701366 2023-01-24 02:42:49.174087: step: 862/459, loss: 0.04974125698208809 2023-01-24 02:42:49.721221: step: 864/459, loss: 0.052342548966407776 2023-01-24 02:42:50.265991: step: 866/459, loss: 0.028928088024258614 2023-01-24 02:42:50.818825: step: 868/459, loss: 0.044515181332826614 2023-01-24 
02:42:51.437048: step: 870/459, loss: 0.02451634407043457 2023-01-24 02:42:52.067796: step: 872/459, loss: 0.0740215927362442 2023-01-24 02:42:52.692552: step: 874/459, loss: 0.766711413860321 2023-01-24 02:42:53.277356: step: 876/459, loss: 0.01989920251071453 2023-01-24 02:42:53.992731: step: 878/459, loss: 0.003330049803480506 2023-01-24 02:42:54.656009: step: 880/459, loss: 0.06257487833499908 2023-01-24 02:42:55.366037: step: 882/459, loss: 0.09583732485771179 2023-01-24 02:42:55.995045: step: 884/459, loss: 0.11483842134475708 2023-01-24 02:42:56.644233: step: 886/459, loss: 0.02087639458477497 2023-01-24 02:42:57.248401: step: 888/459, loss: 0.055545974522829056 2023-01-24 02:42:57.926207: step: 890/459, loss: 0.07921437919139862 2023-01-24 02:42:58.591839: step: 892/459, loss: 0.07773417979478836 2023-01-24 02:42:59.200204: step: 894/459, loss: 0.030160188674926758 2023-01-24 02:42:59.813150: step: 896/459, loss: 0.07293444126844406 2023-01-24 02:43:00.414866: step: 898/459, loss: 0.0367460660636425 2023-01-24 02:43:01.052247: step: 900/459, loss: 0.024338120594620705 2023-01-24 02:43:01.701445: step: 902/459, loss: 0.036364927887916565 2023-01-24 02:43:02.259564: step: 904/459, loss: 0.08345430344343185 2023-01-24 02:43:02.801647: step: 906/459, loss: 0.07700160890817642 2023-01-24 02:43:03.387502: step: 908/459, loss: 0.006680100224912167 2023-01-24 02:43:03.942421: step: 910/459, loss: 0.023813065141439438 2023-01-24 02:43:04.603236: step: 912/459, loss: 0.019588371738791466 2023-01-24 02:43:05.184586: step: 914/459, loss: 0.1231042668223381 2023-01-24 02:43:05.818264: step: 916/459, loss: 0.002478000707924366 2023-01-24 02:43:06.402771: step: 918/459, loss: 0.0682540237903595 2023-01-24 02:43:06.829724: step: 920/459, loss: 0.010733403265476227 ================================================== Loss: 0.090 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33429026124338623, 'r': 0.319700743200506, 'f1': 0.32683276753960555}, 'combined': 0.24082414450286724, 'epoch': 21} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3530078353151059, 'r': 0.2965265816646889, 'f1': 0.3223115018094445}, 'combined': 0.20627936115804446, 'epoch': 21} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3245469901719901, 'r': 0.31900444195273414, 'f1': 0.3217518486298391}, 'combined': 0.23708030951672351, 'epoch': 21} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3517009680382249, 'r': 0.28871452194410646, 'f1': 0.3171103086755039}, 'combined': 0.20295059755232245, 'epoch': 21} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3533650003452587, 'r': 0.32922621474292607, 'f1': 0.3408687920815757}, 'combined': 0.2511664783758979, 'epoch': 21} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3532161352968141, 'r': 0.30560594583968437, 'f1': 0.32769075051950536}, 'combined': 0.23494808527813593, 'epoch': 21} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.28879310344827586, 'r': 0.2392857142857143, 'f1': 0.26171875000000006}, 'combined': 0.17447916666666669, 'epoch': 21} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2391304347826087, 
'r': 0.2391304347826087, 'f1': 0.2391304347826087}, 'combined': 0.11956521739130435, 'epoch': 21} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.10344827586206896, 'f1': 0.15789473684210528}, 'combined': 0.10526315789473685, 'epoch': 21} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 'epoch': 10} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 'epoch': 10} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'epoch': 10} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3342478880342958, 'r': 0.3266369304319968, 'f1': 0.33039858414138645}, 'combined': 0.24345158831470579, 'epoch': 5} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3413499740991752, 'r': 0.24608229950967814, 'f1': 0.28599105067157526}, 'combined': 0.18303427242980813, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3269230769230769, 'r': 0.3695652173913043, 'f1': 0.346938775510204}, 'combined': 0.173469387755102, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8} ****************************** Epoch: 22 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:45:43.313212: step: 2/459, loss: 0.03422289341688156 2023-01-24 02:45:43.949808: step: 4/459, loss: 0.051869019865989685 2023-01-24 02:45:44.539345: step: 6/459, loss: 0.029504435136914253 2023-01-24 02:45:45.209402: step: 8/459, loss: 0.02975468523800373 2023-01-24 02:45:45.820661: step: 10/459, loss: 0.05002465844154358 2023-01-24 02:45:46.451309: step: 12/459, loss: 0.059847891330718994 2023-01-24 02:45:47.062457: step: 14/459, loss: 0.061025284230709076 2023-01-24 02:45:47.605781: step: 16/459, loss: 0.030019666999578476 2023-01-24 02:45:48.188674: step: 18/459, loss: 0.0853167325258255 2023-01-24 02:45:48.795072: step: 20/459, loss: 0.051620036363601685 2023-01-24 02:45:49.465798: step: 22/459, loss: 0.031663715839385986 2023-01-24 02:45:50.048441: step: 24/459, loss: 0.018382858484983444 2023-01-24 02:45:50.624866: step: 26/459, 
loss: 0.05145333707332611 2023-01-24 02:45:51.239467: step: 28/459, loss: 0.013066393323242664 2023-01-24 02:45:51.789531: step: 30/459, loss: 0.302929162979126 2023-01-24 02:45:52.392118: step: 32/459, loss: 0.19139066338539124 2023-01-24 02:45:52.988820: step: 34/459, loss: 0.02611786685883999 2023-01-24 02:45:53.644643: step: 36/459, loss: 0.031428903341293335 2023-01-24 02:45:54.217076: step: 38/459, loss: 0.0174191202968359 2023-01-24 02:45:54.766456: step: 40/459, loss: 0.026289204135537148 2023-01-24 02:45:55.402677: step: 42/459, loss: 0.056110866367816925 2023-01-24 02:45:56.008318: step: 44/459, loss: 0.10268247872591019 2023-01-24 02:45:56.756825: step: 46/459, loss: 0.016314540058374405 2023-01-24 02:45:57.349820: step: 48/459, loss: 0.055135566741228104 2023-01-24 02:45:57.926959: step: 50/459, loss: 0.027467044070363045 2023-01-24 02:45:58.490453: step: 52/459, loss: 0.019078314304351807 2023-01-24 02:45:59.153394: step: 54/459, loss: 0.011082799173891544 2023-01-24 02:45:59.809460: step: 56/459, loss: 0.25330865383148193 2023-01-24 02:46:00.424883: step: 58/459, loss: 0.06424811482429504 2023-01-24 02:46:01.065879: step: 60/459, loss: 0.037340860813856125 2023-01-24 02:46:01.676178: step: 62/459, loss: 0.009588737972080708 2023-01-24 02:46:02.354943: step: 64/459, loss: 0.0788722112774849 2023-01-24 02:46:02.946480: step: 66/459, loss: 0.20822986960411072 2023-01-24 02:46:03.650770: step: 68/459, loss: 0.009300257079303265 2023-01-24 02:46:04.265959: step: 70/459, loss: 0.10521803796291351 2023-01-24 02:46:04.905605: step: 72/459, loss: 0.06681903451681137 2023-01-24 02:46:05.554268: step: 74/459, loss: 0.024804556742310524 2023-01-24 02:46:06.188750: step: 76/459, loss: 0.13356289267539978 2023-01-24 02:46:06.823801: step: 78/459, loss: 0.028096988797187805 2023-01-24 02:46:07.400767: step: 80/459, loss: 0.01921546459197998 2023-01-24 02:46:08.068425: step: 82/459, loss: 0.01933158189058304 2023-01-24 02:46:08.732279: step: 84/459, loss: 0.06198737770318985 2023-01-24 02:46:09.388111: step: 86/459, loss: 0.01285473071038723 2023-01-24 02:46:10.138388: step: 88/459, loss: 0.020392512902617455 2023-01-24 02:46:10.704458: step: 90/459, loss: 0.0180718582123518 2023-01-24 02:46:11.289791: step: 92/459, loss: 0.2802385687828064 2023-01-24 02:46:11.925860: step: 94/459, loss: 0.05734796077013016 2023-01-24 02:46:12.525022: step: 96/459, loss: 0.004754351917654276 2023-01-24 02:46:13.129759: step: 98/459, loss: 0.013900226913392544 2023-01-24 02:46:13.775253: step: 100/459, loss: 0.11737526953220367 2023-01-24 02:46:14.435261: step: 102/459, loss: 0.04484374076128006 2023-01-24 02:46:15.098524: step: 104/459, loss: 0.09981662780046463 2023-01-24 02:46:15.678289: step: 106/459, loss: 0.01752568408846855 2023-01-24 02:46:16.236357: step: 108/459, loss: 0.016817552968859673 2023-01-24 02:46:16.872339: step: 110/459, loss: 0.06784584373235703 2023-01-24 02:46:17.496973: step: 112/459, loss: 0.028813321143388748 2023-01-24 02:46:18.057373: step: 114/459, loss: 0.024011656641960144 2023-01-24 02:46:18.634281: step: 116/459, loss: 0.04277036339044571 2023-01-24 02:46:19.282874: step: 118/459, loss: 0.06142188981175423 2023-01-24 02:46:19.834961: step: 120/459, loss: 0.028963517397642136 2023-01-24 02:46:20.418759: step: 122/459, loss: 0.0017218288267031312 2023-01-24 02:46:20.998603: step: 124/459, loss: 0.0015299232909455895 2023-01-24 02:46:21.595448: step: 126/459, loss: 0.07562021166086197 2023-01-24 02:46:22.188828: step: 128/459, loss: 0.023877304047346115 2023-01-24 
02:46:22.808605: step: 130/459, loss: 0.029638219624757767 2023-01-24 02:46:23.554987: step: 132/459, loss: 0.09045525640249252 2023-01-24 02:46:24.162483: step: 134/459, loss: 0.02220148965716362 2023-01-24 02:46:24.775132: step: 136/459, loss: 0.01926819048821926 2023-01-24 02:46:25.390836: step: 138/459, loss: 0.2328614592552185 2023-01-24 02:46:25.917478: step: 140/459, loss: 0.02110457792878151 2023-01-24 02:46:26.510821: step: 142/459, loss: 0.04650574550032616 2023-01-24 02:46:27.154493: step: 144/459, loss: 0.019369790330529213 2023-01-24 02:46:27.794141: step: 146/459, loss: 0.0220940001308918 2023-01-24 02:46:28.429250: step: 148/459, loss: 0.014898005872964859 2023-01-24 02:46:29.018254: step: 150/459, loss: 0.031187890097498894 2023-01-24 02:46:29.584012: step: 152/459, loss: 0.02785717323422432 2023-01-24 02:46:30.182917: step: 154/459, loss: 0.002235921798273921 2023-01-24 02:46:30.731574: step: 156/459, loss: 0.09828519821166992 2023-01-24 02:46:31.353611: step: 158/459, loss: 0.04037465155124664 2023-01-24 02:46:31.989296: step: 160/459, loss: 0.02351292036473751 2023-01-24 02:46:32.590892: step: 162/459, loss: 0.002742733806371689 2023-01-24 02:46:33.284900: step: 164/459, loss: 0.02290835976600647 2023-01-24 02:46:33.893586: step: 166/459, loss: 0.04617006331682205 2023-01-24 02:46:34.557875: step: 168/459, loss: 0.08917178958654404 2023-01-24 02:46:35.184765: step: 170/459, loss: 0.015886032953858376 2023-01-24 02:46:35.812280: step: 172/459, loss: 0.011741265654563904 2023-01-24 02:46:36.411174: step: 174/459, loss: 0.07674016058444977 2023-01-24 02:46:37.080364: step: 176/459, loss: 0.19178202748298645 2023-01-24 02:46:37.751069: step: 178/459, loss: 0.03643377870321274 2023-01-24 02:46:38.377878: step: 180/459, loss: 0.035683684051036835 2023-01-24 02:46:39.013799: step: 182/459, loss: 0.08475485444068909 2023-01-24 02:46:39.627059: step: 184/459, loss: 0.040489792823791504 2023-01-24 02:46:40.264632: step: 186/459, loss: 0.041109442710876465 2023-01-24 02:46:40.877054: step: 188/459, loss: 0.04824772849678993 2023-01-24 02:46:41.491648: step: 190/459, loss: 0.0904393345117569 2023-01-24 02:46:42.073010: step: 192/459, loss: 0.053669191896915436 2023-01-24 02:46:42.721786: step: 194/459, loss: 0.023612601682543755 2023-01-24 02:46:43.346465: step: 196/459, loss: 0.03325054422020912 2023-01-24 02:46:44.021491: step: 198/459, loss: 0.06419719755649567 2023-01-24 02:46:44.653341: step: 200/459, loss: 0.029619015753269196 2023-01-24 02:46:45.263981: step: 202/459, loss: 0.1385820358991623 2023-01-24 02:46:45.902037: step: 204/459, loss: 0.05335545167326927 2023-01-24 02:46:46.561708: step: 206/459, loss: 0.41830891370773315 2023-01-24 02:46:47.088812: step: 208/459, loss: 0.04372062906622887 2023-01-24 02:46:47.662561: step: 210/459, loss: 0.00963744055479765 2023-01-24 02:46:48.297884: step: 212/459, loss: 0.05148430913686752 2023-01-24 02:46:48.922448: step: 214/459, loss: 0.07920388132333755 2023-01-24 02:46:49.585839: step: 216/459, loss: 0.3011869192123413 2023-01-24 02:46:50.182776: step: 218/459, loss: 0.020348217338323593 2023-01-24 02:46:50.846456: step: 220/459, loss: 0.10531225055456161 2023-01-24 02:46:51.549658: step: 222/459, loss: 0.026451537385582924 2023-01-24 02:46:52.162791: step: 224/459, loss: 0.015801850706338882 2023-01-24 02:46:52.846900: step: 226/459, loss: 0.012055412866175175 2023-01-24 02:46:53.431126: step: 228/459, loss: 0.036951616406440735 2023-01-24 02:46:54.087665: step: 230/459, loss: 0.0211988165974617 2023-01-24 02:46:54.780509: step: 
232/459, loss: 0.03665865957736969 2023-01-24 02:46:55.392778: step: 234/459, loss: 0.26936179399490356 2023-01-24 02:46:56.001909: step: 236/459, loss: 0.015508039854466915 2023-01-24 02:46:56.590191: step: 238/459, loss: 0.055428434163331985 2023-01-24 02:46:57.199069: step: 240/459, loss: 0.017630701884627342 2023-01-24 02:46:57.871108: step: 242/459, loss: 0.05286921188235283 2023-01-24 02:46:58.493406: step: 244/459, loss: 0.04183616116642952 2023-01-24 02:46:59.208105: step: 246/459, loss: 0.3769092559814453 2023-01-24 02:46:59.886247: step: 248/459, loss: 0.006540005095303059 2023-01-24 02:47:00.495476: step: 250/459, loss: 0.019957907497882843 2023-01-24 02:47:01.125755: step: 252/459, loss: 0.08266793191432953 2023-01-24 02:47:01.731862: step: 254/459, loss: 0.03727078437805176 2023-01-24 02:47:02.353653: step: 256/459, loss: 0.11440615355968475 2023-01-24 02:47:03.106301: step: 258/459, loss: 0.04061153531074524 2023-01-24 02:47:03.733448: step: 260/459, loss: 0.020355936139822006 2023-01-24 02:47:04.392631: step: 262/459, loss: 0.07773664593696594 2023-01-24 02:47:05.110443: step: 264/459, loss: 0.052622318267822266 2023-01-24 02:47:05.765295: step: 266/459, loss: 0.2211923897266388 2023-01-24 02:47:06.400477: step: 268/459, loss: 0.01306238118559122 2023-01-24 02:47:07.014247: step: 270/459, loss: 0.026030603796243668 2023-01-24 02:47:07.580309: step: 272/459, loss: 0.10110747814178467 2023-01-24 02:47:08.191173: step: 274/459, loss: 0.07199133932590485 2023-01-24 02:47:08.852792: step: 276/459, loss: 0.011066229082643986 2023-01-24 02:47:09.479273: step: 278/459, loss: 0.023403791710734367 2023-01-24 02:47:10.131633: step: 280/459, loss: 0.062029290944337845 2023-01-24 02:47:10.749869: step: 282/459, loss: 0.1468910425901413 2023-01-24 02:47:11.373488: step: 284/459, loss: 0.10094645619392395 2023-01-24 02:47:12.094344: step: 286/459, loss: 0.13057462871074677 2023-01-24 02:47:12.633861: step: 288/459, loss: 0.023057328537106514 2023-01-24 02:47:13.264384: step: 290/459, loss: 0.03815563768148422 2023-01-24 02:47:13.938916: step: 292/459, loss: 0.026620684191584587 2023-01-24 02:47:14.577438: step: 294/459, loss: 0.003909250721335411 2023-01-24 02:47:15.150940: step: 296/459, loss: 0.002017376944422722 2023-01-24 02:47:15.754280: step: 298/459, loss: 0.06528027355670929 2023-01-24 02:47:16.365681: step: 300/459, loss: 0.013019753620028496 2023-01-24 02:47:17.030501: step: 302/459, loss: 0.005867501255124807 2023-01-24 02:47:17.632927: step: 304/459, loss: 0.58479243516922 2023-01-24 02:47:18.222418: step: 306/459, loss: 0.014086171984672546 2023-01-24 02:47:18.788546: step: 308/459, loss: 0.016134057193994522 2023-01-24 02:47:19.372196: step: 310/459, loss: 0.202149897813797 2023-01-24 02:47:20.014773: step: 312/459, loss: 0.029521243646740913 2023-01-24 02:47:20.603206: step: 314/459, loss: 0.052937813103199005 2023-01-24 02:47:21.334390: step: 316/459, loss: 0.22502993047237396 2023-01-24 02:47:21.884751: step: 318/459, loss: 0.059039853513240814 2023-01-24 02:47:22.471323: step: 320/459, loss: 0.0023631094954907894 2023-01-24 02:47:23.085524: step: 322/459, loss: 0.10299938917160034 2023-01-24 02:47:23.661202: step: 324/459, loss: 0.009847824461758137 2023-01-24 02:47:24.304078: step: 326/459, loss: 0.018547626212239265 2023-01-24 02:47:24.951228: step: 328/459, loss: 0.015748074278235435 2023-01-24 02:47:25.547549: step: 330/459, loss: 0.07016324251890182 2023-01-24 02:47:26.194290: step: 332/459, loss: 0.054239511489868164 2023-01-24 02:47:26.862393: step: 334/459, loss: 
0.031427524983882904 2023-01-24 02:47:27.460312: step: 336/459, loss: 0.011760934256017208 2023-01-24 02:47:28.172988: step: 338/459, loss: 0.026989733800292015 2023-01-24 02:47:28.770456: step: 340/459, loss: 0.02535509131848812 2023-01-24 02:47:29.405206: step: 342/459, loss: 0.054435137659311295 2023-01-24 02:47:30.105697: step: 344/459, loss: 0.14568592607975006 2023-01-24 02:47:30.796778: step: 346/459, loss: 0.012741419486701488 2023-01-24 02:47:31.434006: step: 348/459, loss: 0.01656206138432026 2023-01-24 02:47:31.964960: step: 350/459, loss: 0.01325094886124134 2023-01-24 02:47:32.554712: step: 352/459, loss: 0.0746050551533699 2023-01-24 02:47:33.221529: step: 354/459, loss: 0.05823260173201561 2023-01-24 02:47:33.778585: step: 356/459, loss: 0.13165494799613953 2023-01-24 02:47:34.390906: step: 358/459, loss: 0.03915419429540634 2023-01-24 02:47:35.050099: step: 360/459, loss: 0.056740500032901764 2023-01-24 02:47:35.831689: step: 362/459, loss: 0.042967963963747025 2023-01-24 02:47:36.470725: step: 364/459, loss: 0.013027814216911793 2023-01-24 02:47:37.157170: step: 366/459, loss: 0.02394726127386093 2023-01-24 02:47:37.778221: step: 368/459, loss: 0.080499067902565 2023-01-24 02:47:38.364298: step: 370/459, loss: 0.045137856155633926 2023-01-24 02:47:38.971724: step: 372/459, loss: 0.010560013353824615 2023-01-24 02:47:39.585966: step: 374/459, loss: 0.046152662485837936 2023-01-24 02:47:40.189933: step: 376/459, loss: 0.029851224273443222 2023-01-24 02:47:40.847381: step: 378/459, loss: 0.20577174425125122 2023-01-24 02:47:41.425177: step: 380/459, loss: 0.05547810345888138 2023-01-24 02:47:42.065842: step: 382/459, loss: 0.03539254143834114 2023-01-24 02:47:42.689833: step: 384/459, loss: 0.5119855999946594 2023-01-24 02:47:43.240612: step: 386/459, loss: 0.003971648868173361 2023-01-24 02:47:43.887322: step: 388/459, loss: 0.015139984898269176 2023-01-24 02:47:44.472912: step: 390/459, loss: 0.04921640455722809 2023-01-24 02:47:45.062927: step: 392/459, loss: 0.047349974513053894 2023-01-24 02:47:45.657092: step: 394/459, loss: 0.03905944526195526 2023-01-24 02:47:46.276389: step: 396/459, loss: 0.12253505736589432 2023-01-24 02:47:46.958846: step: 398/459, loss: 0.09275754541158676 2023-01-24 02:47:47.552158: step: 400/459, loss: 0.021676858887076378 2023-01-24 02:47:48.150212: step: 402/459, loss: 0.05720597133040428 2023-01-24 02:47:48.769417: step: 404/459, loss: 0.03173650801181793 2023-01-24 02:47:49.436714: step: 406/459, loss: 0.15850982069969177 2023-01-24 02:47:50.095906: step: 408/459, loss: 0.48702970147132874 2023-01-24 02:47:50.720376: step: 410/459, loss: 0.046737879514694214 2023-01-24 02:47:51.257188: step: 412/459, loss: 0.010132639668881893 2023-01-24 02:47:51.896814: step: 414/459, loss: 0.060041919350624084 2023-01-24 02:47:52.517260: step: 416/459, loss: 0.052958425134420395 2023-01-24 02:47:53.156247: step: 418/459, loss: 0.06799822300672531 2023-01-24 02:47:53.778592: step: 420/459, loss: 0.7281894683837891 2023-01-24 02:47:54.428700: step: 422/459, loss: 0.026650937274098396 2023-01-24 02:47:55.040152: step: 424/459, loss: 0.03284631296992302 2023-01-24 02:47:55.579445: step: 426/459, loss: 0.0765705406665802 2023-01-24 02:47:56.185093: step: 428/459, loss: 0.0504569411277771 2023-01-24 02:47:56.754423: step: 430/459, loss: 0.05993768200278282 2023-01-24 02:47:57.331992: step: 432/459, loss: 0.03980689123272896 2023-01-24 02:47:57.890762: step: 434/459, loss: 0.0021824659779667854 2023-01-24 02:47:58.549465: step: 436/459, loss: 0.06021382659673691 
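
The evaluation blocks that close each epoch report 'p', 'r', 'f1' at the template and slot levels plus a 'combined' number. In the logged values, each 'f1' is the harmonic mean of its 'p' and 'r', and 'combined' equals template f1 times slot f1. A small sanity check against the Dev Chinese block of epoch 21 (the scores are copied verbatim from the log; the helper itself is only an illustrative sketch, not taken from train.py):

def f1(p, r):
    # Harmonic mean of precision and recall.
    return 2 * p * r / (p + r) if (p + r) else 0.0

# Dev Chinese scores for epoch 21, copied verbatim from the log above.
dev_chinese_ep21 = {
    'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579},
    'slot': {'p': 0.33429026124338623, 'r': 0.319700743200506, 'f1': 0.32683276753960555},
    'combined': 0.24082414450286724,
    'epoch': 21,
}

t, s = dev_chinese_ep21['template'], dev_chinese_ep21['slot']
assert abs(f1(t['p'], t['r']) - t['f1']) < 1e-6
assert abs(f1(s['p'], s['r']) - s['f1']) < 1e-6
assert abs(t['f1'] * s['f1'] - dev_chinese_ep21['combined']) < 1e-6
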
2023-01-24 02:47:59.201268: step: 438/459, loss: 0.11384029686450958 2023-01-24 02:47:59.821653: step: 440/459, loss: 0.08062213659286499 2023-01-24 02:48:00.374609: step: 442/459, loss: 0.05608866363763809 2023-01-24 02:48:01.010645: step: 444/459, loss: 0.05940686911344528 2023-01-24 02:48:01.646875: step: 446/459, loss: 0.10483574122190475 2023-01-24 02:48:02.232825: step: 448/459, loss: 0.24563108384609222 2023-01-24 02:48:02.862969: step: 450/459, loss: 0.06778449565172195 2023-01-24 02:48:03.447002: step: 452/459, loss: 0.0028401196468621492 2023-01-24 02:48:04.075182: step: 454/459, loss: 0.1017361730337143 2023-01-24 02:48:04.707446: step: 456/459, loss: 0.07263899594545364 2023-01-24 02:48:05.346301: step: 458/459, loss: 0.05099988356232643 2023-01-24 02:48:05.916069: step: 460/459, loss: 0.03813786432147026 2023-01-24 02:48:06.543082: step: 462/459, loss: 0.017515167593955994 2023-01-24 02:48:07.159331: step: 464/459, loss: 0.009262602776288986 2023-01-24 02:48:07.843009: step: 466/459, loss: 0.07479911297559738 2023-01-24 02:48:08.468441: step: 468/459, loss: 0.01600450649857521 2023-01-24 02:48:09.117483: step: 470/459, loss: 0.020290570333600044 2023-01-24 02:48:09.737061: step: 472/459, loss: 0.0244107898324728 2023-01-24 02:48:10.326678: step: 474/459, loss: 0.0017009639414027333 2023-01-24 02:48:10.908327: step: 476/459, loss: 0.03471878170967102 2023-01-24 02:48:11.480708: step: 478/459, loss: 0.08630511164665222 2023-01-24 02:48:12.076813: step: 480/459, loss: 0.0993325486779213 2023-01-24 02:48:12.613009: step: 482/459, loss: 0.0034499175380915403 2023-01-24 02:48:13.186901: step: 484/459, loss: 0.024724366143345833 2023-01-24 02:48:13.819392: step: 486/459, loss: 0.07213571667671204 2023-01-24 02:48:14.432939: step: 488/459, loss: 0.0054709953255951405 2023-01-24 02:48:15.025294: step: 490/459, loss: 0.04365076869726181 2023-01-24 02:48:15.707621: step: 492/459, loss: 0.05851982161402702 2023-01-24 02:48:16.320332: step: 494/459, loss: 0.03434891998767853 2023-01-24 02:48:16.971619: step: 496/459, loss: 0.031717147678136826 2023-01-24 02:48:17.662594: step: 498/459, loss: 0.06231527402997017 2023-01-24 02:48:18.259966: step: 500/459, loss: 1.1979258060455322 2023-01-24 02:48:19.011874: step: 502/459, loss: 0.0330733060836792 2023-01-24 02:48:19.718257: step: 504/459, loss: 0.0870317816734314 2023-01-24 02:48:20.393055: step: 506/459, loss: 0.013599671423435211 2023-01-24 02:48:21.015489: step: 508/459, loss: 0.08704257011413574 2023-01-24 02:48:21.637027: step: 510/459, loss: 0.20451131463050842 2023-01-24 02:48:22.191634: step: 512/459, loss: 0.05762092396616936 2023-01-24 02:48:22.799322: step: 514/459, loss: 0.010723981074988842 2023-01-24 02:48:23.463238: step: 516/459, loss: 0.09636347740888596 2023-01-24 02:48:24.069894: step: 518/459, loss: 0.01661720685660839 2023-01-24 02:48:24.690234: step: 520/459, loss: 0.017433617264032364 2023-01-24 02:48:25.376599: step: 522/459, loss: 0.006725629325956106 2023-01-24 02:48:26.013641: step: 524/459, loss: 0.08519244939088821 2023-01-24 02:48:26.613774: step: 526/459, loss: 0.012527345679700375 2023-01-24 02:48:27.240491: step: 528/459, loss: 0.03936615213751793 2023-01-24 02:48:27.857461: step: 530/459, loss: 0.07488248497247696 2023-01-24 02:48:28.475964: step: 532/459, loss: 0.022477619349956512 2023-01-24 02:48:29.057003: step: 534/459, loss: 0.013012945652008057 2023-01-24 02:48:29.657686: step: 536/459, loss: 0.05087803676724434 2023-01-24 02:48:30.322964: step: 538/459, loss: 0.029151184484362602 2023-01-24 
02:48:31.065856: step: 540/459, loss: 0.004275529645383358 2023-01-24 02:48:31.733926: step: 542/459, loss: 0.005190553143620491 2023-01-24 02:48:32.397406: step: 544/459, loss: 0.07205995172262192 2023-01-24 02:48:33.012901: step: 546/459, loss: 0.28979334235191345 2023-01-24 02:48:33.600136: step: 548/459, loss: 0.03955889865756035 2023-01-24 02:48:34.269976: step: 550/459, loss: 0.004679565317928791 2023-01-24 02:48:34.936437: step: 552/459, loss: 0.016662269830703735 2023-01-24 02:48:35.496775: step: 554/459, loss: 0.008621640503406525 2023-01-24 02:48:36.105505: step: 556/459, loss: 0.02240343578159809 2023-01-24 02:48:36.714060: step: 558/459, loss: 0.7953637838363647 2023-01-24 02:48:37.337271: step: 560/459, loss: 0.00702751474454999 2023-01-24 02:48:37.967879: step: 562/459, loss: 0.14435219764709473 2023-01-24 02:48:38.645261: step: 564/459, loss: 0.13520044088363647 2023-01-24 02:48:39.228920: step: 566/459, loss: 0.009311078116297722 2023-01-24 02:48:39.818420: step: 568/459, loss: 0.056057706475257874 2023-01-24 02:48:40.451434: step: 570/459, loss: 0.058336514979600906 2023-01-24 02:48:41.046693: step: 572/459, loss: 0.1132848784327507 2023-01-24 02:48:41.712132: step: 574/459, loss: 0.06190592423081398 2023-01-24 02:48:42.316470: step: 576/459, loss: 0.01092554908245802 2023-01-24 02:48:42.874417: step: 578/459, loss: 0.04402356222271919 2023-01-24 02:48:43.557731: step: 580/459, loss: 0.0620778389275074 2023-01-24 02:48:44.187533: step: 582/459, loss: 0.018952863290905952 2023-01-24 02:48:44.833195: step: 584/459, loss: 0.004666609689593315 2023-01-24 02:48:45.512735: step: 586/459, loss: 0.1136830672621727 2023-01-24 02:48:46.129137: step: 588/459, loss: 0.011770782992243767 2023-01-24 02:48:46.801723: step: 590/459, loss: 0.33300691843032837 2023-01-24 02:48:47.458474: step: 592/459, loss: 0.02036369778215885 2023-01-24 02:48:48.024706: step: 594/459, loss: 0.016575761139392853 2023-01-24 02:48:48.619968: step: 596/459, loss: 0.012957151979207993 2023-01-24 02:48:49.272823: step: 598/459, loss: 0.016039161011576653 2023-01-24 02:48:49.838095: step: 600/459, loss: 0.009427663870155811 2023-01-24 02:48:50.470163: step: 602/459, loss: 0.013199999928474426 2023-01-24 02:48:51.026589: step: 604/459, loss: 0.009907972067594528 2023-01-24 02:48:51.628960: step: 606/459, loss: 0.014565858989953995 2023-01-24 02:48:52.192913: step: 608/459, loss: 0.014164583757519722 2023-01-24 02:48:52.895377: step: 610/459, loss: 0.1879325658082962 2023-01-24 02:48:53.478507: step: 612/459, loss: 0.013257795944809914 2023-01-24 02:48:54.170740: step: 614/459, loss: 0.08545474708080292 2023-01-24 02:48:54.813442: step: 616/459, loss: 0.014736698940396309 2023-01-24 02:48:55.414962: step: 618/459, loss: 0.0038821841590106487 2023-01-24 02:48:56.092650: step: 620/459, loss: 0.5481931567192078 2023-01-24 02:48:56.709635: step: 622/459, loss: 0.005487872753292322 2023-01-24 02:48:57.324272: step: 624/459, loss: 0.030155016109347343 2023-01-24 02:48:57.937199: step: 626/459, loss: 0.006037791725248098 2023-01-24 02:48:58.565941: step: 628/459, loss: 0.022363705560564995 2023-01-24 02:48:59.199405: step: 630/459, loss: 0.111129529774189 2023-01-24 02:48:59.755445: step: 632/459, loss: 0.057883311063051224 2023-01-24 02:49:00.369245: step: 634/459, loss: 0.022456781938672066 2023-01-24 02:49:00.957805: step: 636/459, loss: 0.5322012901306152 2023-01-24 02:49:01.557585: step: 638/459, loss: 0.12849658727645874 2023-01-24 02:49:02.203256: step: 640/459, loss: 0.016393398866057396 2023-01-24 
02:49:02.809959: step: 642/459, loss: 0.0775933712720871 2023-01-24 02:49:03.438676: step: 644/459, loss: 0.0185804832726717 2023-01-24 02:49:04.060243: step: 646/459, loss: 0.0440424308180809 2023-01-24 02:49:04.718045: step: 648/459, loss: 0.08578051626682281 2023-01-24 02:49:05.379771: step: 650/459, loss: 0.04826183617115021 2023-01-24 02:49:06.013345: step: 652/459, loss: 0.020569708198308945 2023-01-24 02:49:06.641674: step: 654/459, loss: 0.18465924263000488 2023-01-24 02:49:07.253026: step: 656/459, loss: 0.027193283662199974 2023-01-24 02:49:07.950673: step: 658/459, loss: 0.1518525928258896 2023-01-24 02:49:08.613784: step: 660/459, loss: 0.0571848526597023 2023-01-24 02:49:09.182634: step: 662/459, loss: 0.02042914554476738 2023-01-24 02:49:09.821339: step: 664/459, loss: 0.03731910511851311 2023-01-24 02:49:10.485222: step: 666/459, loss: 0.020956045016646385 2023-01-24 02:49:11.091811: step: 668/459, loss: 0.08580338954925537 2023-01-24 02:49:11.727470: step: 670/459, loss: 0.0259641595184803 2023-01-24 02:49:12.364811: step: 672/459, loss: 0.04611656069755554 2023-01-24 02:49:13.069898: step: 674/459, loss: 0.12766486406326294 2023-01-24 02:49:13.665983: step: 676/459, loss: 0.28064680099487305 2023-01-24 02:49:14.305507: step: 678/459, loss: 0.006643943954259157 2023-01-24 02:49:14.987908: step: 680/459, loss: 0.04816385358572006 2023-01-24 02:49:15.593599: step: 682/459, loss: 0.08216135203838348 2023-01-24 02:49:16.233752: step: 684/459, loss: 0.04351931810379028 2023-01-24 02:49:16.842465: step: 686/459, loss: 0.10181744396686554 2023-01-24 02:49:17.498310: step: 688/459, loss: 0.032159268856048584 2023-01-24 02:49:18.204566: step: 690/459, loss: 0.05286663398146629 2023-01-24 02:49:18.856114: step: 692/459, loss: 0.05202174186706543 2023-01-24 02:49:19.520215: step: 694/459, loss: 0.041786856949329376 2023-01-24 02:49:20.225290: step: 696/459, loss: 0.030061569064855576 2023-01-24 02:49:20.851948: step: 698/459, loss: 0.01386939361691475 2023-01-24 02:49:21.547561: step: 700/459, loss: 0.2621559798717499 2023-01-24 02:49:22.122864: step: 702/459, loss: 0.010600157082080841 2023-01-24 02:49:22.752627: step: 704/459, loss: 0.13119009137153625 2023-01-24 02:49:23.374508: step: 706/459, loss: 0.07296766340732574 2023-01-24 02:49:23.960866: step: 708/459, loss: 0.008275248110294342 2023-01-24 02:49:24.518598: step: 710/459, loss: 0.042562272399663925 2023-01-24 02:49:25.198595: step: 712/459, loss: 0.017191961407661438 2023-01-24 02:49:25.843635: step: 714/459, loss: 0.010890362784266472 2023-01-24 02:49:26.494312: step: 716/459, loss: 0.2692664563655853 2023-01-24 02:49:27.111635: step: 718/459, loss: 0.0364135317504406 2023-01-24 02:49:27.733197: step: 720/459, loss: 0.01956172287464142 2023-01-24 02:49:28.323308: step: 722/459, loss: 0.009009202010929585 2023-01-24 02:49:28.921199: step: 724/459, loss: 0.06195630133152008 2023-01-24 02:49:29.496883: step: 726/459, loss: 0.028130752965807915 2023-01-24 02:49:30.050321: step: 728/459, loss: 0.06968555599451065 2023-01-24 02:49:30.643998: step: 730/459, loss: 0.012265793047845364 2023-01-24 02:49:31.250226: step: 732/459, loss: 0.06454727053642273 2023-01-24 02:49:31.874885: step: 734/459, loss: 0.09266804903745651 2023-01-24 02:49:32.476693: step: 736/459, loss: 0.0009717458160594106 2023-01-24 02:49:33.054165: step: 738/459, loss: 0.018550487235188484 2023-01-24 02:49:33.683764: step: 740/459, loss: 0.004285939037799835 2023-01-24 02:49:34.340750: step: 742/459, loss: 0.048729315400123596 2023-01-24 02:49:34.945745: step: 
744/459, loss: 0.010147066786885262 2023-01-24 02:49:35.562988: step: 746/459, loss: 0.004584996495395899 2023-01-24 02:49:36.213698: step: 748/459, loss: 0.027649274095892906 2023-01-24 02:49:36.917652: step: 750/459, loss: 0.21887217462062836 2023-01-24 02:49:37.520163: step: 752/459, loss: 0.009706034325063229 2023-01-24 02:49:38.168328: step: 754/459, loss: 0.02815350517630577 2023-01-24 02:49:38.787242: step: 756/459, loss: 0.010855340398848057 2023-01-24 02:49:39.339131: step: 758/459, loss: 0.03639065846800804 2023-01-24 02:49:39.910263: step: 760/459, loss: 0.021287916228175163 2023-01-24 02:49:40.468915: step: 762/459, loss: 0.0659450814127922 2023-01-24 02:49:41.040926: step: 764/459, loss: 0.014639993198215961 2023-01-24 02:49:41.643326: step: 766/459, loss: 0.03003508225083351 2023-01-24 02:49:42.277460: step: 768/459, loss: 0.049379538744688034 2023-01-24 02:49:42.931175: step: 770/459, loss: 0.015600327402353287 2023-01-24 02:49:43.460822: step: 772/459, loss: 0.14695224165916443 2023-01-24 02:49:44.091800: step: 774/459, loss: 0.025068730115890503 2023-01-24 02:49:44.658743: step: 776/459, loss: 0.019659649580717087 2023-01-24 02:49:45.279645: step: 778/459, loss: 0.0067901043221354485 2023-01-24 02:49:45.940250: step: 780/459, loss: 0.04996926710009575 2023-01-24 02:49:46.546872: step: 782/459, loss: 0.044487785547971725 2023-01-24 02:49:47.214682: step: 784/459, loss: 0.021642165258526802 2023-01-24 02:49:47.765903: step: 786/459, loss: 0.016132980585098267 2023-01-24 02:49:48.443894: step: 788/459, loss: 0.04918060079216957 2023-01-24 02:49:49.109924: step: 790/459, loss: 0.12962354719638824 2023-01-24 02:49:49.748409: step: 792/459, loss: 0.0725344568490982 2023-01-24 02:49:50.431742: step: 794/459, loss: 0.2268894463777542 2023-01-24 02:49:51.050738: step: 796/459, loss: 0.08888950943946838 2023-01-24 02:49:51.661093: step: 798/459, loss: 0.091502845287323 2023-01-24 02:49:52.300054: step: 800/459, loss: 0.004465257748961449 2023-01-24 02:49:52.868554: step: 802/459, loss: 0.019595719873905182 2023-01-24 02:49:53.510538: step: 804/459, loss: 0.04478674754500389 2023-01-24 02:49:54.160154: step: 806/459, loss: 0.013387189246714115 2023-01-24 02:49:54.816643: step: 808/459, loss: 0.07859260588884354 2023-01-24 02:49:55.413979: step: 810/459, loss: 0.0573100708425045 2023-01-24 02:49:55.990528: step: 812/459, loss: 0.009529469534754753 2023-01-24 02:49:56.614965: step: 814/459, loss: 0.502430260181427 2023-01-24 02:49:57.283834: step: 816/459, loss: 0.06899656355381012 2023-01-24 02:49:57.903199: step: 818/459, loss: 0.08285260945558548 2023-01-24 02:49:58.497347: step: 820/459, loss: 0.10494013130664825 2023-01-24 02:49:59.064990: step: 822/459, loss: 0.005916796624660492 2023-01-24 02:49:59.712413: step: 824/459, loss: 0.012785524129867554 2023-01-24 02:50:00.303303: step: 826/459, loss: 0.036394622176885605 2023-01-24 02:50:00.953228: step: 828/459, loss: 0.025332171469926834 2023-01-24 02:50:01.603221: step: 830/459, loss: 0.010647272691130638 2023-01-24 02:50:02.272898: step: 832/459, loss: 0.0032494496554136276 2023-01-24 02:50:03.059461: step: 834/459, loss: 0.11896999180316925 2023-01-24 02:50:03.678864: step: 836/459, loss: 0.13433776795864105 2023-01-24 02:50:04.324282: step: 838/459, loss: 0.03276222199201584 2023-01-24 02:50:04.923700: step: 840/459, loss: 0.04768996313214302 2023-01-24 02:50:05.525428: step: 842/459, loss: 0.012092847377061844 2023-01-24 02:50:06.122625: step: 844/459, loss: 0.04938676208257675 2023-01-24 02:50:06.727611: step: 846/459, loss: 
0.06745370477437973 2023-01-24 02:50:07.404205: step: 848/459, loss: 0.36478057503700256 2023-01-24 02:50:08.018057: step: 850/459, loss: 0.08812043070793152 2023-01-24 02:50:08.585990: step: 852/459, loss: 0.01182335801422596 2023-01-24 02:50:09.215570: step: 854/459, loss: 0.0704980418086052 2023-01-24 02:50:09.805187: step: 856/459, loss: 0.04450513422489166 2023-01-24 02:50:10.429205: step: 858/459, loss: 0.19032496213912964 2023-01-24 02:50:11.071328: step: 860/459, loss: 0.1900949627161026 2023-01-24 02:50:11.698380: step: 862/459, loss: 0.02412947453558445 2023-01-24 02:50:12.280151: step: 864/459, loss: 0.015560764819383621 2023-01-24 02:50:12.926344: step: 866/459, loss: 0.09000065177679062 2023-01-24 02:50:13.537779: step: 868/459, loss: 0.05013231560587883 2023-01-24 02:50:14.253581: step: 870/459, loss: 0.036002278327941895 2023-01-24 02:50:14.850732: step: 872/459, loss: 0.01337822899222374 2023-01-24 02:50:15.400244: step: 874/459, loss: 0.046018701046705246 2023-01-24 02:50:16.004578: step: 876/459, loss: 0.08340325206518173 2023-01-24 02:50:16.631068: step: 878/459, loss: 0.043796468526124954 2023-01-24 02:50:17.323410: step: 880/459, loss: 0.009450560435652733 2023-01-24 02:50:17.944227: step: 882/459, loss: 0.019608665257692337 2023-01-24 02:50:18.577534: step: 884/459, loss: 0.035260677337646484 2023-01-24 02:50:19.135325: step: 886/459, loss: 0.1404682695865631 2023-01-24 02:50:19.878710: step: 888/459, loss: 0.06222408637404442 2023-01-24 02:50:20.488040: step: 890/459, loss: 0.06914512068033218 2023-01-24 02:50:21.127521: step: 892/459, loss: 0.02122936025261879 2023-01-24 02:50:21.832303: step: 894/459, loss: 0.03692995011806488 2023-01-24 02:50:22.464382: step: 896/459, loss: 0.14544624090194702 2023-01-24 02:50:23.036818: step: 898/459, loss: 0.01455566007643938 2023-01-24 02:50:23.721899: step: 900/459, loss: 0.02185020223259926 2023-01-24 02:50:24.294252: step: 902/459, loss: 0.012591151520609856 2023-01-24 02:50:24.995258: step: 904/459, loss: 0.0789497122168541 2023-01-24 02:50:25.624182: step: 906/459, loss: 0.4094599783420563 2023-01-24 02:50:26.266526: step: 908/459, loss: 0.029437251389026642 2023-01-24 02:50:26.903959: step: 910/459, loss: 0.1498805433511734 2023-01-24 02:50:27.497732: step: 912/459, loss: 0.04888227581977844 2023-01-24 02:50:28.083324: step: 914/459, loss: 0.06072673201560974 2023-01-24 02:50:28.763175: step: 916/459, loss: 0.025668364018201828 2023-01-24 02:50:29.394024: step: 918/459, loss: 0.048669926822185516 2023-01-24 02:50:29.839710: step: 920/459, loss: 0.0004160644894000143 ================================================== Loss: 0.070 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3239747302794482, 'r': 0.31598294376401587, 'f1': 0.31992893633743774}, 'combined': 0.23573711098548042, 'epoch': 22} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34423853906952573, 'r': 0.30543346739259736, 'f1': 0.32367708490545}, 'combined': 0.20715333433948796, 'epoch': 22} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3228793511321392, 'r': 0.3296187683284457, 'f1': 0.3262142552283397}, 'combined': 0.24036839858930292, 'epoch': 22} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34290433200456477, 'r': 0.2977033064221449, 'f1': 0.3187091358290602}, 'combined': 
0.2039738469305985, 'epoch': 22} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3482557438643373, 'r': 0.3350392070952922, 'f1': 0.3415196559752785}, 'combined': 0.25164606229757364, 'epoch': 22} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34632357731604846, 'r': 0.31888263812980416, 'f1': 0.33203711395592694}, 'combined': 0.23806434585519293, 'epoch': 22} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.30392156862745096, 'r': 0.2952380952380952, 'f1': 0.2995169082125604}, 'combined': 0.1996779388083736, 'epoch': 22} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3055555555555556, 'r': 0.358695652173913, 'f1': 0.32999999999999996}, 'combined': 0.16499999999999998, 'epoch': 22} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36363636363636365, 'r': 0.13793103448275862, 'f1': 0.2}, 'combined': 0.13333333333333333, 'epoch': 22} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 'epoch': 10} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 'epoch': 10} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'epoch': 10} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3342478880342958, 'r': 0.3266369304319968, 'f1': 0.33039858414138645}, 'combined': 0.24345158831470579, 'epoch': 5} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3413499740991752, 'r': 0.24608229950967814, 'f1': 0.28599105067157526}, 'combined': 0.18303427242980813, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3269230769230769, 'r': 0.3695652173913043, 'f1': 0.346938775510204}, 'combined': 0.173469387755102, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8} ****************************** Epoch: 23 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 02:53:06.064452: step: 2/459, loss: 0.012074586935341358 2023-01-24 
02:53:06.693022: step: 4/459, loss: 0.12527024745941162 2023-01-24 02:53:07.432835: step: 6/459, loss: 0.10792862623929977 2023-01-24 02:53:08.118225: step: 8/459, loss: 0.031548213213682175 2023-01-24 02:53:08.772431: step: 10/459, loss: 0.10656685382127762 2023-01-24 02:53:09.424380: step: 12/459, loss: 0.09433260560035706 2023-01-24 02:53:10.040165: step: 14/459, loss: 0.09119603037834167 2023-01-24 02:53:10.672748: step: 16/459, loss: 0.04885183274745941 2023-01-24 02:53:11.330633: step: 18/459, loss: 0.04173801839351654 2023-01-24 02:53:11.940462: step: 20/459, loss: 0.016439134255051613 2023-01-24 02:53:12.549520: step: 22/459, loss: 0.03786846995353699 2023-01-24 02:53:13.179532: step: 24/459, loss: 0.014143424108624458 2023-01-24 02:53:13.774081: step: 26/459, loss: 0.03709794208407402 2023-01-24 02:53:14.334899: step: 28/459, loss: 0.11596716940402985 2023-01-24 02:53:15.007339: step: 30/459, loss: 0.006335001904517412 2023-01-24 02:53:15.626975: step: 32/459, loss: 0.021276667714118958 2023-01-24 02:53:16.318700: step: 34/459, loss: 0.0623537041246891 2023-01-24 02:53:16.927685: step: 36/459, loss: 0.0312967486679554 2023-01-24 02:53:17.536817: step: 38/459, loss: 0.005486996844410896 2023-01-24 02:53:18.177901: step: 40/459, loss: 0.027745593339204788 2023-01-24 02:53:18.846234: step: 42/459, loss: 0.03242529556155205 2023-01-24 02:53:19.499798: step: 44/459, loss: 0.008989173918962479 2023-01-24 02:53:20.113411: step: 46/459, loss: 0.02535748854279518 2023-01-24 02:53:20.758501: step: 48/459, loss: 0.027887940406799316 2023-01-24 02:53:21.425949: step: 50/459, loss: 0.050659094005823135 2023-01-24 02:53:22.025340: step: 52/459, loss: 0.05957222729921341 2023-01-24 02:53:22.700171: step: 54/459, loss: 0.03669968247413635 2023-01-24 02:53:23.271995: step: 56/459, loss: 0.010669194161891937 2023-01-24 02:53:23.904314: step: 58/459, loss: 0.031396396458148956 2023-01-24 02:53:24.501594: step: 60/459, loss: 0.03349947929382324 2023-01-24 02:53:25.076423: step: 62/459, loss: 0.1258593201637268 2023-01-24 02:53:25.701264: step: 64/459, loss: 0.01724674552679062 2023-01-24 02:53:26.345909: step: 66/459, loss: 0.7532044649124146 2023-01-24 02:53:26.964076: step: 68/459, loss: 0.023669321089982986 2023-01-24 02:53:27.611786: step: 70/459, loss: 0.014826754108071327 2023-01-24 02:53:28.252009: step: 72/459, loss: 0.05434068664908409 2023-01-24 02:53:28.904404: step: 74/459, loss: 0.023616181686520576 2023-01-24 02:53:29.485659: step: 76/459, loss: 0.014671619981527328 2023-01-24 02:53:30.105603: step: 78/459, loss: 0.042053744196891785 2023-01-24 02:53:30.730466: step: 80/459, loss: 0.004624681547284126 2023-01-24 02:53:31.352418: step: 82/459, loss: 0.02580920234322548 2023-01-24 02:53:31.966738: step: 84/459, loss: 0.003537988755851984 2023-01-24 02:53:32.559232: step: 86/459, loss: 0.03318526968359947 2023-01-24 02:53:33.218200: step: 88/459, loss: 0.093271404504776 2023-01-24 02:53:33.880084: step: 90/459, loss: 0.05305955559015274 2023-01-24 02:53:34.457917: step: 92/459, loss: 0.0994146317243576 2023-01-24 02:53:34.988104: step: 94/459, loss: 0.030215494334697723 2023-01-24 02:53:35.582103: step: 96/459, loss: 0.007922031916677952 2023-01-24 02:53:36.154662: step: 98/459, loss: 0.005161124747246504 2023-01-24 02:53:36.752214: step: 100/459, loss: 0.06544876843690872 2023-01-24 02:53:37.394471: step: 102/459, loss: 0.03134745731949806 2023-01-24 02:53:37.992480: step: 104/459, loss: 0.04905514791607857 2023-01-24 02:53:38.598155: step: 106/459, loss: 0.0075807166285812855 2023-01-24 
02:53:39.226351: step: 108/459, loss: 0.036528922617435455 2023-01-24 02:53:39.795061: step: 110/459, loss: 0.013472547754645348 2023-01-24 02:53:40.406160: step: 112/459, loss: 0.01719595678150654 2023-01-24 02:53:41.016697: step: 114/459, loss: 0.019739240407943726 2023-01-24 02:53:41.635919: step: 116/459, loss: 0.025053638964891434 2023-01-24 02:53:42.266514: step: 118/459, loss: 0.05044450983405113 2023-01-24 02:53:42.870393: step: 120/459, loss: 0.04058939591050148 2023-01-24 02:53:43.452348: step: 122/459, loss: 0.06432182341814041 2023-01-24 02:53:44.053061: step: 124/459, loss: 0.013774133287370205 2023-01-24 02:53:44.685707: step: 126/459, loss: 0.013910680077970028 2023-01-24 02:53:45.359459: step: 128/459, loss: 0.06578688323497772 2023-01-24 02:53:46.037820: step: 130/459, loss: 0.03682314231991768 2023-01-24 02:53:46.597861: step: 132/459, loss: 0.026071885600686073 2023-01-24 02:53:47.146436: step: 134/459, loss: 0.0005445979768410325 2023-01-24 02:53:47.713469: step: 136/459, loss: 0.0007204789435490966 2023-01-24 02:53:48.308941: step: 138/459, loss: 0.023479191586375237 2023-01-24 02:53:48.932253: step: 140/459, loss: 0.022132016718387604 2023-01-24 02:53:49.615988: step: 142/459, loss: 0.1713891327381134 2023-01-24 02:53:50.295851: step: 144/459, loss: 0.008058629930019379 2023-01-24 02:53:50.961336: step: 146/459, loss: 0.002855035476386547 2023-01-24 02:53:51.548222: step: 148/459, loss: 0.13736049830913544 2023-01-24 02:53:52.166241: step: 150/459, loss: 0.12215618044137955 2023-01-24 02:53:52.831536: step: 152/459, loss: 0.005800523329526186 2023-01-24 02:53:53.483638: step: 154/459, loss: 0.0822734534740448 2023-01-24 02:53:54.191291: step: 156/459, loss: 0.2724076807498932 2023-01-24 02:53:54.773426: step: 158/459, loss: 0.02209136076271534 2023-01-24 02:53:55.429259: step: 160/459, loss: 0.03235984593629837 2023-01-24 02:53:56.091892: step: 162/459, loss: 0.015409678220748901 2023-01-24 02:53:56.750321: step: 164/459, loss: 0.058305613696575165 2023-01-24 02:53:57.375684: step: 166/459, loss: 0.15123721957206726 2023-01-24 02:53:58.010808: step: 168/459, loss: 0.031101249158382416 2023-01-24 02:53:58.643102: step: 170/459, loss: 0.05006962642073631 2023-01-24 02:53:59.238251: step: 172/459, loss: 0.0171798262745142 2023-01-24 02:53:59.864974: step: 174/459, loss: 0.03039112687110901 2023-01-24 02:54:00.546937: step: 176/459, loss: 0.012670834548771381 2023-01-24 02:54:01.207924: step: 178/459, loss: 0.961486279964447 2023-01-24 02:54:01.893923: step: 180/459, loss: 0.012912362813949585 2023-01-24 02:54:02.488513: step: 182/459, loss: 0.004108508117496967 2023-01-24 02:54:03.184101: step: 184/459, loss: 0.09588613361120224 2023-01-24 02:54:03.813543: step: 186/459, loss: 0.14365553855895996 2023-01-24 02:54:04.451307: step: 188/459, loss: 0.256279855966568 2023-01-24 02:54:05.062830: step: 190/459, loss: 0.01402112003415823 2023-01-24 02:54:05.600891: step: 192/459, loss: 0.09226711839437485 2023-01-24 02:54:06.204174: step: 194/459, loss: 0.013786186464130878 2023-01-24 02:54:06.849859: step: 196/459, loss: 0.12736786901950836 2023-01-24 02:54:07.470334: step: 198/459, loss: 0.08436442911624908 2023-01-24 02:54:08.146423: step: 200/459, loss: 0.03243992477655411 2023-01-24 02:54:08.756699: step: 202/459, loss: 0.015408287756145 2023-01-24 02:54:09.381509: step: 204/459, loss: 0.004933853168040514 2023-01-24 02:54:09.988621: step: 206/459, loss: 0.04151984304189682 2023-01-24 02:54:10.630580: step: 208/459, loss: 0.050309062004089355 2023-01-24 02:54:11.229632: 
step: 210/459, loss: 0.2589106559753418 2023-01-24 02:54:11.827612: step: 212/459, loss: 0.11314365267753601 2023-01-24 02:54:12.451593: step: 214/459, loss: 0.0407874658703804 2023-01-24 02:54:13.035411: step: 216/459, loss: 0.06761202216148376 2023-01-24 02:54:13.683112: step: 218/459, loss: 0.015622560866177082 2023-01-24 02:54:14.340931: step: 220/459, loss: 0.21141932904720306 2023-01-24 02:54:14.972160: step: 222/459, loss: 0.1268448382616043 2023-01-24 02:54:15.572748: step: 224/459, loss: 0.03524945676326752 2023-01-24 02:54:16.145529: step: 226/459, loss: 0.025525948032736778 2023-01-24 02:54:16.744858: step: 228/459, loss: 0.007467614021152258 2023-01-24 02:54:17.357796: step: 230/459, loss: 0.039709072560071945 2023-01-24 02:54:17.965354: step: 232/459, loss: 0.049414779990911484 2023-01-24 02:54:18.594477: step: 234/459, loss: 0.02070026844739914 2023-01-24 02:54:19.282626: step: 236/459, loss: 0.11137732118368149 2023-01-24 02:54:19.949874: step: 238/459, loss: 0.004869761876761913 2023-01-24 02:54:20.563145: step: 240/459, loss: 0.03622918576002121 2023-01-24 02:54:21.182741: step: 242/459, loss: 0.016933623701334 2023-01-24 02:54:21.793605: step: 244/459, loss: 0.01299294549971819 2023-01-24 02:54:22.467039: step: 246/459, loss: 0.0954475849866867 2023-01-24 02:54:23.063008: step: 248/459, loss: 0.012846496887505054 2023-01-24 02:54:23.694671: step: 250/459, loss: 0.021274784579873085 2023-01-24 02:54:24.304861: step: 252/459, loss: 0.04940425604581833 2023-01-24 02:54:24.834728: step: 254/459, loss: 0.04921872541308403 2023-01-24 02:54:25.470949: step: 256/459, loss: 0.07333771884441376 2023-01-24 02:54:26.097332: step: 258/459, loss: 0.028693148866295815 2023-01-24 02:54:26.722730: step: 260/459, loss: 0.05073268339037895 2023-01-24 02:54:27.377731: step: 262/459, loss: 0.05296669527888298 2023-01-24 02:54:28.006829: step: 264/459, loss: 0.00594786973670125 2023-01-24 02:54:28.686329: step: 266/459, loss: 0.026000771671533585 2023-01-24 02:54:29.293382: step: 268/459, loss: 0.18541376292705536 2023-01-24 02:54:29.877115: step: 270/459, loss: 0.08493668586015701 2023-01-24 02:54:30.516825: step: 272/459, loss: 0.013509964570403099 2023-01-24 02:54:31.150349: step: 274/459, loss: 0.1398460566997528 2023-01-24 02:54:31.762258: step: 276/459, loss: 0.06833967566490173 2023-01-24 02:54:32.402034: step: 278/459, loss: 0.022454705089330673 2023-01-24 02:54:33.055120: step: 280/459, loss: 0.0022395530249923468 2023-01-24 02:54:33.666039: step: 282/459, loss: 0.021008187904953957 2023-01-24 02:54:34.285017: step: 284/459, loss: 0.001094592153094709 2023-01-24 02:54:34.991903: step: 286/459, loss: 0.07170505076646805 2023-01-24 02:54:35.624415: step: 288/459, loss: 0.1987495720386505 2023-01-24 02:54:36.279351: step: 290/459, loss: 0.027168847620487213 2023-01-24 02:54:36.948954: step: 292/459, loss: 0.023003078997135162 2023-01-24 02:54:37.567833: step: 294/459, loss: 0.06690734624862671 2023-01-24 02:54:38.264799: step: 296/459, loss: 0.014312511309981346 2023-01-24 02:54:38.831637: step: 298/459, loss: 0.009971722960472107 2023-01-24 02:54:39.469330: step: 300/459, loss: 0.004523103125393391 2023-01-24 02:54:40.082312: step: 302/459, loss: 0.12277115136384964 2023-01-24 02:54:40.704801: step: 304/459, loss: 0.04621993005275726 2023-01-24 02:54:41.343412: step: 306/459, loss: 0.05466769263148308 2023-01-24 02:54:41.986714: step: 308/459, loss: 0.007382086478173733 2023-01-24 02:54:42.640691: step: 310/459, loss: 0.01784287765622139 2023-01-24 02:54:43.205047: step: 312/459, loss: 
0.015004025772213936 2023-01-24 02:54:43.824690: step: 314/459, loss: 0.01828419230878353 2023-01-24 02:54:44.426703: step: 316/459, loss: 0.0366315059363842 2023-01-24 02:54:45.088191: step: 318/459, loss: 0.04657652601599693 2023-01-24 02:54:45.761155: step: 320/459, loss: 0.04033559560775757 2023-01-24 02:54:46.358710: step: 322/459, loss: 0.02092553861439228 2023-01-24 02:54:47.153671: step: 324/459, loss: 0.028353367000818253 2023-01-24 02:54:47.715916: step: 326/459, loss: 0.022267447784543037 2023-01-24 02:54:48.345871: step: 328/459, loss: 0.031107502058148384 2023-01-24 02:54:49.008360: step: 330/459, loss: 0.04867973551154137 2023-01-24 02:54:49.586562: step: 332/459, loss: 0.02368372678756714 2023-01-24 02:54:50.222437: step: 334/459, loss: 0.5820201635360718 2023-01-24 02:54:50.788073: step: 336/459, loss: 0.003033228451386094 2023-01-24 02:54:51.378325: step: 338/459, loss: 0.016476523131132126 2023-01-24 02:54:51.990621: step: 340/459, loss: 0.061376042664051056 2023-01-24 02:54:52.589912: step: 342/459, loss: 0.024811886250972748 2023-01-24 02:54:53.218449: step: 344/459, loss: 0.04646265506744385 2023-01-24 02:54:53.830996: step: 346/459, loss: 0.008101758547127247 2023-01-24 02:54:54.460126: step: 348/459, loss: 0.0371074452996254 2023-01-24 02:54:55.035778: step: 350/459, loss: 0.06286902725696564 2023-01-24 02:54:55.706317: step: 352/459, loss: 0.009159360080957413 2023-01-24 02:54:56.317273: step: 354/459, loss: 0.02589336223900318 2023-01-24 02:54:56.965134: step: 356/459, loss: 0.08716321736574173 2023-01-24 02:54:57.654358: step: 358/459, loss: 0.020923301577568054 2023-01-24 02:54:58.212193: step: 360/459, loss: 0.0033969057258218527 2023-01-24 02:54:58.825632: step: 362/459, loss: 0.012562991119921207 2023-01-24 02:54:59.476427: step: 364/459, loss: 0.10364270210266113 2023-01-24 02:55:00.092427: step: 366/459, loss: 0.030485445633530617 2023-01-24 02:55:00.718337: step: 368/459, loss: 0.005521456245332956 2023-01-24 02:55:01.299016: step: 370/459, loss: 0.04610094055533409 2023-01-24 02:55:01.930536: step: 372/459, loss: 0.0933816060423851 2023-01-24 02:55:02.582019: step: 374/459, loss: 0.012479089200496674 2023-01-24 02:55:03.187729: step: 376/459, loss: 0.03908274695277214 2023-01-24 02:55:03.913294: step: 378/459, loss: 0.044082943350076675 2023-01-24 02:55:04.516544: step: 380/459, loss: 0.005740134045481682 2023-01-24 02:55:05.139603: step: 382/459, loss: 0.03294391185045242 2023-01-24 02:55:05.778980: step: 384/459, loss: 0.017264870926737785 2023-01-24 02:55:06.414403: step: 386/459, loss: 0.021990764886140823 2023-01-24 02:55:07.082390: step: 388/459, loss: 0.05364929139614105 2023-01-24 02:55:07.751931: step: 390/459, loss: 0.12989453971385956 2023-01-24 02:55:08.405813: step: 392/459, loss: 0.03485443443059921 2023-01-24 02:55:08.997898: step: 394/459, loss: 0.29024335741996765 2023-01-24 02:55:09.622519: step: 396/459, loss: 0.008667674846947193 2023-01-24 02:55:10.229993: step: 398/459, loss: 0.027496563270688057 2023-01-24 02:55:10.912389: step: 400/459, loss: 0.08607247471809387 2023-01-24 02:55:11.545685: step: 402/459, loss: 0.0708710327744484 2023-01-24 02:55:12.203305: step: 404/459, loss: 0.03827223554253578 2023-01-24 02:55:12.805484: step: 406/459, loss: 0.025970375165343285 2023-01-24 02:55:13.433585: step: 408/459, loss: 0.022959895431995392 2023-01-24 02:55:14.057240: step: 410/459, loss: 0.020584452897310257 2023-01-24 02:55:14.680912: step: 412/459, loss: 0.019253985956311226 2023-01-24 02:55:15.263344: step: 414/459, loss: 
0.0027357926592230797 2023-01-24 02:55:15.868610: step: 416/459, loss: 0.005222187843173742 2023-01-24 02:55:16.487881: step: 418/459, loss: 0.03858353942632675 2023-01-24 02:55:17.172122: step: 420/459, loss: 0.06537490338087082 2023-01-24 02:55:17.797641: step: 422/459, loss: 0.0046831597574055195 2023-01-24 02:55:18.369686: step: 424/459, loss: 0.0012490248773247004 2023-01-24 02:55:19.018365: step: 426/459, loss: 0.0668071061372757 2023-01-24 02:55:19.635351: step: 428/459, loss: 0.26235660910606384 2023-01-24 02:55:20.284183: step: 430/459, loss: 0.11779731512069702 2023-01-24 02:55:20.941283: step: 432/459, loss: 0.08203694969415665 2023-01-24 02:55:21.545217: step: 434/459, loss: 0.017113927751779556 2023-01-24 02:55:22.191322: step: 436/459, loss: 0.0030219366308301687 2023-01-24 02:55:22.843147: step: 438/459, loss: 0.03015967272222042 2023-01-24 02:55:23.482116: step: 440/459, loss: 0.43506497144699097 2023-01-24 02:55:24.109998: step: 442/459, loss: 0.08329861611127853 2023-01-24 02:55:24.789072: step: 444/459, loss: 0.014921310357749462 2023-01-24 02:55:25.410778: step: 446/459, loss: 0.0015847806353121996 2023-01-24 02:55:26.003208: step: 448/459, loss: 0.013554947450757027 2023-01-24 02:55:26.678525: step: 450/459, loss: 0.10819762945175171 2023-01-24 02:55:27.235107: step: 452/459, loss: 0.3437517285346985 2023-01-24 02:55:27.867108: step: 454/459, loss: 0.02282201312482357 2023-01-24 02:55:28.480222: step: 456/459, loss: 0.25195619463920593 2023-01-24 02:55:29.075458: step: 458/459, loss: 0.03672183305025101 2023-01-24 02:55:29.670613: step: 460/459, loss: 0.010873949155211449 2023-01-24 02:55:30.246777: step: 462/459, loss: 0.011975242756307125 2023-01-24 02:55:30.874004: step: 464/459, loss: 0.0210949145257473 2023-01-24 02:55:31.463887: step: 466/459, loss: 0.42017316818237305 2023-01-24 02:55:32.070342: step: 468/459, loss: 0.02011323906481266 2023-01-24 02:55:32.711471: step: 470/459, loss: 0.004386780317872763 2023-01-24 02:55:33.301117: step: 472/459, loss: 0.07060785591602325 2023-01-24 02:55:33.913843: step: 474/459, loss: 0.032789744436740875 2023-01-24 02:55:34.510387: step: 476/459, loss: 0.01926703192293644 2023-01-24 02:55:35.083032: step: 478/459, loss: 0.044668760150671005 2023-01-24 02:55:35.701790: step: 480/459, loss: 0.034898899495601654 2023-01-24 02:55:36.283363: step: 482/459, loss: 0.06781114637851715 2023-01-24 02:55:36.879571: step: 484/459, loss: 0.18661238253116608 2023-01-24 02:55:37.488302: step: 486/459, loss: 0.011112608015537262 2023-01-24 02:55:38.066224: step: 488/459, loss: 0.7615871429443359 2023-01-24 02:55:38.696015: step: 490/459, loss: 0.04537693411111832 2023-01-24 02:55:39.343714: step: 492/459, loss: 0.025857284665107727 2023-01-24 02:55:39.994100: step: 494/459, loss: 0.025303225964307785 2023-01-24 02:55:40.551337: step: 496/459, loss: 0.00043304942664690316 2023-01-24 02:55:41.235888: step: 498/459, loss: 0.002851334400475025 2023-01-24 02:55:41.851293: step: 500/459, loss: 0.004451768007129431 2023-01-24 02:55:42.443241: step: 502/459, loss: 0.003077533794566989 2023-01-24 02:55:43.054976: step: 504/459, loss: 0.010262541472911835 2023-01-24 02:55:43.621036: step: 506/459, loss: 0.027912352234125137 2023-01-24 02:55:44.272798: step: 508/459, loss: 0.037648871541023254 2023-01-24 02:55:44.943551: step: 510/459, loss: 0.08410654217004776 2023-01-24 02:55:45.561729: step: 512/459, loss: 0.052754808217287064 2023-01-24 02:55:46.195897: step: 514/459, loss: 0.04039740934967995 2023-01-24 02:55:46.771166: step: 516/459, loss: 
0.07158946245908737 2023-01-24 02:55:47.428908: step: 518/459, loss: 0.05878650024533272 2023-01-24 02:55:48.064177: step: 520/459, loss: 0.0008592797094024718 2023-01-24 02:55:48.638534: step: 522/459, loss: 0.004252637270838022 2023-01-24 02:55:49.294161: step: 524/459, loss: 0.01573898456990719 2023-01-24 02:55:49.946019: step: 526/459, loss: 0.09804709255695343 2023-01-24 02:55:50.534499: step: 528/459, loss: 0.3411743640899658 2023-01-24 02:55:51.096253: step: 530/459, loss: 0.07862994074821472 2023-01-24 02:55:51.729296: step: 532/459, loss: 0.03271352872252464 2023-01-24 02:55:52.325563: step: 534/459, loss: 0.004225669428706169 2023-01-24 02:55:52.952057: step: 536/459, loss: 0.025898341089487076 2023-01-24 02:55:53.562164: step: 538/459, loss: 0.01850232295691967 2023-01-24 02:55:54.197052: step: 540/459, loss: 0.050849560648202896 2023-01-24 02:55:54.791351: step: 542/459, loss: 0.032755471765995026 2023-01-24 02:55:55.460642: step: 544/459, loss: 0.4699884057044983 2023-01-24 02:55:56.059638: step: 546/459, loss: 0.03137214109301567 2023-01-24 02:55:56.632556: step: 548/459, loss: 0.11220861971378326 2023-01-24 02:55:57.266865: step: 550/459, loss: 0.04130680486559868 2023-01-24 02:55:57.898944: step: 552/459, loss: 0.17167915403842926 2023-01-24 02:55:58.548439: step: 554/459, loss: 0.07850068807601929 2023-01-24 02:55:59.154709: step: 556/459, loss: 0.045494552701711655 2023-01-24 02:55:59.721745: step: 558/459, loss: 0.022914301604032516 2023-01-24 02:56:00.393391: step: 560/459, loss: 0.013470727019011974 2023-01-24 02:56:00.973195: step: 562/459, loss: 0.16416232287883759 2023-01-24 02:56:01.560205: step: 564/459, loss: 0.011879832483828068 2023-01-24 02:56:02.268929: step: 566/459, loss: 0.013545677997171879 2023-01-24 02:56:02.942563: step: 568/459, loss: 0.04342089965939522 2023-01-24 02:56:03.592112: step: 570/459, loss: 0.268902450799942 2023-01-24 02:56:04.239409: step: 572/459, loss: 0.6228017210960388 2023-01-24 02:56:04.886018: step: 574/459, loss: 0.015953408554196358 2023-01-24 02:56:05.518297: step: 576/459, loss: 0.04011896252632141 2023-01-24 02:56:06.110348: step: 578/459, loss: 0.003560800338163972 2023-01-24 02:56:06.728268: step: 580/459, loss: 0.06729014962911606 2023-01-24 02:56:07.314325: step: 582/459, loss: 0.013125933706760406 2023-01-24 02:56:07.925860: step: 584/459, loss: 0.02127283439040184 2023-01-24 02:56:08.512379: step: 586/459, loss: 0.10287982225418091 2023-01-24 02:56:09.086266: step: 588/459, loss: 0.5387134552001953 2023-01-24 02:56:09.660269: step: 590/459, loss: 0.0029724976047873497 2023-01-24 02:56:10.259255: step: 592/459, loss: 0.003525578184053302 2023-01-24 02:56:10.979713: step: 594/459, loss: 0.3829483091831207 2023-01-24 02:56:11.611767: step: 596/459, loss: 0.012423534877598286 2023-01-24 02:56:12.284087: step: 598/459, loss: 0.02279650606215 2023-01-24 02:56:12.855754: step: 600/459, loss: 0.014694144017994404 2023-01-24 02:56:13.364903: step: 602/459, loss: 0.007741284091025591 2023-01-24 02:56:13.949197: step: 604/459, loss: 0.016666484996676445 2023-01-24 02:56:14.629500: step: 606/459, loss: 0.06060680374503136 2023-01-24 02:56:15.254197: step: 608/459, loss: 0.04132172465324402 2023-01-24 02:56:15.922302: step: 610/459, loss: 0.35389482975006104 2023-01-24 02:56:16.581470: step: 612/459, loss: 0.026649100705981255 2023-01-24 02:56:17.220935: step: 614/459, loss: 0.1740981638431549 2023-01-24 02:56:17.838906: step: 616/459, loss: 0.03486368805170059 2023-01-24 02:56:18.439223: step: 618/459, loss: 0.0733012780547142 
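Each epoch in this log is a stream of per-step entries of the form `timestamp: step: N/459, loss: X`, closed by a single `Loss:` summary line. Below is a minimal sketch for pulling those entries back out of a dump like this one and averaging them per epoch, assuming the summary line is a plain mean of the logged step losses (the log itself does not state the reduction). The regexes and the helper name `epoch_mean_losses` are illustrative and are not part of train.py.

```python
import re
from collections import defaultdict

# Illustrative patterns for the two kinds of lines that matter here:
# per-step loss entries and the "Epoch: k" markers that open each block.
STEP_RE = re.compile(r"step: (\d+)/\d+, loss: ([0-9][0-9.eE+-]*)")
EPOCH_RE = re.compile(r"Epoch: (\d+)")

def epoch_mean_losses(log_text: str) -> dict:
    # Rejoin entries that were split across physical lines when the log was flattened.
    log_text = " ".join(log_text.split())
    # Walk the text once, in order, switching epochs at each "Epoch: k" marker.
    events = sorted(
        [(m.start(), "epoch", float(m.group(1))) for m in EPOCH_RE.finditer(log_text)]
        + [(m.start(), "step", float(m.group(2))) for m in STEP_RE.finditer(log_text)]
    )
    losses = defaultdict(list)
    epoch = None
    for _, kind, value in events:
        if kind == "epoch":
            epoch = int(value)
        elif epoch is not None:
            losses[epoch].append(value)
    # Assumed reduction: unweighted mean of the logged steps per epoch.
    return {e: sum(v) / len(v) for e, v in losses.items()}
```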
2023-01-24 02:56:19.066081: step: 620/459, loss: 0.004530432168394327 2023-01-24 02:56:19.770448: step: 622/459, loss: 0.04441601783037186 2023-01-24 02:56:20.309544: step: 624/459, loss: 0.05576254054903984 2023-01-24 02:56:20.872365: step: 626/459, loss: 0.0018835399532690644 2023-01-24 02:56:21.466212: step: 628/459, loss: 0.02532954327762127 2023-01-24 02:56:22.027453: step: 630/459, loss: 0.05058944225311279 2023-01-24 02:56:22.681249: step: 632/459, loss: 0.2719796597957611 2023-01-24 02:56:23.308940: step: 634/459, loss: 0.058200903236866 2023-01-24 02:56:23.960763: step: 636/459, loss: 0.007548452354967594 2023-01-24 02:56:24.714889: step: 638/459, loss: 0.026713047176599503 2023-01-24 02:56:25.323814: step: 640/459, loss: 0.021161947399377823 2023-01-24 02:56:25.967007: step: 642/459, loss: 0.016956297680735588 2023-01-24 02:56:26.610527: step: 644/459, loss: 0.035361889749765396 2023-01-24 02:56:27.198532: step: 646/459, loss: 0.0746588334441185 2023-01-24 02:56:27.841346: step: 648/459, loss: 0.0743803083896637 2023-01-24 02:56:28.473002: step: 650/459, loss: 0.004791458137333393 2023-01-24 02:56:29.186205: step: 652/459, loss: 0.0634552463889122 2023-01-24 02:56:29.770462: step: 654/459, loss: 0.07103849202394485 2023-01-24 02:56:30.443890: step: 656/459, loss: 0.03539882227778435 2023-01-24 02:56:31.032802: step: 658/459, loss: 0.03657232224941254 2023-01-24 02:56:31.615834: step: 660/459, loss: 0.016462450847029686 2023-01-24 02:56:32.246132: step: 662/459, loss: 0.012612278573215008 2023-01-24 02:56:32.958955: step: 664/459, loss: 0.012185904197394848 2023-01-24 02:56:33.594460: step: 666/459, loss: 0.01338304951786995 2023-01-24 02:56:34.198811: step: 668/459, loss: 0.029532121494412422 2023-01-24 02:56:34.881587: step: 670/459, loss: 0.12257605791091919 2023-01-24 02:56:35.536463: step: 672/459, loss: 0.03137889504432678 2023-01-24 02:56:36.124134: step: 674/459, loss: 0.170928955078125 2023-01-24 02:56:36.685320: step: 676/459, loss: 0.06227419897913933 2023-01-24 02:56:37.311310: step: 678/459, loss: 0.05923806130886078 2023-01-24 02:56:37.908261: step: 680/459, loss: 0.03249737620353699 2023-01-24 02:56:38.515493: step: 682/459, loss: 0.0336279571056366 2023-01-24 02:56:39.192668: step: 684/459, loss: 0.00800677016377449 2023-01-24 02:56:39.852299: step: 686/459, loss: 0.03444009646773338 2023-01-24 02:56:40.450047: step: 688/459, loss: 0.10043961554765701 2023-01-24 02:56:41.069959: step: 690/459, loss: 0.015631400048732758 2023-01-24 02:56:41.680120: step: 692/459, loss: 0.07286001741886139 2023-01-24 02:56:42.252504: step: 694/459, loss: 0.007020760793238878 2023-01-24 02:56:42.855467: step: 696/459, loss: 0.023825274780392647 2023-01-24 02:56:43.465095: step: 698/459, loss: 0.03523171320557594 2023-01-24 02:56:44.093898: step: 700/459, loss: 0.02164696343243122 2023-01-24 02:56:44.718296: step: 702/459, loss: 0.0417177677154541 2023-01-24 02:56:45.433412: step: 704/459, loss: 0.001024835160933435 2023-01-24 02:56:46.162399: step: 706/459, loss: 0.011359288357198238 2023-01-24 02:56:46.791756: step: 708/459, loss: 0.006415989715605974 2023-01-24 02:56:47.473303: step: 710/459, loss: 0.07950087636709213 2023-01-24 02:56:48.147289: step: 712/459, loss: 0.049247585237026215 2023-01-24 02:56:48.761272: step: 714/459, loss: 0.05622231960296631 2023-01-24 02:56:49.449026: step: 716/459, loss: 0.020223552361130714 2023-01-24 02:56:50.109415: step: 718/459, loss: 0.015327113680541515 2023-01-24 02:56:50.698759: step: 720/459, loss: 0.0389593169093132 2023-01-24 
02:56:51.288517: step: 722/459, loss: 0.031626176089048386 2023-01-24 02:56:51.889595: step: 724/459, loss: 0.04108447954058647 2023-01-24 02:56:52.521800: step: 726/459, loss: 0.06273843348026276 2023-01-24 02:56:53.138074: step: 728/459, loss: 0.04862210899591446 2023-01-24 02:56:53.751639: step: 730/459, loss: 0.03829612210392952 2023-01-24 02:56:54.344520: step: 732/459, loss: 0.032687488943338394 2023-01-24 02:56:54.935914: step: 734/459, loss: 0.01115456037223339 2023-01-24 02:56:55.535914: step: 736/459, loss: 0.007615417707711458 2023-01-24 02:56:56.203777: step: 738/459, loss: 0.15536674857139587 2023-01-24 02:56:56.811242: step: 740/459, loss: 0.5003904104232788 2023-01-24 02:56:57.455893: step: 742/459, loss: 0.07422546297311783 2023-01-24 02:56:58.117101: step: 744/459, loss: 0.025319745764136314 2023-01-24 02:56:58.738439: step: 746/459, loss: 0.3203408122062683 2023-01-24 02:56:59.358253: step: 748/459, loss: 0.018643464893102646 2023-01-24 02:56:59.913684: step: 750/459, loss: 0.03637256100773811 2023-01-24 02:57:00.562293: step: 752/459, loss: 0.008931596763432026 2023-01-24 02:57:01.130472: step: 754/459, loss: 0.027377817779779434 2023-01-24 02:57:01.790542: step: 756/459, loss: 0.010506464168429375 2023-01-24 02:57:02.457522: step: 758/459, loss: 0.030081577599048615 2023-01-24 02:57:03.101144: step: 760/459, loss: 0.03769123554229736 2023-01-24 02:57:03.699929: step: 762/459, loss: 0.08509726822376251 2023-01-24 02:57:04.409260: step: 764/459, loss: 0.038878459483385086 2023-01-24 02:57:05.131696: step: 766/459, loss: 0.05772551894187927 2023-01-24 02:57:05.914994: step: 768/459, loss: 0.019335538148880005 2023-01-24 02:57:06.538755: step: 770/459, loss: 0.00871077086776495 2023-01-24 02:57:07.158377: step: 772/459, loss: 0.032197505235672 2023-01-24 02:57:07.760208: step: 774/459, loss: 0.07908990234136581 2023-01-24 02:57:08.408478: step: 776/459, loss: 0.038538530468940735 2023-01-24 02:57:09.066439: step: 778/459, loss: 0.02229977585375309 2023-01-24 02:57:09.642988: step: 780/459, loss: 0.04909451678395271 2023-01-24 02:57:10.315659: step: 782/459, loss: 0.03666704520583153 2023-01-24 02:57:10.935165: step: 784/459, loss: 0.03437761589884758 2023-01-24 02:57:11.580511: step: 786/459, loss: 0.020146410912275314 2023-01-24 02:57:12.308735: step: 788/459, loss: 0.030045585706830025 2023-01-24 02:57:12.876424: step: 790/459, loss: 0.06492883712053299 2023-01-24 02:57:13.450688: step: 792/459, loss: 0.041105180978775024 2023-01-24 02:57:13.996451: step: 794/459, loss: 0.009594758041203022 2023-01-24 02:57:14.599621: step: 796/459, loss: 0.08837708085775375 2023-01-24 02:57:15.192739: step: 798/459, loss: 0.017274055629968643 2023-01-24 02:57:15.834319: step: 800/459, loss: 0.035693664103746414 2023-01-24 02:57:16.460698: step: 802/459, loss: 0.06795796751976013 2023-01-24 02:57:17.045668: step: 804/459, loss: 0.009912916459143162 2023-01-24 02:57:17.635846: step: 806/459, loss: 0.138859361410141 2023-01-24 02:57:18.282561: step: 808/459, loss: 0.027491480112075806 2023-01-24 02:57:18.945276: step: 810/459, loss: 0.7731581926345825 2023-01-24 02:57:19.639344: step: 812/459, loss: 0.26543521881103516 2023-01-24 02:57:20.292379: step: 814/459, loss: 0.048611219972372055 2023-01-24 02:57:20.871484: step: 816/459, loss: 0.14329668879508972 2023-01-24 02:57:21.493755: step: 818/459, loss: 0.05756962671875954 2023-01-24 02:57:22.073526: step: 820/459, loss: 0.012953819707036018 2023-01-24 02:57:22.679987: step: 822/459, loss: 0.05047354847192764 2023-01-24 02:57:23.371326: 
step: 824/459, loss: 0.02906177192926407 2023-01-24 02:57:23.967551: step: 826/459, loss: 0.03592163324356079 2023-01-24 02:57:24.602219: step: 828/459, loss: 0.006285969167947769 2023-01-24 02:57:25.226994: step: 830/459, loss: 0.012627794407308102 2023-01-24 02:57:25.918283: step: 832/459, loss: 0.020649999380111694 2023-01-24 02:57:26.560556: step: 834/459, loss: 0.0441635437309742 2023-01-24 02:57:27.151731: step: 836/459, loss: 0.034109000116586685 2023-01-24 02:57:27.767433: step: 838/459, loss: 0.11418268084526062 2023-01-24 02:57:28.457274: step: 840/459, loss: 0.05284840241074562 2023-01-24 02:57:28.983319: step: 842/459, loss: 0.06453627347946167 2023-01-24 02:57:29.637148: step: 844/459, loss: 0.15319018065929413 2023-01-24 02:57:30.210395: step: 846/459, loss: 0.04442985728383064 2023-01-24 02:57:30.821398: step: 848/459, loss: 0.029934527352452278 2023-01-24 02:57:31.412291: step: 850/459, loss: 0.04699254035949707 2023-01-24 02:57:31.996658: step: 852/459, loss: 0.024414844810962677 2023-01-24 02:57:32.581038: step: 854/459, loss: 0.03977462276816368 2023-01-24 02:57:33.222803: step: 856/459, loss: 0.10212451219558716 2023-01-24 02:57:33.866287: step: 858/459, loss: 0.03540663421154022 2023-01-24 02:57:34.502105: step: 860/459, loss: 0.07690282166004181 2023-01-24 02:57:35.119118: step: 862/459, loss: 0.00843555387109518 2023-01-24 02:57:35.715546: step: 864/459, loss: 0.01637382060289383 2023-01-24 02:57:36.392646: step: 866/459, loss: 0.018209349364042282 2023-01-24 02:57:36.968452: step: 868/459, loss: 0.008101092651486397 2023-01-24 02:57:37.563816: step: 870/459, loss: 0.09524846076965332 2023-01-24 02:57:38.184866: step: 872/459, loss: 0.03980395942926407 2023-01-24 02:57:38.786168: step: 874/459, loss: 0.11659757047891617 2023-01-24 02:57:39.377233: step: 876/459, loss: 0.010056075640022755 2023-01-24 02:57:39.962306: step: 878/459, loss: 0.008429917506873608 2023-01-24 02:57:40.522300: step: 880/459, loss: 0.04706180468201637 2023-01-24 02:57:41.218249: step: 882/459, loss: 0.37652379274368286 2023-01-24 02:57:41.836238: step: 884/459, loss: 0.04835044592618942 2023-01-24 02:57:42.419471: step: 886/459, loss: 0.0699397400021553 2023-01-24 02:57:43.000416: step: 888/459, loss: 0.018757294863462448 2023-01-24 02:57:43.598806: step: 890/459, loss: 0.2017417848110199 2023-01-24 02:57:44.153529: step: 892/459, loss: 0.2599036991596222 2023-01-24 02:57:44.720606: step: 894/459, loss: 0.019581949338316917 2023-01-24 02:57:45.364443: step: 896/459, loss: 0.09077009558677673 2023-01-24 02:57:46.084688: step: 898/459, loss: 0.013456791639328003 2023-01-24 02:57:46.665771: step: 900/459, loss: 0.03078809753060341 2023-01-24 02:57:47.280348: step: 902/459, loss: 0.01516728289425373 2023-01-24 02:57:47.901905: step: 904/459, loss: 0.07539083808660507 2023-01-24 02:57:48.509404: step: 906/459, loss: 0.02408621832728386 2023-01-24 02:57:49.089718: step: 908/459, loss: 0.1374514251947403 2023-01-24 02:57:49.732341: step: 910/459, loss: 0.0750197172164917 2023-01-24 02:57:50.307272: step: 912/459, loss: 0.0006892503006383777 2023-01-24 02:57:50.896019: step: 914/459, loss: 0.017630675807595253 2023-01-24 02:57:51.490747: step: 916/459, loss: 0.01869465596973896 2023-01-24 02:57:52.163040: step: 918/459, loss: 0.01933048479259014 2023-01-24 02:57:52.641241: step: 920/459, loss: 1.9868213740892315e-08 ================================================== Loss: 0.065 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 
0.33212094802607744, 'r': 0.31636568483698463, 'f1': 0.3240519259651912}, 'combined': 0.23877510334277247, 'epoch': 23} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3436578167744675, 'r': 0.3021442022140641, 'f1': 0.32156671497117884}, 'combined': 0.20580269758155442, 'epoch': 23} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3233229963492232, 'r': 0.3159608028839657, 'f1': 0.3195995069478886}, 'combined': 0.23549437354054947, 'epoch': 23} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3432619025859142, 'r': 0.2940017930413416, 'f1': 0.3167279590396449}, 'combined': 0.2027058937853727, 'epoch': 23} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3554160495149461, 'r': 0.32776508551093697, 'f1': 0.34103099716537766}, 'combined': 0.2512859979113309, 'epoch': 23} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.354190068597228, 'r': 0.3174162363384994, 'f1': 0.3347963760803769}, 'combined': 0.240042684736874, 'epoch': 23} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3517156862745098, 'r': 0.3416666666666666, 'f1': 0.34661835748792263}, 'combined': 0.23107890499194841, 'epoch': 23} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.32954545454545453, 'r': 0.31521739130434784, 'f1': 0.3222222222222222}, 'combined': 0.1611111111111111, 'epoch': 23} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25, 'r': 0.06896551724137931, 'f1': 0.1081081081081081}, 'combined': 0.07207207207207206, 'epoch': 23} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 'epoch': 10} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 'epoch': 10} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'epoch': 10} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3342478880342958, 'r': 0.3266369304319968, 'f1': 0.33039858414138645}, 'combined': 0.24345158831470579, 'epoch': 5} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3413499740991752, 'r': 0.24608229950967814, 'f1': 0.28599105067157526}, 'combined': 0.18303427242980813, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3269230769230769, 'r': 0.3695652173913043, 'f1': 0.346938775510204}, 'combined': 0.173469387755102, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8} Test for 
Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8} ****************************** Epoch: 24 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:00:29.282417: step: 2/459, loss: 0.6769057512283325 2023-01-24 03:00:29.933957: step: 4/459, loss: 0.03036675974726677 2023-01-24 03:00:30.578606: step: 6/459, loss: 0.0696287602186203 2023-01-24 03:00:31.202642: step: 8/459, loss: 0.07563994824886322 2023-01-24 03:00:31.800545: step: 10/459, loss: 0.0094511229544878 2023-01-24 03:00:32.476407: step: 12/459, loss: 0.007617923431098461 2023-01-24 03:00:33.117634: step: 14/459, loss: 0.02814353071153164 2023-01-24 03:00:33.755248: step: 16/459, loss: 0.010358802042901516 2023-01-24 03:00:34.469260: step: 18/459, loss: 0.07613883912563324 2023-01-24 03:00:35.143287: step: 20/459, loss: 0.10917557775974274 2023-01-24 03:00:35.806131: step: 22/459, loss: 0.052835218608379364 2023-01-24 03:00:36.365096: step: 24/459, loss: 0.30863556265830994 2023-01-24 03:00:37.034368: step: 26/459, loss: 0.25294438004493713 2023-01-24 03:00:37.710701: step: 28/459, loss: 0.12626060843467712 2023-01-24 03:00:38.342505: step: 30/459, loss: 0.028965607285499573 2023-01-24 03:00:38.992414: step: 32/459, loss: 0.05894503742456436 2023-01-24 03:00:39.683799: step: 34/459, loss: 0.035458069294691086 2023-01-24 03:00:40.299720: step: 36/459, loss: 0.1723954677581787 2023-01-24 03:00:40.910981: step: 38/459, loss: 0.03986980766057968 2023-01-24 03:00:41.563546: step: 40/459, loss: 0.07136423140764236 2023-01-24 03:00:42.176893: step: 42/459, loss: 0.4338868260383606 2023-01-24 03:00:42.800622: step: 44/459, loss: 0.13203942775726318 2023-01-24 03:00:43.545929: step: 46/459, loss: 0.03988394886255264 2023-01-24 03:00:44.264479: step: 48/459, loss: 0.011471112258732319 2023-01-24 03:00:44.859920: step: 50/459, loss: 0.033624663949012756 2023-01-24 03:00:45.520931: step: 52/459, loss: 0.09154405444860458 2023-01-24 03:00:46.137606: step: 54/459, loss: 0.005569766275584698 2023-01-24 03:00:46.768650: step: 56/459, loss: 9.389004707336426 2023-01-24 03:00:47.367103: step: 58/459, loss: 0.06785601377487183 2023-01-24 03:00:47.973699: step: 60/459, loss: 0.014365948736667633 2023-01-24 03:00:48.575753: step: 62/459, loss: 0.015537554398179054 2023-01-24 03:00:49.154576: step: 64/459, loss: 0.002190256491303444 2023-01-24 03:00:49.717271: step: 66/459, loss: 0.024644721299409866 2023-01-24 03:00:50.386474: step: 68/459, loss: 0.02446868270635605 2023-01-24 03:00:50.977099: step: 70/459, loss: 0.030257154256105423 2023-01-24 03:00:51.609959: step: 72/459, loss: 0.02746262028813362 2023-01-24 03:00:52.228001: step: 74/459, loss: 0.027597524225711823 2023-01-24 03:00:52.854508: step: 76/459, loss: 0.1026972234249115 2023-01-24 03:00:53.527592: step: 78/459, loss: 1.431778907775879 2023-01-24 03:00:54.170652: step: 80/459, loss: 0.03142143040895462 2023-01-24 03:00:54.775206: step: 82/459, loss: 0.02791326493024826 2023-01-24 03:00:55.401710: step: 84/459, loss: 0.000258587155258283 
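The evaluation blocks report, per language, a template score, a slot score, and a 'combined' score. The numbers printed in this log are consistent with the standard F1 definition and with 'combined' being the product of the two F1 values (e.g. epoch 23, Dev Chinese: 0.7368421 × 0.3240519 ≈ 0.2387751). The short reconstruction below works under that assumption; the actual scorer inside train.py is not shown in this log, so treat it as an inference from the reported values rather than the real implementation.

```python
# Inferred from the values in this log (e.g. epoch 23, Dev/Test Chinese),
# not taken from train.py itself.

def f1(p: float, r: float) -> float:
    """Harmonic mean of precision and recall."""
    return 2 * p * r / (p + r) if (p + r) else 0.0

def combined(template_f1: float, slot_f1: float) -> float:
    """'combined' in the log equals template_f1 * slot_f1 for every entry checked."""
    return template_f1 * slot_f1

# Epoch 23, Dev Chinese, from the summary above:
tmpl = f1(1.0, 0.5833333333333334)                    # -> 0.7368421052631579
slot = f1(0.33212094802607744, 0.31636568483698463)   # -> ~0.3240519259651912
print(combined(tmpl, slot))                           # -> ~0.23877510..., matching the log
```

The same relation holds for the test side, e.g. epoch 23, Test Chinese: 0.64 × 0.3215667 ≈ 0.2058027, as reported.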
2023-01-24 03:00:55.980917: step: 86/459, loss: 0.0018071052618324757 2023-01-24 03:00:56.638927: step: 88/459, loss: 0.25081852078437805 2023-01-24 03:00:57.167302: step: 90/459, loss: 0.00912094209343195 2023-01-24 03:00:57.812715: step: 92/459, loss: 0.06909488141536713 2023-01-24 03:00:58.433479: step: 94/459, loss: 0.010707722045481205 2023-01-24 03:00:59.052468: step: 96/459, loss: 0.048255931586027145 2023-01-24 03:00:59.650236: step: 98/459, loss: 0.004317821469157934 2023-01-24 03:01:00.232777: step: 100/459, loss: 0.005399363115429878 2023-01-24 03:01:00.799671: step: 102/459, loss: 0.004374892916530371 2023-01-24 03:01:01.374856: step: 104/459, loss: 0.025964191183447838 2023-01-24 03:01:02.049145: step: 106/459, loss: 0.007612037938088179 2023-01-24 03:01:02.674779: step: 108/459, loss: 0.03101026825606823 2023-01-24 03:01:03.309439: step: 110/459, loss: 0.004779907409101725 2023-01-24 03:01:03.888937: step: 112/459, loss: 0.03351442515850067 2023-01-24 03:01:04.458511: step: 114/459, loss: 0.05321822315454483 2023-01-24 03:01:05.071248: step: 116/459, loss: 0.02372005581855774 2023-01-24 03:01:05.691745: step: 118/459, loss: 0.019390027970075607 2023-01-24 03:01:06.352685: step: 120/459, loss: 0.04898505285382271 2023-01-24 03:01:06.965591: step: 122/459, loss: 0.015579620376229286 2023-01-24 03:01:07.564802: step: 124/459, loss: 0.005445773713290691 2023-01-24 03:01:08.162279: step: 126/459, loss: 0.003484015353024006 2023-01-24 03:01:08.768277: step: 128/459, loss: 0.03290262445807457 2023-01-24 03:01:09.382410: step: 130/459, loss: 0.5627195835113525 2023-01-24 03:01:09.967649: step: 132/459, loss: 0.0052758632227778435 2023-01-24 03:01:10.622829: step: 134/459, loss: 0.027157992124557495 2023-01-24 03:01:11.242945: step: 136/459, loss: 0.04587419331073761 2023-01-24 03:01:11.887993: step: 138/459, loss: 0.03060578554868698 2023-01-24 03:01:12.459381: step: 140/459, loss: 0.0034826418850570917 2023-01-24 03:01:13.135974: step: 142/459, loss: 0.006953095551580191 2023-01-24 03:01:13.770075: step: 144/459, loss: 0.0005211576935835183 2023-01-24 03:01:14.390095: step: 146/459, loss: 0.029589146375656128 2023-01-24 03:01:15.025239: step: 148/459, loss: 0.1435951590538025 2023-01-24 03:01:15.592900: step: 150/459, loss: 0.029943598434329033 2023-01-24 03:01:16.219259: step: 152/459, loss: 0.03360862657427788 2023-01-24 03:01:16.836333: step: 154/459, loss: 0.04967529699206352 2023-01-24 03:01:17.491055: step: 156/459, loss: 0.045991986989974976 2023-01-24 03:01:18.056345: step: 158/459, loss: 0.05717676132917404 2023-01-24 03:01:18.784676: step: 160/459, loss: 0.014124608598649502 2023-01-24 03:01:19.408664: step: 162/459, loss: 0.07624881714582443 2023-01-24 03:01:20.041804: step: 164/459, loss: 0.0026515047065913677 2023-01-24 03:01:20.632909: step: 166/459, loss: 0.048074476420879364 2023-01-24 03:01:21.235558: step: 168/459, loss: 0.011277448385953903 2023-01-24 03:01:21.852985: step: 170/459, loss: 0.03084634244441986 2023-01-24 03:01:22.459845: step: 172/459, loss: 0.07190115004777908 2023-01-24 03:01:23.077113: step: 174/459, loss: 0.010102623142302036 2023-01-24 03:01:23.686908: step: 176/459, loss: 0.24894429743289948 2023-01-24 03:01:24.370715: step: 178/459, loss: 0.0392945297062397 2023-01-24 03:01:24.993695: step: 180/459, loss: 0.006838581059128046 2023-01-24 03:01:25.700624: step: 182/459, loss: 0.03054216504096985 2023-01-24 03:01:26.290921: step: 184/459, loss: 0.0924912840127945 2023-01-24 03:01:26.866616: step: 186/459, loss: 0.0177588053047657 2023-01-24 
03:01:27.481105: step: 188/459, loss: 0.038194358348846436 2023-01-24 03:01:28.121021: step: 190/459, loss: 0.1043025553226471 2023-01-24 03:01:28.745697: step: 192/459, loss: 0.011761745437979698 2023-01-24 03:01:29.385858: step: 194/459, loss: 0.02101150155067444 2023-01-24 03:01:29.993748: step: 196/459, loss: 0.037655554711818695 2023-01-24 03:01:30.570854: step: 198/459, loss: 0.02112741395831108 2023-01-24 03:01:31.231718: step: 200/459, loss: 0.009523472748696804 2023-01-24 03:01:31.852112: step: 202/459, loss: 0.0009331719484180212 2023-01-24 03:01:32.452613: step: 204/459, loss: 0.017332909628748894 2023-01-24 03:01:33.096375: step: 206/459, loss: 0.032406874001026154 2023-01-24 03:01:33.776318: step: 208/459, loss: 0.05585869401693344 2023-01-24 03:01:34.516228: step: 210/459, loss: 0.011401094496250153 2023-01-24 03:01:35.168064: step: 212/459, loss: 0.08447621762752533 2023-01-24 03:01:35.797204: step: 214/459, loss: 0.006938368082046509 2023-01-24 03:01:36.446901: step: 216/459, loss: 0.005996344145387411 2023-01-24 03:01:37.125325: step: 218/459, loss: 0.061942800879478455 2023-01-24 03:01:37.858160: step: 220/459, loss: 0.040303219109773636 2023-01-24 03:01:38.514020: step: 222/459, loss: 0.12951096892356873 2023-01-24 03:01:39.143634: step: 224/459, loss: 0.02896326407790184 2023-01-24 03:01:39.810129: step: 226/459, loss: 0.058726970106363297 2023-01-24 03:01:40.452206: step: 228/459, loss: 0.04669542983174324 2023-01-24 03:01:41.067313: step: 230/459, loss: 0.026895862072706223 2023-01-24 03:01:41.732748: step: 232/459, loss: 0.022640014067292213 2023-01-24 03:01:42.404850: step: 234/459, loss: 0.5994431972503662 2023-01-24 03:01:43.033380: step: 236/459, loss: 0.009195717051625252 2023-01-24 03:01:43.719928: step: 238/459, loss: 0.10136397927999496 2023-01-24 03:01:44.324035: step: 240/459, loss: 0.00564112002030015 2023-01-24 03:01:44.955754: step: 242/459, loss: 0.08630938082933426 2023-01-24 03:01:45.582039: step: 244/459, loss: 0.08488935232162476 2023-01-24 03:01:46.218250: step: 246/459, loss: 0.04651714488863945 2023-01-24 03:01:46.858378: step: 248/459, loss: 0.06109550967812538 2023-01-24 03:01:47.449009: step: 250/459, loss: 0.28388285636901855 2023-01-24 03:01:48.055104: step: 252/459, loss: 0.011681722477078438 2023-01-24 03:01:48.716247: step: 254/459, loss: 0.03681178763508797 2023-01-24 03:01:49.341517: step: 256/459, loss: 0.05364524945616722 2023-01-24 03:01:49.923237: step: 258/459, loss: 0.007260677870362997 2023-01-24 03:01:50.514914: step: 260/459, loss: 0.08788633346557617 2023-01-24 03:01:51.134129: step: 262/459, loss: 0.05610230565071106 2023-01-24 03:01:51.754222: step: 264/459, loss: 0.10812222957611084 2023-01-24 03:01:52.340659: step: 266/459, loss: 0.0962987020611763 2023-01-24 03:01:53.016032: step: 268/459, loss: 0.08531250804662704 2023-01-24 03:01:53.660257: step: 270/459, loss: 0.023749886080622673 2023-01-24 03:01:54.310759: step: 272/459, loss: 0.05389319360256195 2023-01-24 03:01:54.932656: step: 274/459, loss: 0.016836566850543022 2023-01-24 03:01:55.568975: step: 276/459, loss: 0.025812597945332527 2023-01-24 03:01:56.148851: step: 278/459, loss: 0.14395633339881897 2023-01-24 03:01:56.763726: step: 280/459, loss: 0.016395116224884987 2023-01-24 03:01:57.362856: step: 282/459, loss: 0.24158623814582825 2023-01-24 03:01:57.949223: step: 284/459, loss: 0.02559332177042961 2023-01-24 03:01:58.616402: step: 286/459, loss: 0.008047600276768208 2023-01-24 03:01:59.196135: step: 288/459, loss: 0.003964893985539675 2023-01-24 
03:01:59.792450: step: 290/459, loss: 0.01031365618109703 2023-01-24 03:02:00.392807: step: 292/459, loss: 0.04468785226345062 2023-01-24 03:02:01.072674: step: 294/459, loss: 0.019081097096204758 2023-01-24 03:02:01.620833: step: 296/459, loss: 0.025469373911619186 2023-01-24 03:02:02.303828: step: 298/459, loss: 0.04160650447010994 2023-01-24 03:02:02.920206: step: 300/459, loss: 0.005815334152430296 2023-01-24 03:02:03.465301: step: 302/459, loss: 0.0016675674123689532 2023-01-24 03:02:04.068986: step: 304/459, loss: 0.011210017837584019 2023-01-24 03:02:04.637039: step: 306/459, loss: 0.03300490230321884 2023-01-24 03:02:05.300401: step: 308/459, loss: 0.06033947691321373 2023-01-24 03:02:05.845831: step: 310/459, loss: 0.04057495668530464 2023-01-24 03:02:06.450344: step: 312/459, loss: 0.005185695365071297 2023-01-24 03:02:07.120919: step: 314/459, loss: 0.014770510606467724 2023-01-24 03:02:07.746962: step: 316/459, loss: 0.06092808395624161 2023-01-24 03:02:08.373447: step: 318/459, loss: 0.013926100917160511 2023-01-24 03:02:09.032452: step: 320/459, loss: 0.45509329438209534 2023-01-24 03:02:09.702547: step: 322/459, loss: 0.03528890386223793 2023-01-24 03:02:10.273729: step: 324/459, loss: 0.009745498187839985 2023-01-24 03:02:10.922792: step: 326/459, loss: 0.11996316909790039 2023-01-24 03:02:11.499051: step: 328/459, loss: 0.0187115129083395 2023-01-24 03:02:12.102300: step: 330/459, loss: 0.0036542692687362432 2023-01-24 03:02:12.703506: step: 332/459, loss: 0.03564373031258583 2023-01-24 03:02:13.324295: step: 334/459, loss: 0.11843519657850266 2023-01-24 03:02:13.937022: step: 336/459, loss: 0.04478735104203224 2023-01-24 03:02:14.569624: step: 338/459, loss: 0.007986185140907764 2023-01-24 03:02:15.246015: step: 340/459, loss: 0.03748931735754013 2023-01-24 03:02:15.856958: step: 342/459, loss: 0.02251068875193596 2023-01-24 03:02:16.412240: step: 344/459, loss: 0.11786296218633652 2023-01-24 03:02:17.073771: step: 346/459, loss: 0.07091626524925232 2023-01-24 03:02:17.772587: step: 348/459, loss: 0.011842598207294941 2023-01-24 03:02:18.462576: step: 350/459, loss: 0.03828633204102516 2023-01-24 03:02:19.082710: step: 352/459, loss: 0.008780975826084614 2023-01-24 03:02:19.726180: step: 354/459, loss: 0.02035050466656685 2023-01-24 03:02:20.328991: step: 356/459, loss: 0.007304822560399771 2023-01-24 03:02:20.988867: step: 358/459, loss: 0.0006103442283347249 2023-01-24 03:02:21.497133: step: 360/459, loss: 0.013593696057796478 2023-01-24 03:02:22.126904: step: 362/459, loss: 0.016312796622514725 2023-01-24 03:02:22.753505: step: 364/459, loss: 0.02240115776658058 2023-01-24 03:02:23.355241: step: 366/459, loss: 0.37951183319091797 2023-01-24 03:02:23.995666: step: 368/459, loss: 0.01591704972088337 2023-01-24 03:02:24.555544: step: 370/459, loss: 0.02832728438079357 2023-01-24 03:02:25.156570: step: 372/459, loss: 0.030423574149608612 2023-01-24 03:02:25.808177: step: 374/459, loss: 0.0718071311712265 2023-01-24 03:02:26.407667: step: 376/459, loss: 0.03011983633041382 2023-01-24 03:02:27.008367: step: 378/459, loss: 0.0007276915712282062 2023-01-24 03:02:27.654185: step: 380/459, loss: 0.04250456020236015 2023-01-24 03:02:28.306413: step: 382/459, loss: 0.0010876840678974986 2023-01-24 03:02:28.886122: step: 384/459, loss: 0.03427955135703087 2023-01-24 03:02:29.521055: step: 386/459, loss: 0.02443394623696804 2023-01-24 03:02:30.133295: step: 388/459, loss: 0.042512197047472 2023-01-24 03:02:30.787857: step: 390/459, loss: 0.0018544542836025357 2023-01-24 
03:02:31.417098: step: 392/459, loss: 0.19069775938987732 2023-01-24 03:02:32.051791: step: 394/459, loss: 0.014975035563111305 2023-01-24 03:02:32.856871: step: 396/459, loss: 0.29881423711776733 2023-01-24 03:02:33.416732: step: 398/459, loss: 0.008719406090676785 2023-01-24 03:02:34.016109: step: 400/459, loss: 0.00724451569840312 2023-01-24 03:02:34.642921: step: 402/459, loss: 0.07545849680900574 2023-01-24 03:02:35.293234: step: 404/459, loss: 0.0019846016075462103 2023-01-24 03:02:35.879426: step: 406/459, loss: 0.0012921919114887714 2023-01-24 03:02:36.518711: step: 408/459, loss: 0.1130947470664978 2023-01-24 03:02:37.142961: step: 410/459, loss: 0.028933243826031685 2023-01-24 03:02:37.784641: step: 412/459, loss: 0.028204353526234627 2023-01-24 03:02:38.382671: step: 414/459, loss: 0.01401766948401928 2023-01-24 03:02:38.981256: step: 416/459, loss: 0.2263689786195755 2023-01-24 03:02:39.565167: step: 418/459, loss: 0.03952399268746376 2023-01-24 03:02:40.260855: step: 420/459, loss: 0.011448054574429989 2023-01-24 03:02:40.863723: step: 422/459, loss: 0.007909527979791164 2023-01-24 03:02:41.480284: step: 424/459, loss: 0.0258182343095541 2023-01-24 03:02:42.199606: step: 426/459, loss: 0.19805626571178436 2023-01-24 03:02:42.876999: step: 428/459, loss: 0.06853856891393661 2023-01-24 03:02:43.433761: step: 430/459, loss: 0.00037992329453118145 2023-01-24 03:02:44.004970: step: 432/459, loss: 0.019971420988440514 2023-01-24 03:02:44.582215: step: 434/459, loss: 0.27267366647720337 2023-01-24 03:02:45.295799: step: 436/459, loss: 0.23396526277065277 2023-01-24 03:02:45.937995: step: 438/459, loss: 0.1007409393787384 2023-01-24 03:02:46.566923: step: 440/459, loss: 0.015993714332580566 2023-01-24 03:02:47.183694: step: 442/459, loss: 0.034275006502866745 2023-01-24 03:02:47.804152: step: 444/459, loss: 0.03179327771067619 2023-01-24 03:02:48.425956: step: 446/459, loss: 0.11687692999839783 2023-01-24 03:02:49.023622: step: 448/459, loss: 3.3646583557128906 2023-01-24 03:02:49.692590: step: 450/459, loss: 0.1337302029132843 2023-01-24 03:02:50.325843: step: 452/459, loss: 0.006438437849283218 2023-01-24 03:02:50.950721: step: 454/459, loss: 0.03572304546833038 2023-01-24 03:02:51.499042: step: 456/459, loss: 0.025770585983991623 2023-01-24 03:02:52.165194: step: 458/459, loss: 0.0010278525296598673 2023-01-24 03:02:52.872209: step: 460/459, loss: 0.04664089158177376 2023-01-24 03:02:53.483890: step: 462/459, loss: 0.03141443431377411 2023-01-24 03:02:54.098129: step: 464/459, loss: 0.016807349398732185 2023-01-24 03:02:54.670821: step: 466/459, loss: 0.02654407173395157 2023-01-24 03:02:55.281430: step: 468/459, loss: 0.1973433941602707 2023-01-24 03:02:55.888657: step: 470/459, loss: 0.010903898626565933 2023-01-24 03:02:56.588917: step: 472/459, loss: 0.009145641699433327 2023-01-24 03:02:57.228516: step: 474/459, loss: 0.020279869437217712 2023-01-24 03:02:57.864978: step: 476/459, loss: 0.046906664967536926 2023-01-24 03:02:58.541602: step: 478/459, loss: 0.033465854823589325 2023-01-24 03:02:59.184600: step: 480/459, loss: 0.036581456661224365 2023-01-24 03:02:59.776247: step: 482/459, loss: 0.039886198937892914 2023-01-24 03:03:00.457780: step: 484/459, loss: 0.002081256592646241 2023-01-24 03:03:01.059270: step: 486/459, loss: 0.009844735264778137 2023-01-24 03:03:01.652687: step: 488/459, loss: 0.029115259647369385 2023-01-24 03:03:02.258749: step: 490/459, loss: 0.021352004259824753 2023-01-24 03:03:02.911485: step: 492/459, loss: 0.04639708250761032 2023-01-24 
03:03:03.558791: step: 494/459, loss: 0.09088754653930664 2023-01-24 03:03:04.161149: step: 496/459, loss: 0.05346452072262764 2023-01-24 03:03:04.851206: step: 498/459, loss: 0.05004598945379257 2023-01-24 03:03:05.486851: step: 500/459, loss: 0.012069007381796837 2023-01-24 03:03:06.098942: step: 502/459, loss: 0.004696419928222895 2023-01-24 03:03:06.713352: step: 504/459, loss: 0.007400523871183395 2023-01-24 03:03:07.316806: step: 506/459, loss: 0.006444346625357866 2023-01-24 03:03:08.027402: step: 508/459, loss: 0.04272785782814026 2023-01-24 03:03:08.591383: step: 510/459, loss: 0.006061771418899298 2023-01-24 03:03:09.182404: step: 512/459, loss: 0.12598296999931335 2023-01-24 03:03:09.736627: step: 514/459, loss: 0.01307691726833582 2023-01-24 03:03:10.350180: step: 516/459, loss: 0.029858576133847237 2023-01-24 03:03:10.949881: step: 518/459, loss: 0.014208188280463219 2023-01-24 03:03:11.584190: step: 520/459, loss: 0.027288926765322685 2023-01-24 03:03:12.222731: step: 522/459, loss: 0.08393972367048264 2023-01-24 03:03:12.891684: step: 524/459, loss: 0.012544223107397556 2023-01-24 03:03:13.543563: step: 526/459, loss: 0.05531763285398483 2023-01-24 03:03:14.161956: step: 528/459, loss: 0.01067149918526411 2023-01-24 03:03:14.795166: step: 530/459, loss: 0.010507393628358841 2023-01-24 03:03:15.450286: step: 532/459, loss: 0.04820583015680313 2023-01-24 03:03:16.097800: step: 534/459, loss: 0.04708148539066315 2023-01-24 03:03:16.720498: step: 536/459, loss: 0.07725609093904495 2023-01-24 03:03:17.288684: step: 538/459, loss: 0.03977410867810249 2023-01-24 03:03:17.954795: step: 540/459, loss: 0.022387737408280373 2023-01-24 03:03:18.589372: step: 542/459, loss: 0.026033464819192886 2023-01-24 03:03:19.255959: step: 544/459, loss: 0.053438954055309296 2023-01-24 03:03:19.876964: step: 546/459, loss: 0.02800826169550419 2023-01-24 03:03:20.481026: step: 548/459, loss: 0.015028269961476326 2023-01-24 03:03:21.102998: step: 550/459, loss: 0.006381030194461346 2023-01-24 03:03:21.745824: step: 552/459, loss: 0.025356778874993324 2023-01-24 03:03:22.395616: step: 554/459, loss: 0.015517096035182476 2023-01-24 03:03:22.988698: step: 556/459, loss: 0.014077902771532536 2023-01-24 03:03:23.562669: step: 558/459, loss: 0.04088245704770088 2023-01-24 03:03:24.237434: step: 560/459, loss: 0.05370250344276428 2023-01-24 03:03:24.838431: step: 562/459, loss: 0.020882342010736465 2023-01-24 03:03:25.420585: step: 564/459, loss: 0.012929135002195835 2023-01-24 03:03:26.002367: step: 566/459, loss: 0.0335572250187397 2023-01-24 03:03:26.605951: step: 568/459, loss: 0.009191765449941158 2023-01-24 03:03:27.155344: step: 570/459, loss: 0.04815976694226265 2023-01-24 03:03:27.818908: step: 572/459, loss: 0.023434069007635117 2023-01-24 03:03:28.544891: step: 574/459, loss: 0.05625598505139351 2023-01-24 03:03:29.159485: step: 576/459, loss: 0.12181024253368378 2023-01-24 03:03:29.811525: step: 578/459, loss: 0.010866069234907627 2023-01-24 03:03:30.465663: step: 580/459, loss: 0.06048004701733589 2023-01-24 03:03:31.151636: step: 582/459, loss: 0.01925354264676571 2023-01-24 03:03:31.750362: step: 584/459, loss: 0.02738296426832676 2023-01-24 03:03:32.339782: step: 586/459, loss: 0.02101915329694748 2023-01-24 03:03:33.026739: step: 588/459, loss: 0.06451179832220078 2023-01-24 03:03:33.654214: step: 590/459, loss: 0.025113217532634735 2023-01-24 03:03:34.287614: step: 592/459, loss: 0.12473088502883911 2023-01-24 03:03:34.928440: step: 594/459, loss: 0.07116865366697311 2023-01-24 
03:03:35.548205: step: 596/459, loss: 0.021268893033266068 2023-01-24 03:03:36.164765: step: 598/459, loss: 0.14964105188846588 2023-01-24 03:03:36.775221: step: 600/459, loss: 0.018052615225315094 2023-01-24 03:03:37.361624: step: 602/459, loss: 0.016704104840755463 2023-01-24 03:03:37.965433: step: 604/459, loss: 0.017177000641822815 2023-01-24 03:03:38.558994: step: 606/459, loss: 0.03409499675035477 2023-01-24 03:03:39.198192: step: 608/459, loss: 0.03317052870988846 2023-01-24 03:03:39.787291: step: 610/459, loss: 0.143827423453331 2023-01-24 03:03:40.392016: step: 612/459, loss: 0.15863314270973206 2023-01-24 03:03:41.013365: step: 614/459, loss: 0.028656432405114174 2023-01-24 03:03:41.713884: step: 616/459, loss: 0.04520604759454727 2023-01-24 03:03:42.388342: step: 618/459, loss: 0.2257298082113266 2023-01-24 03:03:43.015535: step: 620/459, loss: 0.049327459186315536 2023-01-24 03:03:43.651386: step: 622/459, loss: 0.009863273240625858 2023-01-24 03:03:44.245548: step: 624/459, loss: 0.07638751715421677 2023-01-24 03:03:44.868204: step: 626/459, loss: 0.11474288254976273 2023-01-24 03:03:45.491298: step: 628/459, loss: 0.02314266748726368 2023-01-24 03:03:46.154905: step: 630/459, loss: 0.0415242500603199 2023-01-24 03:03:46.989484: step: 632/459, loss: 0.04841012880206108 2023-01-24 03:03:47.607713: step: 634/459, loss: 0.05012574419379234 2023-01-24 03:03:48.190962: step: 636/459, loss: 0.019115136936306953 2023-01-24 03:03:48.890802: step: 638/459, loss: 0.012171211652457714 2023-01-24 03:03:49.530787: step: 640/459, loss: 0.03667573630809784 2023-01-24 03:03:50.097315: step: 642/459, loss: 0.053480036556720734 2023-01-24 03:03:50.752443: step: 644/459, loss: 0.04868445545434952 2023-01-24 03:03:51.367355: step: 646/459, loss: 0.054531075060367584 2023-01-24 03:03:52.083503: step: 648/459, loss: 0.05748328939080238 2023-01-24 03:03:52.707142: step: 650/459, loss: 0.03445430099964142 2023-01-24 03:03:53.334631: step: 652/459, loss: 0.03744221478700638 2023-01-24 03:03:53.961146: step: 654/459, loss: 0.0723940059542656 2023-01-24 03:03:54.563246: step: 656/459, loss: 0.0025498978793621063 2023-01-24 03:03:55.226843: step: 658/459, loss: 0.0015861854190006852 2023-01-24 03:03:55.823414: step: 660/459, loss: 0.05478588864207268 2023-01-24 03:03:56.433152: step: 662/459, loss: 0.014567812904715538 2023-01-24 03:03:57.071782: step: 664/459, loss: 0.019580449908971786 2023-01-24 03:03:57.714040: step: 666/459, loss: 0.05621281638741493 2023-01-24 03:03:58.291962: step: 668/459, loss: 0.08545415103435516 2023-01-24 03:03:58.874250: step: 670/459, loss: 0.04797207564115524 2023-01-24 03:03:59.494056: step: 672/459, loss: 0.013326752930879593 2023-01-24 03:04:00.115402: step: 674/459, loss: 0.04345744103193283 2023-01-24 03:04:00.750236: step: 676/459, loss: 0.7681735157966614 2023-01-24 03:04:01.386118: step: 678/459, loss: 0.010091947391629219 2023-01-24 03:04:02.022214: step: 680/459, loss: 0.011447102762758732 2023-01-24 03:04:02.639173: step: 682/459, loss: 0.029323477298021317 2023-01-24 03:04:03.228500: step: 684/459, loss: 0.012210090644657612 2023-01-24 03:04:04.027545: step: 686/459, loss: 0.03017255663871765 2023-01-24 03:04:04.614380: step: 688/459, loss: 0.07807794213294983 2023-01-24 03:04:05.230184: step: 690/459, loss: 0.030191197991371155 2023-01-24 03:04:05.851819: step: 692/459, loss: 0.02694246545433998 2023-01-24 03:04:06.457304: step: 694/459, loss: 0.025738509371876717 2023-01-24 03:04:07.136956: step: 696/459, loss: 0.012131587602198124 2023-01-24 03:04:07.738437: 
step: 698/459, loss: 0.04007183015346527 2023-01-24 03:04:08.321477: step: 700/459, loss: 0.011449295096099377 2023-01-24 03:04:08.901940: step: 702/459, loss: 0.016256701201200485 2023-01-24 03:04:09.480517: step: 704/459, loss: 0.006022576242685318 2023-01-24 03:04:10.100680: step: 706/459, loss: 0.01158948428928852 2023-01-24 03:04:10.742328: step: 708/459, loss: 0.017752837389707565 2023-01-24 03:04:11.365782: step: 710/459, loss: 0.020549336448311806 2023-01-24 03:04:11.972017: step: 712/459, loss: 0.003424045629799366 2023-01-24 03:04:12.610017: step: 714/459, loss: 0.002613201504573226 2023-01-24 03:04:13.222643: step: 716/459, loss: 0.01933569274842739 2023-01-24 03:04:13.844634: step: 718/459, loss: 0.0014973686775192618 2023-01-24 03:04:14.481476: step: 720/459, loss: 0.02922901138663292 2023-01-24 03:04:15.069699: step: 722/459, loss: 0.008830993436276913 2023-01-24 03:04:15.724890: step: 724/459, loss: 0.17546185851097107 2023-01-24 03:04:16.333280: step: 726/459, loss: 0.004155205097049475 2023-01-24 03:04:16.919122: step: 728/459, loss: 0.04116999730467796 2023-01-24 03:04:17.501265: step: 730/459, loss: 1.2574403285980225 2023-01-24 03:04:18.132345: step: 732/459, loss: 0.01566287875175476 2023-01-24 03:04:18.681046: step: 734/459, loss: 0.03653362765908241 2023-01-24 03:04:19.283116: step: 736/459, loss: 0.023236854001879692 2023-01-24 03:04:19.916351: step: 738/459, loss: 0.45200905203819275 2023-01-24 03:04:20.516033: step: 740/459, loss: 0.08747388422489166 2023-01-24 03:04:21.108850: step: 742/459, loss: 0.05534888803958893 2023-01-24 03:04:21.746562: step: 744/459, loss: 0.030259178951382637 2023-01-24 03:04:22.426921: step: 746/459, loss: 0.01777295395731926 2023-01-24 03:04:23.061857: step: 748/459, loss: 0.021658170968294144 2023-01-24 03:04:23.720678: step: 750/459, loss: 0.10331100225448608 2023-01-24 03:04:24.301276: step: 752/459, loss: 0.027051672339439392 2023-01-24 03:04:24.914681: step: 754/459, loss: 0.05915257707238197 2023-01-24 03:04:25.486881: step: 756/459, loss: 0.03769196942448616 2023-01-24 03:04:26.103256: step: 758/459, loss: 0.049552030861377716 2023-01-24 03:04:26.735872: step: 760/459, loss: 0.011014669202268124 2023-01-24 03:04:27.307870: step: 762/459, loss: 0.06167290732264519 2023-01-24 03:04:27.906314: step: 764/459, loss: 0.012386552058160305 2023-01-24 03:04:28.530222: step: 766/459, loss: 0.008860512636601925 2023-01-24 03:04:29.159715: step: 768/459, loss: 0.014462725259363651 2023-01-24 03:04:29.775199: step: 770/459, loss: 0.20735618472099304 2023-01-24 03:04:30.347704: step: 772/459, loss: 0.3037894666194916 2023-01-24 03:04:30.937911: step: 774/459, loss: 0.023909198120236397 2023-01-24 03:04:31.498059: step: 776/459, loss: 0.01944868639111519 2023-01-24 03:04:32.143280: step: 778/459, loss: 0.04426515847444534 2023-01-24 03:04:32.708225: step: 780/459, loss: 0.010501553304493427 2023-01-24 03:04:33.263611: step: 782/459, loss: 0.2834237217903137 2023-01-24 03:04:33.997379: step: 784/459, loss: 0.07434540241956711 2023-01-24 03:04:34.623919: step: 786/459, loss: 0.023835817351937294 2023-01-24 03:04:35.244501: step: 788/459, loss: 0.3165789544582367 2023-01-24 03:04:35.869651: step: 790/459, loss: 0.1451309323310852 2023-01-24 03:04:36.566168: step: 792/459, loss: 0.0768003985285759 2023-01-24 03:04:37.218859: step: 794/459, loss: 0.015541900880634785 2023-01-24 03:04:37.844236: step: 796/459, loss: 0.062214892357587814 2023-01-24 03:04:38.438875: step: 798/459, loss: 0.026403876021504402 2023-01-24 03:04:39.151386: step: 800/459, 
loss: 0.028494320809841156 2023-01-24 03:04:39.812045: step: 802/459, loss: 0.0834537073969841 2023-01-24 03:04:40.383870: step: 804/459, loss: 0.024634065106511116 2023-01-24 03:04:41.081641: step: 806/459, loss: 0.02528242953121662 2023-01-24 03:04:41.768061: step: 808/459, loss: 0.07998374104499817 2023-01-24 03:04:42.445819: step: 810/459, loss: 0.05575772747397423 2023-01-24 03:04:43.089877: step: 812/459, loss: 0.16041742265224457 2023-01-24 03:04:43.719770: step: 814/459, loss: 0.14506278932094574 2023-01-24 03:04:44.324922: step: 816/459, loss: 0.028443776071071625 2023-01-24 03:04:44.935866: step: 818/459, loss: 0.06810334324836731 2023-01-24 03:04:45.584981: step: 820/459, loss: 0.05728062242269516 2023-01-24 03:04:46.205547: step: 822/459, loss: 0.07802672684192657 2023-01-24 03:04:46.920211: step: 824/459, loss: 0.1849862039089203 2023-01-24 03:04:47.512463: step: 826/459, loss: 0.15240657329559326 2023-01-24 03:04:48.135212: step: 828/459, loss: 0.18502046167850494 2023-01-24 03:04:48.801186: step: 830/459, loss: 0.008740467950701714 2023-01-24 03:04:49.440688: step: 832/459, loss: 0.002092280425131321 2023-01-24 03:04:50.027689: step: 834/459, loss: 0.05580839142203331 2023-01-24 03:04:50.624232: step: 836/459, loss: 0.010621081106364727 2023-01-24 03:04:51.138186: step: 838/459, loss: 0.01101402472704649 2023-01-24 03:04:51.756764: step: 840/459, loss: 0.004039391875267029 2023-01-24 03:04:52.393204: step: 842/459, loss: 0.05312519147992134 2023-01-24 03:04:52.955124: step: 844/459, loss: 0.00524504017084837 2023-01-24 03:04:53.557779: step: 846/459, loss: 0.03993631899356842 2023-01-24 03:04:54.177241: step: 848/459, loss: 0.038464080542325974 2023-01-24 03:04:54.776685: step: 850/459, loss: 0.036688949912786484 2023-01-24 03:04:55.332130: step: 852/459, loss: 0.025608347728848457 2023-01-24 03:04:55.987898: step: 854/459, loss: 0.050177644938230515 2023-01-24 03:04:56.670041: step: 856/459, loss: 0.00463565718382597 2023-01-24 03:04:57.287301: step: 858/459, loss: 0.1445709466934204 2023-01-24 03:04:57.925141: step: 860/459, loss: 0.07316996157169342 2023-01-24 03:04:58.536534: step: 862/459, loss: 0.025530751794576645 2023-01-24 03:04:59.166697: step: 864/459, loss: 0.024396881461143494 2023-01-24 03:04:59.744325: step: 866/459, loss: 0.0275582242757082 2023-01-24 03:05:01.043063: step: 868/459, loss: 0.07001520693302155 2023-01-24 03:05:01.638942: step: 870/459, loss: 0.00853524636477232 2023-01-24 03:05:02.245093: step: 872/459, loss: 0.0008662059553898871 2023-01-24 03:05:02.887102: step: 874/459, loss: 0.004420384299010038 2023-01-24 03:05:03.521544: step: 876/459, loss: 0.008796071633696556 2023-01-24 03:05:04.100783: step: 878/459, loss: 0.027570268139243126 2023-01-24 03:05:04.704271: step: 880/459, loss: 0.04376227781176567 2023-01-24 03:05:05.349976: step: 882/459, loss: 0.15420101583003998 2023-01-24 03:05:05.911697: step: 884/459, loss: 0.06307641416788101 2023-01-24 03:05:06.539176: step: 886/459, loss: 0.008808559738099575 2023-01-24 03:05:07.168299: step: 888/459, loss: 0.1116400882601738 2023-01-24 03:05:07.795115: step: 890/459, loss: 0.02839270606637001 2023-01-24 03:05:08.410975: step: 892/459, loss: 0.02203100360929966 2023-01-24 03:05:09.056471: step: 894/459, loss: 0.011464123614132404 2023-01-24 03:05:09.699514: step: 896/459, loss: 0.022893745452165604 2023-01-24 03:05:10.256994: step: 898/459, loss: 0.0006926911883056164 2023-01-24 03:05:10.864866: step: 900/459, loss: 0.025204960256814957 2023-01-24 03:05:11.504147: step: 902/459, loss: 
0.1059783473610878 2023-01-24 03:05:12.154067: step: 904/459, loss: 0.017373429611325264 2023-01-24 03:05:12.692017: step: 906/459, loss: 0.028989823535084724 2023-01-24 03:05:13.273948: step: 908/459, loss: 0.013388184830546379 2023-01-24 03:05:13.900487: step: 910/459, loss: 0.004272802732884884 2023-01-24 03:05:14.515799: step: 912/459, loss: 0.043342720717191696 2023-01-24 03:05:15.165937: step: 914/459, loss: 0.04499128460884094 2023-01-24 03:05:15.778606: step: 916/459, loss: 1.0305960178375244 2023-01-24 03:05:16.420730: step: 918/459, loss: 0.007562263403087854 2023-01-24 03:05:16.907162: step: 920/459, loss: 0.00030888893525116146 ================================================== Loss: 0.091 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33761648828279345, 'r': 0.31327222537056165, 'f1': 0.3249890999415079}, 'combined': 0.23946565258847946, 'epoch': 24} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3483941344058616, 'r': 0.29486812648350647, 'f1': 0.3194041744282197}, 'combined': 0.20441867163406058, 'epoch': 24} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33538346613545816, 'r': 0.3194734345351044, 'f1': 0.3272351797862002}, 'combined': 0.24112065878983172, 'epoch': 24} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3477443373123687, 'r': 0.2915240902264453, 'f1': 0.3171620783985339}, 'combined': 0.20298373017506166, 'epoch': 24} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35436233653036026, 'r': 0.3220864500911624, 'f1': 0.3374543920438222}, 'combined': 0.24865060466386898, 'epoch': 24} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.35300256576981437, 'r': 0.31052641382668267, 'f1': 0.33040492334420063}, 'combined': 0.23689409598263445, 'epoch': 24} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35416666666666663, 'r': 0.32380952380952377, 'f1': 0.33830845771144274}, 'combined': 0.2255389718076285, 'epoch': 24} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.32954545454545453, 'r': 0.31521739130434784, 'f1': 0.3222222222222222}, 'combined': 0.1611111111111111, 'epoch': 24} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.13793103448275862, 'f1': 0.20512820512820515}, 'combined': 0.13675213675213677, 'epoch': 24} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 'epoch': 10} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 'epoch': 10} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'epoch': 10} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 
0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3342478880342958, 'r': 0.3266369304319968, 'f1': 0.33039858414138645}, 'combined': 0.24345158831470579, 'epoch': 5} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3413499740991752, 'r': 0.24608229950967814, 'f1': 0.28599105067157526}, 'combined': 0.18303427242980813, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.3269230769230769, 'r': 0.3695652173913043, 'f1': 0.346938775510204}, 'combined': 0.173469387755102, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8} ****************************** Epoch: 25 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:07:53.013177: step: 2/459, loss: 0.005336894653737545 2023-01-24 03:07:53.639028: step: 4/459, loss: 0.09724649041891098 2023-01-24 03:07:54.281775: step: 6/459, loss: 0.012387227267026901 2023-01-24 03:07:54.988748: step: 8/459, loss: 0.04596015438437462 2023-01-24 03:07:55.574056: step: 10/459, loss: 0.011533104814589024 2023-01-24 03:07:56.215902: step: 12/459, loss: 0.01953735388815403 2023-01-24 03:07:56.816180: step: 14/459, loss: 0.025245768949389458 2023-01-24 03:07:57.384462: step: 16/459, loss: 0.00030365760903805494 2023-01-24 03:07:57.981896: step: 18/459, loss: 0.010817460715770721 2023-01-24 03:07:58.641768: step: 20/459, loss: 0.008564281277358532 2023-01-24 03:07:59.312122: step: 22/459, loss: 0.06518860161304474 2023-01-24 03:07:59.885202: step: 24/459, loss: 0.01054440625011921 2023-01-24 03:08:00.540632: step: 26/459, loss: 0.08845202624797821 2023-01-24 03:08:01.117486: step: 28/459, loss: 0.008056262508034706 2023-01-24 03:08:01.733145: step: 30/459, loss: 0.04150053858757019 2023-01-24 03:08:02.327017: step: 32/459, loss: 0.1730552464723587 2023-01-24 03:08:02.943545: step: 34/459, loss: 0.01804078184068203 2023-01-24 03:08:03.542730: step: 36/459, loss: 0.009771610610187054 2023-01-24 03:08:04.144806: step: 38/459, loss: 0.07560484111309052 2023-01-24 03:08:04.726917: step: 40/459, loss: 0.04508241266012192 2023-01-24 03:08:05.395834: step: 42/459, loss: 0.12218279391527176 2023-01-24 03:08:05.942539: step: 44/459, loss: 0.04079963639378548 2023-01-24 03:08:06.589662: step: 46/459, loss: 0.010593803599476814 2023-01-24 03:08:07.200993: step: 48/459, loss: 0.0018274165922775865 2023-01-24 03:08:07.830196: step: 50/459, loss: 0.03197250887751579 2023-01-24 03:08:08.454377: step: 52/459, loss: 0.00911038275808096 2023-01-24 03:08:09.124086: step: 54/459, loss: 0.010874871164560318 2023-01-24 03:08:09.755540: step: 56/459, loss: 0.028451375663280487 2023-01-24 03:08:10.400708: step: 58/459, loss: 0.1712646186351776 2023-01-24 
03:08:11.005507: step: 60/459, loss: 0.0022150997538119555 2023-01-24 03:08:11.624512: step: 62/459, loss: 0.0006640805513598025 2023-01-24 03:08:12.310633: step: 64/459, loss: 0.4633343517780304 2023-01-24 03:08:12.998084: step: 66/459, loss: 0.004727696534246206 2023-01-24 03:08:13.635712: step: 68/459, loss: 0.14891546964645386 2023-01-24 03:08:14.334283: step: 70/459, loss: 0.012517232447862625 2023-01-24 03:08:14.991748: step: 72/459, loss: 0.03301040083169937 2023-01-24 03:08:15.674007: step: 74/459, loss: 0.01306130364537239 2023-01-24 03:08:16.295937: step: 76/459, loss: 0.007971661165356636 2023-01-24 03:08:16.900563: step: 78/459, loss: 0.007086008321493864 2023-01-24 03:08:17.456746: step: 80/459, loss: 0.017849862575531006 2023-01-24 03:08:18.090520: step: 82/459, loss: 0.0156553927809 2023-01-24 03:08:18.767301: step: 84/459, loss: 0.017527258023619652 2023-01-24 03:08:19.381392: step: 86/459, loss: 0.041150372475385666 2023-01-24 03:08:20.106108: step: 88/459, loss: 0.0413198359310627 2023-01-24 03:08:20.756051: step: 90/459, loss: 0.004398810677230358 2023-01-24 03:08:21.385195: step: 92/459, loss: 0.006283036898821592 2023-01-24 03:08:21.986670: step: 94/459, loss: 0.06150203198194504 2023-01-24 03:08:22.612116: step: 96/459, loss: 0.005166851449757814 2023-01-24 03:08:23.346368: step: 98/459, loss: 0.03208677098155022 2023-01-24 03:08:23.935970: step: 100/459, loss: 0.06936204433441162 2023-01-24 03:08:24.620459: step: 102/459, loss: 0.31056809425354004 2023-01-24 03:08:25.314459: step: 104/459, loss: 0.21488679945468903 2023-01-24 03:08:25.947610: step: 106/459, loss: 0.03732592239975929 2023-01-24 03:08:26.588396: step: 108/459, loss: 0.05574408918619156 2023-01-24 03:08:27.174301: step: 110/459, loss: 0.015900103375315666 2023-01-24 03:08:27.836980: step: 112/459, loss: 0.003721128683537245 2023-01-24 03:08:28.429556: step: 114/459, loss: 0.10929712653160095 2023-01-24 03:08:29.106532: step: 116/459, loss: 0.0067026931792497635 2023-01-24 03:08:29.847080: step: 118/459, loss: 0.014088685624301434 2023-01-24 03:08:30.521982: step: 120/459, loss: 0.40565022826194763 2023-01-24 03:08:31.051931: step: 122/459, loss: 0.0017981324344873428 2023-01-24 03:08:31.679802: step: 124/459, loss: 0.048619501292705536 2023-01-24 03:08:32.354518: step: 126/459, loss: 0.03425620123744011 2023-01-24 03:08:32.925966: step: 128/459, loss: 0.015500474721193314 2023-01-24 03:08:33.536308: step: 130/459, loss: 0.03837192803621292 2023-01-24 03:08:34.107533: step: 132/459, loss: 0.06767454743385315 2023-01-24 03:08:34.730332: step: 134/459, loss: 0.017919033765792847 2023-01-24 03:08:35.355735: step: 136/459, loss: 0.015455406159162521 2023-01-24 03:08:36.014210: step: 138/459, loss: 0.06931769847869873 2023-01-24 03:08:36.655719: step: 140/459, loss: 0.9178792834281921 2023-01-24 03:08:37.257372: step: 142/459, loss: 0.06176126003265381 2023-01-24 03:08:37.878428: step: 144/459, loss: 0.0004540267400443554 2023-01-24 03:08:38.539240: step: 146/459, loss: 0.005537671037018299 2023-01-24 03:08:39.184115: step: 148/459, loss: 0.02194770984351635 2023-01-24 03:08:39.797716: step: 150/459, loss: 0.030157778412103653 2023-01-24 03:08:40.446256: step: 152/459, loss: 0.6429975032806396 2023-01-24 03:08:41.074807: step: 154/459, loss: 0.039879169315099716 2023-01-24 03:08:41.703359: step: 156/459, loss: 0.01673431694507599 2023-01-24 03:08:42.344314: step: 158/459, loss: 0.08187363296747208 2023-01-24 03:08:43.004571: step: 160/459, loss: 0.025141559541225433 2023-01-24 03:08:43.582023: step: 162/459, 
loss: 0.010822885669767857 2023-01-24 03:08:44.211360: step: 164/459, loss: 0.05622846633195877 2023-01-24 03:08:44.841689: step: 166/459, loss: 0.022706255316734314 2023-01-24 03:08:45.468161: step: 168/459, loss: 0.017711786553263664 2023-01-24 03:08:46.133347: step: 170/459, loss: 0.2070746123790741 2023-01-24 03:08:46.757980: step: 172/459, loss: 0.030549509450793266 2023-01-24 03:08:47.345901: step: 174/459, loss: 0.02757805772125721 2023-01-24 03:08:47.968099: step: 176/459, loss: 0.05679434910416603 2023-01-24 03:08:48.535298: step: 178/459, loss: 0.17977118492126465 2023-01-24 03:08:49.248761: step: 180/459, loss: 0.003046094672754407 2023-01-24 03:08:49.846286: step: 182/459, loss: 0.14387424290180206 2023-01-24 03:08:50.464269: step: 184/459, loss: 0.021001692861318588 2023-01-24 03:08:51.106250: step: 186/459, loss: 0.010479248128831387 2023-01-24 03:08:51.695107: step: 188/459, loss: 0.08154314756393433 2023-01-24 03:08:52.331592: step: 190/459, loss: 0.026603735983371735 2023-01-24 03:08:52.952502: step: 192/459, loss: 0.86044842004776 2023-01-24 03:08:53.572458: step: 194/459, loss: 0.5844063758850098 2023-01-24 03:08:54.180397: step: 196/459, loss: 0.024942809715867043 2023-01-24 03:08:54.763577: step: 198/459, loss: 0.07525590062141418 2023-01-24 03:08:55.405569: step: 200/459, loss: 0.025080401450395584 2023-01-24 03:08:56.024098: step: 202/459, loss: 0.001068422570824623 2023-01-24 03:08:56.643734: step: 204/459, loss: 0.02318771369755268 2023-01-24 03:08:57.236246: step: 206/459, loss: 0.0587000846862793 2023-01-24 03:08:57.881105: step: 208/459, loss: 0.008187712170183659 2023-01-24 03:08:58.514200: step: 210/459, loss: 0.03400835022330284 2023-01-24 03:08:59.195639: step: 212/459, loss: 0.0014897116925567389 2023-01-24 03:08:59.761964: step: 214/459, loss: 0.014374100603163242 2023-01-24 03:09:00.369262: step: 216/459, loss: 0.0776720643043518 2023-01-24 03:09:00.992071: step: 218/459, loss: 0.0459483377635479 2023-01-24 03:09:01.558241: step: 220/459, loss: 0.02061666175723076 2023-01-24 03:09:02.177655: step: 222/459, loss: 0.009370929561555386 2023-01-24 03:09:02.816260: step: 224/459, loss: 0.0004158112278673798 2023-01-24 03:09:03.454892: step: 226/459, loss: 0.0017286782385781407 2023-01-24 03:09:04.174630: step: 228/459, loss: 0.004778590518981218 2023-01-24 03:09:04.902554: step: 230/459, loss: 0.05086071789264679 2023-01-24 03:09:05.572532: step: 232/459, loss: 0.020087067037820816 2023-01-24 03:09:06.207155: step: 234/459, loss: 0.31494638323783875 2023-01-24 03:09:06.806696: step: 236/459, loss: 0.054261013865470886 2023-01-24 03:09:07.493567: step: 238/459, loss: 0.07409507036209106 2023-01-24 03:09:08.167024: step: 240/459, loss: 0.024352839216589928 2023-01-24 03:09:08.812817: step: 242/459, loss: 0.05584336072206497 2023-01-24 03:09:09.467432: step: 244/459, loss: 0.012522068805992603 2023-01-24 03:09:10.053259: step: 246/459, loss: 0.04731316119432449 2023-01-24 03:09:10.642513: step: 248/459, loss: 0.29467928409576416 2023-01-24 03:09:11.292940: step: 250/459, loss: 0.0060015772469341755 2023-01-24 03:09:11.923798: step: 252/459, loss: 0.04067930951714516 2023-01-24 03:09:12.528506: step: 254/459, loss: 0.0612657368183136 2023-01-24 03:09:13.195104: step: 256/459, loss: 0.022184152156114578 2023-01-24 03:09:13.866000: step: 258/459, loss: 0.0013557913480326533 2023-01-24 03:09:14.464083: step: 260/459, loss: 0.059288617223501205 2023-01-24 03:09:15.108168: step: 262/459, loss: 0.028721148148179054 2023-01-24 03:09:15.752434: step: 264/459, loss: 
0.14230334758758545 2023-01-24 03:09:16.431043: step: 266/459, loss: 0.3962092697620392 2023-01-24 03:09:17.054136: step: 268/459, loss: 0.02614002116024494 2023-01-24 03:09:17.658945: step: 270/459, loss: 0.04986745864152908 2023-01-24 03:09:18.225928: step: 272/459, loss: 0.06459859013557434 2023-01-24 03:09:18.846205: step: 274/459, loss: 0.004300311673432589 2023-01-24 03:09:19.601577: step: 276/459, loss: 0.03307301178574562 2023-01-24 03:09:20.208253: step: 278/459, loss: 0.05898265540599823 2023-01-24 03:09:20.864717: step: 280/459, loss: 0.03787539526820183 2023-01-24 03:09:21.643273: step: 282/459, loss: 0.03171912580728531 2023-01-24 03:09:22.361451: step: 284/459, loss: 0.0003501899482216686 2023-01-24 03:09:22.910522: step: 286/459, loss: 0.00041948334546759725 2023-01-24 03:09:23.545330: step: 288/459, loss: 0.03122500330209732 2023-01-24 03:09:24.237683: step: 290/459, loss: 0.040295615792274475 2023-01-24 03:09:24.795990: step: 292/459, loss: 0.00868302583694458 2023-01-24 03:09:25.378039: step: 294/459, loss: 0.04625697433948517 2023-01-24 03:09:26.017325: step: 296/459, loss: 0.016673065721988678 2023-01-24 03:09:26.623174: step: 298/459, loss: 0.03131312504410744 2023-01-24 03:09:27.322733: step: 300/459, loss: 0.018722662702202797 2023-01-24 03:09:27.952740: step: 302/459, loss: 0.009431843645870686 2023-01-24 03:09:28.533633: step: 304/459, loss: 0.021097024902701378 2023-01-24 03:09:29.175315: step: 306/459, loss: 0.06032784655690193 2023-01-24 03:09:29.876896: step: 308/459, loss: 0.10916054993867874 2023-01-24 03:09:30.522133: step: 310/459, loss: 0.004455937072634697 2023-01-24 03:09:31.126453: step: 312/459, loss: 0.11939935386180878 2023-01-24 03:09:31.665947: step: 314/459, loss: 0.003264583647251129 2023-01-24 03:09:32.297831: step: 316/459, loss: 0.04391702637076378 2023-01-24 03:09:32.944499: step: 318/459, loss: 0.03171393275260925 2023-01-24 03:09:33.632832: step: 320/459, loss: 0.1827494353055954 2023-01-24 03:09:34.239945: step: 322/459, loss: 0.028103383257985115 2023-01-24 03:09:34.871423: step: 324/459, loss: 0.06913299113512039 2023-01-24 03:09:35.437870: step: 326/459, loss: 0.061729658395051956 2023-01-24 03:09:36.050585: step: 328/459, loss: 0.028941761702299118 2023-01-24 03:09:36.699529: step: 330/459, loss: 0.05714092776179314 2023-01-24 03:09:37.323695: step: 332/459, loss: 0.05826515704393387 2023-01-24 03:09:37.877551: step: 334/459, loss: 0.0005865083076059818 2023-01-24 03:09:38.500969: step: 336/459, loss: 0.050111446529626846 2023-01-24 03:09:39.165784: step: 338/459, loss: 0.10692689567804337 2023-01-24 03:09:39.780771: step: 340/459, loss: 0.009429940953850746 2023-01-24 03:09:40.389823: step: 342/459, loss: 0.02361828275024891 2023-01-24 03:09:40.982482: step: 344/459, loss: 0.008266150020062923 2023-01-24 03:09:41.568062: step: 346/459, loss: 0.14400959014892578 2023-01-24 03:09:42.188436: step: 348/459, loss: 0.048776958137750626 2023-01-24 03:09:42.927177: step: 350/459, loss: 0.01415421161800623 2023-01-24 03:09:43.497036: step: 352/459, loss: 0.03893504664301872 2023-01-24 03:09:44.091952: step: 354/459, loss: 0.04977792873978615 2023-01-24 03:09:44.661625: step: 356/459, loss: 0.02060580812394619 2023-01-24 03:09:45.274061: step: 358/459, loss: 0.003369077807292342 2023-01-24 03:09:45.928481: step: 360/459, loss: 0.012398520484566689 2023-01-24 03:09:46.583777: step: 362/459, loss: 0.014027726836502552 2023-01-24 03:09:47.201760: step: 364/459, loss: 0.019286058843135834 2023-01-24 03:09:47.815022: step: 366/459, loss: 
0.013533351942896843 2023-01-24 03:09:48.389862: step: 368/459, loss: 0.06140705943107605 2023-01-24 03:09:49.034929: step: 370/459, loss: 0.02852269448339939 2023-01-24 03:09:49.644548: step: 372/459, loss: 0.008612419478595257 2023-01-24 03:09:50.261740: step: 374/459, loss: 0.04605749994516373 2023-01-24 03:09:50.905262: step: 376/459, loss: 0.04465643689036369 2023-01-24 03:09:51.586064: step: 378/459, loss: 0.023301949724555016 2023-01-24 03:09:52.308354: step: 380/459, loss: 0.03612947836518288 2023-01-24 03:09:52.929616: step: 382/459, loss: 0.011445372365415096 2023-01-24 03:09:53.578632: step: 384/459, loss: 0.023284701630473137 2023-01-24 03:09:54.127653: step: 386/459, loss: 0.12924061715602875 2023-01-24 03:09:54.855536: step: 388/459, loss: 0.02285889722406864 2023-01-24 03:09:55.419523: step: 390/459, loss: 0.011584452353417873 2023-01-24 03:09:56.042381: step: 392/459, loss: 0.003107182215899229 2023-01-24 03:09:56.661319: step: 394/459, loss: 0.4069228172302246 2023-01-24 03:09:57.243560: step: 396/459, loss: 0.09250759333372116 2023-01-24 03:09:57.861595: step: 398/459, loss: 0.07207503914833069 2023-01-24 03:09:58.462615: step: 400/459, loss: 0.0014593499945476651 2023-01-24 03:09:59.079849: step: 402/459, loss: 0.007777317427098751 2023-01-24 03:09:59.737823: step: 404/459, loss: 0.04566318169236183 2023-01-24 03:10:00.391435: step: 406/459, loss: 0.09402351826429367 2023-01-24 03:10:00.986567: step: 408/459, loss: 0.1461246758699417 2023-01-24 03:10:01.669286: step: 410/459, loss: 0.05638214573264122 2023-01-24 03:10:02.282774: step: 412/459, loss: 0.00045496143866330385 2023-01-24 03:10:02.861629: step: 414/459, loss: 0.08338192105293274 2023-01-24 03:10:03.457894: step: 416/459, loss: 0.02036847360432148 2023-01-24 03:10:04.080342: step: 418/459, loss: 0.0394730307161808 2023-01-24 03:10:04.701179: step: 420/459, loss: 0.05334838852286339 2023-01-24 03:10:05.294145: step: 422/459, loss: 0.022431373596191406 2023-01-24 03:10:05.871942: step: 424/459, loss: 0.03422519192099571 2023-01-24 03:10:06.444420: step: 426/459, loss: 0.048453982919454575 2023-01-24 03:10:07.040294: step: 428/459, loss: 0.006252637133002281 2023-01-24 03:10:07.642617: step: 430/459, loss: 0.06393507122993469 2023-01-24 03:10:08.247133: step: 432/459, loss: 0.010130088776350021 2023-01-24 03:10:08.889801: step: 434/459, loss: 0.009082221426069736 2023-01-24 03:10:09.519368: step: 436/459, loss: 0.018906759098172188 2023-01-24 03:10:10.148442: step: 438/459, loss: 0.10080009698867798 2023-01-24 03:10:10.729069: step: 440/459, loss: 0.006812764797359705 2023-01-24 03:10:11.327554: step: 442/459, loss: 0.009435986168682575 2023-01-24 03:10:11.931514: step: 444/459, loss: 5.70089864730835 2023-01-24 03:10:12.578851: step: 446/459, loss: 0.17131195962429047 2023-01-24 03:10:13.220795: step: 448/459, loss: 0.0008385840337723494 2023-01-24 03:10:13.821695: step: 450/459, loss: 0.024510841816663742 2023-01-24 03:10:14.486832: step: 452/459, loss: 0.04350704699754715 2023-01-24 03:10:15.141232: step: 454/459, loss: 0.06142185255885124 2023-01-24 03:10:15.719322: step: 456/459, loss: 0.6158235669136047 2023-01-24 03:10:16.343308: step: 458/459, loss: 0.03443772718310356 2023-01-24 03:10:16.973785: step: 460/459, loss: 0.04986957088112831 2023-01-24 03:10:17.552701: step: 462/459, loss: 0.03611331805586815 2023-01-24 03:10:18.157533: step: 464/459, loss: 0.00451182946562767 2023-01-24 03:10:18.791312: step: 466/459, loss: 0.001789797912351787 2023-01-24 03:10:19.464585: step: 468/459, loss: 
0.043210726231336594 2023-01-24 03:10:20.070558: step: 470/459, loss: 0.04016966000199318 2023-01-24 03:10:20.666832: step: 472/459, loss: 0.014288142323493958 2023-01-24 03:10:21.278845: step: 474/459, loss: 0.039524901658296585 2023-01-24 03:10:21.936620: step: 476/459, loss: 0.028552699834108353 2023-01-24 03:10:22.586787: step: 478/459, loss: 0.016398921608924866 2023-01-24 03:10:23.234074: step: 480/459, loss: 0.02668628841638565 2023-01-24 03:10:23.799328: step: 482/459, loss: 0.051937248557806015 2023-01-24 03:10:24.443669: step: 484/459, loss: 0.022669747471809387 2023-01-24 03:10:25.069409: step: 486/459, loss: 0.0058940318413078785 2023-01-24 03:10:25.681169: step: 488/459, loss: 0.014630277641117573 2023-01-24 03:10:26.307135: step: 490/459, loss: 0.008006680756807327 2023-01-24 03:10:26.962516: step: 492/459, loss: 0.0028307947795838118 2023-01-24 03:10:27.664046: step: 494/459, loss: 0.06222004070878029 2023-01-24 03:10:28.286165: step: 496/459, loss: 0.028488805517554283 2023-01-24 03:10:28.860321: step: 498/459, loss: 0.06100616976618767 2023-01-24 03:10:29.431586: step: 500/459, loss: 0.03569671884179115 2023-01-24 03:10:30.094974: step: 502/459, loss: 0.020637674257159233 2023-01-24 03:10:30.692533: step: 504/459, loss: 0.12021584808826447 2023-01-24 03:10:31.284161: step: 506/459, loss: 0.06449054181575775 2023-01-24 03:10:31.916460: step: 508/459, loss: 0.03413844108581543 2023-01-24 03:10:32.541627: step: 510/459, loss: 0.03448684141039848 2023-01-24 03:10:33.190933: step: 512/459, loss: 0.044294316321611404 2023-01-24 03:10:33.764510: step: 514/459, loss: 0.05125962570309639 2023-01-24 03:10:34.418241: step: 516/459, loss: 0.03149069845676422 2023-01-24 03:10:34.999694: step: 518/459, loss: 0.0058783190324902534 2023-01-24 03:10:35.708932: step: 520/459, loss: 0.09701365232467651 2023-01-24 03:10:36.310836: step: 522/459, loss: 0.0006825666641816497 2023-01-24 03:10:36.942423: step: 524/459, loss: 0.02728365547955036 2023-01-24 03:10:37.572458: step: 526/459, loss: 0.024759534746408463 2023-01-24 03:10:38.408758: step: 528/459, loss: 0.5194379091262817 2023-01-24 03:10:39.028947: step: 530/459, loss: 0.056541379541158676 2023-01-24 03:10:39.663312: step: 532/459, loss: 0.06026920676231384 2023-01-24 03:10:40.295338: step: 534/459, loss: 0.03185949847102165 2023-01-24 03:10:40.843972: step: 536/459, loss: 0.005434154532849789 2023-01-24 03:10:41.453489: step: 538/459, loss: 0.026041513308882713 2023-01-24 03:10:42.026742: step: 540/459, loss: 0.002870888216421008 2023-01-24 03:10:42.639120: step: 542/459, loss: 0.03224144130945206 2023-01-24 03:10:43.252069: step: 544/459, loss: 0.02683592215180397 2023-01-24 03:10:43.907505: step: 546/459, loss: 0.01986636221408844 2023-01-24 03:10:44.641664: step: 548/459, loss: 0.07605239748954773 2023-01-24 03:10:45.240414: step: 550/459, loss: 0.061058610677719116 2023-01-24 03:10:45.907948: step: 552/459, loss: 0.009528577327728271 2023-01-24 03:10:46.539022: step: 554/459, loss: 0.07183293998241425 2023-01-24 03:10:47.070218: step: 556/459, loss: 0.048349618911743164 2023-01-24 03:10:47.666286: step: 558/459, loss: 0.010747154243290424 2023-01-24 03:10:48.283876: step: 560/459, loss: 0.07887736707925797 2023-01-24 03:10:48.921227: step: 562/459, loss: 0.021414436399936676 2023-01-24 03:10:49.561147: step: 564/459, loss: 0.01845194585621357 2023-01-24 03:10:50.203588: step: 566/459, loss: 0.028495628386735916 2023-01-24 03:10:50.831956: step: 568/459, loss: 0.02916407585144043 2023-01-24 03:10:51.519415: step: 570/459, loss: 
0.01907695084810257 2023-01-24 03:10:52.095145: step: 572/459, loss: 0.14985351264476776 2023-01-24 03:10:52.741328: step: 574/459, loss: 0.05608009919524193 2023-01-24 03:10:53.342038: step: 576/459, loss: 0.09456975013017654 2023-01-24 03:10:53.954474: step: 578/459, loss: 0.00947211030870676 2023-01-24 03:10:54.627715: step: 580/459, loss: 0.01872856356203556 2023-01-24 03:10:55.244110: step: 582/459, loss: 0.02130456455051899 2023-01-24 03:10:55.894405: step: 584/459, loss: 0.03919132053852081 2023-01-24 03:10:56.569947: step: 586/459, loss: 0.014082208275794983 2023-01-24 03:10:57.221650: step: 588/459, loss: 0.009253296069800854 2023-01-24 03:10:57.835628: step: 590/459, loss: 0.013155353255569935 2023-01-24 03:10:58.431239: step: 592/459, loss: 0.029187696054577827 2023-01-24 03:10:59.092065: step: 594/459, loss: 0.0272873155772686 2023-01-24 03:10:59.742785: step: 596/459, loss: 0.02447090670466423 2023-01-24 03:11:00.361604: step: 598/459, loss: 0.006370871793478727 2023-01-24 03:11:01.010555: step: 600/459, loss: 0.014135938137769699 2023-01-24 03:11:01.605606: step: 602/459, loss: 0.02236557938158512 2023-01-24 03:11:02.260884: step: 604/459, loss: 0.04803217574954033 2023-01-24 03:11:02.865726: step: 606/459, loss: 0.10313215851783752 2023-01-24 03:11:03.432660: step: 608/459, loss: 0.02249648980796337 2023-01-24 03:11:04.030562: step: 610/459, loss: 0.008242115378379822 2023-01-24 03:11:04.635415: step: 612/459, loss: 0.002743144752457738 2023-01-24 03:11:05.281628: step: 614/459, loss: 0.4218192994594574 2023-01-24 03:11:05.819399: step: 616/459, loss: 0.007832827977836132 2023-01-24 03:11:06.452899: step: 618/459, loss: 0.06065986305475235 2023-01-24 03:11:07.130233: step: 620/459, loss: 0.029821669682860374 2023-01-24 03:11:07.749904: step: 622/459, loss: 0.06417291611433029 2023-01-24 03:11:08.332523: step: 624/459, loss: 0.043363358825445175 2023-01-24 03:11:08.888195: step: 626/459, loss: 0.057326704263687134 2023-01-24 03:11:09.527355: step: 628/459, loss: 0.018056752160191536 2023-01-24 03:11:10.063546: step: 630/459, loss: 0.041670188307762146 2023-01-24 03:11:10.616085: step: 632/459, loss: 0.07012314349412918 2023-01-24 03:11:11.245542: step: 634/459, loss: 0.06850945949554443 2023-01-24 03:11:11.822711: step: 636/459, loss: 0.037699200212955475 2023-01-24 03:11:12.434566: step: 638/459, loss: 0.01502014696598053 2023-01-24 03:11:13.038450: step: 640/459, loss: 0.023474959656596184 2023-01-24 03:11:13.676476: step: 642/459, loss: 0.03537876158952713 2023-01-24 03:11:14.289125: step: 644/459, loss: 0.016897203400731087 2023-01-24 03:11:14.951202: step: 646/459, loss: 0.0292756836861372 2023-01-24 03:11:15.593526: step: 648/459, loss: 0.06944812834262848 2023-01-24 03:11:16.234708: step: 650/459, loss: 0.03420393913984299 2023-01-24 03:11:16.816952: step: 652/459, loss: 0.0118078188970685 2023-01-24 03:11:17.432133: step: 654/459, loss: 0.02428813837468624 2023-01-24 03:11:17.999914: step: 656/459, loss: 0.05096840858459473 2023-01-24 03:11:18.632189: step: 658/459, loss: 0.9624965190887451 2023-01-24 03:11:19.198071: step: 660/459, loss: 0.03736297786235809 2023-01-24 03:11:19.865070: step: 662/459, loss: 0.01599983684718609 2023-01-24 03:11:20.494902: step: 664/459, loss: 0.016367081552743912 2023-01-24 03:11:21.092515: step: 666/459, loss: 0.017232978716492653 2023-01-24 03:11:21.673330: step: 668/459, loss: 0.007442761678248644 2023-01-24 03:11:22.254842: step: 670/459, loss: 0.015427838079631329 2023-01-24 03:11:22.912882: step: 672/459, loss: 
0.014141736552119255 2023-01-24 03:11:23.512031: step: 674/459, loss: 0.0010727753397077322 2023-01-24 03:11:24.137139: step: 676/459, loss: 0.027712058275938034 2023-01-24 03:11:24.756461: step: 678/459, loss: 0.016035310924053192 2023-01-24 03:11:25.405948: step: 680/459, loss: 0.04117847979068756 2023-01-24 03:11:26.073658: step: 682/459, loss: 0.033397093415260315 2023-01-24 03:11:26.708817: step: 684/459, loss: 0.048223089426755905 2023-01-24 03:11:27.301473: step: 686/459, loss: 0.023286592215299606 2023-01-24 03:11:27.939186: step: 688/459, loss: 0.08660247921943665 2023-01-24 03:11:28.594321: step: 690/459, loss: 0.015764078125357628 2023-01-24 03:11:29.214871: step: 692/459, loss: 0.010379526764154434 2023-01-24 03:11:29.844249: step: 694/459, loss: 0.07044290751218796 2023-01-24 03:11:30.424686: step: 696/459, loss: 0.024718573316931725 2023-01-24 03:11:31.040833: step: 698/459, loss: 0.013958727940917015 2023-01-24 03:11:31.690477: step: 700/459, loss: 0.020691409707069397 2023-01-24 03:11:32.280808: step: 702/459, loss: 0.053177159279584885 2023-01-24 03:11:32.839178: step: 704/459, loss: 0.014590537175536156 2023-01-24 03:11:33.405663: step: 706/459, loss: 0.022194186225533485 2023-01-24 03:11:34.035686: step: 708/459, loss: 0.09726305305957794 2023-01-24 03:11:34.651332: step: 710/459, loss: 0.029884738847613335 2023-01-24 03:11:35.248646: step: 712/459, loss: 0.012213336303830147 2023-01-24 03:11:35.873121: step: 714/459, loss: 0.03998882696032524 2023-01-24 03:11:36.473457: step: 716/459, loss: 0.008655304089188576 2023-01-24 03:11:37.050301: step: 718/459, loss: 0.03752322867512703 2023-01-24 03:11:37.679241: step: 720/459, loss: 0.018633102998137474 2023-01-24 03:11:38.298624: step: 722/459, loss: 0.0176489669829607 2023-01-24 03:11:38.930938: step: 724/459, loss: 0.07837057113647461 2023-01-24 03:11:39.567439: step: 726/459, loss: 0.2509816884994507 2023-01-24 03:11:40.160932: step: 728/459, loss: 0.004065878689289093 2023-01-24 03:11:40.833155: step: 730/459, loss: 0.43644073605537415 2023-01-24 03:11:41.398406: step: 732/459, loss: 0.012617748230695724 2023-01-24 03:11:41.943145: step: 734/459, loss: 0.012882011011242867 2023-01-24 03:11:42.581081: step: 736/459, loss: 0.004973193630576134 2023-01-24 03:11:43.191556: step: 738/459, loss: 0.12600268423557281 2023-01-24 03:11:43.732790: step: 740/459, loss: 0.02129543572664261 2023-01-24 03:11:44.351243: step: 742/459, loss: 0.05281908065080643 2023-01-24 03:11:45.028224: step: 744/459, loss: 0.003261513775214553 2023-01-24 03:11:45.613604: step: 746/459, loss: 0.003048662096261978 2023-01-24 03:11:46.196944: step: 748/459, loss: 0.017289508134126663 2023-01-24 03:11:46.755567: step: 750/459, loss: 0.0006241542869247496 2023-01-24 03:11:47.372191: step: 752/459, loss: 0.003855894086882472 2023-01-24 03:11:48.001786: step: 754/459, loss: 0.5510916113853455 2023-01-24 03:11:48.673552: step: 756/459, loss: 0.19279171526432037 2023-01-24 03:11:49.339630: step: 758/459, loss: 0.021178679540753365 2023-01-24 03:11:49.947846: step: 760/459, loss: 0.11108595877885818 2023-01-24 03:11:50.545031: step: 762/459, loss: 0.016490718349814415 2023-01-24 03:11:51.188447: step: 764/459, loss: 0.0016178919468075037 2023-01-24 03:11:51.801674: step: 766/459, loss: 0.013383123092353344 2023-01-24 03:11:52.485326: step: 768/459, loss: 0.06851726025342941 2023-01-24 03:11:53.139246: step: 770/459, loss: 0.049837637692689896 2023-01-24 03:11:53.760032: step: 772/459, loss: 0.048428960144519806 2023-01-24 03:11:54.482161: step: 774/459, loss: 
0.055801160633563995 2023-01-24 03:11:55.083553: step: 776/459, loss: 0.05313515663146973 2023-01-24 03:11:55.835183: step: 778/459, loss: 0.03788686916232109 2023-01-24 03:11:56.422837: step: 780/459, loss: 0.1290333867073059 2023-01-24 03:11:57.065213: step: 782/459, loss: 0.5827583074569702 2023-01-24 03:11:57.652166: step: 784/459, loss: 0.020679693669080734 2023-01-24 03:11:58.264385: step: 786/459, loss: 0.010171031579375267 2023-01-24 03:11:58.855900: step: 788/459, loss: 0.02044077217578888 2023-01-24 03:11:59.527745: step: 790/459, loss: 0.05421074479818344 2023-01-24 03:12:00.181218: step: 792/459, loss: 0.027420299127697945 2023-01-24 03:12:00.813001: step: 794/459, loss: 0.005878721363842487 2023-01-24 03:12:01.479970: step: 796/459, loss: 0.043300364166498184 2023-01-24 03:12:02.160058: step: 798/459, loss: 0.019296247512102127 2023-01-24 03:12:02.741212: step: 800/459, loss: 0.05621872842311859 2023-01-24 03:12:03.388724: step: 802/459, loss: 0.02777577005326748 2023-01-24 03:12:03.995735: step: 804/459, loss: 0.02750622108578682 2023-01-24 03:12:04.651524: step: 806/459, loss: 0.016331063583493233 2023-01-24 03:12:05.239707: step: 808/459, loss: 0.021760761737823486 2023-01-24 03:12:05.849312: step: 810/459, loss: 0.20416998863220215 2023-01-24 03:12:06.428217: step: 812/459, loss: 0.054538436233997345 2023-01-24 03:12:07.048149: step: 814/459, loss: 0.0017371996073052287 2023-01-24 03:12:07.654019: step: 816/459, loss: 0.13437427580356598 2023-01-24 03:12:08.321156: step: 818/459, loss: 0.06472614407539368 2023-01-24 03:12:08.950034: step: 820/459, loss: 0.046864211559295654 2023-01-24 03:12:09.571847: step: 822/459, loss: 0.06258804351091385 2023-01-24 03:12:10.180097: step: 824/459, loss: 0.003928331192582846 2023-01-24 03:12:10.769152: step: 826/459, loss: 0.003194620134308934 2023-01-24 03:12:11.334176: step: 828/459, loss: 0.010019319131970406 2023-01-24 03:12:12.042515: step: 830/459, loss: 0.17417244613170624 2023-01-24 03:12:12.660841: step: 832/459, loss: 0.021027076989412308 2023-01-24 03:12:13.299846: step: 834/459, loss: 0.014127434231340885 2023-01-24 03:12:13.894933: step: 836/459, loss: 0.40882131457328796 2023-01-24 03:12:14.579684: step: 838/459, loss: 0.025979915633797646 2023-01-24 03:12:15.204080: step: 840/459, loss: 0.026898259297013283 2023-01-24 03:12:15.780334: step: 842/459, loss: 0.019330328330397606 2023-01-24 03:12:16.356272: step: 844/459, loss: 0.03414809703826904 2023-01-24 03:12:16.950995: step: 846/459, loss: 0.09765846282243729 2023-01-24 03:12:17.498578: step: 848/459, loss: 0.010261639021337032 2023-01-24 03:12:18.129683: step: 850/459, loss: 0.008653284050524235 2023-01-24 03:12:18.713877: step: 852/459, loss: 0.02391485683619976 2023-01-24 03:12:19.340902: step: 854/459, loss: 0.047996871173381805 2023-01-24 03:12:19.982647: step: 856/459, loss: 0.6229890584945679 2023-01-24 03:12:20.533229: step: 858/459, loss: 0.06625106185674667 2023-01-24 03:12:21.146913: step: 860/459, loss: 0.1244526207447052 2023-01-24 03:12:21.772816: step: 862/459, loss: 0.004744629841297865 2023-01-24 03:12:22.370347: step: 864/459, loss: 0.06248527765274048 2023-01-24 03:12:22.973381: step: 866/459, loss: 0.67527174949646 2023-01-24 03:12:23.547970: step: 868/459, loss: 0.01839326322078705 2023-01-24 03:12:24.156028: step: 870/459, loss: 0.05565038323402405 2023-01-24 03:12:24.790796: step: 872/459, loss: 0.006415348034352064 2023-01-24 03:12:25.461588: step: 874/459, loss: 0.02685188315808773 2023-01-24 03:12:26.014602: step: 876/459, loss: 
0.08020953088998795 2023-01-24 03:12:26.555609: step: 878/459, loss: 0.008596185594797134 2023-01-24 03:12:27.165017: step: 880/459, loss: 0.035188306123018265 2023-01-24 03:12:27.810703: step: 882/459, loss: 0.08994080871343613 2023-01-24 03:12:28.439605: step: 884/459, loss: 0.03226645290851593 2023-01-24 03:12:29.035063: step: 886/459, loss: 0.049872033298015594 2023-01-24 03:12:29.541768: step: 888/459, loss: 0.030570831149816513 2023-01-24 03:12:30.155267: step: 890/459, loss: 0.013651244342327118 2023-01-24 03:12:30.736498: step: 892/459, loss: 0.006856768392026424 2023-01-24 03:12:31.368544: step: 894/459, loss: 0.011136576533317566 2023-01-24 03:12:31.963789: step: 896/459, loss: 0.004013929981738329 2023-01-24 03:12:32.612299: step: 898/459, loss: 0.056324101984500885 2023-01-24 03:12:33.236971: step: 900/459, loss: 0.021798279136419296 2023-01-24 03:12:33.871194: step: 902/459, loss: 0.06185561791062355 2023-01-24 03:12:34.432173: step: 904/459, loss: 0.01962503418326378 2023-01-24 03:12:35.036083: step: 906/459, loss: 0.009647276252508163 2023-01-24 03:12:35.683054: step: 908/459, loss: 0.0015114904381334782 2023-01-24 03:12:36.347159: step: 910/459, loss: 0.010147901251912117 2023-01-24 03:12:36.901079: step: 912/459, loss: 0.03790364786982536 2023-01-24 03:12:37.552766: step: 914/459, loss: 0.0734710544347763 2023-01-24 03:12:38.223795: step: 916/459, loss: 0.2379118800163269 2023-01-24 03:12:38.823738: step: 918/459, loss: 0.09945216029882431 2023-01-24 03:12:39.283897: step: 920/459, loss: 0.0 ================================================== Loss: 0.074 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3402360194561384, 'r': 0.3337799280053578, 'f1': 0.33697705375253556}, 'combined': 0.24829888171239461, 'epoch': 25} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34197519823675704, 'r': 0.3037361533430106, 'f1': 0.32172341711825864}, 'combined': 0.20590298695568549, 'epoch': 25} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3234579439252337, 'r': 0.32836812144212524, 'f1': 0.32589453860640305}, 'combined': 0.2401328179205075, 'epoch': 25} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33820520545292077, 'r': 0.29673590233199043, 'f1': 0.3161163313667358}, 'combined': 0.20231445207471088, 'epoch': 25} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34221293385135526, 'r': 0.3318231673587145, 'f1': 0.3369379753334153}, 'combined': 0.24827008708777967, 'epoch': 25} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.35447234878779005, 'r': 0.3170235396262384, 'f1': 0.3347036985669325}, 'combined': 0.2399762367083667, 'epoch': 25} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2993421052631579, 'r': 0.325, 'f1': 0.3116438356164384}, 'combined': 0.2077625570776256, 'epoch': 25} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.32142857142857145, 'r': 0.391304347826087, 'f1': 0.35294117647058826}, 'combined': 0.17647058823529413, 'epoch': 25} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.35714285714285715, 'r': 0.1724137931034483, 'f1': 0.23255813953488377}, 
'combined': 0.1550387596899225, 'epoch': 25} New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 'epoch': 10} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 'epoch': 10} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'epoch': 10} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3234579439252337, 'r': 0.32836812144212524, 'f1': 0.32589453860640305}, 'combined': 0.2401328179205075, 'epoch': 25} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33820520545292077, 'r': 0.29673590233199043, 'f1': 0.3161163313667358}, 'combined': 0.20231445207471088, 'epoch': 25} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.32142857142857145, 'r': 0.391304347826087, 'f1': 0.35294117647058826}, 'combined': 0.17647058823529413, 'epoch': 25} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8} ****************************** Epoch: 26 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:15:22.076936: step: 2/459, loss: 0.1422441601753235 2023-01-24 03:15:22.739174: step: 4/459, loss: 0.016885315999388695 2023-01-24 03:15:23.350361: step: 6/459, loss: 0.03840169683098793 2023-01-24 03:15:23.942727: step: 8/459, loss: 0.021595049649477005 2023-01-24 03:15:24.542450: step: 10/459, loss: 0.09617199003696442 2023-01-24 03:15:25.145723: step: 12/459, loss: 0.042136747390031815 2023-01-24 03:15:25.797424: step: 14/459, loss: 0.014703473076224327 2023-01-24 03:15:26.435208: step: 16/459, loss: 0.026183463633060455 2023-01-24 03:15:27.031068: step: 18/459, loss: 0.13306155800819397 2023-01-24 03:15:27.625647: step: 20/459, loss: 0.6956260800361633 2023-01-24 03:15:28.238543: step: 22/459, loss: 0.18988600373268127 2023-01-24 03:15:28.828226: step: 24/459, loss: 0.08580126613378525 2023-01-24 03:15:29.486375: step: 26/459, loss: 0.012675904668867588 2023-01-24 03:15:30.147528: step: 28/459, loss: 0.11143918335437775 2023-01-24 03:15:30.742509: step: 30/459, loss: 0.08260030299425125 2023-01-24 03:15:31.374337: step: 32/459, loss: 0.05464771389961243 
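The per-language result dicts printed at each evaluation report precision (p), recall (r), and F1 for both template and slot matching, and the 'combined' field matches the product of the two F1 values (e.g. 0.7368421052631579 * 0.33697705375253556 = 0.24829888... for Dev Chinese at epoch 25). The following is a minimal sketch of that arithmetic, assuming this interpretation; the helper names below are illustrative and are not taken from train.py.

def f1(p, r):
    # Harmonic mean of precision and recall; 0.0 when both are zero.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined_score(template, slot):
    # Reproduces the 'combined' field as template F1 * slot F1 (inferred from the logged numbers).
    return template["f1"] * slot["f1"]

# Example: Dev Chinese, epoch 25 (values copied from the log above).
template = {"p": 1.0, "r": 0.5833333333333334, "f1": 0.7368421052631579}
slot = {"p": 0.3402360194561384, "r": 0.3337799280053578, "f1": 0.33697705375253556}
assert abs(f1(template["p"], template["r"]) - template["f1"]) < 1e-9
print(combined_score(template, slot))  # ~0.24829888171239461, matching 'combined' in the log

The 'Current best result' block that follows each evaluation keeps one best checkpoint per language (the 'New best korean model...' message above replaces the epoch-5 Korean entry with epoch 25); the exact selection criterion is not evident from the log alone.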
2023-01-24 03:15:32.055643: step: 34/459, loss: 0.08189649134874344 2023-01-24 03:15:32.602760: step: 36/459, loss: 0.005170625168830156 2023-01-24 03:15:33.168075: step: 38/459, loss: 0.003334174631163478 2023-01-24 03:15:33.832332: step: 40/459, loss: 0.029679886996746063 2023-01-24 03:15:34.398352: step: 42/459, loss: 0.02735856920480728 2023-01-24 03:15:35.056524: step: 44/459, loss: 0.04596854746341705 2023-01-24 03:15:35.592285: step: 46/459, loss: 0.16916200518608093 2023-01-24 03:15:36.225378: step: 48/459, loss: 0.0060810851864516735 2023-01-24 03:15:36.915257: step: 50/459, loss: 0.057463835924863815 2023-01-24 03:15:37.587741: step: 52/459, loss: 0.015039091929793358 2023-01-24 03:15:38.278536: step: 54/459, loss: 0.03983640670776367 2023-01-24 03:15:38.862929: step: 56/459, loss: 0.007858806289732456 2023-01-24 03:15:39.519875: step: 58/459, loss: 0.02921491675078869 2023-01-24 03:15:40.167786: step: 60/459, loss: 0.0333179272711277 2023-01-24 03:15:40.725904: step: 62/459, loss: 0.02725295163691044 2023-01-24 03:15:41.329336: step: 64/459, loss: 0.007865123450756073 2023-01-24 03:15:41.997780: step: 66/459, loss: 0.013261569663882256 2023-01-24 03:15:42.676045: step: 68/459, loss: 0.002490371000021696 2023-01-24 03:15:43.315166: step: 70/459, loss: 0.028915653005242348 2023-01-24 03:15:43.905943: step: 72/459, loss: 0.028395120054483414 2023-01-24 03:15:44.642218: step: 74/459, loss: 0.08704543858766556 2023-01-24 03:15:45.219564: step: 76/459, loss: 0.016767792403697968 2023-01-24 03:15:45.808366: step: 78/459, loss: 0.02212388813495636 2023-01-24 03:15:46.439847: step: 80/459, loss: 0.003484518500044942 2023-01-24 03:15:47.059931: step: 82/459, loss: 0.014746024273335934 2023-01-24 03:15:47.711546: step: 84/459, loss: 0.06327394396066666 2023-01-24 03:15:48.357061: step: 86/459, loss: 0.00871256459504366 2023-01-24 03:15:48.997123: step: 88/459, loss: 0.03947232663631439 2023-01-24 03:15:49.606217: step: 90/459, loss: 0.007579606957733631 2023-01-24 03:15:50.248009: step: 92/459, loss: 0.00972017738968134 2023-01-24 03:15:50.863315: step: 94/459, loss: 0.05947175621986389 2023-01-24 03:15:51.496081: step: 96/459, loss: 0.1408722698688507 2023-01-24 03:15:52.103840: step: 98/459, loss: 0.020382346585392952 2023-01-24 03:15:52.728972: step: 100/459, loss: 0.040652427822351456 2023-01-24 03:15:53.370592: step: 102/459, loss: 0.019772587344050407 2023-01-24 03:15:53.989550: step: 104/459, loss: 0.021744081750512123 2023-01-24 03:15:54.626816: step: 106/459, loss: 0.0066427309066057205 2023-01-24 03:15:55.238025: step: 108/459, loss: 0.0086956936866045 2023-01-24 03:15:55.811209: step: 110/459, loss: 0.012757133692502975 2023-01-24 03:15:56.504007: step: 112/459, loss: 0.03394777700304985 2023-01-24 03:15:57.180051: step: 114/459, loss: 0.10601895302534103 2023-01-24 03:15:57.835891: step: 116/459, loss: 0.03405428305268288 2023-01-24 03:15:58.415343: step: 118/459, loss: 0.05057007074356079 2023-01-24 03:15:59.061702: step: 120/459, loss: 0.2484120875597 2023-01-24 03:15:59.686664: step: 122/459, loss: 0.5678649544715881 2023-01-24 03:16:00.268337: step: 124/459, loss: 0.06640763580799103 2023-01-24 03:16:00.910502: step: 126/459, loss: 0.017003657296299934 2023-01-24 03:16:01.530999: step: 128/459, loss: 0.02077656053006649 2023-01-24 03:16:02.168806: step: 130/459, loss: 0.04439305514097214 2023-01-24 03:16:02.866415: step: 132/459, loss: 0.033613674342632294 2023-01-24 03:16:03.454171: step: 134/459, loss: 0.014237206429243088 2023-01-24 03:16:04.057544: step: 136/459, loss: 
0.010102472268044949 2023-01-24 03:16:04.659281: step: 138/459, loss: 0.04177356883883476 2023-01-24 03:16:05.248690: step: 140/459, loss: 0.007354527246206999 2023-01-24 03:16:05.832277: step: 142/459, loss: 0.03500540181994438 2023-01-24 03:16:06.435673: step: 144/459, loss: 0.40015947818756104 2023-01-24 03:16:07.023878: step: 146/459, loss: 0.1036352664232254 2023-01-24 03:16:07.718936: step: 148/459, loss: 0.18346257507801056 2023-01-24 03:16:08.270415: step: 150/459, loss: 0.005347696132957935 2023-01-24 03:16:08.806889: step: 152/459, loss: 0.032626308500766754 2023-01-24 03:16:09.438147: step: 154/459, loss: 0.06107053905725479 2023-01-24 03:16:10.033366: step: 156/459, loss: 0.025352608412504196 2023-01-24 03:16:10.676724: step: 158/459, loss: 0.3799324035644531 2023-01-24 03:16:11.302918: step: 160/459, loss: 0.04601151868700981 2023-01-24 03:16:11.977879: step: 162/459, loss: 0.011237941682338715 2023-01-24 03:16:12.661582: step: 164/459, loss: 0.24533171951770782 2023-01-24 03:16:13.303038: step: 166/459, loss: 0.06520026177167892 2023-01-24 03:16:13.900566: step: 168/459, loss: 0.06633320450782776 2023-01-24 03:16:14.472813: step: 170/459, loss: 0.00017067333101294935 2023-01-24 03:16:15.069955: step: 172/459, loss: 0.07210784405469894 2023-01-24 03:16:15.664856: step: 174/459, loss: 0.044157806783914566 2023-01-24 03:16:16.282286: step: 176/459, loss: 0.011051643639802933 2023-01-24 03:16:16.900438: step: 178/459, loss: 0.0013588590081781149 2023-01-24 03:16:17.585548: step: 180/459, loss: 0.06423481553792953 2023-01-24 03:16:18.246957: step: 182/459, loss: 0.0466291606426239 2023-01-24 03:16:18.883026: step: 184/459, loss: 0.004085285123437643 2023-01-24 03:16:19.595859: step: 186/459, loss: 0.27039840817451477 2023-01-24 03:16:20.240983: step: 188/459, loss: 0.04269331693649292 2023-01-24 03:16:20.903709: step: 190/459, loss: 0.020148865878582 2023-01-24 03:16:21.532648: step: 192/459, loss: 0.014696689322590828 2023-01-24 03:16:22.184295: step: 194/459, loss: 0.13939031958580017 2023-01-24 03:16:22.867240: step: 196/459, loss: 0.01977628841996193 2023-01-24 03:16:23.489630: step: 198/459, loss: 0.008018406108021736 2023-01-24 03:16:24.104408: step: 200/459, loss: 0.09723005443811417 2023-01-24 03:16:24.752093: step: 202/459, loss: 0.0030978943686932325 2023-01-24 03:16:25.344784: step: 204/459, loss: 0.012558147311210632 2023-01-24 03:16:26.018528: step: 206/459, loss: 0.04200837016105652 2023-01-24 03:16:26.591489: step: 208/459, loss: 0.015482449904084206 2023-01-24 03:16:27.247247: step: 210/459, loss: 0.36646732687950134 2023-01-24 03:16:27.821543: step: 212/459, loss: 0.009032505564391613 2023-01-24 03:16:28.462126: step: 214/459, loss: 0.020801298320293427 2023-01-24 03:16:29.030608: step: 216/459, loss: 0.014519883319735527 2023-01-24 03:16:29.637770: step: 218/459, loss: 0.011857947334647179 2023-01-24 03:16:30.301247: step: 220/459, loss: 0.07245239615440369 2023-01-24 03:16:30.863634: step: 222/459, loss: 0.017649272456765175 2023-01-24 03:16:31.560981: step: 224/459, loss: 0.002927989698946476 2023-01-24 03:16:32.167140: step: 226/459, loss: 0.028631998226046562 2023-01-24 03:16:32.788176: step: 228/459, loss: 0.046299196779727936 2023-01-24 03:16:33.487175: step: 230/459, loss: 0.19372546672821045 2023-01-24 03:16:34.148735: step: 232/459, loss: 7.258952617645264 2023-01-24 03:16:34.752667: step: 234/459, loss: 1.0451444387435913 2023-01-24 03:16:35.388413: step: 236/459, loss: 0.02096407860517502 2023-01-24 03:16:35.949107: step: 238/459, loss: 
0.05317137762904167 2023-01-24 03:16:36.628535: step: 240/459, loss: 0.01456215139478445 2023-01-24 03:16:37.268544: step: 242/459, loss: 0.09116064012050629 2023-01-24 03:16:37.942602: step: 244/459, loss: 0.1353938728570938 2023-01-24 03:16:38.537114: step: 246/459, loss: 0.018427932634949684 2023-01-24 03:16:39.178766: step: 248/459, loss: 0.008537904359400272 2023-01-24 03:16:39.775015: step: 250/459, loss: 0.008119091391563416 2023-01-24 03:16:40.491541: step: 252/459, loss: 0.018628137186169624 2023-01-24 03:16:41.103971: step: 254/459, loss: 0.0006966710789129138 2023-01-24 03:16:41.733098: step: 256/459, loss: 0.0011007908033207059 2023-01-24 03:16:42.350032: step: 258/459, loss: 0.0015752892941236496 2023-01-24 03:16:42.972467: step: 260/459, loss: 0.018379494547843933 2023-01-24 03:16:43.595156: step: 262/459, loss: 0.03869928419589996 2023-01-24 03:16:44.215914: step: 264/459, loss: 0.027751432731747627 2023-01-24 03:16:44.775013: step: 266/459, loss: 0.03637516498565674 2023-01-24 03:16:45.409146: step: 268/459, loss: 0.011001423932611942 2023-01-24 03:16:46.033167: step: 270/459, loss: 0.02718518115580082 2023-01-24 03:16:46.634942: step: 272/459, loss: 0.013158073648810387 2023-01-24 03:16:47.173001: step: 274/459, loss: 0.0008063012501224875 2023-01-24 03:16:47.798141: step: 276/459, loss: 0.052795737981796265 2023-01-24 03:16:48.399782: step: 278/459, loss: 0.013490128330886364 2023-01-24 03:16:49.013827: step: 280/459, loss: 0.011908265762031078 2023-01-24 03:16:49.857295: step: 282/459, loss: 0.0012167422100901604 2023-01-24 03:16:50.442833: step: 284/459, loss: 0.019005587324500084 2023-01-24 03:16:51.089239: step: 286/459, loss: 0.043449629098176956 2023-01-24 03:16:51.653212: step: 288/459, loss: 0.12849226593971252 2023-01-24 03:16:52.282932: step: 290/459, loss: 0.00340620381757617 2023-01-24 03:16:52.919034: step: 292/459, loss: 0.360642671585083 2023-01-24 03:16:53.611525: step: 294/459, loss: 0.11041684448719025 2023-01-24 03:16:54.204123: step: 296/459, loss: 0.007321806624531746 2023-01-24 03:16:54.754429: step: 298/459, loss: 0.06331033259630203 2023-01-24 03:16:55.408568: step: 300/459, loss: 0.3349855840206146 2023-01-24 03:16:56.022650: step: 302/459, loss: 0.0013471068814396858 2023-01-24 03:16:56.591930: step: 304/459, loss: 0.041426025331020355 2023-01-24 03:16:57.171610: step: 306/459, loss: 0.0006674296455457807 2023-01-24 03:16:57.854618: step: 308/459, loss: 0.025098828598856926 2023-01-24 03:16:58.442274: step: 310/459, loss: 0.010328361764550209 2023-01-24 03:16:59.060827: step: 312/459, loss: 0.009369364939630032 2023-01-24 03:16:59.748433: step: 314/459, loss: 0.007751733995974064 2023-01-24 03:17:00.358119: step: 316/459, loss: 0.044490352272987366 2023-01-24 03:17:00.983139: step: 318/459, loss: 0.020658651366829872 2023-01-24 03:17:01.690261: step: 320/459, loss: 0.01201686728745699 2023-01-24 03:17:02.294231: step: 322/459, loss: 0.017232393845915794 2023-01-24 03:17:02.916471: step: 324/459, loss: 0.03610227629542351 2023-01-24 03:17:03.567158: step: 326/459, loss: 0.09616465866565704 2023-01-24 03:17:04.178130: step: 328/459, loss: 0.005608739331364632 2023-01-24 03:17:04.811701: step: 330/459, loss: 0.012651038356125355 2023-01-24 03:17:05.442967: step: 332/459, loss: 0.009895667433738708 2023-01-24 03:17:06.182247: step: 334/459, loss: 0.8439549207687378 2023-01-24 03:17:06.736933: step: 336/459, loss: 0.050672683864831924 2023-01-24 03:17:07.331367: step: 338/459, loss: 0.011645868420600891 2023-01-24 03:17:07.946993: step: 340/459, loss: 
0.03774873912334442 2023-01-24 03:17:08.516034: step: 342/459, loss: 0.01200911495834589 2023-01-24 03:17:09.173360: step: 344/459, loss: 0.02200760692358017 2023-01-24 03:17:09.755476: step: 346/459, loss: 0.7505030632019043 2023-01-24 03:17:10.289527: step: 348/459, loss: 0.06258893013000488 2023-01-24 03:17:10.908542: step: 350/459, loss: 0.0072353496216237545 2023-01-24 03:17:11.431820: step: 352/459, loss: 0.0071275378577411175 2023-01-24 03:17:12.023448: step: 354/459, loss: 0.0038534144405275583 2023-01-24 03:17:12.670354: step: 356/459, loss: 0.5922170877456665 2023-01-24 03:17:13.336613: step: 358/459, loss: 0.030334696173667908 2023-01-24 03:17:13.985277: step: 360/459, loss: 0.017001435160636902 2023-01-24 03:17:14.586642: step: 362/459, loss: 0.037436652928590775 2023-01-24 03:17:15.197767: step: 364/459, loss: 0.017752638086676598 2023-01-24 03:17:15.821532: step: 366/459, loss: 0.008580637164413929 2023-01-24 03:17:16.377334: step: 368/459, loss: 0.04855544865131378 2023-01-24 03:17:17.021643: step: 370/459, loss: 0.010296863503754139 2023-01-24 03:17:17.648069: step: 372/459, loss: 0.04199517145752907 2023-01-24 03:17:18.295797: step: 374/459, loss: 0.07613563537597656 2023-01-24 03:17:18.931393: step: 376/459, loss: 0.02755732461810112 2023-01-24 03:17:19.580678: step: 378/459, loss: 0.003498704871162772 2023-01-24 03:17:20.194731: step: 380/459, loss: 0.007264209445565939 2023-01-24 03:17:20.768268: step: 382/459, loss: 0.0036568758077919483 2023-01-24 03:17:21.344868: step: 384/459, loss: 0.6703159213066101 2023-01-24 03:17:21.927745: step: 386/459, loss: 0.007300166413187981 2023-01-24 03:17:22.456637: step: 388/459, loss: 0.001492402865551412 2023-01-24 03:17:23.134278: step: 390/459, loss: 0.0856156200170517 2023-01-24 03:17:23.823081: step: 392/459, loss: 0.020506877452135086 2023-01-24 03:17:24.474295: step: 394/459, loss: 1.1165491342544556 2023-01-24 03:17:25.127543: step: 396/459, loss: 0.04028264433145523 2023-01-24 03:17:25.774663: step: 398/459, loss: 0.06345942616462708 2023-01-24 03:17:26.432892: step: 400/459, loss: 0.04579631984233856 2023-01-24 03:17:27.048147: step: 402/459, loss: 0.035727597773075104 2023-01-24 03:17:27.596477: step: 404/459, loss: 0.020515870302915573 2023-01-24 03:17:28.235123: step: 406/459, loss: 0.01603621058166027 2023-01-24 03:17:28.865163: step: 408/459, loss: 0.024931011721491814 2023-01-24 03:17:29.514830: step: 410/459, loss: 0.028469184413552284 2023-01-24 03:17:30.135166: step: 412/459, loss: 0.23935428261756897 2023-01-24 03:17:30.751386: step: 414/459, loss: 0.06881818920373917 2023-01-24 03:17:31.389327: step: 416/459, loss: 0.8405499458312988 2023-01-24 03:17:31.972319: step: 418/459, loss: 0.05787065997719765 2023-01-24 03:17:32.581828: step: 420/459, loss: 0.061185240745544434 2023-01-24 03:17:33.193641: step: 422/459, loss: 0.01699361391365528 2023-01-24 03:17:33.912701: step: 424/459, loss: 0.03154866769909859 2023-01-24 03:17:34.536133: step: 426/459, loss: 0.05484114959836006 2023-01-24 03:17:35.179739: step: 428/459, loss: 0.16244639456272125 2023-01-24 03:17:35.742904: step: 430/459, loss: 0.07077723741531372 2023-01-24 03:17:36.383378: step: 432/459, loss: 0.0016945239622145891 2023-01-24 03:17:37.033632: step: 434/459, loss: 0.04233042150735855 2023-01-24 03:17:37.672529: step: 436/459, loss: 0.029216568917036057 2023-01-24 03:17:38.258311: step: 438/459, loss: 0.04337090998888016 2023-01-24 03:17:38.845757: step: 440/459, loss: 0.024589864537119865 2023-01-24 03:17:39.490764: step: 442/459, loss: 
0.012990937568247318 2023-01-24 03:17:40.135312: step: 444/459, loss: 0.08345172554254532 2023-01-24 03:17:40.753727: step: 446/459, loss: 0.26067182421684265 2023-01-24 03:17:41.470760: step: 448/459, loss: 0.37078386545181274 2023-01-24 03:17:42.107762: step: 450/459, loss: 0.03855875879526138 2023-01-24 03:17:42.817595: step: 452/459, loss: 0.047859691083431244 2023-01-24 03:17:43.415820: step: 454/459, loss: 0.0036029445473104715 2023-01-24 03:17:43.994762: step: 456/459, loss: 0.01033573318272829 2023-01-24 03:17:44.558404: step: 458/459, loss: 0.044054921716451645 2023-01-24 03:17:45.204275: step: 460/459, loss: 0.014244865626096725 2023-01-24 03:17:45.824259: step: 462/459, loss: 0.05154280737042427 2023-01-24 03:17:46.511018: step: 464/459, loss: 0.03666602447628975 2023-01-24 03:17:47.228452: step: 466/459, loss: 0.01965126022696495 2023-01-24 03:17:47.838180: step: 468/459, loss: 0.008560697548091412 2023-01-24 03:17:48.492867: step: 470/459, loss: 0.00547117181122303 2023-01-24 03:17:49.175015: step: 472/459, loss: 0.008837482891976833 2023-01-24 03:17:49.804475: step: 474/459, loss: 0.0018529795343056321 2023-01-24 03:17:50.362007: step: 476/459, loss: 0.3015406131744385 2023-01-24 03:17:50.963241: step: 478/459, loss: 0.12570133805274963 2023-01-24 03:17:51.538086: step: 480/459, loss: 0.006662286352366209 2023-01-24 03:17:52.074023: step: 482/459, loss: 0.015952544286847115 2023-01-24 03:17:52.620063: step: 484/459, loss: 0.16391733288764954 2023-01-24 03:17:53.228700: step: 486/459, loss: 0.027039768174290657 2023-01-24 03:17:53.857702: step: 488/459, loss: 0.05570018291473389 2023-01-24 03:17:54.526468: step: 490/459, loss: 0.021682580932974815 2023-01-24 03:17:55.133117: step: 492/459, loss: 0.03809971362352371 2023-01-24 03:17:55.739131: step: 494/459, loss: 0.01737891510128975 2023-01-24 03:17:56.401235: step: 496/459, loss: 0.024412743747234344 2023-01-24 03:17:57.038047: step: 498/459, loss: 0.011799872852861881 2023-01-24 03:17:57.705014: step: 500/459, loss: 0.021143963560461998 2023-01-24 03:17:58.318439: step: 502/459, loss: 0.35466089844703674 2023-01-24 03:17:58.985045: step: 504/459, loss: 0.03215372934937477 2023-01-24 03:17:59.591703: step: 506/459, loss: 0.0002017118822550401 2023-01-24 03:18:00.222762: step: 508/459, loss: 0.01500652078539133 2023-01-24 03:18:00.831208: step: 510/459, loss: 0.08345106244087219 2023-01-24 03:18:01.504219: step: 512/459, loss: 0.002492598257958889 2023-01-24 03:18:02.100653: step: 514/459, loss: 0.00691247871145606 2023-01-24 03:18:02.685814: step: 516/459, loss: 0.07811039686203003 2023-01-24 03:18:03.318703: step: 518/459, loss: 0.05724811926484108 2023-01-24 03:18:03.840653: step: 520/459, loss: 0.03875594586133957 2023-01-24 03:18:04.437744: step: 522/459, loss: 0.03087247535586357 2023-01-24 03:18:05.069798: step: 524/459, loss: 0.06406255066394806 2023-01-24 03:18:05.679215: step: 526/459, loss: 0.013061746954917908 2023-01-24 03:18:06.332257: step: 528/459, loss: 0.025081757456064224 2023-01-24 03:18:06.925585: step: 530/459, loss: 0.006193141452968121 2023-01-24 03:18:07.558047: step: 532/459, loss: 0.0024200279731303453 2023-01-24 03:18:08.237488: step: 534/459, loss: 0.010916423052549362 2023-01-24 03:18:08.830054: step: 536/459, loss: 0.04576919972896576 2023-01-24 03:18:09.412665: step: 538/459, loss: 0.02773895487189293 2023-01-24 03:18:10.084512: step: 540/459, loss: 0.0031784744933247566 2023-01-24 03:18:10.759261: step: 542/459, loss: 0.026238536462187767 2023-01-24 03:18:11.424958: step: 544/459, loss: 
0.011173618026077747 2023-01-24 03:18:12.039517: step: 546/459, loss: 0.020476268604397774 2023-01-24 03:18:12.606663: step: 548/459, loss: 0.001581089454703033 2023-01-24 03:18:13.251794: step: 550/459, loss: 0.2403620034456253 2023-01-24 03:18:13.917831: step: 552/459, loss: 0.05974980443716049 2023-01-24 03:18:14.510145: step: 554/459, loss: 0.002886730246245861 2023-01-24 03:18:15.158499: step: 556/459, loss: 0.02695079892873764 2023-01-24 03:18:15.731436: step: 558/459, loss: 0.02017362415790558 2023-01-24 03:18:16.391929: step: 560/459, loss: 0.012841681018471718 2023-01-24 03:18:16.987626: step: 562/459, loss: 0.00889801699668169 2023-01-24 03:18:17.557877: step: 564/459, loss: 0.001824182108975947 2023-01-24 03:18:18.183499: step: 566/459, loss: 0.006008341442793608 2023-01-24 03:18:18.805149: step: 568/459, loss: 0.0023651213850826025 2023-01-24 03:18:19.445874: step: 570/459, loss: 0.027546072378754616 2023-01-24 03:18:20.048975: step: 572/459, loss: 0.009472012519836426 2023-01-24 03:18:20.695116: step: 574/459, loss: 0.014947645366191864 2023-01-24 03:18:21.304046: step: 576/459, loss: 0.0032761923503130674 2023-01-24 03:18:21.948185: step: 578/459, loss: 0.020961303263902664 2023-01-24 03:18:22.626858: step: 580/459, loss: 0.04900258034467697 2023-01-24 03:18:23.192559: step: 582/459, loss: 0.029408790171146393 2023-01-24 03:18:23.820192: step: 584/459, loss: 0.03259893134236336 2023-01-24 03:18:24.401528: step: 586/459, loss: 0.020085223019123077 2023-01-24 03:18:25.016289: step: 588/459, loss: 0.08493650704622269 2023-01-24 03:18:25.625802: step: 590/459, loss: 0.025041548535227776 2023-01-24 03:18:26.281787: step: 592/459, loss: 0.01976020634174347 2023-01-24 03:18:26.871320: step: 594/459, loss: 0.15207886695861816 2023-01-24 03:18:27.515515: step: 596/459, loss: 0.006509702652692795 2023-01-24 03:18:28.123983: step: 598/459, loss: 0.0895443931221962 2023-01-24 03:18:28.762818: step: 600/459, loss: 0.021035421639680862 2023-01-24 03:18:29.365051: step: 602/459, loss: 0.021496495231986046 2023-01-24 03:18:29.933957: step: 604/459, loss: 0.11808641999959946 2023-01-24 03:18:30.560225: step: 606/459, loss: 0.043623752892017365 2023-01-24 03:18:31.161583: step: 608/459, loss: 0.016255121678113937 2023-01-24 03:18:31.790501: step: 610/459, loss: 0.00236461847089231 2023-01-24 03:18:32.423378: step: 612/459, loss: 0.0018112221732735634 2023-01-24 03:18:33.112924: step: 614/459, loss: 0.08818112313747406 2023-01-24 03:18:33.737700: step: 616/459, loss: 0.009530387818813324 2023-01-24 03:18:34.358496: step: 618/459, loss: 0.046991363167762756 2023-01-24 03:18:34.967182: step: 620/459, loss: 0.009326715022325516 2023-01-24 03:18:35.628532: step: 622/459, loss: 0.8065321445465088 2023-01-24 03:18:36.265934: step: 624/459, loss: 0.061471182852983475 2023-01-24 03:18:36.889512: step: 626/459, loss: 0.06320389360189438 2023-01-24 03:18:37.549692: step: 628/459, loss: 0.05060485377907753 2023-01-24 03:18:38.208471: step: 630/459, loss: 0.30590471625328064 2023-01-24 03:18:38.853281: step: 632/459, loss: 0.012969143688678741 2023-01-24 03:18:39.503371: step: 634/459, loss: 0.008858024142682552 2023-01-24 03:18:40.141601: step: 636/459, loss: 0.0010818600421771407 2023-01-24 03:18:40.751286: step: 638/459, loss: 0.02853301353752613 2023-01-24 03:18:41.463073: step: 640/459, loss: 0.009980290196835995 2023-01-24 03:18:42.133744: step: 642/459, loss: 0.03096238523721695 2023-01-24 03:18:42.683975: step: 644/459, loss: 0.0077730584889650345 2023-01-24 03:18:43.266639: step: 646/459, loss: 
0.013292348012328148 2023-01-24 03:18:43.923414: step: 648/459, loss: 0.020556502044200897 2023-01-24 03:18:44.521813: step: 650/459, loss: 0.01079106330871582 2023-01-24 03:18:45.148832: step: 652/459, loss: 0.234073668718338 2023-01-24 03:18:45.768313: step: 654/459, loss: 0.0037274209316819906 2023-01-24 03:18:46.408455: step: 656/459, loss: 0.06752616167068481 2023-01-24 03:18:47.019410: step: 658/459, loss: 0.015791961923241615 2023-01-24 03:18:47.630170: step: 660/459, loss: 0.03222440183162689 2023-01-24 03:18:48.418111: step: 662/459, loss: 0.035509780049324036 2023-01-24 03:18:49.056133: step: 664/459, loss: 0.69289630651474 2023-01-24 03:18:49.680518: step: 666/459, loss: 0.025446759536862373 2023-01-24 03:18:50.280833: step: 668/459, loss: 0.27103012800216675 2023-01-24 03:18:50.961401: step: 670/459, loss: 0.0007410235120914876 2023-01-24 03:18:51.619215: step: 672/459, loss: 0.012187549844384193 2023-01-24 03:18:52.256628: step: 674/459, loss: 0.0038368823006749153 2023-01-24 03:18:52.952333: step: 676/459, loss: 0.19927336275577545 2023-01-24 03:18:53.597943: step: 678/459, loss: 0.0540560781955719 2023-01-24 03:18:54.246653: step: 680/459, loss: 0.04418802261352539 2023-01-24 03:18:54.830869: step: 682/459, loss: 0.0008310024277307093 2023-01-24 03:18:55.508909: step: 684/459, loss: 0.01217638049274683 2023-01-24 03:18:56.138852: step: 686/459, loss: 0.011312790215015411 2023-01-24 03:18:56.784581: step: 688/459, loss: 0.013914374634623528 2023-01-24 03:18:57.420057: step: 690/459, loss: 0.060727279633283615 2023-01-24 03:18:58.009584: step: 692/459, loss: 0.13170039653778076 2023-01-24 03:18:58.617163: step: 694/459, loss: 0.4197644889354706 2023-01-24 03:18:59.216218: step: 696/459, loss: 0.05662122741341591 2023-01-24 03:18:59.859272: step: 698/459, loss: 0.048432134091854095 2023-01-24 03:19:00.573646: step: 700/459, loss: 0.04095781221985817 2023-01-24 03:19:01.156823: step: 702/459, loss: 0.026206254959106445 2023-01-24 03:19:01.783825: step: 704/459, loss: 0.3090032935142517 2023-01-24 03:19:02.351890: step: 706/459, loss: 0.009102726355195045 2023-01-24 03:19:02.887487: step: 708/459, loss: 0.03537200018763542 2023-01-24 03:19:03.533113: step: 710/459, loss: 0.011652490124106407 2023-01-24 03:19:04.089664: step: 712/459, loss: 0.005471132695674896 2023-01-24 03:19:04.710370: step: 714/459, loss: 0.033431362360715866 2023-01-24 03:19:05.278408: step: 716/459, loss: 0.00351580698043108 2023-01-24 03:19:05.938685: step: 718/459, loss: 0.00897445809096098 2023-01-24 03:19:06.590054: step: 720/459, loss: 0.003374111372977495 2023-01-24 03:19:07.206216: step: 722/459, loss: 0.02282974123954773 2023-01-24 03:19:07.820386: step: 724/459, loss: 0.05160049721598625 2023-01-24 03:19:08.506776: step: 726/459, loss: 0.013885034248232841 2023-01-24 03:19:09.069357: step: 728/459, loss: 0.2046564519405365 2023-01-24 03:19:09.649011: step: 730/459, loss: 0.00872513186186552 2023-01-24 03:19:10.247289: step: 732/459, loss: 0.023829741403460503 2023-01-24 03:19:10.843053: step: 734/459, loss: 0.007721261121332645 2023-01-24 03:19:11.487094: step: 736/459, loss: 0.06485385447740555 2023-01-24 03:19:12.122997: step: 738/459, loss: 0.06225353851914406 2023-01-24 03:19:12.723649: step: 740/459, loss: 0.014209297485649586 2023-01-24 03:19:13.327518: step: 742/459, loss: 0.15187481045722961 2023-01-24 03:19:13.943029: step: 744/459, loss: 0.06834312528371811 2023-01-24 03:19:14.562681: step: 746/459, loss: 0.05330060422420502 2023-01-24 03:19:15.149413: step: 748/459, loss: 
0.0010281476425006986 2023-01-24 03:19:15.757015: step: 750/459, loss: 0.1026904508471489 2023-01-24 03:19:16.367697: step: 752/459, loss: 0.03292367607355118 2023-01-24 03:19:16.958377: step: 754/459, loss: 0.023005664348602295 2023-01-24 03:19:17.529745: step: 756/459, loss: 0.0025956458412110806 2023-01-24 03:19:18.192787: step: 758/459, loss: 0.012998386286199093 2023-01-24 03:19:18.847897: step: 760/459, loss: 0.036768317222595215 2023-01-24 03:19:19.448328: step: 762/459, loss: 0.0020191071089357138 2023-01-24 03:19:19.979666: step: 764/459, loss: 0.0016394008416682482 2023-01-24 03:19:20.591450: step: 766/459, loss: 0.008889766409993172 2023-01-24 03:19:21.206185: step: 768/459, loss: 0.030323445796966553 2023-01-24 03:19:21.877316: step: 770/459, loss: 0.19733083248138428 2023-01-24 03:19:22.511342: step: 772/459, loss: 0.0327150858938694 2023-01-24 03:19:23.136777: step: 774/459, loss: 0.051243025809526443 2023-01-24 03:19:23.746818: step: 776/459, loss: 0.001606788719072938 2023-01-24 03:19:24.360404: step: 778/459, loss: 0.010394621640443802 2023-01-24 03:19:24.875053: step: 780/459, loss: 0.017340241000056267 2023-01-24 03:19:25.451173: step: 782/459, loss: 0.03804895654320717 2023-01-24 03:19:26.052420: step: 784/459, loss: 0.013379684649407864 2023-01-24 03:19:26.644194: step: 786/459, loss: 0.049173519015312195 2023-01-24 03:19:27.242580: step: 788/459, loss: 0.024323714897036552 2023-01-24 03:19:27.916405: step: 790/459, loss: 0.062102310359478 2023-01-24 03:19:28.630381: step: 792/459, loss: 0.2448374629020691 2023-01-24 03:19:29.139833: step: 794/459, loss: 0.0013964232057332993 2023-01-24 03:19:29.739777: step: 796/459, loss: 0.07192345708608627 2023-01-24 03:19:30.330208: step: 798/459, loss: 0.029597083106637 2023-01-24 03:19:30.919561: step: 800/459, loss: 0.008828246034681797 2023-01-24 03:19:31.555895: step: 802/459, loss: 0.009986083023250103 2023-01-24 03:19:32.160949: step: 804/459, loss: 0.012385498732328415 2023-01-24 03:19:32.742014: step: 806/459, loss: 0.01235873531550169 2023-01-24 03:19:33.370255: step: 808/459, loss: 0.005423566326498985 2023-01-24 03:19:33.935784: step: 810/459, loss: 0.00038964382838457823 2023-01-24 03:19:34.493057: step: 812/459, loss: 0.04657259210944176 2023-01-24 03:19:35.188668: step: 814/459, loss: 0.05320894345641136 2023-01-24 03:19:35.789847: step: 816/459, loss: 0.04883027821779251 2023-01-24 03:19:36.440462: step: 818/459, loss: 0.014358575455844402 2023-01-24 03:19:37.079790: step: 820/459, loss: 0.03744944930076599 2023-01-24 03:19:37.723840: step: 822/459, loss: 0.0026741218753159046 2023-01-24 03:19:38.378338: step: 824/459, loss: 0.010906094685196877 2023-01-24 03:19:39.067906: step: 826/459, loss: 0.010074646212160587 2023-01-24 03:19:39.701280: step: 828/459, loss: 0.023058073595166206 2023-01-24 03:19:40.308024: step: 830/459, loss: 0.0016993640456348658 2023-01-24 03:19:40.907467: step: 832/459, loss: 0.0367964543402195 2023-01-24 03:19:41.502557: step: 834/459, loss: 0.37877166271209717 2023-01-24 03:19:42.163286: step: 836/459, loss: 0.17747898399829865 2023-01-24 03:19:42.770793: step: 838/459, loss: 0.03384405001997948 2023-01-24 03:19:43.351993: step: 840/459, loss: 0.039162784814834595 2023-01-24 03:19:43.888531: step: 842/459, loss: 0.03977297991514206 2023-01-24 03:19:44.488300: step: 844/459, loss: 0.15715451538562775 2023-01-24 03:19:44.982559: step: 846/459, loss: 0.00016557499475311488 2023-01-24 03:19:45.566441: step: 848/459, loss: 0.006763504818081856 2023-01-24 03:19:46.169660: step: 850/459, loss: 
0.28877267241477966 2023-01-24 03:19:46.768804: step: 852/459, loss: 9.267414134228602e-05 2023-01-24 03:19:47.397611: step: 854/459, loss: 0.015483037568628788 2023-01-24 03:19:48.137180: step: 856/459, loss: 0.012112995609641075 2023-01-24 03:19:48.779624: step: 858/459, loss: 0.02304094098508358 2023-01-24 03:19:49.482683: step: 860/459, loss: 0.10270921140909195 2023-01-24 03:19:50.079131: step: 862/459, loss: 0.0012169665424153209 2023-01-24 03:19:50.703819: step: 864/459, loss: 0.00766132352873683 2023-01-24 03:19:51.357628: step: 866/459, loss: 0.8345584869384766 2023-01-24 03:19:51.993947: step: 868/459, loss: 0.039579786360263824 2023-01-24 03:19:52.576920: step: 870/459, loss: 0.009189371950924397 2023-01-24 03:19:53.186845: step: 872/459, loss: 0.04010661691427231 2023-01-24 03:19:53.798311: step: 874/459, loss: 0.013412056490778923 2023-01-24 03:19:54.478510: step: 876/459, loss: 0.023207589983940125 2023-01-24 03:19:55.061652: step: 878/459, loss: 0.586931586265564 2023-01-24 03:19:55.662722: step: 880/459, loss: 0.0024315821938216686 2023-01-24 03:19:56.291165: step: 882/459, loss: 0.0005316147580742836 2023-01-24 03:19:56.860837: step: 884/459, loss: 0.03447524830698967 2023-01-24 03:19:57.447856: step: 886/459, loss: 0.018753185868263245 2023-01-24 03:19:58.066642: step: 888/459, loss: 0.007025322876870632 2023-01-24 03:19:58.690712: step: 890/459, loss: 0.03832392022013664 2023-01-24 03:19:59.388221: step: 892/459, loss: 0.07476336508989334 2023-01-24 03:20:00.049730: step: 894/459, loss: 0.013739819638431072 2023-01-24 03:20:00.657979: step: 896/459, loss: 0.02326662465929985 2023-01-24 03:20:01.347863: step: 898/459, loss: 0.015853965654969215 2023-01-24 03:20:01.997500: step: 900/459, loss: 0.5251670479774475 2023-01-24 03:20:02.648065: step: 902/459, loss: 0.02051280252635479 2023-01-24 03:20:03.270568: step: 904/459, loss: 0.004029958974570036 2023-01-24 03:20:03.970956: step: 906/459, loss: 0.020913684740662575 2023-01-24 03:20:04.594802: step: 908/459, loss: 0.11396452784538269 2023-01-24 03:20:05.241161: step: 910/459, loss: 0.044392671436071396 2023-01-24 03:20:05.818126: step: 912/459, loss: 0.031625401228666306 2023-01-24 03:20:06.401959: step: 914/459, loss: 0.008833681233227253 2023-01-24 03:20:07.003250: step: 916/459, loss: 0.00802213791757822 2023-01-24 03:20:07.593871: step: 918/459, loss: 0.07472669333219528 2023-01-24 03:20:08.071501: step: 920/459, loss: 0.0013009198009967804 ================================================== Loss: 0.085 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3346354166666667, 'r': 0.3276506166982922, 'f1': 0.331106184084372}, 'combined': 0.24397297774637933, 'epoch': 26} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3542948902414787, 'r': 0.2999117508674461, 'f1': 0.3248429293704458}, 'combined': 0.20789947479708526, 'epoch': 26} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3286596958174905, 'r': 0.3280360531309298, 'f1': 0.3283475783475784}, 'combined': 0.24194032088768933, 'epoch': 26} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3467327419649112, 'r': 0.28532231082671167, 'f1': 0.313044209487005}, 'combined': 0.20034829407168317, 'epoch': 26} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': 
{'p': 0.35588579105293205, 'r': 0.33225011233025153, 'f1': 0.3436620396428706}, 'combined': 0.2532246607894836, 'epoch': 26} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.34861850314473586, 'r': 0.30158519551901036, 'f1': 0.3234007424089485}, 'combined': 0.23187223040641594, 'epoch': 26} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3448275862068966, 'r': 0.2857142857142857, 'f1': 0.3125}, 'combined': 0.20833333333333331, 'epoch': 26} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.26851851851851855, 'r': 0.31521739130434784, 'f1': 0.29000000000000004}, 'combined': 0.14500000000000002, 'epoch': 26} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3, 'r': 0.10344827586206896, 'f1': 0.15384615384615385}, 'combined': 0.10256410256410256, 'epoch': 26} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 'epoch': 10} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 'epoch': 10} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'epoch': 10} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3234579439252337, 'r': 0.32836812144212524, 'f1': 0.32589453860640305}, 'combined': 0.2401328179205075, 'epoch': 25} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33820520545292077, 'r': 0.29673590233199043, 'f1': 0.3161163313667358}, 'combined': 0.20231445207471088, 'epoch': 25} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.32142857142857145, 'r': 0.391304347826087, 'f1': 0.35294117647058826}, 'combined': 0.17647058823529413, 'epoch': 25} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8} ****************************** Epoch: 27 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:22:44.908575: step: 2/459, loss: 0.004202120006084442 2023-01-24 03:22:45.533113: step: 4/459, loss: 0.013953818008303642 2023-01-24 03:22:46.173717: step: 6/459, loss: 0.017108408734202385 
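Note on the epoch summary above: the "Loss:" banner aggregates the per-step losses printed before it (most likely their mean over the epoch), and each per-language entry reports precision (p), recall (r) and F1 for the template and slot sub-tasks plus a "combined" figure. The printed numbers are consistent with the standard harmonic-mean F1 and with "combined" being the product of the template and slot F1 scores; the minimal sketch below recomputes the epoch-26 Dev Chinese entry under that reading. This is inferred from the logged values only, since train.py's scoring code is not part of this log.

# Minimal sketch: recompute the epoch-26 "Dev Chinese" summary from the
# logged precision/recall values. The harmonic-mean F1 is standard; treating
# 'combined' as template_f1 * slot_f1 is inferred from the printed numbers,
# not read out of train.py.

def f1(p, r):
    # Harmonic mean of precision and recall.
    return 0.0 if p + r == 0 else 2 * p * r / (p + r)

template_f1 = f1(1.0, 0.5833333333333334)              # -> ~0.7368, as logged
slot_f1 = f1(0.3346354166666667, 0.3276506166982922)   # -> ~0.3311, as logged
combined = template_f1 * slot_f1                       # -> ~0.2440, matching 'combined'
print(f"template={template_f1:.4f} slot={slot_f1:.4f} combined={combined:.4f}")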
2023-01-24 03:22:46.786726: step: 8/459, loss: 0.052124958485364914 2023-01-24 03:22:47.381042: step: 10/459, loss: 0.07555320113897324 2023-01-24 03:22:47.947500: step: 12/459, loss: 0.02969912812113762 2023-01-24 03:22:48.563932: step: 14/459, loss: 0.012793668545782566 2023-01-24 03:22:49.259379: step: 16/459, loss: 0.03472544997930527 2023-01-24 03:22:49.984200: step: 18/459, loss: 0.08371132612228394 2023-01-24 03:22:50.569023: step: 20/459, loss: 0.019414372742176056 2023-01-24 03:22:51.164530: step: 22/459, loss: 0.04637368395924568 2023-01-24 03:22:51.747386: step: 24/459, loss: 0.006843834649771452 2023-01-24 03:22:52.376510: step: 26/459, loss: 0.05804578959941864 2023-01-24 03:22:53.010528: step: 28/459, loss: 0.050593987107276917 2023-01-24 03:22:53.582977: step: 30/459, loss: 0.0003390614874660969 2023-01-24 03:22:54.194778: step: 32/459, loss: 0.015862565487623215 2023-01-24 03:22:54.829444: step: 34/459, loss: 0.019842738285660744 2023-01-24 03:22:55.388986: step: 36/459, loss: 0.00705671263858676 2023-01-24 03:22:56.073501: step: 38/459, loss: 0.009734335355460644 2023-01-24 03:22:56.667021: step: 40/459, loss: 0.03124416433274746 2023-01-24 03:22:57.284508: step: 42/459, loss: 0.00390655267983675 2023-01-24 03:22:57.878683: step: 44/459, loss: 0.05231354013085365 2023-01-24 03:22:58.477997: step: 46/459, loss: 0.001872747903689742 2023-01-24 03:22:59.076453: step: 48/459, loss: 0.017025176435709 2023-01-24 03:22:59.655547: step: 50/459, loss: 0.0004598738451022655 2023-01-24 03:23:00.399399: step: 52/459, loss: 0.0026006808038800955 2023-01-24 03:23:00.993442: step: 54/459, loss: 0.012946669943630695 2023-01-24 03:23:01.650511: step: 56/459, loss: 0.06751652806997299 2023-01-24 03:23:02.241806: step: 58/459, loss: 0.0009208403062075377 2023-01-24 03:23:02.851306: step: 60/459, loss: 0.009220232255756855 2023-01-24 03:23:03.481663: step: 62/459, loss: 0.008091880939900875 2023-01-24 03:23:04.092065: step: 64/459, loss: 0.043458372354507446 2023-01-24 03:23:04.695053: step: 66/459, loss: 0.008090040646493435 2023-01-24 03:23:05.317777: step: 68/459, loss: 0.04048263654112816 2023-01-24 03:23:05.925760: step: 70/459, loss: 0.03743601590394974 2023-01-24 03:23:06.499356: step: 72/459, loss: 0.0020534894429147243 2023-01-24 03:23:07.055783: step: 74/459, loss: 0.02190653793513775 2023-01-24 03:23:07.676069: step: 76/459, loss: 0.009520625695586205 2023-01-24 03:23:08.324455: step: 78/459, loss: 0.024163691326975822 2023-01-24 03:23:08.984885: step: 80/459, loss: 0.02331032231450081 2023-01-24 03:23:09.572961: step: 82/459, loss: 0.1272977739572525 2023-01-24 03:23:10.190855: step: 84/459, loss: 0.0025932693388313055 2023-01-24 03:23:10.828944: step: 86/459, loss: 0.016567934304475784 2023-01-24 03:23:11.455042: step: 88/459, loss: 0.00838703103363514 2023-01-24 03:23:12.081098: step: 90/459, loss: 0.011634252034127712 2023-01-24 03:23:12.684898: step: 92/459, loss: 0.07306411862373352 2023-01-24 03:23:13.280621: step: 94/459, loss: 0.012634526006877422 2023-01-24 03:23:13.865686: step: 96/459, loss: 0.024718882516026497 2023-01-24 03:23:14.541562: step: 98/459, loss: 0.04284103214740753 2023-01-24 03:23:15.193910: step: 100/459, loss: 0.004441998898983002 2023-01-24 03:23:15.798634: step: 102/459, loss: 0.0004100112128071487 2023-01-24 03:23:16.536174: step: 104/459, loss: 0.033472154289484024 2023-01-24 03:23:17.114316: step: 106/459, loss: 0.0011533377692103386 2023-01-24 03:23:17.710282: step: 108/459, loss: 0.004918387159705162 2023-01-24 03:23:18.251153: step: 110/459, 
loss: 0.016279151663184166 2023-01-24 03:23:18.869660: step: 112/459, loss: 0.03135807439684868 2023-01-24 03:23:19.544031: step: 114/459, loss: 0.007693407591432333 2023-01-24 03:23:20.107558: step: 116/459, loss: 0.015939312055706978 2023-01-24 03:23:20.727784: step: 118/459, loss: 0.0016516447067260742 2023-01-24 03:23:21.373001: step: 120/459, loss: 0.03816329687833786 2023-01-24 03:23:22.109164: step: 122/459, loss: 0.01597030647099018 2023-01-24 03:23:22.766875: step: 124/459, loss: 0.03389328718185425 2023-01-24 03:23:23.464923: step: 126/459, loss: 0.03578217327594757 2023-01-24 03:23:24.101981: step: 128/459, loss: 0.026548665016889572 2023-01-24 03:23:24.694745: step: 130/459, loss: 0.032553598284721375 2023-01-24 03:23:25.327420: step: 132/459, loss: 0.02641318179666996 2023-01-24 03:23:25.974948: step: 134/459, loss: 0.006761388387531042 2023-01-24 03:23:26.625777: step: 136/459, loss: 0.047143612056970596 2023-01-24 03:23:27.201177: step: 138/459, loss: 0.0024117662105709314 2023-01-24 03:23:27.835794: step: 140/459, loss: 0.029734961688518524 2023-01-24 03:23:28.483706: step: 142/459, loss: 0.14274924993515015 2023-01-24 03:23:29.168632: step: 144/459, loss: 0.029095109552145004 2023-01-24 03:23:29.771988: step: 146/459, loss: 0.033855728805065155 2023-01-24 03:23:30.431705: step: 148/459, loss: 0.007158314809203148 2023-01-24 03:23:30.990242: step: 150/459, loss: 0.08417786657810211 2023-01-24 03:23:31.598424: step: 152/459, loss: 0.008541380055248737 2023-01-24 03:23:32.226968: step: 154/459, loss: 0.008697988465428352 2023-01-24 03:23:32.794593: step: 156/459, loss: 0.02018396556377411 2023-01-24 03:23:33.390482: step: 158/459, loss: 0.02201659232378006 2023-01-24 03:23:34.030481: step: 160/459, loss: 0.04557609558105469 2023-01-24 03:23:34.624242: step: 162/459, loss: 0.014410541392862797 2023-01-24 03:23:35.266493: step: 164/459, loss: 0.03543887659907341 2023-01-24 03:23:35.933355: step: 166/459, loss: 0.02088273875415325 2023-01-24 03:23:36.513862: step: 168/459, loss: 0.08213561028242111 2023-01-24 03:23:37.173347: step: 170/459, loss: 0.022314241155982018 2023-01-24 03:23:37.769818: step: 172/459, loss: 0.007733841892331839 2023-01-24 03:23:38.405930: step: 174/459, loss: 0.041240956634283066 2023-01-24 03:23:39.034201: step: 176/459, loss: 0.015449118800461292 2023-01-24 03:23:39.740512: step: 178/459, loss: 0.05609424039721489 2023-01-24 03:23:40.364169: step: 180/459, loss: 0.015148701146245003 2023-01-24 03:23:40.949587: step: 182/459, loss: 0.0035662136506289244 2023-01-24 03:23:41.546479: step: 184/459, loss: 0.010242193937301636 2023-01-24 03:23:42.177566: step: 186/459, loss: 0.004310001619160175 2023-01-24 03:23:42.826483: step: 188/459, loss: 0.039268188178539276 2023-01-24 03:23:43.398530: step: 190/459, loss: 0.0017062355764210224 2023-01-24 03:23:44.034954: step: 192/459, loss: 0.02891182340681553 2023-01-24 03:23:44.735124: step: 194/459, loss: 0.1202332153916359 2023-01-24 03:23:45.440822: step: 196/459, loss: 0.024306468665599823 2023-01-24 03:23:46.109476: step: 198/459, loss: 0.028412390500307083 2023-01-24 03:23:46.723324: step: 200/459, loss: 0.09979017078876495 2023-01-24 03:23:47.417426: step: 202/459, loss: 0.020692817866802216 2023-01-24 03:23:48.038248: step: 204/459, loss: 0.051916614174842834 2023-01-24 03:23:48.723750: step: 206/459, loss: 0.0013534713070839643 2023-01-24 03:23:49.381538: step: 208/459, loss: 0.002536867978051305 2023-01-24 03:23:50.049934: step: 210/459, loss: 0.06484932452440262 2023-01-24 03:23:50.674637: step: 212/459, 
loss: 0.022381193935871124 2023-01-24 03:23:51.313654: step: 214/459, loss: 0.008798963390290737 2023-01-24 03:23:51.955930: step: 216/459, loss: 0.0028857553843408823 2023-01-24 03:23:52.580001: step: 218/459, loss: 0.02838783524930477 2023-01-24 03:23:53.113534: step: 220/459, loss: 0.5670238733291626 2023-01-24 03:23:53.747723: step: 222/459, loss: 0.004312396049499512 2023-01-24 03:23:54.418701: step: 224/459, loss: 0.01628853753209114 2023-01-24 03:23:55.051692: step: 226/459, loss: 0.16300496459007263 2023-01-24 03:23:55.675884: step: 228/459, loss: 0.02163325436413288 2023-01-24 03:23:56.245666: step: 230/459, loss: 0.03224543109536171 2023-01-24 03:23:56.836009: step: 232/459, loss: 0.041255973279476166 2023-01-24 03:23:57.427000: step: 234/459, loss: 0.05596616119146347 2023-01-24 03:23:58.045678: step: 236/459, loss: 0.021093443036079407 2023-01-24 03:23:58.762779: step: 238/459, loss: 0.10413773357868195 2023-01-24 03:23:59.359890: step: 240/459, loss: 0.18830125033855438 2023-01-24 03:23:59.990112: step: 242/459, loss: 0.002592080971226096 2023-01-24 03:24:00.666042: step: 244/459, loss: 0.016978038474917412 2023-01-24 03:24:01.313693: step: 246/459, loss: 0.0469910129904747 2023-01-24 03:24:01.984526: step: 248/459, loss: 0.0024734954349696636 2023-01-24 03:24:02.632213: step: 250/459, loss: 0.0007990753510966897 2023-01-24 03:24:03.215759: step: 252/459, loss: 0.0404372476041317 2023-01-24 03:24:03.816607: step: 254/459, loss: 0.0073790778405964375 2023-01-24 03:24:04.433697: step: 256/459, loss: 0.025611378252506256 2023-01-24 03:24:05.008461: step: 258/459, loss: 0.009978552348911762 2023-01-24 03:24:05.626494: step: 260/459, loss: 0.03206094726920128 2023-01-24 03:24:06.232021: step: 262/459, loss: 2.5965717213694006e-05 2023-01-24 03:24:06.880676: step: 264/459, loss: 0.02346147783100605 2023-01-24 03:24:07.515421: step: 266/459, loss: 0.0008778288611210883 2023-01-24 03:24:08.171666: step: 268/459, loss: 0.02863984927535057 2023-01-24 03:24:08.796150: step: 270/459, loss: 0.09708905220031738 2023-01-24 03:24:09.349559: step: 272/459, loss: 0.04603668674826622 2023-01-24 03:24:09.985595: step: 274/459, loss: 0.018152769654989243 2023-01-24 03:24:10.566542: step: 276/459, loss: 0.007123873103410006 2023-01-24 03:24:11.186857: step: 278/459, loss: 0.008654000237584114 2023-01-24 03:24:11.784836: step: 280/459, loss: 0.015209028497338295 2023-01-24 03:24:12.361431: step: 282/459, loss: 0.09771133214235306 2023-01-24 03:24:12.955993: step: 284/459, loss: 0.034333258867263794 2023-01-24 03:24:13.588088: step: 286/459, loss: 0.15443985164165497 2023-01-24 03:24:14.180236: step: 288/459, loss: 0.030278638005256653 2023-01-24 03:24:14.765557: step: 290/459, loss: 0.0009993722196668386 2023-01-24 03:24:15.384006: step: 292/459, loss: 0.002920224331319332 2023-01-24 03:24:15.983439: step: 294/459, loss: 0.16390955448150635 2023-01-24 03:24:16.529886: step: 296/459, loss: 0.014938031323254108 2023-01-24 03:24:17.194019: step: 298/459, loss: 0.02167469821870327 2023-01-24 03:24:17.834062: step: 300/459, loss: 0.07125762104988098 2023-01-24 03:24:18.430952: step: 302/459, loss: 0.0022254136856645346 2023-01-24 03:24:19.092128: step: 304/459, loss: 0.02970905415713787 2023-01-24 03:24:19.734638: step: 306/459, loss: 0.032048869878053665 2023-01-24 03:24:20.302849: step: 308/459, loss: 0.07179106771945953 2023-01-24 03:24:20.896782: step: 310/459, loss: 0.0011782910441979766 2023-01-24 03:24:21.486296: step: 312/459, loss: 0.021335698664188385 2023-01-24 03:24:22.110188: step: 314/459, 
loss: 0.06430737674236298 2023-01-24 03:24:22.697250: step: 316/459, loss: 0.025664731860160828 2023-01-24 03:24:23.332766: step: 318/459, loss: 0.01139253843575716 2023-01-24 03:24:23.902376: step: 320/459, loss: 0.013826053589582443 2023-01-24 03:24:24.554777: step: 322/459, loss: 0.5271469950675964 2023-01-24 03:24:25.170108: step: 324/459, loss: 0.0024284215178340673 2023-01-24 03:24:25.771822: step: 326/459, loss: 0.0023242852184921503 2023-01-24 03:24:26.352825: step: 328/459, loss: 0.001356639200821519 2023-01-24 03:24:26.976059: step: 330/459, loss: 0.2659379541873932 2023-01-24 03:24:27.572252: step: 332/459, loss: 0.007633828092366457 2023-01-24 03:24:28.222274: step: 334/459, loss: 0.05213717371225357 2023-01-24 03:24:28.881823: step: 336/459, loss: 0.10720080882310867 2023-01-24 03:24:29.452297: step: 338/459, loss: 0.008582009002566338 2023-01-24 03:24:30.027924: step: 340/459, loss: 0.0006971199763938785 2023-01-24 03:24:30.638395: step: 342/459, loss: 0.02232278324663639 2023-01-24 03:24:31.246995: step: 344/459, loss: 0.06801412999629974 2023-01-24 03:24:31.839393: step: 346/459, loss: 0.011386682279407978 2023-01-24 03:24:32.526071: step: 348/459, loss: 0.0011353518348187208 2023-01-24 03:24:33.165929: step: 350/459, loss: 0.8190568685531616 2023-01-24 03:24:33.816283: step: 352/459, loss: 0.025268370285630226 2023-01-24 03:24:34.382966: step: 354/459, loss: 0.08104714006185532 2023-01-24 03:24:35.035165: step: 356/459, loss: 0.0019633371848613024 2023-01-24 03:24:35.616670: step: 358/459, loss: 0.02173500694334507 2023-01-24 03:24:36.206513: step: 360/459, loss: 0.0585639551281929 2023-01-24 03:24:36.913617: step: 362/459, loss: 0.10939143598079681 2023-01-24 03:24:37.486614: step: 364/459, loss: 0.019306961447000504 2023-01-24 03:24:38.064876: step: 366/459, loss: 3.184741497039795 2023-01-24 03:24:38.672946: step: 368/459, loss: 0.05455414205789566 2023-01-24 03:24:39.258972: step: 370/459, loss: 0.003184177912771702 2023-01-24 03:24:39.916958: step: 372/459, loss: 0.018623916432261467 2023-01-24 03:24:40.579536: step: 374/459, loss: 0.032259028404951096 2023-01-24 03:24:41.176240: step: 376/459, loss: 0.19169840216636658 2023-01-24 03:24:41.770965: step: 378/459, loss: 0.31360873579978943 2023-01-24 03:24:42.381994: step: 380/459, loss: 0.08286474645137787 2023-01-24 03:24:43.009655: step: 382/459, loss: 0.0066861254163086414 2023-01-24 03:24:43.611618: step: 384/459, loss: 0.38822871446609497 2023-01-24 03:24:44.230808: step: 386/459, loss: 0.002775592030957341 2023-01-24 03:24:44.800068: step: 388/459, loss: 0.24245166778564453 2023-01-24 03:24:45.373160: step: 390/459, loss: 0.029332974925637245 2023-01-24 03:24:46.047549: step: 392/459, loss: 0.019172783941030502 2023-01-24 03:24:46.658952: step: 394/459, loss: 0.14733070135116577 2023-01-24 03:24:47.251243: step: 396/459, loss: 0.008270304650068283 2023-01-24 03:24:47.870643: step: 398/459, loss: 0.01821637898683548 2023-01-24 03:24:48.522483: step: 400/459, loss: 0.02919432520866394 2023-01-24 03:24:49.132458: step: 402/459, loss: 0.014429960399866104 2023-01-24 03:24:49.891955: step: 404/459, loss: 0.002189863473176956 2023-01-24 03:24:50.502720: step: 406/459, loss: 0.31413885951042175 2023-01-24 03:24:51.149344: step: 408/459, loss: 0.002328254748135805 2023-01-24 03:24:51.754978: step: 410/459, loss: 0.4370608329772949 2023-01-24 03:24:52.440218: step: 412/459, loss: 0.017057329416275024 2023-01-24 03:24:52.992481: step: 414/459, loss: 0.19470809400081635 2023-01-24 03:24:53.575877: step: 416/459, loss: 
0.06846614181995392 2023-01-24 03:24:54.227847: step: 418/459, loss: 0.013595658354461193 2023-01-24 03:24:54.896045: step: 420/459, loss: 0.0060273101553320885 2023-01-24 03:24:55.524910: step: 422/459, loss: 0.014700259082019329 2023-01-24 03:24:56.084121: step: 424/459, loss: 0.017015744000673294 2023-01-24 03:24:56.643888: step: 426/459, loss: 0.02646017074584961 2023-01-24 03:24:57.350985: step: 428/459, loss: 0.08157929033041 2023-01-24 03:24:57.993347: step: 430/459, loss: 0.007786441128700972 2023-01-24 03:24:58.637118: step: 432/459, loss: 0.010602803900837898 2023-01-24 03:24:59.267636: step: 434/459, loss: 0.03822775557637215 2023-01-24 03:25:00.072837: step: 436/459, loss: 0.003552958369255066 2023-01-24 03:25:00.742031: step: 438/459, loss: 0.009513089433312416 2023-01-24 03:25:01.345894: step: 440/459, loss: 0.0220497976988554 2023-01-24 03:25:02.040174: step: 442/459, loss: 0.0073790717869997025 2023-01-24 03:25:02.674448: step: 444/459, loss: 1.0062005519866943 2023-01-24 03:25:03.232020: step: 446/459, loss: 0.041734036058187485 2023-01-24 03:25:03.850815: step: 448/459, loss: 0.007939551956951618 2023-01-24 03:25:04.516071: step: 450/459, loss: 0.011563876643776894 2023-01-24 03:25:05.144377: step: 452/459, loss: 0.12328796088695526 2023-01-24 03:25:05.839697: step: 454/459, loss: 0.11793786287307739 2023-01-24 03:25:06.510551: step: 456/459, loss: 0.11600121110677719 2023-01-24 03:25:07.181386: step: 458/459, loss: 0.07319942116737366 2023-01-24 03:25:07.832569: step: 460/459, loss: 0.07480118423700333 2023-01-24 03:25:08.482871: step: 462/459, loss: 0.07744129747152328 2023-01-24 03:25:09.155902: step: 464/459, loss: 0.011852304451167583 2023-01-24 03:25:09.773177: step: 466/459, loss: 0.07517170906066895 2023-01-24 03:25:10.435634: step: 468/459, loss: 0.012420672923326492 2023-01-24 03:25:11.004036: step: 470/459, loss: 0.00028413214022293687 2023-01-24 03:25:11.611102: step: 472/459, loss: 0.08896394819021225 2023-01-24 03:25:12.306784: step: 474/459, loss: 0.08468750864267349 2023-01-24 03:25:12.948328: step: 476/459, loss: 0.019553478807210922 2023-01-24 03:25:13.561145: step: 478/459, loss: 0.22108283638954163 2023-01-24 03:25:14.159737: step: 480/459, loss: 0.010751872323453426 2023-01-24 03:25:14.833527: step: 482/459, loss: 0.037065520882606506 2023-01-24 03:25:15.434533: step: 484/459, loss: 0.011227667331695557 2023-01-24 03:25:16.035660: step: 486/459, loss: 0.049202047288417816 2023-01-24 03:25:16.643636: step: 488/459, loss: 0.026146288961172104 2023-01-24 03:25:17.248881: step: 490/459, loss: 0.011686904355883598 2023-01-24 03:25:17.916434: step: 492/459, loss: 0.5758643746376038 2023-01-24 03:25:18.535448: step: 494/459, loss: 0.00047999550588428974 2023-01-24 03:25:19.187384: step: 496/459, loss: 0.014195935800671577 2023-01-24 03:25:19.870136: step: 498/459, loss: 0.003301949705928564 2023-01-24 03:25:20.432811: step: 500/459, loss: 0.03805520012974739 2023-01-24 03:25:21.011049: step: 502/459, loss: 0.003116399049758911 2023-01-24 03:25:21.644516: step: 504/459, loss: 0.014393230900168419 2023-01-24 03:25:22.280935: step: 506/459, loss: 0.07735404372215271 2023-01-24 03:25:22.937053: step: 508/459, loss: 0.0034669043961912394 2023-01-24 03:25:23.526600: step: 510/459, loss: 0.021797792986035347 2023-01-24 03:25:24.177047: step: 512/459, loss: 0.055632393807172775 2023-01-24 03:25:24.884067: step: 514/459, loss: 0.01832662709057331 2023-01-24 03:25:25.517121: step: 516/459, loss: 0.03990563750267029 2023-01-24 03:25:26.124962: step: 518/459, loss: 
0.02707715332508087 2023-01-24 03:25:26.706062: step: 520/459, loss: 0.05781177803874016 2023-01-24 03:25:27.348287: step: 522/459, loss: 0.02631762996315956 2023-01-24 03:25:28.060349: step: 524/459, loss: 0.0316351018846035 2023-01-24 03:25:28.680250: step: 526/459, loss: 0.009158674627542496 2023-01-24 03:25:29.207188: step: 528/459, loss: 0.007889019325375557 2023-01-24 03:25:29.806350: step: 530/459, loss: 0.022394366562366486 2023-01-24 03:25:30.400899: step: 532/459, loss: 0.12116407603025436 2023-01-24 03:25:31.049327: step: 534/459, loss: 0.07621066272258759 2023-01-24 03:25:31.663580: step: 536/459, loss: 0.04438609629869461 2023-01-24 03:25:32.275642: step: 538/459, loss: 0.02247737906873226 2023-01-24 03:25:32.890985: step: 540/459, loss: 0.005501453764736652 2023-01-24 03:25:33.469731: step: 542/459, loss: 0.04174528643488884 2023-01-24 03:25:34.032006: step: 544/459, loss: 0.02516363374888897 2023-01-24 03:25:34.653417: step: 546/459, loss: 0.1694222390651703 2023-01-24 03:25:35.275787: step: 548/459, loss: 0.008221572265028954 2023-01-24 03:25:35.893254: step: 550/459, loss: 0.004177963826805353 2023-01-24 03:25:36.499716: step: 552/459, loss: 0.05191834270954132 2023-01-24 03:25:37.202603: step: 554/459, loss: 0.010965453460812569 2023-01-24 03:25:37.777256: step: 556/459, loss: 0.001537976204417646 2023-01-24 03:25:38.408933: step: 558/459, loss: 1.3762749433517456 2023-01-24 03:25:39.072386: step: 560/459, loss: 0.001909715821966529 2023-01-24 03:25:39.708254: step: 562/459, loss: 0.030810421332716942 2023-01-24 03:25:40.385615: step: 564/459, loss: 0.017288867384195328 2023-01-24 03:25:41.048896: step: 566/459, loss: 0.023252882063388824 2023-01-24 03:25:41.718549: step: 568/459, loss: 0.47886359691619873 2023-01-24 03:25:42.357300: step: 570/459, loss: 0.04433386027812958 2023-01-24 03:25:43.043602: step: 572/459, loss: 0.08637429028749466 2023-01-24 03:25:43.683032: step: 574/459, loss: 0.004141669720411301 2023-01-24 03:25:44.288179: step: 576/459, loss: 0.0010355147533118725 2023-01-24 03:25:44.914775: step: 578/459, loss: 0.004118455573916435 2023-01-24 03:25:45.561906: step: 580/459, loss: 0.004025398753583431 2023-01-24 03:25:46.253573: step: 582/459, loss: 0.032090701162815094 2023-01-24 03:25:46.912371: step: 584/459, loss: 0.03147312253713608 2023-01-24 03:25:47.653427: step: 586/459, loss: 0.01315198466181755 2023-01-24 03:25:48.243154: step: 588/459, loss: 0.009437191300094128 2023-01-24 03:25:48.881949: step: 590/459, loss: 0.03648608922958374 2023-01-24 03:25:49.536372: step: 592/459, loss: 0.005338140297681093 2023-01-24 03:25:50.127422: step: 594/459, loss: 0.008957989513874054 2023-01-24 03:25:50.712955: step: 596/459, loss: 0.11690621078014374 2023-01-24 03:25:51.290815: step: 598/459, loss: 0.02964400127530098 2023-01-24 03:25:51.877953: step: 600/459, loss: 0.036674484610557556 2023-01-24 03:25:52.515092: step: 602/459, loss: 0.010700996965169907 2023-01-24 03:25:53.171971: step: 604/459, loss: 0.02523631602525711 2023-01-24 03:25:53.793679: step: 606/459, loss: 0.5414230227470398 2023-01-24 03:25:54.355710: step: 608/459, loss: 1.8967622518539429 2023-01-24 03:25:55.009385: step: 610/459, loss: 0.007443842012435198 2023-01-24 03:25:55.594020: step: 612/459, loss: 0.03965386003255844 2023-01-24 03:25:56.181072: step: 614/459, loss: 0.018746720626950264 2023-01-24 03:25:56.803747: step: 616/459, loss: 0.0013455887092277408 2023-01-24 03:25:57.433690: step: 618/459, loss: 0.03216749057173729 2023-01-24 03:25:58.088643: step: 620/459, loss: 
0.06213191896677017 2023-01-24 03:25:58.702175: step: 622/459, loss: 0.040764983743429184 2023-01-24 03:25:59.303832: step: 624/459, loss: 0.007617958355695009 2023-01-24 03:25:59.877953: step: 626/459, loss: 0.030792662873864174 2023-01-24 03:26:00.513449: step: 628/459, loss: 0.0034423593897372484 2023-01-24 03:26:01.113095: step: 630/459, loss: 0.007257572375237942 2023-01-24 03:26:01.715885: step: 632/459, loss: 0.0010946517577394843 2023-01-24 03:26:02.307220: step: 634/459, loss: 0.02697436697781086 2023-01-24 03:26:02.894105: step: 636/459, loss: 0.012313762679696083 2023-01-24 03:26:03.562723: step: 638/459, loss: 0.03128495439887047 2023-01-24 03:26:04.208709: step: 640/459, loss: 0.07108284533023834 2023-01-24 03:26:04.866502: step: 642/459, loss: 0.047982264310121536 2023-01-24 03:26:05.478505: step: 644/459, loss: 0.0015950346132740378 2023-01-24 03:26:06.098815: step: 646/459, loss: 0.06707985699176788 2023-01-24 03:26:06.740375: step: 648/459, loss: 0.12098025530576706 2023-01-24 03:26:07.384434: step: 650/459, loss: 0.003359179012477398 2023-01-24 03:26:08.032936: step: 652/459, loss: 0.014295756816864014 2023-01-24 03:26:08.661043: step: 654/459, loss: 0.0005717905587516725 2023-01-24 03:26:09.288311: step: 656/459, loss: 0.04847465083003044 2023-01-24 03:26:09.983194: step: 658/459, loss: 0.001303111668676138 2023-01-24 03:26:10.578696: step: 660/459, loss: 0.005574177950620651 2023-01-24 03:26:11.217861: step: 662/459, loss: 0.1283283680677414 2023-01-24 03:26:11.859418: step: 664/459, loss: 0.0034984631929546595 2023-01-24 03:26:12.504491: step: 666/459, loss: 0.09222850203514099 2023-01-24 03:26:13.099582: step: 668/459, loss: 0.13429482281208038 2023-01-24 03:26:13.774129: step: 670/459, loss: 0.03484727442264557 2023-01-24 03:26:14.376822: step: 672/459, loss: 0.05882148817181587 2023-01-24 03:26:14.996020: step: 674/459, loss: 0.04320476949214935 2023-01-24 03:26:15.657281: step: 676/459, loss: 0.013415975496172905 2023-01-24 03:26:16.319704: step: 678/459, loss: 0.004606825299561024 2023-01-24 03:26:16.980073: step: 680/459, loss: 0.051975663751363754 2023-01-24 03:26:17.584699: step: 682/459, loss: 0.006661158986389637 2023-01-24 03:26:18.189229: step: 684/459, loss: 0.00407778425142169 2023-01-24 03:26:18.751296: step: 686/459, loss: 0.024096684530377388 2023-01-24 03:26:19.522745: step: 688/459, loss: 0.031057298183441162 2023-01-24 03:26:20.183599: step: 690/459, loss: 0.008979558944702148 2023-01-24 03:26:20.832015: step: 692/459, loss: 0.06488282233476639 2023-01-24 03:26:21.438958: step: 694/459, loss: 0.0034887799993157387 2023-01-24 03:26:22.031364: step: 696/459, loss: 0.038945261389017105 2023-01-24 03:26:22.621684: step: 698/459, loss: 0.07731565833091736 2023-01-24 03:26:23.201495: step: 700/459, loss: 0.0028431741520762444 2023-01-24 03:26:23.833759: step: 702/459, loss: 0.07451039552688599 2023-01-24 03:26:24.462909: step: 704/459, loss: 0.005599058698862791 2023-01-24 03:26:25.105600: step: 706/459, loss: 0.07075434923171997 2023-01-24 03:26:25.681053: step: 708/459, loss: 0.025664962828159332 2023-01-24 03:26:26.279788: step: 710/459, loss: 0.000476284883916378 2023-01-24 03:26:26.883759: step: 712/459, loss: 0.006464335136115551 2023-01-24 03:26:27.487679: step: 714/459, loss: 0.024034973233938217 2023-01-24 03:26:28.095727: step: 716/459, loss: 0.03924018517136574 2023-01-24 03:26:28.751501: step: 718/459, loss: 0.06356378644704819 2023-01-24 03:26:29.349214: step: 720/459, loss: 0.01487315446138382 2023-01-24 03:26:29.991429: step: 722/459, loss: 
0.024351483210921288 2023-01-24 03:26:30.631311: step: 724/459, loss: 0.08263873308897018 2023-01-24 03:26:31.250651: step: 726/459, loss: 0.011564361862838268 2023-01-24 03:26:31.884523: step: 728/459, loss: 0.002982259262353182 2023-01-24 03:26:32.480163: step: 730/459, loss: 0.006460248026996851 2023-01-24 03:26:33.081958: step: 732/459, loss: 0.014797425828874111 2023-01-24 03:26:33.716829: step: 734/459, loss: 0.6391137838363647 2023-01-24 03:26:34.362279: step: 736/459, loss: 0.0011082625715062022 2023-01-24 03:26:34.938450: step: 738/459, loss: 0.019903235137462616 2023-01-24 03:26:35.497906: step: 740/459, loss: 0.0007599085802212358 2023-01-24 03:26:36.104111: step: 742/459, loss: 0.03674037382006645 2023-01-24 03:26:36.693414: step: 744/459, loss: 0.017397599294781685 2023-01-24 03:26:37.320950: step: 746/459, loss: 0.04274018853902817 2023-01-24 03:26:37.928718: step: 748/459, loss: 0.022125139832496643 2023-01-24 03:26:38.566205: step: 750/459, loss: 0.04305551201105118 2023-01-24 03:26:39.156442: step: 752/459, loss: 0.003827761858701706 2023-01-24 03:26:39.693961: step: 754/459, loss: 0.02154339849948883 2023-01-24 03:26:40.294477: step: 756/459, loss: 0.09075787663459778 2023-01-24 03:26:40.915254: step: 758/459, loss: 0.008006027899682522 2023-01-24 03:26:41.467681: step: 760/459, loss: 0.004737389739602804 2023-01-24 03:26:42.107783: step: 762/459, loss: 0.2618311941623688 2023-01-24 03:26:42.693526: step: 764/459, loss: 0.02457270398736 2023-01-24 03:26:43.384597: step: 766/459, loss: 0.041063252836465836 2023-01-24 03:26:44.037774: step: 768/459, loss: 0.019426684826612473 2023-01-24 03:26:44.582992: step: 770/459, loss: 0.017260318621993065 2023-01-24 03:26:45.129501: step: 772/459, loss: 0.005386047530919313 2023-01-24 03:26:45.683005: step: 774/459, loss: 0.06959803402423859 2023-01-24 03:26:46.377069: step: 776/459, loss: 0.013491586782038212 2023-01-24 03:26:47.006234: step: 778/459, loss: 0.004668775945901871 2023-01-24 03:26:47.718488: step: 780/459, loss: 0.11513475328683853 2023-01-24 03:26:48.297608: step: 782/459, loss: 0.016426708549261093 2023-01-24 03:26:48.895047: step: 784/459, loss: 0.39265748858451843 2023-01-24 03:26:49.544301: step: 786/459, loss: 0.005940153729170561 2023-01-24 03:26:50.184062: step: 788/459, loss: 0.02664460986852646 2023-01-24 03:26:50.773083: step: 790/459, loss: 0.012441391125321388 2023-01-24 03:26:51.388053: step: 792/459, loss: 0.022335464134812355 2023-01-24 03:26:52.008420: step: 794/459, loss: 0.007193945348262787 2023-01-24 03:26:52.612262: step: 796/459, loss: 0.0416920967400074 2023-01-24 03:26:53.239773: step: 798/459, loss: 0.011174900457262993 2023-01-24 03:26:53.901672: step: 800/459, loss: 0.024435818195343018 2023-01-24 03:26:54.537762: step: 802/459, loss: 0.016857890412211418 2023-01-24 03:26:55.109061: step: 804/459, loss: 0.02263730764389038 2023-01-24 03:26:55.759827: step: 806/459, loss: 0.029626185074448586 2023-01-24 03:26:56.330781: step: 808/459, loss: 0.003089562524110079 2023-01-24 03:26:56.860378: step: 810/459, loss: 0.02702970616519451 2023-01-24 03:26:57.510492: step: 812/459, loss: 0.008930301293730736 2023-01-24 03:26:58.199304: step: 814/459, loss: 0.004202371928840876 2023-01-24 03:26:58.816344: step: 816/459, loss: 0.05869574472308159 2023-01-24 03:26:59.434054: step: 818/459, loss: 0.011420296505093575 2023-01-24 03:27:00.096565: step: 820/459, loss: 0.004216307774186134 2023-01-24 03:27:00.765431: step: 822/459, loss: 0.04173523560166359 2023-01-24 03:27:01.366666: step: 824/459, loss: 
0.04227941110730171 2023-01-24 03:27:01.981837: step: 826/459, loss: 0.8063656687736511 2023-01-24 03:27:02.613107: step: 828/459, loss: 0.001411437289789319 2023-01-24 03:27:03.179706: step: 830/459, loss: 0.06005901098251343 2023-01-24 03:27:03.821658: step: 832/459, loss: 0.010953960940241814 2023-01-24 03:27:04.391051: step: 834/459, loss: 0.006765369325876236 2023-01-24 03:27:04.976449: step: 836/459, loss: 0.0002329361450392753 2023-01-24 03:27:05.660832: step: 838/459, loss: 0.022180769592523575 2023-01-24 03:27:06.268652: step: 840/459, loss: 0.04617302864789963 2023-01-24 03:27:06.890047: step: 842/459, loss: 0.01718132197856903 2023-01-24 03:27:07.570032: step: 844/459, loss: 0.1006505936384201 2023-01-24 03:27:08.328844: step: 846/459, loss: 0.10730334371328354 2023-01-24 03:27:08.903385: step: 848/459, loss: 0.17814455926418304 2023-01-24 03:27:09.490070: step: 850/459, loss: 0.2896689176559448 2023-01-24 03:27:10.129520: step: 852/459, loss: 0.012396356090903282 2023-01-24 03:27:10.794147: step: 854/459, loss: 0.03555953502655029 2023-01-24 03:27:11.386165: step: 856/459, loss: 0.018952572718262672 2023-01-24 03:27:11.952757: step: 858/459, loss: 0.06133688613772392 2023-01-24 03:27:12.545663: step: 860/459, loss: 0.003270806046202779 2023-01-24 03:27:13.106479: step: 862/459, loss: 0.0067488644272089005 2023-01-24 03:27:13.793805: step: 864/459, loss: 0.0017813448794186115 2023-01-24 03:27:14.495555: step: 866/459, loss: 0.029912849888205528 2023-01-24 03:27:15.133810: step: 868/459, loss: 0.004813488572835922 2023-01-24 03:27:15.704497: step: 870/459, loss: 0.00014451197057496756 2023-01-24 03:27:16.295803: step: 872/459, loss: 0.008264048956334591 2023-01-24 03:27:16.937947: step: 874/459, loss: 0.026187840849161148 2023-01-24 03:27:17.575749: step: 876/459, loss: 0.029853999614715576 2023-01-24 03:27:18.188516: step: 878/459, loss: 0.002696150215342641 2023-01-24 03:27:18.773403: step: 880/459, loss: 0.056727662682533264 2023-01-24 03:27:19.428312: step: 882/459, loss: 0.2279149740934372 2023-01-24 03:27:20.084176: step: 884/459, loss: 0.00843701884150505 2023-01-24 03:27:20.823510: step: 886/459, loss: 0.03036302886903286 2023-01-24 03:27:21.426043: step: 888/459, loss: 0.027449987828731537 2023-01-24 03:27:22.046170: step: 890/459, loss: 0.02659996598958969 2023-01-24 03:27:22.673150: step: 892/459, loss: 0.005387058015912771 2023-01-24 03:27:23.311173: step: 894/459, loss: 0.041465289890766144 2023-01-24 03:27:23.891706: step: 896/459, loss: 0.010579749941825867 2023-01-24 03:27:24.573887: step: 898/459, loss: 0.1770404428243637 2023-01-24 03:27:25.238459: step: 900/459, loss: 0.030489271506667137 2023-01-24 03:27:25.887093: step: 902/459, loss: 0.34260430932044983 2023-01-24 03:27:26.503909: step: 904/459, loss: 0.058502841740846634 2023-01-24 03:27:27.132197: step: 906/459, loss: 0.005548506043851376 2023-01-24 03:27:27.730946: step: 908/459, loss: 0.028057053685188293 2023-01-24 03:27:28.385854: step: 910/459, loss: 0.02045440673828125 2023-01-24 03:27:29.007167: step: 912/459, loss: 0.035018108785152435 2023-01-24 03:27:29.622583: step: 914/459, loss: 0.11486873030662537 2023-01-24 03:27:30.307711: step: 916/459, loss: 0.009794509038329124 2023-01-24 03:27:30.946193: step: 918/459, loss: 0.034819237887859344 2023-01-24 03:27:31.400693: step: 920/459, loss: 0.000829750148113817 ================================================== Loss: 0.066 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 
0.34126908957266894, 'r': 0.3160138818054316, 'f1': 0.32815628711618217}, 'combined': 0.24179936945402894, 'epoch': 27} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.344586357878923, 'r': 0.2912836455591624, 'f1': 0.3157009136780271}, 'combined': 0.2020485847539373, 'epoch': 27} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33056102362204726, 'r': 0.31864326375711577, 'f1': 0.3244927536231884}, 'combined': 0.23909992372234934, 'epoch': 27} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3519534244624498, 'r': 0.2895616810350155, 'f1': 0.3177235402877976}, 'combined': 0.20334306578419042, 'epoch': 27} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3509088819281986, 'r': 0.31828168038269244, 'f1': 0.3337998916650327}, 'combined': 0.2459578149110767, 'epoch': 27} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3590577323825888, 'r': 0.31103090193651006, 'f1': 0.33332321914015073}, 'combined': 0.23898645900614582, 'epoch': 27} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36882716049382713, 'r': 0.2845238095238095, 'f1': 0.32123655913978494}, 'combined': 0.21415770609318996, 'epoch': 27} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25, 'r': 0.25, 'f1': 0.25}, 'combined': 0.125, 'epoch': 27} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3, 'r': 0.10344827586206896, 'f1': 0.15384615384615385}, 'combined': 0.10256410256410256, 'epoch': 27} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 'epoch': 10} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 'epoch': 10} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'epoch': 10} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3234579439252337, 'r': 0.32836812144212524, 'f1': 0.32589453860640305}, 'combined': 0.2401328179205075, 'epoch': 25} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33820520545292077, 'r': 0.29673590233199043, 'f1': 0.3161163313667358}, 'combined': 0.20231445207471088, 'epoch': 25} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.32142857142857145, 'r': 0.391304347826087, 'f1': 0.35294117647058826}, 'combined': 0.17647058823529413, 'epoch': 25} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8} Test for Russian: {'template': {'p': 
0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8} ****************************** Epoch: 28 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:30:06.892757: step: 2/459, loss: 0.02014070190489292 2023-01-24 03:30:07.504238: step: 4/459, loss: 0.06527265161275864 2023-01-24 03:30:08.130956: step: 6/459, loss: 0.0036657475866377354 2023-01-24 03:30:08.707925: step: 8/459, loss: 0.0407307967543602 2023-01-24 03:30:09.326479: step: 10/459, loss: 0.007884763181209564 2023-01-24 03:30:09.922381: step: 12/459, loss: 0.01121477410197258 2023-01-24 03:30:10.559236: step: 14/459, loss: 0.021496472880244255 2023-01-24 03:30:11.183485: step: 16/459, loss: 0.038929592818021774 2023-01-24 03:30:11.829732: step: 18/459, loss: 0.011071408167481422 2023-01-24 03:30:12.507946: step: 20/459, loss: 0.003131933743134141 2023-01-24 03:30:13.137158: step: 22/459, loss: 0.12731872498989105 2023-01-24 03:30:13.777235: step: 24/459, loss: 0.010657030157744884 2023-01-24 03:30:14.408237: step: 26/459, loss: 0.011505967937409878 2023-01-24 03:30:15.068590: step: 28/459, loss: 0.0013389656087383628 2023-01-24 03:30:15.801517: step: 30/459, loss: 0.0041150739416480064 2023-01-24 03:30:16.419439: step: 32/459, loss: 0.021046580746769905 2023-01-24 03:30:17.045535: step: 34/459, loss: 0.41989290714263916 2023-01-24 03:30:17.597073: step: 36/459, loss: 0.016628950834274292 2023-01-24 03:30:18.260207: step: 38/459, loss: 0.02217714488506317 2023-01-24 03:30:18.904352: step: 40/459, loss: 0.010880970396101475 2023-01-24 03:30:19.529099: step: 42/459, loss: 0.0031486134976148605 2023-01-24 03:30:20.073110: step: 44/459, loss: 0.0076868413016200066 2023-01-24 03:30:20.664165: step: 46/459, loss: 0.054649241268634796 2023-01-24 03:30:21.274615: step: 48/459, loss: 0.3364073932170868 2023-01-24 03:30:21.869832: step: 50/459, loss: 0.02426879294216633 2023-01-24 03:30:22.522536: step: 52/459, loss: 0.07701877504587173 2023-01-24 03:30:23.157470: step: 54/459, loss: 0.013704553246498108 2023-01-24 03:30:23.792843: step: 56/459, loss: 0.0037976631429046392 2023-01-24 03:30:24.430916: step: 58/459, loss: 0.007330820895731449 2023-01-24 03:30:25.058934: step: 60/459, loss: 0.011235436424612999 2023-01-24 03:30:25.678146: step: 62/459, loss: 0.02244791015982628 2023-01-24 03:30:26.318486: step: 64/459, loss: 0.042782336473464966 2023-01-24 03:30:26.921314: step: 66/459, loss: 0.003627544967457652 2023-01-24 03:30:27.573696: step: 68/459, loss: 0.01038856990635395 2023-01-24 03:30:28.261590: step: 70/459, loss: 0.04346631467342377 2023-01-24 03:30:28.859278: step: 72/459, loss: 0.003290200838819146 2023-01-24 03:30:29.547322: step: 74/459, loss: 0.33734047412872314 2023-01-24 03:30:30.085163: step: 76/459, loss: 0.008111984468996525 2023-01-24 03:30:30.672055: step: 78/459, loss: 0.017326515167951584 2023-01-24 03:30:31.318555: step: 80/459, loss: 0.2192731350660324 2023-01-24 03:30:31.898835: step: 82/459, loss: 0.01165472250431776 2023-01-24 03:30:32.568783: step: 84/459, loss: 0.38156384229660034 
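Note on the "Current best result" block above: for each language it reprints the dev/test/sample scores of the epoch at which that language's best checkpoint was recorded (epoch 10 for Chinese, 25 for Korean, 8 for Russian at this point). The surrounding summaries show the selection criterion is evidently not the dev "combined" value alone, since epochs 26 and 27 beat epoch 10 on Dev Chinese "combined" yet do not displace it. The sketch below only illustrates the general shape of such a per-language best-so-far tracker; selection_score is a hypothetical placeholder, because the log does not reveal which quantity train.py actually compares.

# Hypothetical per-language best-checkpoint tracker matching the shape of the
# "Current best result" block. 'selection_score' is an assumed hook; the log
# alone does not identify the metric train.py uses to rank epochs.

best = {}  # language -> {"epoch": int, "score": float, "dev": dict, "test": dict, "sample": dict}

def update_best(language, epoch, dev, test, sample, selection_score):
    # Keep only the epoch with the highest selection score seen so far.
    score = selection_score(dev, test, sample)
    if language not in best or score > best[language]["score"]:
        best[language] = {"epoch": epoch, "score": score,
                          "dev": dev, "test": test, "sample": sample}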
2023-01-24 03:30:33.144232: step: 86/459, loss: 0.008823827840387821 2023-01-24 03:30:33.750478: step: 88/459, loss: 0.002671560039743781 2023-01-24 03:30:34.393831: step: 90/459, loss: 0.0567958727478981 2023-01-24 03:30:34.998260: step: 92/459, loss: 0.0041510374285280704 2023-01-24 03:30:35.643357: step: 94/459, loss: 0.003798644058406353 2023-01-24 03:30:36.251361: step: 96/459, loss: 0.02065461128950119 2023-01-24 03:30:36.920475: step: 98/459, loss: 0.15984003245830536 2023-01-24 03:30:37.546711: step: 100/459, loss: 0.08194416761398315 2023-01-24 03:30:38.201063: step: 102/459, loss: 0.012222085148096085 2023-01-24 03:30:38.892656: step: 104/459, loss: 0.025483468547463417 2023-01-24 03:30:39.480735: step: 106/459, loss: 0.009081264026463032 2023-01-24 03:30:40.130911: step: 108/459, loss: 0.003572913119569421 2023-01-24 03:30:40.680156: step: 110/459, loss: 0.01855616085231304 2023-01-24 03:30:41.286457: step: 112/459, loss: 0.016151154413819313 2023-01-24 03:30:41.877433: step: 114/459, loss: 0.023450393229722977 2023-01-24 03:30:42.467466: step: 116/459, loss: 0.03499191254377365 2023-01-24 03:30:43.215045: step: 118/459, loss: 0.015949977561831474 2023-01-24 03:30:43.798147: step: 120/459, loss: 0.06760915368795395 2023-01-24 03:30:44.424971: step: 122/459, loss: 0.0661529153585434 2023-01-24 03:30:45.018737: step: 124/459, loss: 0.005963393487036228 2023-01-24 03:30:45.633445: step: 126/459, loss: 0.011572030372917652 2023-01-24 03:30:46.255441: step: 128/459, loss: 0.08663984388113022 2023-01-24 03:30:46.835277: step: 130/459, loss: 0.021598445251584053 2023-01-24 03:30:47.378113: step: 132/459, loss: 0.004819877911359072 2023-01-24 03:30:47.956127: step: 134/459, loss: 0.012047969736158848 2023-01-24 03:30:48.580129: step: 136/459, loss: 0.00423097750172019 2023-01-24 03:30:49.149782: step: 138/459, loss: 0.001271248678676784 2023-01-24 03:30:49.839131: step: 140/459, loss: 0.023586755618453026 2023-01-24 03:30:50.482016: step: 142/459, loss: 0.003925510682165623 2023-01-24 03:30:51.086514: step: 144/459, loss: 0.004766975995153189 2023-01-24 03:30:51.722659: step: 146/459, loss: 0.022059833630919456 2023-01-24 03:30:52.381851: step: 148/459, loss: 0.027011768892407417 2023-01-24 03:30:52.998888: step: 150/459, loss: 0.00859806314110756 2023-01-24 03:30:53.644139: step: 152/459, loss: 0.05028621479868889 2023-01-24 03:30:54.319739: step: 154/459, loss: 0.022875668480992317 2023-01-24 03:30:54.922511: step: 156/459, loss: 0.0009970105020329356 2023-01-24 03:30:55.560495: step: 158/459, loss: 0.05114772543311119 2023-01-24 03:30:56.251670: step: 160/459, loss: 0.05025250464677811 2023-01-24 03:30:56.886230: step: 162/459, loss: 0.0012891938677057624 2023-01-24 03:30:57.507814: step: 164/459, loss: 0.01420834381133318 2023-01-24 03:30:58.142887: step: 166/459, loss: 0.005464404821395874 2023-01-24 03:30:58.817008: step: 168/459, loss: 0.003748902352526784 2023-01-24 03:30:59.414798: step: 170/459, loss: 0.021594228222966194 2023-01-24 03:31:00.040892: step: 172/459, loss: 0.034797027707099915 2023-01-24 03:31:00.692923: step: 174/459, loss: 0.0263343658298254 2023-01-24 03:31:01.285038: step: 176/459, loss: 0.001578831928782165 2023-01-24 03:31:01.904258: step: 178/459, loss: 0.13792267441749573 2023-01-24 03:31:02.510219: step: 180/459, loss: 0.013000383041799068 2023-01-24 03:31:03.148480: step: 182/459, loss: 0.22156888246536255 2023-01-24 03:31:03.771883: step: 184/459, loss: 0.08611947298049927 2023-01-24 03:31:04.353969: step: 186/459, loss: 0.04264267534017563 2023-01-24 
03:31:04.973676: step: 188/459, loss: 0.048491448163986206 2023-01-24 03:31:05.585055: step: 190/459, loss: 0.029856814071536064 2023-01-24 03:31:06.179004: step: 192/459, loss: 0.9693707227706909 2023-01-24 03:31:06.769267: step: 194/459, loss: 0.0056153289042413235 2023-01-24 03:31:07.373481: step: 196/459, loss: 0.01811029016971588 2023-01-24 03:31:07.954130: step: 198/459, loss: 0.10922054201364517 2023-01-24 03:31:08.579262: step: 200/459, loss: 0.00446933601051569 2023-01-24 03:31:09.205442: step: 202/459, loss: 0.06639500707387924 2023-01-24 03:31:09.770660: step: 204/459, loss: 0.015371095389127731 2023-01-24 03:31:10.416895: step: 206/459, loss: 0.04386173561215401 2023-01-24 03:31:10.978718: step: 208/459, loss: 0.336812287569046 2023-01-24 03:31:11.622969: step: 210/459, loss: 0.08859939873218536 2023-01-24 03:31:12.186833: step: 212/459, loss: 0.02399505116045475 2023-01-24 03:31:12.791857: step: 214/459, loss: 0.030918266624212265 2023-01-24 03:31:13.442234: step: 216/459, loss: 0.005606380756944418 2023-01-24 03:31:14.134417: step: 218/459, loss: 0.05432296171784401 2023-01-24 03:31:14.759258: step: 220/459, loss: 0.005952837876975536 2023-01-24 03:31:15.374864: step: 222/459, loss: 0.05056359991431236 2023-01-24 03:31:15.953245: step: 224/459, loss: 0.05754011869430542 2023-01-24 03:31:16.609153: step: 226/459, loss: 0.02636134810745716 2023-01-24 03:31:17.162900: step: 228/459, loss: 0.006207300815731287 2023-01-24 03:31:17.734723: step: 230/459, loss: 0.03345344215631485 2023-01-24 03:31:18.320173: step: 232/459, loss: 0.02350485697388649 2023-01-24 03:31:18.937981: step: 234/459, loss: 0.006858061999082565 2023-01-24 03:31:19.600504: step: 236/459, loss: 0.009281116537749767 2023-01-24 03:31:20.225475: step: 238/459, loss: 0.16675467789173126 2023-01-24 03:31:20.866201: step: 240/459, loss: 0.04601959139108658 2023-01-24 03:31:21.491531: step: 242/459, loss: 0.007564854808151722 2023-01-24 03:31:22.105217: step: 244/459, loss: 0.06863266229629517 2023-01-24 03:31:22.712849: step: 246/459, loss: 2.0375185012817383 2023-01-24 03:31:23.321337: step: 248/459, loss: 0.011380204930901527 2023-01-24 03:31:23.953774: step: 250/459, loss: 0.0021413781214505434 2023-01-24 03:31:24.614400: step: 252/459, loss: 0.014202521182596684 2023-01-24 03:31:25.192011: step: 254/459, loss: 0.09119713306427002 2023-01-24 03:31:25.847369: step: 256/459, loss: 0.011088808067142963 2023-01-24 03:31:26.457740: step: 258/459, loss: 0.0513606034219265 2023-01-24 03:31:27.041354: step: 260/459, loss: 0.014794846996665001 2023-01-24 03:31:27.680497: step: 262/459, loss: 0.016654688864946365 2023-01-24 03:31:28.242695: step: 264/459, loss: 0.0009080765303224325 2023-01-24 03:31:28.887975: step: 266/459, loss: 0.013300607912242413 2023-01-24 03:31:29.502077: step: 268/459, loss: 0.03222070634365082 2023-01-24 03:31:30.108017: step: 270/459, loss: 0.002407772932201624 2023-01-24 03:31:30.786020: step: 272/459, loss: 0.09246400743722916 2023-01-24 03:31:31.388287: step: 274/459, loss: 0.014811528846621513 2023-01-24 03:31:32.011185: step: 276/459, loss: 0.011387956328690052 2023-01-24 03:31:32.636381: step: 278/459, loss: 0.024671481922268867 2023-01-24 03:31:33.214228: step: 280/459, loss: 0.02602875418961048 2023-01-24 03:31:33.844830: step: 282/459, loss: 0.0827517956495285 2023-01-24 03:31:34.452918: step: 284/459, loss: 0.06480486690998077 2023-01-24 03:31:35.046573: step: 286/459, loss: 0.05801810696721077 2023-01-24 03:31:35.703520: step: 288/459, loss: 0.012866855598986149 2023-01-24 
03:31:36.328181: step: 290/459, loss: 0.01865510642528534 2023-01-24 03:31:36.955347: step: 292/459, loss: 0.02021150104701519 2023-01-24 03:31:37.562791: step: 294/459, loss: 0.002879205159842968 2023-01-24 03:31:38.252262: step: 296/459, loss: 0.018118027597665787 2023-01-24 03:31:38.810216: step: 298/459, loss: 0.00935292523354292 2023-01-24 03:31:39.374449: step: 300/459, loss: 0.012328785844147205 2023-01-24 03:31:39.974480: step: 302/459, loss: 0.05342510715126991 2023-01-24 03:31:40.620152: step: 304/459, loss: 0.0270284041762352 2023-01-24 03:31:41.204513: step: 306/459, loss: 0.037583764642477036 2023-01-24 03:31:41.819584: step: 308/459, loss: 0.06828830391168594 2023-01-24 03:31:42.461460: step: 310/459, loss: 0.049053847789764404 2023-01-24 03:31:43.132502: step: 312/459, loss: 0.006132692564278841 2023-01-24 03:31:43.740882: step: 314/459, loss: 0.02713811956346035 2023-01-24 03:31:44.378108: step: 316/459, loss: 0.0054324050433933735 2023-01-24 03:31:45.000301: step: 318/459, loss: 0.024493729695677757 2023-01-24 03:31:45.663934: step: 320/459, loss: 0.0136044230312109 2023-01-24 03:31:46.293762: step: 322/459, loss: 0.013121229596436024 2023-01-24 03:31:46.877438: step: 324/459, loss: 0.002978289034217596 2023-01-24 03:31:47.484895: step: 326/459, loss: 0.026791565120220184 2023-01-24 03:31:48.122732: step: 328/459, loss: 0.04051090031862259 2023-01-24 03:31:48.729225: step: 330/459, loss: 0.0030480287969112396 2023-01-24 03:31:49.363099: step: 332/459, loss: 0.004011988639831543 2023-01-24 03:31:49.959157: step: 334/459, loss: 0.014098860323429108 2023-01-24 03:31:50.539838: step: 336/459, loss: 0.024871423840522766 2023-01-24 03:31:51.154350: step: 338/459, loss: 0.04726562649011612 2023-01-24 03:31:51.750644: step: 340/459, loss: 0.01732986979186535 2023-01-24 03:31:52.395633: step: 342/459, loss: 0.0014779919292777777 2023-01-24 03:31:53.023462: step: 344/459, loss: 0.0011286895023658872 2023-01-24 03:31:53.675912: step: 346/459, loss: 0.10016424208879471 2023-01-24 03:31:54.278009: step: 348/459, loss: 0.007883014157414436 2023-01-24 03:31:54.873211: step: 350/459, loss: 0.04280100017786026 2023-01-24 03:31:55.522046: step: 352/459, loss: 0.004512172657996416 2023-01-24 03:31:56.107796: step: 354/459, loss: 0.03775735944509506 2023-01-24 03:31:56.761771: step: 356/459, loss: 0.026891008019447327 2023-01-24 03:31:57.421093: step: 358/459, loss: 0.5514569878578186 2023-01-24 03:31:58.041205: step: 360/459, loss: 0.03212106227874756 2023-01-24 03:31:58.672110: step: 362/459, loss: 0.021982429549098015 2023-01-24 03:31:59.310353: step: 364/459, loss: 0.03693658486008644 2023-01-24 03:31:59.944440: step: 366/459, loss: 0.007993332110345364 2023-01-24 03:32:00.616955: step: 368/459, loss: 0.0006069935043342412 2023-01-24 03:32:01.225645: step: 370/459, loss: 0.01373308151960373 2023-01-24 03:32:01.806665: step: 372/459, loss: 0.03945217654109001 2023-01-24 03:32:02.401351: step: 374/459, loss: 0.054649677127599716 2023-01-24 03:32:03.068095: step: 376/459, loss: 0.04136514663696289 2023-01-24 03:32:03.736919: step: 378/459, loss: 0.05785149708390236 2023-01-24 03:32:04.337765: step: 380/459, loss: 0.0005296290037222207 2023-01-24 03:32:04.930527: step: 382/459, loss: 0.027363374829292297 2023-01-24 03:32:05.515396: step: 384/459, loss: 0.0112832672894001 2023-01-24 03:32:06.137525: step: 386/459, loss: 0.01155503187328577 2023-01-24 03:32:06.753846: step: 388/459, loss: 0.013972398824989796 2023-01-24 03:32:07.399491: step: 390/459, loss: 0.005122013855725527 2023-01-24 
03:32:08.016257: step: 392/459, loss: 0.026290694251656532 2023-01-24 03:32:08.631583: step: 394/459, loss: 0.010384773835539818 2023-01-24 03:32:09.254517: step: 396/459, loss: 0.016425976529717445 2023-01-24 03:32:09.863230: step: 398/459, loss: 0.0008529401384294033 2023-01-24 03:32:10.413941: step: 400/459, loss: 0.027704967185854912 2023-01-24 03:32:11.032763: step: 402/459, loss: 0.04577941074967384 2023-01-24 03:32:11.653927: step: 404/459, loss: 0.014401474967598915 2023-01-24 03:32:12.352926: step: 406/459, loss: 0.017888128757476807 2023-01-24 03:32:12.954193: step: 408/459, loss: 0.006081834435462952 2023-01-24 03:32:13.539203: step: 410/459, loss: 0.004845693241804838 2023-01-24 03:32:14.178202: step: 412/459, loss: 0.0795246809720993 2023-01-24 03:32:14.824539: step: 414/459, loss: 0.04649371653795242 2023-01-24 03:32:15.432242: step: 416/459, loss: 0.007822016254067421 2023-01-24 03:32:15.998273: step: 418/459, loss: 0.022867461666464806 2023-01-24 03:32:16.609584: step: 420/459, loss: 0.07985074073076248 2023-01-24 03:32:17.223509: step: 422/459, loss: 0.04133915156126022 2023-01-24 03:32:17.877713: step: 424/459, loss: 0.06218499317765236 2023-01-24 03:32:18.492402: step: 426/459, loss: 0.005980003159493208 2023-01-24 03:32:19.198140: step: 428/459, loss: 0.000545116257853806 2023-01-24 03:32:19.869377: step: 430/459, loss: 0.019510459154844284 2023-01-24 03:32:20.526500: step: 432/459, loss: 0.03507695719599724 2023-01-24 03:32:21.183468: step: 434/459, loss: 0.01098482683300972 2023-01-24 03:32:21.841777: step: 436/459, loss: 0.0045224749483168125 2023-01-24 03:32:22.410421: step: 438/459, loss: 0.39770743250846863 2023-01-24 03:32:23.065617: step: 440/459, loss: 0.014669207856059074 2023-01-24 03:32:23.686258: step: 442/459, loss: 0.03241858258843422 2023-01-24 03:32:24.313078: step: 444/459, loss: 0.1357102394104004 2023-01-24 03:32:24.927934: step: 446/459, loss: 0.013484742492437363 2023-01-24 03:32:25.500639: step: 448/459, loss: 0.0073324451223015785 2023-01-24 03:32:26.134194: step: 450/459, loss: 0.07168355584144592 2023-01-24 03:32:26.745344: step: 452/459, loss: 0.009637702256441116 2023-01-24 03:32:27.363992: step: 454/459, loss: 0.024336740374565125 2023-01-24 03:32:27.948539: step: 456/459, loss: 0.031023390591144562 2023-01-24 03:32:28.636304: step: 458/459, loss: 0.01442807912826538 2023-01-24 03:32:29.238425: step: 460/459, loss: 0.2079174518585205 2023-01-24 03:32:29.843420: step: 462/459, loss: 0.012286082841455936 2023-01-24 03:32:30.523215: step: 464/459, loss: 0.02629934251308441 2023-01-24 03:32:31.109610: step: 466/459, loss: 0.03617009148001671 2023-01-24 03:32:31.727720: step: 468/459, loss: 0.04252045229077339 2023-01-24 03:32:32.334036: step: 470/459, loss: 0.05997997522354126 2023-01-24 03:32:32.959216: step: 472/459, loss: 0.07881109416484833 2023-01-24 03:32:33.576529: step: 474/459, loss: 0.011771769262850285 2023-01-24 03:32:34.242201: step: 476/459, loss: 0.013830423355102539 2023-01-24 03:32:34.858435: step: 478/459, loss: 0.0017390614375472069 2023-01-24 03:32:35.490054: step: 480/459, loss: 0.2681528329849243 2023-01-24 03:32:36.110049: step: 482/459, loss: 0.032377105206251144 2023-01-24 03:32:36.739685: step: 484/459, loss: 0.04769745096564293 2023-01-24 03:32:37.332142: step: 486/459, loss: 0.006254702806472778 2023-01-24 03:32:38.006472: step: 488/459, loss: 0.011946805752813816 2023-01-24 03:32:38.621991: step: 490/459, loss: 0.0016339210560545325 2023-01-24 03:32:39.265228: step: 492/459, loss: 0.08018308877944946 2023-01-24 
03:32:39.864289: step: 494/459, loss: 0.015311733819544315 2023-01-24 03:32:40.549144: step: 496/459, loss: 0.006961124483495951 2023-01-24 03:32:41.129605: step: 498/459, loss: 0.00900696124881506 2023-01-24 03:32:41.758772: step: 500/459, loss: 0.005840933881700039 2023-01-24 03:32:42.370077: step: 502/459, loss: 0.009037842974066734 2023-01-24 03:32:43.065643: step: 504/459, loss: 0.03022112511098385 2023-01-24 03:32:43.664074: step: 506/459, loss: 0.03136993944644928 2023-01-24 03:32:44.311155: step: 508/459, loss: 0.003134649945423007 2023-01-24 03:32:44.911367: step: 510/459, loss: 0.07106585800647736 2023-01-24 03:32:45.533946: step: 512/459, loss: 0.39449408650398254 2023-01-24 03:32:46.120673: step: 514/459, loss: 0.030371660366654396 2023-01-24 03:32:46.701953: step: 516/459, loss: 0.0033170506358146667 2023-01-24 03:32:47.282978: step: 518/459, loss: 0.06957333534955978 2023-01-24 03:32:47.887626: step: 520/459, loss: 0.007989508099853992 2023-01-24 03:32:48.461977: step: 522/459, loss: 0.005630733910948038 2023-01-24 03:32:49.060509: step: 524/459, loss: 0.023519791662693024 2023-01-24 03:32:49.711076: step: 526/459, loss: 0.1390819251537323 2023-01-24 03:32:50.358657: step: 528/459, loss: 0.06382070481777191 2023-01-24 03:32:50.939724: step: 530/459, loss: 0.026415441185235977 2023-01-24 03:32:51.600272: step: 532/459, loss: 0.06504681706428528 2023-01-24 03:32:52.228316: step: 534/459, loss: 0.02697015181183815 2023-01-24 03:32:52.887555: step: 536/459, loss: 0.01763060875236988 2023-01-24 03:32:53.485886: step: 538/459, loss: 0.04299737140536308 2023-01-24 03:32:54.091526: step: 540/459, loss: 0.002607745584100485 2023-01-24 03:32:54.775872: step: 542/459, loss: 0.4508575201034546 2023-01-24 03:32:55.425376: step: 544/459, loss: 0.004905528388917446 2023-01-24 03:32:56.021479: step: 546/459, loss: 0.14384840428829193 2023-01-24 03:32:56.641798: step: 548/459, loss: 0.07600490003824234 2023-01-24 03:32:57.271768: step: 550/459, loss: 0.1361542046070099 2023-01-24 03:32:57.912887: step: 552/459, loss: 0.019808650016784668 2023-01-24 03:32:58.589227: step: 554/459, loss: 0.01282311137765646 2023-01-24 03:32:59.246123: step: 556/459, loss: 0.013079429045319557 2023-01-24 03:32:59.847947: step: 558/459, loss: 0.01407181192189455 2023-01-24 03:33:00.451011: step: 560/459, loss: 0.02172587439417839 2023-01-24 03:33:01.039550: step: 562/459, loss: 0.004513172432780266 2023-01-24 03:33:01.615540: step: 564/459, loss: 0.009664719924330711 2023-01-24 03:33:02.205625: step: 566/459, loss: 0.05533752590417862 2023-01-24 03:33:02.794870: step: 568/459, loss: 0.019162461161613464 2023-01-24 03:33:03.399341: step: 570/459, loss: 0.0012974783312529325 2023-01-24 03:33:04.021442: step: 572/459, loss: 0.03257708624005318 2023-01-24 03:33:04.620476: step: 574/459, loss: 0.022640982642769814 2023-01-24 03:33:05.191462: step: 576/459, loss: 0.011982087045907974 2023-01-24 03:33:05.815903: step: 578/459, loss: 0.010856336914002895 2023-01-24 03:33:06.492527: step: 580/459, loss: 0.028812041506171227 2023-01-24 03:33:07.121344: step: 582/459, loss: 0.012596558779478073 2023-01-24 03:33:07.696896: step: 584/459, loss: 0.05539519712328911 2023-01-24 03:33:08.310886: step: 586/459, loss: 0.027459649369120598 2023-01-24 03:33:09.005226: step: 588/459, loss: 0.038701098412275314 2023-01-24 03:33:09.698826: step: 590/459, loss: 0.019063888117671013 2023-01-24 03:33:10.310471: step: 592/459, loss: 0.005292028654366732 2023-01-24 03:33:10.931308: step: 594/459, loss: 0.0021905130706727505 2023-01-24 
03:33:11.535779: step: 596/459, loss: 0.0708777904510498 2023-01-24 03:33:12.156269: step: 598/459, loss: 0.010446179658174515 2023-01-24 03:33:12.772555: step: 600/459, loss: 0.00480992067605257 2023-01-24 03:33:13.374218: step: 602/459, loss: 0.012033739127218723 2023-01-24 03:33:14.208812: step: 604/459, loss: 0.041855208575725555 2023-01-24 03:33:14.776591: step: 606/459, loss: 0.008353384211659431 2023-01-24 03:33:15.370996: step: 608/459, loss: 0.01297040842473507 2023-01-24 03:33:15.989775: step: 610/459, loss: 0.002088322304189205 2023-01-24 03:33:16.651827: step: 612/459, loss: 0.0024375186767429113 2023-01-24 03:33:17.169076: step: 614/459, loss: 0.029122959822416306 2023-01-24 03:33:17.811652: step: 616/459, loss: 0.01854405179619789 2023-01-24 03:33:18.486808: step: 618/459, loss: 0.018622390925884247 2023-01-24 03:33:19.104937: step: 620/459, loss: 0.08210930973291397 2023-01-24 03:33:19.805278: step: 622/459, loss: 0.03788954019546509 2023-01-24 03:33:20.408076: step: 624/459, loss: 0.11208145320415497 2023-01-24 03:33:20.998389: step: 626/459, loss: 0.0031169389840215445 2023-01-24 03:33:21.603929: step: 628/459, loss: 0.0689292773604393 2023-01-24 03:33:22.236775: step: 630/459, loss: 0.04962358623743057 2023-01-24 03:33:22.908106: step: 632/459, loss: 0.034499391913414 2023-01-24 03:33:23.472008: step: 634/459, loss: 0.009900323115289211 2023-01-24 03:33:24.061748: step: 636/459, loss: 0.019958848133683205 2023-01-24 03:33:24.634933: step: 638/459, loss: 0.06378374993801117 2023-01-24 03:33:25.201177: step: 640/459, loss: 0.0051453132182359695 2023-01-24 03:33:25.825014: step: 642/459, loss: 0.0015504024922847748 2023-01-24 03:33:26.430681: step: 644/459, loss: 0.04257054626941681 2023-01-24 03:33:27.011626: step: 646/459, loss: 0.0009819892002269626 2023-01-24 03:33:27.691921: step: 648/459, loss: 0.4080725908279419 2023-01-24 03:33:28.319940: step: 650/459, loss: 0.0033003329299390316 2023-01-24 03:33:28.994286: step: 652/459, loss: 0.0493033267557621 2023-01-24 03:33:29.681708: step: 654/459, loss: 0.025897076353430748 2023-01-24 03:33:30.293665: step: 656/459, loss: 0.010324331931769848 2023-01-24 03:33:30.928474: step: 658/459, loss: 0.008184874430298805 2023-01-24 03:33:31.500788: step: 660/459, loss: 0.013776950538158417 2023-01-24 03:33:32.044747: step: 662/459, loss: 0.011326681822538376 2023-01-24 03:33:32.673105: step: 664/459, loss: 0.10974901914596558 2023-01-24 03:33:33.307483: step: 666/459, loss: 0.03722035139799118 2023-01-24 03:33:33.960911: step: 668/459, loss: 0.43741756677627563 2023-01-24 03:33:34.581691: step: 670/459, loss: 0.14585448801517487 2023-01-24 03:33:35.301747: step: 672/459, loss: 0.06268046051263809 2023-01-24 03:33:35.884321: step: 674/459, loss: 0.004747139289975166 2023-01-24 03:33:36.569916: step: 676/459, loss: 0.024709586054086685 2023-01-24 03:33:37.252627: step: 678/459, loss: 0.07968175411224365 2023-01-24 03:33:37.817283: step: 680/459, loss: 0.016188912093639374 2023-01-24 03:33:38.459789: step: 682/459, loss: 0.01871510036289692 2023-01-24 03:33:39.121328: step: 684/459, loss: 0.06690306216478348 2023-01-24 03:33:39.689208: step: 686/459, loss: 0.0477699413895607 2023-01-24 03:33:40.278380: step: 688/459, loss: 0.02761949598789215 2023-01-24 03:33:40.941255: step: 690/459, loss: 0.0008204755140468478 2023-01-24 03:33:41.636987: step: 692/459, loss: 0.010138310492038727 2023-01-24 03:33:42.272005: step: 694/459, loss: 0.012488953769207 2023-01-24 03:33:42.941174: step: 696/459, loss: 0.0021088195499032736 2023-01-24 
03:33:43.546918: step: 698/459, loss: 0.0011600216384977102 2023-01-24 03:33:44.218032: step: 700/459, loss: 0.016987022012472153 2023-01-24 03:33:44.838068: step: 702/459, loss: 0.022914165630936623 2023-01-24 03:33:45.518259: step: 704/459, loss: 0.06557287275791168 2023-01-24 03:33:46.169934: step: 706/459, loss: 0.007324625737965107 2023-01-24 03:33:46.741788: step: 708/459, loss: 0.0010199880925938487 2023-01-24 03:33:47.354068: step: 710/459, loss: 0.07818463444709778 2023-01-24 03:33:47.991350: step: 712/459, loss: 0.02263748273253441 2023-01-24 03:33:48.609871: step: 714/459, loss: 0.018820438534021378 2023-01-24 03:33:49.241679: step: 716/459, loss: 0.027416273951530457 2023-01-24 03:33:49.898761: step: 718/459, loss: 0.06188127398490906 2023-01-24 03:33:50.562546: step: 720/459, loss: 0.031491026282310486 2023-01-24 03:33:51.177998: step: 722/459, loss: 0.03906549513339996 2023-01-24 03:33:51.858530: step: 724/459, loss: 0.036092061549425125 2023-01-24 03:33:52.526818: step: 726/459, loss: 0.023232867941260338 2023-01-24 03:33:53.141800: step: 728/459, loss: 0.037600915879011154 2023-01-24 03:33:53.716209: step: 730/459, loss: 0.009802216663956642 2023-01-24 03:33:54.369621: step: 732/459, loss: 0.02331903576850891 2023-01-24 03:33:55.016871: step: 734/459, loss: 0.01442367397248745 2023-01-24 03:33:55.586545: step: 736/459, loss: 0.0015983468620106578 2023-01-24 03:33:56.194129: step: 738/459, loss: 0.023344412446022034 2023-01-24 03:33:56.803366: step: 740/459, loss: 0.0496189147233963 2023-01-24 03:33:57.410422: step: 742/459, loss: 0.00028647761791944504 2023-01-24 03:33:58.011185: step: 744/459, loss: 0.016190320253372192 2023-01-24 03:33:58.600966: step: 746/459, loss: 0.00011340487253619358 2023-01-24 03:33:59.210275: step: 748/459, loss: 0.009969991631805897 2023-01-24 03:33:59.807023: step: 750/459, loss: 0.021625535562634468 2023-01-24 03:34:00.488038: step: 752/459, loss: 0.032852742820978165 2023-01-24 03:34:01.056060: step: 754/459, loss: 0.004088934976607561 2023-01-24 03:34:01.644594: step: 756/459, loss: 0.04526763781905174 2023-01-24 03:34:02.275789: step: 758/459, loss: 0.0025824601761996746 2023-01-24 03:34:02.913902: step: 760/459, loss: 0.017016975209116936 2023-01-24 03:34:03.510392: step: 762/459, loss: 0.05594045668840408 2023-01-24 03:34:04.189793: step: 764/459, loss: 0.1474377065896988 2023-01-24 03:34:04.903911: step: 766/459, loss: 0.014110606163740158 2023-01-24 03:34:05.562931: step: 768/459, loss: 0.000801481248345226 2023-01-24 03:34:06.192100: step: 770/459, loss: 0.028324497863650322 2023-01-24 03:34:06.764318: step: 772/459, loss: 0.013339634984731674 2023-01-24 03:34:07.423172: step: 774/459, loss: 0.01527258101850748 2023-01-24 03:34:08.124303: step: 776/459, loss: 0.0918734222650528 2023-01-24 03:34:08.756863: step: 778/459, loss: 0.01071612536907196 2023-01-24 03:34:09.374980: step: 780/459, loss: 0.5234488844871521 2023-01-24 03:34:10.052542: step: 782/459, loss: 4.0840968722477555e-05 2023-01-24 03:34:10.642355: step: 784/459, loss: 0.03232257440686226 2023-01-24 03:34:11.313409: step: 786/459, loss: 0.03653562441468239 2023-01-24 03:34:12.026451: step: 788/459, loss: 0.01815078780055046 2023-01-24 03:34:12.565431: step: 790/459, loss: 0.09894447028636932 2023-01-24 03:34:13.201927: step: 792/459, loss: 0.00017813708109315485 2023-01-24 03:34:13.792444: step: 794/459, loss: 0.456792950630188 2023-01-24 03:34:14.417493: step: 796/459, loss: 0.005841420032083988 2023-01-24 03:34:15.050411: step: 798/459, loss: 0.008267181925475597 
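Each training entry in this log has the fixed shape "<timestamp>: step: <n>/459, loss: <value>", so the per-step losses can be pulled back out of the raw text with a short script. A minimal sketch (the log file name, and the assumption that every step entry matches this exact pattern, are not taken from train.py):

    import re
    from statistics import mean

    # Matches entries like "2023-01-24 03:31:04.973676: step: 188/459, loss: 0.048491448163986206"
    STEP_RE = re.compile(
        r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+): step: (\d+)/\d+, loss: ([0-9.eE+-]+)"
    )

    def parse_step_losses(log_text):
        """Return (timestamp, step, loss) tuples for every step entry in the log text."""
        return [(ts, int(step), float(loss)) for ts, step, loss in STEP_RE.findall(log_text)]

    with open("train.log", encoding="utf-8") as f:  # hypothetical file holding this log
        records = parse_step_losses(f.read())
    losses = [loss for _, _, loss in records]
    # The per-epoch "Loss: ..." summary lines appear to be a plain average of these step losses.
    print(len(records), "steps, mean loss", round(mean(losses), 3))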
2023-01-24 03:34:15.701063: step: 800/459, loss: 0.04349711537361145 2023-01-24 03:34:16.350434: step: 802/459, loss: 0.07005327939987183 2023-01-24 03:34:17.021589: step: 804/459, loss: 0.03968295827507973 2023-01-24 03:34:17.664147: step: 806/459, loss: 0.045328978449106216 2023-01-24 03:34:18.259530: step: 808/459, loss: 0.0014341180212795734 2023-01-24 03:34:18.894802: step: 810/459, loss: 0.0007039543124847114 2023-01-24 03:34:19.551136: step: 812/459, loss: 0.07526720315217972 2023-01-24 03:34:20.212207: step: 814/459, loss: 0.016118016093969345 2023-01-24 03:34:20.878259: step: 816/459, loss: 0.07977328449487686 2023-01-24 03:34:21.511344: step: 818/459, loss: 0.05457331985235214 2023-01-24 03:34:22.099947: step: 820/459, loss: 0.4809856414794922 2023-01-24 03:34:22.722249: step: 822/459, loss: 0.028550997376441956 2023-01-24 03:34:23.390697: step: 824/459, loss: 0.0145775331184268 2023-01-24 03:34:24.057303: step: 826/459, loss: 0.04138891398906708 2023-01-24 03:34:24.780021: step: 828/459, loss: 0.03739435598254204 2023-01-24 03:34:25.334482: step: 830/459, loss: 0.0017541988054290414 2023-01-24 03:34:25.915553: step: 832/459, loss: 0.009949898347258568 2023-01-24 03:34:26.591098: step: 834/459, loss: 0.013120057061314583 2023-01-24 03:34:27.199094: step: 836/459, loss: 0.016993658617138863 2023-01-24 03:34:27.750738: step: 838/459, loss: 0.029163308441638947 2023-01-24 03:34:28.389843: step: 840/459, loss: 0.0045989807695150375 2023-01-24 03:34:29.000645: step: 842/459, loss: 0.008524301461875439 2023-01-24 03:34:29.537261: step: 844/459, loss: 0.012504379265010357 2023-01-24 03:34:30.106509: step: 846/459, loss: 0.008285482414066792 2023-01-24 03:34:30.732528: step: 848/459, loss: 0.05397731065750122 2023-01-24 03:34:31.432898: step: 850/459, loss: 0.0330098532140255 2023-01-24 03:34:32.076061: step: 852/459, loss: 0.076438307762146 2023-01-24 03:34:32.686519: step: 854/459, loss: 0.02663891203701496 2023-01-24 03:34:33.301841: step: 856/459, loss: 0.001993186306208372 2023-01-24 03:34:33.912347: step: 858/459, loss: 0.00378460343927145 2023-01-24 03:34:34.491467: step: 860/459, loss: 0.008857300505042076 2023-01-24 03:34:35.083251: step: 862/459, loss: 0.05214904993772507 2023-01-24 03:34:35.702271: step: 864/459, loss: 0.0020192319061607122 2023-01-24 03:34:36.463679: step: 866/459, loss: 0.024693571031093597 2023-01-24 03:34:37.087881: step: 868/459, loss: 0.017735330387949944 2023-01-24 03:34:37.752016: step: 870/459, loss: 0.020125554874539375 2023-01-24 03:34:38.400054: step: 872/459, loss: 0.013405416160821915 2023-01-24 03:34:38.993271: step: 874/459, loss: 0.03525625169277191 2023-01-24 03:34:39.576940: step: 876/459, loss: 0.0038644163869321346 2023-01-24 03:34:40.221833: step: 878/459, loss: 0.015150866471230984 2023-01-24 03:34:40.770966: step: 880/459, loss: 0.0019326976034790277 2023-01-24 03:34:41.367159: step: 882/459, loss: 0.0020264270715415478 2023-01-24 03:34:41.932885: step: 884/459, loss: 0.012287277728319168 2023-01-24 03:34:42.516780: step: 886/459, loss: 0.022778430953621864 2023-01-24 03:34:43.202290: step: 888/459, loss: 0.03791544586420059 2023-01-24 03:34:43.807860: step: 890/459, loss: 0.019959183409810066 2023-01-24 03:34:44.426915: step: 892/459, loss: 0.2473151534795761 2023-01-24 03:34:45.021721: step: 894/459, loss: 0.05541641637682915 2023-01-24 03:34:45.645458: step: 896/459, loss: 0.03198179602622986 2023-01-24 03:34:46.281516: step: 898/459, loss: 0.09184055775403976 2023-01-24 03:34:46.980638: step: 900/459, loss: 0.05918189883232117 
2023-01-24 03:34:47.547798: step: 902/459, loss: 0.05036647990345955 2023-01-24 03:34:48.161059: step: 904/459, loss: 0.02776075154542923 2023-01-24 03:34:48.802417: step: 906/459, loss: 0.03085213340818882 2023-01-24 03:34:49.380979: step: 908/459, loss: 0.060978908091783524 2023-01-24 03:34:50.007990: step: 910/459, loss: 0.0033390005119144917 2023-01-24 03:34:50.656388: step: 912/459, loss: 0.017203887924551964 2023-01-24 03:34:51.251530: step: 914/459, loss: 0.01334420870989561 2023-01-24 03:34:51.889940: step: 916/459, loss: 0.02138669788837433 2023-01-24 03:34:52.522769: step: 918/459, loss: 0.13772551715373993 2023-01-24 03:34:52.991878: step: 920/459, loss: 0.000737383437808603
==================================================
Loss: 0.050
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3307292322009017, 'r': 0.32006054729119526, 'f1': 0.3253074415090837}, 'combined': 0.23970022005932481, 'epoch': 28}
Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3489603356863765, 'r': 0.28741642193805195, 'f1': 0.3152124268513032}, 'combined': 0.201735953184834, 'epoch': 28}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.328963050403914, 'r': 0.32272086728429517, 'f1': 0.32581206333107954}, 'combined': 0.24007204666500595, 'epoch': 28}
Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3511184680102383, 'r': 0.2860610951909026, 'f1': 0.3152685343395233}, 'combined': 0.20177186197729488, 'epoch': 28}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.344818824278438, 'r': 0.3271525847044004, 'f1': 0.33575348031006624}, 'combined': 0.24739730128110143, 'epoch': 28}
Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.35723909315047103, 'r': 0.30420633159898947, 'f1': 0.3285967064394396}, 'combined': 0.23559763857922086, 'epoch': 28}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3083333333333333, 'r': 0.35238095238095235, 'f1': 0.32888888888888884}, 'combined': 0.2192592592592592, 'epoch': 28}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2962962962962963, 'r': 0.34782608695652173, 'f1': 0.31999999999999995}, 'combined': 0.15999999999999998, 'epoch': 28}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.13793103448275862, 'f1': 0.1951219512195122}, 'combined': 0.13008130081300812, 'epoch': 28}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 'epoch': 10}
Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 'epoch': 10}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'epoch': 10}
--------------------
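A note on how the numbers inside these score dictionaries fit together (inferred from the logged values themselves; the scoring code is not shown in this log): each 'f1' is the harmonic mean of the 'p' and 'r' next to it, and 'combined' matches the product of the template F1 and the slot F1. A quick check against the epoch-28 Dev Chinese block above:

    # Reproduce the epoch-28 "Dev Chinese" scores from their own p/r values.
    # Assumed relationships (inferred from the log, not taken from train.py):
    #   f1 = 2 * p * r / (p + r)
    #   combined = template_f1 * slot_f1
    def f1(p, r):
        return 2 * p * r / (p + r)

    template_f1 = f1(1.0, 0.5833333333333334)               # 0.7368421052631579
    slot_f1 = f1(0.3307292322009017, 0.32006054729119526)   # ~0.32530744150908
    combined = template_f1 * slot_f1                        # ~0.23970022005932 (the logged 'combined')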
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3234579439252337, 'r': 0.32836812144212524, 'f1': 0.32589453860640305}, 'combined': 0.2401328179205075, 'epoch': 25}
Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33820520545292077, 'r': 0.29673590233199043, 'f1': 0.3161163313667358}, 'combined': 0.20231445207471088, 'epoch': 25}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.32142857142857145, 'r': 0.391304347826087, 'f1': 0.35294117647058826}, 'combined': 0.17647058823529413, 'epoch': 25}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8}
Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8}
******************************
Epoch: 29
command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4
2023-01-24 03:37:29.563327: step: 2/459, loss: 0.020763786509633064 2023-01-24 03:37:30.215946: step: 4/459, loss: 0.0006174935842864215 2023-01-24 03:37:30.863564: step: 6/459, loss: 0.010483178310096264 2023-01-24 03:37:31.485689: step: 8/459, loss: 0.023790888488292694 2023-01-24 03:37:32.014362: step: 10/459, loss: 0.027975359931588173 2023-01-24 03:37:32.634114: step: 12/459, loss: 0.020847098901867867 2023-01-24 03:37:33.256088: step: 14/459, loss: 0.0003248160646762699 2023-01-24 03:37:33.868509: step: 16/459, loss: 0.0013376454589888453 2023-01-24 03:37:34.520532: step: 18/459, loss: 0.05614444240927696 2023-01-24 03:37:35.115972: step: 20/459, loss: 0.038801368325948715 2023-01-24 03:37:35.677966: step: 22/459, loss: 0.009839197620749474 2023-01-24 03:37:36.290974: step: 24/459, loss: 0.01778397336602211 2023-01-24 03:37:36.889466: step: 26/459, loss: 0.041040096431970596 2023-01-24 03:37:37.470138: step: 28/459, loss: 0.02667672373354435 2023-01-24 03:37:38.066370: step: 30/459, loss: 0.013222310692071915 2023-01-24 03:37:38.664180: step: 32/459, loss: 1.5149023532867432 2023-01-24 03:37:39.215876: step: 34/459, loss: 0.003169361036270857 2023-01-24 03:37:39.892638: step: 36/459, loss: 0.023298054933547974 2023-01-24 03:37:40.495655: step: 38/459, loss: 0.0020357209723442793 2023-01-24 03:37:41.079042: step: 40/459, loss: 0.00932586845010519 2023-01-24 03:37:41.709534: step: 42/459, loss: 0.03021630458533764 2023-01-24 03:37:42.348958: step: 44/459, loss: 0.03349095955491066 2023-01-24 03:37:42.960052: step: 46/459, loss: 0.00550851970911026 2023-01-24 03:37:43.608678: step: 48/459, loss: 0.002687011146917939 2023-01-24 03:37:44.272962: step: 50/459, loss: 0.004641312174499035 2023-01-24 03:37:44.896691: step: 52/459, loss: 0.001752714510075748 2023-01-24 03:37:45.513941: step: 54/459, loss: 0.0029116717632859945 2023-01-24 03:37:46.162794: step: 56/459, loss: 0.030314965173602104 2023-01-24
03:37:46.736639: step: 58/459, loss: 0.04606277495622635 2023-01-24 03:37:47.328034: step: 60/459, loss: 0.00395751278847456 2023-01-24 03:37:47.945762: step: 62/459, loss: 0.013817768543958664 2023-01-24 03:37:48.535594: step: 64/459, loss: 0.03442169725894928 2023-01-24 03:37:49.266552: step: 66/459, loss: 0.0005334342131391168 2023-01-24 03:37:49.924929: step: 68/459, loss: 0.004641116596758366 2023-01-24 03:37:50.549478: step: 70/459, loss: 0.006042947992682457 2023-01-24 03:37:51.180565: step: 72/459, loss: 0.016022447496652603 2023-01-24 03:37:51.860475: step: 74/459, loss: 0.13660401105880737 2023-01-24 03:37:52.478821: step: 76/459, loss: 0.039594605565071106 2023-01-24 03:37:53.138373: step: 78/459, loss: 0.0048706019297242165 2023-01-24 03:37:53.760138: step: 80/459, loss: 0.0014813599409535527 2023-01-24 03:37:54.389925: step: 82/459, loss: 0.09080354124307632 2023-01-24 03:37:55.020161: step: 84/459, loss: 0.005862884223461151 2023-01-24 03:37:55.625268: step: 86/459, loss: 0.01637057028710842 2023-01-24 03:37:56.265852: step: 88/459, loss: 0.011388765648007393 2023-01-24 03:37:56.901005: step: 90/459, loss: 0.05720578506588936 2023-01-24 03:37:57.554501: step: 92/459, loss: 0.0076682246290147305 2023-01-24 03:37:58.204896: step: 94/459, loss: 0.013562344014644623 2023-01-24 03:37:58.836210: step: 96/459, loss: 0.9324160218238831 2023-01-24 03:37:59.541359: step: 98/459, loss: 0.6374057531356812 2023-01-24 03:38:00.185402: step: 100/459, loss: 0.07122885435819626 2023-01-24 03:38:00.788563: step: 102/459, loss: 0.024930350482463837 2023-01-24 03:38:01.355972: step: 104/459, loss: 0.0014243981568142772 2023-01-24 03:38:01.999456: step: 106/459, loss: 0.03622257709503174 2023-01-24 03:38:02.627072: step: 108/459, loss: 0.17831407487392426 2023-01-24 03:38:03.245585: step: 110/459, loss: 0.021215377375483513 2023-01-24 03:38:03.863689: step: 112/459, loss: 0.0025188608560711145 2023-01-24 03:38:04.463706: step: 114/459, loss: 0.017746873199939728 2023-01-24 03:38:05.073706: step: 116/459, loss: 0.04151235520839691 2023-01-24 03:38:05.656459: step: 118/459, loss: 0.005444907583296299 2023-01-24 03:38:06.253995: step: 120/459, loss: 0.05339483544230461 2023-01-24 03:38:06.835262: step: 122/459, loss: 0.0136991823092103 2023-01-24 03:38:07.435583: step: 124/459, loss: 0.006436762399971485 2023-01-24 03:38:08.030837: step: 126/459, loss: 0.1262318193912506 2023-01-24 03:38:08.645892: step: 128/459, loss: 0.014152058400213718 2023-01-24 03:38:09.239276: step: 130/459, loss: 0.0002971517969854176 2023-01-24 03:38:09.873729: step: 132/459, loss: 0.17053183913230896 2023-01-24 03:38:10.426944: step: 134/459, loss: 0.004290853161364794 2023-01-24 03:38:10.999142: step: 136/459, loss: 0.0060942829586565495 2023-01-24 03:38:11.620163: step: 138/459, loss: 0.06425930559635162 2023-01-24 03:38:12.240027: step: 140/459, loss: 0.4401618242263794 2023-01-24 03:38:12.853254: step: 142/459, loss: 0.018811529502272606 2023-01-24 03:38:13.466413: step: 144/459, loss: 0.025629498064517975 2023-01-24 03:38:14.093946: step: 146/459, loss: 0.004441289231181145 2023-01-24 03:38:14.705628: step: 148/459, loss: 0.02071825973689556 2023-01-24 03:38:15.338689: step: 150/459, loss: 0.00014045379066374153 2023-01-24 03:38:15.992150: step: 152/459, loss: 0.009911094792187214 2023-01-24 03:38:16.662104: step: 154/459, loss: 0.18600723147392273 2023-01-24 03:38:17.317004: step: 156/459, loss: 0.0008057950180955231 2023-01-24 03:38:18.039939: step: 158/459, loss: 0.01618104800581932 2023-01-24 03:38:18.614884: 
step: 160/459, loss: 0.0073263864032924175 2023-01-24 03:38:19.279491: step: 162/459, loss: 0.014298207126557827 2023-01-24 03:38:19.924737: step: 164/459, loss: 0.013705860823392868 2023-01-24 03:38:20.566890: step: 166/459, loss: 0.039496973156929016 2023-01-24 03:38:21.147574: step: 168/459, loss: 0.0024137706495821476 2023-01-24 03:38:21.734237: step: 170/459, loss: 0.00669556949287653 2023-01-24 03:38:22.340416: step: 172/459, loss: 0.0006158421165309846 2023-01-24 03:38:22.985135: step: 174/459, loss: 0.0675773024559021 2023-01-24 03:38:23.568275: step: 176/459, loss: 0.01906546577811241 2023-01-24 03:38:24.166146: step: 178/459, loss: 0.00789159070700407 2023-01-24 03:38:24.781301: step: 180/459, loss: 0.003007366321980953 2023-01-24 03:38:25.343029: step: 182/459, loss: 0.00013643570127896965 2023-01-24 03:38:25.929141: step: 184/459, loss: 0.001955175306648016 2023-01-24 03:38:26.565885: step: 186/459, loss: 0.12902957201004028 2023-01-24 03:38:27.193759: step: 188/459, loss: 0.007263165432959795 2023-01-24 03:38:27.840042: step: 190/459, loss: 0.07676491886377335 2023-01-24 03:38:28.404517: step: 192/459, loss: 0.023299891501665115 2023-01-24 03:38:29.037299: step: 194/459, loss: 0.07543982565402985 2023-01-24 03:38:29.685837: step: 196/459, loss: 0.013855348341166973 2023-01-24 03:38:30.305798: step: 198/459, loss: 0.019271699711680412 2023-01-24 03:38:31.024834: step: 200/459, loss: 0.0007410520338453352 2023-01-24 03:38:31.720157: step: 202/459, loss: 0.059294670820236206 2023-01-24 03:38:32.367796: step: 204/459, loss: 0.010595125146210194 2023-01-24 03:38:33.060835: step: 206/459, loss: 0.009196861647069454 2023-01-24 03:38:33.680169: step: 208/459, loss: 0.05763350799679756 2023-01-24 03:38:34.321210: step: 210/459, loss: 0.01504325307905674 2023-01-24 03:38:34.979641: step: 212/459, loss: 0.061190687119960785 2023-01-24 03:38:35.573768: step: 214/459, loss: 0.0012308878358453512 2023-01-24 03:38:36.131788: step: 216/459, loss: 0.0021557610016316175 2023-01-24 03:38:36.804854: step: 218/459, loss: 0.0017861570231616497 2023-01-24 03:38:37.391977: step: 220/459, loss: 0.04160407558083534 2023-01-24 03:38:38.073950: step: 222/459, loss: 0.01023244857788086 2023-01-24 03:38:38.687366: step: 224/459, loss: 0.009212901815772057 2023-01-24 03:38:39.344797: step: 226/459, loss: 0.00391991576179862 2023-01-24 03:38:39.887086: step: 228/459, loss: 0.028750376775860786 2023-01-24 03:38:40.543611: step: 230/459, loss: 0.04186530411243439 2023-01-24 03:38:41.177854: step: 232/459, loss: 0.0005629690131172538 2023-01-24 03:38:41.775591: step: 234/459, loss: 0.10572918504476547 2023-01-24 03:38:42.389485: step: 236/459, loss: 0.09255655854940414 2023-01-24 03:38:43.028419: step: 238/459, loss: 0.010814176872372627 2023-01-24 03:38:43.667185: step: 240/459, loss: 0.0008449426386505365 2023-01-24 03:38:44.246051: step: 242/459, loss: 0.03876589983701706 2023-01-24 03:38:44.807991: step: 244/459, loss: 0.016490735113620758 2023-01-24 03:38:45.463506: step: 246/459, loss: 0.0067900181747972965 2023-01-24 03:38:46.059278: step: 248/459, loss: 0.013606108725070953 2023-01-24 03:38:46.695572: step: 250/459, loss: 0.08065411448478699 2023-01-24 03:38:47.326733: step: 252/459, loss: 0.022240908816456795 2023-01-24 03:38:47.921206: step: 254/459, loss: 0.003844247432425618 2023-01-24 03:38:48.502409: step: 256/459, loss: 0.03277676925063133 2023-01-24 03:38:49.173466: step: 258/459, loss: 1.783503770828247 2023-01-24 03:38:49.815318: step: 260/459, loss: 0.013712420128285885 2023-01-24 
03:38:50.444130: step: 262/459, loss: 0.0142991216853261 2023-01-24 03:38:51.144505: step: 264/459, loss: 0.021122777834534645 2023-01-24 03:38:51.738152: step: 266/459, loss: 0.01052930112928152 2023-01-24 03:38:52.407146: step: 268/459, loss: 0.0076851318590343 2023-01-24 03:38:53.050643: step: 270/459, loss: 0.0002698442549444735 2023-01-24 03:38:53.627281: step: 272/459, loss: 0.012616727501153946 2023-01-24 03:38:54.285015: step: 274/459, loss: 0.0049834768287837505 2023-01-24 03:38:54.913120: step: 276/459, loss: 0.03957995027303696 2023-01-24 03:38:55.491612: step: 278/459, loss: 0.030988290905952454 2023-01-24 03:38:56.088295: step: 280/459, loss: 0.024928266182541847 2023-01-24 03:38:56.673239: step: 282/459, loss: 0.02715473063290119 2023-01-24 03:38:57.248854: step: 284/459, loss: 0.013202632777392864 2023-01-24 03:38:57.917740: step: 286/459, loss: 0.006019834894686937 2023-01-24 03:38:58.611632: step: 288/459, loss: 0.04495598003268242 2023-01-24 03:38:59.315508: step: 290/459, loss: 0.0014810668071731925 2023-01-24 03:38:59.958269: step: 292/459, loss: 0.009706055745482445 2023-01-24 03:39:00.522287: step: 294/459, loss: 0.027678119018673897 2023-01-24 03:39:01.264593: step: 296/459, loss: 0.014754923060536385 2023-01-24 03:39:01.893591: step: 298/459, loss: 0.017694653943181038 2023-01-24 03:39:02.451681: step: 300/459, loss: 0.010329166427254677 2023-01-24 03:39:03.197452: step: 302/459, loss: 0.03231239691376686 2023-01-24 03:39:03.791032: step: 304/459, loss: 0.004854681435972452 2023-01-24 03:39:04.371341: step: 306/459, loss: 0.005921223200857639 2023-01-24 03:39:04.992503: step: 308/459, loss: 0.014542152173817158 2023-01-24 03:39:05.641943: step: 310/459, loss: 0.018854795023798943 2023-01-24 03:39:06.302594: step: 312/459, loss: 0.0020278762094676495 2023-01-24 03:39:06.862703: step: 314/459, loss: 0.006924374960362911 2023-01-24 03:39:07.485865: step: 316/459, loss: 0.003667932702228427 2023-01-24 03:39:08.124348: step: 318/459, loss: 0.03436372056603432 2023-01-24 03:39:08.722070: step: 320/459, loss: 0.005036013200879097 2023-01-24 03:39:09.353852: step: 322/459, loss: 0.012387016788125038 2023-01-24 03:39:09.994317: step: 324/459, loss: 0.024708911776542664 2023-01-24 03:39:10.648315: step: 326/459, loss: 0.04136097803711891 2023-01-24 03:39:11.249447: step: 328/459, loss: 0.02197839878499508 2023-01-24 03:39:11.973193: step: 330/459, loss: 0.08774450421333313 2023-01-24 03:39:12.553981: step: 332/459, loss: 4.1361869079992175e-05 2023-01-24 03:39:13.172278: step: 334/459, loss: 0.06033990904688835 2023-01-24 03:39:13.793893: step: 336/459, loss: 0.008539068512618542 2023-01-24 03:39:14.440348: step: 338/459, loss: 0.003983272239565849 2023-01-24 03:39:15.039801: step: 340/459, loss: 0.28416454792022705 2023-01-24 03:39:15.638429: step: 342/459, loss: 0.02166743017733097 2023-01-24 03:39:16.286860: step: 344/459, loss: 0.01759008690714836 2023-01-24 03:39:16.921417: step: 346/459, loss: 0.009929939173161983 2023-01-24 03:39:17.510828: step: 348/459, loss: 0.02714213915169239 2023-01-24 03:39:18.058714: step: 350/459, loss: 0.08659033477306366 2023-01-24 03:39:18.674686: step: 352/459, loss: 0.018883338198065758 2023-01-24 03:39:19.297554: step: 354/459, loss: 0.03717219829559326 2023-01-24 03:39:19.906502: step: 356/459, loss: 0.0006391191855072975 2023-01-24 03:39:20.524283: step: 358/459, loss: 0.07184448093175888 2023-01-24 03:39:21.113264: step: 360/459, loss: 0.017385488376021385 2023-01-24 03:39:21.712897: step: 362/459, loss: 0.041036203503608704 
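Most step losses in these epochs sit in roughly the 0.001-0.1 range, but occasional spikes show up (for example loss 1.78 at step 258 just above, or 1.51 at step 32 earlier in this epoch). Such outlier steps can be pulled out of the parsed records from the earlier parsing sketch with a simple filter; the 1.0 cutoff below is an arbitrary choice, not something the training script defines:

    def find_loss_spikes(records, threshold=1.0):
        """records: (timestamp, step, loss) tuples as returned by parse_step_losses."""
        return [r for r in records if r[2] > threshold]

    # On this epoch's records this would surface, e.g., step 258 (loss ~1.78) and step 32 (loss ~1.51).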
2023-01-24 03:39:22.303391: step: 364/459, loss: 0.0009654869791120291 2023-01-24 03:39:22.910754: step: 366/459, loss: 0.053865741938352585 2023-01-24 03:39:23.481831: step: 368/459, loss: 0.04158803075551987 2023-01-24 03:39:24.045506: step: 370/459, loss: 0.05550607293844223 2023-01-24 03:39:24.710638: step: 372/459, loss: 0.024449285119771957 2023-01-24 03:39:25.302361: step: 374/459, loss: 0.021642589941620827 2023-01-24 03:39:25.875020: step: 376/459, loss: 0.0035452248994261026 2023-01-24 03:39:26.582837: step: 378/459, loss: 0.008868085220456123 2023-01-24 03:39:27.169153: step: 380/459, loss: 0.08170263469219208 2023-01-24 03:39:27.785050: step: 382/459, loss: 0.008039543405175209 2023-01-24 03:39:28.434493: step: 384/459, loss: 0.018831104040145874 2023-01-24 03:39:29.110138: step: 386/459, loss: 0.004686756059527397 2023-01-24 03:39:29.707442: step: 388/459, loss: 0.03575321286916733 2023-01-24 03:39:30.378733: step: 390/459, loss: 0.03515317663550377 2023-01-24 03:39:31.050108: step: 392/459, loss: 0.005303357262164354 2023-01-24 03:39:31.612222: step: 394/459, loss: 0.0016115043545141816 2023-01-24 03:39:32.229335: step: 396/459, loss: 0.009982817806303501 2023-01-24 03:39:32.857566: step: 398/459, loss: 0.027345748618245125 2023-01-24 03:39:33.405861: step: 400/459, loss: 0.02992206998169422 2023-01-24 03:39:34.039983: step: 402/459, loss: 0.012044349685311317 2023-01-24 03:39:34.668957: step: 404/459, loss: 0.0020644525066018105 2023-01-24 03:39:35.325486: step: 406/459, loss: 0.0017055175267159939 2023-01-24 03:39:35.931826: step: 408/459, loss: 0.019227875396609306 2023-01-24 03:39:36.576470: step: 410/459, loss: 0.09641975164413452 2023-01-24 03:39:37.230628: step: 412/459, loss: 0.03673598915338516 2023-01-24 03:39:37.873958: step: 414/459, loss: 0.00600712513551116 2023-01-24 03:39:38.522117: step: 416/459, loss: 0.026283906772732735 2023-01-24 03:39:39.092743: step: 418/459, loss: 0.018883338198065758 2023-01-24 03:39:39.715619: step: 420/459, loss: 0.004169698338955641 2023-01-24 03:39:40.363382: step: 422/459, loss: 0.14879582822322845 2023-01-24 03:39:40.951002: step: 424/459, loss: 0.0273690614849329 2023-01-24 03:39:41.492078: step: 426/459, loss: 0.013997108675539494 2023-01-24 03:39:42.085777: step: 428/459, loss: 0.06193672865629196 2023-01-24 03:39:42.716672: step: 430/459, loss: 0.015378601849079132 2023-01-24 03:39:43.340119: step: 432/459, loss: 0.00874734204262495 2023-01-24 03:39:43.924885: step: 434/459, loss: 0.07572653144598007 2023-01-24 03:39:44.553097: step: 436/459, loss: 0.011995370499789715 2023-01-24 03:39:45.201397: step: 438/459, loss: 0.007508764509111643 2023-01-24 03:39:45.860544: step: 440/459, loss: 0.0005184131441637874 2023-01-24 03:39:46.442312: step: 442/459, loss: 0.002140658674761653 2023-01-24 03:39:47.061034: step: 444/459, loss: 0.004296074155718088 2023-01-24 03:39:47.647928: step: 446/459, loss: 0.01516803354024887 2023-01-24 03:39:48.218892: step: 448/459, loss: 0.32528427243232727 2023-01-24 03:39:48.862109: step: 450/459, loss: 0.07048619538545609 2023-01-24 03:39:49.483502: step: 452/459, loss: 0.019721077755093575 2023-01-24 03:39:50.096922: step: 454/459, loss: 0.003894037799909711 2023-01-24 03:39:50.726520: step: 456/459, loss: 0.00027351436438038945 2023-01-24 03:39:51.411606: step: 458/459, loss: 0.011181176640093327 2023-01-24 03:39:52.043521: step: 460/459, loss: 0.0018295308109372854 2023-01-24 03:39:52.646334: step: 462/459, loss: 0.013135110028088093 2023-01-24 03:39:53.238671: step: 464/459, loss: 
0.02591325342655182 2023-01-24 03:39:53.789898: step: 466/459, loss: 0.0004670961352530867 2023-01-24 03:39:54.402537: step: 468/459, loss: 0.03770843520760536 2023-01-24 03:39:55.019878: step: 470/459, loss: 0.0060883937403559685 2023-01-24 03:39:55.553696: step: 472/459, loss: 0.02518143504858017 2023-01-24 03:39:56.172303: step: 474/459, loss: 0.0027213909197598696 2023-01-24 03:39:56.778917: step: 476/459, loss: 0.23515081405639648 2023-01-24 03:39:57.435711: step: 478/459, loss: 0.0038168735336512327 2023-01-24 03:39:58.070519: step: 480/459, loss: 0.02362491935491562 2023-01-24 03:39:58.753308: step: 482/459, loss: 0.003008761443197727 2023-01-24 03:39:59.424683: step: 484/459, loss: 0.07593414187431335 2023-01-24 03:40:00.035124: step: 486/459, loss: 0.0008090758929029107 2023-01-24 03:40:00.690665: step: 488/459, loss: 0.001034740125760436 2023-01-24 03:40:01.355826: step: 490/459, loss: 0.0003483631007838994 2023-01-24 03:40:01.974993: step: 492/459, loss: 0.15384705364704132 2023-01-24 03:40:02.615669: step: 494/459, loss: 0.07246986776590347 2023-01-24 03:40:03.210074: step: 496/459, loss: 0.0033438762184232473 2023-01-24 03:40:03.884427: step: 498/459, loss: 0.007692870683968067 2023-01-24 03:40:04.416083: step: 500/459, loss: 0.0017869347939267755 2023-01-24 03:40:05.047437: step: 502/459, loss: 0.030574340373277664 2023-01-24 03:40:05.701576: step: 504/459, loss: 0.003442509099841118 2023-01-24 03:40:06.375603: step: 506/459, loss: 0.3003910183906555 2023-01-24 03:40:06.942370: step: 508/459, loss: 0.0008438106742687523 2023-01-24 03:40:07.536223: step: 510/459, loss: 0.0039049996994435787 2023-01-24 03:40:08.126229: step: 512/459, loss: 0.01075745839625597 2023-01-24 03:40:08.771737: step: 514/459, loss: 0.011261188425123692 2023-01-24 03:40:09.450900: step: 516/459, loss: 0.03936687484383583 2023-01-24 03:40:10.099626: step: 518/459, loss: 0.008411924354732037 2023-01-24 03:40:10.715345: step: 520/459, loss: 0.0024057687260210514 2023-01-24 03:40:11.355695: step: 522/459, loss: 0.0020753417629748583 2023-01-24 03:40:11.968852: step: 524/459, loss: 0.0005003126570954919 2023-01-24 03:40:12.552889: step: 526/459, loss: 0.0009389762999489903 2023-01-24 03:40:13.184600: step: 528/459, loss: 0.016029704362154007 2023-01-24 03:40:13.749063: step: 530/459, loss: 0.004977842792868614 2023-01-24 03:40:14.396987: step: 532/459, loss: 0.010811909101903439 2023-01-24 03:40:14.965116: step: 534/459, loss: 0.0010417019948363304 2023-01-24 03:40:15.692200: step: 536/459, loss: 0.010255401954054832 2023-01-24 03:40:16.305890: step: 538/459, loss: 0.0008397845667786896 2023-01-24 03:40:16.914258: step: 540/459, loss: 0.032229624688625336 2023-01-24 03:40:17.621208: step: 542/459, loss: 0.013448688201606274 2023-01-24 03:40:18.260602: step: 544/459, loss: 0.03457087278366089 2023-01-24 03:40:18.920768: step: 546/459, loss: 0.05185893923044205 2023-01-24 03:40:19.544142: step: 548/459, loss: 0.22857630252838135 2023-01-24 03:40:20.164509: step: 550/459, loss: 0.0012447809567674994 2023-01-24 03:40:20.845994: step: 552/459, loss: 0.09413229674100876 2023-01-24 03:40:21.522094: step: 554/459, loss: 1.0156984329223633 2023-01-24 03:40:22.130531: step: 556/459, loss: 0.012141186743974686 2023-01-24 03:40:22.755685: step: 558/459, loss: 0.001674047438427806 2023-01-24 03:40:23.344616: step: 560/459, loss: 0.0024301214143633842 2023-01-24 03:40:23.957904: step: 562/459, loss: 0.028708480298519135 2023-01-24 03:40:24.605665: step: 564/459, loss: 0.027135221287608147 2023-01-24 03:40:25.168954: step: 
566/459, loss: 0.04143426939845085 2023-01-24 03:40:25.801331: step: 568/459, loss: 0.0934886559844017 2023-01-24 03:40:26.412843: step: 570/459, loss: 0.09327779710292816 2023-01-24 03:40:26.989326: step: 572/459, loss: 0.004653511568903923 2023-01-24 03:40:27.615113: step: 574/459, loss: 0.017045972868800163 2023-01-24 03:40:28.305498: step: 576/459, loss: 0.005588263738900423 2023-01-24 03:40:28.970485: step: 578/459, loss: 0.007468739058822393 2023-01-24 03:40:29.678708: step: 580/459, loss: 0.017062770202755928 2023-01-24 03:40:30.306829: step: 582/459, loss: 0.012510848231613636 2023-01-24 03:40:31.039223: step: 584/459, loss: 0.24319009482860565 2023-01-24 03:40:31.738251: step: 586/459, loss: 0.01653856784105301 2023-01-24 03:40:32.365476: step: 588/459, loss: 0.00018943144823424518 2023-01-24 03:40:33.055445: step: 590/459, loss: 0.17999513447284698 2023-01-24 03:40:33.723134: step: 592/459, loss: 0.015502041205763817 2023-01-24 03:40:34.330172: step: 594/459, loss: 0.003276975592598319 2023-01-24 03:40:34.855061: step: 596/459, loss: 0.013500492088496685 2023-01-24 03:40:35.558876: step: 598/459, loss: 0.14473405480384827 2023-01-24 03:40:36.168000: step: 600/459, loss: 0.0077173737809062 2023-01-24 03:40:36.751243: step: 602/459, loss: 0.04060986638069153 2023-01-24 03:40:37.364981: step: 604/459, loss: 0.011984952725470066 2023-01-24 03:40:37.914408: step: 606/459, loss: 0.025587022304534912 2023-01-24 03:40:38.448138: step: 608/459, loss: 0.009112726897001266 2023-01-24 03:40:39.001924: step: 610/459, loss: 0.017908969894051552 2023-01-24 03:40:39.658413: step: 612/459, loss: 0.08218645304441452 2023-01-24 03:40:40.276703: step: 614/459, loss: 0.007886591367423534 2023-01-24 03:40:40.942176: step: 616/459, loss: 0.022137578576803207 2023-01-24 03:40:41.615061: step: 618/459, loss: 0.012082450091838837 2023-01-24 03:40:42.284527: step: 620/459, loss: 0.002872261218726635 2023-01-24 03:40:42.898087: step: 622/459, loss: 0.015434329397976398 2023-01-24 03:40:43.513989: step: 624/459, loss: 0.0012444667518138885 2023-01-24 03:40:44.058054: step: 626/459, loss: 0.0003881501907017082 2023-01-24 03:40:44.712803: step: 628/459, loss: 0.02633276768028736 2023-01-24 03:40:45.294609: step: 630/459, loss: 0.0007580122910439968 2023-01-24 03:40:45.943247: step: 632/459, loss: 0.11951480805873871 2023-01-24 03:40:46.590702: step: 634/459, loss: 0.033196430653333664 2023-01-24 03:40:47.242845: step: 636/459, loss: 0.05054225027561188 2023-01-24 03:40:47.920646: step: 638/459, loss: 0.013740974478423595 2023-01-24 03:40:48.495479: step: 640/459, loss: 0.0008831777377054095 2023-01-24 03:40:49.046080: step: 642/459, loss: 0.059525225311517715 2023-01-24 03:40:49.632109: step: 644/459, loss: 0.0009823271539062262 2023-01-24 03:40:50.255997: step: 646/459, loss: 0.007305045146495104 2023-01-24 03:40:50.852789: step: 648/459, loss: 0.013156455010175705 2023-01-24 03:40:51.510723: step: 650/459, loss: 0.04121730104088783 2023-01-24 03:40:52.112360: step: 652/459, loss: 0.009454536251723766 2023-01-24 03:40:52.706028: step: 654/459, loss: 0.017638202756643295 2023-01-24 03:40:53.349628: step: 656/459, loss: 0.030034808441996574 2023-01-24 03:40:54.012838: step: 658/459, loss: 0.028375571593642235 2023-01-24 03:40:54.673231: step: 660/459, loss: 0.06349187344312668 2023-01-24 03:40:55.343864: step: 662/459, loss: 0.004688021261245012 2023-01-24 03:40:55.947767: step: 664/459, loss: 0.03467513993382454 2023-01-24 03:40:56.527985: step: 666/459, loss: 0.043459564447402954 2023-01-24 03:40:57.303631: 
step: 668/459, loss: 0.08829988539218903 2023-01-24 03:40:57.976533: step: 670/459, loss: 0.02253684401512146 2023-01-24 03:40:58.581943: step: 672/459, loss: 0.006506914738565683 2023-01-24 03:40:59.157121: step: 674/459, loss: 0.008917467668652534 2023-01-24 03:40:59.803310: step: 676/459, loss: 0.02840626798570156 2023-01-24 03:41:00.399431: step: 678/459, loss: 0.012999277561903 2023-01-24 03:41:01.072005: step: 680/459, loss: 0.019177861511707306 2023-01-24 03:41:01.787805: step: 682/459, loss: 0.03030211664736271 2023-01-24 03:41:02.388864: step: 684/459, loss: 0.01580904610455036 2023-01-24 03:41:03.032580: step: 686/459, loss: 0.017244048416614532 2023-01-24 03:41:03.724030: step: 688/459, loss: 0.2798893451690674 2023-01-24 03:41:04.343346: step: 690/459, loss: 0.00964568741619587 2023-01-24 03:41:04.920580: step: 692/459, loss: 0.008422129787504673 2023-01-24 03:41:05.503406: step: 694/459, loss: 0.10648163408041 2023-01-24 03:41:06.035715: step: 696/459, loss: 0.006702106911689043 2023-01-24 03:41:06.642543: step: 698/459, loss: 0.0025006902869790792 2023-01-24 03:41:07.299380: step: 700/459, loss: 0.05706685781478882 2023-01-24 03:41:07.941808: step: 702/459, loss: 0.020626574754714966 2023-01-24 03:41:08.561055: step: 704/459, loss: 0.012499440461397171 2023-01-24 03:41:09.157847: step: 706/459, loss: 0.008215563371777534 2023-01-24 03:41:09.709637: step: 708/459, loss: 0.0029619522392749786 2023-01-24 03:41:10.379618: step: 710/459, loss: 0.015109676867723465 2023-01-24 03:41:10.959019: step: 712/459, loss: 0.003642162075266242 2023-01-24 03:41:11.562104: step: 714/459, loss: 0.0007849183166399598 2023-01-24 03:41:12.160713: step: 716/459, loss: 0.008779550902545452 2023-01-24 03:41:12.801195: step: 718/459, loss: 0.18603941798210144 2023-01-24 03:41:13.488336: step: 720/459, loss: 0.04993754252791405 2023-01-24 03:41:14.100477: step: 722/459, loss: 0.009199978783726692 2023-01-24 03:41:14.745899: step: 724/459, loss: 0.012468026019632816 2023-01-24 03:41:15.312329: step: 726/459, loss: 0.01234308909624815 2023-01-24 03:41:15.884359: step: 728/459, loss: 0.010719121433794498 2023-01-24 03:41:16.535685: step: 730/459, loss: 0.0020755664445459843 2023-01-24 03:41:17.098429: step: 732/459, loss: 0.0037750136107206345 2023-01-24 03:41:17.638002: step: 734/459, loss: 0.034913964569568634 2023-01-24 03:41:18.323217: step: 736/459, loss: 0.017906783148646355 2023-01-24 03:41:19.027214: step: 738/459, loss: 1.096289873123169 2023-01-24 03:41:19.648271: step: 740/459, loss: 0.0006147835520096123 2023-01-24 03:41:20.220420: step: 742/459, loss: 0.042051732540130615 2023-01-24 03:41:20.755933: step: 744/459, loss: 0.00015670672291889787 2023-01-24 03:41:21.416977: step: 746/459, loss: 0.29578539729118347 2023-01-24 03:41:22.002549: step: 748/459, loss: 0.0001159568564617075 2023-01-24 03:41:22.697707: step: 750/459, loss: 0.027658682316541672 2023-01-24 03:41:23.297394: step: 752/459, loss: 0.008933293633162975 2023-01-24 03:41:23.861794: step: 754/459, loss: 0.0062838345766067505 2023-01-24 03:41:24.424673: step: 756/459, loss: 0.000187703815754503 2023-01-24 03:41:25.065734: step: 758/459, loss: 0.007734156679362059 2023-01-24 03:41:25.658301: step: 760/459, loss: 0.017920495942234993 2023-01-24 03:41:26.261416: step: 762/459, loss: 0.009238891303539276 2023-01-24 03:41:26.843804: step: 764/459, loss: 0.03660673648118973 2023-01-24 03:41:27.492067: step: 766/459, loss: 0.024048665538430214 2023-01-24 03:41:28.145100: step: 768/459, loss: 0.011286824941635132 2023-01-24 
03:41:28.741619: step: 770/459, loss: 0.01362184714525938 2023-01-24 03:41:29.352581: step: 772/459, loss: 0.047601472586393356 2023-01-24 03:41:29.956259: step: 774/459, loss: 0.004482116550207138 2023-01-24 03:41:30.628368: step: 776/459, loss: 0.014779562130570412 2023-01-24 03:41:31.349483: step: 778/459, loss: 0.0100523317232728 2023-01-24 03:41:32.062299: step: 780/459, loss: 0.0031337698455899954 2023-01-24 03:41:32.672144: step: 782/459, loss: 0.018301865085959435 2023-01-24 03:41:33.213862: step: 784/459, loss: 0.015326536260545254 2023-01-24 03:41:33.910622: step: 786/459, loss: 0.01485794223845005 2023-01-24 03:41:34.542401: step: 788/459, loss: 0.002278352854773402 2023-01-24 03:41:35.143966: step: 790/459, loss: 0.011844885535538197 2023-01-24 03:41:35.715973: step: 792/459, loss: 0.006268856581300497 2023-01-24 03:41:36.461586: step: 794/459, loss: 0.01085229404270649 2023-01-24 03:41:37.085434: step: 796/459, loss: 0.0056832656264305115 2023-01-24 03:41:37.782711: step: 798/459, loss: 0.035758085548877716 2023-01-24 03:41:38.347718: step: 800/459, loss: 0.002268141368404031 2023-01-24 03:41:38.995395: step: 802/459, loss: 0.05243174731731415 2023-01-24 03:41:39.592379: step: 804/459, loss: 0.010395101271569729 2023-01-24 03:41:40.232873: step: 806/459, loss: 0.00524560920894146 2023-01-24 03:41:40.874610: step: 808/459, loss: 0.23192098736763 2023-01-24 03:41:41.481208: step: 810/459, loss: 0.00043000216828659177 2023-01-24 03:41:42.101412: step: 812/459, loss: 0.005046218167990446 2023-01-24 03:41:42.737114: step: 814/459, loss: 0.061164937913417816 2023-01-24 03:41:43.358626: step: 816/459, loss: 0.0027684979140758514 2023-01-24 03:41:43.987186: step: 818/459, loss: 0.08570786565542221 2023-01-24 03:41:44.562696: step: 820/459, loss: 0.015020015649497509 2023-01-24 03:41:45.176472: step: 822/459, loss: 0.07305755466222763 2023-01-24 03:41:45.817290: step: 824/459, loss: 0.005968265701085329 2023-01-24 03:41:46.445748: step: 826/459, loss: 0.0013601701939478517 2023-01-24 03:41:47.031054: step: 828/459, loss: 0.0278239194303751 2023-01-24 03:41:47.635466: step: 830/459, loss: 0.0029203330632299185 2023-01-24 03:41:48.313611: step: 832/459, loss: 0.016523249447345734 2023-01-24 03:41:49.012418: step: 834/459, loss: 0.0048157889395952225 2023-01-24 03:41:49.663767: step: 836/459, loss: 1.2051161527633667 2023-01-24 03:41:50.256192: step: 838/459, loss: 0.007416002452373505 2023-01-24 03:41:50.948757: step: 840/459, loss: 0.040344495326280594 2023-01-24 03:41:51.585050: step: 842/459, loss: 0.03702118992805481 2023-01-24 03:41:52.161545: step: 844/459, loss: 0.013093979097902775 2023-01-24 03:41:52.747275: step: 846/459, loss: 0.005085431504994631 2023-01-24 03:41:53.350500: step: 848/459, loss: 0.002844612579792738 2023-01-24 03:41:54.023856: step: 850/459, loss: 0.001033469452522695 2023-01-24 03:41:54.635025: step: 852/459, loss: 0.040893442928791046 2023-01-24 03:41:55.252991: step: 854/459, loss: 0.01269544567912817 2023-01-24 03:41:55.939042: step: 856/459, loss: 0.017027387395501137 2023-01-24 03:41:56.576680: step: 858/459, loss: 0.025478068739175797 2023-01-24 03:41:57.171746: step: 860/459, loss: 0.0562034510076046 2023-01-24 03:41:57.791895: step: 862/459, loss: 0.0013724283780902624 2023-01-24 03:41:58.339581: step: 864/459, loss: 0.07355601340532303 2023-01-24 03:41:58.983843: step: 866/459, loss: 0.010977435857057571 2023-01-24 03:41:59.571315: step: 868/459, loss: 0.040957916527986526 2023-01-24 03:42:00.291995: step: 870/459, loss: 0.04906667396426201 
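The per-epoch summary blocks (such as the epoch-29 block that follows) print each score set as a Python-literal dict, so dev/test trends across epochs can be collected into a flat table for comparison or plotting. A minimal sketch; the label set and the idea of reading everything from one log file are assumptions:

    import ast
    import re

    # Matches summary entries like "Dev Chinese: {'template': {...}, ..., 'epoch': 29}"
    # (the "Dev for ..."/"Test for ..." lines inside "Current best result" are deliberately skipped).
    SUMMARY_RE = re.compile(r"(Dev|Test|Sample) (Chinese|Korean|Russian): (\{.*?'epoch': \d+\})")

    def collect_epoch_scores(log_text):
        rows = []
        for split, lang, literal in SUMMARY_RE.findall(log_text):
            scores = ast.literal_eval(literal)  # the logged dicts are valid Python literals
            rows.append({
                "epoch": scores["epoch"],
                "split": split,
                "lang": lang,
                "template_f1": scores["template"]["f1"],
                "slot_f1": scores["slot"]["f1"],
                "combined": scores["combined"],
            })
        return rows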
2023-01-24 03:42:00.919431: step: 872/459, loss: 0.03201683983206749 2023-01-24 03:42:01.501368: step: 874/459, loss: 0.025240574032068253 2023-01-24 03:42:02.093327: step: 876/459, loss: 0.0023858847562223673 2023-01-24 03:42:02.694057: step: 878/459, loss: 0.0017234949627891183 2023-01-24 03:42:03.370999: step: 880/459, loss: 0.010888160206377506 2023-01-24 03:42:03.970911: step: 882/459, loss: 0.005166711285710335 2023-01-24 03:42:04.537418: step: 884/459, loss: 0.013084332458674908 2023-01-24 03:42:05.131029: step: 886/459, loss: 0.00995495356619358 2023-01-24 03:42:05.745541: step: 888/459, loss: 0.0006281676469370723 2023-01-24 03:42:06.353857: step: 890/459, loss: 0.249897301197052 2023-01-24 03:42:07.006298: step: 892/459, loss: 0.06577090173959732 2023-01-24 03:42:07.655367: step: 894/459, loss: 0.0038052378222346306 2023-01-24 03:42:08.192058: step: 896/459, loss: 0.04949374869465828 2023-01-24 03:42:08.855365: step: 898/459, loss: 0.2844613492488861 2023-01-24 03:42:09.488942: step: 900/459, loss: 0.017466694116592407 2023-01-24 03:42:10.124128: step: 902/459, loss: 0.004324205219745636 2023-01-24 03:42:10.775691: step: 904/459, loss: 0.02355961874127388 2023-01-24 03:42:11.427620: step: 906/459, loss: 0.0682283416390419 2023-01-24 03:42:12.016410: step: 908/459, loss: 0.004055839963257313 2023-01-24 03:42:12.563765: step: 910/459, loss: 0.04024261608719826 2023-01-24 03:42:13.265342: step: 912/459, loss: 0.019982587546110153 2023-01-24 03:42:13.846143: step: 914/459, loss: 0.36108970642089844 2023-01-24 03:42:14.542997: step: 916/459, loss: 0.004598910454660654 2023-01-24 03:42:15.156178: step: 918/459, loss: 0.08038664609193802 2023-01-24 03:42:15.631823: step: 920/459, loss: 2.3841835172788706e-08
==================================================
Loss: 0.049
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3456856476056442, 'r': 0.32338334776011873, 'f1': 0.33416279268545607}, 'combined': 0.24622521566296762, 'epoch': 29}
Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.349869159965573, 'r': 0.28906387374771636, 'f1': 0.31657319107996046}, 'combined': 0.20260684229117465, 'epoch': 29}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3448952841069931, 'r': 0.33442408003543356, 'f1': 0.3395789791496599}, 'combined': 0.25021608989974936, 'epoch': 29}
Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3533224539551235, 'r': 0.2897244122432013, 'f1': 0.31837847499252886}, 'combined': 0.20376222399521843, 'epoch': 29}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35575959742793833, 'r': 0.3280819057874346, 'f1': 0.34136064037508}, 'combined': 0.25152889290795366, 'epoch': 29}
Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.35112062156785767, 'r': 0.3015544136051819, 'f1': 0.3244554102287994}, 'combined': 0.2326284073338562, 'epoch': 29}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25203252032520324, 'r': 0.2952380952380952, 'f1': 0.27192982456140347}, 'combined': 0.1812865497076023, 'epoch': 29}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25862068965517243, 'r': 0.32608695652173914, 'f1': 0.2884615384615385}, 'combined': 0.14423076923076925, 'epoch': 29}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36363636363636365, 'r': 0.13793103448275862, 'f1': 0.2}, 'combined': 0.13333333333333333, 'epoch': 29}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 'epoch': 10}
Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 'epoch': 10}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'epoch': 10}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3234579439252337, 'r': 0.32836812144212524, 'f1': 0.32589453860640305}, 'combined': 0.2401328179205075, 'epoch': 25}
Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33820520545292077, 'r': 0.29673590233199043, 'f1': 0.3161163313667358}, 'combined': 0.20231445207471088, 'epoch': 25}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.32142857142857145, 'r': 0.391304347826087, 'f1': 0.35294117647058826}, 'combined': 0.17647058823529413, 'epoch': 25}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8}
Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8}
******************************
Epoch: 30
command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4
2023-01-24 03:44:51.443661: step: 2/459, loss: 0.014910481870174408 2023-01-24 03:44:52.028764: step: 4/459, loss: 0.005932518746703863 2023-01-24 03:44:52.698314: step: 6/459, loss: 0.004344563465565443 2023-01-24 03:44:53.274604: step: 8/459, loss: 0.000535528059117496 2023-01-24 03:44:53.881804: step: 10/459, loss: 0.0012826977763324976 2023-01-24 03:44:54.521663: step: 12/459, loss: 0.08268237113952637 2023-01-24 03:44:55.136746: step: 14/459, loss: 0.02625052072107792 2023-01-24 03:44:55.709813: step: 16/459, loss: 0.00821360107511282 2023-01-24 03:44:56.342792: step: 18/459, loss: 0.0083686001598835 2023-01-24 03:44:56.908777: step: 20/459, loss: 0.021830826997756958 2023-01-24 03:44:57.591012: step: 22/459, loss: 0.19930461049079895 2023-01-24 03:44:58.233858: step: 24/459, loss: 0.01859212853014469 2023-01-24 03:44:58.882361: step: 26/459, loss: 0.03477661311626434 2023-01-24
03:44:59.512082: step: 28/459, loss: 0.05995073914527893 2023-01-24 03:45:00.120881: step: 30/459, loss: 0.0028291104827076197 2023-01-24 03:45:00.731680: step: 32/459, loss: 0.02028093859553337 2023-01-24 03:45:01.377046: step: 34/459, loss: 0.08973291516304016 2023-01-24 03:45:01.891001: step: 36/459, loss: 2.434198177070357e-05 2023-01-24 03:45:02.501800: step: 38/459, loss: 0.011776304803788662 2023-01-24 03:45:03.060496: step: 40/459, loss: 0.03749333694577217 2023-01-24 03:45:03.768998: step: 42/459, loss: 0.06524718552827835 2023-01-24 03:45:04.399960: step: 44/459, loss: 0.0005758956540375948 2023-01-24 03:45:05.040517: step: 46/459, loss: 0.029781628400087357 2023-01-24 03:45:05.627234: step: 48/459, loss: 0.019613374024629593 2023-01-24 03:45:06.240037: step: 50/459, loss: 0.004601107910275459 2023-01-24 03:45:06.835147: step: 52/459, loss: 0.0028228950686752796 2023-01-24 03:45:07.462605: step: 54/459, loss: 0.0025148007553070784 2023-01-24 03:45:08.112169: step: 56/459, loss: 0.012851478531956673 2023-01-24 03:45:08.670371: step: 58/459, loss: 0.0020574068184942007 2023-01-24 03:45:09.267711: step: 60/459, loss: 0.08292889595031738 2023-01-24 03:45:09.894337: step: 62/459, loss: 0.026578810065984726 2023-01-24 03:45:10.511993: step: 64/459, loss: 0.023968273773789406 2023-01-24 03:45:11.118466: step: 66/459, loss: 0.03013773448765278 2023-01-24 03:45:11.793135: step: 68/459, loss: 0.003802303923293948 2023-01-24 03:45:12.371043: step: 70/459, loss: 0.036366190761327744 2023-01-24 03:45:13.007710: step: 72/459, loss: 0.011638801544904709 2023-01-24 03:45:13.603713: step: 74/459, loss: 0.04288991168141365 2023-01-24 03:45:14.261393: step: 76/459, loss: 0.007077295333147049 2023-01-24 03:45:14.917565: step: 78/459, loss: 0.015776515007019043 2023-01-24 03:45:15.549656: step: 80/459, loss: 0.016172563657164574 2023-01-24 03:45:16.142770: step: 82/459, loss: 0.0006297159125097096 2023-01-24 03:45:16.745522: step: 84/459, loss: 0.0914708599448204 2023-01-24 03:45:17.348480: step: 86/459, loss: 0.0191170796751976 2023-01-24 03:45:18.029575: step: 88/459, loss: 0.03418998792767525 2023-01-24 03:45:18.671252: step: 90/459, loss: 0.0004617848608177155 2023-01-24 03:45:19.295634: step: 92/459, loss: 0.005430355202406645 2023-01-24 03:45:19.997254: step: 94/459, loss: 0.016030441969633102 2023-01-24 03:45:20.640256: step: 96/459, loss: 0.02770119160413742 2023-01-24 03:45:21.255810: step: 98/459, loss: 0.012819575145840645 2023-01-24 03:45:21.859516: step: 100/459, loss: 0.01045353151857853 2023-01-24 03:45:22.550332: step: 102/459, loss: 0.018354736268520355 2023-01-24 03:45:23.178725: step: 104/459, loss: 0.0009438840788789093 2023-01-24 03:45:23.812599: step: 106/459, loss: 0.0020695896819233894 2023-01-24 03:45:24.481333: step: 108/459, loss: 0.01668705977499485 2023-01-24 03:45:25.099043: step: 110/459, loss: 0.047641415148973465 2023-01-24 03:45:25.727052: step: 112/459, loss: 0.0026059960946440697 2023-01-24 03:45:26.312333: step: 114/459, loss: 0.0038357859011739492 2023-01-24 03:45:27.011685: step: 116/459, loss: 0.0061380029655992985 2023-01-24 03:45:27.629584: step: 118/459, loss: 0.0007186575676314533 2023-01-24 03:45:28.268561: step: 120/459, loss: 0.003922498784959316 2023-01-24 03:45:28.860513: step: 122/459, loss: 0.02104892209172249 2023-01-24 03:45:29.459592: step: 124/459, loss: 0.04350566118955612 2023-01-24 03:45:30.015575: step: 126/459, loss: 0.023417344316840172 2023-01-24 03:45:30.535019: step: 128/459, loss: 0.000478795962408185 2023-01-24 03:45:31.188196: step: 
130/459, loss: 0.05694112554192543 2023-01-24 03:45:31.810262: step: 132/459, loss: 0.04164251685142517 2023-01-24 03:45:32.407303: step: 134/459, loss: 0.0009130224934779108 2023-01-24 03:45:33.009926: step: 136/459, loss: 0.3111235499382019 2023-01-24 03:45:33.603056: step: 138/459, loss: 5.9339665313018486e-05 2023-01-24 03:45:34.260587: step: 140/459, loss: 0.08472876250743866 2023-01-24 03:45:34.865321: step: 142/459, loss: 0.17002512514591217 2023-01-24 03:45:35.508322: step: 144/459, loss: 0.010933320969343185 2023-01-24 03:45:36.171257: step: 146/459, loss: 0.001688761985860765 2023-01-24 03:45:36.862349: step: 148/459, loss: 0.017042692750692368 2023-01-24 03:45:37.488847: step: 150/459, loss: 0.018493320792913437 2023-01-24 03:45:38.156588: step: 152/459, loss: 0.008840611204504967 2023-01-24 03:45:38.724842: step: 154/459, loss: 0.0024159320164471865 2023-01-24 03:45:39.331929: step: 156/459, loss: 0.004399512894451618 2023-01-24 03:45:39.899393: step: 158/459, loss: 0.003473625285550952 2023-01-24 03:45:40.546988: step: 160/459, loss: 0.0033617631997913122 2023-01-24 03:45:41.155268: step: 162/459, loss: 0.07438057661056519 2023-01-24 03:45:41.774304: step: 164/459, loss: 0.035430412739515305 2023-01-24 03:45:42.357243: step: 166/459, loss: 0.007735422812402248 2023-01-24 03:45:43.097777: step: 168/459, loss: 0.06593453884124756 2023-01-24 03:45:43.712122: step: 170/459, loss: 0.053003113716840744 2023-01-24 03:45:44.268116: step: 172/459, loss: 0.0015376127557829022 2023-01-24 03:45:44.913176: step: 174/459, loss: 0.05657988786697388 2023-01-24 03:45:45.527442: step: 176/459, loss: 0.05322033911943436 2023-01-24 03:45:46.199259: step: 178/459, loss: 0.030057022348046303 2023-01-24 03:45:46.832460: step: 180/459, loss: 0.0017792031867429614 2023-01-24 03:45:47.476148: step: 182/459, loss: 0.004157264716923237 2023-01-24 03:45:48.103731: step: 184/459, loss: 0.025820231065154076 2023-01-24 03:45:48.724779: step: 186/459, loss: 0.0033406871370971203 2023-01-24 03:45:49.349970: step: 188/459, loss: 0.0030108706559985876 2023-01-24 03:45:49.966877: step: 190/459, loss: 0.5305126905441284 2023-01-24 03:45:50.610442: step: 192/459, loss: 0.00042419511009939015 2023-01-24 03:45:51.206174: step: 194/459, loss: 0.0029436368495225906 2023-01-24 03:45:51.766674: step: 196/459, loss: 0.005079623777419329 2023-01-24 03:45:52.388000: step: 198/459, loss: 0.01901412568986416 2023-01-24 03:45:53.006000: step: 200/459, loss: 0.019168049097061157 2023-01-24 03:45:53.609696: step: 202/459, loss: 0.010423420928418636 2023-01-24 03:45:54.208312: step: 204/459, loss: 0.0009082996402867138 2023-01-24 03:45:54.857318: step: 206/459, loss: 0.004455155227333307 2023-01-24 03:45:55.487883: step: 208/459, loss: 0.007864773273468018 2023-01-24 03:45:56.080312: step: 210/459, loss: 0.01791224628686905 2023-01-24 03:45:56.715019: step: 212/459, loss: 0.027607634663581848 2023-01-24 03:45:57.251395: step: 214/459, loss: 0.012363261543214321 2023-01-24 03:45:57.863722: step: 216/459, loss: 0.01373585220426321 2023-01-24 03:45:58.476545: step: 218/459, loss: 0.012759021483361721 2023-01-24 03:45:59.095747: step: 220/459, loss: 0.022287623956799507 2023-01-24 03:45:59.737738: step: 222/459, loss: 0.04733110964298248 2023-01-24 03:46:00.364268: step: 224/459, loss: 0.022434215992689133 2023-01-24 03:46:00.998991: step: 226/459, loss: 0.011404828168451786 2023-01-24 03:46:01.605257: step: 228/459, loss: 0.24531924724578857 2023-01-24 03:46:02.200238: step: 230/459, loss: 0.01391792856156826 2023-01-24 
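
The per-language evaluation blocks above report precision (p), recall (r), and F1 separately for the template level and the slot level, and the "combined" value is consistent with the product of the two F1 scores. Below is a minimal sketch of that arithmetic, assuming this interpretation; the helper is illustrative and is not code taken from train.py.

# Minimal sketch, assuming combined = template F1 * slot F1
# (inferred from the logged numbers above, not verified against train.py).
def f1(p, r):
    return 0.0 if p + r == 0 else 2 * p * r / (p + r)

# Values from the epoch-29 "Dev Chinese" block in this log.
template_f1 = f1(1.0, 0.5833333333333334)              # ~0.7368
slot_f1 = f1(0.3456856476056442, 0.32338334776011873)  # ~0.3342
combined = template_f1 * slot_f1                       # ~0.2462, matching the logged value
print(template_f1, slot_f1, combined)
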
03:46:02.832205: step: 232/459, loss: 0.012982658110558987 2023-01-24 03:46:03.535866: step: 234/459, loss: 0.010765713639557362 2023-01-24 03:46:04.121039: step: 236/459, loss: 0.0014421229716390371 2023-01-24 03:46:04.809882: step: 238/459, loss: 0.015250117518007755 2023-01-24 03:46:05.416390: step: 240/459, loss: 0.06191393360495567 2023-01-24 03:46:06.050841: step: 242/459, loss: 0.01233159750699997 2023-01-24 03:46:06.685371: step: 244/459, loss: 0.17304331064224243 2023-01-24 03:46:07.335535: step: 246/459, loss: 0.0020035901106894016 2023-01-24 03:46:07.891786: step: 248/459, loss: 0.016115784645080566 2023-01-24 03:46:08.505975: step: 250/459, loss: 0.028462417423725128 2023-01-24 03:46:09.133712: step: 252/459, loss: 0.00830465741455555 2023-01-24 03:46:09.689156: step: 254/459, loss: 0.00048410939052700996 2023-01-24 03:46:10.424989: step: 256/459, loss: 0.031341925263404846 2023-01-24 03:46:11.033621: step: 258/459, loss: 0.07162265479564667 2023-01-24 03:46:11.566531: step: 260/459, loss: 0.007958954200148582 2023-01-24 03:46:12.215277: step: 262/459, loss: 0.015572363510727882 2023-01-24 03:46:12.849507: step: 264/459, loss: 0.03755836933851242 2023-01-24 03:46:13.528810: step: 266/459, loss: 0.07732320576906204 2023-01-24 03:46:14.245664: step: 268/459, loss: 0.013155203312635422 2023-01-24 03:46:14.907814: step: 270/459, loss: 0.017635395750403404 2023-01-24 03:46:15.514735: step: 272/459, loss: 0.016606483608484268 2023-01-24 03:46:16.219312: step: 274/459, loss: 0.0252072811126709 2023-01-24 03:46:16.862160: step: 276/459, loss: 0.01702033169567585 2023-01-24 03:46:17.439415: step: 278/459, loss: 0.10386411845684052 2023-01-24 03:46:18.063260: step: 280/459, loss: 0.00016070110723376274 2023-01-24 03:46:18.684659: step: 282/459, loss: 0.0026032314635813236 2023-01-24 03:46:19.295449: step: 284/459, loss: 0.417090505361557 2023-01-24 03:46:19.946587: step: 286/459, loss: 0.015979604795575142 2023-01-24 03:46:20.487027: step: 288/459, loss: 0.0029556374065577984 2023-01-24 03:46:21.072225: step: 290/459, loss: 0.01787298172712326 2023-01-24 03:46:21.736457: step: 292/459, loss: 0.01397708710283041 2023-01-24 03:46:22.374109: step: 294/459, loss: 0.04147430509328842 2023-01-24 03:46:22.960620: step: 296/459, loss: 0.006026682909578085 2023-01-24 03:46:23.645765: step: 298/459, loss: 0.009324941784143448 2023-01-24 03:46:24.203186: step: 300/459, loss: 0.0049582733772695065 2023-01-24 03:46:24.830473: step: 302/459, loss: 0.0069375066086649895 2023-01-24 03:46:25.555769: step: 304/459, loss: 0.014835850335657597 2023-01-24 03:46:26.142450: step: 306/459, loss: 0.01226282399147749 2023-01-24 03:46:26.886941: step: 308/459, loss: 0.015838170424103737 2023-01-24 03:46:27.533766: step: 310/459, loss: 0.027976978570222855 2023-01-24 03:46:28.143068: step: 312/459, loss: 0.004851961042732 2023-01-24 03:46:28.695410: step: 314/459, loss: 0.0005420303787104785 2023-01-24 03:46:29.327653: step: 316/459, loss: 0.005150407552719116 2023-01-24 03:46:29.946692: step: 318/459, loss: 0.00531751848757267 2023-01-24 03:46:30.607004: step: 320/459, loss: 0.004700932186096907 2023-01-24 03:46:31.257492: step: 322/459, loss: 0.011518552899360657 2023-01-24 03:46:31.962636: step: 324/459, loss: 0.010976692661643028 2023-01-24 03:46:32.604661: step: 326/459, loss: 0.011413292959332466 2023-01-24 03:46:33.160061: step: 328/459, loss: 0.004313381854444742 2023-01-24 03:46:33.756519: step: 330/459, loss: 0.02469734475016594 2023-01-24 03:46:34.352704: step: 332/459, loss: 0.00863763689994812 
2023-01-24 03:46:34.961313: step: 334/459, loss: 0.0005341434152796865 2023-01-24 03:46:35.585128: step: 336/459, loss: 0.001355118933133781 2023-01-24 03:46:36.255775: step: 338/459, loss: 0.05051184073090553 2023-01-24 03:46:36.826576: step: 340/459, loss: 0.0015359389362856746 2023-01-24 03:46:37.393587: step: 342/459, loss: 0.0005856509087607265 2023-01-24 03:46:37.977497: step: 344/459, loss: 0.0036298399791121483 2023-01-24 03:46:38.595332: step: 346/459, loss: 0.024010349065065384 2023-01-24 03:46:39.221332: step: 348/459, loss: 0.00047898164484649897 2023-01-24 03:46:39.943371: step: 350/459, loss: 2.2022124539944343e-05 2023-01-24 03:46:40.558622: step: 352/459, loss: 0.7279503345489502 2023-01-24 03:46:41.198079: step: 354/459, loss: 0.026747524738311768 2023-01-24 03:46:41.781939: step: 356/459, loss: 0.001439630868844688 2023-01-24 03:46:42.352404: step: 358/459, loss: 0.0017520999535918236 2023-01-24 03:46:42.949448: step: 360/459, loss: 0.0007674497901462018 2023-01-24 03:46:43.514805: step: 362/459, loss: 0.006386906839907169 2023-01-24 03:46:44.115717: step: 364/459, loss: 0.001332573127001524 2023-01-24 03:46:44.683864: step: 366/459, loss: 0.014100762084126472 2023-01-24 03:46:45.259270: step: 368/459, loss: 0.00969723705202341 2023-01-24 03:46:45.872608: step: 370/459, loss: 0.00039239853504113853 2023-01-24 03:46:46.444797: step: 372/459, loss: 0.0015958750154823065 2023-01-24 03:46:47.083524: step: 374/459, loss: 0.00224565458483994 2023-01-24 03:46:47.677896: step: 376/459, loss: 4.52835884061642e-05 2023-01-24 03:46:48.359880: step: 378/459, loss: 0.054151616990566254 2023-01-24 03:46:49.009998: step: 380/459, loss: 0.006304911337792873 2023-01-24 03:46:49.614726: step: 382/459, loss: 0.0051602390594780445 2023-01-24 03:46:50.212199: step: 384/459, loss: 3.074212509091012e-05 2023-01-24 03:46:50.846513: step: 386/459, loss: 0.0020720725879073143 2023-01-24 03:46:51.429000: step: 388/459, loss: 0.01859378255903721 2023-01-24 03:46:52.059705: step: 390/459, loss: 0.06037577986717224 2023-01-24 03:46:52.693668: step: 392/459, loss: 0.11418560892343521 2023-01-24 03:46:53.260534: step: 394/459, loss: 0.008265268988907337 2023-01-24 03:46:53.845936: step: 396/459, loss: 0.03354920446872711 2023-01-24 03:46:54.429277: step: 398/459, loss: 0.0343492329120636 2023-01-24 03:46:55.053801: step: 400/459, loss: 0.0013351708184927702 2023-01-24 03:46:55.646871: step: 402/459, loss: 0.00975511409342289 2023-01-24 03:46:56.245758: step: 404/459, loss: 0.006766234524548054 2023-01-24 03:46:56.815873: step: 406/459, loss: 0.15588019788265228 2023-01-24 03:46:57.390958: step: 408/459, loss: 0.00018391342018730938 2023-01-24 03:46:57.972952: step: 410/459, loss: 0.013888278044760227 2023-01-24 03:46:58.601661: step: 412/459, loss: 0.0015054558170959353 2023-01-24 03:46:59.228485: step: 414/459, loss: 0.01293299812823534 2023-01-24 03:46:59.864775: step: 416/459, loss: 0.0025213826447725296 2023-01-24 03:47:00.493041: step: 418/459, loss: 0.061626896262168884 2023-01-24 03:47:01.127836: step: 420/459, loss: 0.03309525549411774 2023-01-24 03:47:01.775805: step: 422/459, loss: 0.04906989261507988 2023-01-24 03:47:02.432768: step: 424/459, loss: 0.014726518653333187 2023-01-24 03:47:03.069859: step: 426/459, loss: 0.11423923075199127 2023-01-24 03:47:03.678781: step: 428/459, loss: 0.00277215545065701 2023-01-24 03:47:04.265243: step: 430/459, loss: 0.06531518697738647 2023-01-24 03:47:04.878931: step: 432/459, loss: 0.03635229170322418 2023-01-24 03:47:05.545099: step: 434/459, loss: 
0.046059317886829376 2023-01-24 03:47:06.206856: step: 436/459, loss: 0.03539987653493881 2023-01-24 03:47:06.761861: step: 438/459, loss: 0.01315388549119234 2023-01-24 03:47:07.426291: step: 440/459, loss: 0.15612705051898956 2023-01-24 03:47:08.041346: step: 442/459, loss: 0.03973471373319626 2023-01-24 03:47:08.665113: step: 444/459, loss: 0.0017640675650909543 2023-01-24 03:47:09.313724: step: 446/459, loss: 0.03629248961806297 2023-01-24 03:47:09.943436: step: 448/459, loss: 0.010772665962576866 2023-01-24 03:47:10.567404: step: 450/459, loss: 0.05820620432496071 2023-01-24 03:47:11.188865: step: 452/459, loss: 0.08823796361684799 2023-01-24 03:47:11.826356: step: 454/459, loss: 0.01783677190542221 2023-01-24 03:47:12.464841: step: 456/459, loss: 0.005845604930073023 2023-01-24 03:47:13.101057: step: 458/459, loss: 0.006986425723880529 2023-01-24 03:47:13.676869: step: 460/459, loss: 0.019923659041523933 2023-01-24 03:47:14.256956: step: 462/459, loss: 0.01544477790594101 2023-01-24 03:47:14.839023: step: 464/459, loss: 0.015854960307478905 2023-01-24 03:47:15.477794: step: 466/459, loss: 0.0011922973208129406 2023-01-24 03:47:16.078979: step: 468/459, loss: 8.200189040508121e-05 2023-01-24 03:47:16.713423: step: 470/459, loss: 0.008584151044487953 2023-01-24 03:47:17.327185: step: 472/459, loss: 0.03542543947696686 2023-01-24 03:47:17.919583: step: 474/459, loss: 0.003217250807210803 2023-01-24 03:47:18.533009: step: 476/459, loss: 0.027810420840978622 2023-01-24 03:47:19.163435: step: 478/459, loss: 2.5431430339813232 2023-01-24 03:47:19.795601: step: 480/459, loss: 0.06910637021064758 2023-01-24 03:47:20.400960: step: 482/459, loss: 0.0441950261592865 2023-01-24 03:47:20.976792: step: 484/459, loss: 0.02424691803753376 2023-01-24 03:47:21.619220: step: 486/459, loss: 0.00032066411222331226 2023-01-24 03:47:22.234807: step: 488/459, loss: 0.05623704195022583 2023-01-24 03:47:22.849307: step: 490/459, loss: 0.008884445764124393 2023-01-24 03:47:23.481655: step: 492/459, loss: 0.03728770837187767 2023-01-24 03:47:24.082374: step: 494/459, loss: 0.0634959265589714 2023-01-24 03:47:24.675834: step: 496/459, loss: 0.00038845784729346633 2023-01-24 03:47:25.249917: step: 498/459, loss: 0.0269345473498106 2023-01-24 03:47:25.919470: step: 500/459, loss: 0.004486780613660812 2023-01-24 03:47:26.546867: step: 502/459, loss: 0.01787237636744976 2023-01-24 03:47:27.185589: step: 504/459, loss: 0.0011124002048745751 2023-01-24 03:47:27.795209: step: 506/459, loss: 0.027922244742512703 2023-01-24 03:47:28.426859: step: 508/459, loss: 0.04599940776824951 2023-01-24 03:47:29.217316: step: 510/459, loss: 0.004600734915584326 2023-01-24 03:47:29.839286: step: 512/459, loss: 0.0011268999660387635 2023-01-24 03:47:30.461661: step: 514/459, loss: 0.007138758432120085 2023-01-24 03:47:31.029013: step: 516/459, loss: 0.0026226146146655083 2023-01-24 03:47:31.649000: step: 518/459, loss: 0.008563215844333172 2023-01-24 03:47:32.221795: step: 520/459, loss: 1.1953307390213013 2023-01-24 03:47:32.861640: step: 522/459, loss: 0.013078540563583374 2023-01-24 03:47:33.532963: step: 524/459, loss: 0.028435276821255684 2023-01-24 03:47:34.185079: step: 526/459, loss: 0.08546190708875656 2023-01-24 03:47:34.699802: step: 528/459, loss: 0.0465494841337204 2023-01-24 03:47:35.281078: step: 530/459, loss: 0.0040944842621684074 2023-01-24 03:47:35.868234: step: 532/459, loss: 0.04818112403154373 2023-01-24 03:47:36.481962: step: 534/459, loss: 0.006564842537045479 2023-01-24 03:47:37.115797: step: 536/459, loss: 
0.0018481385195627809 2023-01-24 03:47:37.747965: step: 538/459, loss: 0.023060768842697144 2023-01-24 03:47:38.438651: step: 540/459, loss: 0.08878973126411438 2023-01-24 03:47:39.105980: step: 542/459, loss: 0.0022218292579054832 2023-01-24 03:47:39.717426: step: 544/459, loss: 0.01006826851516962 2023-01-24 03:47:40.334695: step: 546/459, loss: 0.012437655590474606 2023-01-24 03:47:40.954819: step: 548/459, loss: 0.00801883079111576 2023-01-24 03:47:41.575841: step: 550/459, loss: 0.029841681942343712 2023-01-24 03:47:42.183607: step: 552/459, loss: 0.016070587560534477 2023-01-24 03:47:42.783420: step: 554/459, loss: 0.011221864260733128 2023-01-24 03:47:43.437486: step: 556/459, loss: 9.288477897644043 2023-01-24 03:47:44.015385: step: 558/459, loss: 0.005443606525659561 2023-01-24 03:47:44.635181: step: 560/459, loss: 0.003337146481499076 2023-01-24 03:47:45.263615: step: 562/459, loss: 0.0014250748790800571 2023-01-24 03:47:45.909083: step: 564/459, loss: 0.00801643542945385 2023-01-24 03:47:46.505735: step: 566/459, loss: 0.013177369721233845 2023-01-24 03:47:47.119311: step: 568/459, loss: 0.024609485641121864 2023-01-24 03:47:47.760255: step: 570/459, loss: 0.012960552237927914 2023-01-24 03:47:48.358500: step: 572/459, loss: 0.060094282031059265 2023-01-24 03:47:49.042900: step: 574/459, loss: 0.00813651829957962 2023-01-24 03:47:49.659832: step: 576/459, loss: 0.005584185477346182 2023-01-24 03:47:50.248549: step: 578/459, loss: 0.0008680379833094776 2023-01-24 03:47:50.877642: step: 580/459, loss: 0.010946196503937244 2023-01-24 03:47:51.473018: step: 582/459, loss: 0.0017344917869195342 2023-01-24 03:47:52.038951: step: 584/459, loss: 0.0030771575402468443 2023-01-24 03:47:52.643163: step: 586/459, loss: 0.03430717810988426 2023-01-24 03:47:53.271930: step: 588/459, loss: 0.04506222531199455 2023-01-24 03:47:53.831595: step: 590/459, loss: 0.004058033227920532 2023-01-24 03:47:54.431188: step: 592/459, loss: 0.004001768305897713 2023-01-24 03:47:55.045649: step: 594/459, loss: 0.018537407740950584 2023-01-24 03:47:55.666797: step: 596/459, loss: 0.000489353493321687 2023-01-24 03:47:56.352828: step: 598/459, loss: 0.007890233770012856 2023-01-24 03:47:56.978019: step: 600/459, loss: 0.0444871261715889 2023-01-24 03:47:57.682185: step: 602/459, loss: 0.01652003824710846 2023-01-24 03:47:58.282351: step: 604/459, loss: 0.002785859862342477 2023-01-24 03:47:58.930370: step: 606/459, loss: 0.01872415840625763 2023-01-24 03:47:59.551572: step: 608/459, loss: 0.0015370084438472986 2023-01-24 03:48:00.171720: step: 610/459, loss: 0.04942959547042847 2023-01-24 03:48:00.798544: step: 612/459, loss: 0.01622886396944523 2023-01-24 03:48:01.414406: step: 614/459, loss: 0.0350382961332798 2023-01-24 03:48:02.015204: step: 616/459, loss: 0.08111365884542465 2023-01-24 03:48:02.616885: step: 618/459, loss: 0.0015952029498293996 2023-01-24 03:48:03.263374: step: 620/459, loss: 0.03918926790356636 2023-01-24 03:48:03.829210: step: 622/459, loss: 0.00025531963910907507 2023-01-24 03:48:04.438635: step: 624/459, loss: 0.017725635319948196 2023-01-24 03:48:05.023346: step: 626/459, loss: 0.0135548897087574 2023-01-24 03:48:05.605215: step: 628/459, loss: 0.023473743349313736 2023-01-24 03:48:06.255198: step: 630/459, loss: 0.013099350035190582 2023-01-24 03:48:06.866582: step: 632/459, loss: 0.010910948738455772 2023-01-24 03:48:07.534020: step: 634/459, loss: 0.018099969252943993 2023-01-24 03:48:08.119783: step: 636/459, loss: 0.03608383238315582 2023-01-24 03:48:08.776810: step: 638/459, 
loss: 0.006658182013779879 2023-01-24 03:48:09.417651: step: 640/459, loss: 0.034555722028017044 2023-01-24 03:48:09.980694: step: 642/459, loss: 0.003933314699679613 2023-01-24 03:48:10.559558: step: 644/459, loss: 0.005736960098147392 2023-01-24 03:48:11.226789: step: 646/459, loss: 0.16029903292655945 2023-01-24 03:48:11.876429: step: 648/459, loss: 0.0073745292611420155 2023-01-24 03:48:12.460230: step: 650/459, loss: 0.037096746265888214 2023-01-24 03:48:13.069705: step: 652/459, loss: 0.001708777854219079 2023-01-24 03:48:13.752259: step: 654/459, loss: 0.002747389255091548 2023-01-24 03:48:14.301267: step: 656/459, loss: 0.015616623684763908 2023-01-24 03:48:14.902761: step: 658/459, loss: 0.0021025435999035835 2023-01-24 03:48:15.495365: step: 660/459, loss: 0.010565674863755703 2023-01-24 03:48:16.142302: step: 662/459, loss: 0.051082294434309006 2023-01-24 03:48:16.786519: step: 664/459, loss: 0.04153292253613472 2023-01-24 03:48:17.400057: step: 666/459, loss: 0.036586932837963104 2023-01-24 03:48:18.043012: step: 668/459, loss: 0.0011201738379895687 2023-01-24 03:48:18.650351: step: 670/459, loss: 3.7273740768432617 2023-01-24 03:48:19.280536: step: 672/459, loss: 0.02100210264325142 2023-01-24 03:48:19.953568: step: 674/459, loss: 0.029841067269444466 2023-01-24 03:48:20.627598: step: 676/459, loss: 0.09814178943634033 2023-01-24 03:48:21.209216: step: 678/459, loss: 0.2172936350107193 2023-01-24 03:48:21.759418: step: 680/459, loss: 0.002042256062850356 2023-01-24 03:48:22.328683: step: 682/459, loss: 0.08190726488828659 2023-01-24 03:48:22.943456: step: 684/459, loss: 0.0030123714823275805 2023-01-24 03:48:23.585494: step: 686/459, loss: 0.02035401202738285 2023-01-24 03:48:24.253023: step: 688/459, loss: 0.03154322877526283 2023-01-24 03:48:24.869958: step: 690/459, loss: 2.0591912269592285 2023-01-24 03:48:25.472214: step: 692/459, loss: 0.0418601892888546 2023-01-24 03:48:26.057431: step: 694/459, loss: 0.03373988717794418 2023-01-24 03:48:26.780473: step: 696/459, loss: 0.002586325164884329 2023-01-24 03:48:27.400997: step: 698/459, loss: 0.034663014113903046 2023-01-24 03:48:27.946845: step: 700/459, loss: 0.060513563454151154 2023-01-24 03:48:28.600886: step: 702/459, loss: 0.008792434819042683 2023-01-24 03:48:29.240710: step: 704/459, loss: 0.0048340545035898685 2023-01-24 03:48:29.902843: step: 706/459, loss: 0.0071281204000115395 2023-01-24 03:48:30.557691: step: 708/459, loss: 0.011001789942383766 2023-01-24 03:48:31.195981: step: 710/459, loss: 0.020481051877141 2023-01-24 03:48:31.818618: step: 712/459, loss: 0.004764649551361799 2023-01-24 03:48:32.426747: step: 714/459, loss: 0.00511149549856782 2023-01-24 03:48:33.054303: step: 716/459, loss: 0.02307422272861004 2023-01-24 03:48:33.638067: step: 718/459, loss: 0.05768810957670212 2023-01-24 03:48:34.198834: step: 720/459, loss: 0.0016367131611332297 2023-01-24 03:48:34.881683: step: 722/459, loss: 0.05602371692657471 2023-01-24 03:48:35.476067: step: 724/459, loss: 0.014943762682378292 2023-01-24 03:48:36.053163: step: 726/459, loss: 0.005132453516125679 2023-01-24 03:48:36.665646: step: 728/459, loss: 0.004101622849702835 2023-01-24 03:48:37.370107: step: 730/459, loss: 0.0464865043759346 2023-01-24 03:48:38.088553: step: 732/459, loss: 0.007229621056467295 2023-01-24 03:48:38.704493: step: 734/459, loss: 0.0315900482237339 2023-01-24 03:48:39.378303: step: 736/459, loss: 0.00047792791156098247 2023-01-24 03:48:40.003292: step: 738/459, loss: 0.014448371715843678 2023-01-24 03:48:40.627811: step: 740/459, 
loss: 0.03211522102355957 2023-01-24 03:48:41.366293: step: 742/459, loss: 0.019449273124337196 2023-01-24 03:48:41.953154: step: 744/459, loss: 0.015209305100142956 2023-01-24 03:48:42.648212: step: 746/459, loss: 0.04008972644805908 2023-01-24 03:48:43.296842: step: 748/459, loss: 0.030270002782344818 2023-01-24 03:48:43.902819: step: 750/459, loss: 0.029033049941062927 2023-01-24 03:48:44.528813: step: 752/459, loss: 0.006346540059894323 2023-01-24 03:48:45.128452: step: 754/459, loss: 0.02252040058374405 2023-01-24 03:48:45.765083: step: 756/459, loss: 0.04599121958017349 2023-01-24 03:48:46.495591: step: 758/459, loss: 0.07799522578716278 2023-01-24 03:48:47.090972: step: 760/459, loss: 0.051758069545030594 2023-01-24 03:48:47.716508: step: 762/459, loss: 0.02054351381957531 2023-01-24 03:48:48.359439: step: 764/459, loss: 0.10213804990053177 2023-01-24 03:48:49.067309: step: 766/459, loss: 0.028581928461790085 2023-01-24 03:48:49.718841: step: 768/459, loss: 0.017777204513549805 2023-01-24 03:48:50.334137: step: 770/459, loss: 0.13240818679332733 2023-01-24 03:48:50.968153: step: 772/459, loss: 0.014331232756376266 2023-01-24 03:48:51.645965: step: 774/459, loss: 0.011562218889594078 2023-01-24 03:48:52.294338: step: 776/459, loss: 0.29734301567077637 2023-01-24 03:48:52.951938: step: 778/459, loss: 0.052105456590652466 2023-01-24 03:48:53.582171: step: 780/459, loss: 3.3855135440826416 2023-01-24 03:48:54.175101: step: 782/459, loss: 0.6360824704170227 2023-01-24 03:48:54.795231: step: 784/459, loss: 0.00944860465824604 2023-01-24 03:48:55.399896: step: 786/459, loss: 0.022564832121133804 2023-01-24 03:48:56.015734: step: 788/459, loss: 0.5800425410270691 2023-01-24 03:48:56.678653: step: 790/459, loss: 0.21023599803447723 2023-01-24 03:48:57.291159: step: 792/459, loss: 44.55595397949219 2023-01-24 03:48:57.938559: step: 794/459, loss: 0.15633830428123474 2023-01-24 03:48:58.539877: step: 796/459, loss: 0.09543262422084808 2023-01-24 03:48:59.134517: step: 798/459, loss: 0.18098540604114532 2023-01-24 03:48:59.744722: step: 800/459, loss: 10.01526165008545 2023-01-24 03:49:00.418737: step: 802/459, loss: 3.8225436210632324 2023-01-24 03:49:01.087081: step: 804/459, loss: 1.7698249816894531 2023-01-24 03:49:01.774209: step: 806/459, loss: 0.4368470311164856 2023-01-24 03:49:02.378263: step: 808/459, loss: 0.12039543688297272 2023-01-24 03:49:02.962919: step: 810/459, loss: 0.3024674654006958 2023-01-24 03:49:03.601998: step: 812/459, loss: 0.3660544157028198 2023-01-24 03:49:04.192408: step: 814/459, loss: 0.08893898129463196 2023-01-24 03:49:04.797529: step: 816/459, loss: 0.3827812373638153 2023-01-24 03:49:05.387192: step: 818/459, loss: 0.13909341394901276 2023-01-24 03:49:06.007837: step: 820/459, loss: 9.021913528442383 2023-01-24 03:49:06.578092: step: 822/459, loss: 0.11428365111351013 2023-01-24 03:49:07.169439: step: 824/459, loss: 0.714241623878479 2023-01-24 03:49:07.806802: step: 826/459, loss: 0.06886487454175949 2023-01-24 03:49:08.432554: step: 828/459, loss: 0.8287029266357422 2023-01-24 03:49:09.032337: step: 830/459, loss: 0.12026642262935638 2023-01-24 03:49:09.603836: step: 832/459, loss: 0.06240515410900116 2023-01-24 03:49:10.206472: step: 834/459, loss: 0.0811469778418541 2023-01-24 03:49:10.841677: step: 836/459, loss: 0.04993806406855583 2023-01-24 03:49:11.430264: step: 838/459, loss: 0.22097252309322357 2023-01-24 03:49:12.067739: step: 840/459, loss: 0.04781309887766838 2023-01-24 03:49:12.669182: step: 842/459, loss: 0.02523753233253956 2023-01-24 
03:49:13.273675: step: 844/459, loss: 0.03608804941177368 2023-01-24 03:49:13.811788: step: 846/459, loss: 0.013212037272751331 2023-01-24 03:49:14.438303: step: 848/459, loss: 0.010490722954273224 2023-01-24 03:49:14.973572: step: 850/459, loss: 0.0037325453013181686 2023-01-24 03:49:15.598517: step: 852/459, loss: 0.04593127220869064 2023-01-24 03:49:16.228477: step: 854/459, loss: 0.044746071100234985 2023-01-24 03:49:16.828922: step: 856/459, loss: 0.03442002832889557 2023-01-24 03:49:17.503304: step: 858/459, loss: 0.02628861553966999 2023-01-24 03:49:18.194717: step: 860/459, loss: 0.2606162428855896 2023-01-24 03:49:18.922489: step: 862/459, loss: 0.10807449370622635 2023-01-24 03:49:19.535660: step: 864/459, loss: 0.020875999704003334 2023-01-24 03:49:20.256240: step: 866/459, loss: 0.07340864837169647 2023-01-24 03:49:20.858514: step: 868/459, loss: 0.015463783405721188 2023-01-24 03:49:21.505184: step: 870/459, loss: 0.06284695118665695 2023-01-24 03:49:22.074069: step: 872/459, loss: 0.670619785785675 2023-01-24 03:49:22.667021: step: 874/459, loss: 0.13074783980846405 2023-01-24 03:49:23.320783: step: 876/459, loss: 0.0027104460168629885 2023-01-24 03:49:23.947185: step: 878/459, loss: 0.10260242223739624 2023-01-24 03:49:24.541292: step: 880/459, loss: 2.4872047901153564 2023-01-24 03:49:25.166343: step: 882/459, loss: 0.05586446449160576 2023-01-24 03:49:25.781211: step: 884/459, loss: 0.2219143509864807 2023-01-24 03:49:26.458594: step: 886/459, loss: 0.0712456926703453 2023-01-24 03:49:27.051959: step: 888/459, loss: 0.002148742787539959 2023-01-24 03:49:27.671068: step: 890/459, loss: 0.01034699846059084 2023-01-24 03:49:28.235163: step: 892/459, loss: 0.047062039375305176 2023-01-24 03:49:28.850856: step: 894/459, loss: 0.04035262390971184 2023-01-24 03:49:29.435327: step: 896/459, loss: 0.024601127952337265 2023-01-24 03:49:30.073149: step: 898/459, loss: 0.017477601766586304 2023-01-24 03:49:30.738104: step: 900/459, loss: 0.06748461723327637 2023-01-24 03:49:31.316219: step: 902/459, loss: 0.035718705505132675 2023-01-24 03:49:31.898792: step: 904/459, loss: 0.008956462144851685 2023-01-24 03:49:32.498039: step: 906/459, loss: 0.014126504771411419 2023-01-24 03:49:33.212732: step: 908/459, loss: 0.02441484108567238 2023-01-24 03:49:33.892696: step: 910/459, loss: 0.042878322303295135 2023-01-24 03:49:34.432064: step: 912/459, loss: 0.005639838054776192 2023-01-24 03:49:35.050680: step: 914/459, loss: 0.45435959100723267 2023-01-24 03:49:35.657255: step: 916/459, loss: 0.029968274757266045 2023-01-24 03:49:36.314726: step: 918/459, loss: 0.016998670995235443 2023-01-24 03:49:36.763468: step: 920/459, loss: 2.2203812477528118e-05 ================================================== Loss: 0.249 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31430807438212166, 'r': 0.296415774132665, 'f1': 0.30509983001545793}, 'combined': 0.22481040106402161, 'epoch': 30} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3622725438244103, 'r': 0.29843887125226176, 'f1': 0.32727210881348623}, 'combined': 0.20945414964063117, 'epoch': 30} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.31073424556988577, 'r': 0.3007105602289217, 'f1': 0.3056402415441499}, 'combined': 0.22520859903253151, 'epoch': 30} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 
0.6399999999999999}, 'slot': {'p': 0.37007354541978166, 'r': 0.293367392369136, 'f1': 0.3272861375314905}, 'combined': 0.20946312802015388, 'epoch': 30} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33042300525415447, 'r': 0.300954539889932, 'f1': 0.315001077501478}, 'combined': 0.23210605710635218, 'epoch': 30} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3687789432105716, 'r': 0.30200277423977656, 'f1': 0.3320670824307298}, 'combined': 0.23808583268618366, 'epoch': 30} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3131313131313131, 'r': 0.2952380952380952, 'f1': 0.30392156862745096}, 'combined': 0.2026143790849673, 'epoch': 30} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2037037037037037, 'r': 0.2391304347826087, 'f1': 0.22000000000000003}, 'combined': 0.11000000000000001, 'epoch': 30} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4444444444444444, 'r': 0.13793103448275862, 'f1': 0.21052631578947367}, 'combined': 0.14035087719298245, 'epoch': 30} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 'epoch': 10} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 'epoch': 10} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'epoch': 10} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3234579439252337, 'r': 0.32836812144212524, 'f1': 0.32589453860640305}, 'combined': 0.2401328179205075, 'epoch': 25} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33820520545292077, 'r': 0.29673590233199043, 'f1': 0.3161163313667358}, 'combined': 0.20231445207471088, 'epoch': 25} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.32142857142857145, 'r': 0.391304347826087, 'f1': 0.35294117647058826}, 'combined': 0.17647058823529413, 'epoch': 25} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8} ****************************** Epoch: 31 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 
--role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:52:12.693907: step: 2/459, loss: 0.11639727652072906 2023-01-24 03:52:13.337593: step: 4/459, loss: 0.06600699573755264 2023-01-24 03:52:13.926797: step: 6/459, loss: 0.0004020802734885365 2023-01-24 03:52:14.537002: step: 8/459, loss: 0.01678435131907463 2023-01-24 03:52:15.227364: step: 10/459, loss: 0.0034812712110579014 2023-01-24 03:52:15.830354: step: 12/459, loss: 0.00023609190247952938 2023-01-24 03:52:16.441428: step: 14/459, loss: 0.016141340136528015 2023-01-24 03:52:17.036074: step: 16/459, loss: 0.04256970062851906 2023-01-24 03:52:17.686252: step: 18/459, loss: 0.020973872393369675 2023-01-24 03:52:18.333436: step: 20/459, loss: 0.0291434433311224 2023-01-24 03:52:18.952576: step: 22/459, loss: 0.07635637372732162 2023-01-24 03:52:19.607296: step: 24/459, loss: 0.44229161739349365 2023-01-24 03:52:20.226728: step: 26/459, loss: 0.02421683445572853 2023-01-24 03:52:20.825040: step: 28/459, loss: 0.016720809042453766 2023-01-24 03:52:21.441147: step: 30/459, loss: 0.01384560577571392 2023-01-24 03:52:22.044962: step: 32/459, loss: 0.010489688254892826 2023-01-24 03:52:22.680330: step: 34/459, loss: 0.010953865014016628 2023-01-24 03:52:23.359716: step: 36/459, loss: 0.013136404566466808 2023-01-24 03:52:23.958323: step: 38/459, loss: 0.049959953874349594 2023-01-24 03:52:24.560325: step: 40/459, loss: 0.08619759976863861 2023-01-24 03:52:25.239324: step: 42/459, loss: 0.012048260308802128 2023-01-24 03:52:25.819592: step: 44/459, loss: 0.031301349401474 2023-01-24 03:52:26.513420: step: 46/459, loss: 0.046990055590867996 2023-01-24 03:52:27.128733: step: 48/459, loss: 0.04179821535944939 2023-01-24 03:52:27.750709: step: 50/459, loss: 0.013736954890191555 2023-01-24 03:52:28.334703: step: 52/459, loss: 0.024151859804987907 2023-01-24 03:52:29.065970: step: 54/459, loss: 0.008089632727205753 2023-01-24 03:52:29.694608: step: 56/459, loss: 0.018209058791399002 2023-01-24 03:52:30.324488: step: 58/459, loss: 0.01977146789431572 2023-01-24 03:52:30.921284: step: 60/459, loss: 0.031056756153702736 2023-01-24 03:52:31.526366: step: 62/459, loss: 0.009726413525640965 2023-01-24 03:52:32.125686: step: 64/459, loss: 0.014271916821599007 2023-01-24 03:52:32.805326: step: 66/459, loss: 0.004786116071045399 2023-01-24 03:52:33.492120: step: 68/459, loss: 0.030121251940727234 2023-01-24 03:52:34.072169: step: 70/459, loss: 0.002759497379884124 2023-01-24 03:52:34.711515: step: 72/459, loss: 0.005128871183842421 2023-01-24 03:52:35.397675: step: 74/459, loss: 0.15534813702106476 2023-01-24 03:52:35.973751: step: 76/459, loss: 0.01883537881076336 2023-01-24 03:52:36.602315: step: 78/459, loss: 0.0016180335078388453 2023-01-24 03:52:37.163689: step: 80/459, loss: 0.04840220883488655 2023-01-24 03:52:37.790721: step: 82/459, loss: 0.05618644133210182 2023-01-24 03:52:38.411755: step: 84/459, loss: 0.007943175733089447 2023-01-24 03:52:39.030706: step: 86/459, loss: 0.010549483820796013 2023-01-24 03:52:39.653318: step: 88/459, loss: 0.02031179703772068 2023-01-24 03:52:40.287672: step: 90/459, loss: 0.0027730814181268215 2023-01-24 03:52:40.866902: step: 92/459, loss: 0.0337551012635231 2023-01-24 03:52:41.523904: step: 94/459, loss: 0.030438996851444244 2023-01-24 03:52:42.877256: step: 96/459, loss: 0.0001237447140738368 2023-01-24 03:52:43.537364: step: 98/459, loss: 0.0017054586205631495 2023-01-24 03:52:44.129398: step: 100/459, loss: 0.0001818859891500324 2023-01-24 03:52:44.736918: step: 102/459, loss: 
0.0076607270166277885 2023-01-24 03:52:45.379796: step: 104/459, loss: 0.0038557269144803286 2023-01-24 03:52:46.020912: step: 106/459, loss: 0.024612030014395714 2023-01-24 03:52:46.674842: step: 108/459, loss: 0.2560913860797882 2023-01-24 03:52:47.242699: step: 110/459, loss: 0.3707655072212219 2023-01-24 03:52:47.898105: step: 112/459, loss: 0.003390396246686578 2023-01-24 03:52:48.486461: step: 114/459, loss: 0.04430088773369789 2023-01-24 03:52:49.129249: step: 116/459, loss: 0.010669096373021603 2023-01-24 03:52:49.771172: step: 118/459, loss: 0.009892426431179047 2023-01-24 03:52:50.502867: step: 120/459, loss: 0.02229899913072586 2023-01-24 03:52:51.072891: step: 122/459, loss: 0.044156525284051895 2023-01-24 03:52:51.745251: step: 124/459, loss: 0.026767127215862274 2023-01-24 03:52:52.269790: step: 126/459, loss: 0.00886260811239481 2023-01-24 03:52:52.858960: step: 128/459, loss: 0.019264034926891327 2023-01-24 03:52:53.452343: step: 130/459, loss: 0.07551407068967819 2023-01-24 03:52:53.996800: step: 132/459, loss: 0.035154473036527634 2023-01-24 03:52:54.582157: step: 134/459, loss: 0.0033889864571392536 2023-01-24 03:52:55.178879: step: 136/459, loss: 0.018227912485599518 2023-01-24 03:52:55.790037: step: 138/459, loss: 0.10903732478618622 2023-01-24 03:52:56.389772: step: 140/459, loss: 0.01031903550028801 2023-01-24 03:52:57.062709: step: 142/459, loss: 0.015128117986023426 2023-01-24 03:52:57.761592: step: 144/459, loss: 0.0071903495118021965 2023-01-24 03:52:58.366325: step: 146/459, loss: 0.009462553076446056 2023-01-24 03:52:58.920047: step: 148/459, loss: 0.004360587801784277 2023-01-24 03:52:59.478652: step: 150/459, loss: 0.0008454619091935456 2023-01-24 03:53:00.039099: step: 152/459, loss: 0.04571496322751045 2023-01-24 03:53:00.672201: step: 154/459, loss: 0.023791709914803505 2023-01-24 03:53:01.416618: step: 156/459, loss: 0.5283545255661011 2023-01-24 03:53:02.026969: step: 158/459, loss: 0.028441036120057106 2023-01-24 03:53:02.644414: step: 160/459, loss: 0.05548565834760666 2023-01-24 03:53:03.283061: step: 162/459, loss: 0.03823452815413475 2023-01-24 03:53:03.894734: step: 164/459, loss: 2.064972400665283 2023-01-24 03:53:04.505572: step: 166/459, loss: 0.00024257387849502265 2023-01-24 03:53:05.131801: step: 168/459, loss: 0.03458135202527046 2023-01-24 03:53:05.763259: step: 170/459, loss: 0.024566257372498512 2023-01-24 03:53:06.383623: step: 172/459, loss: 0.0032609791960567236 2023-01-24 03:53:06.992080: step: 174/459, loss: 0.03211427107453346 2023-01-24 03:53:07.670471: step: 176/459, loss: 0.003603515913709998 2023-01-24 03:53:08.268871: step: 178/459, loss: 0.9427171349525452 2023-01-24 03:53:08.856633: step: 180/459, loss: 0.014630839228630066 2023-01-24 03:53:09.459992: step: 182/459, loss: 0.004391971975564957 2023-01-24 03:53:10.034649: step: 184/459, loss: 8.417018398176879e-05 2023-01-24 03:53:10.634591: step: 186/459, loss: 0.0028493546415120363 2023-01-24 03:53:11.268310: step: 188/459, loss: 0.07832251489162445 2023-01-24 03:53:11.881203: step: 190/459, loss: 0.01855659857392311 2023-01-24 03:53:12.473171: step: 192/459, loss: 0.014727197587490082 2023-01-24 03:53:13.058419: step: 194/459, loss: 0.001983740832656622 2023-01-24 03:53:13.726612: step: 196/459, loss: 0.02549142949283123 2023-01-24 03:53:14.337168: step: 198/459, loss: 0.011620590463280678 2023-01-24 03:53:14.925756: step: 200/459, loss: 0.03360743820667267 2023-01-24 03:53:15.545235: step: 202/459, loss: 0.015928447246551514 2023-01-24 03:53:16.171871: step: 204/459, loss: 
0.09079810231924057 2023-01-24 03:53:16.742644: step: 206/459, loss: 0.0132521390914917 2023-01-24 03:53:17.382403: step: 208/459, loss: 0.09687486290931702 2023-01-24 03:53:18.053741: step: 210/459, loss: 0.009914042428135872 2023-01-24 03:53:18.675990: step: 212/459, loss: 0.043096356093883514 2023-01-24 03:53:19.296338: step: 214/459, loss: 0.0010095859179273248 2023-01-24 03:53:20.020433: step: 216/459, loss: 0.02304093725979328 2023-01-24 03:53:20.572464: step: 218/459, loss: 0.0014642201131209731 2023-01-24 03:53:21.132146: step: 220/459, loss: 0.008593712002038956 2023-01-24 03:53:21.786134: step: 222/459, loss: 0.0016739129787310958 2023-01-24 03:53:22.491078: step: 224/459, loss: 0.05668366700410843 2023-01-24 03:53:23.193145: step: 226/459, loss: 0.00047512343735434115 2023-01-24 03:53:23.802342: step: 228/459, loss: 0.009302089922130108 2023-01-24 03:53:24.364745: step: 230/459, loss: 0.003725750371813774 2023-01-24 03:53:25.011364: step: 232/459, loss: 0.016146307811141014 2023-01-24 03:53:25.660774: step: 234/459, loss: 0.06552322953939438 2023-01-24 03:53:26.281564: step: 236/459, loss: 0.001487987581640482 2023-01-24 03:53:26.880562: step: 238/459, loss: 0.008170939981937408 2023-01-24 03:53:27.607443: step: 240/459, loss: 0.0027221969794481993 2023-01-24 03:53:28.168825: step: 242/459, loss: 0.028433658182621002 2023-01-24 03:53:28.758410: step: 244/459, loss: 0.0025009966921061277 2023-01-24 03:53:29.405148: step: 246/459, loss: 0.014685239642858505 2023-01-24 03:53:30.058306: step: 248/459, loss: 0.06639797240495682 2023-01-24 03:53:30.633211: step: 250/459, loss: 0.06474443525075912 2023-01-24 03:53:31.249074: step: 252/459, loss: 0.18252408504486084 2023-01-24 03:53:31.843975: step: 254/459, loss: 0.0028052241541445255 2023-01-24 03:53:32.498997: step: 256/459, loss: 0.02568134106695652 2023-01-24 03:53:33.199133: step: 258/459, loss: 0.0005739213083870709 2023-01-24 03:53:33.771984: step: 260/459, loss: 0.017475511878728867 2023-01-24 03:53:34.407933: step: 262/459, loss: 0.036115098744630814 2023-01-24 03:53:35.053896: step: 264/459, loss: 0.00047294743126258254 2023-01-24 03:53:35.651748: step: 266/459, loss: 0.002688748063519597 2023-01-24 03:53:36.248677: step: 268/459, loss: 0.01993432641029358 2023-01-24 03:53:36.894709: step: 270/459, loss: 0.01302236970514059 2023-01-24 03:53:37.519746: step: 272/459, loss: 0.001626859768293798 2023-01-24 03:53:38.200008: step: 274/459, loss: 0.1312255859375 2023-01-24 03:53:38.805579: step: 276/459, loss: 0.07993265986442566 2023-01-24 03:53:39.366690: step: 278/459, loss: 0.009561249054968357 2023-01-24 03:53:39.979247: step: 280/459, loss: 0.9759014248847961 2023-01-24 03:53:40.569615: step: 282/459, loss: 4.5060256525175646e-05 2023-01-24 03:53:41.109952: step: 284/459, loss: 0.0005347654805518687 2023-01-24 03:53:41.708667: step: 286/459, loss: 0.008900748565793037 2023-01-24 03:53:42.334897: step: 288/459, loss: 0.025065509602427483 2023-01-24 03:53:42.969546: step: 290/459, loss: 0.02246633917093277 2023-01-24 03:53:43.569807: step: 292/459, loss: 0.05438963696360588 2023-01-24 03:53:44.208978: step: 294/459, loss: 0.0036647911183536053 2023-01-24 03:53:44.924332: step: 296/459, loss: 0.19662131369113922 2023-01-24 03:53:45.532437: step: 298/459, loss: 0.013410902582108974 2023-01-24 03:53:46.043184: step: 300/459, loss: 0.001831626403145492 2023-01-24 03:53:46.730631: step: 302/459, loss: 0.06020839139819145 2023-01-24 03:53:47.354297: step: 304/459, loss: 0.0016770384972915053 2023-01-24 03:53:47.934074: step: 306/459, 
loss: 0.11297744512557983 2023-01-24 03:53:48.577472: step: 308/459, loss: 0.04190801829099655 2023-01-24 03:53:49.355243: step: 310/459, loss: 0.01181124523282051 2023-01-24 03:53:49.939528: step: 312/459, loss: 0.0002506557502783835 2023-01-24 03:53:50.655940: step: 314/459, loss: 0.046875301748514175 2023-01-24 03:53:51.292722: step: 316/459, loss: 0.022253258153796196 2023-01-24 03:53:51.906556: step: 318/459, loss: 0.004601465072482824 2023-01-24 03:53:52.546994: step: 320/459, loss: 2.1583226043730974e-05 2023-01-24 03:53:53.171985: step: 322/459, loss: 0.021954871714115143 2023-01-24 03:53:53.787038: step: 324/459, loss: 0.0014345578383654356 2023-01-24 03:53:54.342006: step: 326/459, loss: 0.07169710099697113 2023-01-24 03:53:54.947584: step: 328/459, loss: 0.012742114253342152 2023-01-24 03:53:55.555142: step: 330/459, loss: 0.0014062016271054745 2023-01-24 03:53:56.180794: step: 332/459, loss: 0.06634141504764557 2023-01-24 03:53:56.762527: step: 334/459, loss: 0.00871820654720068 2023-01-24 03:53:57.395196: step: 336/459, loss: 0.0014054650673642755 2023-01-24 03:53:58.005040: step: 338/459, loss: 0.006348360795527697 2023-01-24 03:53:58.593870: step: 340/459, loss: 0.011303437873721123 2023-01-24 03:53:59.246000: step: 342/459, loss: 0.0001800905738491565 2023-01-24 03:53:59.872775: step: 344/459, loss: 0.0019530390854924917 2023-01-24 03:54:00.438469: step: 346/459, loss: 0.014627741649746895 2023-01-24 03:54:01.058141: step: 348/459, loss: 0.04156123846769333 2023-01-24 03:54:01.674827: step: 350/459, loss: 0.010947596281766891 2023-01-24 03:54:02.300481: step: 352/459, loss: 0.0021426246967166662 2023-01-24 03:54:02.947970: step: 354/459, loss: 0.005883106030523777 2023-01-24 03:54:03.524956: step: 356/459, loss: 0.04364943876862526 2023-01-24 03:54:04.186405: step: 358/459, loss: 0.044860970228910446 2023-01-24 03:54:04.789102: step: 360/459, loss: 0.15338173508644104 2023-01-24 03:54:05.434453: step: 362/459, loss: 0.020516203716397285 2023-01-24 03:54:06.018868: step: 364/459, loss: 0.02264227531850338 2023-01-24 03:54:06.684669: step: 366/459, loss: 0.0026351644191890955 2023-01-24 03:54:07.392392: step: 368/459, loss: 0.020818665623664856 2023-01-24 03:54:08.022371: step: 370/459, loss: 0.004157358314841986 2023-01-24 03:54:08.603324: step: 372/459, loss: 0.0032077725045382977 2023-01-24 03:54:09.221946: step: 374/459, loss: 0.35990041494369507 2023-01-24 03:54:09.800846: step: 376/459, loss: 0.004548488184809685 2023-01-24 03:54:10.403410: step: 378/459, loss: 0.4031450152397156 2023-01-24 03:54:10.975649: step: 380/459, loss: 0.003320808755233884 2023-01-24 03:54:11.633428: step: 382/459, loss: 0.0031769087072461843 2023-01-24 03:54:12.284034: step: 384/459, loss: 0.06716512143611908 2023-01-24 03:54:12.947839: step: 386/459, loss: 0.0005060411058366299 2023-01-24 03:54:13.610382: step: 388/459, loss: 0.003960090223699808 2023-01-24 03:54:14.216800: step: 390/459, loss: 0.07602556049823761 2023-01-24 03:54:14.862956: step: 392/459, loss: 0.01750107668340206 2023-01-24 03:54:15.444448: step: 394/459, loss: 0.00804806686937809 2023-01-24 03:54:16.114284: step: 396/459, loss: 0.002430332824587822 2023-01-24 03:54:16.742496: step: 398/459, loss: 0.0024288459680974483 2023-01-24 03:54:17.308371: step: 400/459, loss: 0.08036378026008606 2023-01-24 03:54:17.887865: step: 402/459, loss: 0.019106948748230934 2023-01-24 03:54:18.475913: step: 404/459, loss: 0.0018587167141959071 2023-01-24 03:54:19.141874: step: 406/459, loss: 0.025116320699453354 2023-01-24 03:54:19.834440: 
step: 408/459, loss: 0.0019216161454096437 2023-01-24 03:54:20.446260: step: 410/459, loss: 0.3519969582557678 2023-01-24 03:54:21.147233: step: 412/459, loss: 0.04925873503088951 2023-01-24 03:54:21.738675: step: 414/459, loss: 0.03891848027706146 2023-01-24 03:54:22.323814: step: 416/459, loss: 0.0037702620029449463 2023-01-24 03:54:22.894852: step: 418/459, loss: 0.009770243428647518 2023-01-24 03:54:23.504984: step: 420/459, loss: 0.0838962271809578 2023-01-24 03:54:24.141049: step: 422/459, loss: 0.004004433285444975 2023-01-24 03:54:24.793857: step: 424/459, loss: 0.009588190354406834 2023-01-24 03:54:25.356084: step: 426/459, loss: 0.008744190447032452 2023-01-24 03:54:26.002037: step: 428/459, loss: 0.02314799278974533 2023-01-24 03:54:26.628007: step: 430/459, loss: 0.04516763612627983 2023-01-24 03:54:27.236804: step: 432/459, loss: 0.015032046474516392 2023-01-24 03:54:27.871607: step: 434/459, loss: 0.03706764802336693 2023-01-24 03:54:28.565125: step: 436/459, loss: 0.03079119697213173 2023-01-24 03:54:29.122847: step: 438/459, loss: 0.00910855457186699 2023-01-24 03:54:29.789483: step: 440/459, loss: 0.004590398166328669 2023-01-24 03:54:30.360395: step: 442/459, loss: 0.012899479828774929 2023-01-24 03:54:30.977504: step: 444/459, loss: 0.00972601305693388 2023-01-24 03:54:31.591019: step: 446/459, loss: 0.000373634829884395 2023-01-24 03:54:32.202086: step: 448/459, loss: 0.0003883271710947156 2023-01-24 03:54:32.814247: step: 450/459, loss: 0.040052421391010284 2023-01-24 03:54:33.406695: step: 452/459, loss: 0.016246480867266655 2023-01-24 03:54:34.010739: step: 454/459, loss: 0.028501294553279877 2023-01-24 03:54:34.613901: step: 456/459, loss: 0.010112874209880829 2023-01-24 03:54:35.212987: step: 458/459, loss: 0.02957557700574398 2023-01-24 03:54:35.919753: step: 460/459, loss: 0.004022431559860706 2023-01-24 03:54:36.497825: step: 462/459, loss: 0.003214056370779872 2023-01-24 03:54:37.113480: step: 464/459, loss: 0.01960635930299759 2023-01-24 03:54:37.752207: step: 466/459, loss: 0.010376803576946259 2023-01-24 03:54:38.342149: step: 468/459, loss: 0.020201776176691055 2023-01-24 03:54:38.934577: step: 470/459, loss: 0.009007662534713745 2023-01-24 03:54:39.575485: step: 472/459, loss: 0.005022779572755098 2023-01-24 03:54:40.298113: step: 474/459, loss: 0.03852350264787674 2023-01-24 03:54:40.895155: step: 476/459, loss: 0.008670186623930931 2023-01-24 03:54:41.560288: step: 478/459, loss: 0.08384773135185242 2023-01-24 03:54:42.199203: step: 480/459, loss: 0.006680930498987436 2023-01-24 03:54:42.789051: step: 482/459, loss: 0.026255575940012932 2023-01-24 03:54:43.389069: step: 484/459, loss: 0.029316773638129234 2023-01-24 03:54:44.024459: step: 486/459, loss: 0.011658316478133202 2023-01-24 03:54:44.627741: step: 488/459, loss: 0.10879867523908615 2023-01-24 03:54:45.237725: step: 490/459, loss: 0.23303338885307312 2023-01-24 03:54:45.884924: step: 492/459, loss: 0.01616489328444004 2023-01-24 03:54:46.432332: step: 494/459, loss: 0.003347319085150957 2023-01-24 03:54:47.107154: step: 496/459, loss: 0.017096657305955887 2023-01-24 03:54:47.719491: step: 498/459, loss: 0.06525146961212158 2023-01-24 03:54:48.339493: step: 500/459, loss: 0.08813116699457169 2023-01-24 03:54:48.954833: step: 502/459, loss: 0.0631331130862236 2023-01-24 03:54:49.674061: step: 504/459, loss: 0.028190862387418747 2023-01-24 03:54:50.333682: step: 506/459, loss: 0.006409741006791592 2023-01-24 03:54:50.937549: step: 508/459, loss: 0.007414119318127632 2023-01-24 03:54:51.550151: 
step: 510/459, loss: 0.076271191239357 2023-01-24 03:54:52.159598: step: 512/459, loss: 0.0007231879862956703 2023-01-24 03:54:52.795604: step: 514/459, loss: 0.06299881637096405 2023-01-24 03:54:53.402181: step: 516/459, loss: 0.0019134519388899207 2023-01-24 03:54:54.044964: step: 518/459, loss: 0.667677640914917 2023-01-24 03:54:54.709834: step: 520/459, loss: 0.003955689258873463 2023-01-24 03:54:55.309675: step: 522/459, loss: 0.01017338503152132 2023-01-24 03:54:55.915685: step: 524/459, loss: 0.0030172618571668863 2023-01-24 03:54:56.643487: step: 526/459, loss: 0.07974731177091599 2023-01-24 03:54:57.265132: step: 528/459, loss: 0.03680906072258949 2023-01-24 03:54:57.799347: step: 530/459, loss: 0.0007880870834924281 2023-01-24 03:54:58.382733: step: 532/459, loss: 0.0006186314858496189 2023-01-24 03:54:59.014358: step: 534/459, loss: 0.015157690271735191 2023-01-24 03:54:59.591086: step: 536/459, loss: 0.004496397916227579 2023-01-24 03:55:00.274230: step: 538/459, loss: 0.09721246361732483 2023-01-24 03:55:00.827689: step: 540/459, loss: 0.0024500612635165453 2023-01-24 03:55:01.547687: step: 542/459, loss: 0.06312676519155502 2023-01-24 03:55:02.213355: step: 544/459, loss: 0.0037921227049082518 2023-01-24 03:55:02.777466: step: 546/459, loss: 0.017375707626342773 2023-01-24 03:55:03.350913: step: 548/459, loss: 0.015193987637758255 2023-01-24 03:55:04.025898: step: 550/459, loss: 0.0015275198966264725 2023-01-24 03:55:04.581676: step: 552/459, loss: 0.0349520742893219 2023-01-24 03:55:05.183852: step: 554/459, loss: 0.18052957952022552 2023-01-24 03:55:05.798417: step: 556/459, loss: 0.03798510506749153 2023-01-24 03:55:06.364777: step: 558/459, loss: 0.03226960077881813 2023-01-24 03:55:06.971241: step: 560/459, loss: 0.011706565506756306 2023-01-24 03:55:07.721080: step: 562/459, loss: 0.006706173997372389 2023-01-24 03:55:08.324085: step: 564/459, loss: 0.0008525490411557257 2023-01-24 03:55:08.973476: step: 566/459, loss: 0.008967679925262928 2023-01-24 03:55:09.670097: step: 568/459, loss: 0.15660931169986725 2023-01-24 03:55:10.295169: step: 570/459, loss: 0.10599968582391739 2023-01-24 03:55:10.879921: step: 572/459, loss: 0.008978378027677536 2023-01-24 03:55:11.544046: step: 574/459, loss: 0.07869431376457214 2023-01-24 03:55:12.234376: step: 576/459, loss: 0.0015006817411631346 2023-01-24 03:55:12.830807: step: 578/459, loss: 0.020580070093274117 2023-01-24 03:55:13.496660: step: 580/459, loss: 0.1921059638261795 2023-01-24 03:55:14.119721: step: 582/459, loss: 0.017305418848991394 2023-01-24 03:55:14.763747: step: 584/459, loss: 0.002476142253726721 2023-01-24 03:55:15.378896: step: 586/459, loss: 0.001435768324881792 2023-01-24 03:55:15.993979: step: 588/459, loss: 0.027498707175254822 2023-01-24 03:55:16.597263: step: 590/459, loss: 0.006608245428651571 2023-01-24 03:55:17.213024: step: 592/459, loss: 0.0007744677714072168 2023-01-24 03:55:17.791412: step: 594/459, loss: 0.0015993909910321236 2023-01-24 03:55:18.481928: step: 596/459, loss: 0.02594669722020626 2023-01-24 03:55:19.031881: step: 598/459, loss: 0.03656218573451042 2023-01-24 03:55:19.687418: step: 600/459, loss: 0.000776270346250385 2023-01-24 03:55:20.246239: step: 602/459, loss: 0.3597613275051117 2023-01-24 03:55:20.850087: step: 604/459, loss: 0.06269185245037079 2023-01-24 03:55:21.468978: step: 606/459, loss: 0.04247952252626419 2023-01-24 03:55:22.154845: step: 608/459, loss: 0.00045431090984493494 2023-01-24 03:55:22.780300: step: 610/459, loss: 0.06855787336826324 2023-01-24 03:55:23.429550: 
step: 612/459, loss: 0.002437078859657049 2023-01-24 03:55:24.032323: step: 614/459, loss: 0.0005612584645859897 2023-01-24 03:55:24.592307: step: 616/459, loss: 0.13866575062274933 2023-01-24 03:55:25.170771: step: 618/459, loss: 0.04559342563152313 2023-01-24 03:55:25.741949: step: 620/459, loss: 0.003951556980609894 2023-01-24 03:55:26.310290: step: 622/459, loss: 0.0013049362460151315 2023-01-24 03:55:26.898820: step: 624/459, loss: 0.06824611872434616 2023-01-24 03:55:27.565598: step: 626/459, loss: 0.0002735429152380675 2023-01-24 03:55:28.190563: step: 628/459, loss: 0.0010762671008706093 2023-01-24 03:55:28.803620: step: 630/459, loss: 0.011407595127820969 2023-01-24 03:55:29.430803: step: 632/459, loss: 0.02085067518055439 2023-01-24 03:55:30.088110: step: 634/459, loss: 0.0009275642805732787 2023-01-24 03:55:30.725609: step: 636/459, loss: 0.12075734883546829 2023-01-24 03:55:31.308696: step: 638/459, loss: 0.00234418036416173 2023-01-24 03:55:31.909792: step: 640/459, loss: 0.011866050772368908 2023-01-24 03:55:32.608790: step: 642/459, loss: 0.001696441788226366 2023-01-24 03:55:33.177876: step: 644/459, loss: 0.023127593100070953 2023-01-24 03:55:33.826327: step: 646/459, loss: 0.000185811280971393 2023-01-24 03:55:34.438778: step: 648/459, loss: 0.009523244574666023 2023-01-24 03:55:35.123235: step: 650/459, loss: 0.011716450564563274 2023-01-24 03:55:35.799195: step: 652/459, loss: 0.023150525987148285 2023-01-24 03:55:36.431145: step: 654/459, loss: 0.07638442516326904 2023-01-24 03:55:37.046790: step: 656/459, loss: 0.010763298720121384 2023-01-24 03:55:37.656817: step: 658/459, loss: 0.011287077330052853 2023-01-24 03:55:38.268267: step: 660/459, loss: 0.023248305544257164 2023-01-24 03:55:38.973588: step: 662/459, loss: 0.007049893960356712 2023-01-24 03:55:39.605140: step: 664/459, loss: 0.011304252780973911 2023-01-24 03:55:40.246452: step: 666/459, loss: 0.020645765587687492 2023-01-24 03:55:40.859388: step: 668/459, loss: 0.012477855198085308 2023-01-24 03:55:41.443937: step: 670/459, loss: 0.6099754571914673 2023-01-24 03:55:42.120543: step: 672/459, loss: 0.008739089593291283 2023-01-24 03:55:42.772360: step: 674/459, loss: 0.010115781798958778 2023-01-24 03:55:43.375355: step: 676/459, loss: 0.3913722634315491 2023-01-24 03:55:43.982109: step: 678/459, loss: 0.23037029802799225 2023-01-24 03:55:44.628305: step: 680/459, loss: 0.015350976958870888 2023-01-24 03:55:45.249598: step: 682/459, loss: 0.000985857448540628 2023-01-24 03:55:45.907284: step: 684/459, loss: 0.01263222936540842 2023-01-24 03:55:46.526027: step: 686/459, loss: 0.04377437010407448 2023-01-24 03:55:47.142412: step: 688/459, loss: 0.0009948436636477709 2023-01-24 03:55:47.721459: step: 690/459, loss: 0.06305909156799316 2023-01-24 03:55:48.399829: step: 692/459, loss: 0.0020132153294980526 2023-01-24 03:55:49.033483: step: 694/459, loss: 0.025049513205885887 2023-01-24 03:55:49.660042: step: 696/459, loss: 0.0019143702229484916 2023-01-24 03:55:50.226086: step: 698/459, loss: 0.024594495072960854 2023-01-24 03:55:50.839242: step: 700/459, loss: 0.009042535908520222 2023-01-24 03:55:51.461537: step: 702/459, loss: 0.0020570512861013412 2023-01-24 03:55:52.009535: step: 704/459, loss: 0.10245510935783386 2023-01-24 03:55:52.568269: step: 706/459, loss: 0.1172410100698471 2023-01-24 03:55:53.141561: step: 708/459, loss: 0.11014746129512787 2023-01-24 03:55:53.763458: step: 710/459, loss: 0.05150088667869568 2023-01-24 03:55:54.349065: step: 712/459, loss: 0.00939325150102377 2023-01-24 
03:55:54.921949: step: 714/459, loss: 0.02245236188173294 2023-01-24 03:55:55.632067: step: 716/459, loss: 0.08978982269763947 2023-01-24 03:55:56.288493: step: 718/459, loss: 0.002009912393987179 2023-01-24 03:55:56.920564: step: 720/459, loss: 0.005968477576971054 2023-01-24 03:55:57.517692: step: 722/459, loss: 0.0014693343546241522 2023-01-24 03:55:58.119891: step: 724/459, loss: 0.005611165892332792 2023-01-24 03:55:58.764555: step: 726/459, loss: 0.0002599535509943962 2023-01-24 03:55:59.343549: step: 728/459, loss: 0.00295237940736115 2023-01-24 03:55:59.932449: step: 730/459, loss: 0.012094269506633282 2023-01-24 03:56:00.551859: step: 732/459, loss: 0.0025381671730428934 2023-01-24 03:56:01.139227: step: 734/459, loss: 0.1439618319272995 2023-01-24 03:56:01.749366: step: 736/459, loss: 0.00121320562902838 2023-01-24 03:56:02.314734: step: 738/459, loss: 0.022726822644472122 2023-01-24 03:56:02.920905: step: 740/459, loss: 0.033158157020807266 2023-01-24 03:56:03.577224: step: 742/459, loss: 0.05900775268673897 2023-01-24 03:56:04.180728: step: 744/459, loss: 0.012176009826362133 2023-01-24 03:56:04.792322: step: 746/459, loss: 0.00477979239076376 2023-01-24 03:56:05.371011: step: 748/459, loss: 0.0237260814756155 2023-01-24 03:56:05.963176: step: 750/459, loss: 0.02457926794886589 2023-01-24 03:56:06.524723: step: 752/459, loss: 0.001082378439605236 2023-01-24 03:56:07.122795: step: 754/459, loss: 0.0029745250940322876 2023-01-24 03:56:07.733853: step: 756/459, loss: 0.15919673442840576 2023-01-24 03:56:08.314896: step: 758/459, loss: 0.020390883088111877 2023-01-24 03:56:08.924010: step: 760/459, loss: 0.025589706376194954 2023-01-24 03:56:09.551965: step: 762/459, loss: 0.0037355502136051655 2023-01-24 03:56:10.143420: step: 764/459, loss: 0.0004469223495107144 2023-01-24 03:56:10.751777: step: 766/459, loss: 0.00666045630350709 2023-01-24 03:56:11.361316: step: 768/459, loss: 0.2777055501937866 2023-01-24 03:56:12.034598: step: 770/459, loss: 0.004564672242850065 2023-01-24 03:56:12.651344: step: 772/459, loss: 0.020851967856287956 2023-01-24 03:56:13.320938: step: 774/459, loss: 0.09962157905101776 2023-01-24 03:56:13.935562: step: 776/459, loss: 0.1670210063457489 2023-01-24 03:56:14.553498: step: 778/459, loss: 0.12606856226921082 2023-01-24 03:56:15.105701: step: 780/459, loss: 0.0005655401619151235 2023-01-24 03:56:15.739945: step: 782/459, loss: 0.004559805616736412 2023-01-24 03:56:16.334374: step: 784/459, loss: 0.02247568964958191 2023-01-24 03:56:17.019852: step: 786/459, loss: 0.025195756927132607 2023-01-24 03:56:17.690313: step: 788/459, loss: 0.0011185925686731935 2023-01-24 03:56:18.298082: step: 790/459, loss: 0.023950256407260895 2023-01-24 03:56:18.959018: step: 792/459, loss: 0.00013087537081446499 2023-01-24 03:56:19.672543: step: 794/459, loss: 0.030162498354911804 2023-01-24 03:56:20.317673: step: 796/459, loss: 0.0004371502436697483 2023-01-24 03:56:20.978487: step: 798/459, loss: 0.000521937501616776 2023-01-24 03:56:21.695083: step: 800/459, loss: 0.014462068676948547 2023-01-24 03:56:22.324280: step: 802/459, loss: 0.06295058131217957 2023-01-24 03:56:22.924275: step: 804/459, loss: 0.001953237922862172 2023-01-24 03:56:23.559637: step: 806/459, loss: 0.5101849436759949 2023-01-24 03:56:24.154333: step: 808/459, loss: 0.01943136937916279 2023-01-24 03:56:24.751246: step: 810/459, loss: 0.04245943948626518 2023-01-24 03:56:25.338255: step: 812/459, loss: 0.005685132462531328 2023-01-24 03:56:25.957896: step: 814/459, loss: 0.021812885999679565 
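
Note on the per-epoch evaluation block below: for each language it reports template and slot precision/recall/F1 plus a 'combined' score. The logged numbers are consistent with the standard F1 = 2pr/(p+r) and with 'combined' being the product of the template F1 and the slot F1 (for Dev Chinese at epoch 31, 0.7368 * 0.3126 gives the reported 0.2303). A minimal sketch under that assumption; the helper names are illustrative and not taken from train.py:

# Sketch: reproduce the per-language scores in the epoch summaries.
# Assumes combined = template_f1 * slot_f1, which matches the logged values;
# the function names are illustrative, not from train.py.

def f1(p: float, r: float) -> float:
    """Standard F1 from precision and recall."""
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined_score(template: dict, slot: dict) -> float:
    """'combined' as it appears in this log: product of the two F1 values."""
    return template["f1"] * slot["f1"]

# Dev Chinese, epoch 31 (values copied from the summary below):
template = {"p": 1.0, "r": 0.5833333333333334, "f1": 0.7368421052631579}
slot = {"p": 0.3357230392156863, "r": 0.2924039373814042, "f1": 0.3125697261663286}
assert abs(f1(template["p"], template["r"]) - template["f1"]) < 1e-9
print(combined_score(template, slot))  # ~0.23031, matching 'combined' for epoch 31
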
2023-01-24 03:56:26.605674: step: 816/459, loss: 0.05702119693160057 2023-01-24 03:56:27.206203: step: 818/459, loss: 0.02306227758526802 2023-01-24 03:56:27.809745: step: 820/459, loss: 0.00040987791726365685 2023-01-24 03:56:28.453403: step: 822/459, loss: 0.053321316838264465 2023-01-24 03:56:29.047151: step: 824/459, loss: 0.002566336886957288 2023-01-24 03:56:29.682802: step: 826/459, loss: 0.007282747887074947 2023-01-24 03:56:30.275779: step: 828/459, loss: 0.03594726324081421 2023-01-24 03:56:30.812008: step: 830/459, loss: 0.003315082984045148 2023-01-24 03:56:31.420729: step: 832/459, loss: 0.021517347544431686 2023-01-24 03:56:32.081397: step: 834/459, loss: 0.010202588513493538 2023-01-24 03:56:32.648738: step: 836/459, loss: 0.0009032540256157517 2023-01-24 03:56:33.304383: step: 838/459, loss: 0.039335355162620544 2023-01-24 03:56:33.965010: step: 840/459, loss: 0.004141459707170725 2023-01-24 03:56:34.620533: step: 842/459, loss: 0.014291292987763882 2023-01-24 03:56:35.227471: step: 844/459, loss: 0.009431645274162292 2023-01-24 03:56:35.888845: step: 846/459, loss: 0.11450529843568802 2023-01-24 03:56:36.554818: step: 848/459, loss: 0.0022984833922237158 2023-01-24 03:56:37.117048: step: 850/459, loss: 0.011400380171835423 2023-01-24 03:56:37.716315: step: 852/459, loss: 0.0184054896235466 2023-01-24 03:56:38.494777: step: 854/459, loss: 0.009060900658369064 2023-01-24 03:56:39.062256: step: 856/459, loss: 0.09316863119602203 2023-01-24 03:56:39.682203: step: 858/459, loss: 0.0014006986748427153 2023-01-24 03:56:40.314496: step: 860/459, loss: 0.01683296635746956 2023-01-24 03:56:40.986543: step: 862/459, loss: 0.001533371745608747 2023-01-24 03:56:41.589905: step: 864/459, loss: 0.0022352514788508415 2023-01-24 03:56:42.187040: step: 866/459, loss: 4.843481656280346e-05 2023-01-24 03:56:42.774070: step: 868/459, loss: 0.0005959517438896 2023-01-24 03:56:43.348033: step: 870/459, loss: 0.20119404792785645 2023-01-24 03:56:43.995606: step: 872/459, loss: 0.03593379631638527 2023-01-24 03:56:44.607063: step: 874/459, loss: 0.00845309253782034 2023-01-24 03:56:45.236899: step: 876/459, loss: 0.013556182384490967 2023-01-24 03:56:46.001489: step: 878/459, loss: 0.002718328032642603 2023-01-24 03:56:46.600858: step: 880/459, loss: 0.01841811276972294 2023-01-24 03:56:47.263747: step: 882/459, loss: 5.243501436780207e-05 2023-01-24 03:56:47.913027: step: 884/459, loss: 0.3174951672554016 2023-01-24 03:56:48.515300: step: 886/459, loss: 0.0015050313668325543 2023-01-24 03:56:49.166994: step: 888/459, loss: 0.0007082739030010998 2023-01-24 03:56:49.785435: step: 890/459, loss: 0.03884778916835785 2023-01-24 03:56:50.382682: step: 892/459, loss: 0.012006483040750027 2023-01-24 03:56:51.049521: step: 894/459, loss: 0.014874651096761227 2023-01-24 03:56:51.692661: step: 896/459, loss: 0.009214404970407486 2023-01-24 03:56:52.296680: step: 898/459, loss: 0.07762229442596436 2023-01-24 03:56:52.929738: step: 900/459, loss: 0.0848405510187149 2023-01-24 03:56:53.568169: step: 902/459, loss: 0.18650053441524506 2023-01-24 03:56:54.206984: step: 904/459, loss: 0.001232527894899249 2023-01-24 03:56:54.837580: step: 906/459, loss: 0.006436806172132492 2023-01-24 03:56:55.432796: step: 908/459, loss: 0.09838427603244781 2023-01-24 03:56:56.165973: step: 910/459, loss: 0.026080431416630745 2023-01-24 03:56:56.741378: step: 912/459, loss: 0.0030919716227799654 2023-01-24 03:56:57.346550: step: 914/459, loss: 0.031565289944410324 2023-01-24 03:56:57.985122: step: 916/459, loss: 
0.0006479120347648859 2023-01-24 03:56:58.644730: step: 918/459, loss: 0.0027900540735572577 2023-01-24 03:56:59.085934: step: 920/459, loss: 0.04325193911790848 ================================================== Loss: 0.049 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3357230392156863, 'r': 0.2924039373814042, 'f1': 0.3125697261663286}, 'combined': 0.23031453506992633, 'epoch': 31} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3457725464166149, 'r': 0.2894547249347459, 'f1': 0.3151171299685842}, 'combined': 0.20167496317989386, 'epoch': 31} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3309486373165619, 'r': 0.2995493358633776, 'f1': 0.3144671314741036}, 'combined': 0.23171262319144476, 'epoch': 31} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.342808179265167, 'r': 0.2795444880007771, 'f1': 0.30796087811803186}, 'combined': 0.19709496199554036, 'epoch': 31} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35415729127078865, 'r': 0.30039527362057405, 'f1': 0.32506839671055965}, 'combined': 0.23952408178672815, 'epoch': 31} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3504054421246324, 'r': 0.30440873869168156, 'f1': 0.3257915964339265}, 'combined': 0.23358642763187187, 'epoch': 31} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.31111111111111106, 'r': 0.2666666666666666, 'f1': 0.2871794871794871}, 'combined': 0.19145299145299138, 'epoch': 31} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2391304347826087, 'r': 0.2391304347826087, 'f1': 0.2391304347826087}, 'combined': 0.11956521739130435, 'epoch': 31} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3, 'r': 0.10344827586206896, 'f1': 0.15384615384615385}, 'combined': 0.10256410256410256, 'epoch': 31} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 'epoch': 10} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 'epoch': 10} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'epoch': 10} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3234579439252337, 'r': 0.32836812144212524, 'f1': 0.32589453860640305}, 'combined': 0.2401328179205075, 'epoch': 25} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33820520545292077, 'r': 0.29673590233199043, 'f1': 0.3161163313667358}, 'combined': 0.20231445207471088, 'epoch': 25} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 
0.32142857142857145, 'r': 0.391304347826087, 'f1': 0.35294117647058826}, 'combined': 0.17647058823529413, 'epoch': 25} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8} ****************************** Epoch: 32 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 03:59:34.420658: step: 2/459, loss: 0.014455147087574005 2023-01-24 03:59:34.991988: step: 4/459, loss: 0.011092949658632278 2023-01-24 03:59:35.614641: step: 6/459, loss: 0.03503834456205368 2023-01-24 03:59:36.242229: step: 8/459, loss: 0.0005288568208925426 2023-01-24 03:59:36.855872: step: 10/459, loss: 0.0013723996235057712 2023-01-24 03:59:37.476636: step: 12/459, loss: 0.016624314710497856 2023-01-24 03:59:38.102933: step: 14/459, loss: 0.09625258296728134 2023-01-24 03:59:38.771495: step: 16/459, loss: 0.005020035430788994 2023-01-24 03:59:39.390694: step: 18/459, loss: 0.39160141348838806 2023-01-24 03:59:40.032223: step: 20/459, loss: 0.01839500665664673 2023-01-24 03:59:40.650843: step: 22/459, loss: 0.005618041846901178 2023-01-24 03:59:41.313412: step: 24/459, loss: 0.01838867738842964 2023-01-24 03:59:41.941082: step: 26/459, loss: 0.005291650537401438 2023-01-24 03:59:42.569773: step: 28/459, loss: 0.015113216824829578 2023-01-24 03:59:43.146232: step: 30/459, loss: 0.00966006051748991 2023-01-24 03:59:43.797798: step: 32/459, loss: 0.022360485047101974 2023-01-24 03:59:44.372936: step: 34/459, loss: 0.09635263681411743 2023-01-24 03:59:44.933815: step: 36/459, loss: 8.982356666820124e-05 2023-01-24 03:59:45.562108: step: 38/459, loss: 0.000582952459808439 2023-01-24 03:59:46.301729: step: 40/459, loss: 0.015423756092786789 2023-01-24 03:59:46.913769: step: 42/459, loss: 0.03932933136820793 2023-01-24 03:59:47.552038: step: 44/459, loss: 0.020165342837572098 2023-01-24 03:59:48.153997: step: 46/459, loss: 0.003538979683071375 2023-01-24 03:59:48.734822: step: 48/459, loss: 0.016219744458794594 2023-01-24 03:59:49.295081: step: 50/459, loss: 0.06001574173569679 2023-01-24 03:59:49.983228: step: 52/459, loss: 0.028590207919478416 2023-01-24 03:59:50.547667: step: 54/459, loss: 0.1836594194173813 2023-01-24 03:59:51.198187: step: 56/459, loss: 0.003402682952582836 2023-01-24 03:59:51.770276: step: 58/459, loss: 0.028001287952065468 2023-01-24 03:59:52.359840: step: 60/459, loss: 0.0072120013646781445 2023-01-24 03:59:52.948373: step: 62/459, loss: 0.01941986195743084 2023-01-24 03:59:53.591064: step: 64/459, loss: 0.01753930002450943 2023-01-24 03:59:54.227918: step: 66/459, loss: 0.006164215505123138 2023-01-24 03:59:54.804445: step: 68/459, loss: 0.031095469370484352 2023-01-24 03:59:55.344760: step: 70/459, loss: 0.14307206869125366 2023-01-24 03:59:55.981197: step: 72/459, loss: 0.0035800819750875235 2023-01-24 
03:59:56.590273: step: 74/459, loss: 0.052580006420612335 2023-01-24 03:59:57.210292: step: 76/459, loss: 0.06977327913045883 2023-01-24 03:59:57.817821: step: 78/459, loss: 0.0009242456872016191 2023-01-24 03:59:58.382149: step: 80/459, loss: 0.00718060415238142 2023-01-24 03:59:58.982003: step: 82/459, loss: 0.017533181235194206 2023-01-24 03:59:59.657385: step: 84/459, loss: 0.08534672111272812 2023-01-24 04:00:00.300217: step: 86/459, loss: 0.01841016672551632 2023-01-24 04:00:00.973148: step: 88/459, loss: 0.02725670114159584 2023-01-24 04:00:01.527802: step: 90/459, loss: 0.014921951107680798 2023-01-24 04:00:02.160008: step: 92/459, loss: 1.2428370714187622 2023-01-24 04:00:02.777486: step: 94/459, loss: 0.022227078676223755 2023-01-24 04:00:03.406727: step: 96/459, loss: 0.0015099742449820042 2023-01-24 04:00:04.086189: step: 98/459, loss: 0.02796153537929058 2023-01-24 04:00:04.690205: step: 100/459, loss: 0.006842422764748335 2023-01-24 04:00:05.249579: step: 102/459, loss: 0.0019861485343426466 2023-01-24 04:00:05.868362: step: 104/459, loss: 0.03792892396450043 2023-01-24 04:00:06.460683: step: 106/459, loss: 0.002019218634814024 2023-01-24 04:00:07.053967: step: 108/459, loss: 0.008044193498790264 2023-01-24 04:00:07.625806: step: 110/459, loss: 0.0012088853400200605 2023-01-24 04:00:08.190880: step: 112/459, loss: 0.002046177163720131 2023-01-24 04:00:08.819303: step: 114/459, loss: 0.0015396308153867722 2023-01-24 04:00:09.461210: step: 116/459, loss: 0.0028292639181017876 2023-01-24 04:00:10.071262: step: 118/459, loss: 0.003422696143388748 2023-01-24 04:00:10.706937: step: 120/459, loss: 0.009553218260407448 2023-01-24 04:00:11.319510: step: 122/459, loss: 0.4107373356819153 2023-01-24 04:00:11.962070: step: 124/459, loss: 0.03511001542210579 2023-01-24 04:00:12.578553: step: 126/459, loss: 0.032586719840765 2023-01-24 04:00:13.150694: step: 128/459, loss: 0.0008413014584220946 2023-01-24 04:00:13.752893: step: 130/459, loss: 0.024537784978747368 2023-01-24 04:00:14.318963: step: 132/459, loss: 0.006444194819778204 2023-01-24 04:00:15.055141: step: 134/459, loss: 0.024073628708720207 2023-01-24 04:00:15.692569: step: 136/459, loss: 0.03259694203734398 2023-01-24 04:00:16.397144: step: 138/459, loss: 0.0011700732866302133 2023-01-24 04:00:17.019315: step: 140/459, loss: 0.006000634282827377 2023-01-24 04:00:17.615440: step: 142/459, loss: 0.0003796257951762527 2023-01-24 04:00:18.158336: step: 144/459, loss: 0.03898616507649422 2023-01-24 04:00:18.808635: step: 146/459, loss: 0.001012840191833675 2023-01-24 04:00:19.461891: step: 148/459, loss: 0.0009370942716486752 2023-01-24 04:00:20.060535: step: 150/459, loss: 0.04616972431540489 2023-01-24 04:00:20.785464: step: 152/459, loss: 0.016728537157177925 2023-01-24 04:00:21.431857: step: 154/459, loss: 0.015277711674571037 2023-01-24 04:00:22.117495: step: 156/459, loss: 0.0033239119220525026 2023-01-24 04:00:22.704916: step: 158/459, loss: 0.1438046246767044 2023-01-24 04:00:23.363559: step: 160/459, loss: 0.010945955291390419 2023-01-24 04:00:23.981813: step: 162/459, loss: 0.002382240490987897 2023-01-24 04:00:24.553898: step: 164/459, loss: 0.012415314093232155 2023-01-24 04:00:25.216337: step: 166/459, loss: 0.0032019317150115967 2023-01-24 04:00:25.835172: step: 168/459, loss: 0.005599508062005043 2023-01-24 04:00:26.426462: step: 170/459, loss: 0.2232530266046524 2023-01-24 04:00:27.026906: step: 172/459, loss: 0.0004059758211951703 2023-01-24 04:00:27.640012: step: 174/459, loss: 0.018008766695857048 2023-01-24 
04:00:28.273362: step: 176/459, loss: 0.026172596961259842 2023-01-24 04:00:28.907526: step: 178/459, loss: 0.020641256123781204 2023-01-24 04:00:29.497685: step: 180/459, loss: 0.01127415057271719 2023-01-24 04:00:30.120440: step: 182/459, loss: 0.030807672068476677 2023-01-24 04:00:30.718967: step: 184/459, loss: 0.026838835328817368 2023-01-24 04:00:31.350117: step: 186/459, loss: 0.049927860498428345 2023-01-24 04:00:31.971627: step: 188/459, loss: 0.041009996086359024 2023-01-24 04:00:32.638602: step: 190/459, loss: 0.04548192769289017 2023-01-24 04:00:33.269460: step: 192/459, loss: 0.003280897857621312 2023-01-24 04:00:33.893541: step: 194/459, loss: 0.001441556727513671 2023-01-24 04:00:34.565848: step: 196/459, loss: 0.0262062419205904 2023-01-24 04:00:35.207906: step: 198/459, loss: 0.015740958973765373 2023-01-24 04:00:35.804697: step: 200/459, loss: 0.006499622482806444 2023-01-24 04:00:36.444222: step: 202/459, loss: 0.15718644857406616 2023-01-24 04:00:37.083894: step: 204/459, loss: 0.003918918780982494 2023-01-24 04:00:37.700162: step: 206/459, loss: 0.01617158204317093 2023-01-24 04:00:38.283485: step: 208/459, loss: 0.03366485610604286 2023-01-24 04:00:38.886442: step: 210/459, loss: 0.017743118107318878 2023-01-24 04:00:39.491397: step: 212/459, loss: 0.016643116250634193 2023-01-24 04:00:40.175470: step: 214/459, loss: 0.024857383221387863 2023-01-24 04:00:40.830797: step: 216/459, loss: 0.022534793242812157 2023-01-24 04:00:41.423306: step: 218/459, loss: 0.09790059924125671 2023-01-24 04:00:42.056033: step: 220/459, loss: 0.003440906060859561 2023-01-24 04:00:42.636554: step: 222/459, loss: 0.0004117671342100948 2023-01-24 04:00:43.362064: step: 224/459, loss: 0.012308414094150066 2023-01-24 04:00:43.908993: step: 226/459, loss: 0.00464964983984828 2023-01-24 04:00:44.496934: step: 228/459, loss: 0.013662323355674744 2023-01-24 04:00:45.177423: step: 230/459, loss: 0.0023600049316883087 2023-01-24 04:00:45.903600: step: 232/459, loss: 0.15680816769599915 2023-01-24 04:00:46.564144: step: 234/459, loss: 0.006590652279555798 2023-01-24 04:00:47.213305: step: 236/459, loss: 0.0034977009054273367 2023-01-24 04:00:47.868362: step: 238/459, loss: 0.00861838273704052 2023-01-24 04:00:48.477242: step: 240/459, loss: 0.19131921231746674 2023-01-24 04:00:49.094026: step: 242/459, loss: 0.01147405058145523 2023-01-24 04:00:49.728384: step: 244/459, loss: 0.004390231333673 2023-01-24 04:00:50.298985: step: 246/459, loss: 0.005120153538882732 2023-01-24 04:00:50.934320: step: 248/459, loss: 0.08603625744581223 2023-01-24 04:00:51.504185: step: 250/459, loss: 0.009753568097949028 2023-01-24 04:00:52.180028: step: 252/459, loss: 0.008540397509932518 2023-01-24 04:00:52.794240: step: 254/459, loss: 0.0013046137755736709 2023-01-24 04:00:53.431485: step: 256/459, loss: 0.019870657473802567 2023-01-24 04:00:54.016073: step: 258/459, loss: 0.01829030178487301 2023-01-24 04:00:54.629595: step: 260/459, loss: 0.02119383215904236 2023-01-24 04:00:55.239975: step: 262/459, loss: 0.0013019312173128128 2023-01-24 04:00:55.875459: step: 264/459, loss: 0.10160775482654572 2023-01-24 04:00:56.489322: step: 266/459, loss: 0.6568189859390259 2023-01-24 04:00:57.099441: step: 268/459, loss: 0.09982924908399582 2023-01-24 04:00:57.659703: step: 270/459, loss: 0.013028161600232124 2023-01-24 04:00:58.300693: step: 272/459, loss: 0.0022219454403966665 2023-01-24 04:00:58.900473: step: 274/459, loss: 0.024715175852179527 2023-01-24 04:00:59.532731: step: 276/459, loss: 0.05730394273996353 2023-01-24 
04:01:00.088798: step: 278/459, loss: 0.002302495762705803 2023-01-24 04:01:00.704770: step: 280/459, loss: 0.0427582748234272 2023-01-24 04:01:01.315878: step: 282/459, loss: 0.0016471492126584053 2023-01-24 04:01:01.895488: step: 284/459, loss: 0.014757560566067696 2023-01-24 04:01:02.559537: step: 286/459, loss: 0.003605879144743085 2023-01-24 04:01:03.101115: step: 288/459, loss: 0.023456353694200516 2023-01-24 04:01:03.746917: step: 290/459, loss: 0.038747962564229965 2023-01-24 04:01:04.357392: step: 292/459, loss: 0.007830155082046986 2023-01-24 04:01:04.979532: step: 294/459, loss: 0.012368209660053253 2023-01-24 04:01:05.566746: step: 296/459, loss: 0.0171767957508564 2023-01-24 04:01:06.178282: step: 298/459, loss: 5.562617778778076 2023-01-24 04:01:06.759386: step: 300/459, loss: 0.012089096941053867 2023-01-24 04:01:07.405019: step: 302/459, loss: 1.0674302577972412 2023-01-24 04:01:08.034717: step: 304/459, loss: 0.007298425305634737 2023-01-24 04:01:08.648140: step: 306/459, loss: 0.0018206796376034617 2023-01-24 04:01:09.199586: step: 308/459, loss: 0.009252319112420082 2023-01-24 04:01:09.878769: step: 310/459, loss: 0.05910125747323036 2023-01-24 04:01:10.480960: step: 312/459, loss: 0.04911847412586212 2023-01-24 04:01:11.062828: step: 314/459, loss: 0.0017640372971072793 2023-01-24 04:01:11.695968: step: 316/459, loss: 0.025101877748966217 2023-01-24 04:01:12.309779: step: 318/459, loss: 0.04394622519612312 2023-01-24 04:01:12.952708: step: 320/459, loss: 0.0792049914598465 2023-01-24 04:01:13.592833: step: 322/459, loss: 0.005023245699703693 2023-01-24 04:01:14.164485: step: 324/459, loss: 0.015659628435969353 2023-01-24 04:01:14.778519: step: 326/459, loss: 0.01433363277465105 2023-01-24 04:01:15.426009: step: 328/459, loss: 0.024341026321053505 2023-01-24 04:01:15.999574: step: 330/459, loss: 0.0008176196715794504 2023-01-24 04:01:16.592606: step: 332/459, loss: 0.2546057105064392 2023-01-24 04:01:17.300618: step: 334/459, loss: 0.03769434243440628 2023-01-24 04:01:17.905636: step: 336/459, loss: 0.0003591907152440399 2023-01-24 04:01:18.528552: step: 338/459, loss: 0.04895499721169472 2023-01-24 04:01:19.123269: step: 340/459, loss: 0.0030901506543159485 2023-01-24 04:01:19.774588: step: 342/459, loss: 0.003791496157646179 2023-01-24 04:01:20.502213: step: 344/459, loss: 0.02716829627752304 2023-01-24 04:01:21.077521: step: 346/459, loss: 0.0006089017260819674 2023-01-24 04:01:21.695532: step: 348/459, loss: 0.12563568353652954 2023-01-24 04:01:22.243503: step: 350/459, loss: 0.0033451940398663282 2023-01-24 04:01:22.879526: step: 352/459, loss: 0.010580066591501236 2023-01-24 04:01:23.511880: step: 354/459, loss: 0.0026982880663126707 2023-01-24 04:01:24.104685: step: 356/459, loss: 0.07909297943115234 2023-01-24 04:01:24.760999: step: 358/459, loss: 0.051519058644771576 2023-01-24 04:01:25.448865: step: 360/459, loss: 0.0023264018818736076 2023-01-24 04:01:26.059238: step: 362/459, loss: 0.004948192276060581 2023-01-24 04:01:26.705196: step: 364/459, loss: 0.0354459248483181 2023-01-24 04:01:27.343353: step: 366/459, loss: 0.003917681984603405 2023-01-24 04:01:27.936300: step: 368/459, loss: 0.0009840894490480423 2023-01-24 04:01:28.560413: step: 370/459, loss: 0.1251908391714096 2023-01-24 04:01:29.221317: step: 372/459, loss: 0.009402678348124027 2023-01-24 04:01:29.808202: step: 374/459, loss: 0.019756751134991646 2023-01-24 04:01:30.464878: step: 376/459, loss: 0.029857397079467773 2023-01-24 04:01:31.153002: step: 378/459, loss: 0.33148202300071716 2023-01-24 
04:01:31.781123: step: 380/459, loss: 0.03126968443393707 2023-01-24 04:01:32.315133: step: 382/459, loss: 0.004535527899861336 2023-01-24 04:01:32.913971: step: 384/459, loss: 0.004309564363211393 2023-01-24 04:01:33.505339: step: 386/459, loss: 0.003333170898258686 2023-01-24 04:01:34.077302: step: 388/459, loss: 0.04570471867918968 2023-01-24 04:01:34.663161: step: 390/459, loss: 0.21468429267406464 2023-01-24 04:01:35.237325: step: 392/459, loss: 0.024144429713487625 2023-01-24 04:01:35.848100: step: 394/459, loss: 0.0034484174102544785 2023-01-24 04:01:36.471225: step: 396/459, loss: 0.03334115445613861 2023-01-24 04:01:37.154827: step: 398/459, loss: 0.02801843173801899 2023-01-24 04:01:37.811356: step: 400/459, loss: 0.0102032870054245 2023-01-24 04:01:38.400536: step: 402/459, loss: 0.00369156152009964 2023-01-24 04:01:38.974013: step: 404/459, loss: 0.016879191622138023 2023-01-24 04:01:39.593142: step: 406/459, loss: 0.009198823943734169 2023-01-24 04:01:40.214544: step: 408/459, loss: 0.026544295251369476 2023-01-24 04:01:40.854917: step: 410/459, loss: 0.03664183244109154 2023-01-24 04:01:41.482565: step: 412/459, loss: 0.007163193076848984 2023-01-24 04:01:42.032739: step: 414/459, loss: 0.19229386746883392 2023-01-24 04:01:42.644234: step: 416/459, loss: 0.13763605058193207 2023-01-24 04:01:43.237191: step: 418/459, loss: 0.3384253680706024 2023-01-24 04:01:43.844104: step: 420/459, loss: 0.24284178018569946 2023-01-24 04:01:44.480799: step: 422/459, loss: 0.0009921493474394083 2023-01-24 04:01:45.131546: step: 424/459, loss: 0.28724849224090576 2023-01-24 04:01:45.834324: step: 426/459, loss: 0.03393351286649704 2023-01-24 04:01:46.458437: step: 428/459, loss: 0.022794459015130997 2023-01-24 04:01:47.054659: step: 430/459, loss: 0.001216776086948812 2023-01-24 04:01:47.683861: step: 432/459, loss: 0.0008526111487299204 2023-01-24 04:01:48.375684: step: 434/459, loss: 0.057135358452796936 2023-01-24 04:01:49.007713: step: 436/459, loss: 0.0055615259334445 2023-01-24 04:01:49.645009: step: 438/459, loss: 0.01817977987229824 2023-01-24 04:01:50.305437: step: 440/459, loss: 0.0071229394525289536 2023-01-24 04:01:50.930484: step: 442/459, loss: 0.04011548310518265 2023-01-24 04:01:51.589372: step: 444/459, loss: 0.011253345757722855 2023-01-24 04:01:52.178402: step: 446/459, loss: 0.008699045516550541 2023-01-24 04:01:52.788856: step: 448/459, loss: 0.06359483301639557 2023-01-24 04:01:53.370594: step: 450/459, loss: 0.008044005371630192 2023-01-24 04:01:53.965054: step: 452/459, loss: 0.43775704503059387 2023-01-24 04:01:54.607879: step: 454/459, loss: 0.011031849309802055 2023-01-24 04:01:55.229206: step: 456/459, loss: 0.018112409859895706 2023-01-24 04:01:55.928715: step: 458/459, loss: 0.46012070775032043 2023-01-24 04:01:56.579133: step: 460/459, loss: 0.035998206585645676 2023-01-24 04:01:57.317751: step: 462/459, loss: 0.006756360176950693 2023-01-24 04:01:57.930333: step: 464/459, loss: 0.04899957403540611 2023-01-24 04:01:58.500070: step: 466/459, loss: 8.738868200452998e-05 2023-01-24 04:01:59.098823: step: 468/459, loss: 0.023376772180199623 2023-01-24 04:01:59.708403: step: 470/459, loss: 0.0029648225754499435 2023-01-24 04:02:00.342663: step: 472/459, loss: 0.14032573997974396 2023-01-24 04:02:01.077408: step: 474/459, loss: 0.031668614596128464 2023-01-24 04:02:01.745637: step: 476/459, loss: 0.056959208101034164 2023-01-24 04:02:02.340106: step: 478/459, loss: 0.041156966239213943 2023-01-24 04:02:02.949994: step: 480/459, loss: 0.009235804900527 2023-01-24 
04:02:03.529782: step: 482/459, loss: 0.0006621904904022813 2023-01-24 04:02:04.172662: step: 484/459, loss: 0.009051407687366009 2023-01-24 04:02:04.752094: step: 486/459, loss: 0.003491695737466216 2023-01-24 04:02:05.366114: step: 488/459, loss: 0.001440965454094112 2023-01-24 04:02:06.020899: step: 490/459, loss: 0.0008561965078115463 2023-01-24 04:02:06.619404: step: 492/459, loss: 0.020364776253700256 2023-01-24 04:02:07.224272: step: 494/459, loss: 0.0008927892777137458 2023-01-24 04:02:07.860370: step: 496/459, loss: 0.039242785423994064 2023-01-24 04:02:08.510396: step: 498/459, loss: 0.016365183517336845 2023-01-24 04:02:09.155036: step: 500/459, loss: 0.018824705854058266 2023-01-24 04:02:09.778445: step: 502/459, loss: 0.036548301577568054 2023-01-24 04:02:10.415617: step: 504/459, loss: 0.011665776371955872 2023-01-24 04:02:10.954662: step: 506/459, loss: 0.0019067182438448071 2023-01-24 04:02:11.557637: step: 508/459, loss: 0.0025141562800854445 2023-01-24 04:02:12.220321: step: 510/459, loss: 0.018980462104082108 2023-01-24 04:02:12.833653: step: 512/459, loss: 0.10920777916908264 2023-01-24 04:02:13.383325: step: 514/459, loss: 0.03965570032596588 2023-01-24 04:02:14.019122: step: 516/459, loss: 0.407824844121933 2023-01-24 04:02:14.630633: step: 518/459, loss: 0.019483186304569244 2023-01-24 04:02:15.351356: step: 520/459, loss: 0.0008181874873116612 2023-01-24 04:02:16.008181: step: 522/459, loss: 0.08558426797389984 2023-01-24 04:02:16.541260: step: 524/459, loss: 0.00017123536963481456 2023-01-24 04:02:17.118669: step: 526/459, loss: 0.01906164176762104 2023-01-24 04:02:17.699362: step: 528/459, loss: 0.0005563376471400261 2023-01-24 04:02:18.352151: step: 530/459, loss: 0.00621548667550087 2023-01-24 04:02:18.946710: step: 532/459, loss: 0.03944503888487816 2023-01-24 04:02:19.623660: step: 534/459, loss: 0.01589900255203247 2023-01-24 04:02:20.224804: step: 536/459, loss: 0.0038693470414727926 2023-01-24 04:02:20.856574: step: 538/459, loss: 0.00818319246172905 2023-01-24 04:02:21.486482: step: 540/459, loss: 0.07347419857978821 2023-01-24 04:02:22.161133: step: 542/459, loss: 0.05235062912106514 2023-01-24 04:02:22.746555: step: 544/459, loss: 0.004662286955863237 2023-01-24 04:02:23.323322: step: 546/459, loss: 0.0030436180531978607 2023-01-24 04:02:23.979589: step: 548/459, loss: 0.023041153326630592 2023-01-24 04:02:24.574420: step: 550/459, loss: 0.534621000289917 2023-01-24 04:02:25.093329: step: 552/459, loss: 0.6096300482749939 2023-01-24 04:02:25.680062: step: 554/459, loss: 0.004257469903677702 2023-01-24 04:02:26.262167: step: 556/459, loss: 0.0040115551091730595 2023-01-24 04:02:26.859934: step: 558/459, loss: 0.007415781728923321 2023-01-24 04:02:27.518378: step: 560/459, loss: 0.020496854558587074 2023-01-24 04:02:28.159893: step: 562/459, loss: 0.017719978466629982 2023-01-24 04:02:28.756985: step: 564/459, loss: 0.006862381473183632 2023-01-24 04:02:29.317599: step: 566/459, loss: 0.002099419478327036 2023-01-24 04:02:29.908419: step: 568/459, loss: 0.008686782792210579 2023-01-24 04:02:30.546042: step: 570/459, loss: 0.02042168751358986 2023-01-24 04:02:31.139664: step: 572/459, loss: 0.007708108518272638 2023-01-24 04:02:31.800673: step: 574/459, loss: 0.003852379973977804 2023-01-24 04:02:32.413056: step: 576/459, loss: 0.02724766544997692 2023-01-24 04:02:33.040909: step: 578/459, loss: 0.010371326468884945 2023-01-24 04:02:33.649604: step: 580/459, loss: 0.021511506289243698 2023-01-24 04:02:34.339543: step: 582/459, loss: 0.007806050591170788 
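
Each step entry records a wall-clock timestamp and the loss for that batch, and every epoch closes with a single figure such as "Loss: 0.049" (epoch 31) or "Loss: 0.060" (epoch 32), presumably the mean of the per-step losses. A minimal parsing sketch under that assumption; the regexes and the file name "train.log" are illustrative:

# Sketch: extract per-step losses from this log and average them per epoch.
# Assumes the "Loss:" figure in each epoch summary is the mean of that epoch's
# step losses (an assumption, not confirmed from train.py); "train.log" is a
# placeholder file name.
import re
from statistics import mean

EPOCH_RE = re.compile(r"Epoch: (\d+)")        # matches the epoch headers
STEP_RE = re.compile(r"loss: ([0-9.eE+-]+)")  # lowercase, so it skips "Loss: ..."

def per_epoch_mean_losses(log_text: str) -> dict:
    """Map epoch number -> mean of the step losses logged for that epoch."""
    chunks = EPOCH_RE.split(log_text)         # [prefix, epoch, body, epoch, body, ...]
    return {
        int(epoch): mean(float(x) for x in STEP_RE.findall(body))
        for epoch, body in zip(chunks[1::2], chunks[2::2])
        if STEP_RE.search(body)
    }

with open("train.log", encoding="utf-8") as fh:
    print(per_epoch_mean_losses(fh.read()))   # compare with each epoch's "Loss:" line
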
2023-01-24 04:02:34.991330: step: 584/459, loss: 0.012792042456567287 2023-01-24 04:02:35.599439: step: 586/459, loss: 0.0037340710405260324 2023-01-24 04:02:36.246940: step: 588/459, loss: 0.021429989486932755 2023-01-24 04:02:36.814852: step: 590/459, loss: 0.00566646596416831 2023-01-24 04:02:37.470319: step: 592/459, loss: 0.43512433767318726 2023-01-24 04:02:38.172789: step: 594/459, loss: 0.34013789892196655 2023-01-24 04:02:38.766471: step: 596/459, loss: 0.05721932649612427 2023-01-24 04:02:39.422211: step: 598/459, loss: 0.0040904865600168705 2023-01-24 04:02:40.081361: step: 600/459, loss: 0.021361464634537697 2023-01-24 04:02:40.767138: step: 602/459, loss: 0.019384345039725304 2023-01-24 04:02:41.413379: step: 604/459, loss: 0.16104130446910858 2023-01-24 04:02:42.022474: step: 606/459, loss: 0.008601576089859009 2023-01-24 04:02:42.607302: step: 608/459, loss: 0.012875390239059925 2023-01-24 04:02:43.146207: step: 610/459, loss: 0.010551300831139088 2023-01-24 04:02:43.795445: step: 612/459, loss: 0.020139694213867188 2023-01-24 04:02:44.602484: step: 614/459, loss: 0.030838290229439735 2023-01-24 04:02:45.203629: step: 616/459, loss: 0.010635626502335072 2023-01-24 04:02:45.774972: step: 618/459, loss: 0.00688158581033349 2023-01-24 04:02:46.347440: step: 620/459, loss: 0.000419851450715214 2023-01-24 04:02:46.952810: step: 622/459, loss: 0.006919875741004944 2023-01-24 04:02:47.571918: step: 624/459, loss: 0.005471152253448963 2023-01-24 04:02:48.160031: step: 626/459, loss: 0.0031836836133152246 2023-01-24 04:02:48.817267: step: 628/459, loss: 0.005571362096816301 2023-01-24 04:02:49.427073: step: 630/459, loss: 0.03440529853105545 2023-01-24 04:02:49.972235: step: 632/459, loss: 0.014623139053583145 2023-01-24 04:02:50.622815: step: 634/459, loss: 0.014958135783672333 2023-01-24 04:02:51.277415: step: 636/459, loss: 0.030585825443267822 2023-01-24 04:02:51.951064: step: 638/459, loss: 0.0009103771299123764 2023-01-24 04:02:52.595339: step: 640/459, loss: 0.013243986293673515 2023-01-24 04:02:53.154951: step: 642/459, loss: 0.008470272645354271 2023-01-24 04:02:53.765210: step: 644/459, loss: 0.008055093698203564 2023-01-24 04:02:54.426193: step: 646/459, loss: 0.00734227430075407 2023-01-24 04:02:55.098061: step: 648/459, loss: 0.002907573012635112 2023-01-24 04:02:55.755221: step: 650/459, loss: 0.0314655564725399 2023-01-24 04:02:56.388276: step: 652/459, loss: 0.0007975703338161111 2023-01-24 04:02:57.004851: step: 654/459, loss: 0.001078990288078785 2023-01-24 04:02:57.631806: step: 656/459, loss: 0.002046877983957529 2023-01-24 04:02:58.298739: step: 658/459, loss: 0.03916697949171066 2023-01-24 04:02:58.958233: step: 660/459, loss: 0.003722480498254299 2023-01-24 04:02:59.588970: step: 662/459, loss: 0.0012609369587153196 2023-01-24 04:03:00.259970: step: 664/459, loss: 0.007785235997289419 2023-01-24 04:03:00.851655: step: 666/459, loss: 0.002740071387961507 2023-01-24 04:03:01.580408: step: 668/459, loss: 0.004009890370070934 2023-01-24 04:03:02.233986: step: 670/459, loss: 0.023360515013337135 2023-01-24 04:03:02.918074: step: 672/459, loss: 0.4272877275943756 2023-01-24 04:03:03.579639: step: 674/459, loss: 0.0027928270865231752 2023-01-24 04:03:04.260218: step: 676/459, loss: 0.10886221379041672 2023-01-24 04:03:04.822397: step: 678/459, loss: 0.019877782091498375 2023-01-24 04:03:05.457994: step: 680/459, loss: 0.24881961941719055 2023-01-24 04:03:06.121305: step: 682/459, loss: 0.043038394302129745 2023-01-24 04:03:06.792180: step: 684/459, loss: 
0.0010793035617098212 2023-01-24 04:03:07.371034: step: 686/459, loss: 0.014951437711715698 2023-01-24 04:03:07.984519: step: 688/459, loss: 0.010132789611816406 2023-01-24 04:03:08.578699: step: 690/459, loss: 0.0008770802523940802 2023-01-24 04:03:09.284753: step: 692/459, loss: 0.02846825122833252 2023-01-24 04:03:09.950064: step: 694/459, loss: 0.0004755919799208641 2023-01-24 04:03:10.662653: step: 696/459, loss: 0.08592637628316879 2023-01-24 04:03:11.269363: step: 698/459, loss: 0.0035425990354269743 2023-01-24 04:03:11.921236: step: 700/459, loss: 0.002367787528783083 2023-01-24 04:03:12.487642: step: 702/459, loss: 0.006887106690555811 2023-01-24 04:03:13.081343: step: 704/459, loss: 0.01025327667593956 2023-01-24 04:03:13.739988: step: 706/459, loss: 0.08279450237751007 2023-01-24 04:03:14.358373: step: 708/459, loss: 0.0019853105768561363 2023-01-24 04:03:14.942550: step: 710/459, loss: 0.0011951905908063054 2023-01-24 04:03:15.581726: step: 712/459, loss: 0.09989157319068909 2023-01-24 04:03:16.231129: step: 714/459, loss: 0.0002277102757943794 2023-01-24 04:03:16.805908: step: 716/459, loss: 0.006395254284143448 2023-01-24 04:03:17.429484: step: 718/459, loss: 0.02713399939239025 2023-01-24 04:03:18.012969: step: 720/459, loss: 0.013146976009011269 2023-01-24 04:03:18.618734: step: 722/459, loss: 0.001030429033562541 2023-01-24 04:03:19.294114: step: 724/459, loss: 0.05565972998738289 2023-01-24 04:03:19.882624: step: 726/459, loss: 0.003498447360470891 2023-01-24 04:03:20.498411: step: 728/459, loss: 0.008740030229091644 2023-01-24 04:03:21.130660: step: 730/459, loss: 0.10303565114736557 2023-01-24 04:03:21.737101: step: 732/459, loss: 0.01648586615920067 2023-01-24 04:03:22.393144: step: 734/459, loss: 0.034938886761665344 2023-01-24 04:03:22.994434: step: 736/459, loss: 0.0013839355669915676 2023-01-24 04:03:23.584961: step: 738/459, loss: 0.006296911742538214 2023-01-24 04:03:24.280590: step: 740/459, loss: 0.009935946203768253 2023-01-24 04:03:24.913125: step: 742/459, loss: 0.001184144988656044 2023-01-24 04:03:25.520300: step: 744/459, loss: 0.013551847077906132 2023-01-24 04:03:26.099055: step: 746/459, loss: 0.002595585770905018 2023-01-24 04:03:26.695338: step: 748/459, loss: 0.013290639035403728 2023-01-24 04:03:27.324590: step: 750/459, loss: 9.381055133417249e-05 2023-01-24 04:03:27.942328: step: 752/459, loss: 0.0008199065923690796 2023-01-24 04:03:28.611374: step: 754/459, loss: 0.059561554342508316 2023-01-24 04:03:29.203905: step: 756/459, loss: 0.00558463716879487 2023-01-24 04:03:29.839939: step: 758/459, loss: 0.022295799106359482 2023-01-24 04:03:30.508341: step: 760/459, loss: 0.034598495811223984 2023-01-24 04:03:31.229008: step: 762/459, loss: 0.004962623585015535 2023-01-24 04:03:31.874500: step: 764/459, loss: 0.00038804972427897155 2023-01-24 04:03:32.503210: step: 766/459, loss: 0.011458005756139755 2023-01-24 04:03:33.164138: step: 768/459, loss: 0.0024708248674869537 2023-01-24 04:03:33.792899: step: 770/459, loss: 0.01221047155559063 2023-01-24 04:03:34.406770: step: 772/459, loss: 0.0011601813603192568 2023-01-24 04:03:35.035959: step: 774/459, loss: 0.03128684312105179 2023-01-24 04:03:35.650349: step: 776/459, loss: 0.021428028121590614 2023-01-24 04:03:36.264613: step: 778/459, loss: 0.1564655601978302 2023-01-24 04:03:36.875713: step: 780/459, loss: 0.004861115012317896 2023-01-24 04:03:37.509990: step: 782/459, loss: 0.040629446506500244 2023-01-24 04:03:38.122461: step: 784/459, loss: 0.22923697531223297 2023-01-24 04:03:38.768979: step: 
786/459, loss: 0.009077409282326698 2023-01-24 04:03:39.315267: step: 788/459, loss: 0.005845227278769016 2023-01-24 04:03:39.837566: step: 790/459, loss: 0.01959327422082424 2023-01-24 04:03:40.463357: step: 792/459, loss: 0.003717363579198718 2023-01-24 04:03:41.097644: step: 794/459, loss: 0.27582091093063354 2023-01-24 04:03:41.730607: step: 796/459, loss: 0.023553308099508286 2023-01-24 04:03:42.347870: step: 798/459, loss: 0.0014000444207340479 2023-01-24 04:03:42.952063: step: 800/459, loss: 0.005529957823455334 2023-01-24 04:03:43.566130: step: 802/459, loss: 0.0006174386362545192 2023-01-24 04:03:44.169605: step: 804/459, loss: 0.0024357703514397144 2023-01-24 04:03:44.782207: step: 806/459, loss: 0.00439958181232214 2023-01-24 04:03:45.406746: step: 808/459, loss: 0.010610561817884445 2023-01-24 04:03:46.030079: step: 810/459, loss: 0.06061520427465439 2023-01-24 04:03:46.613614: step: 812/459, loss: 0.007323059719055891 2023-01-24 04:03:47.311486: step: 814/459, loss: 0.011640024371445179 2023-01-24 04:03:47.955464: step: 816/459, loss: 0.01227355096489191 2023-01-24 04:03:48.601760: step: 818/459, loss: 0.0023540242109447718 2023-01-24 04:03:49.201298: step: 820/459, loss: 0.002759709022939205 2023-01-24 04:03:49.846735: step: 822/459, loss: 0.04952708259224892 2023-01-24 04:03:50.432376: step: 824/459, loss: 0.0017531317425891757 2023-01-24 04:03:51.042522: step: 826/459, loss: 0.0010464215883985162 2023-01-24 04:03:51.747386: step: 828/459, loss: 0.01765269972383976 2023-01-24 04:03:52.328365: step: 830/459, loss: 0.0816119834780693 2023-01-24 04:03:52.917755: step: 832/459, loss: 0.013103058561682701 2023-01-24 04:03:53.512234: step: 834/459, loss: 0.0016949406126514077 2023-01-24 04:03:54.111596: step: 836/459, loss: 0.024053361266851425 2023-01-24 04:03:54.767594: step: 838/459, loss: 0.013157217763364315 2023-01-24 04:03:55.376807: step: 840/459, loss: 0.002764857606962323 2023-01-24 04:03:55.967145: step: 842/459, loss: 0.03829522803425789 2023-01-24 04:03:56.545228: step: 844/459, loss: 0.018373334780335426 2023-01-24 04:03:57.169059: step: 846/459, loss: 0.004815445281565189 2023-01-24 04:03:57.882761: step: 848/459, loss: 0.012635987251996994 2023-01-24 04:03:58.548263: step: 850/459, loss: 1.3856824807589874e-05 2023-01-24 04:03:59.169232: step: 852/459, loss: 0.0011417089262977242 2023-01-24 04:03:59.753488: step: 854/459, loss: 0.014623826369643211 2023-01-24 04:04:00.401024: step: 856/459, loss: 0.00013001610932406038 2023-01-24 04:04:01.053281: step: 858/459, loss: 0.8613221049308777 2023-01-24 04:04:01.752466: step: 860/459, loss: 0.00880276970565319 2023-01-24 04:04:02.353275: step: 862/459, loss: 0.033645812422037125 2023-01-24 04:04:03.114144: step: 864/459, loss: 1.270424485206604 2023-01-24 04:04:03.769297: step: 866/459, loss: 0.027375103905797005 2023-01-24 04:04:04.340708: step: 868/459, loss: 0.03999434411525726 2023-01-24 04:04:04.910559: step: 870/459, loss: 0.04362180083990097 2023-01-24 04:04:05.629268: step: 872/459, loss: 0.012106381356716156 2023-01-24 04:04:06.200480: step: 874/459, loss: 0.0016276584938168526 2023-01-24 04:04:06.825477: step: 876/459, loss: 0.029117366299033165 2023-01-24 04:04:07.373992: step: 878/459, loss: 0.006155791692435741 2023-01-24 04:04:07.895764: step: 880/459, loss: 0.05003230273723602 2023-01-24 04:04:08.500745: step: 882/459, loss: 0.1420736312866211 2023-01-24 04:04:09.085978: step: 884/459, loss: 0.019117726013064384 2023-01-24 04:04:09.723130: step: 886/459, loss: 0.000534099293872714 2023-01-24 
04:04:10.299751: step: 888/459, loss: 0.06861867010593414 2023-01-24 04:04:10.944566: step: 890/459, loss: 0.017640460282564163 2023-01-24 04:04:11.516008: step: 892/459, loss: 0.0008356542093679309 2023-01-24 04:04:12.130921: step: 894/459, loss: 0.0405648835003376 2023-01-24 04:04:12.741650: step: 896/459, loss: 0.014838461764156818 2023-01-24 04:04:13.383412: step: 898/459, loss: 0.03323787823319435 2023-01-24 04:04:13.986373: step: 900/459, loss: 0.013910953886806965 2023-01-24 04:04:14.595750: step: 902/459, loss: 0.0007412419654428959 2023-01-24 04:04:15.173110: step: 904/459, loss: 0.002410991583019495 2023-01-24 04:04:15.804994: step: 906/459, loss: 0.02657579444348812 2023-01-24 04:04:16.349944: step: 908/459, loss: 0.00019873717974405736 2023-01-24 04:04:17.048514: step: 910/459, loss: 0.0002468722523190081 2023-01-24 04:04:17.626085: step: 912/459, loss: 0.004836377687752247 2023-01-24 04:04:18.201676: step: 914/459, loss: 0.013898981735110283 2023-01-24 04:04:18.824996: step: 916/459, loss: 0.0074372426606714725 2023-01-24 04:04:19.515081: step: 918/459, loss: 0.052401524037122726 2023-01-24 04:04:19.939304: step: 920/459, loss: 0.019766459241509438 ================================================== Loss: 0.060 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3345614125932063, 'r': 0.31551617088960826, 'f1': 0.3247598087086398}, 'combined': 0.23929670115373458, 'epoch': 32} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3505000322496816, 'r': 0.3017486641276805, 'f1': 0.3243024235861734}, 'combined': 0.20755355109515095, 'epoch': 32} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3282851996835048, 'r': 0.32267881107410906, 'f1': 0.3254578630355129}, 'combined': 0.2398110569735358, 'epoch': 32} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.35470510896184304, 'r': 0.2976816718262879, 'f1': 0.3237012549933264}, 'combined': 0.20716880319572886, 'epoch': 32} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34900665501336725, 'r': 0.3165563208660143, 'f1': 0.33199041014206876}, 'combined': 0.24462451273626118, 'epoch': 32} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.35640011492308116, 'r': 0.31907223403405166, 'f1': 0.33670476979278113}, 'combined': 0.24141096702123932, 'epoch': 32} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.32380952380952377, 'r': 0.32380952380952377, 'f1': 0.32380952380952377}, 'combined': 0.21587301587301583, 'epoch': 32} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.22916666666666666, 'r': 0.2391304347826087, 'f1': 0.23404255319148934}, 'combined': 0.11702127659574467, 'epoch': 32} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3, 'r': 0.10344827586206896, 'f1': 0.15384615384615385}, 'combined': 0.10256410256410256, 'epoch': 32} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 
'epoch': 10} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 'epoch': 10} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'epoch': 10} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3234579439252337, 'r': 0.32836812144212524, 'f1': 0.32589453860640305}, 'combined': 0.2401328179205075, 'epoch': 25} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33820520545292077, 'r': 0.29673590233199043, 'f1': 0.3161163313667358}, 'combined': 0.20231445207471088, 'epoch': 25} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.32142857142857145, 'r': 0.391304347826087, 'f1': 0.35294117647058826}, 'combined': 0.17647058823529413, 'epoch': 25} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8} ****************************** Epoch: 33 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:06:56.172263: step: 2/459, loss: 0.00900669302791357 2023-01-24 04:06:56.712876: step: 4/459, loss: 0.03207770735025406 2023-01-24 04:06:57.307667: step: 6/459, loss: 0.0013489507837221026 2023-01-24 04:06:57.962765: step: 8/459, loss: 0.04848755896091461 2023-01-24 04:06:58.526295: step: 10/459, loss: 0.003474109573289752 2023-01-24 04:06:59.097397: step: 12/459, loss: 0.004965201020240784 2023-01-24 04:06:59.641063: step: 14/459, loss: 0.03638499975204468 2023-01-24 04:07:00.297591: step: 16/459, loss: 0.12728042900562286 2023-01-24 04:07:00.910642: step: 18/459, loss: 0.02887093834578991 2023-01-24 04:07:01.467848: step: 20/459, loss: 0.008714600466191769 2023-01-24 04:07:02.090870: step: 22/459, loss: 0.0314282663166523 2023-01-24 04:07:02.709991: step: 24/459, loss: 0.018506567925214767 2023-01-24 04:07:03.324955: step: 26/459, loss: 0.00040830092621035874 2023-01-24 04:07:03.901145: step: 28/459, loss: 0.008640686050057411 2023-01-24 04:07:04.521150: step: 30/459, loss: 0.014892218634486198 2023-01-24 04:07:05.194568: step: 32/459, loss: 0.005321573466062546 2023-01-24 04:07:05.853980: step: 34/459, loss: 0.013614637777209282 2023-01-24 04:07:06.472411: step: 36/459, loss: 0.195688396692276 2023-01-24 04:07:07.097177: step: 38/459, loss: 0.0020287432707846165 2023-01-24 04:07:07.702909: step: 40/459, loss: 0.05934680625796318 2023-01-24 04:07:08.359525: step: 42/459, loss: 0.03445776551961899 2023-01-24 04:07:08.918736: step: 
44/459, loss: 0.004755337722599506 2023-01-24 04:07:09.460705: step: 46/459, loss: 0.010888496413826942 2023-01-24 04:07:10.110797: step: 48/459, loss: 0.0766378715634346 2023-01-24 04:07:10.732063: step: 50/459, loss: 0.16895702481269836 2023-01-24 04:07:11.339640: step: 52/459, loss: 0.026317695155739784 2023-01-24 04:07:11.949268: step: 54/459, loss: 0.006673530209809542 2023-01-24 04:07:12.594008: step: 56/459, loss: 0.008618353866040707 2023-01-24 04:07:13.200002: step: 58/459, loss: 0.0029741704929620028 2023-01-24 04:07:13.812372: step: 60/459, loss: 2.9868755518691614e-05 2023-01-24 04:07:14.437541: step: 62/459, loss: 0.023752009496092796 2023-01-24 04:07:15.097648: step: 64/459, loss: 0.09007386118173599 2023-01-24 04:07:15.726437: step: 66/459, loss: 0.001606454374268651 2023-01-24 04:07:16.285904: step: 68/459, loss: 0.0028469515964388847 2023-01-24 04:07:16.918884: step: 70/459, loss: 0.015069092623889446 2023-01-24 04:07:17.527189: step: 72/459, loss: 0.00016353737737517804 2023-01-24 04:07:18.200360: step: 74/459, loss: 0.0028911232948303223 2023-01-24 04:07:18.784817: step: 76/459, loss: 0.051404330879449844 2023-01-24 04:07:19.481567: step: 78/459, loss: 0.010467803105711937 2023-01-24 04:07:20.084575: step: 80/459, loss: 0.024102061986923218 2023-01-24 04:07:20.689504: step: 82/459, loss: 0.04825268313288689 2023-01-24 04:07:21.264238: step: 84/459, loss: 0.023964103311300278 2023-01-24 04:07:21.886183: step: 86/459, loss: 0.0005635428242385387 2023-01-24 04:07:22.559047: step: 88/459, loss: 0.0002502049901522696 2023-01-24 04:07:23.200497: step: 90/459, loss: 0.00495621794834733 2023-01-24 04:07:23.835178: step: 92/459, loss: 0.00039627013029530644 2023-01-24 04:07:24.453283: step: 94/459, loss: 0.014448517002165318 2023-01-24 04:07:25.022444: step: 96/459, loss: 0.004767943173646927 2023-01-24 04:07:25.595592: step: 98/459, loss: 0.015289267525076866 2023-01-24 04:07:26.246771: step: 100/459, loss: 0.013856136240065098 2023-01-24 04:07:26.874269: step: 102/459, loss: 0.0007732813828624785 2023-01-24 04:07:27.425337: step: 104/459, loss: 0.0002568109193816781 2023-01-24 04:07:28.071993: step: 106/459, loss: 0.0004965873085893691 2023-01-24 04:07:28.776442: step: 108/459, loss: 0.0021122058387845755 2023-01-24 04:07:29.533388: step: 110/459, loss: 0.004144372418522835 2023-01-24 04:07:30.162193: step: 112/459, loss: 0.004756892565637827 2023-01-24 04:07:30.725058: step: 114/459, loss: 0.002381142694503069 2023-01-24 04:07:31.356122: step: 116/459, loss: 0.020754801109433174 2023-01-24 04:07:31.987214: step: 118/459, loss: 0.10454518347978592 2023-01-24 04:07:32.726779: step: 120/459, loss: 0.028297726064920425 2023-01-24 04:07:33.422780: step: 122/459, loss: 0.005574744660407305 2023-01-24 04:07:33.967837: step: 124/459, loss: 0.016973963007330894 2023-01-24 04:07:34.593384: step: 126/459, loss: 0.007201109081506729 2023-01-24 04:07:35.235396: step: 128/459, loss: 0.010637158527970314 2023-01-24 04:07:35.822825: step: 130/459, loss: 0.0028511176351457834 2023-01-24 04:07:36.433977: step: 132/459, loss: 0.020945753902196884 2023-01-24 04:07:37.111952: step: 134/459, loss: 0.0013976346235722303 2023-01-24 04:07:37.665552: step: 136/459, loss: 0.008502321317791939 2023-01-24 04:07:38.346408: step: 138/459, loss: 0.0010485938983038068 2023-01-24 04:07:38.956595: step: 140/459, loss: 0.08727104216814041 2023-01-24 04:07:39.542908: step: 142/459, loss: 0.39688730239868164 2023-01-24 04:07:40.147787: step: 144/459, loss: 0.03312535211443901 2023-01-24 04:07:40.735584: step: 
146/459, loss: 0.00042548743658699095 2023-01-24 04:07:41.301208: step: 148/459, loss: 0.010134375654160976 2023-01-24 04:07:41.886527: step: 150/459, loss: 0.00611899746581912 2023-01-24 04:07:42.503816: step: 152/459, loss: 0.005969101097434759 2023-01-24 04:07:43.134696: step: 154/459, loss: 0.0005819158395752311 2023-01-24 04:07:43.785998: step: 156/459, loss: 0.00022858202282804996 2023-01-24 04:07:44.408248: step: 158/459, loss: 0.019775185734033585 2023-01-24 04:07:44.972255: step: 160/459, loss: 0.008141952566802502 2023-01-24 04:07:45.562081: step: 162/459, loss: 0.003346902783960104 2023-01-24 04:07:46.187814: step: 164/459, loss: 0.0025408074725419283 2023-01-24 04:07:46.812411: step: 166/459, loss: 0.000785558542702347 2023-01-24 04:07:47.408228: step: 168/459, loss: 0.00029618569533340633 2023-01-24 04:07:48.025786: step: 170/459, loss: 0.002312273485586047 2023-01-24 04:07:48.715829: step: 172/459, loss: 0.01426230650395155 2023-01-24 04:07:49.366481: step: 174/459, loss: 0.030189642682671547 2023-01-24 04:07:49.932595: step: 176/459, loss: 2.3464436531066895 2023-01-24 04:07:50.558453: step: 178/459, loss: 0.0048062424175441265 2023-01-24 04:07:51.158510: step: 180/459, loss: 0.3006010353565216 2023-01-24 04:07:51.757098: step: 182/459, loss: 0.035523004829883575 2023-01-24 04:07:52.338804: step: 184/459, loss: 0.0005746120004914701 2023-01-24 04:07:52.877294: step: 186/459, loss: 0.040806155651807785 2023-01-24 04:07:53.456845: step: 188/459, loss: 0.013805191032588482 2023-01-24 04:07:54.059707: step: 190/459, loss: 0.00824222806841135 2023-01-24 04:07:54.679548: step: 192/459, loss: 0.07149527966976166 2023-01-24 04:07:55.303776: step: 194/459, loss: 0.002386444713920355 2023-01-24 04:07:55.977635: step: 196/459, loss: 0.020124824717640877 2023-01-24 04:07:56.622980: step: 198/459, loss: 0.046991318464279175 2023-01-24 04:07:57.200516: step: 200/459, loss: 0.02499466761946678 2023-01-24 04:07:57.787571: step: 202/459, loss: 0.0028980860952287912 2023-01-24 04:07:58.413092: step: 204/459, loss: 0.02464083395898342 2023-01-24 04:07:59.057585: step: 206/459, loss: 0.014753873459994793 2023-01-24 04:07:59.705431: step: 208/459, loss: 0.019636135548353195 2023-01-24 04:08:00.385933: step: 210/459, loss: 0.022654026746749878 2023-01-24 04:08:01.062685: step: 212/459, loss: 0.0006082503823563457 2023-01-24 04:08:01.675817: step: 214/459, loss: 0.10615311563014984 2023-01-24 04:08:02.245946: step: 216/459, loss: 3.3894189982675016e-05 2023-01-24 04:08:02.869124: step: 218/459, loss: 0.003508320078253746 2023-01-24 04:08:03.471428: step: 220/459, loss: 0.023616772145032883 2023-01-24 04:08:04.162065: step: 222/459, loss: 0.042517293244600296 2023-01-24 04:08:04.837600: step: 224/459, loss: 0.022013146430253983 2023-01-24 04:08:05.483018: step: 226/459, loss: 0.01335515733808279 2023-01-24 04:08:06.103245: step: 228/459, loss: 0.015868976712226868 2023-01-24 04:08:06.711213: step: 230/459, loss: 0.0044401660561561584 2023-01-24 04:08:07.284916: step: 232/459, loss: 0.00018752989126369357 2023-01-24 04:08:07.892296: step: 234/459, loss: 0.0010080598294734955 2023-01-24 04:08:08.508153: step: 236/459, loss: 0.04101002216339111 2023-01-24 04:08:09.170153: step: 238/459, loss: 0.010745173320174217 2023-01-24 04:08:09.756660: step: 240/459, loss: 0.009074011817574501 2023-01-24 04:08:10.447815: step: 242/459, loss: 0.01851801760494709 2023-01-24 04:08:11.017933: step: 244/459, loss: 0.01092496793717146 2023-01-24 04:08:11.687028: step: 246/459, loss: 0.005880335345864296 2023-01-24 
04:08:12.327685: step: 248/459, loss: 0.6737524271011353 2023-01-24 04:08:12.964452: step: 250/459, loss: 0.0026756420265883207 2023-01-24 04:08:13.612025: step: 252/459, loss: 0.011230259202420712 2023-01-24 04:08:14.310076: step: 254/459, loss: 0.012013843283057213 2023-01-24 04:08:15.007686: step: 256/459, loss: 0.00428758142516017 2023-01-24 04:08:15.623840: step: 258/459, loss: 0.011744673363864422 2023-01-24 04:08:16.258421: step: 260/459, loss: 0.05281919240951538 2023-01-24 04:08:16.890732: step: 262/459, loss: 0.0017892499454319477 2023-01-24 04:08:17.499286: step: 264/459, loss: 0.017445413395762444 2023-01-24 04:08:18.049835: step: 266/459, loss: 0.0011455632047727704 2023-01-24 04:08:18.646760: step: 268/459, loss: 0.0016493318835273385 2023-01-24 04:08:19.194252: step: 270/459, loss: 0.006891575641930103 2023-01-24 04:08:19.784138: step: 272/459, loss: 0.032224591821432114 2023-01-24 04:08:20.422993: step: 274/459, loss: 0.034093063324689865 2023-01-24 04:08:21.119276: step: 276/459, loss: 0.007405088283121586 2023-01-24 04:08:21.744301: step: 278/459, loss: 0.011779053136706352 2023-01-24 04:08:22.329582: step: 280/459, loss: 0.7241467833518982 2023-01-24 04:08:22.904010: step: 282/459, loss: 0.0012657337356358767 2023-01-24 04:08:23.492374: step: 284/459, loss: 0.039371389895677567 2023-01-24 04:08:24.083670: step: 286/459, loss: 0.06573788076639175 2023-01-24 04:08:24.740508: step: 288/459, loss: 0.00020712993864435703 2023-01-24 04:08:25.411928: step: 290/459, loss: 0.5142025351524353 2023-01-24 04:08:26.106868: step: 292/459, loss: 0.019087759777903557 2023-01-24 04:08:26.697427: step: 294/459, loss: 0.028616633266210556 2023-01-24 04:08:27.282536: step: 296/459, loss: 0.006510465405881405 2023-01-24 04:08:27.890130: step: 298/459, loss: 0.013199466280639172 2023-01-24 04:08:28.485767: step: 300/459, loss: 0.008998338133096695 2023-01-24 04:08:29.110766: step: 302/459, loss: 0.0005881294491700828 2023-01-24 04:08:29.698171: step: 304/459, loss: 0.00041816840530373156 2023-01-24 04:08:30.403056: step: 306/459, loss: 0.026210928335785866 2023-01-24 04:08:30.969262: step: 308/459, loss: 0.018932493403553963 2023-01-24 04:08:31.648580: step: 310/459, loss: 0.02629949524998665 2023-01-24 04:08:32.254862: step: 312/459, loss: 0.09341802448034286 2023-01-24 04:08:32.850455: step: 314/459, loss: 0.016586707904934883 2023-01-24 04:08:33.507801: step: 316/459, loss: 0.04332401603460312 2023-01-24 04:08:34.119452: step: 318/459, loss: 0.009985694661736488 2023-01-24 04:08:34.699904: step: 320/459, loss: 0.001807078835554421 2023-01-24 04:08:35.366732: step: 322/459, loss: 0.0004148944572079927 2023-01-24 04:08:35.980295: step: 324/459, loss: 0.06174669787287712 2023-01-24 04:08:36.579185: step: 326/459, loss: 0.07690159231424332 2023-01-24 04:08:37.241640: step: 328/459, loss: 0.0037993593141436577 2023-01-24 04:08:37.918202: step: 330/459, loss: 0.029935311526060104 2023-01-24 04:08:38.553109: step: 332/459, loss: 0.0001321854506386444 2023-01-24 04:08:39.178268: step: 334/459, loss: 0.014615637250244617 2023-01-24 04:08:39.840591: step: 336/459, loss: 0.006259177811443806 2023-01-24 04:08:40.449598: step: 338/459, loss: 0.017365271225571632 2023-01-24 04:08:41.097158: step: 340/459, loss: 0.001758615137077868 2023-01-24 04:08:41.665339: step: 342/459, loss: 0.0018796725198626518 2023-01-24 04:08:42.332065: step: 344/459, loss: 0.00858315173536539 2023-01-24 04:08:42.933132: step: 346/459, loss: 0.08885439485311508 2023-01-24 04:08:43.652371: step: 348/459, loss: 
0.0004424923099577427 2023-01-24 04:08:44.257600: step: 350/459, loss: 0.003237442346289754 2023-01-24 04:08:44.858426: step: 352/459, loss: 0.0009531163377687335 2023-01-24 04:08:45.469927: step: 354/459, loss: 0.011494209058582783 2023-01-24 04:08:46.126997: step: 356/459, loss: 0.0006584760849364102 2023-01-24 04:08:46.727795: step: 358/459, loss: 0.012128937989473343 2023-01-24 04:08:47.391033: step: 360/459, loss: 0.018711885437369347 2023-01-24 04:08:47.974556: step: 362/459, loss: 0.02301083318889141 2023-01-24 04:08:48.638149: step: 364/459, loss: 0.03910105675458908 2023-01-24 04:08:49.283613: step: 366/459, loss: 0.022596631199121475 2023-01-24 04:08:49.887920: step: 368/459, loss: 0.003785855369642377 2023-01-24 04:08:50.521306: step: 370/459, loss: 0.0010505137033760548 2023-01-24 04:08:51.105383: step: 372/459, loss: 0.010706937871873379 2023-01-24 04:08:51.821145: step: 374/459, loss: 0.00818822905421257 2023-01-24 04:08:52.505755: step: 376/459, loss: 0.014704515226185322 2023-01-24 04:08:53.118405: step: 378/459, loss: 0.03434984013438225 2023-01-24 04:08:53.705481: step: 380/459, loss: 0.002268202370032668 2023-01-24 04:08:54.350256: step: 382/459, loss: 0.034281808882951736 2023-01-24 04:08:55.052265: step: 384/459, loss: 0.08662735670804977 2023-01-24 04:08:55.741011: step: 386/459, loss: 0.0006857308326289058 2023-01-24 04:08:56.372268: step: 388/459, loss: 0.01713232696056366 2023-01-24 04:08:56.976797: step: 390/459, loss: 0.014422386884689331 2023-01-24 04:08:57.601029: step: 392/459, loss: 0.007332586217671633 2023-01-24 04:08:58.272208: step: 394/459, loss: 0.009398088790476322 2023-01-24 04:08:58.881467: step: 396/459, loss: 0.015547394752502441 2023-01-24 04:08:59.479836: step: 398/459, loss: 0.001526706269942224 2023-01-24 04:09:00.136585: step: 400/459, loss: 0.0014916581567376852 2023-01-24 04:09:00.754812: step: 402/459, loss: 0.002648675348609686 2023-01-24 04:09:01.382431: step: 404/459, loss: 0.5449955463409424 2023-01-24 04:09:01.960616: step: 406/459, loss: 0.004185285419225693 2023-01-24 04:09:02.583201: step: 408/459, loss: 0.03824300691485405 2023-01-24 04:09:03.164925: step: 410/459, loss: 0.20170678198337555 2023-01-24 04:09:03.807200: step: 412/459, loss: 0.20407189428806305 2023-01-24 04:09:04.417130: step: 414/459, loss: 0.04713667184114456 2023-01-24 04:09:04.989733: step: 416/459, loss: 0.0196694228798151 2023-01-24 04:09:05.564484: step: 418/459, loss: 0.045683927834033966 2023-01-24 04:09:06.191201: step: 420/459, loss: 0.005383344832807779 2023-01-24 04:09:06.878686: step: 422/459, loss: 0.00680909538641572 2023-01-24 04:09:07.639154: step: 424/459, loss: 0.20871731638908386 2023-01-24 04:09:08.295882: step: 426/459, loss: 0.007884620688855648 2023-01-24 04:09:08.975708: step: 428/459, loss: 0.019705679267644882 2023-01-24 04:09:09.660169: step: 430/459, loss: 0.0007072322769090533 2023-01-24 04:09:10.324532: step: 432/459, loss: 0.0050585828721523285 2023-01-24 04:09:10.964119: step: 434/459, loss: 0.016251232475042343 2023-01-24 04:09:11.549358: step: 436/459, loss: 0.0009635327151045203 2023-01-24 04:09:12.146988: step: 438/459, loss: 0.0013315998949110508 2023-01-24 04:09:12.733356: step: 440/459, loss: 0.0003321488038636744 2023-01-24 04:09:13.328325: step: 442/459, loss: 0.023186830803751945 2023-01-24 04:09:13.992175: step: 444/459, loss: 0.007277747616171837 2023-01-24 04:09:14.587616: step: 446/459, loss: 0.0018746028654277325 2023-01-24 04:09:15.275295: step: 448/459, loss: 0.011354313232004642 2023-01-24 04:09:15.924110: step: 
450/459, loss: 0.0016336748376488686 2023-01-24 04:09:16.476664: step: 452/459, loss: 0.0027953903190791607 2023-01-24 04:09:17.084647: step: 454/459, loss: 0.02274574153125286 2023-01-24 04:09:17.667117: step: 456/459, loss: 0.02902575023472309 2023-01-24 04:09:18.287734: step: 458/459, loss: 0.011181644164025784 2023-01-24 04:09:18.837880: step: 460/459, loss: 0.0012673051096498966 2023-01-24 04:09:19.486685: step: 462/459, loss: 0.011060663498938084 2023-01-24 04:09:20.037483: step: 464/459, loss: 0.0024475958198308945 2023-01-24 04:09:20.620143: step: 466/459, loss: 0.00308630196377635 2023-01-24 04:09:21.235863: step: 468/459, loss: 0.01967892423272133 2023-01-24 04:09:21.948024: step: 470/459, loss: 0.0003626011894084513 2023-01-24 04:09:22.571649: step: 472/459, loss: 0.0006764113786630332 2023-01-24 04:09:23.177560: step: 474/459, loss: 0.013919954188168049 2023-01-24 04:09:23.859911: step: 476/459, loss: 0.005290847271680832 2023-01-24 04:09:24.508224: step: 478/459, loss: 0.0013289519120007753 2023-01-24 04:09:25.125848: step: 480/459, loss: 0.006136826705187559 2023-01-24 04:09:25.711346: step: 482/459, loss: 0.06720244884490967 2023-01-24 04:09:26.381034: step: 484/459, loss: 0.05993543937802315 2023-01-24 04:09:26.984668: step: 486/459, loss: 0.00029989719041623175 2023-01-24 04:09:27.604439: step: 488/459, loss: 0.00034163930104114115 2023-01-24 04:09:28.226777: step: 490/459, loss: 0.002111702458932996 2023-01-24 04:09:28.864691: step: 492/459, loss: 0.03711671754717827 2023-01-24 04:09:29.516505: step: 494/459, loss: 0.014390714466571808 2023-01-24 04:09:30.082906: step: 496/459, loss: 0.0017564320005476475 2023-01-24 04:09:30.742576: step: 498/459, loss: 0.0015869061462581158 2023-01-24 04:09:31.316227: step: 500/459, loss: 0.034588322043418884 2023-01-24 04:09:31.976147: step: 502/459, loss: 0.0032344490755349398 2023-01-24 04:09:32.599361: step: 504/459, loss: 0.003086258191615343 2023-01-24 04:09:33.232010: step: 506/459, loss: 0.0019517495529726148 2023-01-24 04:09:33.811489: step: 508/459, loss: 0.0012194005539640784 2023-01-24 04:09:34.521908: step: 510/459, loss: 0.02713714726269245 2023-01-24 04:09:35.173912: step: 512/459, loss: 0.006085523869842291 2023-01-24 04:09:35.845771: step: 514/459, loss: 0.004511498846113682 2023-01-24 04:09:36.460897: step: 516/459, loss: 0.1219790056347847 2023-01-24 04:09:37.061865: step: 518/459, loss: 0.026831679046154022 2023-01-24 04:09:37.655944: step: 520/459, loss: 0.04397260770201683 2023-01-24 04:09:38.309424: step: 522/459, loss: 0.051375020295381546 2023-01-24 04:09:38.960139: step: 524/459, loss: 0.010182647965848446 2023-01-24 04:09:39.597696: step: 526/459, loss: 0.009521910920739174 2023-01-24 04:09:40.131610: step: 528/459, loss: 3.132834535790607e-05 2023-01-24 04:09:40.744253: step: 530/459, loss: 0.000297321705147624 2023-01-24 04:09:41.300288: step: 532/459, loss: 0.02409237250685692 2023-01-24 04:09:41.795715: step: 534/459, loss: 0.0002756978792604059 2023-01-24 04:09:42.403443: step: 536/459, loss: 0.004404919687658548 2023-01-24 04:09:43.083249: step: 538/459, loss: 0.0032705115154385567 2023-01-24 04:09:43.696945: step: 540/459, loss: 0.0008311884594149888 2023-01-24 04:09:44.293127: step: 542/459, loss: 0.005442798603326082 2023-01-24 04:09:44.873542: step: 544/459, loss: 0.03349751606583595 2023-01-24 04:09:45.524862: step: 546/459, loss: 0.0033961657900363207 2023-01-24 04:09:46.171992: step: 548/459, loss: 0.004954996053129435 2023-01-24 04:09:46.774669: step: 550/459, loss: 0.05232950672507286 2023-01-24 
04:09:47.396188: step: 552/459, loss: 0.008407402783632278 2023-01-24 04:09:47.981312: step: 554/459, loss: 0.0025584539398550987 2023-01-24 04:09:48.646481: step: 556/459, loss: 0.0020825460087507963 2023-01-24 04:09:49.253875: step: 558/459, loss: 0.0367790125310421 2023-01-24 04:09:49.863565: step: 560/459, loss: 0.02127833664417267 2023-01-24 04:09:50.472093: step: 562/459, loss: 0.020986659452319145 2023-01-24 04:09:51.044398: step: 564/459, loss: 0.00929377879947424 2023-01-24 04:09:51.671222: step: 566/459, loss: 0.5389828085899353 2023-01-24 04:09:52.300955: step: 568/459, loss: 0.004883480258285999 2023-01-24 04:09:52.944555: step: 570/459, loss: 0.01004112884402275 2023-01-24 04:09:53.505608: step: 572/459, loss: 0.028491320088505745 2023-01-24 04:09:54.188787: step: 574/459, loss: 0.4233422875404358 2023-01-24 04:09:54.855154: step: 576/459, loss: 0.00026788271497935057 2023-01-24 04:09:55.501973: step: 578/459, loss: 0.0164122823625803 2023-01-24 04:09:56.179252: step: 580/459, loss: 0.003164044115692377 2023-01-24 04:09:56.906401: step: 582/459, loss: 0.009992402046918869 2023-01-24 04:09:57.488053: step: 584/459, loss: 0.0027654070872813463 2023-01-24 04:09:58.091986: step: 586/459, loss: 0.006795929744839668 2023-01-24 04:09:58.701649: step: 588/459, loss: 0.003366776742041111 2023-01-24 04:09:59.314929: step: 590/459, loss: 0.0011891394387930632 2023-01-24 04:09:59.963456: step: 592/459, loss: 0.06595584750175476 2023-01-24 04:10:00.603284: step: 594/459, loss: 0.005678780842572451 2023-01-24 04:10:01.218402: step: 596/459, loss: 0.010519878938794136 2023-01-24 04:10:01.819983: step: 598/459, loss: 0.035808153450489044 2023-01-24 04:10:02.442987: step: 600/459, loss: 0.043352194130420685 2023-01-24 04:10:03.045252: step: 602/459, loss: 0.06359440833330154 2023-01-24 04:10:03.702891: step: 604/459, loss: 0.004071968141943216 2023-01-24 04:10:04.279974: step: 606/459, loss: 0.027857612818479538 2023-01-24 04:10:04.924895: step: 608/459, loss: 0.6035400629043579 2023-01-24 04:10:05.541712: step: 610/459, loss: 0.010390124283730984 2023-01-24 04:10:06.055788: step: 612/459, loss: 0.06442967057228088 2023-01-24 04:10:06.680024: step: 614/459, loss: 0.037445250898599625 2023-01-24 04:10:07.396576: step: 616/459, loss: 0.014508340507745743 2023-01-24 04:10:08.011331: step: 618/459, loss: 0.002492282073944807 2023-01-24 04:10:08.574334: step: 620/459, loss: 0.021191207692027092 2023-01-24 04:10:09.167823: step: 622/459, loss: 0.012004340067505836 2023-01-24 04:10:09.836560: step: 624/459, loss: 0.00020055007189512253 2023-01-24 04:10:10.403684: step: 626/459, loss: 0.0022975171450525522 2023-01-24 04:10:11.033128: step: 628/459, loss: 0.024311311542987823 2023-01-24 04:10:11.627763: step: 630/459, loss: 0.0692073404788971 2023-01-24 04:10:12.299526: step: 632/459, loss: 0.04246670380234718 2023-01-24 04:10:12.959378: step: 634/459, loss: 0.0037233910989016294 2023-01-24 04:10:13.624086: step: 636/459, loss: 0.42715558409690857 2023-01-24 04:10:14.162742: step: 638/459, loss: 0.01752326823771 2023-01-24 04:10:14.798099: step: 640/459, loss: 0.02845185063779354 2023-01-24 04:10:15.455543: step: 642/459, loss: 0.036657724529504776 2023-01-24 04:10:16.227655: step: 644/459, loss: 0.013413453474640846 2023-01-24 04:10:16.838035: step: 646/459, loss: 0.023336591199040413 2023-01-24 04:10:17.471098: step: 648/459, loss: 0.03448215126991272 2023-01-24 04:10:18.028297: step: 650/459, loss: 0.000727008213289082 2023-01-24 04:10:18.610307: step: 652/459, loss: 0.003562175901606679 2023-01-24 
04:10:19.296087: step: 654/459, loss: 0.0011811585864052176 2023-01-24 04:10:19.892533: step: 656/459, loss: 0.022363219410181046 2023-01-24 04:10:20.532225: step: 658/459, loss: 0.009327179752290249 2023-01-24 04:10:21.141352: step: 660/459, loss: 0.024715043604373932 2023-01-24 04:10:21.745958: step: 662/459, loss: 0.007147836033254862 2023-01-24 04:10:22.417697: step: 664/459, loss: 0.046633731573820114 2023-01-24 04:10:23.075249: step: 666/459, loss: 0.005737719126045704 2023-01-24 04:10:23.694171: step: 668/459, loss: 0.0005011969478800893 2023-01-24 04:10:24.314891: step: 670/459, loss: 0.011912407353520393 2023-01-24 04:10:24.895022: step: 672/459, loss: 0.006340674590319395 2023-01-24 04:10:25.440592: step: 674/459, loss: 0.006371942348778248 2023-01-24 04:10:26.044609: step: 676/459, loss: 0.02448868192732334 2023-01-24 04:10:26.681130: step: 678/459, loss: 0.016052385792136192 2023-01-24 04:10:27.300254: step: 680/459, loss: 0.0629238411784172 2023-01-24 04:10:27.857218: step: 682/459, loss: 0.0890563353896141 2023-01-24 04:10:28.458959: step: 684/459, loss: 0.006578653119504452 2023-01-24 04:10:29.014615: step: 686/459, loss: 0.00042380692320875823 2023-01-24 04:10:29.600631: step: 688/459, loss: 0.014447435736656189 2023-01-24 04:10:30.224916: step: 690/459, loss: 0.017336901277303696 2023-01-24 04:10:30.808875: step: 692/459, loss: 0.026515789330005646 2023-01-24 04:10:31.487911: step: 694/459, loss: 0.024100106209516525 2023-01-24 04:10:32.137248: step: 696/459, loss: 0.02405495010316372 2023-01-24 04:10:32.748958: step: 698/459, loss: 0.004964218940585852 2023-01-24 04:10:33.328311: step: 700/459, loss: 0.018861735239624977 2023-01-24 04:10:33.953155: step: 702/459, loss: 0.001677427557297051 2023-01-24 04:10:34.519300: step: 704/459, loss: 0.12301169335842133 2023-01-24 04:10:35.154802: step: 706/459, loss: 0.00030029675690457225 2023-01-24 04:10:35.856207: step: 708/459, loss: 0.1455966979265213 2023-01-24 04:10:36.460946: step: 710/459, loss: 0.028869202360510826 2023-01-24 04:10:37.002643: step: 712/459, loss: 0.012148015201091766 2023-01-24 04:10:37.572059: step: 714/459, loss: 0.47847849130630493 2023-01-24 04:10:38.196976: step: 716/459, loss: 5.334663391113281 2023-01-24 04:10:38.866384: step: 718/459, loss: 0.009929070249199867 2023-01-24 04:10:39.471260: step: 720/459, loss: 0.029723187908530235 2023-01-24 04:10:40.080260: step: 722/459, loss: 2.969058186863549e-06 2023-01-24 04:10:40.674747: step: 724/459, loss: 0.16584184765815735 2023-01-24 04:10:41.307130: step: 726/459, loss: 0.06621190905570984 2023-01-24 04:10:41.912244: step: 728/459, loss: 0.022470921277999878 2023-01-24 04:10:42.531868: step: 730/459, loss: 0.005620911251753569 2023-01-24 04:10:43.120065: step: 732/459, loss: 0.008084583096206188 2023-01-24 04:10:43.684595: step: 734/459, loss: 0.004152902867645025 2023-01-24 04:10:44.269428: step: 736/459, loss: 0.025153599679470062 2023-01-24 04:10:44.866952: step: 738/459, loss: 0.005873425398021936 2023-01-24 04:10:45.493127: step: 740/459, loss: 0.004972093738615513 2023-01-24 04:10:46.111914: step: 742/459, loss: 0.0018803081475198269 2023-01-24 04:10:46.728334: step: 744/459, loss: 0.01827205717563629 2023-01-24 04:10:47.325471: step: 746/459, loss: 0.1217060461640358 2023-01-24 04:10:47.958841: step: 748/459, loss: 0.02127651870250702 2023-01-24 04:10:48.547798: step: 750/459, loss: 0.00012165631778771058 2023-01-24 04:10:49.164149: step: 752/459, loss: 0.012966379523277283 2023-01-24 04:10:49.807512: step: 754/459, loss: 0.0008357514161616564 
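For reference, the per-step records in this log follow a fixed pattern ("<timestamp>: step: <k>/459, loss: <value>"), each epoch closes with a "Loss:" summary, and in the per-language evaluation dicts the 'combined' field equals template F1 times slot F1 (e.g. 0.7368421052631579 * 0.3239057691190988 ≈ 0.2386674 for Dev Chinese at epoch 33). Below is a minimal sketch, assuming only the format visible here, for re-parsing these lines and cross-checking the summaries offline; the helper names are hypothetical and are not part of train.py.

# Illustrative only -- these helpers are NOT part of train.py; they simply
# mirror the log format shown above so the numbers can be cross-checked.
import re
from statistics import mean

# Matches records such as:
#   2023-01-24 04:10:49.807512: step: 754/459, loss: 0.0008357514161616564
STEP_RE = re.compile(
    r"(?P<ts>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+): "
    r"step: (?P<step>\d+)/\d+, loss: (?P<loss>[\d.eE+-]+)"
)

def epoch_losses(log_text: str) -> list[float]:
    """Collect every per-step loss value found in a chunk of log text."""
    return [float(m.group("loss")) for m in STEP_RE.finditer(log_text)]

def epoch_loss_summary(log_text: str) -> float:
    """Mean of the step losses; the per-epoch 'Loss:' line presumably
    aggregates the step values in roughly this way."""
    return mean(epoch_losses(log_text))

def combined_score(template_f1: float, slot_f1: float) -> float:
    """'combined' in the evaluation dicts equals template F1 * slot F1,
    judging from the values printed in this log."""
    return template_f1 * slot_f1

if __name__ == "__main__":
    # Epoch 33, Dev Chinese: reproduces 'combined': 0.2386674088245991
    print(combined_score(0.7368421052631579, 0.3239057691190988))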
2023-01-24 04:10:50.399578: step: 756/459, loss: 0.0271316971629858 2023-01-24 04:10:51.004018: step: 758/459, loss: 0.001153924036771059 2023-01-24 04:10:51.610993: step: 760/459, loss: 0.0026178353000432253 2023-01-24 04:10:52.244169: step: 762/459, loss: 0.0034306985326111317 2023-01-24 04:10:52.942562: step: 764/459, loss: 0.03802122175693512 2023-01-24 04:10:53.545730: step: 766/459, loss: 0.0060294256545603275 2023-01-24 04:10:54.219547: step: 768/459, loss: 0.1709095537662506 2023-01-24 04:10:54.826551: step: 770/459, loss: 0.025925062596797943 2023-01-24 04:10:55.438745: step: 772/459, loss: 0.006581531371921301 2023-01-24 04:10:56.010479: step: 774/459, loss: 0.020673314109444618 2023-01-24 04:10:56.614359: step: 776/459, loss: 0.0008638522704131901 2023-01-24 04:10:57.346304: step: 778/459, loss: 0.0282905213534832 2023-01-24 04:10:57.955194: step: 780/459, loss: 0.022342916578054428 2023-01-24 04:10:58.542420: step: 782/459, loss: 5.229704765952192e-05 2023-01-24 04:10:59.141053: step: 784/459, loss: 0.11020659655332565 2023-01-24 04:10:59.711392: step: 786/459, loss: 0.01470144186168909 2023-01-24 04:11:00.467037: step: 788/459, loss: 0.18128801882266998 2023-01-24 04:11:01.071241: step: 790/459, loss: 0.0015145005891099572 2023-01-24 04:11:01.682880: step: 792/459, loss: 0.003740820800885558 2023-01-24 04:11:02.378581: step: 794/459, loss: 0.008634502068161964 2023-01-24 04:11:03.026931: step: 796/459, loss: 0.016712309792637825 2023-01-24 04:11:03.600014: step: 798/459, loss: 0.004601193591952324 2023-01-24 04:11:04.225925: step: 800/459, loss: 0.003999548964202404 2023-01-24 04:11:04.837673: step: 802/459, loss: 0.0009289825102314353 2023-01-24 04:11:05.417123: step: 804/459, loss: 0.04089230298995972 2023-01-24 04:11:05.997250: step: 806/459, loss: 0.07924479246139526 2023-01-24 04:11:06.666187: step: 808/459, loss: 0.014865213073790073 2023-01-24 04:11:07.287878: step: 810/459, loss: 0.005566664971411228 2023-01-24 04:11:07.884705: step: 812/459, loss: 0.0001372306142002344 2023-01-24 04:11:08.449773: step: 814/459, loss: 0.011508110910654068 2023-01-24 04:11:09.167610: step: 816/459, loss: 0.0011181219015270472 2023-01-24 04:11:09.841211: step: 818/459, loss: 0.0012131164548918605 2023-01-24 04:11:10.508650: step: 820/459, loss: 0.015774039551615715 2023-01-24 04:11:11.131547: step: 822/459, loss: 0.0178654957562685 2023-01-24 04:11:11.743010: step: 824/459, loss: 0.00044875938328914344 2023-01-24 04:11:12.335033: step: 826/459, loss: 0.027306726202368736 2023-01-24 04:11:12.928667: step: 828/459, loss: 0.019184106960892677 2023-01-24 04:11:13.510665: step: 830/459, loss: 0.1272994577884674 2023-01-24 04:11:14.137107: step: 832/459, loss: 0.0020985237788408995 2023-01-24 04:11:14.798439: step: 834/459, loss: 0.0019267494790256023 2023-01-24 04:11:15.429368: step: 836/459, loss: 0.000700689444784075 2023-01-24 04:11:15.957227: step: 838/459, loss: 0.009986014105379581 2023-01-24 04:11:16.551730: step: 840/459, loss: 0.02355281263589859 2023-01-24 04:11:17.110890: step: 842/459, loss: 0.0018352032639086246 2023-01-24 04:11:17.708176: step: 844/459, loss: 1.0295764207839966 2023-01-24 04:11:18.361088: step: 846/459, loss: 0.02509164996445179 2023-01-24 04:11:19.050640: step: 848/459, loss: 0.005838803481310606 2023-01-24 04:11:19.692057: step: 850/459, loss: 0.060799360275268555 2023-01-24 04:11:20.325507: step: 852/459, loss: 0.07530485093593597 2023-01-24 04:11:20.932257: step: 854/459, loss: 0.009687742218375206 2023-01-24 04:11:21.551018: step: 856/459, loss: 
0.008123824372887611 2023-01-24 04:11:22.145140: step: 858/459, loss: 0.01223136018961668 2023-01-24 04:11:22.818371: step: 860/459, loss: 0.01263267733156681 2023-01-24 04:11:23.413235: step: 862/459, loss: 0.04277883097529411 2023-01-24 04:11:24.039436: step: 864/459, loss: 0.003983926959335804 2023-01-24 04:11:24.611431: step: 866/459, loss: 0.002405341248959303 2023-01-24 04:11:25.230637: step: 868/459, loss: 9.256871999241412e-05 2023-01-24 04:11:25.921331: step: 870/459, loss: 0.040867652744054794 2023-01-24 04:11:26.646699: step: 872/459, loss: 0.15317873656749725 2023-01-24 04:11:27.389714: step: 874/459, loss: 0.008864167146384716 2023-01-24 04:11:28.019559: step: 876/459, loss: 0.08257473260164261 2023-01-24 04:11:28.595641: step: 878/459, loss: 0.0012070717057213187 2023-01-24 04:11:29.207712: step: 880/459, loss: 0.007772853132337332 2023-01-24 04:11:29.825572: step: 882/459, loss: 0.0076830582693219185 2023-01-24 04:11:30.483869: step: 884/459, loss: 0.0001917563349707052 2023-01-24 04:11:31.039347: step: 886/459, loss: 0.0011206672061234713 2023-01-24 04:11:31.643690: step: 888/459, loss: 0.012900722213089466 2023-01-24 04:11:32.251376: step: 890/459, loss: 0.0020196211989969015 2023-01-24 04:11:32.901204: step: 892/459, loss: 0.021762851625680923 2023-01-24 04:11:33.463797: step: 894/459, loss: 0.016188472509384155 2023-01-24 04:11:34.066889: step: 896/459, loss: 0.07381707429885864 2023-01-24 04:11:34.707468: step: 898/459, loss: 0.0034147186670452356 2023-01-24 04:11:35.321784: step: 900/459, loss: 0.013090922497212887 2023-01-24 04:11:35.933378: step: 902/459, loss: 0.01140896137803793 2023-01-24 04:11:36.563460: step: 904/459, loss: 0.031491976231336594 2023-01-24 04:11:37.258789: step: 906/459, loss: 0.005830521695315838 2023-01-24 04:11:37.858357: step: 908/459, loss: 0.4985223412513733 2023-01-24 04:11:38.403863: step: 910/459, loss: 0.0010905717499554157 2023-01-24 04:11:38.998782: step: 912/459, loss: 0.07760102301836014 2023-01-24 04:11:39.642397: step: 914/459, loss: 0.007813592441380024 2023-01-24 04:11:40.274147: step: 916/459, loss: 0.027530884370207787 2023-01-24 04:11:40.897233: step: 918/459, loss: 0.007574743591248989 2023-01-24 04:11:41.348188: step: 920/459, loss: 0.01831836812198162 ================================================== Loss: 0.053 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3375684610263859, 'r': 0.3113060190869517, 'f1': 0.3239057691190988}, 'combined': 0.2386674088245991, 'epoch': 33} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34897699821211436, 'r': 0.29726495211341014, 'f1': 0.3210519855913119}, 'combined': 0.20547327077843958, 'epoch': 33} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3290319380337873, 'r': 0.3127988632920824, 'f1': 0.32071011858935294}, 'combined': 0.23631271896057585, 'epoch': 33} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3583812771333618, 'r': 0.29973706814790263, 'f1': 0.3264463118442504}, 'combined': 0.2089256395803202, 'epoch': 33} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3495493357431564, 'r': 0.3130688547832444, 'f1': 0.330304877819359}, 'combined': 0.24338254155110664, 'epoch': 33} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 
0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.361613629739557, 'r': 0.3155062452554605, 'f1': 0.3369901335510658}, 'combined': 0.24161556745170756, 'epoch': 33} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3223039215686274, 'r': 0.31309523809523804, 'f1': 0.3176328502415458}, 'combined': 0.21175523349436387, 'epoch': 33} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.22, 'r': 0.2391304347826087, 'f1': 0.22916666666666666}, 'combined': 0.11458333333333333, 'epoch': 33} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.38461538461538464, 'r': 0.1724137931034483, 'f1': 0.23809523809523808}, 'combined': 0.15873015873015872, 'epoch': 33} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 'epoch': 10} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 'epoch': 10} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'epoch': 10} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3234579439252337, 'r': 0.32836812144212524, 'f1': 0.32589453860640305}, 'combined': 0.2401328179205075, 'epoch': 25} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33820520545292077, 'r': 0.29673590233199043, 'f1': 0.3161163313667358}, 'combined': 0.20231445207471088, 'epoch': 25} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.32142857142857145, 'r': 0.391304347826087, 'f1': 0.35294117647058826}, 'combined': 0.17647058823529413, 'epoch': 25} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8} ****************************** Epoch: 34 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:14:18.371519: step: 2/459, loss: 0.0007860889309085906 2023-01-24 04:14:19.000707: step: 4/459, loss: 0.010249810293316841 2023-01-24 04:14:19.635773: step: 6/459, loss: 0.0009488826035521924 2023-01-24 04:14:20.281746: step: 8/459, loss: 0.004591220524162054 2023-01-24 04:14:20.919087: step: 10/459, loss: 0.008691627532243729 2023-01-24 04:14:21.532997: step: 12/459, 
loss: 0.015964772552251816 2023-01-24 04:14:22.067781: step: 14/459, loss: 0.001529695582576096 2023-01-24 04:14:22.680526: step: 16/459, loss: 0.0003984567883890122 2023-01-24 04:14:23.257605: step: 18/459, loss: 0.002817128086462617 2023-01-24 04:14:23.926059: step: 20/459, loss: 0.002905646339058876 2023-01-24 04:14:24.521212: step: 22/459, loss: 0.01058137696236372 2023-01-24 04:14:25.133396: step: 24/459, loss: 0.00015814868675079197 2023-01-24 04:14:25.765664: step: 26/459, loss: 0.003030400723218918 2023-01-24 04:14:26.388427: step: 28/459, loss: 0.011103030294179916 2023-01-24 04:14:27.044970: step: 30/459, loss: 0.08716919273138046 2023-01-24 04:14:27.748410: step: 32/459, loss: 0.0866660624742508 2023-01-24 04:14:28.377340: step: 34/459, loss: 0.006129427347332239 2023-01-24 04:14:29.044634: step: 36/459, loss: 0.01943746954202652 2023-01-24 04:14:29.659843: step: 38/459, loss: 0.004756723064929247 2023-01-24 04:14:30.211413: step: 40/459, loss: 0.0014846834819763899 2023-01-24 04:14:30.857329: step: 42/459, loss: 0.027703914791345596 2023-01-24 04:14:31.483312: step: 44/459, loss: 0.0015918330755084753 2023-01-24 04:14:32.111254: step: 46/459, loss: 0.007021760102361441 2023-01-24 04:14:32.809540: step: 48/459, loss: 0.00033182065817527473 2023-01-24 04:14:33.408388: step: 50/459, loss: 0.031184596940875053 2023-01-24 04:14:34.038627: step: 52/459, loss: 0.003148028627038002 2023-01-24 04:14:34.668905: step: 54/459, loss: 0.18196836113929749 2023-01-24 04:14:35.301420: step: 56/459, loss: 5.160931686987169e-05 2023-01-24 04:14:35.919518: step: 58/459, loss: 0.003668592544272542 2023-01-24 04:14:36.519114: step: 60/459, loss: 0.01284951251000166 2023-01-24 04:14:37.125960: step: 62/459, loss: 0.0006199380732141435 2023-01-24 04:14:37.745059: step: 64/459, loss: 0.004980988800525665 2023-01-24 04:14:38.318359: step: 66/459, loss: 0.004190549720078707 2023-01-24 04:14:38.910249: step: 68/459, loss: 0.0069898637011647224 2023-01-24 04:14:39.526097: step: 70/459, loss: 0.009675154462456703 2023-01-24 04:14:40.217933: step: 72/459, loss: 0.001007915590889752 2023-01-24 04:14:40.855798: step: 74/459, loss: 0.0001118038417189382 2023-01-24 04:14:41.521633: step: 76/459, loss: 0.006469197105616331 2023-01-24 04:14:42.242574: step: 78/459, loss: 0.14655280113220215 2023-01-24 04:14:42.813955: step: 80/459, loss: 0.01617272198200226 2023-01-24 04:14:43.444309: step: 82/459, loss: 0.015384165570139885 2023-01-24 04:14:44.098141: step: 84/459, loss: 0.0005781662184745073 2023-01-24 04:14:44.704362: step: 86/459, loss: 0.03586498275399208 2023-01-24 04:14:45.397929: step: 88/459, loss: 0.007554609794169664 2023-01-24 04:14:46.044392: step: 90/459, loss: 0.00018612443818710744 2023-01-24 04:14:46.670009: step: 92/459, loss: 0.010068940930068493 2023-01-24 04:14:47.270309: step: 94/459, loss: 0.01011352427303791 2023-01-24 04:14:47.835237: step: 96/459, loss: 0.008943099528551102 2023-01-24 04:14:48.510150: step: 98/459, loss: 0.0011164286406710744 2023-01-24 04:14:49.131773: step: 100/459, loss: 0.0007390590035356581 2023-01-24 04:14:49.734136: step: 102/459, loss: 0.0014026506105437875 2023-01-24 04:14:50.302774: step: 104/459, loss: 0.30786824226379395 2023-01-24 04:14:50.895958: step: 106/459, loss: 0.009366068057715893 2023-01-24 04:14:51.496781: step: 108/459, loss: 0.009402705356478691 2023-01-24 04:14:52.128369: step: 110/459, loss: 0.008307778276503086 2023-01-24 04:14:52.722036: step: 112/459, loss: 0.005159604363143444 2023-01-24 04:14:53.332111: step: 114/459, loss: 
0.16655026376247406 2023-01-24 04:14:53.963535: step: 116/459, loss: 0.004807945340871811 2023-01-24 04:14:54.535596: step: 118/459, loss: 0.008367490023374557 2023-01-24 04:14:55.090099: step: 120/459, loss: 0.019932175055146217 2023-01-24 04:14:55.720492: step: 122/459, loss: 0.044587068259716034 2023-01-24 04:14:56.354386: step: 124/459, loss: 0.08597313612699509 2023-01-24 04:14:56.975921: step: 126/459, loss: 0.00013591547030955553 2023-01-24 04:14:57.631360: step: 128/459, loss: 0.0005751277785748243 2023-01-24 04:14:58.312969: step: 130/459, loss: 0.00566474162042141 2023-01-24 04:14:58.971322: step: 132/459, loss: 0.034538500010967255 2023-01-24 04:14:59.609749: step: 134/459, loss: 0.0036692782305181026 2023-01-24 04:15:00.214719: step: 136/459, loss: 0.0020183518063277006 2023-01-24 04:15:00.808895: step: 138/459, loss: 0.04128638654947281 2023-01-24 04:15:01.400136: step: 140/459, loss: 0.020092710852622986 2023-01-24 04:15:02.071317: step: 142/459, loss: 0.004341184627264738 2023-01-24 04:15:02.718614: step: 144/459, loss: 0.002116719027981162 2023-01-24 04:15:03.328884: step: 146/459, loss: 0.003569945925846696 2023-01-24 04:15:03.926415: step: 148/459, loss: 0.04203037917613983 2023-01-24 04:15:04.534806: step: 150/459, loss: 0.0009957313304767013 2023-01-24 04:15:05.146308: step: 152/459, loss: 1.9111754894256592 2023-01-24 04:15:05.788192: step: 154/459, loss: 3.9551367081003264e-05 2023-01-24 04:15:06.415085: step: 156/459, loss: 0.01863774284720421 2023-01-24 04:15:07.148532: step: 158/459, loss: 0.00038196987588889897 2023-01-24 04:15:07.719964: step: 160/459, loss: 0.0003656781336758286 2023-01-24 04:15:08.289898: step: 162/459, loss: 0.0046348245814442635 2023-01-24 04:15:08.926871: step: 164/459, loss: 0.00030156070715747774 2023-01-24 04:15:09.585316: step: 166/459, loss: 0.002725000958889723 2023-01-24 04:15:10.204629: step: 168/459, loss: 0.055491507053375244 2023-01-24 04:15:10.822594: step: 170/459, loss: 0.06383760273456573 2023-01-24 04:15:11.441510: step: 172/459, loss: 0.05717936530709267 2023-01-24 04:15:12.064047: step: 174/459, loss: 0.4241546392440796 2023-01-24 04:15:12.628013: step: 176/459, loss: 0.00022821906895842403 2023-01-24 04:15:13.210400: step: 178/459, loss: 0.0007636290974915028 2023-01-24 04:15:13.844421: step: 180/459, loss: 0.0001275296526728198 2023-01-24 04:15:14.519010: step: 182/459, loss: 0.0033572532702237368 2023-01-24 04:15:15.121762: step: 184/459, loss: 1.8730525970458984 2023-01-24 04:15:15.716304: step: 186/459, loss: 0.007948494516313076 2023-01-24 04:15:16.270988: step: 188/459, loss: 0.011561090126633644 2023-01-24 04:15:16.847461: step: 190/459, loss: 0.011298141442239285 2023-01-24 04:15:17.521196: step: 192/459, loss: 0.0043578976765275 2023-01-24 04:15:18.118709: step: 194/459, loss: 0.016756681725382805 2023-01-24 04:15:18.773371: step: 196/459, loss: 0.4040021002292633 2023-01-24 04:15:19.396529: step: 198/459, loss: 0.0011383743258193135 2023-01-24 04:15:20.037346: step: 200/459, loss: 0.00039388632285408676 2023-01-24 04:15:20.632562: step: 202/459, loss: 0.00018658058252185583 2023-01-24 04:15:21.179235: step: 204/459, loss: 0.004395599476993084 2023-01-24 04:15:21.772031: step: 206/459, loss: 0.004891971591860056 2023-01-24 04:15:22.365480: step: 208/459, loss: 0.000173415188328363 2023-01-24 04:15:22.911970: step: 210/459, loss: 0.22322514653205872 2023-01-24 04:15:23.492215: step: 212/459, loss: 0.0035785080399364233 2023-01-24 04:15:24.177408: step: 214/459, loss: 0.700796902179718 2023-01-24 04:15:24.847034: 
step: 216/459, loss: 0.0028848538640886545 2023-01-24 04:15:25.448884: step: 218/459, loss: 0.008953171782195568 2023-01-24 04:15:26.073866: step: 220/459, loss: 0.015774179250001907 2023-01-24 04:15:26.682783: step: 222/459, loss: 0.0019019725732505322 2023-01-24 04:15:27.272097: step: 224/459, loss: 0.00014113073120824993 2023-01-24 04:15:27.892082: step: 226/459, loss: 0.007675721310079098 2023-01-24 04:15:28.554762: step: 228/459, loss: 0.10531344264745712 2023-01-24 04:15:29.178814: step: 230/459, loss: 0.004407827742397785 2023-01-24 04:15:29.748692: step: 232/459, loss: 0.007337385322898626 2023-01-24 04:15:30.451265: step: 234/459, loss: 0.05338071286678314 2023-01-24 04:15:31.019590: step: 236/459, loss: 0.018699046224355698 2023-01-24 04:15:31.668004: step: 238/459, loss: 0.017363207414746284 2023-01-24 04:15:32.242606: step: 240/459, loss: 0.004143559839576483 2023-01-24 04:15:32.813301: step: 242/459, loss: 0.004040404688566923 2023-01-24 04:15:33.399029: step: 244/459, loss: 0.007563333958387375 2023-01-24 04:15:33.996469: step: 246/459, loss: 0.0011583252344280481 2023-01-24 04:15:34.542621: step: 248/459, loss: 0.033594146370887756 2023-01-24 04:15:35.154409: step: 250/459, loss: 0.0018894925015047193 2023-01-24 04:15:35.759994: step: 252/459, loss: 0.022937264293432236 2023-01-24 04:15:36.417448: step: 254/459, loss: 0.03281261399388313 2023-01-24 04:15:37.066147: step: 256/459, loss: 0.02025078795850277 2023-01-24 04:15:37.757456: step: 258/459, loss: 0.25969353318214417 2023-01-24 04:15:38.335745: step: 260/459, loss: 0.003697809064760804 2023-01-24 04:15:38.979693: step: 262/459, loss: 0.0005481013213284314 2023-01-24 04:15:39.587671: step: 264/459, loss: 0.01675688475370407 2023-01-24 04:15:40.376251: step: 266/459, loss: 0.16855233907699585 2023-01-24 04:15:40.947202: step: 268/459, loss: 0.003816698445007205 2023-01-24 04:15:41.556536: step: 270/459, loss: 0.0035447808913886547 2023-01-24 04:15:42.193600: step: 272/459, loss: 0.1475132405757904 2023-01-24 04:15:42.859376: step: 274/459, loss: 0.00770155992358923 2023-01-24 04:15:43.495510: step: 276/459, loss: 0.02212068811058998 2023-01-24 04:15:44.140996: step: 278/459, loss: 0.04466799646615982 2023-01-24 04:15:44.735737: step: 280/459, loss: 0.011487913317978382 2023-01-24 04:15:45.295542: step: 282/459, loss: 0.005365549586713314 2023-01-24 04:15:45.855365: step: 284/459, loss: 0.007051353342831135 2023-01-24 04:15:46.420272: step: 286/459, loss: 0.0021620073821395636 2023-01-24 04:15:47.015056: step: 288/459, loss: 0.002503901021555066 2023-01-24 04:15:47.591643: step: 290/459, loss: 0.01842760667204857 2023-01-24 04:15:48.168968: step: 292/459, loss: 0.03479037061333656 2023-01-24 04:15:48.833605: step: 294/459, loss: 0.029852626845240593 2023-01-24 04:15:49.543933: step: 296/459, loss: 0.0784061998128891 2023-01-24 04:15:50.129743: step: 298/459, loss: 0.0006266637938097119 2023-01-24 04:15:50.667046: step: 300/459, loss: 0.005447412375360727 2023-01-24 04:15:51.314479: step: 302/459, loss: 0.01369614526629448 2023-01-24 04:15:51.917361: step: 304/459, loss: 0.009603941813111305 2023-01-24 04:15:52.550021: step: 306/459, loss: 0.012480669654905796 2023-01-24 04:15:53.125700: step: 308/459, loss: 0.0009774731006473303 2023-01-24 04:15:53.660460: step: 310/459, loss: 0.16063502430915833 2023-01-24 04:15:54.236136: step: 312/459, loss: 0.00701830955222249 2023-01-24 04:15:54.880533: step: 314/459, loss: 0.001031603547744453 2023-01-24 04:15:55.526917: step: 316/459, loss: 0.02196994423866272 2023-01-24 
04:15:56.150121: step: 318/459, loss: 0.0013149975566193461 2023-01-24 04:15:56.804945: step: 320/459, loss: 0.00858800858259201 2023-01-24 04:15:57.399292: step: 322/459, loss: 0.0007549761794507504 2023-01-24 04:15:58.058876: step: 324/459, loss: 0.007364786695688963 2023-01-24 04:15:58.659302: step: 326/459, loss: 0.0008814189932309091 2023-01-24 04:15:59.297163: step: 328/459, loss: 0.023569192737340927 2023-01-24 04:16:00.034161: step: 330/459, loss: 0.01432412676513195 2023-01-24 04:16:00.657815: step: 332/459, loss: 5.6051234423648566e-05 2023-01-24 04:16:01.296325: step: 334/459, loss: 0.01875581406056881 2023-01-24 04:16:01.900042: step: 336/459, loss: 0.0012227763654664159 2023-01-24 04:16:02.530142: step: 338/459, loss: 0.02281675860285759 2023-01-24 04:16:03.157607: step: 340/459, loss: 0.0038239762652665377 2023-01-24 04:16:03.792787: step: 342/459, loss: 0.005275454837828875 2023-01-24 04:16:04.390691: step: 344/459, loss: 0.00157101487275213 2023-01-24 04:16:04.991008: step: 346/459, loss: 0.0016736085526645184 2023-01-24 04:16:05.634181: step: 348/459, loss: 0.012024170719087124 2023-01-24 04:16:06.148871: step: 350/459, loss: 0.00013729136844631284 2023-01-24 04:16:06.826611: step: 352/459, loss: 0.0015833061188459396 2023-01-24 04:16:07.394241: step: 354/459, loss: 0.00024127826327458024 2023-01-24 04:16:08.054810: step: 356/459, loss: 0.027391595765948296 2023-01-24 04:16:08.712919: step: 358/459, loss: 0.07128644734621048 2023-01-24 04:16:09.279215: step: 360/459, loss: 0.024346010759472847 2023-01-24 04:16:09.880834: step: 362/459, loss: 0.004458078183233738 2023-01-24 04:16:10.485115: step: 364/459, loss: 0.011656737886369228 2023-01-24 04:16:11.094257: step: 366/459, loss: 0.0050717126578092575 2023-01-24 04:16:11.709479: step: 368/459, loss: 0.0022402203176170588 2023-01-24 04:16:12.357447: step: 370/459, loss: 0.008666587062180042 2023-01-24 04:16:12.976204: step: 372/459, loss: 0.001455556834116578 2023-01-24 04:16:13.572227: step: 374/459, loss: 0.008615369908511639 2023-01-24 04:16:14.202619: step: 376/459, loss: 0.046997275203466415 2023-01-24 04:16:14.844706: step: 378/459, loss: 0.015087798237800598 2023-01-24 04:16:15.479294: step: 380/459, loss: 0.02525317668914795 2023-01-24 04:16:16.159933: step: 382/459, loss: 0.000584287045057863 2023-01-24 04:16:16.820786: step: 384/459, loss: 0.005455180536955595 2023-01-24 04:16:17.529245: step: 386/459, loss: 0.0008051414624787867 2023-01-24 04:16:18.267210: step: 388/459, loss: 0.004125979263335466 2023-01-24 04:16:18.857343: step: 390/459, loss: 0.18705646693706512 2023-01-24 04:16:19.444025: step: 392/459, loss: 0.00045604753540828824 2023-01-24 04:16:20.016069: step: 394/459, loss: 0.000551038421690464 2023-01-24 04:16:20.627664: step: 396/459, loss: 0.05304507166147232 2023-01-24 04:16:21.260500: step: 398/459, loss: 0.0008951150812208652 2023-01-24 04:16:21.838562: step: 400/459, loss: 0.016994891688227654 2023-01-24 04:16:22.547451: step: 402/459, loss: 0.012469382025301456 2023-01-24 04:16:23.166641: step: 404/459, loss: 0.0728677287697792 2023-01-24 04:16:23.875645: step: 406/459, loss: 0.0227839145809412 2023-01-24 04:16:24.431345: step: 408/459, loss: 0.00035354745341464877 2023-01-24 04:16:25.093539: step: 410/459, loss: 0.0006694809417240322 2023-01-24 04:16:25.729860: step: 412/459, loss: 0.0023846940603107214 2023-01-24 04:16:26.506754: step: 414/459, loss: 0.049785032868385315 2023-01-24 04:16:27.140611: step: 416/459, loss: 0.0016152051975950599 2023-01-24 04:16:27.748715: step: 418/459, loss: 
0.008969232439994812 2023-01-24 04:16:28.356778: step: 420/459, loss: 0.001394223072566092 2023-01-24 04:16:28.986327: step: 422/459, loss: 0.03395439311861992 2023-01-24 04:16:29.645933: step: 424/459, loss: 0.004131595604121685 2023-01-24 04:16:30.327859: step: 426/459, loss: 0.0019155220361426473 2023-01-24 04:16:30.998393: step: 428/459, loss: 0.029087910428643227 2023-01-24 04:16:31.630523: step: 430/459, loss: 0.0001928538695210591 2023-01-24 04:16:32.266307: step: 432/459, loss: 0.02739032544195652 2023-01-24 04:16:32.859744: step: 434/459, loss: 0.009421548806130886 2023-01-24 04:16:33.484993: step: 436/459, loss: 0.003548928303644061 2023-01-24 04:16:34.059669: step: 438/459, loss: 0.10642753541469574 2023-01-24 04:16:34.653272: step: 440/459, loss: 0.01252240315079689 2023-01-24 04:16:35.295465: step: 442/459, loss: 0.002441196469590068 2023-01-24 04:16:36.017680: step: 444/459, loss: 0.001618976122699678 2023-01-24 04:16:36.638016: step: 446/459, loss: 0.0026764923240989447 2023-01-24 04:16:37.279666: step: 448/459, loss: 0.0008424219558946788 2023-01-24 04:16:37.886187: step: 450/459, loss: 0.008669265545904636 2023-01-24 04:16:38.470869: step: 452/459, loss: 0.01713527925312519 2023-01-24 04:16:39.095294: step: 454/459, loss: 1.7502540003988543e-06 2023-01-24 04:16:39.795882: step: 456/459, loss: 0.0475480891764164 2023-01-24 04:16:40.467635: step: 458/459, loss: 0.004869433119893074 2023-01-24 04:16:41.054011: step: 460/459, loss: 0.04146508499979973 2023-01-24 04:16:41.722230: step: 462/459, loss: 0.18128496408462524 2023-01-24 04:16:42.292228: step: 464/459, loss: 0.0017515025101602077 2023-01-24 04:16:42.914049: step: 466/459, loss: 0.07564689964056015 2023-01-24 04:16:43.560848: step: 468/459, loss: 0.03423915058374405 2023-01-24 04:16:44.144905: step: 470/459, loss: 0.013262455351650715 2023-01-24 04:16:44.769592: step: 472/459, loss: 0.06636325269937515 2023-01-24 04:16:45.334033: step: 474/459, loss: 0.00042963659507222474 2023-01-24 04:16:46.005145: step: 476/459, loss: 0.1509520262479782 2023-01-24 04:16:46.655341: step: 478/459, loss: 6.213364395080134e-05 2023-01-24 04:16:47.263242: step: 480/459, loss: 0.05072600394487381 2023-01-24 04:16:47.934588: step: 482/459, loss: 0.13511891663074493 2023-01-24 04:16:48.547981: step: 484/459, loss: 0.0012046585325151682 2023-01-24 04:16:49.333180: step: 486/459, loss: 0.002011776203289628 2023-01-24 04:16:50.023184: step: 488/459, loss: 0.028128791600465775 2023-01-24 04:16:50.606273: step: 490/459, loss: 0.002154703252017498 2023-01-24 04:16:51.315849: step: 492/459, loss: 0.05983719229698181 2023-01-24 04:16:51.985745: step: 494/459, loss: 0.006666385568678379 2023-01-24 04:16:52.576019: step: 496/459, loss: 0.0004259603447280824 2023-01-24 04:16:53.152004: step: 498/459, loss: 0.0046797506511211395 2023-01-24 04:16:53.748015: step: 500/459, loss: 0.004177418537437916 2023-01-24 04:16:54.366904: step: 502/459, loss: 0.013097495771944523 2023-01-24 04:16:54.986957: step: 504/459, loss: 0.040512196719646454 2023-01-24 04:16:55.535956: step: 506/459, loss: 0.12709373235702515 2023-01-24 04:16:56.054822: step: 508/459, loss: 0.0006315818754956126 2023-01-24 04:16:56.723210: step: 510/459, loss: 0.0017193396342918277 2023-01-24 04:16:57.362763: step: 512/459, loss: 0.017966579645872116 2023-01-24 04:16:57.954729: step: 514/459, loss: 0.0010718659032136202 2023-01-24 04:16:58.630465: step: 516/459, loss: 0.021632200106978416 2023-01-24 04:16:59.233068: step: 518/459, loss: 0.01837809756398201 2023-01-24 04:16:59.860590: step: 
520/459, loss: 0.035129718482494354 2023-01-24 04:17:00.503810: step: 522/459, loss: 0.0343184620141983 2023-01-24 04:17:01.091199: step: 524/459, loss: 0.0005368518177419901 2023-01-24 04:17:01.729074: step: 526/459, loss: 0.015627508983016014 2023-01-24 04:17:02.354834: step: 528/459, loss: 0.0009309164597652853 2023-01-24 04:17:02.944297: step: 530/459, loss: 0.001514695817604661 2023-01-24 04:17:03.582540: step: 532/459, loss: 0.01919461414217949 2023-01-24 04:17:04.215219: step: 534/459, loss: 0.0017390829743817449 2023-01-24 04:17:04.794082: step: 536/459, loss: 0.028022006154060364 2023-01-24 04:17:05.348778: step: 538/459, loss: 0.014422665350139141 2023-01-24 04:17:05.989069: step: 540/459, loss: 0.0006281867390498519 2023-01-24 04:17:06.595800: step: 542/459, loss: 2.7029365810449235e-05 2023-01-24 04:17:07.276349: step: 544/459, loss: 0.000302708096569404 2023-01-24 04:17:07.926891: step: 546/459, loss: 0.2154596596956253 2023-01-24 04:17:08.561228: step: 548/459, loss: 0.0007247959147207439 2023-01-24 04:17:09.130757: step: 550/459, loss: 0.00029065157286822796 2023-01-24 04:17:09.736675: step: 552/459, loss: 0.0020624264143407345 2023-01-24 04:17:10.390942: step: 554/459, loss: 0.16509822010993958 2023-01-24 04:17:11.012507: step: 556/459, loss: 0.01162815373390913 2023-01-24 04:17:11.640162: step: 558/459, loss: 0.03616531938314438 2023-01-24 04:17:12.297341: step: 560/459, loss: 0.0009025055333040655 2023-01-24 04:17:12.906682: step: 562/459, loss: 0.01266174204647541 2023-01-24 04:17:13.467870: step: 564/459, loss: 0.015124497003853321 2023-01-24 04:17:14.088794: step: 566/459, loss: 0.020881080999970436 2023-01-24 04:17:14.717125: step: 568/459, loss: 0.007379308342933655 2023-01-24 04:17:15.334459: step: 570/459, loss: 0.00013089452113490552 2023-01-24 04:17:15.894338: step: 572/459, loss: 0.01382952556014061 2023-01-24 04:17:16.483916: step: 574/459, loss: 0.0020426728297024965 2023-01-24 04:17:17.081956: step: 576/459, loss: 0.0052749463357031345 2023-01-24 04:17:17.719597: step: 578/459, loss: 0.03133789822459221 2023-01-24 04:17:18.391964: step: 580/459, loss: 0.005823656916618347 2023-01-24 04:17:18.993669: step: 582/459, loss: 0.000350086746038869 2023-01-24 04:17:19.631106: step: 584/459, loss: 0.017426060512661934 2023-01-24 04:17:20.222332: step: 586/459, loss: 0.004992197267711163 2023-01-24 04:17:20.843829: step: 588/459, loss: 0.02229374460875988 2023-01-24 04:17:21.527928: step: 590/459, loss: 0.023690788075327873 2023-01-24 04:17:22.130351: step: 592/459, loss: 0.002777965972200036 2023-01-24 04:17:22.695473: step: 594/459, loss: 0.00406084256246686 2023-01-24 04:17:23.276110: step: 596/459, loss: 3.586259117582813e-05 2023-01-24 04:17:23.784979: step: 598/459, loss: 0.003811314469203353 2023-01-24 04:17:24.417749: step: 600/459, loss: 0.003512974828481674 2023-01-24 04:17:25.127337: step: 602/459, loss: 0.008034116588532925 2023-01-24 04:17:25.767303: step: 604/459, loss: 0.00579334981739521 2023-01-24 04:17:26.410635: step: 606/459, loss: 0.006492722779512405 2023-01-24 04:17:27.071483: step: 608/459, loss: 0.3020217716693878 2023-01-24 04:17:27.751183: step: 610/459, loss: 0.03088923916220665 2023-01-24 04:17:28.325099: step: 612/459, loss: 0.027830081060528755 2023-01-24 04:17:28.952722: step: 614/459, loss: 0.027045242488384247 2023-01-24 04:17:29.576877: step: 616/459, loss: 0.0037044731434434652 2023-01-24 04:17:30.176598: step: 618/459, loss: 0.0030835915822535753 2023-01-24 04:17:30.803698: step: 620/459, loss: 0.023061081767082214 2023-01-24 
04:17:31.413535: step: 622/459, loss: 0.0006035319529473782 2023-01-24 04:17:32.081579: step: 624/459, loss: 0.01578308641910553 2023-01-24 04:17:32.720122: step: 626/459, loss: 0.008037710562348366 2023-01-24 04:17:33.284777: step: 628/459, loss: 0.0007042164215818048 2023-01-24 04:17:33.891270: step: 630/459, loss: 0.10164278000593185 2023-01-24 04:17:34.601914: step: 632/459, loss: 0.040341880172491074 2023-01-24 04:17:35.231121: step: 634/459, loss: 0.00016988224524538964 2023-01-24 04:17:35.847138: step: 636/459, loss: 0.010114146396517754 2023-01-24 04:17:36.450549: step: 638/459, loss: 0.025499001145362854 2023-01-24 04:17:37.046207: step: 640/459, loss: 0.03333574905991554 2023-01-24 04:17:37.715549: step: 642/459, loss: 0.049424756318330765 2023-01-24 04:17:38.369257: step: 644/459, loss: 0.04000137373805046 2023-01-24 04:17:38.950432: step: 646/459, loss: 0.02183620259165764 2023-01-24 04:17:39.519206: step: 648/459, loss: 0.00010560052032815292 2023-01-24 04:17:40.113951: step: 650/459, loss: 0.0011903399135917425 2023-01-24 04:17:40.718208: step: 652/459, loss: 0.005213462747633457 2023-01-24 04:17:41.345266: step: 654/459, loss: 0.0011060340330004692 2023-01-24 04:17:41.947043: step: 656/459, loss: 0.0053291115909814835 2023-01-24 04:17:42.577082: step: 658/459, loss: 0.001985063310712576 2023-01-24 04:17:43.143199: step: 660/459, loss: 0.0213131383061409 2023-01-24 04:17:43.782169: step: 662/459, loss: 0.015782397240400314 2023-01-24 04:17:44.382473: step: 664/459, loss: 0.0028920324984937906 2023-01-24 04:17:45.006585: step: 666/459, loss: 0.0032163639552891254 2023-01-24 04:17:45.582077: step: 668/459, loss: 0.021628819406032562 2023-01-24 04:17:46.191418: step: 670/459, loss: 0.04010579362511635 2023-01-24 04:17:46.806358: step: 672/459, loss: 0.0021388798486441374 2023-01-24 04:17:47.499565: step: 674/459, loss: 0.2246340960264206 2023-01-24 04:17:48.082198: step: 676/459, loss: 0.0007796427817083895 2023-01-24 04:17:48.697522: step: 678/459, loss: 0.0002553203667048365 2023-01-24 04:17:49.295589: step: 680/459, loss: 0.018508389592170715 2023-01-24 04:17:49.897999: step: 682/459, loss: 0.0016980215441435575 2023-01-24 04:17:50.517997: step: 684/459, loss: 0.003315340494737029 2023-01-24 04:17:51.109190: step: 686/459, loss: 0.017128175124526024 2023-01-24 04:17:51.703419: step: 688/459, loss: 0.0035442591179162264 2023-01-24 04:17:52.363641: step: 690/459, loss: 0.0019418736919760704 2023-01-24 04:17:52.988065: step: 692/459, loss: 0.006475924979895353 2023-01-24 04:17:53.624182: step: 694/459, loss: 0.024274472147226334 2023-01-24 04:17:54.247612: step: 696/459, loss: 0.015240455977618694 2023-01-24 04:17:54.779342: step: 698/459, loss: 0.0025623177643865347 2023-01-24 04:17:55.372936: step: 700/459, loss: 0.031662989407777786 2023-01-24 04:17:56.058440: step: 702/459, loss: 0.06850357353687286 2023-01-24 04:17:56.730917: step: 704/459, loss: 0.10227184742689133 2023-01-24 04:17:57.324276: step: 706/459, loss: 0.0005713459104299545 2023-01-24 04:17:57.964917: step: 708/459, loss: 0.03590019419789314 2023-01-24 04:17:58.570847: step: 710/459, loss: 0.008789176121354103 2023-01-24 04:17:59.208302: step: 712/459, loss: 0.0034968522377312183 2023-01-24 04:17:59.838317: step: 714/459, loss: 0.007357235066592693 2023-01-24 04:18:00.443844: step: 716/459, loss: 0.06302785128355026 2023-01-24 04:18:01.034018: step: 718/459, loss: 0.0404958501458168 2023-01-24 04:18:01.654531: step: 720/459, loss: 0.03087497502565384 2023-01-24 04:18:02.265763: step: 722/459, loss: 
0.017947660759091377 2023-01-24 04:18:02.905445: step: 724/459, loss: 0.0025375166442245245 2023-01-24 04:18:03.548883: step: 726/459, loss: 0.035579659044742584 2023-01-24 04:18:04.090152: step: 728/459, loss: 0.004083070904016495 2023-01-24 04:18:04.714495: step: 730/459, loss: 0.010836265981197357 2023-01-24 04:18:05.297554: step: 732/459, loss: 0.1626056432723999 2023-01-24 04:18:06.031336: step: 734/459, loss: 0.019213667139410973 2023-01-24 04:18:06.630354: step: 736/459, loss: 0.08662974834442139 2023-01-24 04:18:07.267583: step: 738/459, loss: 0.11571557819843292 2023-01-24 04:18:07.856745: step: 740/459, loss: 0.007360607851296663 2023-01-24 04:18:08.475454: step: 742/459, loss: 0.0018116887658834457 2023-01-24 04:18:09.099105: step: 744/459, loss: 0.02357209287583828 2023-01-24 04:18:09.711286: step: 746/459, loss: 7.207103772088885e-05 2023-01-24 04:18:10.337135: step: 748/459, loss: 0.0076721953228116035 2023-01-24 04:18:10.954041: step: 750/459, loss: 0.20146015286445618 2023-01-24 04:18:11.558891: step: 752/459, loss: 0.008595945313572884 2023-01-24 04:18:12.185340: step: 754/459, loss: 0.1585015058517456 2023-01-24 04:18:12.868394: step: 756/459, loss: 0.08982807397842407 2023-01-24 04:18:13.485105: step: 758/459, loss: 0.020125489681959152 2023-01-24 04:18:14.061341: step: 760/459, loss: 0.02603038400411606 2023-01-24 04:18:14.759298: step: 762/459, loss: 0.0287952721118927 2023-01-24 04:18:15.311871: step: 764/459, loss: 0.018568741157650948 2023-01-24 04:18:15.996221: step: 766/459, loss: 0.0014099938562139869 2023-01-24 04:18:16.614634: step: 768/459, loss: 0.0008694313000887632 2023-01-24 04:18:17.188770: step: 770/459, loss: 0.05171336233615875 2023-01-24 04:18:17.782790: step: 772/459, loss: 0.08012010902166367 2023-01-24 04:18:18.357524: step: 774/459, loss: 0.0806000828742981 2023-01-24 04:18:19.038306: step: 776/459, loss: 0.008333377540111542 2023-01-24 04:18:19.751971: step: 778/459, loss: 0.005798701196908951 2023-01-24 04:18:20.313071: step: 780/459, loss: 0.024948790669441223 2023-01-24 04:18:20.970009: step: 782/459, loss: 0.07529143244028091 2023-01-24 04:18:21.626256: step: 784/459, loss: 0.004068408161401749 2023-01-24 04:18:22.285857: step: 786/459, loss: 0.009293756447732449 2023-01-24 04:18:22.925222: step: 788/459, loss: 0.01859157346189022 2023-01-24 04:18:23.561705: step: 790/459, loss: 0.0028775108512490988 2023-01-24 04:18:24.198843: step: 792/459, loss: 0.0034278107341378927 2023-01-24 04:18:24.820319: step: 794/459, loss: 0.02108335867524147 2023-01-24 04:18:25.410509: step: 796/459, loss: 0.08835954964160919 2023-01-24 04:18:26.026610: step: 798/459, loss: 0.003705644980072975 2023-01-24 04:18:26.677537: step: 800/459, loss: 0.0014173726085573435 2023-01-24 04:18:27.322643: step: 802/459, loss: 0.008781119249761105 2023-01-24 04:18:27.918518: step: 804/459, loss: 0.010109219700098038 2023-01-24 04:18:28.484982: step: 806/459, loss: 0.0009421214344911277 2023-01-24 04:18:29.092685: step: 808/459, loss: 0.011189961805939674 2023-01-24 04:18:29.655835: step: 810/459, loss: 0.0015084936749190092 2023-01-24 04:18:30.235425: step: 812/459, loss: 0.003728449810296297 2023-01-24 04:18:30.822999: step: 814/459, loss: 0.0020721997134387493 2023-01-24 04:18:31.432507: step: 816/459, loss: 0.004565676674246788 2023-01-24 04:18:32.015196: step: 818/459, loss: 0.007553483359515667 2023-01-24 04:18:32.600710: step: 820/459, loss: 0.0038829550612717867 2023-01-24 04:18:33.207214: step: 822/459, loss: 0.0009696568013168871 2023-01-24 04:18:33.800832: step: 
824/459, loss: 0.029978731647133827 2023-01-24 04:18:34.408190: step: 826/459, loss: 0.07710614055395126 2023-01-24 04:18:34.994104: step: 828/459, loss: 0.014433737844228745 2023-01-24 04:18:35.657874: step: 830/459, loss: 0.00247353408485651 2023-01-24 04:18:36.306984: step: 832/459, loss: 0.02669692598283291 2023-01-24 04:18:36.960947: step: 834/459, loss: 0.061048723757267 2023-01-24 04:18:37.589925: step: 836/459, loss: 0.017433613538742065 2023-01-24 04:18:38.196269: step: 838/459, loss: 0.030245190486311913 2023-01-24 04:18:38.813967: step: 840/459, loss: 0.004377556964755058 2023-01-24 04:18:39.387115: step: 842/459, loss: 0.011431757360696793 2023-01-24 04:18:39.927567: step: 844/459, loss: 0.007997894659638405 2023-01-24 04:18:40.538660: step: 846/459, loss: 0.006004268303513527 2023-01-24 04:18:41.175561: step: 848/459, loss: 3.788273534155451e-05 2023-01-24 04:18:41.934749: step: 850/459, loss: 0.6789054274559021 2023-01-24 04:18:42.570866: step: 852/459, loss: 0.022716432809829712 2023-01-24 04:18:43.233857: step: 854/459, loss: 0.08831573277711868 2023-01-24 04:18:43.802292: step: 856/459, loss: 0.008294596336781979 2023-01-24 04:18:44.366401: step: 858/459, loss: 0.007837153039872646 2023-01-24 04:18:44.982047: step: 860/459, loss: 0.014799749478697777 2023-01-24 04:18:45.583407: step: 862/459, loss: 0.00016210542526096106 2023-01-24 04:18:46.197626: step: 864/459, loss: 0.0020414392929524183 2023-01-24 04:18:46.891614: step: 866/459, loss: 0.27313265204429626 2023-01-24 04:18:47.502958: step: 868/459, loss: 0.00026351361884735525 2023-01-24 04:18:48.167554: step: 870/459, loss: 0.0036526890471577644 2023-01-24 04:18:48.787021: step: 872/459, loss: 0.03348822519183159 2023-01-24 04:18:49.443822: step: 874/459, loss: 0.008737664669752121 2023-01-24 04:18:50.039095: step: 876/459, loss: 0.008489130064845085 2023-01-24 04:18:50.619871: step: 878/459, loss: 0.04339670017361641 2023-01-24 04:18:51.262549: step: 880/459, loss: 0.009717565961182117 2023-01-24 04:18:51.882029: step: 882/459, loss: 0.0014181024162098765 2023-01-24 04:18:52.478561: step: 884/459, loss: 0.02604641765356064 2023-01-24 04:18:53.223802: step: 886/459, loss: 0.0011944929137825966 2023-01-24 04:18:53.821152: step: 888/459, loss: 0.02115602232515812 2023-01-24 04:18:54.446526: step: 890/459, loss: 0.014743169769644737 2023-01-24 04:18:55.143910: step: 892/459, loss: 0.005062313750386238 2023-01-24 04:18:55.774275: step: 894/459, loss: 0.05136725679039955 2023-01-24 04:18:56.396287: step: 896/459, loss: 0.04566463455557823 2023-01-24 04:18:57.008125: step: 898/459, loss: 0.0704917162656784 2023-01-24 04:18:57.609624: step: 900/459, loss: 0.012433181516826153 2023-01-24 04:18:58.236382: step: 902/459, loss: 0.00010270643542753533 2023-01-24 04:18:58.850977: step: 904/459, loss: 0.00543243670836091 2023-01-24 04:18:59.485720: step: 906/459, loss: 0.012878728099167347 2023-01-24 04:19:00.080767: step: 908/459, loss: 0.004224705975502729 2023-01-24 04:19:00.666644: step: 910/459, loss: 0.05888562276959419 2023-01-24 04:19:01.254079: step: 912/459, loss: 0.014190301299095154 2023-01-24 04:19:01.848768: step: 914/459, loss: 0.009560298174619675 2023-01-24 04:19:02.506845: step: 916/459, loss: 0.00047281343722715974 2023-01-24 04:19:03.142661: step: 918/459, loss: 31.161100387573242 2023-01-24 04:19:03.586541: step: 920/459, loss: 0.0002821184752974659 ================================================== Loss: 0.105 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 
0.7368421052631579}, 'slot': {'p': 0.34013416028175975, 'r': 0.3072179512222346, 'f1': 0.32283920297929736}, 'combined': 0.2378815179847454, 'epoch': 34} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.35746651327994183, 'r': 0.3031965971728961, 'f1': 0.3281025645747031}, 'combined': 0.20998564132780995, 'epoch': 34} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3274974645030426, 'r': 0.30636859582542697, 'f1': 0.31658088235294124}, 'combined': 0.23327012383900933, 'epoch': 34} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3565801129982209, 'r': 0.30017562239668416, 'f1': 0.3259557597594793}, 'combined': 0.20861168624606674, 'epoch': 34} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35511068899929893, 'r': 0.3086161206103964, 'f1': 0.33023491484604856}, 'combined': 0.24333098988656207, 'epoch': 34} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.35604337103220574, 'r': 0.3190771193949822, 'f1': 0.3365482008604135}, 'combined': 0.2412987100508625, 'epoch': 34} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2883771929824561, 'r': 0.31309523809523804, 'f1': 0.30022831050228305}, 'combined': 0.20015220700152203, 'epoch': 34} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2403846153846154, 'r': 0.2717391304347826, 'f1': 0.25510204081632654}, 'combined': 0.12755102040816327, 'epoch': 34} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2727272727272727, 'r': 0.10344827586206896, 'f1': 0.15}, 'combined': 0.09999999999999999, 'epoch': 34} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 'epoch': 10} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 'epoch': 10} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'epoch': 10} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3234579439252337, 'r': 0.32836812144212524, 'f1': 0.32589453860640305}, 'combined': 0.2401328179205075, 'epoch': 25} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33820520545292077, 'r': 0.29673590233199043, 'f1': 0.3161163313667358}, 'combined': 0.20231445207471088, 'epoch': 25} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.32142857142857145, 'r': 0.391304347826087, 'f1': 0.35294117647058826}, 'combined': 0.17647058823529413, 'epoch': 25} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 
'combined': 0.25085484780204426, 'epoch': 8} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8} ****************************** Epoch: 35 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:21:39.806582: step: 2/459, loss: 0.00025308204931207 2023-01-24 04:21:40.407675: step: 4/459, loss: 0.044493623077869415 2023-01-24 04:21:41.045501: step: 6/459, loss: 0.00033613733830861747 2023-01-24 04:21:41.550232: step: 8/459, loss: 0.0013123495737090707 2023-01-24 04:21:42.123229: step: 10/459, loss: 0.002234055893495679 2023-01-24 04:21:42.758446: step: 12/459, loss: 0.02359500154852867 2023-01-24 04:21:43.344279: step: 14/459, loss: 0.003449276089668274 2023-01-24 04:21:43.927910: step: 16/459, loss: 0.005354395601898432 2023-01-24 04:21:44.481269: step: 18/459, loss: 4.772974534716923e-06 2023-01-24 04:21:45.075688: step: 20/459, loss: 0.001359670772217214 2023-01-24 04:21:45.692940: step: 22/459, loss: 0.005185521673411131 2023-01-24 04:21:46.312496: step: 24/459, loss: 0.0009291199967265129 2023-01-24 04:21:46.864787: step: 26/459, loss: 0.04919461905956268 2023-01-24 04:21:47.463605: step: 28/459, loss: 0.004470402374863625 2023-01-24 04:21:48.046864: step: 30/459, loss: 0.0026480769738554955 2023-01-24 04:21:48.621110: step: 32/459, loss: 0.01564500667154789 2023-01-24 04:21:49.280008: step: 34/459, loss: 0.017132727429270744 2023-01-24 04:21:49.947063: step: 36/459, loss: 0.003394892206415534 2023-01-24 04:21:50.569402: step: 38/459, loss: 0.001436628052033484 2023-01-24 04:21:51.127956: step: 40/459, loss: 0.0016174473566934466 2023-01-24 04:21:51.799285: step: 42/459, loss: 0.011074287816882133 2023-01-24 04:21:52.367379: step: 44/459, loss: 0.0030588055960834026 2023-01-24 04:21:52.908050: step: 46/459, loss: 0.00576825113967061 2023-01-24 04:21:53.504154: step: 48/459, loss: 0.002338344231247902 2023-01-24 04:21:54.054812: step: 50/459, loss: 0.06409210711717606 2023-01-24 04:21:54.594921: step: 52/459, loss: 0.004857231862843037 2023-01-24 04:21:55.200987: step: 54/459, loss: 0.0026065611746162176 2023-01-24 04:21:55.798171: step: 56/459, loss: 0.010542052797973156 2023-01-24 04:21:56.473755: step: 58/459, loss: 0.023362692445516586 2023-01-24 04:21:57.085077: step: 60/459, loss: 0.008654361590743065 2023-01-24 04:21:57.675017: step: 62/459, loss: 0.0023208679631352425 2023-01-24 04:21:58.276843: step: 64/459, loss: 0.01783641241490841 2023-01-24 04:21:59.043970: step: 66/459, loss: 0.0016561944503337145 2023-01-24 04:21:59.601970: step: 68/459, loss: 0.03231317922472954 2023-01-24 04:22:00.293087: step: 70/459, loss: 0.001251981477253139 2023-01-24 04:22:00.968527: step: 72/459, loss: 0.0003285213024355471 2023-01-24 04:22:01.575811: step: 74/459, loss: 0.07137850672006607 2023-01-24 04:22:02.291622: step: 76/459, loss: 0.010407398454844952 2023-01-24 04:22:02.946372: step: 78/459, loss: 0.04132827743887901 2023-01-24 04:22:03.558023: step: 80/459, loss: 0.017366986721754074 2023-01-24 04:22:04.188463: step: 82/459, loss: 
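The per-language blocks in the epoch summaries above are internally consistent: every 'f1' is the harmonic mean of its 'p' and 'r', and 'combined' equals the template F1 multiplied by the slot F1 (for the epoch-34 Dev Chinese block, 0.7368421... x 0.3228392... = 0.2378815...). The sketch below only reproduces that arithmetic as a check; the helper names are illustrative and are not claimed to exist in train.py.

def f1(p: float, r: float) -> float:
    # Harmonic mean of precision and recall; 0.0 when both are zero.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined_score(result: dict) -> float:
    # 'combined' in these summaries matches template F1 times slot F1.
    return result["template"]["f1"] * result["slot"]["f1"]

# Epoch-34 Dev Chinese numbers copied from the summary above:
dev_chinese = {
    "template": {"p": 1.0, "r": 0.5833333333333334, "f1": 0.7368421052631579},
    "slot": {"p": 0.34013416028175975, "r": 0.3072179512222346, "f1": 0.32283920297929736},
}
assert abs(f1(1.0, 0.5833333333333334) - dev_chinese["template"]["f1"]) < 1e-6
assert abs(combined_score(dev_chinese) - 0.2378815179847454) < 1e-6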
0.006312056910246611 2023-01-24 04:22:04.776411: step: 84/459, loss: 0.017900880426168442 2023-01-24 04:22:05.433351: step: 86/459, loss: 0.07399985939264297 2023-01-24 04:22:06.050299: step: 88/459, loss: 0.034830257296562195 2023-01-24 04:22:06.769459: step: 90/459, loss: 0.007110928650945425 2023-01-24 04:22:07.372067: step: 92/459, loss: 0.03926921635866165 2023-01-24 04:22:07.991508: step: 94/459, loss: 0.00552390469238162 2023-01-24 04:22:08.624764: step: 96/459, loss: 0.014599250629544258 2023-01-24 04:22:09.199483: step: 98/459, loss: 0.010613271035254002 2023-01-24 04:22:09.789757: step: 100/459, loss: 0.0005143986199982464 2023-01-24 04:22:10.511234: step: 102/459, loss: 0.002621954772621393 2023-01-24 04:22:11.087918: step: 104/459, loss: 0.007485226262360811 2023-01-24 04:22:11.705565: step: 106/459, loss: 0.027486765757203102 2023-01-24 04:22:12.265313: step: 108/459, loss: 0.00025257401284761727 2023-01-24 04:22:12.993520: step: 110/459, loss: 0.0008993315859697759 2023-01-24 04:22:13.605850: step: 112/459, loss: 0.00034234343911521137 2023-01-24 04:22:14.247382: step: 114/459, loss: 0.012018491514027119 2023-01-24 04:22:14.788408: step: 116/459, loss: 0.0001157944425358437 2023-01-24 04:22:15.371416: step: 118/459, loss: 0.037362419068813324 2023-01-24 04:22:16.031222: step: 120/459, loss: 0.17577512562274933 2023-01-24 04:22:16.647144: step: 122/459, loss: 0.010567279532551765 2023-01-24 04:22:17.228455: step: 124/459, loss: 5.262974082143046e-05 2023-01-24 04:22:17.797305: step: 126/459, loss: 0.0007626119768247008 2023-01-24 04:22:18.395696: step: 128/459, loss: 0.00010865677904803306 2023-01-24 04:22:18.954891: step: 130/459, loss: 0.0003811316273640841 2023-01-24 04:22:19.591435: step: 132/459, loss: 0.005093438550829887 2023-01-24 04:22:20.189812: step: 134/459, loss: 0.041510939598083496 2023-01-24 04:22:20.814586: step: 136/459, loss: 0.002894484205171466 2023-01-24 04:22:21.436392: step: 138/459, loss: 0.050213996320962906 2023-01-24 04:22:22.080465: step: 140/459, loss: 0.044753313064575195 2023-01-24 04:22:22.729833: step: 142/459, loss: 0.05436250567436218 2023-01-24 04:22:23.358351: step: 144/459, loss: 0.0065245842561125755 2023-01-24 04:22:23.961824: step: 146/459, loss: 0.000999734504148364 2023-01-24 04:22:24.622995: step: 148/459, loss: 0.04912003502249718 2023-01-24 04:22:25.274565: step: 150/459, loss: 0.026891672983765602 2023-01-24 04:22:25.895606: step: 152/459, loss: 0.0006054192781448364 2023-01-24 04:22:26.516591: step: 154/459, loss: 0.007133795879781246 2023-01-24 04:22:27.110282: step: 156/459, loss: 0.0032862324733287096 2023-01-24 04:22:27.725987: step: 158/459, loss: 0.0006351360934786499 2023-01-24 04:22:28.316733: step: 160/459, loss: 0.002353057963773608 2023-01-24 04:22:28.957827: step: 162/459, loss: 0.19310495257377625 2023-01-24 04:22:29.606057: step: 164/459, loss: 0.01280034240335226 2023-01-24 04:22:30.177211: step: 166/459, loss: 0.009632880799472332 2023-01-24 04:22:30.818163: step: 168/459, loss: 0.0071096536703407764 2023-01-24 04:22:31.396647: step: 170/459, loss: 0.014668348245322704 2023-01-24 04:22:32.016670: step: 172/459, loss: 0.008236506953835487 2023-01-24 04:22:32.542381: step: 174/459, loss: 0.010233258828520775 2023-01-24 04:22:33.152421: step: 176/459, loss: 0.002676465315744281 2023-01-24 04:22:33.781758: step: 178/459, loss: 0.005501189734786749 2023-01-24 04:22:34.444948: step: 180/459, loss: 0.007558638229966164 2023-01-24 04:22:35.098500: step: 182/459, loss: 0.1724318563938141 2023-01-24 04:22:35.819699: step: 
184/459, loss: 0.0068884133361279964 2023-01-24 04:22:36.491975: step: 186/459, loss: 0.0022080079652369022 2023-01-24 04:22:37.115878: step: 188/459, loss: 0.002082538791000843 2023-01-24 04:22:37.750527: step: 190/459, loss: 0.028827359899878502 2023-01-24 04:22:38.291569: step: 192/459, loss: 0.004779159091413021 2023-01-24 04:22:38.915867: step: 194/459, loss: 0.014133209362626076 2023-01-24 04:22:39.468054: step: 196/459, loss: 0.0017841943772509694 2023-01-24 04:22:40.079739: step: 198/459, loss: 0.05967428535223007 2023-01-24 04:22:40.672597: step: 200/459, loss: 0.00044419881305657327 2023-01-24 04:22:41.314432: step: 202/459, loss: 0.0001883496152004227 2023-01-24 04:22:41.957670: step: 204/459, loss: 0.1405482292175293 2023-01-24 04:22:42.525042: step: 206/459, loss: 0.016752174124121666 2023-01-24 04:22:43.185432: step: 208/459, loss: 0.007668415550142527 2023-01-24 04:22:43.752741: step: 210/459, loss: 0.005182027351111174 2023-01-24 04:22:44.374644: step: 212/459, loss: 0.011354922316968441 2023-01-24 04:22:44.925929: step: 214/459, loss: 0.000855928985401988 2023-01-24 04:22:45.499746: step: 216/459, loss: 0.04961840435862541 2023-01-24 04:22:46.139264: step: 218/459, loss: 0.013072722591459751 2023-01-24 04:22:46.756565: step: 220/459, loss: 0.01118654478341341 2023-01-24 04:22:47.359821: step: 222/459, loss: 0.0035923991817981005 2023-01-24 04:22:47.963172: step: 224/459, loss: 0.0005054284702055156 2023-01-24 04:22:48.601538: step: 226/459, loss: 0.19428902864456177 2023-01-24 04:22:49.167803: step: 228/459, loss: 0.000533199985511601 2023-01-24 04:22:49.817982: step: 230/459, loss: 0.0004933419404551387 2023-01-24 04:22:50.439760: step: 232/459, loss: 0.003911236766725779 2023-01-24 04:22:51.058449: step: 234/459, loss: 0.005634204018861055 2023-01-24 04:22:51.732192: step: 236/459, loss: 0.004364881198853254 2023-01-24 04:22:52.301621: step: 238/459, loss: 0.0011421628296375275 2023-01-24 04:22:52.869352: step: 240/459, loss: 0.006687885615974665 2023-01-24 04:22:53.552995: step: 242/459, loss: 0.01318820845335722 2023-01-24 04:22:54.171553: step: 244/459, loss: 0.0026252903044223785 2023-01-24 04:22:54.775843: step: 246/459, loss: 0.014646554365754128 2023-01-24 04:22:55.390040: step: 248/459, loss: 0.012338598258793354 2023-01-24 04:22:56.000442: step: 250/459, loss: 0.006288991309702396 2023-01-24 04:22:56.556815: step: 252/459, loss: 0.03231591358780861 2023-01-24 04:22:57.154480: step: 254/459, loss: 0.004561786539852619 2023-01-24 04:22:57.731709: step: 256/459, loss: 0.02214815653860569 2023-01-24 04:22:58.332131: step: 258/459, loss: 0.0012291172752156854 2023-01-24 04:22:58.924884: step: 260/459, loss: 2.0922317504882812 2023-01-24 04:22:59.499301: step: 262/459, loss: 0.002893906319513917 2023-01-24 04:23:00.031437: step: 264/459, loss: 0.00010280057904310524 2023-01-24 04:23:00.539472: step: 266/459, loss: 0.004266326315701008 2023-01-24 04:23:01.131414: step: 268/459, loss: 0.5027912855148315 2023-01-24 04:23:01.740869: step: 270/459, loss: 0.1367712765932083 2023-01-24 04:23:02.368187: step: 272/459, loss: 0.01846173033118248 2023-01-24 04:23:02.977652: step: 274/459, loss: 0.007023866754025221 2023-01-24 04:23:03.536125: step: 276/459, loss: 0.0002193251420976594 2023-01-24 04:23:04.186831: step: 278/459, loss: 0.02142767421901226 2023-01-24 04:23:04.764182: step: 280/459, loss: 0.0058035412803292274 2023-01-24 04:23:05.310317: step: 282/459, loss: 0.03739609941840172 2023-01-24 04:23:05.986535: step: 284/459, loss: 0.04558763653039932 2023-01-24 
04:23:06.654458: step: 286/459, loss: 0.00013968907296657562 2023-01-24 04:23:07.287707: step: 288/459, loss: 0.03569885343313217 2023-01-24 04:23:07.907038: step: 290/459, loss: 8.789401363173965e-06 2023-01-24 04:23:08.546662: step: 292/459, loss: 0.019907992333173752 2023-01-24 04:23:09.169341: step: 294/459, loss: 0.008512802422046661 2023-01-24 04:23:09.855248: step: 296/459, loss: 0.0026580707635730505 2023-01-24 04:23:10.486595: step: 298/459, loss: 0.046905551105737686 2023-01-24 04:23:11.095795: step: 300/459, loss: 0.00040703476406633854 2023-01-24 04:23:11.673971: step: 302/459, loss: 0.005366281606256962 2023-01-24 04:23:12.267026: step: 304/459, loss: 0.008830832317471504 2023-01-24 04:23:12.873740: step: 306/459, loss: 0.0015437598340213299 2023-01-24 04:23:13.502991: step: 308/459, loss: 0.00010398068116046488 2023-01-24 04:23:14.179038: step: 310/459, loss: 0.018116073682904243 2023-01-24 04:23:14.814035: step: 312/459, loss: 0.03331749886274338 2023-01-24 04:23:15.402398: step: 314/459, loss: 0.08716511726379395 2023-01-24 04:23:16.034410: step: 316/459, loss: 0.0011188503121957183 2023-01-24 04:23:16.677188: step: 318/459, loss: 0.008484085090458393 2023-01-24 04:23:17.235119: step: 320/459, loss: 0.0015959369484335184 2023-01-24 04:23:17.910619: step: 322/459, loss: 0.00929470919072628 2023-01-24 04:23:18.536110: step: 324/459, loss: 0.041975077241659164 2023-01-24 04:23:19.137730: step: 326/459, loss: 0.0039605614729225636 2023-01-24 04:23:19.734954: step: 328/459, loss: 2.4164028218365274e-06 2023-01-24 04:23:20.327454: step: 330/459, loss: 0.005172932520508766 2023-01-24 04:23:20.942052: step: 332/459, loss: 0.0001340008748229593 2023-01-24 04:23:21.578262: step: 334/459, loss: 0.015897560864686966 2023-01-24 04:23:22.268569: step: 336/459, loss: 0.0006547840894199908 2023-01-24 04:23:22.885050: step: 338/459, loss: 1.475692629814148 2023-01-24 04:23:23.563853: step: 340/459, loss: 0.035278841853141785 2023-01-24 04:23:24.169378: step: 342/459, loss: 0.001897878828458488 2023-01-24 04:23:24.743748: step: 344/459, loss: 0.0008263358613476157 2023-01-24 04:23:25.335595: step: 346/459, loss: 0.00043235780321992934 2023-01-24 04:23:25.962623: step: 348/459, loss: 0.004193694796413183 2023-01-24 04:23:26.635238: step: 350/459, loss: 0.00465042470023036 2023-01-24 04:23:27.240584: step: 352/459, loss: 0.007630867417901754 2023-01-24 04:23:27.816501: step: 354/459, loss: 0.10285904258489609 2023-01-24 04:23:28.531968: step: 356/459, loss: 0.0002140537981176749 2023-01-24 04:23:29.186175: step: 358/459, loss: 0.00035353749990463257 2023-01-24 04:23:29.769112: step: 360/459, loss: 0.011626995168626308 2023-01-24 04:23:30.368482: step: 362/459, loss: 0.00583620835095644 2023-01-24 04:23:30.970785: step: 364/459, loss: 0.013120722025632858 2023-01-24 04:23:31.608291: step: 366/459, loss: 0.000548016803804785 2023-01-24 04:23:32.258955: step: 368/459, loss: 0.019501665607094765 2023-01-24 04:23:32.874504: step: 370/459, loss: 0.0014858634676784277 2023-01-24 04:23:33.494807: step: 372/459, loss: 0.12163041532039642 2023-01-24 04:23:34.156747: step: 374/459, loss: 0.004442072473466396 2023-01-24 04:23:34.757399: step: 376/459, loss: 0.0024248934350907803 2023-01-24 04:23:35.411624: step: 378/459, loss: 0.041603196412324905 2023-01-24 04:23:36.067690: step: 380/459, loss: 0.036823805421590805 2023-01-24 04:23:36.616118: step: 382/459, loss: 0.005131484940648079 2023-01-24 04:23:37.237913: step: 384/459, loss: 0.004287485033273697 2023-01-24 04:23:37.887462: step: 386/459, loss: 
0.03633405268192291 2023-01-24 04:23:38.532550: step: 388/459, loss: 0.007234261371195316 2023-01-24 04:23:39.121479: step: 390/459, loss: 0.0062802438624203205 2023-01-24 04:23:39.765889: step: 392/459, loss: 0.0031141105573624372 2023-01-24 04:23:40.458484: step: 394/459, loss: 0.018524277955293655 2023-01-24 04:23:41.169244: step: 396/459, loss: 0.018405387178063393 2023-01-24 04:23:41.814980: step: 398/459, loss: 0.00274671777151525 2023-01-24 04:23:42.456140: step: 400/459, loss: 0.008136649616062641 2023-01-24 04:23:43.053086: step: 402/459, loss: 0.005749825853854418 2023-01-24 04:23:43.666208: step: 404/459, loss: 0.03479289636015892 2023-01-24 04:23:44.302184: step: 406/459, loss: 0.004475208465009928 2023-01-24 04:23:44.986720: step: 408/459, loss: 0.1161685660481453 2023-01-24 04:23:45.603643: step: 410/459, loss: 0.0002308586990693584 2023-01-24 04:23:46.240592: step: 412/459, loss: 0.0002066814195131883 2023-01-24 04:23:46.874401: step: 414/459, loss: 0.00032622762955725193 2023-01-24 04:23:47.546559: step: 416/459, loss: 0.0025414912961423397 2023-01-24 04:23:48.207014: step: 418/459, loss: 0.06870175153017044 2023-01-24 04:23:48.827247: step: 420/459, loss: 0.005804012063890696 2023-01-24 04:23:49.514211: step: 422/459, loss: 0.0018977796426042914 2023-01-24 04:23:50.146403: step: 424/459, loss: 0.002171766012907028 2023-01-24 04:23:50.819487: step: 426/459, loss: 0.06975379586219788 2023-01-24 04:23:51.411442: step: 428/459, loss: 0.0002764964592643082 2023-01-24 04:23:52.096763: step: 430/459, loss: 0.015982823446393013 2023-01-24 04:23:52.625270: step: 432/459, loss: 0.006675679702311754 2023-01-24 04:23:53.238572: step: 434/459, loss: 0.0024055459070950747 2023-01-24 04:23:53.870097: step: 436/459, loss: 0.0662955492734909 2023-01-24 04:23:54.505219: step: 438/459, loss: 6.316152575891465e-05 2023-01-24 04:23:55.098457: step: 440/459, loss: 0.0033320768270641565 2023-01-24 04:23:55.762686: step: 442/459, loss: 0.8163314461708069 2023-01-24 04:23:56.344136: step: 444/459, loss: 0.1023760586977005 2023-01-24 04:23:57.012985: step: 446/459, loss: 0.006648886017501354 2023-01-24 04:23:57.592933: step: 448/459, loss: 0.0008548784535378218 2023-01-24 04:23:58.195943: step: 450/459, loss: 0.026081282645463943 2023-01-24 04:23:58.798654: step: 452/459, loss: 0.002303008921444416 2023-01-24 04:23:59.360037: step: 454/459, loss: 0.0705748051404953 2023-01-24 04:24:00.025040: step: 456/459, loss: 0.0374598391354084 2023-01-24 04:24:00.600856: step: 458/459, loss: 0.0067210872657597065 2023-01-24 04:24:01.240558: step: 460/459, loss: 0.012095203623175621 2023-01-24 04:24:01.855898: step: 462/459, loss: 0.061501070857048035 2023-01-24 04:24:02.488623: step: 464/459, loss: 0.0022208557929843664 2023-01-24 04:24:03.173419: step: 466/459, loss: 0.019331367686390877 2023-01-24 04:24:03.746227: step: 468/459, loss: 0.3124731779098511 2023-01-24 04:24:04.256668: step: 470/459, loss: 0.01096438243985176 2023-01-24 04:24:04.870553: step: 472/459, loss: 0.004091776907444 2023-01-24 04:24:05.492742: step: 474/459, loss: 0.00546304089948535 2023-01-24 04:24:06.172537: step: 476/459, loss: 0.01889909617602825 2023-01-24 04:24:06.774956: step: 478/459, loss: 0.03583874925971031 2023-01-24 04:24:07.517873: step: 480/459, loss: 0.04841809719800949 2023-01-24 04:24:08.157439: step: 482/459, loss: 0.002199372975155711 2023-01-24 04:24:08.788776: step: 484/459, loss: 0.0673699602484703 2023-01-24 04:24:09.404116: step: 486/459, loss: 0.004642018582671881 2023-01-24 04:24:10.053706: step: 488/459, 
loss: 0.04605437442660332 2023-01-24 04:24:10.728574: step: 490/459, loss: 0.0339825265109539 2023-01-24 04:24:11.410871: step: 492/459, loss: 0.022106818854808807 2023-01-24 04:24:12.019120: step: 494/459, loss: 0.016604112461209297 2023-01-24 04:24:12.639014: step: 496/459, loss: 0.002754894318059087 2023-01-24 04:24:13.251246: step: 498/459, loss: 0.00016604278062004596 2023-01-24 04:24:13.802234: step: 500/459, loss: 0.008361655287444592 2023-01-24 04:24:14.379528: step: 502/459, loss: 0.006930883973836899 2023-01-24 04:24:14.994814: step: 504/459, loss: 0.039267316460609436 2023-01-24 04:24:15.637622: step: 506/459, loss: 0.00014083950372878462 2023-01-24 04:24:16.204487: step: 508/459, loss: 0.002570618875324726 2023-01-24 04:24:16.898096: step: 510/459, loss: 0.0014593283412978053 2023-01-24 04:24:17.473478: step: 512/459, loss: 0.022709013894200325 2023-01-24 04:24:18.068712: step: 514/459, loss: 0.023752374574542046 2023-01-24 04:24:18.732273: step: 516/459, loss: 0.052120186388492584 2023-01-24 04:24:19.320302: step: 518/459, loss: 0.00045538286212831736 2023-01-24 04:24:19.895033: step: 520/459, loss: 0.05180826038122177 2023-01-24 04:24:20.515160: step: 522/459, loss: 0.0006036697304807603 2023-01-24 04:24:21.162608: step: 524/459, loss: 0.011513419449329376 2023-01-24 04:24:21.778065: step: 526/459, loss: 0.0008173630922101438 2023-01-24 04:24:22.415795: step: 528/459, loss: 0.00048180599696934223 2023-01-24 04:24:23.038430: step: 530/459, loss: 0.5898366570472717 2023-01-24 04:24:23.653157: step: 532/459, loss: 0.002282390370965004 2023-01-24 04:24:24.276011: step: 534/459, loss: 0.04616870731115341 2023-01-24 04:24:24.861989: step: 536/459, loss: 0.019422881305217743 2023-01-24 04:24:25.469004: step: 538/459, loss: 0.01343695167452097 2023-01-24 04:24:26.130614: step: 540/459, loss: 0.007073636632412672 2023-01-24 04:24:26.774262: step: 542/459, loss: 0.006326089613139629 2023-01-24 04:24:27.398397: step: 544/459, loss: 0.0016911908751353621 2023-01-24 04:24:27.999125: step: 546/459, loss: 2.445797508698888e-05 2023-01-24 04:24:28.640796: step: 548/459, loss: 0.05303031578660011 2023-01-24 04:24:29.273206: step: 550/459, loss: 0.017651986330747604 2023-01-24 04:24:29.903986: step: 552/459, loss: 0.0008285782532766461 2023-01-24 04:24:30.480019: step: 554/459, loss: 0.0012043239548802376 2023-01-24 04:24:31.011169: step: 556/459, loss: 0.0005281729972921312 2023-01-24 04:24:31.633107: step: 558/459, loss: 0.024934815242886543 2023-01-24 04:24:32.222287: step: 560/459, loss: 0.0053094676695764065 2023-01-24 04:24:32.843913: step: 562/459, loss: 0.12566441297531128 2023-01-24 04:24:33.482403: step: 564/459, loss: 0.1641334891319275 2023-01-24 04:24:34.132798: step: 566/459, loss: 0.12675562500953674 2023-01-24 04:24:34.745324: step: 568/459, loss: 0.004711163695901632 2023-01-24 04:24:35.414423: step: 570/459, loss: 0.03767078369855881 2023-01-24 04:24:36.008044: step: 572/459, loss: 0.0003803926520049572 2023-01-24 04:24:36.657641: step: 574/459, loss: 0.0019207686418667436 2023-01-24 04:24:37.207954: step: 576/459, loss: 0.01750117726624012 2023-01-24 04:24:37.823901: step: 578/459, loss: 0.008968131616711617 2023-01-24 04:24:38.376971: step: 580/459, loss: 0.0016322456067427993 2023-01-24 04:24:38.978952: step: 582/459, loss: 0.0002034214703598991 2023-01-24 04:24:39.489852: step: 584/459, loss: 0.006486763712018728 2023-01-24 04:24:40.133839: step: 586/459, loss: 0.00033099332358688116 2023-01-24 04:24:40.701604: step: 588/459, loss: 0.008027268573641777 2023-01-24 
04:24:41.314698: step: 590/459, loss: 0.0019385401392355561 2023-01-24 04:24:41.957945: step: 592/459, loss: 0.0024016278330236673 2023-01-24 04:24:42.651844: step: 594/459, loss: 0.0011318308534100652 2023-01-24 04:24:43.227110: step: 596/459, loss: 0.0009446285548619926 2023-01-24 04:24:43.845974: step: 598/459, loss: 0.02639007940888405 2023-01-24 04:24:44.441176: step: 600/459, loss: 0.009477665647864342 2023-01-24 04:24:45.128320: step: 602/459, loss: 0.04928409680724144 2023-01-24 04:24:45.758299: step: 604/459, loss: 0.013479984365403652 2023-01-24 04:24:46.360938: step: 606/459, loss: 0.021479936316609383 2023-01-24 04:24:46.977892: step: 608/459, loss: 0.03707192465662956 2023-01-24 04:24:47.703668: step: 610/459, loss: 0.008271031081676483 2023-01-24 04:24:48.309245: step: 612/459, loss: 0.000420246971771121 2023-01-24 04:24:48.900495: step: 614/459, loss: 0.010172002017498016 2023-01-24 04:24:49.525331: step: 616/459, loss: 0.0005459258682094514 2023-01-24 04:24:50.142747: step: 618/459, loss: 0.0476258285343647 2023-01-24 04:24:50.790006: step: 620/459, loss: 0.05626082420349121 2023-01-24 04:24:51.401460: step: 622/459, loss: 0.006587665528059006 2023-01-24 04:24:52.057347: step: 624/459, loss: 0.0074072182178497314 2023-01-24 04:24:52.653511: step: 626/459, loss: 0.005806801375001669 2023-01-24 04:24:53.265653: step: 628/459, loss: 0.14550554752349854 2023-01-24 04:24:53.882608: step: 630/459, loss: 0.004687511827796698 2023-01-24 04:24:54.440612: step: 632/459, loss: 0.0047716437838971615 2023-01-24 04:24:55.027268: step: 634/459, loss: 0.014874998480081558 2023-01-24 04:24:55.626396: step: 636/459, loss: 0.0011395785259082913 2023-01-24 04:24:56.261265: step: 638/459, loss: 0.0646631047129631 2023-01-24 04:24:56.905880: step: 640/459, loss: 0.003085371805354953 2023-01-24 04:24:57.510125: step: 642/459, loss: 0.0014781395439058542 2023-01-24 04:24:58.136971: step: 644/459, loss: 0.004187800455838442 2023-01-24 04:24:58.721082: step: 646/459, loss: 0.0012328576995059848 2023-01-24 04:24:59.445370: step: 648/459, loss: 0.0006554028368555009 2023-01-24 04:25:00.112961: step: 650/459, loss: 0.5875191688537598 2023-01-24 04:25:00.804924: step: 652/459, loss: 0.0012801483971998096 2023-01-24 04:25:01.398613: step: 654/459, loss: 0.006269217934459448 2023-01-24 04:25:02.037557: step: 656/459, loss: 0.08932843804359436 2023-01-24 04:25:02.699849: step: 658/459, loss: 0.0036072763614356518 2023-01-24 04:25:03.298609: step: 660/459, loss: 0.008181900717318058 2023-01-24 04:25:03.944464: step: 662/459, loss: 0.0019396060379222035 2023-01-24 04:25:04.580216: step: 664/459, loss: 0.022912954911589622 2023-01-24 04:25:05.196284: step: 666/459, loss: 0.11745593696832657 2023-01-24 04:25:05.865959: step: 668/459, loss: 0.013080962933599949 2023-01-24 04:25:06.504539: step: 670/459, loss: 0.022021431475877762 2023-01-24 04:25:07.110783: step: 672/459, loss: 0.00389297166839242 2023-01-24 04:25:07.729693: step: 674/459, loss: 0.04552401229739189 2023-01-24 04:25:08.350600: step: 676/459, loss: 0.0046214759349823 2023-01-24 04:25:09.014560: step: 678/459, loss: 0.00728224404156208 2023-01-24 04:25:09.617936: step: 680/459, loss: 0.020494213327765465 2023-01-24 04:25:10.220016: step: 682/459, loss: 8.739739132579416e-05 2023-01-24 04:25:10.795269: step: 684/459, loss: 0.05346272885799408 2023-01-24 04:25:11.371280: step: 686/459, loss: 0.001410383265465498 2023-01-24 04:25:12.000669: step: 688/459, loss: 0.048638321459293365 2023-01-24 04:25:12.566545: step: 690/459, loss: 
1.0638326784828678e-05 2023-01-24 04:25:13.183281: step: 692/459, loss: 0.0006707563297823071 2023-01-24 04:25:13.739774: step: 694/459, loss: 7.280844874912873e-05 2023-01-24 04:25:14.379720: step: 696/459, loss: 0.00175387819763273 2023-01-24 04:25:15.051355: step: 698/459, loss: 0.0096982317045331 2023-01-24 04:25:15.663353: step: 700/459, loss: 0.014241906814277172 2023-01-24 04:25:16.353656: step: 702/459, loss: 0.02670326828956604 2023-01-24 04:25:16.999493: step: 704/459, loss: 0.25520506501197815 2023-01-24 04:25:17.631762: step: 706/459, loss: 0.020205536857247353 2023-01-24 04:25:18.202244: step: 708/459, loss: 0.0005195518024265766 2023-01-24 04:25:18.916709: step: 710/459, loss: 0.0011306783417239785 2023-01-24 04:25:19.636961: step: 712/459, loss: 0.003353656502440572 2023-01-24 04:25:20.282761: step: 714/459, loss: 0.9897342920303345 2023-01-24 04:25:20.877440: step: 716/459, loss: 4.535547759587644e-06 2023-01-24 04:25:21.500676: step: 718/459, loss: 0.00019061955390498042 2023-01-24 04:25:22.044874: step: 720/459, loss: 0.20923273265361786 2023-01-24 04:25:22.660911: step: 722/459, loss: 0.12310470640659332 2023-01-24 04:25:23.365109: step: 724/459, loss: 0.005366629455238581 2023-01-24 04:25:23.919500: step: 726/459, loss: 0.8781313300132751 2023-01-24 04:25:24.521352: step: 728/459, loss: 0.00028791988734155893 2023-01-24 04:25:25.176057: step: 730/459, loss: 0.031882647424936295 2023-01-24 04:25:25.738052: step: 732/459, loss: 0.19220344722270966 2023-01-24 04:25:26.331858: step: 734/459, loss: 0.008342336863279343 2023-01-24 04:25:27.037176: step: 736/459, loss: 0.01752450317144394 2023-01-24 04:25:27.644196: step: 738/459, loss: 0.009018899872899055 2023-01-24 04:25:28.275408: step: 740/459, loss: 0.0006698431679978967 2023-01-24 04:25:28.897976: step: 742/459, loss: 0.007922131568193436 2023-01-24 04:25:29.509133: step: 744/459, loss: 0.01694413274526596 2023-01-24 04:25:30.102547: step: 746/459, loss: 0.019642140716314316 2023-01-24 04:25:30.706276: step: 748/459, loss: 0.006335902493447065 2023-01-24 04:25:31.354715: step: 750/459, loss: 0.02308112569153309 2023-01-24 04:25:31.976359: step: 752/459, loss: 0.0051317666657269 2023-01-24 04:25:32.601750: step: 754/459, loss: 0.011938615702092648 2023-01-24 04:25:33.202260: step: 756/459, loss: 0.04203590750694275 2023-01-24 04:25:33.835672: step: 758/459, loss: 0.008675435557961464 2023-01-24 04:25:34.549780: step: 760/459, loss: 0.09816062450408936 2023-01-24 04:25:35.147531: step: 762/459, loss: 0.00031605700496584177 2023-01-24 04:25:35.818836: step: 764/459, loss: 0.03534446656703949 2023-01-24 04:25:36.525111: step: 766/459, loss: 0.011505681090056896 2023-01-24 04:25:37.101435: step: 768/459, loss: 0.0011104628210887313 2023-01-24 04:25:37.767282: step: 770/459, loss: 0.028989067301154137 2023-01-24 04:25:38.329693: step: 772/459, loss: 0.0012274616165086627 2023-01-24 04:25:38.931987: step: 774/459, loss: 0.004409284330904484 2023-01-24 04:25:39.520892: step: 776/459, loss: 0.0013985480181872845 2023-01-24 04:25:40.086771: step: 778/459, loss: 0.13511745631694794 2023-01-24 04:25:40.678755: step: 780/459, loss: 0.00249215099029243 2023-01-24 04:25:41.361302: step: 782/459, loss: 0.0027956152334809303 2023-01-24 04:25:41.955868: step: 784/459, loss: 0.00014861769159324467 2023-01-24 04:25:42.569630: step: 786/459, loss: 0.3912188410758972 2023-01-24 04:25:43.176260: step: 788/459, loss: 0.03731011599302292 2023-01-24 04:25:43.729215: step: 790/459, loss: 0.025254379957914352 2023-01-24 04:25:44.352046: step: 
792/459, loss: 0.07405083626508713 2023-01-24 04:25:44.981157: step: 794/459, loss: 0.0693843811750412 2023-01-24 04:25:45.726587: step: 796/459, loss: 0.028746824711561203 2023-01-24 04:25:46.323309: step: 798/459, loss: 0.051227908581495285 2023-01-24 04:25:46.961821: step: 800/459, loss: 0.012634546495974064 2023-01-24 04:25:47.520977: step: 802/459, loss: 0.005080983974039555 2023-01-24 04:25:48.173037: step: 804/459, loss: 0.015379478223621845 2023-01-24 04:25:48.792703: step: 806/459, loss: 0.00024078629212453961 2023-01-24 04:25:49.413927: step: 808/459, loss: 2.642307117639575e-05 2023-01-24 04:25:50.071255: step: 810/459, loss: 0.017973218113183975 2023-01-24 04:25:50.733505: step: 812/459, loss: 0.012883739545941353 2023-01-24 04:25:51.382451: step: 814/459, loss: 0.0071116480976343155 2023-01-24 04:25:51.947002: step: 816/459, loss: 0.009452663362026215 2023-01-24 04:25:52.495830: step: 818/459, loss: 0.003233585972338915 2023-01-24 04:25:53.064988: step: 820/459, loss: 0.00018424067820888013 2023-01-24 04:25:53.626786: step: 822/459, loss: 0.007849110290408134 2023-01-24 04:25:54.312410: step: 824/459, loss: 0.007991977035999298 2023-01-24 04:25:54.906205: step: 826/459, loss: 0.03820182755589485 2023-01-24 04:25:55.527875: step: 828/459, loss: 0.03174961730837822 2023-01-24 04:25:56.224476: step: 830/459, loss: 0.002126523293554783 2023-01-24 04:25:56.820256: step: 832/459, loss: 0.01251569576561451 2023-01-24 04:25:57.420783: step: 834/459, loss: 0.004811507184058428 2023-01-24 04:25:58.085278: step: 836/459, loss: 0.0008805663674138486 2023-01-24 04:25:58.713173: step: 838/459, loss: 0.029331926256418228 2023-01-24 04:25:59.368710: step: 840/459, loss: 0.004660984966903925 2023-01-24 04:25:59.931964: step: 842/459, loss: 0.00029226733022369444 2023-01-24 04:26:00.521079: step: 844/459, loss: 0.000703822064679116 2023-01-24 04:26:01.225412: step: 846/459, loss: 0.004349836613982916 2023-01-24 04:26:01.855780: step: 848/459, loss: 0.11311600357294083 2023-01-24 04:26:02.412445: step: 850/459, loss: 0.0026345180813223124 2023-01-24 04:26:03.029048: step: 852/459, loss: 0.010625667870044708 2023-01-24 04:26:03.615991: step: 854/459, loss: 0.06276162713766098 2023-01-24 04:26:04.286379: step: 856/459, loss: 0.03844541311264038 2023-01-24 04:26:04.937098: step: 858/459, loss: 0.042121175676584244 2023-01-24 04:26:05.555177: step: 860/459, loss: 0.002090285997837782 2023-01-24 04:26:06.193622: step: 862/459, loss: 0.0019079620251432061 2023-01-24 04:26:06.842467: step: 864/459, loss: 0.002100938931107521 2023-01-24 04:26:07.441988: step: 866/459, loss: 0.0005034420755691826 2023-01-24 04:26:08.119864: step: 868/459, loss: 0.017147734761238098 2023-01-24 04:26:08.755816: step: 870/459, loss: 0.007201818749308586 2023-01-24 04:26:09.439574: step: 872/459, loss: 2.9531195163726807 2023-01-24 04:26:10.053379: step: 874/459, loss: 0.0002995569375343621 2023-01-24 04:26:10.641094: step: 876/459, loss: 0.00020210260117892176 2023-01-24 04:26:11.309732: step: 878/459, loss: 0.0021417986135929823 2023-01-24 04:26:11.901824: step: 880/459, loss: 0.00170178955886513 2023-01-24 04:26:12.547693: step: 882/459, loss: 0.0011510377516970038 2023-01-24 04:26:13.158528: step: 884/459, loss: 0.18487179279327393 2023-01-24 04:26:13.828665: step: 886/459, loss: 0.031884387135505676 2023-01-24 04:26:14.486346: step: 888/459, loss: 0.08705995231866837 2023-01-24 04:26:15.044531: step: 890/459, loss: 0.04219119995832443 2023-01-24 04:26:15.695471: step: 892/459, loss: 0.008784453384578228 2023-01-24 
04:26:16.304184: step: 894/459, loss: 0.008139234967529774 2023-01-24 04:26:16.909870: step: 896/459, loss: 0.0454709529876709 2023-01-24 04:26:17.522707: step: 898/459, loss: 0.1431376338005066 2023-01-24 04:26:18.141625: step: 900/459, loss: 0.0019475332228466868 2023-01-24 04:26:18.752262: step: 902/459, loss: 0.016716301441192627 2023-01-24 04:26:19.296925: step: 904/459, loss: 0.0258359182626009 2023-01-24 04:26:19.868973: step: 906/459, loss: 0.007824139669537544 2023-01-24 04:26:20.473956: step: 908/459, loss: 0.01940510794520378 2023-01-24 04:26:21.116487: step: 910/459, loss: 0.019020332023501396 2023-01-24 04:26:21.743809: step: 912/459, loss: 0.006399769801646471 2023-01-24 04:26:22.353351: step: 914/459, loss: 0.008416805416345596 2023-01-24 04:26:22.939068: step: 916/459, loss: 0.0005817725323140621 2023-01-24 04:26:23.548053: step: 918/459, loss: 0.0018288294086232781 2023-01-24 04:26:23.978389: step: 920/459, loss: 0.0015708935679867864 ================================================== Loss: 0.046 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3180591634362014, 'r': 0.3138344686657016, 'f1': 0.31593269338457447}, 'combined': 0.2327925109149496, 'epoch': 35} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34494167615418203, 'r': 0.29759059151847156, 'f1': 0.3195213769354014}, 'combined': 0.20449368123865685, 'epoch': 35} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3146087770721381, 'r': 0.31878764128372244, 'f1': 0.3166844240462238}, 'combined': 0.23334641771827017, 'epoch': 35} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.35837087443709775, 'r': 0.29907678430295975, 'f1': 0.32605001261967864}, 'combined': 0.2086720080765943, 'epoch': 35} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3375076791822658, 'r': 0.3227777425196622, 'f1': 0.3299784099085586}, 'combined': 0.24314198624841157, 'epoch': 35} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.35670042752638403, 'r': 0.3186913655768513, 'f1': 0.3366263774924317}, 'combined': 0.24135476122098878, 'epoch': 35} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.24632352941176472, 'r': 0.2392857142857143, 'f1': 0.2427536231884058}, 'combined': 0.16183574879227053, 'epoch': 35} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.23, 'r': 0.25, 'f1': 0.23958333333333334}, 'combined': 0.11979166666666667, 'epoch': 35} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.25, 'r': 0.10344827586206896, 'f1': 0.14634146341463414}, 'combined': 0.09756097560975609, 'epoch': 35} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 'epoch': 10} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 
'epoch': 10} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'epoch': 10} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3234579439252337, 'r': 0.32836812144212524, 'f1': 0.32589453860640305}, 'combined': 0.2401328179205075, 'epoch': 25} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33820520545292077, 'r': 0.29673590233199043, 'f1': 0.3161163313667358}, 'combined': 0.20231445207471088, 'epoch': 25} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.32142857142857145, 'r': 0.391304347826087, 'f1': 0.35294117647058826}, 'combined': 0.17647058823529413, 'epoch': 25} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8} ****************************** Epoch: 36 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:29:00.821767: step: 2/459, loss: 0.003413947531953454 2023-01-24 04:29:01.405189: step: 4/459, loss: 0.020013395696878433 2023-01-24 04:29:02.006580: step: 6/459, loss: 0.05428145080804825 2023-01-24 04:29:02.598906: step: 8/459, loss: 0.0005663410411216319 2023-01-24 04:29:03.222607: step: 10/459, loss: 0.006255780812352896 2023-01-24 04:29:03.828037: step: 12/459, loss: 0.0011889912420883775 2023-01-24 04:29:04.422512: step: 14/459, loss: 0.021372266113758087 2023-01-24 04:29:05.036744: step: 16/459, loss: 0.010013551451265812 2023-01-24 04:29:05.662773: step: 18/459, loss: 0.011148330755531788 2023-01-24 04:29:06.245619: step: 20/459, loss: 0.024697985500097275 2023-01-24 04:29:06.909996: step: 22/459, loss: 0.004649365786463022 2023-01-24 04:29:07.492468: step: 24/459, loss: 0.001301910844631493 2023-01-24 04:29:08.120000: step: 26/459, loss: 0.014064016751945019 2023-01-24 04:29:08.771169: step: 28/459, loss: 0.027415690943598747 2023-01-24 04:29:09.340578: step: 30/459, loss: 0.0006113697891123593 2023-01-24 04:29:09.985249: step: 32/459, loss: 0.009261337108910084 2023-01-24 04:29:10.617183: step: 34/459, loss: 0.005089198239147663 2023-01-24 04:29:11.221170: step: 36/459, loss: 0.0014520115219056606 2023-01-24 04:29:11.834406: step: 38/459, loss: 0.0032457925844937563 2023-01-24 04:29:12.524765: step: 40/459, loss: 0.018108120188117027 2023-01-24 04:29:13.172420: step: 42/459, loss: 0.0047406344674527645 2023-01-24 04:29:13.744474: step: 44/459, loss: 0.0010630530305206776 2023-01-24 04:29:14.395958: step: 46/459, loss: 0.0795319527387619 2023-01-24 04:29:15.032292: step: 48/459, loss: 0.0013411175459623337 2023-01-24 04:29:15.675791: step: 50/459, loss: 
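Each epoch block above ends with a single summary "Loss:" line (0.105 for epoch 34, 0.046 for epoch 35). For checking those summaries or plotting the loss curve, the per-step entries can be re-aggregated directly from the log text. The snippet below is a small assumed helper that mirrors the "step: N/459, loss: X" format printed here; it is not part of train.py.

import re

STEP_RE = re.compile(r"step: (\d+)/459, loss: ([0-9eE+.\-]+)")

def mean_epoch_loss(epoch_text: str) -> float:
    # Average every per-step loss found in one epoch's chunk of this log.
    losses = [float(m.group(2)) for m in STEP_RE.finditer(epoch_text)]
    return sum(losses) / len(losses) if losses else float("nan")

# If the logged summary is a simple mean of the step losses, applying this to the
# epoch-35 chunk above should land near the reported "Loss: 0.046".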
0.028649216517806053 2023-01-24 04:29:16.284875: step: 52/459, loss: 0.011649038642644882 2023-01-24 04:29:16.984317: step: 54/459, loss: 0.04617686569690704 2023-01-24 04:29:17.598564: step: 56/459, loss: 0.00042490786290727556 2023-01-24 04:29:18.217846: step: 58/459, loss: 0.010256605222821236 2023-01-24 04:29:18.846398: step: 60/459, loss: 0.04280081391334534 2023-01-24 04:29:19.483425: step: 62/459, loss: 0.003380800364539027 2023-01-24 04:29:19.996515: step: 64/459, loss: 0.0017714299028739333 2023-01-24 04:29:20.628753: step: 66/459, loss: 0.0015378721291199327 2023-01-24 04:29:21.179226: step: 68/459, loss: 0.00109572964720428 2023-01-24 04:29:21.756773: step: 70/459, loss: 0.0023492625914514065 2023-01-24 04:29:22.402443: step: 72/459, loss: 3.325037323520519e-05 2023-01-24 04:29:23.013012: step: 74/459, loss: 0.00037056507426314056 2023-01-24 04:29:23.682210: step: 76/459, loss: 0.034779444336891174 2023-01-24 04:29:24.301285: step: 78/459, loss: 0.08051051199436188 2023-01-24 04:29:24.887151: step: 80/459, loss: 0.0017499226378276944 2023-01-24 04:29:25.591672: step: 82/459, loss: 0.029337666928768158 2023-01-24 04:29:26.289926: step: 84/459, loss: 0.0009559524478390813 2023-01-24 04:29:26.941842: step: 86/459, loss: 1.5168618119787425e-05 2023-01-24 04:29:27.521038: step: 88/459, loss: 0.0034252877812832594 2023-01-24 04:29:28.091130: step: 90/459, loss: 0.000899867678526789 2023-01-24 04:29:28.728917: step: 92/459, loss: 2.4746645976847503e-06 2023-01-24 04:29:29.275806: step: 94/459, loss: 0.00034404892357997596 2023-01-24 04:29:29.827528: step: 96/459, loss: 0.06766442954540253 2023-01-24 04:29:30.427442: step: 98/459, loss: 0.010434532538056374 2023-01-24 04:29:31.073352: step: 100/459, loss: 0.00010187139560002834 2023-01-24 04:29:31.700084: step: 102/459, loss: 0.028309291228652 2023-01-24 04:29:32.348497: step: 104/459, loss: 0.04255795106291771 2023-01-24 04:29:33.009189: step: 106/459, loss: 0.001372015685774386 2023-01-24 04:29:33.706269: step: 108/459, loss: 0.03752635791897774 2023-01-24 04:29:34.328744: step: 110/459, loss: 0.00020663371833506972 2023-01-24 04:29:34.925987: step: 112/459, loss: 2.510265350341797 2023-01-24 04:29:35.501030: step: 114/459, loss: 0.00038153017521835864 2023-01-24 04:29:36.035915: step: 116/459, loss: 0.015119723044335842 2023-01-24 04:29:36.652966: step: 118/459, loss: 0.03758282586932182 2023-01-24 04:29:37.223863: step: 120/459, loss: 0.007481330074369907 2023-01-24 04:29:37.833191: step: 122/459, loss: 0.008659062907099724 2023-01-24 04:29:38.401192: step: 124/459, loss: 0.05000855028629303 2023-01-24 04:29:38.955577: step: 126/459, loss: 0.0012612364953383803 2023-01-24 04:29:39.488721: step: 128/459, loss: 0.02132895402610302 2023-01-24 04:29:40.179346: step: 130/459, loss: 0.03329265117645264 2023-01-24 04:29:40.826898: step: 132/459, loss: 0.036047205328941345 2023-01-24 04:29:41.436759: step: 134/459, loss: 0.0031106071546673775 2023-01-24 04:29:42.033377: step: 136/459, loss: 0.0692366287112236 2023-01-24 04:29:42.613974: step: 138/459, loss: 0.0068715414963662624 2023-01-24 04:29:43.269224: step: 140/459, loss: 0.004563051275908947 2023-01-24 04:29:43.918081: step: 142/459, loss: 0.05724478140473366 2023-01-24 04:29:44.524384: step: 144/459, loss: 0.0008691248949617147 2023-01-24 04:29:45.221784: step: 146/459, loss: 0.026011234149336815 2023-01-24 04:29:45.917819: step: 148/459, loss: 0.08444365859031677 2023-01-24 04:29:46.587140: step: 150/459, loss: 0.0044518932700157166 2023-01-24 04:29:47.235329: step: 152/459, loss: 
0.0009160276385955513 2023-01-24 04:29:47.810027: step: 154/459, loss: 0.005787007976323366 2023-01-24 04:29:48.351764: step: 156/459, loss: 0.01464461162686348 2023-01-24 04:29:48.901127: step: 158/459, loss: 0.003490966511890292 2023-01-24 04:29:49.563121: step: 160/459, loss: 0.0023446788545697927 2023-01-24 04:29:50.167110: step: 162/459, loss: 0.0007385329809039831 2023-01-24 04:29:50.775599: step: 164/459, loss: 0.19835492968559265 2023-01-24 04:29:51.329364: step: 166/459, loss: 0.0030635467264801264 2023-01-24 04:29:51.961333: step: 168/459, loss: 0.00106525095179677 2023-01-24 04:29:52.604769: step: 170/459, loss: 0.0015678505878895521 2023-01-24 04:29:53.210026: step: 172/459, loss: 0.00032368721440434456 2023-01-24 04:29:53.824489: step: 174/459, loss: 0.008780835196375847 2023-01-24 04:29:54.455585: step: 176/459, loss: 0.026429036632180214 2023-01-24 04:29:55.028733: step: 178/459, loss: 0.0310591459274292 2023-01-24 04:29:55.650449: step: 180/459, loss: 0.002635594690218568 2023-01-24 04:29:56.239305: step: 182/459, loss: 0.0025616728235036135 2023-01-24 04:29:56.890145: step: 184/459, loss: 0.0009842868894338608 2023-01-24 04:29:57.487549: step: 186/459, loss: 0.0022447467781603336 2023-01-24 04:29:58.113283: step: 188/459, loss: 0.02914300374686718 2023-01-24 04:29:58.721614: step: 190/459, loss: 0.0036780836526304483 2023-01-24 04:29:59.368967: step: 192/459, loss: 0.06232075393199921 2023-01-24 04:30:00.028530: step: 194/459, loss: 0.006653483957052231 2023-01-24 04:30:00.610365: step: 196/459, loss: 0.009821603074669838 2023-01-24 04:30:01.271588: step: 198/459, loss: 0.007466341368854046 2023-01-24 04:30:01.970898: step: 200/459, loss: 0.07074154913425446 2023-01-24 04:30:02.531071: step: 202/459, loss: 0.0018196210730820894 2023-01-24 04:30:03.087835: step: 204/459, loss: 0.03828587010502815 2023-01-24 04:30:03.786772: step: 206/459, loss: 0.03347045183181763 2023-01-24 04:30:04.461029: step: 208/459, loss: 0.05730556696653366 2023-01-24 04:30:05.143723: step: 210/459, loss: 0.017904536798596382 2023-01-24 04:30:05.800895: step: 212/459, loss: 0.0003860267752315849 2023-01-24 04:30:06.413577: step: 214/459, loss: 0.000709383049979806 2023-01-24 04:30:06.942056: step: 216/459, loss: 0.018733948469161987 2023-01-24 04:30:07.562376: step: 218/459, loss: 0.002058169571682811 2023-01-24 04:30:08.236726: step: 220/459, loss: 0.004164156969636679 2023-01-24 04:30:08.892085: step: 222/459, loss: 0.007136038038879633 2023-01-24 04:30:09.576914: step: 224/459, loss: 0.005444127134978771 2023-01-24 04:30:10.259409: step: 226/459, loss: 0.07629884034395218 2023-01-24 04:30:10.885101: step: 228/459, loss: 0.08880071341991425 2023-01-24 04:30:11.568182: step: 230/459, loss: 0.003464682726189494 2023-01-24 04:30:12.250937: step: 232/459, loss: 0.041433122009038925 2023-01-24 04:30:12.862569: step: 234/459, loss: 0.01038865651935339 2023-01-24 04:30:13.474521: step: 236/459, loss: 0.006318261846899986 2023-01-24 04:30:14.099421: step: 238/459, loss: 0.0006517345318570733 2023-01-24 04:30:14.675841: step: 240/459, loss: 0.0016106731491163373 2023-01-24 04:30:15.246139: step: 242/459, loss: 0.02856411226093769 2023-01-24 04:30:15.913457: step: 244/459, loss: 0.03827290236949921 2023-01-24 04:30:16.463303: step: 246/459, loss: 0.10527758300304413 2023-01-24 04:30:17.122068: step: 248/459, loss: 0.00037309041363187134 2023-01-24 04:30:17.699538: step: 250/459, loss: 0.029103923588991165 2023-01-24 04:30:18.393917: step: 252/459, loss: 0.012491158209741116 2023-01-24 04:30:19.022040: step: 
254/459, loss: 0.11566401273012161 2023-01-24 04:30:19.612467: step: 256/459, loss: 0.013895832002162933 2023-01-24 04:30:20.204159: step: 258/459, loss: 0.01498790830373764 2023-01-24 04:30:20.755810: step: 260/459, loss: 0.0010799776064231992 2023-01-24 04:30:21.348160: step: 262/459, loss: 0.011975186876952648 2023-01-24 04:30:22.018657: step: 264/459, loss: 0.0027188113890588284 2023-01-24 04:30:22.629892: step: 266/459, loss: 0.018913961946964264 2023-01-24 04:30:23.250631: step: 268/459, loss: 0.020021427422761917 2023-01-24 04:30:23.890672: step: 270/459, loss: 0.005463314708322287 2023-01-24 04:30:24.595307: step: 272/459, loss: 0.042485058307647705 2023-01-24 04:30:25.245069: step: 274/459, loss: 0.02421020343899727 2023-01-24 04:30:25.827650: step: 276/459, loss: 0.014136182144284248 2023-01-24 04:30:26.415523: step: 278/459, loss: 0.0017234311671927571 2023-01-24 04:30:26.993678: step: 280/459, loss: 0.011132911778986454 2023-01-24 04:30:27.637732: step: 282/459, loss: 0.004907671362161636 2023-01-24 04:30:28.259094: step: 284/459, loss: 0.01614605449140072 2023-01-24 04:30:28.899801: step: 286/459, loss: 0.02051330730319023 2023-01-24 04:30:29.487231: step: 288/459, loss: 0.009055477567017078 2023-01-24 04:30:30.084829: step: 290/459, loss: 0.00023947801673784852 2023-01-24 04:30:30.671357: step: 292/459, loss: 0.03347666189074516 2023-01-24 04:30:31.332910: step: 294/459, loss: 0.008860408328473568 2023-01-24 04:30:31.972572: step: 296/459, loss: 0.0003654996689874679 2023-01-24 04:30:32.587382: step: 298/459, loss: 0.0002720555057749152 2023-01-24 04:30:33.224145: step: 300/459, loss: 0.04607635736465454 2023-01-24 04:30:33.860735: step: 302/459, loss: 0.001492306124418974 2023-01-24 04:30:34.518396: step: 304/459, loss: 0.0023035244084894657 2023-01-24 04:30:35.185033: step: 306/459, loss: 0.15151554346084595 2023-01-24 04:30:35.816378: step: 308/459, loss: 0.029810335487127304 2023-01-24 04:30:36.440538: step: 310/459, loss: 0.0024029759224504232 2023-01-24 04:30:37.063643: step: 312/459, loss: 0.02603043243288994 2023-01-24 04:30:37.612800: step: 314/459, loss: 0.0001881079951999709 2023-01-24 04:30:38.203984: step: 316/459, loss: 0.0008572756196372211 2023-01-24 04:30:38.807689: step: 318/459, loss: 0.0008269260288216174 2023-01-24 04:30:39.403461: step: 320/459, loss: 0.0016650970792397857 2023-01-24 04:30:39.981808: step: 322/459, loss: 0.0016358421416953206 2023-01-24 04:30:40.683979: step: 324/459, loss: 0.01627100259065628 2023-01-24 04:30:41.295181: step: 326/459, loss: 0.05717511847615242 2023-01-24 04:30:41.874736: step: 328/459, loss: 0.9629316329956055 2023-01-24 04:30:42.505914: step: 330/459, loss: 0.0002884003333747387 2023-01-24 04:30:43.082599: step: 332/459, loss: 0.002512918319553137 2023-01-24 04:30:43.644981: step: 334/459, loss: 0.003175692167133093 2023-01-24 04:30:44.222052: step: 336/459, loss: 0.00036666577216237783 2023-01-24 04:30:44.844322: step: 338/459, loss: 0.002577963750809431 2023-01-24 04:30:45.453559: step: 340/459, loss: 0.01902926154434681 2023-01-24 04:30:46.052537: step: 342/459, loss: 0.002640474122017622 2023-01-24 04:30:46.619817: step: 344/459, loss: 0.012631687335669994 2023-01-24 04:30:47.208586: step: 346/459, loss: 0.0009567140368744731 2023-01-24 04:30:47.834371: step: 348/459, loss: 0.003536749631166458 2023-01-24 04:30:48.411817: step: 350/459, loss: 0.0025305768940597773 2023-01-24 04:30:49.019615: step: 352/459, loss: 0.005131634417921305 2023-01-24 04:30:49.590722: step: 354/459, loss: 0.009982042945921421 2023-01-24 
04:30:50.255712: step: 356/459, loss: 0.005554657895117998 2023-01-24 04:30:50.830857: step: 358/459, loss: 0.0068021537736058235 2023-01-24 04:30:51.463748: step: 360/459, loss: 0.000701783923432231 2023-01-24 04:30:52.025248: step: 362/459, loss: 0.027967168018221855 2023-01-24 04:30:52.627353: step: 364/459, loss: 0.011712312698364258 2023-01-24 04:30:53.266919: step: 366/459, loss: 0.0002984672028105706 2023-01-24 04:30:53.920735: step: 368/459, loss: 0.0014795860042795539 2023-01-24 04:30:54.515267: step: 370/459, loss: 0.006348456721752882 2023-01-24 04:30:55.104071: step: 372/459, loss: 0.02653643675148487 2023-01-24 04:30:55.805771: step: 374/459, loss: 0.0028810747899115086 2023-01-24 04:30:56.425922: step: 376/459, loss: 0.0004367950023151934 2023-01-24 04:30:57.074643: step: 378/459, loss: 9.493931429460645e-05 2023-01-24 04:30:57.842290: step: 380/459, loss: 0.04183853045105934 2023-01-24 04:30:58.431672: step: 382/459, loss: 0.004329501651227474 2023-01-24 04:30:59.061178: step: 384/459, loss: 0.0023021006491035223 2023-01-24 04:30:59.653143: step: 386/459, loss: 0.0033247682731598616 2023-01-24 04:31:00.240702: step: 388/459, loss: 0.001783372717909515 2023-01-24 04:31:00.783199: step: 390/459, loss: 0.006443121004849672 2023-01-24 04:31:01.391710: step: 392/459, loss: 0.000762605166528374 2023-01-24 04:31:02.014849: step: 394/459, loss: 0.012517871335148811 2023-01-24 04:31:02.618726: step: 396/459, loss: 0.023481307551264763 2023-01-24 04:31:03.215489: step: 398/459, loss: 0.06862626224756241 2023-01-24 04:31:03.829786: step: 400/459, loss: 0.0008031228790059686 2023-01-24 04:31:04.452730: step: 402/459, loss: 0.008771484717726707 2023-01-24 04:31:05.093076: step: 404/459, loss: 0.0022125975228846073 2023-01-24 04:31:05.728808: step: 406/459, loss: 0.0017173188971355557 2023-01-24 04:31:06.326838: step: 408/459, loss: 0.00025178861687891185 2023-01-24 04:31:06.943707: step: 410/459, loss: 0.00044758847798220813 2023-01-24 04:31:07.559707: step: 412/459, loss: 0.013641460798680782 2023-01-24 04:31:08.208816: step: 414/459, loss: 0.007273562252521515 2023-01-24 04:31:08.816711: step: 416/459, loss: 0.001211234601214528 2023-01-24 04:31:09.496603: step: 418/459, loss: 0.012815537862479687 2023-01-24 04:31:10.055434: step: 420/459, loss: 0.0010195994982495904 2023-01-24 04:31:10.592697: step: 422/459, loss: 0.0016473758732900023 2023-01-24 04:31:11.228847: step: 424/459, loss: 0.007708017714321613 2023-01-24 04:31:11.821140: step: 426/459, loss: 7.386055949609727e-05 2023-01-24 04:31:12.396738: step: 428/459, loss: 0.0024220827035605907 2023-01-24 04:31:13.063229: step: 430/459, loss: 0.015159622766077518 2023-01-24 04:31:13.674385: step: 432/459, loss: 0.0539684072136879 2023-01-24 04:31:14.280212: step: 434/459, loss: 0.0005441046669147909 2023-01-24 04:31:14.856950: step: 436/459, loss: 0.013822660781443119 2023-01-24 04:31:15.410194: step: 438/459, loss: 3.13353884848766e-05 2023-01-24 04:31:15.999773: step: 440/459, loss: 0.00146639090962708 2023-01-24 04:31:16.639821: step: 442/459, loss: 0.006266175303608179 2023-01-24 04:31:17.227315: step: 444/459, loss: 0.0003229182038921863 2023-01-24 04:31:17.852899: step: 446/459, loss: 0.158670112490654 2023-01-24 04:31:18.471635: step: 448/459, loss: 0.20103603601455688 2023-01-24 04:31:19.089067: step: 450/459, loss: 0.020460577681660652 2023-01-24 04:31:19.754334: step: 452/459, loss: 5.0841249503719155e-06 2023-01-24 04:31:20.406045: step: 454/459, loss: 0.00042954040691256523 2023-01-24 04:31:21.046177: step: 456/459, loss: 
0.00021698330237995833 2023-01-24 04:31:21.652733: step: 458/459, loss: 0.0012325289426371455 2023-01-24 04:31:22.361662: step: 460/459, loss: 0.3436052203178406 2023-01-24 04:31:22.978285: step: 462/459, loss: 0.010574311017990112 2023-01-24 04:31:23.618631: step: 464/459, loss: 0.005681202746927738 2023-01-24 04:31:24.296643: step: 466/459, loss: 0.01573135331273079 2023-01-24 04:31:24.960608: step: 468/459, loss: 0.029744410887360573 2023-01-24 04:31:25.626044: step: 470/459, loss: 0.0003635846369434148 2023-01-24 04:31:26.278163: step: 472/459, loss: 0.030798016116023064 2023-01-24 04:31:26.906471: step: 474/459, loss: 0.03468126058578491 2023-01-24 04:31:27.458515: step: 476/459, loss: 0.007188539020717144 2023-01-24 04:31:28.084617: step: 478/459, loss: 0.012682520784437656 2023-01-24 04:31:28.759731: step: 480/459, loss: 0.010776866227388382 2023-01-24 04:31:29.354641: step: 482/459, loss: 0.3927864730358124 2023-01-24 04:31:29.973358: step: 484/459, loss: 0.00038301097811199725 2023-01-24 04:31:30.588667: step: 486/459, loss: 0.0009396016830578446 2023-01-24 04:31:31.213173: step: 488/459, loss: 0.002983525861054659 2023-01-24 04:31:31.806681: step: 490/459, loss: 0.020316675305366516 2023-01-24 04:31:32.485977: step: 492/459, loss: 0.019687509164214134 2023-01-24 04:31:33.063913: step: 494/459, loss: 0.0025050381664186716 2023-01-24 04:31:33.632684: step: 496/459, loss: 0.0002554669918026775 2023-01-24 04:31:34.218444: step: 498/459, loss: 0.0004853667051065713 2023-01-24 04:31:34.841376: step: 500/459, loss: 0.005816951394081116 2023-01-24 04:31:35.478200: step: 502/459, loss: 0.0023752599954605103 2023-01-24 04:31:36.138870: step: 504/459, loss: 0.009218079037964344 2023-01-24 04:31:36.759024: step: 506/459, loss: 0.0022441050969064236 2023-01-24 04:31:37.384428: step: 508/459, loss: 0.001042829011566937 2023-01-24 04:31:37.987455: step: 510/459, loss: 0.03157483786344528 2023-01-24 04:31:38.582923: step: 512/459, loss: 0.01781877502799034 2023-01-24 04:31:39.231751: step: 514/459, loss: 0.025861820206046104 2023-01-24 04:31:39.845342: step: 516/459, loss: 5.022262096405029 2023-01-24 04:31:40.473698: step: 518/459, loss: 0.0027863881550729275 2023-01-24 04:31:41.133714: step: 520/459, loss: 0.015287665650248528 2023-01-24 04:31:41.697316: step: 522/459, loss: 0.0010986927663907409 2023-01-24 04:31:42.244753: step: 524/459, loss: 2.6906862331088632e-05 2023-01-24 04:31:42.848971: step: 526/459, loss: 0.002647985704243183 2023-01-24 04:31:43.469937: step: 528/459, loss: 0.003519452642649412 2023-01-24 04:31:44.137375: step: 530/459, loss: 0.0051610032096505165 2023-01-24 04:31:44.794759: step: 532/459, loss: 0.010122809559106827 2023-01-24 04:31:45.418174: step: 534/459, loss: 0.017222236841917038 2023-01-24 04:31:46.043339: step: 536/459, loss: 0.006268923636525869 2023-01-24 04:31:46.630884: step: 538/459, loss: 0.00168954161927104 2023-01-24 04:31:47.264071: step: 540/459, loss: 0.013206186704337597 2023-01-24 04:31:47.912766: step: 542/459, loss: 0.0005891482578590512 2023-01-24 04:31:48.561700: step: 544/459, loss: 0.1555609107017517 2023-01-24 04:31:49.237725: step: 546/459, loss: 0.10329855233430862 2023-01-24 04:31:49.848923: step: 548/459, loss: 0.06813758611679077 2023-01-24 04:31:50.451326: step: 550/459, loss: 0.0014004195109009743 2023-01-24 04:31:51.220438: step: 552/459, loss: 0.00041831607813946903 2023-01-24 04:31:51.913779: step: 554/459, loss: 0.0021382695995271206 2023-01-24 04:31:52.516116: step: 556/459, loss: 0.023260625079274178 2023-01-24 04:31:53.086971: 
step: 558/459, loss: 0.03595549985766411 2023-01-24 04:31:53.684099: step: 560/459, loss: 0.002751004183664918 2023-01-24 04:31:54.238069: step: 562/459, loss: 0.005091244820505381 2023-01-24 04:31:54.924194: step: 564/459, loss: 0.0018734113546088338 2023-01-24 04:31:55.557532: step: 566/459, loss: 0.007325601298362017 2023-01-24 04:31:56.185859: step: 568/459, loss: 0.000742870441172272 2023-01-24 04:31:56.834572: step: 570/459, loss: 0.00421486608684063 2023-01-24 04:31:57.498857: step: 572/459, loss: 0.028276043012738228 2023-01-24 04:31:58.192790: step: 574/459, loss: 0.3828584551811218 2023-01-24 04:31:58.827524: step: 576/459, loss: 0.04486788809299469 2023-01-24 04:31:59.422452: step: 578/459, loss: 0.00428602984175086 2023-01-24 04:32:00.020563: step: 580/459, loss: 0.0021320495288819075 2023-01-24 04:32:00.664331: step: 582/459, loss: 0.002402626909315586 2023-01-24 04:32:01.321950: step: 584/459, loss: 0.005988615099340677 2023-01-24 04:32:01.919224: step: 586/459, loss: 0.001384598552249372 2023-01-24 04:32:02.677072: step: 588/459, loss: 0.025848647579550743 2023-01-24 04:32:03.205898: step: 590/459, loss: 0.0017343664076179266 2023-01-24 04:32:03.787610: step: 592/459, loss: 0.017563914880156517 2023-01-24 04:32:04.511733: step: 594/459, loss: 0.03571196272969246 2023-01-24 04:32:05.137074: step: 596/459, loss: 0.0002358114143135026 2023-01-24 04:32:05.763550: step: 598/459, loss: 0.032404955476522446 2023-01-24 04:32:06.375601: step: 600/459, loss: 0.0033667271491140127 2023-01-24 04:32:07.016038: step: 602/459, loss: 0.018495604395866394 2023-01-24 04:32:07.649973: step: 604/459, loss: 0.002185066929087043 2023-01-24 04:32:08.372037: step: 606/459, loss: 0.10122939199209213 2023-01-24 04:32:09.040922: step: 608/459, loss: 0.0006628580158576369 2023-01-24 04:32:09.674128: step: 610/459, loss: 0.0006361309788189828 2023-01-24 04:32:10.229366: step: 612/459, loss: 0.09057503938674927 2023-01-24 04:32:10.822849: step: 614/459, loss: 0.02823885902762413 2023-01-24 04:32:11.451934: step: 616/459, loss: 0.017619827762246132 2023-01-24 04:32:12.080377: step: 618/459, loss: 0.001138394232839346 2023-01-24 04:32:12.638216: step: 620/459, loss: 0.0006117436569184065 2023-01-24 04:32:13.223582: step: 622/459, loss: 0.0005419771187007427 2023-01-24 04:32:13.851777: step: 624/459, loss: 0.06643273681402206 2023-01-24 04:32:14.392097: step: 626/459, loss: 0.006766187027096748 2023-01-24 04:32:14.985410: step: 628/459, loss: 0.08445398509502411 2023-01-24 04:32:15.650037: step: 630/459, loss: 0.005310762207955122 2023-01-24 04:32:16.285513: step: 632/459, loss: 0.20687294006347656 2023-01-24 04:32:16.849730: step: 634/459, loss: 0.010346358641982079 2023-01-24 04:32:17.466418: step: 636/459, loss: 0.007740706205368042 2023-01-24 04:32:18.066494: step: 638/459, loss: 0.06481623649597168 2023-01-24 04:32:18.661525: step: 640/459, loss: 0.01966739073395729 2023-01-24 04:32:19.331320: step: 642/459, loss: 0.005365303251892328 2023-01-24 04:32:20.084480: step: 644/459, loss: 0.03606148809194565 2023-01-24 04:32:20.624227: step: 646/459, loss: 0.051633402705192566 2023-01-24 04:32:21.248198: step: 648/459, loss: 0.0016385283088311553 2023-01-24 04:32:21.904703: step: 650/459, loss: 0.024624301120638847 2023-01-24 04:32:22.485082: step: 652/459, loss: 0.09078668802976608 2023-01-24 04:32:23.089682: step: 654/459, loss: 0.005002248100936413 2023-01-24 04:32:23.703051: step: 656/459, loss: 0.0001488471170887351 2023-01-24 04:32:24.273289: step: 658/459, loss: 0.0009805815061554313 2023-01-24 
04:32:24.864351: step: 660/459, loss: 0.039375800639390945 2023-01-24 04:32:25.584723: step: 662/459, loss: 5.047353988629766e-05 2023-01-24 04:32:26.186203: step: 664/459, loss: 0.0008290797122754157 2023-01-24 04:32:26.790334: step: 666/459, loss: 0.055233895778656006 2023-01-24 04:32:27.357857: step: 668/459, loss: 1.1785874366760254 2023-01-24 04:32:27.957918: step: 670/459, loss: 0.033286720514297485 2023-01-24 04:32:28.666311: step: 672/459, loss: 0.004510761238634586 2023-01-24 04:32:29.280731: step: 674/459, loss: 0.024194559082388878 2023-01-24 04:32:29.896551: step: 676/459, loss: 0.013155006803572178 2023-01-24 04:32:30.498500: step: 678/459, loss: 0.001811149064451456 2023-01-24 04:32:31.070243: step: 680/459, loss: 0.0017380946082994342 2023-01-24 04:32:31.705634: step: 682/459, loss: 0.0021697324700653553 2023-01-24 04:32:32.317411: step: 684/459, loss: 0.015718698501586914 2023-01-24 04:32:32.958155: step: 686/459, loss: 0.0249358918517828 2023-01-24 04:32:33.537686: step: 688/459, loss: 0.0004848464159294963 2023-01-24 04:32:34.143924: step: 690/459, loss: 0.009101547300815582 2023-01-24 04:32:34.752610: step: 692/459, loss: 0.0028964218217879534 2023-01-24 04:32:35.351679: step: 694/459, loss: 0.10693230479955673 2023-01-24 04:32:35.940520: step: 696/459, loss: 0.00016339562716893852 2023-01-24 04:32:36.564348: step: 698/459, loss: 0.0069610681384801865 2023-01-24 04:32:37.226382: step: 700/459, loss: 0.0107971066609025 2023-01-24 04:32:37.861685: step: 702/459, loss: 0.00509782275184989 2023-01-24 04:32:38.495704: step: 704/459, loss: 0.008227599784731865 2023-01-24 04:32:39.134405: step: 706/459, loss: 0.014900639653205872 2023-01-24 04:32:39.721067: step: 708/459, loss: 0.017886167392134666 2023-01-24 04:32:40.381504: step: 710/459, loss: 0.010234535671770573 2023-01-24 04:32:40.961096: step: 712/459, loss: 0.000810798432212323 2023-01-24 04:32:41.591897: step: 714/459, loss: 0.002969609573483467 2023-01-24 04:32:42.241521: step: 716/459, loss: 0.010229718871414661 2023-01-24 04:32:42.826946: step: 718/459, loss: 0.011152852326631546 2023-01-24 04:32:43.436877: step: 720/459, loss: 0.10911131650209427 2023-01-24 04:32:44.099753: step: 722/459, loss: 0.0023292843252420425 2023-01-24 04:32:44.692798: step: 724/459, loss: 0.0007878006435930729 2023-01-24 04:32:45.396071: step: 726/459, loss: 8.206626892089844 2023-01-24 04:32:46.062246: step: 728/459, loss: 0.040383972227573395 2023-01-24 04:32:46.743195: step: 730/459, loss: 0.004701733123511076 2023-01-24 04:32:47.304094: step: 732/459, loss: 0.0004447000101208687 2023-01-24 04:32:47.907166: step: 734/459, loss: 0.01699916645884514 2023-01-24 04:32:48.476277: step: 736/459, loss: 0.07539788633584976 2023-01-24 04:32:49.109385: step: 738/459, loss: 0.0004797333385795355 2023-01-24 04:32:49.717558: step: 740/459, loss: 0.009170517325401306 2023-01-24 04:32:50.347152: step: 742/459, loss: 0.0036405131686478853 2023-01-24 04:32:50.950185: step: 744/459, loss: 0.0006810509948991239 2023-01-24 04:32:51.575017: step: 746/459, loss: 0.015100349672138691 2023-01-24 04:32:52.190784: step: 748/459, loss: 0.007425876799970865 2023-01-24 04:32:52.824518: step: 750/459, loss: 0.017804279923439026 2023-01-24 04:32:53.454166: step: 752/459, loss: 8.804678145679645e-06 2023-01-24 04:32:54.045466: step: 754/459, loss: 0.008303077891469002 2023-01-24 04:32:54.640557: step: 756/459, loss: 0.001806853455491364 2023-01-24 04:32:55.330110: step: 758/459, loss: 0.02249007858335972 2023-01-24 04:32:55.913419: step: 760/459, loss: 
0.0021900900173932314 2023-01-24 04:32:56.556028: step: 762/459, loss: 0.0042450400069355965 2023-01-24 04:32:57.173610: step: 764/459, loss: 0.017616234719753265 2023-01-24 04:32:57.767876: step: 766/459, loss: 0.04355158656835556 2023-01-24 04:32:58.357565: step: 768/459, loss: 0.0041880435310304165 2023-01-24 04:32:58.971202: step: 770/459, loss: 0.0008380370563827455 2023-01-24 04:32:59.623220: step: 772/459, loss: 0.0011518514947965741 2023-01-24 04:33:00.247270: step: 774/459, loss: 0.0003793223004322499 2023-01-24 04:33:00.883550: step: 776/459, loss: 0.020669469609856606 2023-01-24 04:33:01.524082: step: 778/459, loss: 0.003151737619191408 2023-01-24 04:33:02.155166: step: 780/459, loss: 0.004954976495355368 2023-01-24 04:33:02.848695: step: 782/459, loss: 0.014988631941378117 2023-01-24 04:33:03.495688: step: 784/459, loss: 0.007383387070149183 2023-01-24 04:33:04.129585: step: 786/459, loss: 0.002992489608004689 2023-01-24 04:33:04.867656: step: 788/459, loss: 0.00763659505173564 2023-01-24 04:33:05.507176: step: 790/459, loss: 0.05018918588757515 2023-01-24 04:33:06.133186: step: 792/459, loss: 0.026869846507906914 2023-01-24 04:33:06.708442: step: 794/459, loss: 0.05841248482465744 2023-01-24 04:33:07.260708: step: 796/459, loss: 0.008042354136705399 2023-01-24 04:33:07.894973: step: 798/459, loss: 0.024051323533058167 2023-01-24 04:33:08.519159: step: 800/459, loss: 0.0008385946275666356 2023-01-24 04:33:09.135473: step: 802/459, loss: 0.020715363323688507 2023-01-24 04:33:09.700533: step: 804/459, loss: 0.06906284391880035 2023-01-24 04:33:10.322138: step: 806/459, loss: 1.970559787878301e-05 2023-01-24 04:33:10.892228: step: 808/459, loss: 0.04037405177950859 2023-01-24 04:33:11.472744: step: 810/459, loss: 0.0006657655467279255 2023-01-24 04:33:12.107444: step: 812/459, loss: 0.025657007470726967 2023-01-24 04:33:12.721797: step: 814/459, loss: 0.0005021268734708428 2023-01-24 04:33:13.257966: step: 816/459, loss: 0.000671935616992414 2023-01-24 04:33:13.843599: step: 818/459, loss: 0.03148636966943741 2023-01-24 04:33:14.497518: step: 820/459, loss: 9.659124374389648 2023-01-24 04:33:15.152726: step: 822/459, loss: 0.004403162747621536 2023-01-24 04:33:15.781704: step: 824/459, loss: 0.005052903201431036 2023-01-24 04:33:16.397823: step: 826/459, loss: 0.025097746402025223 2023-01-24 04:33:16.999369: step: 828/459, loss: 0.005435672122985125 2023-01-24 04:33:17.575167: step: 830/459, loss: 0.002869299380108714 2023-01-24 04:33:18.182973: step: 832/459, loss: 0.017476307228207588 2023-01-24 04:33:18.836658: step: 834/459, loss: 0.0065192896872758865 2023-01-24 04:33:19.471402: step: 836/459, loss: 0.005940665956586599 2023-01-24 04:33:20.103068: step: 838/459, loss: 0.04171653464436531 2023-01-24 04:33:20.749438: step: 840/459, loss: 0.01645841635763645 2023-01-24 04:33:21.336892: step: 842/459, loss: 0.0001641708513488993 2023-01-24 04:33:21.955393: step: 844/459, loss: 0.0018015153473243117 2023-01-24 04:33:22.522466: step: 846/459, loss: 0.0007528150454163551 2023-01-24 04:33:23.138462: step: 848/459, loss: 0.005731223151087761 2023-01-24 04:33:23.864479: step: 850/459, loss: 0.006987154018133879 2023-01-24 04:33:24.553402: step: 852/459, loss: 0.0030698301270604134 2023-01-24 04:33:25.180876: step: 854/459, loss: 0.010360033251345158 2023-01-24 04:33:25.758163: step: 856/459, loss: 0.0007127286517061293 2023-01-24 04:33:26.418934: step: 858/459, loss: 0.0034292966593056917 2023-01-24 04:33:27.047309: step: 860/459, loss: 0.0009398258989676833 2023-01-24 04:33:27.668202: 
step: 862/459, loss: 0.02497098781168461 2023-01-24 04:33:28.306597: step: 864/459, loss: 0.007510879077017307 2023-01-24 04:33:28.972888: step: 866/459, loss: 0.027134273201227188 2023-01-24 04:33:29.642531: step: 868/459, loss: 0.009392634965479374 2023-01-24 04:33:30.225349: step: 870/459, loss: 6.741960532963276e-05 2023-01-24 04:33:30.791758: step: 872/459, loss: 0.010615981183946133 2023-01-24 04:33:31.375842: step: 874/459, loss: 0.004063708707690239 2023-01-24 04:33:31.993249: step: 876/459, loss: 7.649148028576747e-05 2023-01-24 04:33:32.605116: step: 878/459, loss: 0.03218012675642967 2023-01-24 04:33:33.225697: step: 880/459, loss: 0.01850729249417782 2023-01-24 04:33:33.812636: step: 882/459, loss: 0.002345553133636713 2023-01-24 04:33:34.442722: step: 884/459, loss: 0.016875751316547394 2023-01-24 04:33:35.030176: step: 886/459, loss: 0.0034178677015006542 2023-01-24 04:33:35.641451: step: 888/459, loss: 0.002932592760771513 2023-01-24 04:33:36.242882: step: 890/459, loss: 0.006710506044328213 2023-01-24 04:33:36.885298: step: 892/459, loss: 0.006927433889359236 2023-01-24 04:33:37.554460: step: 894/459, loss: 0.04131027311086655 2023-01-24 04:33:38.186122: step: 896/459, loss: 0.0086074098944664 2023-01-24 04:33:38.789599: step: 898/459, loss: 0.0001832855778047815 2023-01-24 04:33:39.470453: step: 900/459, loss: 0.014036196283996105 2023-01-24 04:33:40.024825: step: 902/459, loss: 0.00028014485724270344 2023-01-24 04:33:40.627576: step: 904/459, loss: 0.0007629635510966182 2023-01-24 04:33:41.228847: step: 906/459, loss: 0.0018935124389827251 2023-01-24 04:33:41.801916: step: 908/459, loss: 0.0006282848189584911 2023-01-24 04:33:42.412659: step: 910/459, loss: 0.017525063827633858 2023-01-24 04:33:43.013294: step: 912/459, loss: 0.020025871694087982 2023-01-24 04:33:43.747255: step: 914/459, loss: 0.01022512186318636 2023-01-24 04:33:44.426009: step: 916/459, loss: 0.001703424728475511 2023-01-24 04:33:45.019235: step: 918/459, loss: 0.0035982858389616013 2023-01-24 04:33:45.431643: step: 920/459, loss: 6.0297166783129796e-05 ================================================== Loss: 0.079 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3332947893188079, 'r': 0.30609995831177045, 'f1': 0.31911904654857176}, 'combined': 0.2351403500884213, 'epoch': 36} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3487931053824366, 'r': 0.2999176383861716, 'f1': 0.3225141727851297}, 'combined': 0.206409070582483, 'epoch': 36} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.32683944032158313, 'r': 0.30389245874302795, 'f1': 0.3149485265635707}, 'combined': 0.23206733536263102, 'epoch': 36} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3518796412688924, 'r': 0.29557889866586956, 'f1': 0.3212814115933365}, 'combined': 0.20562010341973533, 'epoch': 36} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3505335470054708, 'r': 0.30663370999909306, 'f1': 0.32711733839984214}, 'combined': 0.2410338282946205, 'epoch': 36} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.35077422620378895, 'r': 0.3104847526379937, 'f1': 0.3294021021700958}, 'combined': 0.2361750921219555, 'epoch': 36} Sample Chinese: 
{'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.30176767676767674, 'r': 0.2845238095238095, 'f1': 0.29289215686274506}, 'combined': 0.1952614379084967, 'epoch': 36} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2604166666666667, 'r': 0.2717391304347826, 'f1': 0.2659574468085107}, 'combined': 0.13297872340425534, 'epoch': 36} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.13793103448275862, 'f1': 0.20512820512820515}, 'combined': 0.13675213675213677, 'epoch': 36} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 'epoch': 10} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 'epoch': 10} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'epoch': 10} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3234579439252337, 'r': 0.32836812144212524, 'f1': 0.32589453860640305}, 'combined': 0.2401328179205075, 'epoch': 25} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33820520545292077, 'r': 0.29673590233199043, 'f1': 0.3161163313667358}, 'combined': 0.20231445207471088, 'epoch': 25} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.32142857142857145, 'r': 0.391304347826087, 'f1': 0.35294117647058826}, 'combined': 0.17647058823529413, 'epoch': 25} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8} ****************************** Epoch: 37 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:36:21.392486: step: 2/459, loss: 0.0845658928155899 2023-01-24 04:36:22.013380: step: 4/459, loss: 0.010524159297347069 2023-01-24 04:36:22.619755: step: 6/459, loss: 0.024230975657701492 2023-01-24 04:36:23.198823: step: 8/459, loss: 0.01835322007536888 2023-01-24 04:36:23.922950: step: 10/459, loss: 0.006538672372698784 2023-01-24 04:36:24.528162: step: 12/459, loss: 0.017072521150112152 2023-01-24 04:36:25.130739: step: 14/459, loss: 0.015425873920321465 2023-01-24 04:36:25.722744: step: 16/459, loss: 0.06353946775197983 2023-01-24 04:36:26.327827: step: 
18/459, loss: 0.0035211779177188873 2023-01-24 04:36:26.917171: step: 20/459, loss: 0.0002895837533287704 2023-01-24 04:36:27.509115: step: 22/459, loss: 0.0003200368955731392 2023-01-24 04:36:28.165632: step: 24/459, loss: 0.00024270275025628507 2023-01-24 04:36:28.725439: step: 26/459, loss: 0.004817814100533724 2023-01-24 04:36:29.353113: step: 28/459, loss: 0.0004700089921243489 2023-01-24 04:36:29.951353: step: 30/459, loss: 9.828879410633817e-05 2023-01-24 04:36:30.606515: step: 32/459, loss: 0.01273773517459631 2023-01-24 04:36:31.208498: step: 34/459, loss: 0.0006472886307165027 2023-01-24 04:36:31.835325: step: 36/459, loss: 0.008762827143073082 2023-01-24 04:36:32.468087: step: 38/459, loss: 0.04321591183543205 2023-01-24 04:36:33.095894: step: 40/459, loss: 0.007898194715380669 2023-01-24 04:36:33.715665: step: 42/459, loss: 0.0012457971461117268 2023-01-24 04:36:34.352076: step: 44/459, loss: 0.00037868740037083626 2023-01-24 04:36:34.923730: step: 46/459, loss: 6.905464033479802e-06 2023-01-24 04:36:35.554534: step: 48/459, loss: 0.025929361581802368 2023-01-24 04:36:36.207565: step: 50/459, loss: 0.003637992776930332 2023-01-24 04:36:36.811144: step: 52/459, loss: 0.08259853720664978 2023-01-24 04:36:37.497312: step: 54/459, loss: 0.006506235338747501 2023-01-24 04:36:38.066472: step: 56/459, loss: 0.006419170647859573 2023-01-24 04:36:38.667575: step: 58/459, loss: 0.012843833304941654 2023-01-24 04:36:39.323181: step: 60/459, loss: 0.0017773209838196635 2023-01-24 04:36:39.910240: step: 62/459, loss: 0.12234918028116226 2023-01-24 04:36:40.463205: step: 64/459, loss: 0.001203038264065981 2023-01-24 04:36:41.079107: step: 66/459, loss: 0.005140964407473803 2023-01-24 04:36:41.738760: step: 68/459, loss: 0.010130183771252632 2023-01-24 04:36:42.311741: step: 70/459, loss: 0.001195016666315496 2023-01-24 04:36:42.895094: step: 72/459, loss: 0.004168039187788963 2023-01-24 04:36:43.470373: step: 74/459, loss: 0.02376437559723854 2023-01-24 04:36:44.109076: step: 76/459, loss: 0.010306114330887794 2023-01-24 04:36:44.711874: step: 78/459, loss: 0.011113472282886505 2023-01-24 04:36:45.330780: step: 80/459, loss: 0.0002074393560178578 2023-01-24 04:36:45.900521: step: 82/459, loss: 0.04770611599087715 2023-01-24 04:36:46.516499: step: 84/459, loss: 0.008293235674500465 2023-01-24 04:36:47.193835: step: 86/459, loss: 0.07282085716724396 2023-01-24 04:36:47.898565: step: 88/459, loss: 0.7962502837181091 2023-01-24 04:36:48.524180: step: 90/459, loss: 0.0005322029464878142 2023-01-24 04:36:49.188712: step: 92/459, loss: 5.694608626072295e-05 2023-01-24 04:36:49.838453: step: 94/459, loss: 0.00218420778401196 2023-01-24 04:36:50.481380: step: 96/459, loss: 0.000470621045678854 2023-01-24 04:36:51.095554: step: 98/459, loss: 1.1314095900161192e-05 2023-01-24 04:36:51.793433: step: 100/459, loss: 0.00601858040317893 2023-01-24 04:36:52.362337: step: 102/459, loss: 0.0061747790314257145 2023-01-24 04:36:52.906807: step: 104/459, loss: 0.0011475422652438283 2023-01-24 04:36:53.475705: step: 106/459, loss: 5.904308636672795e-05 2023-01-24 04:36:54.145111: step: 108/459, loss: 0.0012318614171817899 2023-01-24 04:36:54.743860: step: 110/459, loss: 0.00425692368298769 2023-01-24 04:36:55.341867: step: 112/459, loss: 3.266487328801304e-05 2023-01-24 04:36:55.947769: step: 114/459, loss: 0.005308999679982662 2023-01-24 04:36:56.534650: step: 116/459, loss: 0.013818464241921902 2023-01-24 04:36:57.107811: step: 118/459, loss: 0.004799808841198683 2023-01-24 04:36:57.751144: step: 120/459, loss: 
0.00483506266027689 2023-01-24 04:36:58.346201: step: 122/459, loss: 0.0021622106432914734 2023-01-24 04:36:58.957351: step: 124/459, loss: 0.008761425502598286 2023-01-24 04:36:59.698046: step: 126/459, loss: 0.01176671776920557 2023-01-24 04:37:00.305491: step: 128/459, loss: 0.0008731036214157939 2023-01-24 04:37:00.908447: step: 130/459, loss: 0.00024783058324828744 2023-01-24 04:37:01.513906: step: 132/459, loss: 0.002429783344268799 2023-01-24 04:37:02.064756: step: 134/459, loss: 4.033245915024963e-08 2023-01-24 04:37:02.711161: step: 136/459, loss: 0.011369403451681137 2023-01-24 04:37:03.357477: step: 138/459, loss: 0.004212386440485716 2023-01-24 04:37:03.993929: step: 140/459, loss: 0.005192221608012915 2023-01-24 04:37:04.641698: step: 142/459, loss: 0.002043794607743621 2023-01-24 04:37:05.281882: step: 144/459, loss: 0.38429057598114014 2023-01-24 04:37:05.855110: step: 146/459, loss: 0.003391411853954196 2023-01-24 04:37:06.432804: step: 148/459, loss: 0.0017324741929769516 2023-01-24 04:37:07.023705: step: 150/459, loss: 0.0002939398109447211 2023-01-24 04:37:07.652531: step: 152/459, loss: 0.012137962505221367 2023-01-24 04:37:08.225294: step: 154/459, loss: 0.0001350355742033571 2023-01-24 04:37:08.889250: step: 156/459, loss: 0.0002021961408900097 2023-01-24 04:37:09.523324: step: 158/459, loss: 0.0175931453704834 2023-01-24 04:37:10.089890: step: 160/459, loss: 0.0003844044404104352 2023-01-24 04:37:10.623616: step: 162/459, loss: 0.008105112239718437 2023-01-24 04:37:11.274789: step: 164/459, loss: 0.033070534467697144 2023-01-24 04:37:11.862248: step: 166/459, loss: 0.0020288678351789713 2023-01-24 04:37:12.518869: step: 168/459, loss: 0.004868582356721163 2023-01-24 04:37:13.114656: step: 170/459, loss: 0.004508410580456257 2023-01-24 04:37:13.701745: step: 172/459, loss: 0.0025724186562001705 2023-01-24 04:37:14.362767: step: 174/459, loss: 0.00017958796524908394 2023-01-24 04:37:14.976561: step: 176/459, loss: 0.03385094925761223 2023-01-24 04:37:15.546423: step: 178/459, loss: 0.0006327611627057195 2023-01-24 04:37:16.284017: step: 180/459, loss: 0.024419410154223442 2023-01-24 04:37:16.877418: step: 182/459, loss: 0.002385437488555908 2023-01-24 04:37:17.440814: step: 184/459, loss: 0.00010047860268969089 2023-01-24 04:37:18.134462: step: 186/459, loss: 0.00018871155043598264 2023-01-24 04:37:18.756950: step: 188/459, loss: 0.010431020520627499 2023-01-24 04:37:19.346865: step: 190/459, loss: 0.026556286960840225 2023-01-24 04:37:20.034965: step: 192/459, loss: 0.0015658233314752579 2023-01-24 04:37:20.651006: step: 194/459, loss: 0.0002692946291062981 2023-01-24 04:37:21.308341: step: 196/459, loss: 0.00908880215138197 2023-01-24 04:37:21.906992: step: 198/459, loss: 0.0002651949180290103 2023-01-24 04:37:22.579961: step: 200/459, loss: 0.08672084659337997 2023-01-24 04:37:23.240226: step: 202/459, loss: 0.009103396907448769 2023-01-24 04:37:23.936117: step: 204/459, loss: 0.0003539550816640258 2023-01-24 04:37:24.514322: step: 206/459, loss: 0.009181282483041286 2023-01-24 04:37:25.022787: step: 208/459, loss: 0.0002054852229775861 2023-01-24 04:37:25.609953: step: 210/459, loss: 0.01570533961057663 2023-01-24 04:37:26.223044: step: 212/459, loss: 0.04378151893615723 2023-01-24 04:37:26.768450: step: 214/459, loss: 0.002880700631067157 2023-01-24 04:37:27.410633: step: 216/459, loss: 0.004475221503525972 2023-01-24 04:37:28.053488: step: 218/459, loss: 0.007917182520031929 2023-01-24 04:37:28.669712: step: 220/459, loss: 0.02076559327542782 2023-01-24 
04:37:29.341081: step: 222/459, loss: 0.008028347976505756 2023-01-24 04:37:29.968516: step: 224/459, loss: 0.009565864689648151 2023-01-24 04:37:30.611434: step: 226/459, loss: 0.5993791818618774 2023-01-24 04:37:31.234743: step: 228/459, loss: 0.040289975702762604 2023-01-24 04:37:31.786031: step: 230/459, loss: 0.04129711911082268 2023-01-24 04:37:32.433402: step: 232/459, loss: 0.001254140050150454 2023-01-24 04:37:33.059046: step: 234/459, loss: 0.05847727879881859 2023-01-24 04:37:33.701412: step: 236/459, loss: 0.015298042446374893 2023-01-24 04:37:34.315018: step: 238/459, loss: 0.029059110209345818 2023-01-24 04:37:34.914958: step: 240/459, loss: 0.01876538060605526 2023-01-24 04:37:35.580358: step: 242/459, loss: 0.038981515914201736 2023-01-24 04:37:36.185805: step: 244/459, loss: 0.018040310591459274 2023-01-24 04:37:36.775786: step: 246/459, loss: 0.027672402560710907 2023-01-24 04:37:37.391102: step: 248/459, loss: 0.003468393348157406 2023-01-24 04:37:37.976684: step: 250/459, loss: 0.001241041230969131 2023-01-24 04:37:38.547013: step: 252/459, loss: 7.702851144131273e-05 2023-01-24 04:37:39.171998: step: 254/459, loss: 0.03848903998732567 2023-01-24 04:37:39.741003: step: 256/459, loss: 0.001014201669022441 2023-01-24 04:37:40.353705: step: 258/459, loss: 0.0009449631324969232 2023-01-24 04:37:40.863634: step: 260/459, loss: 0.0017423179233446717 2023-01-24 04:37:41.427297: step: 262/459, loss: 0.0005130588542670012 2023-01-24 04:37:42.010703: step: 264/459, loss: 0.003513834672048688 2023-01-24 04:37:42.684750: step: 266/459, loss: 0.04773363098502159 2023-01-24 04:37:43.359194: step: 268/459, loss: 0.019732559099793434 2023-01-24 04:37:43.959136: step: 270/459, loss: 0.0009545194334350526 2023-01-24 04:37:44.593936: step: 272/459, loss: 0.0638323500752449 2023-01-24 04:37:45.269019: step: 274/459, loss: 0.008173511363565922 2023-01-24 04:37:45.880658: step: 276/459, loss: 0.0021535195410251617 2023-01-24 04:37:46.464760: step: 278/459, loss: 0.0010618313681334257 2023-01-24 04:37:47.012430: step: 280/459, loss: 0.00841524451971054 2023-01-24 04:37:47.552410: step: 282/459, loss: 0.020849186927080154 2023-01-24 04:37:48.243249: step: 284/459, loss: 0.005527608562260866 2023-01-24 04:37:48.818195: step: 286/459, loss: 0.029451211914420128 2023-01-24 04:37:49.607399: step: 288/459, loss: 0.9361441135406494 2023-01-24 04:37:50.179449: step: 290/459, loss: 0.0010172192705795169 2023-01-24 04:37:50.847050: step: 292/459, loss: 0.0002928987087216228 2023-01-24 04:37:51.409025: step: 294/459, loss: 0.01804727502167225 2023-01-24 04:37:52.120427: step: 296/459, loss: 0.06058691442012787 2023-01-24 04:37:52.727620: step: 298/459, loss: 0.0005169869400560856 2023-01-24 04:37:53.316629: step: 300/459, loss: 0.0009404083248227835 2023-01-24 04:37:53.872415: step: 302/459, loss: 0.0016682628775015473 2023-01-24 04:37:54.508636: step: 304/459, loss: 0.00794810801744461 2023-01-24 04:37:55.180771: step: 306/459, loss: 0.013779067434370518 2023-01-24 04:37:55.769311: step: 308/459, loss: 0.001530387788079679 2023-01-24 04:37:56.291127: step: 310/459, loss: 0.0001941329101100564 2023-01-24 04:37:56.916632: step: 312/459, loss: 0.0035321807954460382 2023-01-24 04:37:57.544481: step: 314/459, loss: 0.009791722521185875 2023-01-24 04:37:58.230514: step: 316/459, loss: 0.0018827795283868909 2023-01-24 04:37:58.810842: step: 318/459, loss: 0.008783062919974327 2023-01-24 04:37:59.410381: step: 320/459, loss: 0.04355006664991379 2023-01-24 04:38:00.051324: step: 322/459, loss: 
7.817844561941456e-06 2023-01-24 04:38:00.659814: step: 324/459, loss: 0.0068744332529604435 2023-01-24 04:38:01.234489: step: 326/459, loss: 0.025627896189689636 2023-01-24 04:38:01.852840: step: 328/459, loss: 0.0036619813181459904 2023-01-24 04:38:02.490082: step: 330/459, loss: 0.0011933299247175455 2023-01-24 04:38:03.089756: step: 332/459, loss: 0.018695339560508728 2023-01-24 04:38:03.699481: step: 334/459, loss: 0.16675828397274017 2023-01-24 04:38:04.248064: step: 336/459, loss: 0.035050809383392334 2023-01-24 04:38:04.864006: step: 338/459, loss: 0.032732270658016205 2023-01-24 04:38:05.534221: step: 340/459, loss: 0.009443212300539017 2023-01-24 04:38:06.145336: step: 342/459, loss: 0.07697644829750061 2023-01-24 04:38:06.746186: step: 344/459, loss: 0.0007812321418896317 2023-01-24 04:38:07.348893: step: 346/459, loss: 0.00639432854950428 2023-01-24 04:38:07.971833: step: 348/459, loss: 0.24167057871818542 2023-01-24 04:38:08.598400: step: 350/459, loss: 0.02992818132042885 2023-01-24 04:38:09.283309: step: 352/459, loss: 2.1297577404766344e-05 2023-01-24 04:38:09.879248: step: 354/459, loss: 0.013324982486665249 2023-01-24 04:38:10.511518: step: 356/459, loss: 0.012247053906321526 2023-01-24 04:38:11.108991: step: 358/459, loss: 0.0005350405699573457 2023-01-24 04:38:11.792023: step: 360/459, loss: 0.012718478217720985 2023-01-24 04:38:12.377257: step: 362/459, loss: 0.022495212033391 2023-01-24 04:38:13.013135: step: 364/459, loss: 0.12225610762834549 2023-01-24 04:38:13.664156: step: 366/459, loss: 0.0011511478805914521 2023-01-24 04:38:14.234221: step: 368/459, loss: 0.003797589335590601 2023-01-24 04:38:14.844199: step: 370/459, loss: 0.00013184535782784224 2023-01-24 04:38:15.467050: step: 372/459, loss: 0.02141871117055416 2023-01-24 04:38:16.102202: step: 374/459, loss: 7.576491498184623e-06 2023-01-24 04:38:16.715884: step: 376/459, loss: 0.0020160104613751173 2023-01-24 04:38:17.390277: step: 378/459, loss: 0.0031293306965380907 2023-01-24 04:38:18.022598: step: 380/459, loss: 0.0611104741692543 2023-01-24 04:38:18.704747: step: 382/459, loss: 0.011189088225364685 2023-01-24 04:38:19.414176: step: 384/459, loss: 0.0008674098644405603 2023-01-24 04:38:20.027792: step: 386/459, loss: 0.0009926428319886327 2023-01-24 04:38:20.570919: step: 388/459, loss: 0.0029639508575201035 2023-01-24 04:38:21.165218: step: 390/459, loss: 0.000218678978853859 2023-01-24 04:38:21.793527: step: 392/459, loss: 0.012011227197945118 2023-01-24 04:38:22.394119: step: 394/459, loss: 0.000576618651393801 2023-01-24 04:38:23.012346: step: 396/459, loss: 0.004989030305296183 2023-01-24 04:38:23.652829: step: 398/459, loss: 0.007021021097898483 2023-01-24 04:38:24.278025: step: 400/459, loss: 0.02111729048192501 2023-01-24 04:38:24.896332: step: 402/459, loss: 0.0002560717111919075 2023-01-24 04:38:25.569335: step: 404/459, loss: 0.0034382985904812813 2023-01-24 04:38:26.171632: step: 406/459, loss: 0.016178522258996964 2023-01-24 04:38:26.816079: step: 408/459, loss: 0.014478093944489956 2023-01-24 04:38:27.371267: step: 410/459, loss: 0.0021790703758597374 2023-01-24 04:38:28.025015: step: 412/459, loss: 0.025147361680865288 2023-01-24 04:38:28.599434: step: 414/459, loss: 0.0003935422864742577 2023-01-24 04:38:29.212598: step: 416/459, loss: 0.0042432197369635105 2023-01-24 04:38:29.879815: step: 418/459, loss: 0.0017116704257205129 2023-01-24 04:38:30.507019: step: 420/459, loss: 0.011598408222198486 2023-01-24 04:38:31.059745: step: 422/459, loss: 0.0023979865945875645 2023-01-24 
04:38:31.651284: step: 424/459, loss: 0.0007135843043215573 2023-01-24 04:38:32.329909: step: 426/459, loss: 0.008983643725514412 2023-01-24 04:38:32.920969: step: 428/459, loss: 0.026874518021941185 2023-01-24 04:38:33.589898: step: 430/459, loss: 0.059363797307014465 2023-01-24 04:38:34.206048: step: 432/459, loss: 0.016469383612275124 2023-01-24 04:38:34.764293: step: 434/459, loss: 0.0006043367320671678 2023-01-24 04:38:35.364562: step: 436/459, loss: 0.0032410267740488052 2023-01-24 04:38:36.062570: step: 438/459, loss: 0.002992602763697505 2023-01-24 04:38:36.647228: step: 440/459, loss: 0.009719884023070335 2023-01-24 04:38:37.271329: step: 442/459, loss: 0.015783706679940224 2023-01-24 04:38:37.890186: step: 444/459, loss: 0.00017010842566378415 2023-01-24 04:38:38.486693: step: 446/459, loss: 0.9769845604896545 2023-01-24 04:38:39.129574: step: 448/459, loss: 0.0003004646860063076 2023-01-24 04:38:39.743691: step: 450/459, loss: 0.0007590157329104841 2023-01-24 04:38:40.463421: step: 452/459, loss: 0.01277488935738802 2023-01-24 04:38:41.078012: step: 454/459, loss: 0.001252705929800868 2023-01-24 04:38:41.698578: step: 456/459, loss: 0.012632491067051888 2023-01-24 04:38:42.260718: step: 458/459, loss: 0.0001949947909452021 2023-01-24 04:38:42.839184: step: 460/459, loss: 0.0031994490418583155 2023-01-24 04:38:43.409452: step: 462/459, loss: 0.01142347976565361 2023-01-24 04:38:44.027029: step: 464/459, loss: 0.010232629254460335 2023-01-24 04:38:44.610929: step: 466/459, loss: 0.0006446518236771226 2023-01-24 04:38:45.159019: step: 468/459, loss: 0.0017425182741135359 2023-01-24 04:38:45.727253: step: 470/459, loss: 0.00948069803416729 2023-01-24 04:38:46.383826: step: 472/459, loss: 0.0009360662661492825 2023-01-24 04:38:46.987198: step: 474/459, loss: 0.058396000415086746 2023-01-24 04:38:47.630477: step: 476/459, loss: 0.005213453900068998 2023-01-24 04:38:48.288606: step: 478/459, loss: 0.007146898657083511 2023-01-24 04:38:48.877751: step: 480/459, loss: 0.019208943471312523 2023-01-24 04:38:49.457696: step: 482/459, loss: 0.003046802943572402 2023-01-24 04:38:50.087397: step: 484/459, loss: 5.291487468639389e-05 2023-01-24 04:38:50.761174: step: 486/459, loss: 0.0040151155553758144 2023-01-24 04:38:51.383524: step: 488/459, loss: 0.0019686571322381496 2023-01-24 04:38:52.026438: step: 490/459, loss: 0.669100821018219 2023-01-24 04:38:52.692021: step: 492/459, loss: 0.0008384776301681995 2023-01-24 04:38:53.409831: step: 494/459, loss: 0.06838797777891159 2023-01-24 04:38:54.067094: step: 496/459, loss: 0.007483388297259808 2023-01-24 04:38:54.627671: step: 498/459, loss: 0.00115564267616719 2023-01-24 04:38:55.201513: step: 500/459, loss: 0.0010565196862444282 2023-01-24 04:38:55.816329: step: 502/459, loss: 0.0025863845366984606 2023-01-24 04:38:56.463062: step: 504/459, loss: 0.007685751188546419 2023-01-24 04:38:57.145314: step: 506/459, loss: 0.03229769319295883 2023-01-24 04:38:57.744582: step: 508/459, loss: 0.04893651232123375 2023-01-24 04:38:58.462841: step: 510/459, loss: 0.10546787083148956 2023-01-24 04:38:59.071349: step: 512/459, loss: 0.012700558640062809 2023-01-24 04:38:59.639007: step: 514/459, loss: 0.0004008671676274389 2023-01-24 04:39:00.221927: step: 516/459, loss: 0.6161071062088013 2023-01-24 04:39:00.826493: step: 518/459, loss: 0.0007814464042894542 2023-01-24 04:39:01.425722: step: 520/459, loss: 0.0007691181963309646 2023-01-24 04:39:02.042290: step: 522/459, loss: 0.004617839120328426 2023-01-24 04:39:02.804331: step: 524/459, loss: 
0.05960646644234657 2023-01-24 04:39:03.424889: step: 526/459, loss: 0.001557512441650033 2023-01-24 04:39:04.055062: step: 528/459, loss: 0.0014496243093162775 2023-01-24 04:39:04.671881: step: 530/459, loss: 0.00011108910257462412 2023-01-24 04:39:05.360731: step: 532/459, loss: 0.0755477100610733 2023-01-24 04:39:06.052324: step: 534/459, loss: 0.012720881029963493 2023-01-24 04:39:06.650810: step: 536/459, loss: 0.00021742160606663674 2023-01-24 04:39:07.246226: step: 538/459, loss: 0.00023129222972784191 2023-01-24 04:39:07.932727: step: 540/459, loss: 0.04516943171620369 2023-01-24 04:39:08.537651: step: 542/459, loss: 0.003452478675171733 2023-01-24 04:39:09.274683: step: 544/459, loss: 0.09104634821414948 2023-01-24 04:39:09.884228: step: 546/459, loss: 0.0003176217433065176 2023-01-24 04:39:10.545115: step: 548/459, loss: 0.0001527996500954032 2023-01-24 04:39:11.182988: step: 550/459, loss: 0.00157471455167979 2023-01-24 04:39:11.746096: step: 552/459, loss: 0.006081617437303066 2023-01-24 04:39:12.316126: step: 554/459, loss: 6.992001726757735e-05 2023-01-24 04:39:12.929506: step: 556/459, loss: 0.02144644781947136 2023-01-24 04:39:13.569091: step: 558/459, loss: 0.0030101831071078777 2023-01-24 04:39:14.155034: step: 560/459, loss: 1.4267042875289917 2023-01-24 04:39:14.844573: step: 562/459, loss: 0.0023684201296418905 2023-01-24 04:39:15.445511: step: 564/459, loss: 0.0004434739239513874 2023-01-24 04:39:15.964167: step: 566/459, loss: 0.00634642131626606 2023-01-24 04:39:16.568582: step: 568/459, loss: 0.00215476518496871 2023-01-24 04:39:17.175551: step: 570/459, loss: 0.0017907669534906745 2023-01-24 04:39:17.812004: step: 572/459, loss: 0.0031463245395570993 2023-01-24 04:39:18.463883: step: 574/459, loss: 0.03281570225954056 2023-01-24 04:39:19.090549: step: 576/459, loss: 0.0014144801534712315 2023-01-24 04:39:19.804520: step: 578/459, loss: 0.01878325268626213 2023-01-24 04:39:20.443945: step: 580/459, loss: 0.0004561760288197547 2023-01-24 04:39:21.040111: step: 582/459, loss: 0.02401587925851345 2023-01-24 04:39:21.736937: step: 584/459, loss: 0.2332606464624405 2023-01-24 04:39:22.376725: step: 586/459, loss: 0.020973509177565575 2023-01-24 04:39:22.982193: step: 588/459, loss: 0.26645344495773315 2023-01-24 04:39:23.562118: step: 590/459, loss: 0.0050672139041125774 2023-01-24 04:39:24.226360: step: 592/459, loss: 0.017061831429600716 2023-01-24 04:39:24.834261: step: 594/459, loss: 0.009058725088834763 2023-01-24 04:39:25.447357: step: 596/459, loss: 0.02123449742794037 2023-01-24 04:39:26.052487: step: 598/459, loss: 0.007377667352557182 2023-01-24 04:39:26.670688: step: 600/459, loss: 0.10740899294614792 2023-01-24 04:39:27.304990: step: 602/459, loss: 0.0019208575831726193 2023-01-24 04:39:27.896047: step: 604/459, loss: 3.341092087794095e-05 2023-01-24 04:39:28.539795: step: 606/459, loss: 0.010050149634480476 2023-01-24 04:39:29.128989: step: 608/459, loss: 4.053391456604004 2023-01-24 04:39:29.796205: step: 610/459, loss: 0.0027494262903928757 2023-01-24 04:39:30.373527: step: 612/459, loss: 0.025546642020344734 2023-01-24 04:39:30.985659: step: 614/459, loss: 0.0018459176644682884 2023-01-24 04:39:31.700542: step: 616/459, loss: 0.016919294372200966 2023-01-24 04:39:32.318822: step: 618/459, loss: 0.0010914209997281432 2023-01-24 04:39:32.948949: step: 620/459, loss: 0.009809660725295544 2023-01-24 04:39:33.611331: step: 622/459, loss: 0.0056499093770980835 2023-01-24 04:39:34.150136: step: 624/459, loss: 0.0173663180321455 2023-01-24 04:39:34.797175: step: 
626/459, loss: 6.125008803792298e-05 2023-01-24 04:39:35.455033: step: 628/459, loss: 0.028099648654460907 2023-01-24 04:39:36.059109: step: 630/459, loss: 0.0007989624282345176 2023-01-24 04:39:36.676634: step: 632/459, loss: 0.0004536173364613205 2023-01-24 04:39:37.286960: step: 634/459, loss: 0.0018901531584560871 2023-01-24 04:39:37.912367: step: 636/459, loss: 0.0012697234051302075 2023-01-24 04:39:38.561223: step: 638/459, loss: 0.0012942919274792075 2023-01-24 04:39:39.232110: step: 640/459, loss: 0.006429359316825867 2023-01-24 04:39:39.764598: step: 642/459, loss: 0.014272877015173435 2023-01-24 04:39:40.337778: step: 644/459, loss: 0.0025483951903879642 2023-01-24 04:39:41.008918: step: 646/459, loss: 0.0013210250763222575 2023-01-24 04:39:41.655460: step: 648/459, loss: 0.0034712052438408136 2023-01-24 04:39:42.252401: step: 650/459, loss: 0.018589423969388008 2023-01-24 04:39:42.884190: step: 652/459, loss: 0.000395819399273023 2023-01-24 04:39:43.494627: step: 654/459, loss: 0.0001987551077036187 2023-01-24 04:39:44.132849: step: 656/459, loss: 0.013839391991496086 2023-01-24 04:39:44.817077: step: 658/459, loss: 0.005906836595386267 2023-01-24 04:39:45.428002: step: 660/459, loss: 0.0013319941936060786 2023-01-24 04:39:46.033967: step: 662/459, loss: 0.0010848131496459246 2023-01-24 04:39:46.651502: step: 664/459, loss: 0.0009276331402361393 2023-01-24 04:39:47.311183: step: 666/459, loss: 0.0009371109190396965 2023-01-24 04:39:47.907180: step: 668/459, loss: 0.001536969211883843 2023-01-24 04:39:48.527647: step: 670/459, loss: 0.037528954446315765 2023-01-24 04:39:49.119895: step: 672/459, loss: 0.09794749319553375 2023-01-24 04:39:49.763852: step: 674/459, loss: 0.001679448178038001 2023-01-24 04:39:50.360974: step: 676/459, loss: 3.305254358565435e-05 2023-01-24 04:39:51.006004: step: 678/459, loss: 0.009021996520459652 2023-01-24 04:39:51.572719: step: 680/459, loss: 0.016300372779369354 2023-01-24 04:39:52.212564: step: 682/459, loss: 0.021093152463436127 2023-01-24 04:39:52.792227: step: 684/459, loss: 1.4913787254045019e-06 2023-01-24 04:39:53.399027: step: 686/459, loss: 0.00026332007837481797 2023-01-24 04:39:54.031072: step: 688/459, loss: 0.003961898852139711 2023-01-24 04:39:54.617182: step: 690/459, loss: 0.003775527235120535 2023-01-24 04:39:55.228779: step: 692/459, loss: 0.001056589768268168 2023-01-24 04:39:55.896994: step: 694/459, loss: 0.05202684551477432 2023-01-24 04:39:56.560151: step: 696/459, loss: 0.008795591071248055 2023-01-24 04:39:57.187994: step: 698/459, loss: 0.0021731078159064054 2023-01-24 04:39:57.785816: step: 700/459, loss: 0.0005439550150185823 2023-01-24 04:39:58.385355: step: 702/459, loss: 0.010683804750442505 2023-01-24 04:39:59.002763: step: 704/459, loss: 0.0036585223861038685 2023-01-24 04:39:59.564342: step: 706/459, loss: 0.00017024540284182876 2023-01-24 04:40:00.137592: step: 708/459, loss: 0.0016375459963455796 2023-01-24 04:40:00.761375: step: 710/459, loss: 0.018223056569695473 2023-01-24 04:40:01.349929: step: 712/459, loss: 0.0017208644421771169 2023-01-24 04:40:01.985924: step: 714/459, loss: 0.03678359091281891 2023-01-24 04:40:02.602870: step: 716/459, loss: 0.002331193769350648 2023-01-24 04:40:03.169343: step: 718/459, loss: 0.0009210282005369663 2023-01-24 04:40:03.851259: step: 720/459, loss: 0.01759197935461998 2023-01-24 04:40:04.510945: step: 722/459, loss: 0.004081335850059986 2023-01-24 04:40:05.097800: step: 724/459, loss: 0.18271680176258087 2023-01-24 04:40:05.786527: step: 726/459, loss: 
0.01559679675847292 2023-01-24 04:40:06.398612: step: 728/459, loss: 0.18506965041160583 2023-01-24 04:40:07.069374: step: 730/459, loss: 0.002070679096505046 2023-01-24 04:40:07.661355: step: 732/459, loss: 0.000695285911206156 2023-01-24 04:40:08.301375: step: 734/459, loss: 0.15064860880374908 2023-01-24 04:40:08.839685: step: 736/459, loss: 0.0008125067106448114 2023-01-24 04:40:09.452149: step: 738/459, loss: 0.002036971040070057 2023-01-24 04:40:10.066411: step: 740/459, loss: 0.002815346233546734 2023-01-24 04:40:10.735891: step: 742/459, loss: 0.05609910190105438 2023-01-24 04:40:11.361385: step: 744/459, loss: 0.26830652356147766 2023-01-24 04:40:12.024700: step: 746/459, loss: 0.008707558736205101 2023-01-24 04:40:12.613163: step: 748/459, loss: 1.760460145305842e-05 2023-01-24 04:40:13.192131: step: 750/459, loss: 0.018494103103876114 2023-01-24 04:40:13.776092: step: 752/459, loss: 0.060805272310972214 2023-01-24 04:40:14.390343: step: 754/459, loss: 0.00019812726532109082 2023-01-24 04:40:15.021065: step: 756/459, loss: 0.008531711995601654 2023-01-24 04:40:15.624958: step: 758/459, loss: 0.0036101131699979305 2023-01-24 04:40:16.273160: step: 760/459, loss: 0.00617488007992506 2023-01-24 04:40:16.884962: step: 762/459, loss: 0.0009289207519032061 2023-01-24 04:40:17.596125: step: 764/459, loss: 0.005276330281049013 2023-01-24 04:40:18.179355: step: 766/459, loss: 0.0010093428427353501 2023-01-24 04:40:18.763536: step: 768/459, loss: 1.3955530448583886e-05 2023-01-24 04:40:19.366037: step: 770/459, loss: 0.0016894082073122263 2023-01-24 04:40:20.031383: step: 772/459, loss: 0.016007862985134125 2023-01-24 04:40:20.657131: step: 774/459, loss: 0.058337338268756866 2023-01-24 04:40:21.340123: step: 776/459, loss: 0.00361677841283381 2023-01-24 04:40:21.908992: step: 778/459, loss: 0.012078329920768738 2023-01-24 04:40:22.519399: step: 780/459, loss: 0.47542497515678406 2023-01-24 04:40:23.146741: step: 782/459, loss: 0.006891090422868729 2023-01-24 04:40:23.753334: step: 784/459, loss: 0.07420102506875992 2023-01-24 04:40:24.383423: step: 786/459, loss: 0.0014907942386344075 2023-01-24 04:40:25.036742: step: 788/459, loss: 0.027720680460333824 2023-01-24 04:40:25.655583: step: 790/459, loss: 0.0009498500730842352 2023-01-24 04:40:26.288737: step: 792/459, loss: 0.038363512605428696 2023-01-24 04:40:26.906751: step: 794/459, loss: 0.0017259068554267287 2023-01-24 04:40:27.543258: step: 796/459, loss: 0.022687537595629692 2023-01-24 04:40:28.184099: step: 798/459, loss: 0.02740909531712532 2023-01-24 04:40:28.845060: step: 800/459, loss: 0.00016171124298125505 2023-01-24 04:40:29.506426: step: 802/459, loss: 0.14217956364154816 2023-01-24 04:40:30.145067: step: 804/459, loss: 0.012720232829451561 2023-01-24 04:40:30.735824: step: 806/459, loss: 0.019477106630802155 2023-01-24 04:40:31.343474: step: 808/459, loss: 0.003998294472694397 2023-01-24 04:40:31.923888: step: 810/459, loss: 0.0015245985705405474 2023-01-24 04:40:32.608968: step: 812/459, loss: 0.00016034294094424695 2023-01-24 04:40:33.253817: step: 814/459, loss: 0.006066071800887585 2023-01-24 04:40:33.849132: step: 816/459, loss: 0.21541063487529755 2023-01-24 04:40:34.468844: step: 818/459, loss: 0.005899945739656687 2023-01-24 04:40:35.133593: step: 820/459, loss: 0.199243426322937 2023-01-24 04:40:35.779004: step: 822/459, loss: 0.049882180988788605 2023-01-24 04:40:36.448046: step: 824/459, loss: 0.0034252190962433815 2023-01-24 04:40:37.042587: step: 826/459, loss: 0.17837055027484894 2023-01-24 04:40:37.657392: 
step: 828/459, loss: 0.05684913694858551 2023-01-24 04:40:38.254071: step: 830/459, loss: 0.00992503296583891 2023-01-24 04:40:38.869449: step: 832/459, loss: 0.0037606919649988413 2023-01-24 04:40:39.457888: step: 834/459, loss: 0.0015769123565405607 2023-01-24 04:40:40.121972: step: 836/459, loss: 0.0030912216752767563 2023-01-24 04:40:40.749503: step: 838/459, loss: 0.024958813562989235 2023-01-24 04:40:41.384114: step: 840/459, loss: 0.012818992137908936 2023-01-24 04:40:42.015687: step: 842/459, loss: 0.007524792104959488 2023-01-24 04:40:42.643404: step: 844/459, loss: 0.0018093986436724663 2023-01-24 04:40:43.345743: step: 846/459, loss: 0.013478119857609272 2023-01-24 04:40:43.977616: step: 848/459, loss: 0.0008694453281350434 2023-01-24 04:40:44.568567: step: 850/459, loss: 0.000698177726007998 2023-01-24 04:40:45.166916: step: 852/459, loss: 3.245156767661683e-05 2023-01-24 04:40:45.762283: step: 854/459, loss: 0.0024354516062885523 2023-01-24 04:40:46.370850: step: 856/459, loss: 0.045151785016059875 2023-01-24 04:40:46.984468: step: 858/459, loss: 0.030666081234812737 2023-01-24 04:40:47.598084: step: 860/459, loss: 0.011348927393555641 2023-01-24 04:40:48.234564: step: 862/459, loss: 0.0010695363162085414 2023-01-24 04:40:48.829607: step: 864/459, loss: 0.002486569806933403 2023-01-24 04:40:49.557722: step: 866/459, loss: 0.80592942237854 2023-01-24 04:40:50.270931: step: 868/459, loss: 0.07840249687433243 2023-01-24 04:40:50.921465: step: 870/459, loss: 0.1357606202363968 2023-01-24 04:40:51.537822: step: 872/459, loss: 0.012494809925556183 2023-01-24 04:40:52.276261: step: 874/459, loss: 0.0009725302807055414 2023-01-24 04:40:52.921652: step: 876/459, loss: 0.0023270179517567158 2023-01-24 04:40:53.574187: step: 878/459, loss: 0.002191917970776558 2023-01-24 04:40:54.192058: step: 880/459, loss: 0.017496488988399506 2023-01-24 04:40:54.872310: step: 882/459, loss: 0.000941956415772438 2023-01-24 04:40:55.417072: step: 884/459, loss: 0.012178107164800167 2023-01-24 04:40:56.029147: step: 886/459, loss: 1.0836573665073956e-06 2023-01-24 04:40:56.689444: step: 888/459, loss: 0.0021605845540761948 2023-01-24 04:40:57.283542: step: 890/459, loss: 0.004698346368968487 2023-01-24 04:40:57.847113: step: 892/459, loss: 0.000236643070820719 2023-01-24 04:40:58.519171: step: 894/459, loss: 0.019719738513231277 2023-01-24 04:40:59.107239: step: 896/459, loss: 2.207139550591819e-05 2023-01-24 04:40:59.763129: step: 898/459, loss: 0.08005663752555847 2023-01-24 04:41:00.424055: step: 900/459, loss: 0.032033856958150864 2023-01-24 04:41:01.024633: step: 902/459, loss: 0.030595703050494194 2023-01-24 04:41:01.613140: step: 904/459, loss: 7.657323294552043e-05 2023-01-24 04:41:02.231648: step: 906/459, loss: 0.1443134844303131 2023-01-24 04:41:02.846987: step: 908/459, loss: 0.0005506500601768494 2023-01-24 04:41:03.450604: step: 910/459, loss: 9.178004984278232e-05 2023-01-24 04:41:04.063036: step: 912/459, loss: 0.017389608547091484 2023-01-24 04:41:04.718513: step: 914/459, loss: 0.006783135235309601 2023-01-24 04:41:05.285653: step: 916/459, loss: 0.018257303163409233 2023-01-24 04:41:05.991876: step: 918/459, loss: 0.0016456382581964135 2023-01-24 04:41:06.457002: step: 920/459, loss: 2.9802316170446375e-09 ================================================== Loss: 0.044 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.338758429672447, 'r': 0.33361598671726755, 'f1': 0.3361675430210325}, 'combined': 
0.24770240012076078, 'epoch': 37} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.347158623431603, 'r': 0.309602372351275, 'f1': 0.32730668869428403}, 'combined': 0.20947628076434174, 'epoch': 37} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.33051087395237305, 'r': 0.3430539811232411, 'f1': 0.3366656388304433}, 'combined': 0.2480694180855898, 'epoch': 37} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.35329336012683327, 'r': 0.30672287174647794, 'f1': 0.32836511817141195}, 'combined': 0.2101536756297036, 'epoch': 37} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35288610223376277, 'r': 0.34150267958106073, 'f1': 0.3471010841643568}, 'combined': 0.2557586935947892, 'epoch': 37} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3620647576340381, 'r': 0.3237440351107275, 'f1': 0.341833778883774}, 'combined': 0.24508836976572476, 'epoch': 37} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2523674242424242, 'r': 0.2884199134199134, 'f1': 0.26919191919191915}, 'combined': 0.17946127946127943, 'epoch': 37} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.25806451612903225, 'r': 0.34782608695652173, 'f1': 0.2962962962962963}, 'combined': 0.14814814814814814, 'epoch': 37} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.13793103448275862, 'f1': 0.1951219512195122}, 'combined': 0.13008130081300812, 'epoch': 37} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 'epoch': 10} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 'epoch': 10} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'epoch': 10} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3234579439252337, 'r': 0.32836812144212524, 'f1': 0.32589453860640305}, 'combined': 0.2401328179205075, 'epoch': 25} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33820520545292077, 'r': 0.29673590233199043, 'f1': 0.3161163313667358}, 'combined': 0.20231445207471088, 'epoch': 25} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.32142857142857145, 'r': 0.391304347826087, 'f1': 0.35294117647058826}, 'combined': 0.17647058823529413, 'epoch': 25} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 
0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8} ****************************** Epoch: 38 command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4 2023-01-24 04:43:43.565910: step: 2/459, loss: 0.0014588795602321625 2023-01-24 04:43:44.202978: step: 4/459, loss: 0.014984391629695892 2023-01-24 04:43:44.828496: step: 6/459, loss: 0.025396769866347313 2023-01-24 04:43:45.479247: step: 8/459, loss: 0.1787632703781128 2023-01-24 04:43:46.079226: step: 10/459, loss: 0.011263391003012657 2023-01-24 04:43:46.634553: step: 12/459, loss: 0.13782134652137756 2023-01-24 04:43:47.274326: step: 14/459, loss: 0.014883909374475479 2023-01-24 04:43:47.900248: step: 16/459, loss: 0.008274640887975693 2023-01-24 04:43:48.442384: step: 18/459, loss: 0.0021562566980719566 2023-01-24 04:43:49.081021: step: 20/459, loss: 0.0023222854360938072 2023-01-24 04:43:49.732242: step: 22/459, loss: 0.03256942704319954 2023-01-24 04:43:50.307164: step: 24/459, loss: 0.01664215512573719 2023-01-24 04:43:50.834738: step: 26/459, loss: 0.002740604802966118 2023-01-24 04:43:51.426002: step: 28/459, loss: 0.030812274664640427 2023-01-24 04:43:52.014250: step: 30/459, loss: 0.00030716945184394717 2023-01-24 04:43:52.712668: step: 32/459, loss: 0.0030518544372171164 2023-01-24 04:43:53.380482: step: 34/459, loss: 0.008334463462233543 2023-01-24 04:43:53.994793: step: 36/459, loss: 0.0029392181895673275 2023-01-24 04:43:54.619878: step: 38/459, loss: 0.0018594678258523345 2023-01-24 04:43:55.223174: step: 40/459, loss: 6.601489440072328e-05 2023-01-24 04:43:55.813579: step: 42/459, loss: 0.00011029474990209565 2023-01-24 04:43:56.440801: step: 44/459, loss: 0.00467614596709609 2023-01-24 04:43:57.030037: step: 46/459, loss: 0.04723696783185005 2023-01-24 04:43:57.618746: step: 48/459, loss: 0.0006924986373633146 2023-01-24 04:43:58.278832: step: 50/459, loss: 0.0001108682990889065 2023-01-24 04:43:58.900431: step: 52/459, loss: 9.25263884710148e-05 2023-01-24 04:43:59.576640: step: 54/459, loss: 0.025442805141210556 2023-01-24 04:44:00.198093: step: 56/459, loss: 0.028684917837381363 2023-01-24 04:44:00.760969: step: 58/459, loss: 0.00016847239749040455 2023-01-24 04:44:01.423922: step: 60/459, loss: 0.00022740394342690706 2023-01-24 04:44:02.055369: step: 62/459, loss: 0.00801844708621502 2023-01-24 04:44:02.750594: step: 64/459, loss: 0.002169064711779356 2023-01-24 04:44:03.324328: step: 66/459, loss: 0.001618005451746285 2023-01-24 04:44:03.971932: step: 68/459, loss: 0.002066998742520809 2023-01-24 04:44:04.562254: step: 70/459, loss: 0.036660801619291306 2023-01-24 04:44:05.276696: step: 72/459, loss: 0.0028918832540512085 2023-01-24 04:44:05.888276: step: 74/459, loss: 0.005109088961035013 2023-01-24 04:44:06.502610: step: 76/459, loss: 0.002943611005321145 2023-01-24 04:44:07.122259: step: 78/459, loss: 0.005792264360934496 2023-01-24 04:44:07.704347: step: 80/459, loss: 0.0017182204173877835 2023-01-24 04:44:08.323160: step: 82/459, loss: 9.305894491262734e-05 2023-01-24 04:44:08.927974: step: 84/459, loss: 0.01698998548090458 
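The per-epoch evaluation blocks above report template and slot precision/recall/F1 for each language plus a 'combined' score. From the printed numbers, 'combined' appears to equal the product of the template F1 and the slot F1 (e.g. for Dev Chinese at epoch 37, 0.7368421052631579 x 0.3361675430210325 is approximately 0.24770240012076078). The following minimal sketch recomputes these quantities from the logged precision/recall values; the helper names are illustrative and not taken from train.py, and the product relationship is inferred from the log rather than confirmed in the source.

# Sketch: recompute F1 and the 'combined' score from logged p/r values.
# Assumes combined = template_f1 * slot_f1, which matches the numbers printed above.

def f1(p: float, r: float) -> float:
    """Harmonic mean of precision and recall (0 when both are 0)."""
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined_score(template: dict, slot: dict) -> float:
    """Product of template F1 and slot F1, as the log's 'combined' field appears to be."""
    return f1(template['p'], template['r']) * f1(slot['p'], slot['r'])

# Dev Chinese, epoch 37 (values copied from the evaluation block above):
dev_chinese = {
    'template': {'p': 1.0, 'r': 0.5833333333333334},
    'slot': {'p': 0.338758429672447, 'r': 0.33361598671726755},
}
print(combined_score(dev_chinese['template'], dev_chinese['slot']))  # ~0.2477, matching the logged 0.24770240012076078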
2023-01-24 04:44:09.577195: step: 86/459, loss: 3.220831786165945e-05 2023-01-24 04:44:10.166935: step: 88/459, loss: 0.03642524778842926 2023-01-24 04:44:10.744866: step: 90/459, loss: 0.008506638929247856 2023-01-24 04:44:11.339347: step: 92/459, loss: 0.045252129435539246 2023-01-24 04:44:11.956441: step: 94/459, loss: 0.009901654906570911 2023-01-24 04:44:12.539401: step: 96/459, loss: 0.0009761383407749236 2023-01-24 04:44:13.172060: step: 98/459, loss: 0.0045546311885118484 2023-01-24 04:44:13.718875: step: 100/459, loss: 0.00024363295233342797 2023-01-24 04:44:14.360507: step: 102/459, loss: 0.008392552845180035 2023-01-24 04:44:15.010741: step: 104/459, loss: 0.0125965541228652 2023-01-24 04:44:15.622114: step: 106/459, loss: 0.0004477241018321365 2023-01-24 04:44:16.223605: step: 108/459, loss: 0.009327094070613384 2023-01-24 04:44:16.853410: step: 110/459, loss: 0.01970033533871174 2023-01-24 04:44:17.477715: step: 112/459, loss: 0.0030588279478251934 2023-01-24 04:44:18.078283: step: 114/459, loss: 3.650465077953413e-05 2023-01-24 04:44:18.655892: step: 116/459, loss: 0.012827971950173378 2023-01-24 04:44:19.285539: step: 118/459, loss: 0.0014113798970356584 2023-01-24 04:44:19.890696: step: 120/459, loss: 0.2777603268623352 2023-01-24 04:44:20.496557: step: 122/459, loss: 0.03911227732896805 2023-01-24 04:44:21.086528: step: 124/459, loss: 1.2493137546698563e-05 2023-01-24 04:44:21.703025: step: 126/459, loss: 0.0001285231701331213 2023-01-24 04:44:22.402093: step: 128/459, loss: 0.00027590326499193907 2023-01-24 04:44:22.995548: step: 130/459, loss: 0.010265353135764599 2023-01-24 04:44:23.632600: step: 132/459, loss: 0.002109479857608676 2023-01-24 04:44:24.240100: step: 134/459, loss: 0.025072025135159492 2023-01-24 04:44:24.864166: step: 136/459, loss: 0.00016274963854812086 2023-01-24 04:44:25.473917: step: 138/459, loss: 0.21895749866962433 2023-01-24 04:44:26.071120: step: 140/459, loss: 0.027793077751994133 2023-01-24 04:44:26.735808: step: 142/459, loss: 0.14904916286468506 2023-01-24 04:44:27.350974: step: 144/459, loss: 0.008113999851047993 2023-01-24 04:44:27.958867: step: 146/459, loss: 0.0004467081162147224 2023-01-24 04:44:28.542604: step: 148/459, loss: 0.012932438403367996 2023-01-24 04:44:29.161685: step: 150/459, loss: 0.16394083201885223 2023-01-24 04:44:29.698312: step: 152/459, loss: 0.0002836047497112304 2023-01-24 04:44:30.282781: step: 154/459, loss: 1.644413532631006e-05 2023-01-24 04:44:31.041295: step: 156/459, loss: 0.0021424449514597654 2023-01-24 04:44:31.687913: step: 158/459, loss: 0.0007017895113676786 2023-01-24 04:44:32.337099: step: 160/459, loss: 0.012041795998811722 2023-01-24 04:44:33.037505: step: 162/459, loss: 0.0005602086312137544 2023-01-24 04:44:33.648646: step: 164/459, loss: 0.0027601837646216154 2023-01-24 04:44:34.322872: step: 166/459, loss: 0.0063339052721858025 2023-01-24 04:44:34.960488: step: 168/459, loss: 0.0002210098464274779 2023-01-24 04:44:35.592136: step: 170/459, loss: 0.015580878593027592 2023-01-24 04:44:36.242490: step: 172/459, loss: 0.23118282854557037 2023-01-24 04:44:36.767392: step: 174/459, loss: 0.0007494708406738937 2023-01-24 04:44:37.406913: step: 176/459, loss: 0.0007202295237220824 2023-01-24 04:44:37.969307: step: 178/459, loss: 4.342832835391164e-05 2023-01-24 04:44:38.544659: step: 180/459, loss: 0.00018783465202432126 2023-01-24 04:44:39.123394: step: 182/459, loss: 0.0007413902785629034 2023-01-24 04:44:39.727855: step: 184/459, loss: 2.2383137547876686e-05 2023-01-24 04:44:40.437268: step: 
186/459, loss: 0.09530650079250336 2023-01-24 04:44:41.053006: step: 188/459, loss: 0.0190433319658041 2023-01-24 04:44:41.623106: step: 190/459, loss: 0.018131306394934654 2023-01-24 04:44:42.220516: step: 192/459, loss: 0.023287111893296242 2023-01-24 04:44:42.814029: step: 194/459, loss: 0.0002880020474549383 2023-01-24 04:44:43.390334: step: 196/459, loss: 0.007642781361937523 2023-01-24 04:44:43.974432: step: 198/459, loss: 0.0011927258456125855 2023-01-24 04:44:44.569058: step: 200/459, loss: 0.030787428840994835 2023-01-24 04:44:45.251212: step: 202/459, loss: 0.005528704728931189 2023-01-24 04:44:45.836343: step: 204/459, loss: 0.027063224464654922 2023-01-24 04:44:46.462512: step: 206/459, loss: 0.011822911910712719 2023-01-24 04:44:47.113576: step: 208/459, loss: 0.03423825651407242 2023-01-24 04:44:47.782417: step: 210/459, loss: 0.022047920152544975 2023-01-24 04:44:48.383792: step: 212/459, loss: 0.033422648906707764 2023-01-24 04:44:49.056333: step: 214/459, loss: 0.035301703959703445 2023-01-24 04:44:49.735995: step: 216/459, loss: 0.0001564791309647262 2023-01-24 04:44:50.334165: step: 218/459, loss: 0.0009170136181637645 2023-01-24 04:44:50.943965: step: 220/459, loss: 0.0038573346100747585 2023-01-24 04:44:51.480427: step: 222/459, loss: 0.011305904015898705 2023-01-24 04:44:52.085963: step: 224/459, loss: 0.0007188088493421674 2023-01-24 04:44:52.759883: step: 226/459, loss: 0.006362097803503275 2023-01-24 04:44:53.410724: step: 228/459, loss: 0.045500800013542175 2023-01-24 04:44:54.021952: step: 230/459, loss: 0.03940321132540703 2023-01-24 04:44:54.703726: step: 232/459, loss: 0.005564861930906773 2023-01-24 04:44:55.320859: step: 234/459, loss: 0.09404119849205017 2023-01-24 04:44:55.936162: step: 236/459, loss: 5.263906859909184e-05 2023-01-24 04:44:56.644722: step: 238/459, loss: 0.005442783702164888 2023-01-24 04:44:57.238230: step: 240/459, loss: 0.0003933916741516441 2023-01-24 04:44:57.815689: step: 242/459, loss: 0.000789791636634618 2023-01-24 04:44:58.419503: step: 244/459, loss: 0.00029434484895318747 2023-01-24 04:44:59.115552: step: 246/459, loss: 0.0005847049760632217 2023-01-24 04:44:59.700524: step: 248/459, loss: 0.0057656774297356606 2023-01-24 04:45:00.293570: step: 250/459, loss: 0.00233726529404521 2023-01-24 04:45:00.873976: step: 252/459, loss: 0.009172067977488041 2023-01-24 04:45:01.520755: step: 254/459, loss: 0.014689318835735321 2023-01-24 04:45:02.133555: step: 256/459, loss: 0.001016234396956861 2023-01-24 04:45:02.714303: step: 258/459, loss: 0.0009285426349379122 2023-01-24 04:45:03.374604: step: 260/459, loss: 0.009133349172770977 2023-01-24 04:45:03.968793: step: 262/459, loss: 0.0004941511433571577 2023-01-24 04:45:04.594510: step: 264/459, loss: 9.081602001970168e-06 2023-01-24 04:45:05.236214: step: 266/459, loss: 0.00043544499203562737 2023-01-24 04:45:05.836802: step: 268/459, loss: 0.00993884727358818 2023-01-24 04:45:06.518553: step: 270/459, loss: 0.1226934865117073 2023-01-24 04:45:07.100263: step: 272/459, loss: 0.022625312209129333 2023-01-24 04:45:07.728153: step: 274/459, loss: 0.0028826200868934393 2023-01-24 04:45:08.338058: step: 276/459, loss: 0.0026290633250027895 2023-01-24 04:45:08.974750: step: 278/459, loss: 0.005725185386836529 2023-01-24 04:45:09.582038: step: 280/459, loss: 0.001008752966299653 2023-01-24 04:45:10.199896: step: 282/459, loss: 0.025943972170352936 2023-01-24 04:45:10.789014: step: 284/459, loss: 0.038858864456415176 2023-01-24 04:45:11.489613: step: 286/459, loss: 0.0017464859411120415 
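Each progress entry in this log has the fixed shape "<timestamp>: step: <n>/459, loss: <value>", and every epoch additionally prints an overall "Loss:" line followed by the evaluation dictionaries. The sketch below extracts the per-step losses so they can be averaged or plotted; the regex and the "train.log" path are assumptions based only on the format visible here, not on train.py internals.

import re
from statistics import mean

# Matches entries like "2023-01-24 04:45:12.122902: step: 288/459, loss: 0.0023985912557691336"
STEP_RE = re.compile(
    r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+): step: (\d+)/(\d+), loss: ([0-9.eE+-]+)"
)

def parse_step_losses(log_text: str):
    """Yield (timestamp, step, steps_per_epoch, loss) tuples from the raw log text."""
    for ts, step, total, loss in STEP_RE.findall(log_text):
        yield ts, int(step), int(total), float(loss)

with open("train.log") as f:  # illustrative path to a saved copy of this log
    losses = [loss for *_, loss in parse_step_losses(f.read())]

print(f"steps parsed: {len(losses)}, mean loss: {mean(losses):.4f}")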
2023-01-24 04:45:12.122902: step: 288/459, loss: 0.0023985912557691336 2023-01-24 04:45:12.707872: step: 290/459, loss: 0.0020663670729845762 2023-01-24 04:45:13.299524: step: 292/459, loss: 0.004036621656268835 2023-01-24 04:45:13.895028: step: 294/459, loss: 0.025603819638490677 2023-01-24 04:45:14.532290: step: 296/459, loss: 0.006717344280332327 2023-01-24 04:45:15.135821: step: 298/459, loss: 0.0029901042580604553 2023-01-24 04:45:15.722829: step: 300/459, loss: 0.006558001972734928 2023-01-24 04:45:16.361479: step: 302/459, loss: 0.02402554638683796 2023-01-24 04:45:16.967840: step: 304/459, loss: 0.0006742271943949163 2023-01-24 04:45:17.567628: step: 306/459, loss: 0.011713492684066296 2023-01-24 04:45:18.227354: step: 308/459, loss: 0.005971349310129881 2023-01-24 04:45:18.907143: step: 310/459, loss: 0.0273271594196558 2023-01-24 04:45:19.609153: step: 312/459, loss: 0.009760374203324318 2023-01-24 04:45:20.154727: step: 314/459, loss: 0.03705465421080589 2023-01-24 04:45:20.738856: step: 316/459, loss: 0.1106385737657547 2023-01-24 04:45:21.362083: step: 318/459, loss: 0.6006984114646912 2023-01-24 04:45:21.960775: step: 320/459, loss: 0.001459872117266059 2023-01-24 04:45:22.666037: step: 322/459, loss: 0.022548872977495193 2023-01-24 04:45:23.268223: step: 324/459, loss: 0.008333946578204632 2023-01-24 04:45:23.950925: step: 326/459, loss: 0.03601020202040672 2023-01-24 04:45:24.579956: step: 328/459, loss: 0.006127593573182821 2023-01-24 04:45:25.172152: step: 330/459, loss: 0.00012412089563440531 2023-01-24 04:45:25.766002: step: 332/459, loss: 6.655070319538936e-05 2023-01-24 04:45:26.359942: step: 334/459, loss: 0.03532915934920311 2023-01-24 04:45:26.941985: step: 336/459, loss: 0.0117817847058177 2023-01-24 04:45:27.554396: step: 338/459, loss: 0.001813049428164959 2023-01-24 04:45:28.237084: step: 340/459, loss: 0.00034785488969646394 2023-01-24 04:45:28.865915: step: 342/459, loss: 0.026374947279691696 2023-01-24 04:45:29.507827: step: 344/459, loss: 0.009980316273868084 2023-01-24 04:45:30.104078: step: 346/459, loss: 0.03952684998512268 2023-01-24 04:45:30.724306: step: 348/459, loss: 0.0004163063131272793 2023-01-24 04:45:31.340658: step: 350/459, loss: 0.006992325186729431 2023-01-24 04:45:31.945062: step: 352/459, loss: 0.005442061461508274 2023-01-24 04:45:32.510481: step: 354/459, loss: 0.0034624948166310787 2023-01-24 04:45:33.111683: step: 356/459, loss: 0.0071471924893558025 2023-01-24 04:45:33.731081: step: 358/459, loss: 1.3645947547047399e-05 2023-01-24 04:45:34.316919: step: 360/459, loss: 0.002505689626559615 2023-01-24 04:45:34.909666: step: 362/459, loss: 0.002070203423500061 2023-01-24 04:45:35.569786: step: 364/459, loss: 0.0705859363079071 2023-01-24 04:45:36.155480: step: 366/459, loss: 0.06814031302928925 2023-01-24 04:45:36.722798: step: 368/459, loss: 0.012487039901316166 2023-01-24 04:45:37.276074: step: 370/459, loss: 0.00011876055941684172 2023-01-24 04:45:37.870240: step: 372/459, loss: 0.006760003510862589 2023-01-24 04:45:38.492536: step: 374/459, loss: 0.0072606755420565605 2023-01-24 04:45:39.124392: step: 376/459, loss: 0.018324188888072968 2023-01-24 04:45:39.762867: step: 378/459, loss: 0.05420760065317154 2023-01-24 04:45:40.374305: step: 380/459, loss: 0.00022522623476106673 2023-01-24 04:45:41.055034: step: 382/459, loss: 0.340384304523468 2023-01-24 04:45:41.634986: step: 384/459, loss: 0.01080006267875433 2023-01-24 04:45:42.346346: step: 386/459, loss: 0.016944268718361855 2023-01-24 04:45:42.973239: step: 388/459, loss: 
0.0032878892961889505 2023-01-24 04:45:43.615539: step: 390/459, loss: 0.002416720613837242 2023-01-24 04:45:44.173398: step: 392/459, loss: 0.0039053219370543957 2023-01-24 04:45:44.788401: step: 394/459, loss: 0.006467001978307962 2023-01-24 04:45:45.429694: step: 396/459, loss: 0.020006779581308365 2023-01-24 04:45:46.053149: step: 398/459, loss: 0.00542067363858223 2023-01-24 04:45:46.627923: step: 400/459, loss: 0.004166472237557173 2023-01-24 04:45:47.269941: step: 402/459, loss: 0.021800855174660683 2023-01-24 04:45:47.872200: step: 404/459, loss: 0.015085924416780472 2023-01-24 04:45:48.560993: step: 406/459, loss: 0.0008794562891125679 2023-01-24 04:45:49.270256: step: 408/459, loss: 0.006802731193602085 2023-01-24 04:45:49.936118: step: 410/459, loss: 0.0070059457793831825 2023-01-24 04:45:50.535306: step: 412/459, loss: 0.016489291563630104 2023-01-24 04:45:51.199437: step: 414/459, loss: 0.00042469697655178607 2023-01-24 04:45:51.800356: step: 416/459, loss: 0.06200775131583214 2023-01-24 04:45:52.358466: step: 418/459, loss: 0.6816689372062683 2023-01-24 04:45:52.934320: step: 420/459, loss: 0.00422826549038291 2023-01-24 04:45:53.577174: step: 422/459, loss: 0.10751839727163315 2023-01-24 04:45:54.283435: step: 424/459, loss: 9.620982018532231e-05 2023-01-24 04:45:54.885654: step: 426/459, loss: 0.0071262177079916 2023-01-24 04:45:55.516942: step: 428/459, loss: 0.0025396873243153095 2023-01-24 04:45:56.088934: step: 430/459, loss: 0.005172580014914274 2023-01-24 04:45:56.706856: step: 432/459, loss: 0.0008690028334967792 2023-01-24 04:45:57.284713: step: 434/459, loss: 0.00033146230271086097 2023-01-24 04:45:57.960553: step: 436/459, loss: 0.0057528093457221985 2023-01-24 04:45:58.560400: step: 438/459, loss: 0.0043086521327495575 2023-01-24 04:45:59.226046: step: 440/459, loss: 0.0005522961146198213 2023-01-24 04:45:59.814313: step: 442/459, loss: 0.0016557247145101428 2023-01-24 04:46:00.407850: step: 444/459, loss: 0.009945311583578587 2023-01-24 04:46:01.011567: step: 446/459, loss: 0.0008327895775437355 2023-01-24 04:46:01.643248: step: 448/459, loss: 0.005733142141252756 2023-01-24 04:46:02.255920: step: 450/459, loss: 9.38292796490714e-05 2023-01-24 04:46:02.867511: step: 452/459, loss: 0.018352730199694633 2023-01-24 04:46:03.554198: step: 454/459, loss: 0.26979586482048035 2023-01-24 04:46:04.135845: step: 456/459, loss: 0.0012925358023494482 2023-01-24 04:46:04.800945: step: 458/459, loss: 0.0026923194527626038 2023-01-24 04:46:05.365317: step: 460/459, loss: 0.001029077684506774 2023-01-24 04:46:05.951050: step: 462/459, loss: 0.009495886042714119 2023-01-24 04:46:06.563224: step: 464/459, loss: 0.032469749450683594 2023-01-24 04:46:07.202550: step: 466/459, loss: 0.006148369051516056 2023-01-24 04:46:07.849725: step: 468/459, loss: 0.005175793543457985 2023-01-24 04:46:08.479087: step: 470/459, loss: 0.002412701491266489 2023-01-24 04:46:09.001561: step: 472/459, loss: 0.01606886461377144 2023-01-24 04:46:09.627691: step: 474/459, loss: 0.018737200647592545 2023-01-24 04:46:10.205353: step: 476/459, loss: 8.578799315728247e-05 2023-01-24 04:46:10.863105: step: 478/459, loss: 0.004624590277671814 2023-01-24 04:46:11.432262: step: 480/459, loss: 0.00472530722618103 2023-01-24 04:46:12.120762: step: 482/459, loss: 0.00044389383401721716 2023-01-24 04:46:12.700227: step: 484/459, loss: 0.0057045160792768 2023-01-24 04:46:13.296071: step: 486/459, loss: 0.0016984877875074744 2023-01-24 04:46:13.917181: step: 488/459, loss: 0.007600328419357538 2023-01-24 
04:46:14.557490: step: 490/459, loss: 0.02329374849796295 2023-01-24 04:46:15.105647: step: 492/459, loss: 0.0007262135040946305 2023-01-24 04:46:15.681204: step: 494/459, loss: 0.0009378135437145829 2023-01-24 04:46:16.232651: step: 496/459, loss: 0.0005785457324236631 2023-01-24 04:46:16.924175: step: 498/459, loss: 0.08299929648637772 2023-01-24 04:46:17.563330: step: 500/459, loss: 0.000724013545550406 2023-01-24 04:46:18.229909: step: 502/459, loss: 0.004631571937352419 2023-01-24 04:46:18.800256: step: 504/459, loss: 0.003918944392353296 2023-01-24 04:46:19.429861: step: 506/459, loss: 0.005270006600767374 2023-01-24 04:46:20.095441: step: 508/459, loss: 0.017783787101507187 2023-01-24 04:46:20.723847: step: 510/459, loss: 0.00010568903235252947 2023-01-24 04:46:21.342326: step: 512/459, loss: 0.0020705796778202057 2023-01-24 04:46:21.983447: step: 514/459, loss: 0.047474004328250885 2023-01-24 04:46:22.586532: step: 516/459, loss: 0.014528983272612095 2023-01-24 04:46:23.185356: step: 518/459, loss: 0.0019068261608481407 2023-01-24 04:46:23.723489: step: 520/459, loss: 0.0019552859012037516 2023-01-24 04:46:24.320816: step: 522/459, loss: 0.036151301115751266 2023-01-24 04:46:24.943085: step: 524/459, loss: 0.0024455103557556868 2023-01-24 04:46:25.491340: step: 526/459, loss: 0.01800650544464588 2023-01-24 04:46:26.115952: step: 528/459, loss: 0.005194175988435745 2023-01-24 04:46:26.803637: step: 530/459, loss: 0.06961102783679962 2023-01-24 04:46:27.357639: step: 532/459, loss: 0.001702332985587418 2023-01-24 04:46:28.032966: step: 534/459, loss: 0.0161002054810524 2023-01-24 04:46:28.623036: step: 536/459, loss: 0.02841876447200775 2023-01-24 04:46:29.275840: step: 538/459, loss: 0.02935534343123436 2023-01-24 04:46:29.879951: step: 540/459, loss: 0.00438471557572484 2023-01-24 04:46:30.440677: step: 542/459, loss: 0.008155906572937965 2023-01-24 04:46:30.960271: step: 544/459, loss: 1.1293121133348905e-05 2023-01-24 04:46:31.473208: step: 546/459, loss: 0.013934127055108547 2023-01-24 04:46:32.040264: step: 548/459, loss: 0.001117532723583281 2023-01-24 04:46:32.749452: step: 550/459, loss: 0.0007941098301671445 2023-01-24 04:46:33.371231: step: 552/459, loss: 0.03356078267097473 2023-01-24 04:46:33.993096: step: 554/459, loss: 0.0077650719322264194 2023-01-24 04:46:34.595217: step: 556/459, loss: 0.0008159648859873414 2023-01-24 04:46:35.180850: step: 558/459, loss: 0.00020906385907437652 2023-01-24 04:46:35.775569: step: 560/459, loss: 0.0068863555788993835 2023-01-24 04:46:36.410673: step: 562/459, loss: 0.0019957837648689747 2023-01-24 04:46:36.989759: step: 564/459, loss: 0.06335976719856262 2023-01-24 04:46:37.576118: step: 566/459, loss: 0.003915264271199703 2023-01-24 04:46:38.171389: step: 568/459, loss: 0.00011519744293764234 2023-01-24 04:46:38.795958: step: 570/459, loss: 0.003125270828604698 2023-01-24 04:46:39.383182: step: 572/459, loss: 0.2608368992805481 2023-01-24 04:46:40.029073: step: 574/459, loss: 0.00020581837452482432 2023-01-24 04:46:40.587759: step: 576/459, loss: 0.0007121859816834331 2023-01-24 04:46:41.179107: step: 578/459, loss: 0.08421622961759567 2023-01-24 04:46:41.831592: step: 580/459, loss: 0.028241371735930443 2023-01-24 04:46:42.544811: step: 582/459, loss: 0.0330907441675663 2023-01-24 04:46:43.218175: step: 584/459, loss: 0.0003415916580706835 2023-01-24 04:46:43.911456: step: 586/459, loss: 0.020033447071909904 2023-01-24 04:46:44.511797: step: 588/459, loss: 0.0017059798119589686 2023-01-24 04:46:45.198245: step: 590/459, loss: 
0.010430595837533474 2023-01-24 04:46:45.869296: step: 592/459, loss: 0.02316097356379032 2023-01-24 04:46:46.504965: step: 594/459, loss: 0.0023498975206166506 2023-01-24 04:46:47.136831: step: 596/459, loss: 6.393404328264296e-05 2023-01-24 04:46:47.769828: step: 598/459, loss: 0.007888703607022762 2023-01-24 04:46:48.465532: step: 600/459, loss: 0.0065407478250563145 2023-01-24 04:46:49.042576: step: 602/459, loss: 0.24531371891498566 2023-01-24 04:46:49.738569: step: 604/459, loss: 0.0006021627341397107 2023-01-24 04:46:50.335811: step: 606/459, loss: 0.0002011965843848884 2023-01-24 04:46:50.949208: step: 608/459, loss: 0.10070168226957321 2023-01-24 04:46:51.500502: step: 610/459, loss: 0.0039619687013328075 2023-01-24 04:46:52.091753: step: 612/459, loss: 0.002174646593630314 2023-01-24 04:46:52.731806: step: 614/459, loss: 0.0038370671682059765 2023-01-24 04:46:53.412167: step: 616/459, loss: 0.000642630155198276 2023-01-24 04:46:53.973102: step: 618/459, loss: 0.003533886279910803 2023-01-24 04:46:54.624367: step: 620/459, loss: 0.04018383473157883 2023-01-24 04:46:55.222251: step: 622/459, loss: 0.0011353848967701197 2023-01-24 04:46:55.840400: step: 624/459, loss: 0.018764829263091087 2023-01-24 04:46:56.499072: step: 626/459, loss: 0.0005435406928882003 2023-01-24 04:46:57.151967: step: 628/459, loss: 0.013734500855207443 2023-01-24 04:46:57.816195: step: 630/459, loss: 0.005639609415084124 2023-01-24 04:46:58.439348: step: 632/459, loss: 0.01124490611255169 2023-01-24 04:46:59.105642: step: 634/459, loss: 0.007579735945910215 2023-01-24 04:46:59.723948: step: 636/459, loss: 0.007819858379662037 2023-01-24 04:47:00.374314: step: 638/459, loss: 0.01634785905480385 2023-01-24 04:47:00.982547: step: 640/459, loss: 0.014360510744154453 2023-01-24 04:47:01.650006: step: 642/459, loss: 0.0030349968001246452 2023-01-24 04:47:02.304496: step: 644/459, loss: 0.4692196547985077 2023-01-24 04:47:02.933430: step: 646/459, loss: 0.000677343865390867 2023-01-24 04:47:03.569148: step: 648/459, loss: 0.0002628087531775236 2023-01-24 04:47:04.237203: step: 650/459, loss: 0.0004987729480490088 2023-01-24 04:47:04.816802: step: 652/459, loss: 0.002655508928000927 2023-01-24 04:47:05.412096: step: 654/459, loss: 0.001822113525122404 2023-01-24 04:47:05.980116: step: 656/459, loss: 0.04375014454126358 2023-01-24 04:47:06.546784: step: 658/459, loss: 0.0029968293383717537 2023-01-24 04:47:07.184342: step: 660/459, loss: 0.03558262065052986 2023-01-24 04:47:07.861344: step: 662/459, loss: 0.00033620200701989233 2023-01-24 04:47:08.482376: step: 664/459, loss: 0.005743315909057856 2023-01-24 04:47:09.106308: step: 666/459, loss: 0.006857685279101133 2023-01-24 04:47:09.712686: step: 668/459, loss: 0.0015682951780036092 2023-01-24 04:47:10.364207: step: 670/459, loss: 0.35988152027130127 2023-01-24 04:47:11.001895: step: 672/459, loss: 0.05537797510623932 2023-01-24 04:47:11.597365: step: 674/459, loss: 0.0547141432762146 2023-01-24 04:47:12.154847: step: 676/459, loss: 0.007006924133747816 2023-01-24 04:47:12.825321: step: 678/459, loss: 0.008098205551505089 2023-01-24 04:47:13.477562: step: 680/459, loss: 0.002127240877598524 2023-01-24 04:47:14.210885: step: 682/459, loss: 0.004009592346847057 2023-01-24 04:47:14.849158: step: 684/459, loss: 0.0039443885907530785 2023-01-24 04:47:15.589347: step: 686/459, loss: 0.06629004329442978 2023-01-24 04:47:16.194208: step: 688/459, loss: 0.1957138627767563 2023-01-24 04:47:16.805082: step: 690/459, loss: 6.917993596289307e-05 2023-01-24 04:47:17.451133: 
step: 692/459, loss: 0.10482411086559296 2023-01-24 04:47:18.014129: step: 694/459, loss: 0.00042761809891089797 2023-01-24 04:47:18.691205: step: 696/459, loss: 0.005655599758028984 2023-01-24 04:47:19.338414: step: 698/459, loss: 0.012318290770053864 2023-01-24 04:47:19.916050: step: 700/459, loss: 0.010343293659389019 2023-01-24 04:47:20.503608: step: 702/459, loss: 0.03360873833298683 2023-01-24 04:47:21.131747: step: 704/459, loss: 0.06270787864923477 2023-01-24 04:47:21.787478: step: 706/459, loss: 0.008680417202413082 2023-01-24 04:47:22.385766: step: 708/459, loss: 0.0008324018563143909 2023-01-24 04:47:22.982673: step: 710/459, loss: 0.013090288266539574 2023-01-24 04:47:23.676379: step: 712/459, loss: 0.01617041416466236 2023-01-24 04:47:24.250512: step: 714/459, loss: 0.0018056748667731881 2023-01-24 04:47:24.850503: step: 716/459, loss: 0.006406724452972412 2023-01-24 04:47:25.433581: step: 718/459, loss: 0.0030824511777609587 2023-01-24 04:47:26.040234: step: 720/459, loss: 0.0027865413576364517 2023-01-24 04:47:26.675381: step: 722/459, loss: 0.04319599270820618 2023-01-24 04:47:27.433722: step: 724/459, loss: 0.00040291310870088637 2023-01-24 04:47:28.084137: step: 726/459, loss: 0.04324004054069519 2023-01-24 04:47:28.696065: step: 728/459, loss: 9.262342791771516e-05 2023-01-24 04:47:29.312341: step: 730/459, loss: 1.5273366443580016e-05 2023-01-24 04:47:29.979450: step: 732/459, loss: 0.0019008852541446686 2023-01-24 04:47:30.637809: step: 734/459, loss: 0.011329255998134613 2023-01-24 04:47:31.238390: step: 736/459, loss: 0.037453167140483856 2023-01-24 04:47:31.790653: step: 738/459, loss: 0.014367367140948772 2023-01-24 04:47:32.439433: step: 740/459, loss: 0.007854033261537552 2023-01-24 04:47:33.082493: step: 742/459, loss: 0.040661606937646866 2023-01-24 04:47:33.688387: step: 744/459, loss: 0.003949925769120455 2023-01-24 04:47:34.347022: step: 746/459, loss: 0.005804467014968395 2023-01-24 04:47:34.967557: step: 748/459, loss: 0.00197548046708107 2023-01-24 04:47:35.551433: step: 750/459, loss: 0.0003501887549646199 2023-01-24 04:47:36.143230: step: 752/459, loss: 0.006415031850337982 2023-01-24 04:47:36.799401: step: 754/459, loss: 0.0002033664786722511 2023-01-24 04:47:37.377246: step: 756/459, loss: 0.0010221205884590745 2023-01-24 04:47:38.020057: step: 758/459, loss: 0.047149501740932465 2023-01-24 04:47:38.605081: step: 760/459, loss: 0.00584332924336195 2023-01-24 04:47:39.232872: step: 762/459, loss: 0.03523034602403641 2023-01-24 04:47:39.798673: step: 764/459, loss: 0.006508410442620516 2023-01-24 04:47:40.366042: step: 766/459, loss: 0.003813662566244602 2023-01-24 04:47:40.956731: step: 768/459, loss: 0.01600894331932068 2023-01-24 04:47:41.623076: step: 770/459, loss: 0.07984521985054016 2023-01-24 04:47:42.353240: step: 772/459, loss: 0.016756795346736908 2023-01-24 04:47:42.960182: step: 774/459, loss: 0.0012894327519461513 2023-01-24 04:47:43.495395: step: 776/459, loss: 0.03205569460988045 2023-01-24 04:47:44.118661: step: 778/459, loss: 0.0020609803032130003 2023-01-24 04:47:44.735682: step: 780/459, loss: 0.001206458662636578 2023-01-24 04:47:45.322545: step: 782/459, loss: 0.011384441517293453 2023-01-24 04:47:45.978822: step: 784/459, loss: 0.05787419527769089 2023-01-24 04:47:46.605975: step: 786/459, loss: 0.0018539958400651813 2023-01-24 04:47:47.200430: step: 788/459, loss: 0.0014146310277283192 2023-01-24 04:47:47.850856: step: 790/459, loss: 0.00898857694119215 2023-01-24 04:47:48.510283: step: 792/459, loss: 0.02817581407725811 
2023-01-24 04:47:49.134571: step: 794/459, loss: 0.008024360053241253 2023-01-24 04:47:49.716378: step: 796/459, loss: 0.003290650900453329 2023-01-24 04:47:50.319990: step: 798/459, loss: 0.003342527197673917 2023-01-24 04:47:50.962562: step: 800/459, loss: 0.013854924589395523 2023-01-24 04:47:51.555895: step: 802/459, loss: 0.002006563823670149 2023-01-24 04:47:52.167882: step: 804/459, loss: 0.0025249694008380175 2023-01-24 04:47:52.865048: step: 806/459, loss: 0.02023204043507576 2023-01-24 04:47:53.515346: step: 808/459, loss: 0.007009006571024656 2023-01-24 04:47:54.112520: step: 810/459, loss: 0.004041195381432772 2023-01-24 04:47:54.741696: step: 812/459, loss: 0.0021971603855490685 2023-01-24 04:47:55.281998: step: 814/459, loss: 0.01660505123436451 2023-01-24 04:47:55.965292: step: 816/459, loss: 0.006226601544767618 2023-01-24 04:47:56.601603: step: 818/459, loss: 0.03286784142255783 2023-01-24 04:47:57.177921: step: 820/459, loss: 0.08104877173900604 2023-01-24 04:47:57.771205: step: 822/459, loss: 0.015094123780727386 2023-01-24 04:47:58.400237: step: 824/459, loss: 0.05780259892344475 2023-01-24 04:47:59.010273: step: 826/459, loss: 0.0002945966843981296 2023-01-24 04:47:59.614334: step: 828/459, loss: 0.00020568619947880507 2023-01-24 04:48:00.280305: step: 830/459, loss: 0.03908845782279968 2023-01-24 04:48:00.867335: step: 832/459, loss: 0.11842922866344452 2023-01-24 04:48:01.485980: step: 834/459, loss: 0.0003729503077920526 2023-01-24 04:48:02.091822: step: 836/459, loss: 0.015266028232872486 2023-01-24 04:48:02.784655: step: 838/459, loss: 0.03909643366932869 2023-01-24 04:48:03.433407: step: 840/459, loss: 0.013289537280797958 2023-01-24 04:48:03.987984: step: 842/459, loss: 0.0024018269032239914 2023-01-24 04:48:04.639512: step: 844/459, loss: 0.005174174439162016 2023-01-24 04:48:05.234532: step: 846/459, loss: 0.019931329414248466 2023-01-24 04:48:05.873755: step: 848/459, loss: 0.016558803617954254 2023-01-24 04:48:06.465511: step: 850/459, loss: 0.01591997779905796 2023-01-24 04:48:07.049887: step: 852/459, loss: 0.0005978790577501059 2023-01-24 04:48:07.686577: step: 854/459, loss: 0.015449082478880882 2023-01-24 04:48:08.307764: step: 856/459, loss: 0.0017402017256245017 2023-01-24 04:48:09.002927: step: 858/459, loss: 0.0010622297413647175 2023-01-24 04:48:09.613461: step: 860/459, loss: 0.0007045363308861852 2023-01-24 04:48:10.218252: step: 862/459, loss: 4.7909383283695206e-05 2023-01-24 04:48:10.833927: step: 864/459, loss: 0.0795058012008667 2023-01-24 04:48:11.467022: step: 866/459, loss: 0.0007330406806431711 2023-01-24 04:48:12.049832: step: 868/459, loss: 0.0016212441259995103 2023-01-24 04:48:12.644286: step: 870/459, loss: 0.0009713666513562202 2023-01-24 04:48:13.240101: step: 872/459, loss: 0.10950715839862823 2023-01-24 04:48:13.873375: step: 874/459, loss: 9.771339682629332e-05 2023-01-24 04:48:14.518055: step: 876/459, loss: 0.00175101135391742 2023-01-24 04:48:15.138786: step: 878/459, loss: 0.018389228731393814 2023-01-24 04:48:15.736757: step: 880/459, loss: 0.019886739552021027 2023-01-24 04:48:16.383190: step: 882/459, loss: 0.011176357977092266 2023-01-24 04:48:16.953569: step: 884/459, loss: 0.023539738729596138 2023-01-24 04:48:17.603692: step: 886/459, loss: 0.02875583991408348 2023-01-24 04:48:18.191024: step: 888/459, loss: 0.21475404500961304 2023-01-24 04:48:18.809466: step: 890/459, loss: 3.511065642669564e-06 2023-01-24 04:48:19.453380: step: 892/459, loss: 0.0012001785216853023 2023-01-24 04:48:20.070440: step: 894/459, loss: 
0.3412286639213562 2023-01-24 04:48:20.693908: step: 896/459, loss: 0.015228291042149067 2023-01-24 04:48:21.324572: step: 898/459, loss: 0.02686469256877899 2023-01-24 04:48:21.960470: step: 900/459, loss: 0.0020298592280596495 2023-01-24 04:48:22.572756: step: 902/459, loss: 0.017779076471924782 2023-01-24 04:48:23.195196: step: 904/459, loss: 0.001009552855975926 2023-01-24 04:48:23.782114: step: 906/459, loss: 0.0024461522698402405 2023-01-24 04:48:24.412075: step: 908/459, loss: 0.015404877252876759 2023-01-24 04:48:24.977143: step: 910/459, loss: 0.0639994889497757 2023-01-24 04:48:25.588743: step: 912/459, loss: 0.023847809061408043 2023-01-24 04:48:26.258193: step: 914/459, loss: 0.028022680431604385 2023-01-24 04:48:26.901961: step: 916/459, loss: 0.0001897823967738077 2023-01-24 04:48:27.529653: step: 918/459, loss: 0.0009143702918663621 2023-01-24 04:48:27.952354: step: 920/459, loss: 0.0019775775726884604
==================================================
Loss: 0.025
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3535584833059925, 'r': 0.323368479987644, 'f1': 0.33779026551732083}, 'combined': 0.24889809038118377, 'epoch': 38}
Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3544273865795569, 'r': 0.2903082502801643, 'f1': 0.31917948556539805}, 'combined': 0.20427487076185472, 'epoch': 38}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3430776069518716, 'r': 0.3255005758556657, 'f1': 0.3340580398752402}, 'combined': 0.24614802938175592, 'epoch': 38}
Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.3517888674387895, 'r': 0.2852687906867275, 'f1': 0.31505589332871514}, 'combined': 0.20163577173037767, 'epoch': 38}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3682535214813962, 'r': 0.3298209907765066, 'f1': 0.34797930358202}, 'combined': 0.2564058026393832, 'epoch': 38}
Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.3655599767306778, 'r': 0.3022451220553553, 'f1': 0.3309010967013221}, 'combined': 0.23724984291792908, 'epoch': 38}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.28422619047619047, 'r': 0.22738095238095238, 'f1': 0.2526455026455027}, 'combined': 0.16843033509700178, 'epoch': 38}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.2717391304347826, 'r': 0.2717391304347826, 'f1': 0.2717391304347826}, 'combined': 0.1358695652173913, 'epoch': 38}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.36363636363636365, 'r': 0.13793103448275862, 'f1': 0.2}, 'combined': 0.13333333333333333, 'epoch': 38}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 'epoch': 10}
Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 'epoch': 10}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'epoch': 10}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3234579439252337, 'r': 0.32836812144212524, 'f1': 0.32589453860640305}, 'combined': 0.2401328179205075, 'epoch': 25}
Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33820520545292077, 'r': 0.29673590233199043, 'f1': 0.3161163313667358}, 'combined': 0.20231445207471088, 'epoch': 25}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.32142857142857145, 'r': 0.391304347826087, 'f1': 0.35294117647058826}, 'combined': 0.17647058823529413, 'epoch': 25}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8}
Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8}
******************************
Epoch: 39
command: python train.py --model_name slot --xlmr_model_name xlm-roberta-large --batch_size 16 --xlmr_learning_rate 2e-5 --max_epoch 40 --event_hidden_num 450 --role_hidden_num 350 --p1_data_weight 0.1 --learning_rate 9e-4
2023-01-24 04:51:03.619709: step: 2/459, loss: 0.0007728490745648742 2023-01-24 04:51:04.196158: step: 4/459, loss: 0.0008553013321943581 2023-01-24 04:51:04.820958: step: 6/459, loss: 0.029847510159015656 2023-01-24 04:51:05.546880: step: 8/459, loss: 0.03064531646668911 2023-01-24 04:51:06.186804: step: 10/459, loss: 0.008142560720443726 2023-01-24 04:51:06.801943: step: 12/459, loss: 0.002244191011413932 2023-01-24 04:51:07.410880: step: 14/459, loss: 0.014321141876280308 2023-01-24 04:51:08.038318: step: 16/459, loss: 0.0013732025399804115 2023-01-24 04:51:08.653403: step: 18/459, loss: 0.0005799831706099212 2023-01-24 04:51:09.221673: step: 20/459, loss: 0.03941210359334946 2023-01-24 04:51:09.780735: step: 22/459, loss: 0.0018437887774780393 2023-01-24 04:51:10.391073: step: 24/459, loss: 0.0001850540575105697 2023-01-24 04:51:10.988702: step: 26/459, loss: 6.229311838978902e-05 2023-01-24 04:51:11.655397: step: 28/459, loss: 0.0757938027381897 2023-01-24 04:51:12.267098: step: 30/459, loss: 7.810208626324311e-05 2023-01-24 04:51:12.858040: step: 32/459, loss: 0.003984445706009865 2023-01-24 04:51:13.513506: step: 34/459, loss: 1.1590527719818056e-05 2023-01-24 04:51:14.180579: step: 36/459, loss: 0.00012987529044039547 2023-01-24 04:51:14.769264: step: 38/459, loss: 8.208936196751893e-05 2023-01-24 04:51:15.347034: step: 40/459, loss: 0.00014003862452227622 2023-01-24 04:51:15.953834: step: 42/459, loss: 0.006686759181320667 2023-01-24 04:51:16.568954: step: 44/459, loss: 0.001330205355770886 2023-01-24 04:51:17.129403: step: 46/459, loss: 0.0010198187083005905 2023-01-24 04:51:17.768007: step: 48/459, loss: 0.011583387851715088 2023-01-24 04:51:18.382314: step: 50/459, loss: 0.11753929406404495
2023-01-24 04:51:18.996835: step: 52/459, loss: 0.001184136955998838 2023-01-24 04:51:19.576875: step: 54/459, loss: 0.009479454718530178 2023-01-24 04:51:20.183855: step: 56/459, loss: 0.0004953551688231528 2023-01-24 04:51:20.822278: step: 58/459, loss: 0.00337828416377306 2023-01-24 04:51:21.477206: step: 60/459, loss: 0.02856261096894741 2023-01-24 04:51:22.021922: step: 62/459, loss: 0.00031179789220914245 2023-01-24 04:51:22.594462: step: 64/459, loss: 0.20402316749095917 2023-01-24 04:51:23.182072: step: 66/459, loss: 0.009460526518523693 2023-01-24 04:51:23.872297: step: 68/459, loss: 0.0006151353591121733 2023-01-24 04:51:24.512079: step: 70/459, loss: 0.02391323633491993 2023-01-24 04:51:25.099355: step: 72/459, loss: 0.00011846339475596324 2023-01-24 04:51:25.705827: step: 74/459, loss: 0.0031267963349819183 2023-01-24 04:51:26.371573: step: 76/459, loss: 0.0024199991021305323 2023-01-24 04:51:27.002723: step: 78/459, loss: 0.0015697453636676073 2023-01-24 04:51:27.713606: step: 80/459, loss: 0.010411216877400875 2023-01-24 04:51:28.356523: step: 82/459, loss: 0.007483258843421936 2023-01-24 04:51:28.986906: step: 84/459, loss: 0.010751686058938503 2023-01-24 04:51:29.592568: step: 86/459, loss: 0.008388139307498932 2023-01-24 04:51:30.235884: step: 88/459, loss: 0.0056853159330785275 2023-01-24 04:51:30.903114: step: 90/459, loss: 0.23130148649215698 2023-01-24 04:51:31.533754: step: 92/459, loss: 0.0018117623403668404 2023-01-24 04:51:32.150068: step: 94/459, loss: 0.005419986322522163 2023-01-24 04:51:32.870063: step: 96/459, loss: 0.1338791847229004 2023-01-24 04:51:33.465504: step: 98/459, loss: 0.00029633002122864127 2023-01-24 04:51:34.084502: step: 100/459, loss: 0.027318967506289482 2023-01-24 04:51:34.643443: step: 102/459, loss: 0.003147565759718418 2023-01-24 04:51:35.325454: step: 104/459, loss: 0.10377588123083115 2023-01-24 04:51:35.953847: step: 106/459, loss: 0.00028525589732453227 2023-01-24 04:51:36.605865: step: 108/459, loss: 0.015975160524249077 2023-01-24 04:51:37.271432: step: 110/459, loss: 0.00018079181609209627 2023-01-24 04:51:37.914226: step: 112/459, loss: 2.1609810573863797e-05 2023-01-24 04:51:38.532994: step: 114/459, loss: 0.00045144505565986037 2023-01-24 04:51:39.147142: step: 116/459, loss: 0.015840239822864532 2023-01-24 04:51:39.741609: step: 118/459, loss: 0.001672366983257234 2023-01-24 04:51:40.320247: step: 120/459, loss: 0.01370259653776884 2023-01-24 04:51:40.929218: step: 122/459, loss: 0.002553352154791355 2023-01-24 04:51:41.569588: step: 124/459, loss: 8.338692168763373e-06 2023-01-24 04:51:42.132133: step: 126/459, loss: 0.006764210294932127 2023-01-24 04:51:42.733271: step: 128/459, loss: 0.00018503209867049009 2023-01-24 04:51:43.305777: step: 130/459, loss: 0.0018638541223481297 2023-01-24 04:51:43.962359: step: 132/459, loss: 0.10000630468130112 2023-01-24 04:51:44.648074: step: 134/459, loss: 0.0015166769735515118 2023-01-24 04:51:45.252418: step: 136/459, loss: 0.002265842631459236 2023-01-24 04:51:45.860958: step: 138/459, loss: 0.8341848850250244 2023-01-24 04:51:46.490417: step: 140/459, loss: 0.00017423676035832614 2023-01-24 04:51:47.110570: step: 142/459, loss: 0.002338535850867629 2023-01-24 04:51:47.689550: step: 144/459, loss: 0.0001631714985705912 2023-01-24 04:51:48.253129: step: 146/459, loss: 0.00012283891555853188 2023-01-24 04:51:48.833000: step: 148/459, loss: 2.5785426259972155e-05 2023-01-24 04:51:49.485523: step: 150/459, loss: 0.0015300411032512784 2023-01-24 04:51:50.053629: step: 152/459, loss: 
4.010594784631394e-05 2023-01-24 04:51:50.686279: step: 154/459, loss: 0.07704416662454605 2023-01-24 04:51:51.268365: step: 156/459, loss: 0.020277170464396477 2023-01-24 04:51:51.859341: step: 158/459, loss: 0.00024428204051218927 2023-01-24 04:51:52.495245: step: 160/459, loss: 0.011922710575163364 2023-01-24 04:51:53.038450: step: 162/459, loss: 0.008638364262878895 2023-01-24 04:51:53.636721: step: 164/459, loss: 1.4096412087383214e-05 2023-01-24 04:51:54.296265: step: 166/459, loss: 0.00016735863755457103 2023-01-24 04:51:54.905303: step: 168/459, loss: 8.397979399887845e-06 2023-01-24 04:51:55.547741: step: 170/459, loss: 0.007381492294371128 2023-01-24 04:51:56.172974: step: 172/459, loss: 0.0163109190762043 2023-01-24 04:51:56.789242: step: 174/459, loss: 0.011514809913933277 2023-01-24 04:51:57.425003: step: 176/459, loss: 0.011171652935445309 2023-01-24 04:51:58.013485: step: 178/459, loss: 1.4795815332036e-05 2023-01-24 04:51:58.629631: step: 180/459, loss: 0.0022883277852088213 2023-01-24 04:51:59.278781: step: 182/459, loss: 0.009617874398827553 2023-01-24 04:51:59.892558: step: 184/459, loss: 0.0004815816064365208 2023-01-24 04:52:00.528815: step: 186/459, loss: 2.69147458311636e-05 2023-01-24 04:52:01.150220: step: 188/459, loss: 0.019736582413315773 2023-01-24 04:52:01.823954: step: 190/459, loss: 6.46616899757646e-06 2023-01-24 04:52:02.453632: step: 192/459, loss: 0.0011584454914554954 2023-01-24 04:52:03.091285: step: 194/459, loss: 0.010307812131941319 2023-01-24 04:52:03.797339: step: 196/459, loss: 0.00047089619329199195 2023-01-24 04:52:04.360100: step: 198/459, loss: 0.027647657319903374 2023-01-24 04:52:04.977108: step: 200/459, loss: 0.0008780183270573616 2023-01-24 04:52:05.586729: step: 202/459, loss: 0.0007856213487684727 2023-01-24 04:52:06.191083: step: 204/459, loss: 0.0012121681356802583 2023-01-24 04:52:06.765084: step: 206/459, loss: 0.022433508187532425 2023-01-24 04:52:07.428691: step: 208/459, loss: 0.015137987211346626 2023-01-24 04:52:08.085143: step: 210/459, loss: 0.028874041512608528 2023-01-24 04:52:08.663619: step: 212/459, loss: 0.03967245668172836 2023-01-24 04:52:09.246930: step: 214/459, loss: 0.00012908798817079514 2023-01-24 04:52:09.866489: step: 216/459, loss: 0.006161016877740622 2023-01-24 04:52:10.445772: step: 218/459, loss: 0.000882515509147197 2023-01-24 04:52:11.073015: step: 220/459, loss: 0.1405765563249588 2023-01-24 04:52:11.677075: step: 222/459, loss: 0.052479445934295654 2023-01-24 04:52:12.280286: step: 224/459, loss: 0.0006403145380318165 2023-01-24 04:52:12.847395: step: 226/459, loss: 0.00032331605325452983 2023-01-24 04:52:13.498905: step: 228/459, loss: 0.014056607149541378 2023-01-24 04:52:14.055433: step: 230/459, loss: 0.00245588063262403 2023-01-24 04:52:14.664445: step: 232/459, loss: 0.01796579174697399 2023-01-24 04:52:15.264925: step: 234/459, loss: 0.002344857668504119 2023-01-24 04:52:15.864299: step: 236/459, loss: 0.011162905022501945 2023-01-24 04:52:16.448994: step: 238/459, loss: 0.00616853591054678 2023-01-24 04:52:17.029845: step: 240/459, loss: 0.00014979971456341445 2023-01-24 04:52:17.747746: step: 242/459, loss: 0.008407200686633587 2023-01-24 04:52:18.373590: step: 244/459, loss: 0.036518458276987076 2023-01-24 04:52:18.952404: step: 246/459, loss: 0.000527906056959182 2023-01-24 04:52:19.619108: step: 248/459, loss: 0.014637802727520466 2023-01-24 04:52:20.202628: step: 250/459, loss: 0.010312311351299286 2023-01-24 04:52:20.865299: step: 252/459, loss: 0.00046786939492449164 2023-01-24 
04:52:21.474862: step: 254/459, loss: 0.0006544493371620774 2023-01-24 04:52:22.078017: step: 256/459, loss: 0.003244394436478615 2023-01-24 04:52:22.694804: step: 258/459, loss: 0.0004552507307380438 2023-01-24 04:52:23.373807: step: 260/459, loss: 0.03211484104394913 2023-01-24 04:52:23.984016: step: 262/459, loss: 0.016949625685811043 2023-01-24 04:52:24.544724: step: 264/459, loss: 0.001888166181743145 2023-01-24 04:52:25.196406: step: 266/459, loss: 0.009566019289195538 2023-01-24 04:52:25.787051: step: 268/459, loss: 0.02361321449279785 2023-01-24 04:52:26.443590: step: 270/459, loss: 0.014025402255356312 2023-01-24 04:52:27.060112: step: 272/459, loss: 0.00663837231695652 2023-01-24 04:52:27.663632: step: 274/459, loss: 0.018615368753671646 2023-01-24 04:52:28.274035: step: 276/459, loss: 0.0015210473211482167 2023-01-24 04:52:28.874121: step: 278/459, loss: 0.00970454327762127 2023-01-24 04:52:29.518478: step: 280/459, loss: 0.009788411669433117 2023-01-24 04:52:30.225124: step: 282/459, loss: 0.13338236510753632 2023-01-24 04:52:30.868843: step: 284/459, loss: 0.013529833406209946 2023-01-24 04:52:31.495830: step: 286/459, loss: 0.00038397006574086845 2023-01-24 04:52:32.054615: step: 288/459, loss: 0.0004430499393492937 2023-01-24 04:52:32.738276: step: 290/459, loss: 0.1007687896490097 2023-01-24 04:52:33.378411: step: 292/459, loss: 0.0661744624376297 2023-01-24 04:52:33.953353: step: 294/459, loss: 0.003779317019507289 2023-01-24 04:52:34.556982: step: 296/459, loss: 0.023159975185990334 2023-01-24 04:52:35.220047: step: 298/459, loss: 4.3776271922979504e-05 2023-01-24 04:52:35.835323: step: 300/459, loss: 0.05273513123393059 2023-01-24 04:52:36.398104: step: 302/459, loss: 0.003829613560810685 2023-01-24 04:52:37.032109: step: 304/459, loss: 2.87820803350769e-05 2023-01-24 04:52:37.639187: step: 306/459, loss: 0.0004529423313215375 2023-01-24 04:52:38.314287: step: 308/459, loss: 0.00872135255485773 2023-01-24 04:52:38.882270: step: 310/459, loss: 0.0054794903844594955 2023-01-24 04:52:39.507607: step: 312/459, loss: 0.026152485981583595 2023-01-24 04:52:40.129507: step: 314/459, loss: 0.0007819805759936571 2023-01-24 04:52:40.671171: step: 316/459, loss: 0.0012512168614193797 2023-01-24 04:52:41.318574: step: 318/459, loss: 0.005699095316231251 2023-01-24 04:52:42.018338: step: 320/459, loss: 0.013750432059168816 2023-01-24 04:52:42.655437: step: 322/459, loss: 0.00458104582503438 2023-01-24 04:52:43.278073: step: 324/459, loss: 0.0009731086902320385 2023-01-24 04:52:43.899150: step: 326/459, loss: 0.01721147820353508 2023-01-24 04:52:44.504945: step: 328/459, loss: 0.005435234401375055 2023-01-24 04:52:45.052330: step: 330/459, loss: 0.0003514395793899894 2023-01-24 04:52:45.660475: step: 332/459, loss: 0.0005232645198702812 2023-01-24 04:52:46.180955: step: 334/459, loss: 0.001019012532196939 2023-01-24 04:52:46.839924: step: 336/459, loss: 0.0007295581744983792 2023-01-24 04:52:47.426492: step: 338/459, loss: 0.0007028350955806673 2023-01-24 04:52:47.991369: step: 340/459, loss: 0.0018598278984427452 2023-01-24 04:52:48.597068: step: 342/459, loss: 0.009678464382886887 2023-01-24 04:52:49.196819: step: 344/459, loss: 0.016959480941295624 2023-01-24 04:52:49.972381: step: 346/459, loss: 0.37538769841194153 2023-01-24 04:52:50.646418: step: 348/459, loss: 0.5615749955177307 2023-01-24 04:52:51.255576: step: 350/459, loss: 0.003129091579467058 2023-01-24 04:52:51.897187: step: 352/459, loss: 0.0064209382981061935 2023-01-24 04:52:52.503393: step: 354/459, loss: 
0.0010642679408192635 2023-01-24 04:52:53.103842: step: 356/459, loss: 0.025971632450819016 2023-01-24 04:52:53.750447: step: 358/459, loss: 0.008586006239056587 2023-01-24 04:52:54.343650: step: 360/459, loss: 0.002004128647968173 2023-01-24 04:52:54.929957: step: 362/459, loss: 0.021169723942875862 2023-01-24 04:52:55.616371: step: 364/459, loss: 0.02242998592555523 2023-01-24 04:52:56.145045: step: 366/459, loss: 0.0002255764411529526 2023-01-24 04:52:56.777442: step: 368/459, loss: 0.017264625057578087 2023-01-24 04:52:57.481115: step: 370/459, loss: 0.0009000375866889954 2023-01-24 04:52:58.120232: step: 372/459, loss: 0.0005920648691244423 2023-01-24 04:52:58.795653: step: 374/459, loss: 0.006490268744528294 2023-01-24 04:52:59.405854: step: 376/459, loss: 0.0008050583419390023 2023-01-24 04:53:00.098359: step: 378/459, loss: 0.03978024795651436 2023-01-24 04:53:00.712809: step: 380/459, loss: 0.009786209091544151 2023-01-24 04:53:01.312670: step: 382/459, loss: 0.011390886269509792 2023-01-24 04:53:01.893461: step: 384/459, loss: 0.032871898263692856 2023-01-24 04:53:02.542356: step: 386/459, loss: 0.029986383393406868 2023-01-24 04:53:03.221160: step: 388/459, loss: 0.0006605214439332485 2023-01-24 04:53:03.938921: step: 390/459, loss: 0.011007932014763355 2023-01-24 04:53:04.571756: step: 392/459, loss: 0.017992258071899414 2023-01-24 04:53:05.251776: step: 394/459, loss: 0.0072202738374471664 2023-01-24 04:53:05.910682: step: 396/459, loss: 0.4771321415901184 2023-01-24 04:53:06.478138: step: 398/459, loss: 0.012814588844776154 2023-01-24 04:53:07.095769: step: 400/459, loss: 0.001225199899636209 2023-01-24 04:53:07.728296: step: 402/459, loss: 0.0014236151473596692 2023-01-24 04:53:08.400828: step: 404/459, loss: 0.0006821874994784594 2023-01-24 04:53:09.040469: step: 406/459, loss: 0.0654115378856659 2023-01-24 04:53:09.662372: step: 408/459, loss: 0.0009295700001530349 2023-01-24 04:53:10.372195: step: 410/459, loss: 0.01962721347808838 2023-01-24 04:53:10.988918: step: 412/459, loss: 0.0537998341023922 2023-01-24 04:53:11.621783: step: 414/459, loss: 0.0011359608033671975 2023-01-24 04:53:12.198559: step: 416/459, loss: 0.008883999660611153 2023-01-24 04:53:12.850411: step: 418/459, loss: 0.013629804365336895 2023-01-24 04:53:13.442163: step: 420/459, loss: 0.002946678316220641 2023-01-24 04:53:14.065851: step: 422/459, loss: 0.0036438312381505966 2023-01-24 04:53:14.613631: step: 424/459, loss: 0.018099425360560417 2023-01-24 04:53:15.208995: step: 426/459, loss: 0.006920254323631525 2023-01-24 04:53:15.754531: step: 428/459, loss: 0.0033572930842638016 2023-01-24 04:53:16.352021: step: 430/459, loss: 0.0032245488837361336 2023-01-24 04:53:16.939437: step: 432/459, loss: 0.0023290228564292192 2023-01-24 04:53:17.568357: step: 434/459, loss: 0.0032311107497662306 2023-01-24 04:53:18.252766: step: 436/459, loss: 0.012527801096439362 2023-01-24 04:53:18.828293: step: 438/459, loss: 0.004599592182785273 2023-01-24 04:53:19.507281: step: 440/459, loss: 0.10712932050228119 2023-01-24 04:53:20.114593: step: 442/459, loss: 0.003579304553568363 2023-01-24 04:53:20.656457: step: 444/459, loss: 0.00022028251260053366 2023-01-24 04:53:21.242018: step: 446/459, loss: 0.0008350232965312898 2023-01-24 04:53:21.908832: step: 448/459, loss: 0.004145108163356781 2023-01-24 04:53:22.550538: step: 450/459, loss: 0.0014122105203568935 2023-01-24 04:53:23.198342: step: 452/459, loss: 0.00012155695731053129 2023-01-24 04:53:23.796518: step: 454/459, loss: 0.00030537493876181543 2023-01-24 
04:53:24.375859: step: 456/459, loss: 0.009681208990514278 2023-01-24 04:53:24.942076: step: 458/459, loss: 0.00120279542170465 2023-01-24 04:53:25.551181: step: 460/459, loss: 0.022579090669751167 2023-01-24 04:53:26.111064: step: 462/459, loss: 0.01668923906981945 2023-01-24 04:53:26.825595: step: 464/459, loss: 0.005636818706989288 2023-01-24 04:53:27.435395: step: 466/459, loss: 0.00014636515697930008 2023-01-24 04:53:28.066526: step: 468/459, loss: 0.006340291351079941 2023-01-24 04:53:28.684153: step: 470/459, loss: 0.0005216642748564482 2023-01-24 04:53:29.288764: step: 472/459, loss: 0.020610833540558815 2023-01-24 04:53:29.984112: step: 474/459, loss: 0.0067123291082680225 2023-01-24 04:53:30.642490: step: 476/459, loss: 0.004249088931828737 2023-01-24 04:53:31.288528: step: 478/459, loss: 5.153679376235232e-05 2023-01-24 04:53:31.995767: step: 480/459, loss: 0.0005919929826632142 2023-01-24 04:53:32.608599: step: 482/459, loss: 0.022248877212405205 2023-01-24 04:53:33.170067: step: 484/459, loss: 0.015473814681172371 2023-01-24 04:53:33.824915: step: 486/459, loss: 0.029536081477999687 2023-01-24 04:53:34.342869: step: 488/459, loss: 7.928679406177253e-05 2023-01-24 04:53:34.981640: step: 490/459, loss: 0.08154286444187164 2023-01-24 04:53:35.522558: step: 492/459, loss: 0.006073238328099251 2023-01-24 04:53:36.103975: step: 494/459, loss: 0.009599623270332813 2023-01-24 04:53:36.786413: step: 496/459, loss: 0.012890264391899109 2023-01-24 04:53:37.488696: step: 498/459, loss: 0.0003613817971199751 2023-01-24 04:53:38.107200: step: 500/459, loss: 0.006017654202878475 2023-01-24 04:53:38.691445: step: 502/459, loss: 0.11684559285640717 2023-01-24 04:53:39.364015: step: 504/459, loss: 0.018031325191259384 2023-01-24 04:53:39.950344: step: 506/459, loss: 0.01118611078709364 2023-01-24 04:53:40.680990: step: 508/459, loss: 0.0008881246903911233 2023-01-24 04:53:41.304816: step: 510/459, loss: 0.0036928774788975716 2023-01-24 04:53:41.963707: step: 512/459, loss: 0.006227694917470217 2023-01-24 04:53:42.576189: step: 514/459, loss: 0.0006926533533260226 2023-01-24 04:53:43.136159: step: 516/459, loss: 0.01010167133063078 2023-01-24 04:53:43.787820: step: 518/459, loss: 6.171659333631396e-05 2023-01-24 04:53:44.402158: step: 520/459, loss: 0.0016900165937840939 2023-01-24 04:53:44.950185: step: 522/459, loss: 0.0002726130187511444 2023-01-24 04:53:45.534416: step: 524/459, loss: 0.0009998977184295654 2023-01-24 04:53:46.157487: step: 526/459, loss: 0.0024923053570091724 2023-01-24 04:53:46.784319: step: 528/459, loss: 0.0007670830236747861 2023-01-24 04:53:47.463167: step: 530/459, loss: 0.0022989341523498297 2023-01-24 04:53:48.071198: step: 532/459, loss: 0.0010107697453349829 2023-01-24 04:53:48.682829: step: 534/459, loss: 0.0012117670848965645 2023-01-24 04:53:49.295400: step: 536/459, loss: 0.0016442947089672089 2023-01-24 04:53:49.899049: step: 538/459, loss: 0.00028943404322490096 2023-01-24 04:53:50.566006: step: 540/459, loss: 0.00045145119656808674 2023-01-24 04:53:51.209378: step: 542/459, loss: 0.08715062588453293 2023-01-24 04:53:51.823563: step: 544/459, loss: 0.06706850230693817 2023-01-24 04:53:52.428534: step: 546/459, loss: 0.002761382842436433 2023-01-24 04:53:53.022632: step: 548/459, loss: 0.003823158098384738 2023-01-24 04:53:53.641134: step: 550/459, loss: 0.005431145429611206 2023-01-24 04:53:54.209149: step: 552/459, loss: 0.00011156051914440468 2023-01-24 04:53:54.856713: step: 554/459, loss: 0.47989708185195923 2023-01-24 04:53:55.491454: step: 556/459, 
loss: 0.025216523557901382 2023-01-24 04:53:56.084953: step: 558/459, loss: 0.00020500289974734187 2023-01-24 04:53:56.688618: step: 560/459, loss: 4.164919300819747e-05 2023-01-24 04:53:57.342823: step: 562/459, loss: 0.0005838621291331947 2023-01-24 04:53:57.960194: step: 564/459, loss: 0.01403543446213007 2023-01-24 04:53:58.581848: step: 566/459, loss: 0.30383598804473877 2023-01-24 04:53:59.170629: step: 568/459, loss: 0.002092387294396758 2023-01-24 04:53:59.855324: step: 570/459, loss: 0.0067533873952925205 2023-01-24 04:54:00.497673: step: 572/459, loss: 0.004578908905386925 2023-01-24 04:54:01.114298: step: 574/459, loss: 0.00716030690819025 2023-01-24 04:54:01.706535: step: 576/459, loss: 0.0028744821902364492 2023-01-24 04:54:02.350371: step: 578/459, loss: 0.05449983850121498 2023-01-24 04:54:02.996786: step: 580/459, loss: 0.005053943954408169 2023-01-24 04:54:03.573557: step: 582/459, loss: 0.003935660235583782 2023-01-24 04:54:04.215668: step: 584/459, loss: 0.013502961955964565 2023-01-24 04:54:04.848227: step: 586/459, loss: 0.0003049001097679138 2023-01-24 04:54:05.439627: step: 588/459, loss: 0.008128901943564415 2023-01-24 04:54:06.085519: step: 590/459, loss: 0.002685113810002804 2023-01-24 04:54:06.689376: step: 592/459, loss: 0.011831517331302166 2023-01-24 04:54:07.326496: step: 594/459, loss: 0.453083872795105 2023-01-24 04:54:07.978384: step: 596/459, loss: 8.762772631598637e-05 2023-01-24 04:54:08.611309: step: 598/459, loss: 0.048386603593826294 2023-01-24 04:54:09.229252: step: 600/459, loss: 0.00034988808329217136 2023-01-24 04:54:09.827862: step: 602/459, loss: 0.012595182284712791 2023-01-24 04:54:10.430249: step: 604/459, loss: 0.06413163244724274 2023-01-24 04:54:11.042112: step: 606/459, loss: 1.6690248230588622e-05 2023-01-24 04:54:11.639343: step: 608/459, loss: 0.00016836349095683545 2023-01-24 04:54:12.351454: step: 610/459, loss: 0.008307889103889465 2023-01-24 04:54:12.957031: step: 612/459, loss: 0.0012849281774833798 2023-01-24 04:54:13.581742: step: 614/459, loss: 0.0003110389516223222 2023-01-24 04:54:14.209020: step: 616/459, loss: 0.002557017607614398 2023-01-24 04:54:14.812903: step: 618/459, loss: 0.0049609653651714325 2023-01-24 04:54:15.477529: step: 620/459, loss: 0.015468819066882133 2023-01-24 04:54:16.071790: step: 622/459, loss: 0.0004765949852298945 2023-01-24 04:54:16.650108: step: 624/459, loss: 0.03225879371166229 2023-01-24 04:54:17.274156: step: 626/459, loss: 0.04309697449207306 2023-01-24 04:54:17.829006: step: 628/459, loss: 8.721491030883044e-05 2023-01-24 04:54:18.387547: step: 630/459, loss: 0.0014491096371784806 2023-01-24 04:54:19.037518: step: 632/459, loss: 0.02094104140996933 2023-01-24 04:54:19.696899: step: 634/459, loss: 0.008639845065772533 2023-01-24 04:54:20.332587: step: 636/459, loss: 0.028809307143092155 2023-01-24 04:54:20.979619: step: 638/459, loss: 0.0009237484773620963 2023-01-24 04:54:21.633476: step: 640/459, loss: 7.331348024308681e-05 2023-01-24 04:54:22.234349: step: 642/459, loss: 0.0007067437982186675 2023-01-24 04:54:22.827540: step: 644/459, loss: 0.0011346323881298304 2023-01-24 04:54:23.409779: step: 646/459, loss: 5.3619220125256106e-05 2023-01-24 04:54:23.972354: step: 648/459, loss: 0.0006362090352922678 2023-01-24 04:54:24.585888: step: 650/459, loss: 0.006276685744524002 2023-01-24 04:54:25.229549: step: 652/459, loss: 0.01656927913427353 2023-01-24 04:54:25.901792: step: 654/459, loss: 0.00096513656899333 2023-01-24 04:54:26.506726: step: 656/459, loss: 0.0012091208482161164 2023-01-24 
04:54:27.155191: step: 658/459, loss: 0.05463232472538948 2023-01-24 04:54:27.738968: step: 660/459, loss: 0.00012962421169504523 2023-01-24 04:54:28.297904: step: 662/459, loss: 4.646937213692581e-06 2023-01-24 04:54:28.959300: step: 664/459, loss: 0.0008869785815477371 2023-01-24 04:54:29.548651: step: 666/459, loss: 0.04454561322927475 2023-01-24 04:54:30.177575: step: 668/459, loss: 0.07209022343158722 2023-01-24 04:54:30.840055: step: 670/459, loss: 0.018645968288183212 2023-01-24 04:54:31.396021: step: 672/459, loss: 0.0144589152187109 2023-01-24 04:54:32.045710: step: 674/459, loss: 0.00022390058438759297 2023-01-24 04:54:32.689435: step: 676/459, loss: 0.08114124089479446 2023-01-24 04:54:33.322515: step: 678/459, loss: 0.0008177881245501339 2023-01-24 04:54:33.896107: step: 680/459, loss: 0.00637830700725317 2023-01-24 04:54:34.532152: step: 682/459, loss: 0.008720604702830315 2023-01-24 04:54:35.171082: step: 684/459, loss: 0.0008492283523082733 2023-01-24 04:54:35.832522: step: 686/459, loss: 0.0002198449947172776 2023-01-24 04:54:36.451336: step: 688/459, loss: 0.04001582786440849 2023-01-24 04:54:37.097427: step: 690/459, loss: 0.007720293011516333 2023-01-24 04:54:37.740724: step: 692/459, loss: 0.0018566121580079198 2023-01-24 04:54:38.376147: step: 694/459, loss: 0.016914037987589836 2023-01-24 04:54:38.980452: step: 696/459, loss: 0.01871415600180626 2023-01-24 04:54:39.539282: step: 698/459, loss: 0.0005987820914015174 2023-01-24 04:54:40.155179: step: 700/459, loss: 0.005220330320298672 2023-01-24 04:54:40.730965: step: 702/459, loss: 0.06714221835136414 2023-01-24 04:54:41.367149: step: 704/459, loss: 0.008873095735907555 2023-01-24 04:54:41.982939: step: 706/459, loss: 0.02839651331305504 2023-01-24 04:54:42.600573: step: 708/459, loss: 0.015555751509964466 2023-01-24 04:54:43.301846: step: 710/459, loss: 0.00708047254011035 2023-01-24 04:54:43.953029: step: 712/459, loss: 0.03267081826925278 2023-01-24 04:54:44.581758: step: 714/459, loss: 0.04752252250909805 2023-01-24 04:54:45.154481: step: 716/459, loss: 0.06846141815185547 2023-01-24 04:54:45.765742: step: 718/459, loss: 0.02000146545469761 2023-01-24 04:54:46.379977: step: 720/459, loss: 0.01806265488266945 2023-01-24 04:54:46.980292: step: 722/459, loss: 0.008251394145190716 2023-01-24 04:54:47.585969: step: 724/459, loss: 0.0015776724321767688 2023-01-24 04:54:48.155576: step: 726/459, loss: 0.007893107831478119 2023-01-24 04:54:48.771607: step: 728/459, loss: 0.0060599143616855145 2023-01-24 04:54:49.356906: step: 730/459, loss: 0.006801737006753683 2023-01-24 04:54:49.979051: step: 732/459, loss: 0.013510461896657944 2023-01-24 04:54:50.554396: step: 734/459, loss: 0.004565502051264048 2023-01-24 04:54:51.191036: step: 736/459, loss: 0.0036682067438960075 2023-01-24 04:54:51.796624: step: 738/459, loss: 0.609318733215332 2023-01-24 04:54:52.375304: step: 740/459, loss: 0.00046349188778549433 2023-01-24 04:54:53.056457: step: 742/459, loss: 0.122612863779068 2023-01-24 04:54:53.695261: step: 744/459, loss: 0.015260649845004082 2023-01-24 04:54:54.357013: step: 746/459, loss: 0.02521752379834652 2023-01-24 04:54:55.026854: step: 748/459, loss: 0.0006513801054097712 2023-01-24 04:54:55.673750: step: 750/459, loss: 0.0322779081761837 2023-01-24 04:54:56.225213: step: 752/459, loss: 0.0003467639908194542 2023-01-24 04:54:56.809899: step: 754/459, loss: 0.3352759778499603 2023-01-24 04:54:57.371960: step: 756/459, loss: 5.320789568941109e-05 2023-01-24 04:54:58.011638: step: 758/459, loss: 9.193258301820606e-05 
2023-01-24 04:54:58.618169: step: 760/459, loss: 0.04289156198501587 2023-01-24 04:54:59.276835: step: 762/459, loss: 0.0007733869133517146 2023-01-24 04:54:59.945817: step: 764/459, loss: 0.06969955563545227 2023-01-24 04:55:00.647516: step: 766/459, loss: 0.0020245204214006662 2023-01-24 04:55:01.265237: step: 768/459, loss: 7.419702160404995e-05 2023-01-24 04:55:01.890937: step: 770/459, loss: 0.0012896201806142926 2023-01-24 04:55:02.541270: step: 772/459, loss: 0.029355628415942192 2023-01-24 04:55:03.117112: step: 774/459, loss: 0.009124183095991611 2023-01-24 04:55:03.663027: step: 776/459, loss: 0.0005997710977680981 2023-01-24 04:55:04.340535: step: 778/459, loss: 0.004343274049460888 2023-01-24 04:55:05.025791: step: 780/459, loss: 0.025675300508737564 2023-01-24 04:55:05.612260: step: 782/459, loss: 0.010678974911570549 2023-01-24 04:55:06.221253: step: 784/459, loss: 0.0005459152162075043 2023-01-24 04:55:06.853085: step: 786/459, loss: 0.004466529935598373 2023-01-24 04:55:07.455745: step: 788/459, loss: 0.0032622073777019978 2023-01-24 04:55:08.074592: step: 790/459, loss: 0.027499623596668243 2023-01-24 04:55:08.652635: step: 792/459, loss: 0.01739952340722084 2023-01-24 04:55:09.308426: step: 794/459, loss: 0.005134927108883858 2023-01-24 04:55:09.884088: step: 796/459, loss: 0.00969267450273037 2023-01-24 04:55:10.472593: step: 798/459, loss: 0.01619791053235531 2023-01-24 04:55:11.056027: step: 800/459, loss: 0.021067628636956215 2023-01-24 04:55:11.632502: step: 802/459, loss: 0.0007882756763137877 2023-01-24 04:55:12.284345: step: 804/459, loss: 0.0193905271589756 2023-01-24 04:55:12.914380: step: 806/459, loss: 0.1739833801984787 2023-01-24 04:55:13.523436: step: 808/459, loss: 0.0034754332154989243 2023-01-24 04:55:14.116393: step: 810/459, loss: 0.0022289473563432693 2023-01-24 04:55:14.707959: step: 812/459, loss: 0.009521136991679668 2023-01-24 04:55:15.345185: step: 814/459, loss: 0.017178872600197792 2023-01-24 04:55:16.035195: step: 816/459, loss: 3.9568527427036315e-05 2023-01-24 04:55:16.584351: step: 818/459, loss: 0.00013559870421886444 2023-01-24 04:55:17.128727: step: 820/459, loss: 0.00024096031847875565 2023-01-24 04:55:17.708215: step: 822/459, loss: 0.005944909993559122 2023-01-24 04:55:18.400376: step: 824/459, loss: 0.0064760479144752026 2023-01-24 04:55:18.978458: step: 826/459, loss: 0.0021523283794522285 2023-01-24 04:55:19.579911: step: 828/459, loss: 0.002908360678702593 2023-01-24 04:55:20.163975: step: 830/459, loss: 0.9745286107063293 2023-01-24 04:55:20.805032: step: 832/459, loss: 0.005584703292697668 2023-01-24 04:55:21.344324: step: 834/459, loss: 0.0004902730579487979 2023-01-24 04:55:21.918084: step: 836/459, loss: 0.0005916420486755669 2023-01-24 04:55:22.518240: step: 838/459, loss: 0.0006531256367452443 2023-01-24 04:55:23.148935: step: 840/459, loss: 0.005585872568190098 2023-01-24 04:55:23.769769: step: 842/459, loss: 0.0005421642563305795 2023-01-24 04:55:24.326226: step: 844/459, loss: 0.0011085092555731535 2023-01-24 04:55:24.973997: step: 846/459, loss: 0.000987773877568543 2023-01-24 04:55:25.581522: step: 848/459, loss: 0.0053025782108306885 2023-01-24 04:55:26.151336: step: 850/459, loss: 0.04489670693874359 2023-01-24 04:55:26.779057: step: 852/459, loss: 0.032778285443782806 2023-01-24 04:55:27.481601: step: 854/459, loss: 0.0002801896771416068 2023-01-24 04:55:28.101885: step: 856/459, loss: 0.0015414586523547769 2023-01-24 04:55:28.710283: step: 858/459, loss: 0.09101995080709457 2023-01-24 04:55:29.434716: step: 
860/459, loss: 0.009250346571207047 2023-01-24 04:55:30.035313: step: 862/459, loss: 0.0006060494342818856 2023-01-24 04:55:30.697557: step: 864/459, loss: 0.010153562761843204 2023-01-24 04:55:31.255887: step: 866/459, loss: 0.009221027605235577 2023-01-24 04:55:31.881633: step: 868/459, loss: 0.0026687474455684423 2023-01-24 04:55:32.506635: step: 870/459, loss: 0.0005087440367788076 2023-01-24 04:55:33.120543: step: 872/459, loss: 8.678681479068473e-05 2023-01-24 04:55:33.792901: step: 874/459, loss: 1.1562827825546265 2023-01-24 04:55:34.398815: step: 876/459, loss: 0.07512712478637695 2023-01-24 04:55:34.996594: step: 878/459, loss: 0.04339824616909027 2023-01-24 04:55:35.576907: step: 880/459, loss: 0.0052745286375284195 2023-01-24 04:55:36.133488: step: 882/459, loss: 0.0033128452487289906 2023-01-24 04:55:36.777749: step: 884/459, loss: 0.0156217897310853 2023-01-24 04:55:37.453925: step: 886/459, loss: 0.0007347297505475581 2023-01-24 04:55:37.995994: step: 888/459, loss: 0.06703148037195206 2023-01-24 04:55:38.616010: step: 890/459, loss: 0.024640383198857307 2023-01-24 04:55:39.196817: step: 892/459, loss: 0.004359186161309481 2023-01-24 04:55:39.831597: step: 894/459, loss: 0.004566504154354334 2023-01-24 04:55:40.542973: step: 896/459, loss: 0.0007570600719191134 2023-01-24 04:55:41.197778: step: 898/459, loss: 0.0017347059911116958 2023-01-24 04:55:41.838662: step: 900/459, loss: 0.7988271117210388 2023-01-24 04:55:42.506710: step: 902/459, loss: 0.09740555286407471 2023-01-24 04:55:43.111122: step: 904/459, loss: 0.02244940586388111 2023-01-24 04:55:43.779377: step: 906/459, loss: 0.01841135136783123 2023-01-24 04:55:44.413953: step: 908/459, loss: 0.010562312789261341 2023-01-24 04:55:45.021586: step: 910/459, loss: 0.06267991662025452 2023-01-24 04:55:45.665814: step: 912/459, loss: 0.08008779585361481 2023-01-24 04:55:46.290370: step: 914/459, loss: 0.0006906664930284023 2023-01-24 04:55:46.846928: step: 916/459, loss: 0.03108927421271801 2023-01-24 04:55:47.411321: step: 918/459, loss: 0.0002505404409021139 2023-01-24 04:55:47.901945: step: 920/459, loss: 0.0004698470002040267
==================================================
Loss: 0.031
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.35578125000000005, 'r': 0.31054909867172675, 'f1': 0.3316299392097265}, 'combined': 0.24435890257558793, 'epoch': 39}
Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.35691623252407173, 'r': 0.29488620485155337, 'f1': 0.32294961547768525}, 'combined': 0.20668775390571853, 'epoch': 39}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3514051148635382, 'r': 0.3087297308952907, 'f1': 0.3286880165289256}, 'combined': 0.24219117007394517, 'epoch': 39}
Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.358326697359685, 'r': 0.2895305798502277, 'f1': 0.320275900609361}, 'combined': 0.204976576389991, 'epoch': 39}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.37314320928247824, 'r': 0.31295882068853015, 'f1': 0.340411348819103}, 'combined': 0.250829414919339, 'epoch': 39}
Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.35930116082682667, 'r': 0.30520928898702154, 'f1': 0.33005365636879685}, 'combined': 0.23664224418894872, 'epoch': 39}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.2889784946236559, 'r': 0.25595238095238093, 'f1': 0.2714646464646464}, 'combined': 0.18097643097643093, 'epoch': 39}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.275, 'r': 0.2391304347826087, 'f1': 0.2558139534883721}, 'combined': 0.12790697674418605, 'epoch': 39}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4, 'r': 0.13793103448275862, 'f1': 0.20512820512820515}, 'combined': 0.13675213675213677, 'epoch': 39}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3157146918227204, 'r': 0.32470087849699136, 'f1': 0.32014473894839}, 'combined': 0.2358961234356558, 'epoch': 10}
Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.34475450876253594, 'r': 0.29210109287880315, 'f1': 0.3162511832349247}, 'combined': 0.20240075727035176, 'epoch': 10}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.3333333333333333, 'r': 0.38095238095238093, 'f1': 0.35555555555555557}, 'combined': 0.23703703703703705, 'epoch': 10}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.3234579439252337, 'r': 0.32836812144212524, 'f1': 0.32589453860640305}, 'combined': 0.2401328179205075, 'epoch': 25}
Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.33820520545292077, 'r': 0.29673590233199043, 'f1': 0.3161163313667358}, 'combined': 0.20231445207471088, 'epoch': 25}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.32142857142857145, 'r': 0.391304347826087, 'f1': 0.35294117647058826}, 'combined': 0.17647058823529413, 'epoch': 25}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.34963790322580646, 'r': 0.33172476586888655, 'f1': 0.340445864874203}, 'combined': 0.25085484780204426, 'epoch': 8}
Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.36288552215953584, 'r': 0.3119426138527277, 'f1': 0.3354912229376885}, 'combined': 0.2405408768232484, 'epoch': 8}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4166666666666667, 'r': 0.1724137931034483, 'f1': 0.2439024390243903}, 'combined': 0.1626016260162602, 'epoch': 8}
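Note on reading the summary blocks above: in every evaluation block of this log, each 'f1' is the usual harmonic mean of the listed 'p' and 'r', and each language's 'combined' value equals its template F1 multiplied by its slot F1 (e.g. for epoch-39 Dev Chinese, 0.7368421052631579 * 0.3316299392097265 = 0.24435890257558793). The Python sketch below only illustrates that arithmetic; it is not code from train.py, and the helper names are hypothetical.

# Minimal sketch of how the summary numbers relate (illustrative, not from train.py).
def f1(p, r):
    # Harmonic mean of precision and recall; returns 0 when both are 0.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined(template_f1, slot_f1):
    # In this log, 'combined' always equals template F1 times slot F1.
    return template_f1 * slot_f1

# Epoch-39 Dev Chinese values taken from the block above:
template_f1 = f1(1.0, 0.5833333333333334)               # ~0.7368421052631579
slot_f1 = f1(0.35578125000000005, 0.31054909867172675)  # ~0.3316299392097265
print(combined(template_f1, slot_f1))                   # ~0.24435890257558793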